Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/device')
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch_impl.h10
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h8
-rw-r--r--intern/cycles/kernel/device/metal/compat.h16
-rw-r--r--intern/cycles/kernel/device/metal/kernel.metal2
-rw-r--r--intern/cycles/kernel/device/oneapi/compat.h19
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel.cpp20
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel_templates.h2
7 files changed, 38 insertions, 39 deletions
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
index 0e5f7b4a2fd..0d7c06f4fc6 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
@@ -34,7 +34,7 @@
# include "kernel/integrator/megakernel.h"
# include "kernel/film/adaptive_sampling.h"
-# include "kernel/film/id_passes.h"
+# include "kernel/film/cryptomatte_passes.h"
# include "kernel/film/read.h"
# include "kernel/bake/bake.h"
@@ -169,7 +169,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_convergence_check);
return false;
#else
- return kernel_adaptive_sampling_convergence_check(
+ return film_adaptive_sampling_convergence_check(
kg, render_buffer, x, y, threshold, reset, offset, stride);
#endif
}
@@ -185,7 +185,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCP
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_x);
#else
- kernel_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
+ film_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
#endif
}
@@ -200,7 +200,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_y);
#else
- kernel_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
+ film_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
#endif
}
@@ -215,7 +215,7 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, cryptomatte_postprocess);
#else
- kernel_cryptomatte_post(kg, render_buffer, pixel_index);
+ film_cryptomatte_post(kg, render_buffer, pixel_index);
#endif
}
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index e1ab802aa80..d7d2000775f 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -526,7 +526,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
bool converged = true;
if (x < sw && y < sh) {
- converged = ccl_gpu_kernel_call(kernel_adaptive_sampling_convergence_check(
+ converged = ccl_gpu_kernel_call(film_adaptive_sampling_convergence_check(
nullptr, render_buffer, sx + x, sy + y, threshold, reset, offset, stride));
}
@@ -553,7 +553,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (y < sh) {
ccl_gpu_kernel_call(
- kernel_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride));
+ film_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride));
}
}
ccl_gpu_kernel_postfix
@@ -572,7 +572,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (x < sw) {
ccl_gpu_kernel_call(
- kernel_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride));
+ film_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride));
}
}
ccl_gpu_kernel_postfix
@@ -589,7 +589,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
const int pixel_index = ccl_gpu_global_id_x();
if (pixel_index < num_pixels) {
- ccl_gpu_kernel_call(kernel_cryptomatte_post(nullptr, render_buffer, pixel_index));
+ ccl_gpu_kernel_call(film_cryptomatte_post(nullptr, render_buffer, pixel_index));
}
}
ccl_gpu_kernel_postfix
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index b20cfca9a9c..130a9ebafae 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,11 +29,12 @@ using namespace metal::raytracing;
/* Qualifiers */
-#if defined(__KERNEL_METAL_APPLE__)
+/* Inline everything for Apple GPUs. This gives ~1.1x speedup and 10% spill
+ * reduction for integator_shade_surface. However it comes at the cost of
+ * longer compile times (~4.5 minutes on M1 Max) and is disabled for that
+ * reason, until there is a user option to manually enable it. */
-/* Inline everything for Apple GPUs.
- * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
- * at the cost of longer compile times (~4.5 minutes on M1 Max). */
+#if 0 // defined(__KERNEL_METAL_APPLE__)
# define ccl_device __attribute__((always_inline))
# define ccl_device_inline __attribute__((always_inline))
@@ -45,8 +46,11 @@ using namespace metal::raytracing;
# define ccl_device
# define ccl_device_inline ccl_device
# define ccl_device_forceinline ccl_device
-# define ccl_device_noinline ccl_device __attribute__((noinline))
-
+# if defined(__KERNEL_METAL_APPLE__)
+# define ccl_device_noinline ccl_device
+# else
+# define ccl_device_noinline ccl_device __attribute__((noinline))
+# endif
#endif
#define ccl_device_noinline_cpu ccl_device
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index 3de8c069c30..5646c7446db 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -321,7 +321,7 @@ inline TReturnType metalrt_visibility_test(
constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
const uint object,
- const uint prim,
+ uint prim,
const float u)
{
TReturnType result;
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index 5c49674f247..8ae40b0612e 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -10,6 +10,7 @@
#define CCL_NAMESPACE_END
#include <cstdint>
+#include <math.h>
#ifndef __NODES_MAX_GROUP__
# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
@@ -174,21 +175,15 @@ using sycl::half;
#define fmodf(x, y) sycl::fmod((x), (y))
#define lgammaf(x) sycl::lgamma((x))
-#define __forceinline __attribute__((always_inline))
-
-/* Types */
-#include "util/half.h"
-#include "util/types.h"
-
-/* NOTE(@nsirgien): Declaring these functions after types headers is very important because they
- * include oneAPI headers, which transitively include math.h headers which will cause redefinitions
- * of the math defines because math.h also uses them and having them defined before math.h include
- * is actually UB. */
-/* Use fast math functions - get them from sycl::native namespace for native math function
- * implementations */
#define cosf(x) sycl::native::cos(((float)(x)))
#define sinf(x) sycl::native::sin(((float)(x)))
#define powf(x, y) sycl::native::powr(((float)(x)), ((float)(y)))
#define tanf(x) sycl::native::tan(((float)(x)))
#define logf(x) sycl::native::log(((float)(x)))
#define expf(x) sycl::native::exp(((float)(x)))
+
+#define __forceinline __attribute__((always_inline))
+
+/* Types */
+#include "util/half.h"
+#include "util/types.h"
diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp
index 7e90c553c44..097d21b963f 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -645,13 +645,9 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
/* Unsupported kernels */
case DEVICE_KERNEL_NUM:
case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
- assert(0);
- return false;
+ kernel_assert(0);
+ break;
}
-
- /* Unknown kernel. */
- assert(0);
- return false;
});
}
catch (sycl::exception const &e) {
@@ -669,7 +665,11 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
return success;
}
-static const int lowest_supported_driver_version_win = 1011660;
+/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
+ * since Windows driver 101.3268. */
+/* The same min compute-runtime version is currently required across Windows and Linux.
+ * For Windows driver 101.3268, compute-runtime version is 23570. */
+static const int lowest_supported_driver_version_win = 1013268;
static const int lowest_supported_driver_version_neo = 23570;
static int parse_driver_build_version(const sycl::device &device)
@@ -773,8 +773,7 @@ static std::vector<sycl::device> oneapi_available_devices()
int driver_build_version = parse_driver_build_version(device);
if ((driver_build_version > 100000 &&
driver_build_version < lowest_supported_driver_version_win) ||
- (driver_build_version > 0 &&
- driver_build_version < lowest_supported_driver_version_neo)) {
+ driver_build_version < lowest_supported_driver_version_neo) {
filter_out = true;
}
}
@@ -818,7 +817,8 @@ char *oneapi_device_capabilities()
GET_NUM_ATTR(max_compute_units)
GET_NUM_ATTR(max_work_item_dimensions)
- sycl::id<3> max_work_item_sizes = device.get_info<sycl::info::device::max_work_item_sizes>();
+ sycl::id<3> max_work_item_sizes =
+ device.get_info<sycl::info::device::max_work_item_sizes<3>>();
WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
diff --git a/intern/cycles/kernel/device/oneapi/kernel_templates.h b/intern/cycles/kernel/device/oneapi/kernel_templates.h
index d8964d9b672..0ae925cf748 100644
--- a/intern/cycles/kernel/device/oneapi/kernel_templates.h
+++ b/intern/cycles/kernel/device/oneapi/kernel_templates.h
@@ -80,7 +80,7 @@ void oneapi_call(
(x, ##__VA_ARGS__)
/* This template automatically casts entries in the void **args array to the types requested by the kernel func.
- Since kernel parameters are passed as void ** to the device, this is the closest that we have to type safety. */
+ * Since kernel parameters are passed as void ** to the device, this is the closest that we have to type safety. */
#define oneapi_template(...) \
template<ONEAPI_CALL_FOR(ONEAPI_TYP, __VA_ARGS__)> \
void oneapi_call( \