diff options
Diffstat (limited to 'intern/cycles/kernel/device/oneapi/compat.h')
-rw-r--r-- | intern/cycles/kernel/device/oneapi/compat.h | 82 |
1 files changed, 19 insertions, 63 deletions
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h index 1b25259bcf5..dfaec65130c 100644 --- a/intern/cycles/kernel/device/oneapi/compat.h +++ b/intern/cycles/kernel/device/oneapi/compat.h @@ -10,6 +10,7 @@ #define CCL_NAMESPACE_END #include <cstdint> +#include <math.h> #ifndef __NODES_MAX_GROUP__ # define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX @@ -30,7 +31,7 @@ #define ccl_global #define ccl_always_inline __attribute__((always_inline)) #define ccl_device_inline inline -#define ccl_noinline +#define ccl_noinline __attribute__((noinline)) #define ccl_inline_constant const constexpr #define ccl_static_constant const #define ccl_device_forceinline __attribute__((always_inline)) @@ -54,18 +55,6 @@ #define ccl_gpu_kernel(block_num_threads, thread_num_registers) #define ccl_gpu_kernel_threads(block_num_threads) -#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED -# define KG_ND_ITEMS \ - kg->nd_item_local_id_0 = item.get_local_id(0); \ - kg->nd_item_local_range_0 = item.get_local_range(0); \ - kg->nd_item_group_0 = item.get_group(0); \ - kg->nd_item_group_range_0 = item.get_group_range(0); \ - kg->nd_item_global_id_0 = item.get_global_id(0); \ - kg->nd_item_global_range_0 = item.get_global_range(0); -#else -# define KG_ND_ITEMS -#endif - #define ccl_gpu_kernel_signature(name, ...) \ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \ size_t kernel_global_size, \ @@ -75,8 +64,7 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \ (kg); \ cgh.parallel_for<class kernel_##name>( \ sycl::nd_range<1>(kernel_global_size, kernel_local_size), \ - [=](sycl::nd_item<1> item) { \ - KG_ND_ITEMS + [=](sycl::nd_item<1> item) { #define ccl_gpu_kernel_postfix \ }); \ @@ -94,31 +82,17 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \ } ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg) /* GPU thread, block, grid size and index */ -#ifndef WITH_ONEAPI_SYCL_HOST_ENABLED -# define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0)) -# define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0)) -# define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0)) -# define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0)) -# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0]) -# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) - -# define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0)) -# define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0)) -#else -# define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0) -# define ccl_gpu_block_dim_x (kg->nd_item_local_range_0) -# define ccl_gpu_block_idx_x (kg->nd_item_group_0) -# define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0) -# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0]) -# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) - -# define ccl_gpu_global_id_x() (kg->nd_item_global_id_0) -# define ccl_gpu_global_size_x() (kg->nd_item_global_range_0) -#endif +#define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0)) +#define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0)) +#define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0)) +#define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0)) +#define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0]) +#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) +#define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0)) +#define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0)) /* GPU warp synchronization */ - #define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier() #define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space) #ifdef __SYCL_DEVICE_ONLY__ @@ -149,25 +123,13 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \ /* clang-format on */ /* Types */ + /* It's not possible to use sycl types like sycl::float3, sycl::int3, etc - * because these types have different interfaces from blender version */ + * because these types have different interfaces from blender version. */ using uchar = unsigned char; using sycl::half; -struct float3 { - float x, y, z; -}; - -ccl_always_inline float3 make_float3(float x, float y, float z) -{ - return {x, y, z}; -} -ccl_always_inline float3 make_float3(float x) -{ - return {x, x, x}; -} - /* math functions */ #define fabsf(x) sycl::fabs((x)) #define copysignf(x, y) sycl::copysign((x), (y)) @@ -186,21 +148,15 @@ ccl_always_inline float3 make_float3(float x) #define fmodf(x, y) sycl::fmod((x), (y)) #define lgammaf(x) sycl::lgamma((x)) -#define __forceinline __attribute__((always_inline)) - -/* Types */ -#include "util/half.h" -#include "util/types.h" - -/* NOTE(@nsirgien): Declaring these functions after types headers is very important because they - * include oneAPI headers, which transitively include math.h headers which will cause redefinitions - * of the math defines because math.h also uses them and having them defined before math.h include - * is actually UB. */ -/* Use fast math functions - get them from sycl::native namespace for native math function - * implementations */ #define cosf(x) sycl::native::cos(((float)(x))) #define sinf(x) sycl::native::sin(((float)(x))) #define powf(x, y) sycl::native::powr(((float)(x)), ((float)(y))) #define tanf(x) sycl::native::tan(((float)(x))) #define logf(x) sycl::native::log(((float)(x))) #define expf(x) sycl::native::exp(((float)(x))) + +#define __forceinline __attribute__((always_inline)) + +/* Types */ +#include "util/half.h" +#include "util/types.h" |