diff options
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 34 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/shadow_all.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/util.h | 8 | ||||
-rw-r--r-- | intern/cycles/kernel/camera/projection.h | 26 | ||||
-rw-r--r-- | intern/cycles/kernel/device/cpu/bvh.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/device/gpu/parallel_active_index.h | 33 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/context_begin.h | 37 | ||||
-rw-r--r-- | intern/cycles/kernel/device/metal/kernel.metal | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/device/oneapi/compat.h | 45 | ||||
-rw-r--r-- | intern/cycles/kernel/device/oneapi/globals.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/device/oneapi/kernel.cpp | 17 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/bvh.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/types.h | 1 |
13 files changed, 112 insertions, 106 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 36c8b23d983..81c5f593974 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -713,10 +713,17 @@ endif() # oneAPI module if(WITH_CYCLES_DEVICE_ONEAPI) + if(WITH_CYCLES_ONEAPI_BINARIES) + set(cycles_kernel_oneapi_lib_suffix "_aot") + else() + set(cycles_kernel_oneapi_lib_suffix "_jit") + endif() + if(WIN32) - set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll) + set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.dll) + set(cycles_kernel_oneapi_linker_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.lib) else() - set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/libcycles_kernel_oneapi.so) + set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/libcycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.so) endif() set(cycles_oneapi_kernel_sources @@ -751,10 +758,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI) ${SYCL_CPP_FLAGS} ) - if (WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED) - list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_ENABLED) - endif() - # Set defaults for spir64 and spir64_gen options if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64) set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'") @@ -767,6 +770,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI) string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ") if (WITH_CYCLES_ONEAPI_BINARIES) + # AoT binaries aren't currently reused when calling sycl::build. + list (APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD) # Iterate over all targest and their options list (JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string) list (APPEND sycl_compiler_flags -fsycl-targets=${targets_string}) @@ -819,12 +824,17 @@ if(WITH_CYCLES_DEVICE_ONEAPI) -DONEAPI_EXPORT) string(REPLACE /Redist/ /Tools/ MSVC_TOOLS_DIR ${MSVC_REDIST_DIR}) - if(NOT CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) # case for Ninja on Windows + # Version Folder between Redist and Tools can mismatch sometimes + if(NOT EXISTS ${MSVC_TOOLS_DIR}) + get_filename_component(cmake_ar_dir ${CMAKE_AR} DIRECTORY) + get_filename_component(MSVC_TOOLS_DIR "${cmake_ar_dir}/../../../" ABSOLUTE) + endif() + if(CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) + set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}) + else() # case for Ninja on Windows get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY) string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir}) get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE) - else() - set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}) endif() list(APPEND sycl_compiler_flags -L "${MSVC_TOOLS_DIR}/lib/x64" @@ -836,15 +846,13 @@ if(WITH_CYCLES_DEVICE_ONEAPI) set(sycl_compiler_flags_RelWithDebInfo ${sycl_compiler_flags}) set(sycl_compiler_flags_MinSizeRel ${sycl_compiler_flags}) list(APPEND sycl_compiler_flags_RelWithDebInfo -g) - get_filename_component(sycl_library_debug_name ${SYCL_LIBRARY_DEBUG} NAME_WE) list(APPEND sycl_compiler_flags_Debug -g -D_DEBUG - -nostdlib -Xclang --dependent-lib=msvcrtd - -Xclang --dependent-lib=${sycl_library_debug_name}) + -nostdlib -Xclang --dependent-lib=msvcrtd) add_custom_command( - OUTPUT ${cycles_kernel_oneapi_lib} + OUTPUT ${cycles_kernel_oneapi_lib} ${cycles_kernel_oneapi_linker_lib} COMMAND ${CMAKE_COMMAND} -E env "LIB=${sycl_compiler_root}/../lib" # for compiler to find sycl.lib "PATH=${OCLOC_INSTALL_DIR}\;${sycl_compiler_root}" diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h index 2ffe1496c72..b31ba479e4f 100644 --- a/intern/cycles/kernel/bvh/shadow_all.h +++ b/intern/cycles/kernel/bvh/shadow_all.h @@ -229,7 +229,7 @@ ccl_device_inline /* Always use baked shadow transparency for curves. */ if (isect.type & PRIMITIVE_CURVE) { *r_throughput *= intersection_curve_shadow_transparency( - kg, isect.object, isect.prim, isect.u); + kg, isect.object, isect.prim, isect.type, isect.u); if (*r_throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { return true; diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h index a57703a8b8c..9ba787550c5 100644 --- a/intern/cycles/kernel/bvh/util.h +++ b/intern/cycles/kernel/bvh/util.h @@ -190,10 +190,8 @@ ccl_device_inline int intersection_find_attribute(KernelGlobals kg, /* Cut-off value to stop transparent shadow tracing when practically opaque. */ #define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f -ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg, - const int object, - const int prim, - const float u) +ccl_device_inline float intersection_curve_shadow_transparency( + KernelGlobals kg, const int object, const int prim, const int type, const float u) { /* Find attribute. */ const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY); @@ -204,7 +202,7 @@ ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg, /* Interpolate transparency between curve keys. */ const KernelCurve kcurve = kernel_data_fetch(curves, prim); - const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type); + const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type); const int k1 = k0 + 1; const float f0 = kernel_data_fetch(attributes_float, offset + k0); diff --git a/intern/cycles/kernel/camera/projection.h b/intern/cycles/kernel/camera/projection.h index c9fe3a6c7fb..1d16aa35abe 100644 --- a/intern/cycles/kernel/camera/projection.h +++ b/intern/cycles/kernel/camera/projection.h @@ -201,11 +201,35 @@ ccl_device float2 direction_to_mirrorball(float3 dir) return make_float2(u, v); } +/* Single face of a equiangular cube map projection as described in + https://blog.google/products/google-ar-vr/bringing-pixels-front-and-center-vr-video/ */ +ccl_device float3 equiangular_cubemap_face_to_direction(float u, float v) +{ + u = (1.0f - u); + + u = tanf(u * M_PI_2_F - M_PI_4_F); + v = tanf(v * M_PI_2_F - M_PI_4_F); + + return make_float3(1.0f, u, v); +} + +ccl_device float2 direction_to_equiangular_cubemap_face(float3 dir) +{ + float u = atan2f(dir.y, dir.x) * 2.0f / M_PI_F + 0.5f; + float v = atan2f(dir.z, dir.x) * 2.0f / M_PI_F + 0.5f; + + u = 1.0f - u; + + return make_float2(u, v); +} + ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v) { switch (cam->panorama_type) { case PANORAMA_EQUIRECTANGULAR: return equirectangular_range_to_direction(u, v, cam->equirectangular_range); + case PANORAMA_EQUIANGULAR_CUBEMAP_FACE: + return equiangular_cubemap_face_to_direction(u, v); case PANORAMA_MIRRORBALL: return mirrorball_to_direction(u, v); case PANORAMA_FISHEYE_EQUIDISTANT: @@ -230,6 +254,8 @@ ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, f switch (cam->panorama_type) { case PANORAMA_EQUIRECTANGULAR: return direction_to_equirectangular_range(dir, cam->equirectangular_range); + case PANORAMA_EQUIANGULAR_CUBEMAP_FACE: + return direction_to_equiangular_cubemap_face(dir); case PANORAMA_MIRRORBALL: return direction_to_mirrorball(dir); case PANORAMA_FISHEYE_EQUIDISTANT: diff --git a/intern/cycles/kernel/device/cpu/bvh.h b/intern/cycles/kernel/device/cpu/bvh.h index d9267e1cd6d..2d7d8c2d704 100644 --- a/intern/cycles/kernel/device/cpu/bvh.h +++ b/intern/cycles/kernel/device/cpu/bvh.h @@ -252,7 +252,7 @@ ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArgum /* Always use baked shadow transparency for curves. */ if (current_isect.type & PRIMITIVE_CURVE) { ctx->throughput *= intersection_curve_shadow_transparency( - kg, current_isect.object, current_isect.prim, current_isect.u); + kg, current_isect.object, current_isect.prim, current_isect.type, current_isect.u); if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { ctx->opaque_hit = true; diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h index c1df49c4f49..38cdcb572eb 100644 --- a/intern/cycles/kernel/device/gpu/parallel_active_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h @@ -23,22 +23,6 @@ CCL_NAMESPACE_BEGIN * and keep device specific code in compat.h */ #ifdef __KERNEL_ONEAPI__ -# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED -template<typename IsActiveOp> -void cpu_serial_active_index_array_impl(const uint num_states, - ccl_global int *ccl_restrict indices, - ccl_global int *ccl_restrict num_indices, - IsActiveOp is_active_op) -{ - int write_index = 0; - for (int state_index = 0; state_index < num_states; state_index++) { - if (is_active_op(state_index)) - indices[write_index++] = state_index; - } - *num_indices = write_index; - return; -} -# endif /* WITH_ONEAPI_SYCL_HOST_ENABLED */ template<typename IsActiveOp> void gpu_parallel_active_index_array_impl(const uint num_states, @@ -182,18 +166,11 @@ __device__ num_simd_groups, \ simdgroup_offset) #elif defined(__KERNEL_ONEAPI__) -# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED -# define gpu_parallel_active_index_array( \ - blocksize, num_states, indices, num_indices, is_active_op) \ - if (ccl_gpu_global_size_x() == 1) \ - cpu_serial_active_index_array_impl(num_states, indices, num_indices, is_active_op); \ - else \ - gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op); -# else -# define gpu_parallel_active_index_array( \ - blocksize, num_states, indices, num_indices, is_active_op) \ - gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op) -# endif + +# define gpu_parallel_active_index_array( \ + blocksize, num_states, indices, num_indices, is_active_op) \ + gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op) + #else # define gpu_parallel_active_index_array( \ diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h index 99cb1e3826e..e75ec9cadec 100644 --- a/intern/cycles/kernel/device/metal/context_begin.h +++ b/intern/cycles/kernel/device/metal/context_begin.h @@ -34,21 +34,48 @@ class MetalKernelContext { kernel_assert(0); return 0; } - + +#ifdef __KERNEL_METAL_INTEL__ + template<typename TextureType, typename CoordsType> + inline __attribute__((__always_inline__)) + auto ccl_gpu_tex_object_read_intel_workaround(TextureType texture_array, + const uint tid, const uint sid, + CoordsType coords) const + { + switch(sid) { + default: + case 0: return texture_array[tid].tex.sample(sampler(address::repeat, filter::nearest), coords); + case 1: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::nearest), coords); + case 2: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::nearest), coords); + case 3: return texture_array[tid].tex.sample(sampler(address::repeat, filter::linear), coords); + case 4: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::linear), coords); + case 5: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::linear), coords); + } + } +#endif + // texture2d template<> inline __attribute__((__always_inline__)) float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const { const uint tid(tex); const uint sid(tex >> 32); +#ifndef __KERNEL_METAL_INTEL__ return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)); +#else + return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y)); +#endif } template<> inline __attribute__((__always_inline__)) float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const { const uint tid(tex); const uint sid(tex >> 32); +#ifndef __KERNEL_METAL_INTEL__ return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x; +#else + return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y)).x; +#endif } // texture3d @@ -57,14 +84,22 @@ class MetalKernelContext { float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const { const uint tid(tex); const uint sid(tex >> 32); +#ifndef __KERNEL_METAL_INTEL__ return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)); +#else + return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z)); +#endif } template<> inline __attribute__((__always_inline__)) float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const { const uint tid(tex); const uint sid(tex >> 32); +#ifndef __KERNEL_METAL_INTEL__ return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x; +#else + return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z)).x; +#endif } # include "kernel/device/gpu/image.h" diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal index 5646c7446db..8b69ee025cd 100644 --- a/intern/cycles/kernel/device/metal/kernel.metal +++ b/intern/cycles/kernel/device/metal/kernel.metal @@ -228,7 +228,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, /* Always use baked shadow transparency for curves. */ if (type & PRIMITIVE_CURVE) { float throughput = payload.throughput; - throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u); + throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, type, u); payload.throughput = throughput; payload.num_hits += 1; diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h index 8ae40b0612e..dfaec65130c 100644 --- a/intern/cycles/kernel/device/oneapi/compat.h +++ b/intern/cycles/kernel/device/oneapi/compat.h @@ -55,18 +55,6 @@ #define ccl_gpu_kernel(block_num_threads, thread_num_registers) #define ccl_gpu_kernel_threads(block_num_threads) -#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED -# define KG_ND_ITEMS \ - kg->nd_item_local_id_0 = item.get_local_id(0); \ - kg->nd_item_local_range_0 = item.get_local_range(0); \ - kg->nd_item_group_0 = item.get_group(0); \ - kg->nd_item_group_range_0 = item.get_group_range(0); \ - kg->nd_item_global_id_0 = item.get_global_id(0); \ - kg->nd_item_global_range_0 = item.get_global_range(0); -#else -# define KG_ND_ITEMS -#endif - #define ccl_gpu_kernel_signature(name, ...) \ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \ size_t kernel_global_size, \ @@ -76,8 +64,7 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \ (kg); \ cgh.parallel_for<class kernel_##name>( \ sycl::nd_range<1>(kernel_global_size, kernel_local_size), \ - [=](sycl::nd_item<1> item) { \ - KG_ND_ITEMS + [=](sycl::nd_item<1> item) { #define ccl_gpu_kernel_postfix \ }); \ @@ -95,31 +82,17 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \ } ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg) /* GPU thread, block, grid size and index */ -#ifndef WITH_ONEAPI_SYCL_HOST_ENABLED -# define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0)) -# define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0)) -# define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0)) -# define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0)) -# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0]) -# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) - -# define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0)) -# define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0)) -#else -# define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0) -# define ccl_gpu_block_dim_x (kg->nd_item_local_range_0) -# define ccl_gpu_block_idx_x (kg->nd_item_group_0) -# define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0) -# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0]) -# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) - -# define ccl_gpu_global_id_x() (kg->nd_item_global_id_0) -# define ccl_gpu_global_size_x() (kg->nd_item_global_range_0) -#endif +#define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0)) +#define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0)) +#define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0)) +#define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0)) +#define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0]) +#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp)) +#define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0)) +#define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0)) /* GPU warp synchronization */ - #define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier() #define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space) #ifdef __SYCL_DEVICE_ONLY__ diff --git a/intern/cycles/kernel/device/oneapi/globals.h b/intern/cycles/kernel/device/oneapi/globals.h index d60f4f135ba..116620eb725 100644 --- a/intern/cycles/kernel/device/oneapi/globals.h +++ b/intern/cycles/kernel/device/oneapi/globals.h @@ -23,15 +23,6 @@ typedef struct KernelGlobalsGPU { #undef KERNEL_DATA_ARRAY IntegratorStateGPU *integrator_state; const KernelData *__data; -#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED - size_t nd_item_local_id_0; - size_t nd_item_local_range_0; - size_t nd_item_group_0; - size_t nd_item_group_range_0; - - size_t nd_item_global_id_0; - size_t nd_item_global_range_0; -#endif } KernelGlobalsGPU; typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals; diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp index 40e0b1f0b2b..525ae288f0c 100644 --- a/intern/cycles/kernel/device/oneapi/kernel.cpp +++ b/intern/cycles/kernel/device/oneapi/kernel.cpp @@ -8,7 +8,7 @@ # include <map> # include <set> -# include <CL/sycl.hpp> +# include <sycl/sycl.hpp> # include "kernel/device/oneapi/compat.h" # include "kernel/device/oneapi/globals.h" @@ -144,6 +144,10 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue, bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features) { +# ifdef SYCL_SKIP_KERNELS_PRELOAD + (void)queue_; + (void)requested_features; +# else assert(queue_); sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_); @@ -175,7 +179,7 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features) sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle = sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id}); - sycl::build(one_kernel_bundle, {queue->get_device()}, sycl::property::queue::in_order()); + sycl::build(one_kernel_bundle); } } catch (sycl::exception const &e) { @@ -184,7 +188,7 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features) } return false; } - +# endif return true; } @@ -226,13 +230,6 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context, /* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices, * we extend work size to fit uniformity requirements. */ global_size = groups_count * local_size; - -# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED - if (queue->get_device().is_host()) { - global_size = 1; - local_size = 1; - } -# endif } /* Let the compiler throw an error if there are any kernels missing in this implementation. */ diff --git a/intern/cycles/kernel/device/optix/bvh.h b/intern/cycles/kernel/device/optix/bvh.h index fb9907709ce..6d81b44660c 100644 --- a/intern/cycles/kernel/device/optix/bvh.h +++ b/intern/cycles/kernel/device/optix/bvh.h @@ -202,7 +202,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() /* Always use baked shadow transparency for curves. */ if (type & PRIMITIVE_CURVE) { float throughput = __uint_as_float(optixGetPayload_1()); - throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u); + throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, type, u); optixSetPayload_1(__float_as_uint(throughput)); optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1)); diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 8f7cfd19169..24c5a6a4540 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -490,6 +490,7 @@ enum PanoramaType { PANORAMA_FISHEYE_EQUISOLID = 2, PANORAMA_MIRRORBALL = 3, PANORAMA_FISHEYE_LENS_POLYNOMIAL = 4, + PANORAMA_EQUIANGULAR_CUBEMAP_FACE = 5, PANORAMA_NUM_TYPES, }; |