13 files changed, 112 insertions, 106 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 36c8b23d983..81c5f593974 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -713,10 +713,17 @@ endif()
 # oneAPI module
 
 if(WITH_CYCLES_DEVICE_ONEAPI)
+  if(WITH_CYCLES_ONEAPI_BINARIES)
+    set(cycles_kernel_oneapi_lib_suffix "_aot")
+  else()
+    set(cycles_kernel_oneapi_lib_suffix "_jit")
+  endif()
+
   if(WIN32)
-    set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll)
+    set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.dll)
+    set(cycles_kernel_oneapi_linker_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.lib)
   else()
-    set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/libcycles_kernel_oneapi.so)
+    set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/libcycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.so)
   endif()
 
   set(cycles_oneapi_kernel_sources
@@ -751,10 +758,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
       ${SYCL_CPP_FLAGS}
       )
 
-  if (WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED)
-    list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_ENABLED)
-  endif()
-
   # Set defaults for spir64 and spir64_gen options
   if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
     set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
@@ -767,6 +770,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
   string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ")
 
   if (WITH_CYCLES_ONEAPI_BINARIES)
+    # AoT binaries aren't currently reused when calling sycl::build.
+    list (APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
     # Iterate over all targest and their options
     list (JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
     list (APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
@@ -819,12 +824,17 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
     -DONEAPI_EXPORT)
 
     string(REPLACE /Redist/ /Tools/ MSVC_TOOLS_DIR ${MSVC_REDIST_DIR})
-    if(NOT CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) # case for Ninja on Windows
+    # Version Folder between Redist and Tools can mismatch sometimes
+    if(NOT EXISTS ${MSVC_TOOLS_DIR})
+      get_filename_component(cmake_ar_dir ${CMAKE_AR} DIRECTORY)
+      get_filename_component(MSVC_TOOLS_DIR "${cmake_ar_dir}/../../../" ABSOLUTE)
+    endif()
+    if(CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION)
+      set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
+    else() # case for Ninja on Windows
       get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY)
       string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir})
       get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE)
-    else()
-      set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
     endif()
     list(APPEND sycl_compiler_flags
                 -L "${MSVC_TOOLS_DIR}/lib/x64"
@@ -836,15 +846,13 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
     set(sycl_compiler_flags_RelWithDebInfo ${sycl_compiler_flags})
     set(sycl_compiler_flags_MinSizeRel ${sycl_compiler_flags})
     list(APPEND sycl_compiler_flags_RelWithDebInfo -g)
-    get_filename_component(sycl_library_debug_name ${SYCL_LIBRARY_DEBUG} NAME_WE)
     list(APPEND sycl_compiler_flags_Debug
                 -g
                 -D_DEBUG
-                -nostdlib -Xclang --dependent-lib=msvcrtd
-                -Xclang --dependent-lib=${sycl_library_debug_name})
+                -nostdlib -Xclang --dependent-lib=msvcrtd)
 
     add_custom_command(
-      OUTPUT ${cycles_kernel_oneapi_lib}
+      OUTPUT ${cycles_kernel_oneapi_lib} ${cycles_kernel_oneapi_linker_lib}
       COMMAND ${CMAKE_COMMAND} -E env
               "LIB=${sycl_compiler_root}/../lib" # for compiler to find sycl.lib
               "PATH=${OCLOC_INSTALL_DIR}\;${sycl_compiler_root}"
diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h
index 2ffe1496c72..b31ba479e4f 100644
--- a/intern/cycles/kernel/bvh/shadow_all.h
+++ b/intern/cycles/kernel/bvh/shadow_all.h
@@ -229,7 +229,7 @@ ccl_device_inline
               /* Always use baked shadow transparency for curves. */
               if (isect.type & PRIMITIVE_CURVE) {
                 *r_throughput *= intersection_curve_shadow_transparency(
-                    kg, isect.object, isect.prim, isect.u);
+                    kg, isect.object, isect.prim, isect.type, isect.u);
 
                 if (*r_throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
                   return true;
diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h
index a57703a8b8c..9ba787550c5 100644
--- a/intern/cycles/kernel/bvh/util.h
+++ b/intern/cycles/kernel/bvh/util.h
@@ -190,10 +190,8 @@ ccl_device_inline int intersection_find_attribute(KernelGlobals kg,
 /* Cut-off value to stop transparent shadow tracing when practically opaque. */
 #define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f
 
-ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg,
-                                                               const int object,
-                                                               const int prim,
-                                                               const float u)
+ccl_device_inline float intersection_curve_shadow_transparency(
+    KernelGlobals kg, const int object, const int prim, const int type, const float u)
 {
   /* Find attribute. */
   const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY);
@@ -204,7 +202,7 @@ ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg,
 
   /* Interpolate transparency between curve keys. */
   const KernelCurve kcurve = kernel_data_fetch(curves, prim);
-  const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type);
+  const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type);
   const int k1 = k0 + 1;
 
   const float f0 = kernel_data_fetch(attributes_float, offset + k0);
diff --git a/intern/cycles/kernel/camera/projection.h b/intern/cycles/kernel/camera/projection.h
index c9fe3a6c7fb..1d16aa35abe 100644
--- a/intern/cycles/kernel/camera/projection.h
+++ b/intern/cycles/kernel/camera/projection.h
@@ -201,11 +201,35 @@ ccl_device float2 direction_to_mirrorball(float3 dir)
   return make_float2(u, v);
 }
 
+/* Single face of a equiangular cube map projection as described in
+   https://blog.google/products/google-ar-vr/bringing-pixels-front-and-center-vr-video/ */
+ccl_device float3 equiangular_cubemap_face_to_direction(float u, float v)
+{
+  u = (1.0f - u);
+
+  u = tanf(u * M_PI_2_F - M_PI_4_F);
+  v = tanf(v * M_PI_2_F - M_PI_4_F);
+
+  return make_float3(1.0f, u, v);
+}
+
+ccl_device float2 direction_to_equiangular_cubemap_face(float3 dir)
+{
+  float u = atan2f(dir.y, dir.x) * 2.0f / M_PI_F + 0.5f;
+  float v = atan2f(dir.z, dir.x) * 2.0f / M_PI_F + 0.5f;
+
+  u = 1.0f - u;
+
+  return make_float2(u, v);
+}
+
 ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v)
 {
   switch (cam->panorama_type) {
     case PANORAMA_EQUIRECTANGULAR:
       return equirectangular_range_to_direction(u, v, cam->equirectangular_range);
+    case PANORAMA_EQUIANGULAR_CUBEMAP_FACE:
+      return equiangular_cubemap_face_to_direction(u, v);
     case PANORAMA_MIRRORBALL:
       return mirrorball_to_direction(u, v);
     case PANORAMA_FISHEYE_EQUIDISTANT:
@@ -230,6 +254,8 @@ ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, f
   switch (cam->panorama_type) {
     case PANORAMA_EQUIRECTANGULAR:
       return direction_to_equirectangular_range(dir, cam->equirectangular_range);
+    case PANORAMA_EQUIANGULAR_CUBEMAP_FACE:
+      return direction_to_equiangular_cubemap_face(dir);
     case PANORAMA_MIRRORBALL:
       return direction_to_mirrorball(dir);
     case PANORAMA_FISHEYE_EQUIDISTANT:
diff --git a/intern/cycles/kernel/device/cpu/bvh.h b/intern/cycles/kernel/device/cpu/bvh.h
index d9267e1cd6d..2d7d8c2d704 100644
--- a/intern/cycles/kernel/device/cpu/bvh.h
+++ b/intern/cycles/kernel/device/cpu/bvh.h
@@ -252,7 +252,7 @@ ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArgum
       /* Always use baked shadow transparency for curves. */
       if (current_isect.type & PRIMITIVE_CURVE) {
         ctx->throughput *= intersection_curve_shadow_transparency(
-            kg, current_isect.object, current_isect.prim, current_isect.u);
+            kg, current_isect.object, current_isect.prim, current_isect.type, current_isect.u);
 
         if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
           ctx->opaque_hit = true;
diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h
index c1df49c4f49..38cdcb572eb 100644
--- a/intern/cycles/kernel/device/gpu/parallel_active_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h
@@ -23,22 +23,6 @@ CCL_NAMESPACE_BEGIN
  * and keep device specific code in compat.h */
 
 #ifdef __KERNEL_ONEAPI__
-#  ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-template<typename IsActiveOp>
-void cpu_serial_active_index_array_impl(const uint num_states,
-                                        ccl_global int *ccl_restrict indices,
-                                        ccl_global int *ccl_restrict num_indices,
-                                        IsActiveOp is_active_op)
-{
-  int write_index = 0;
-  for (int state_index = 0; state_index < num_states; state_index++) {
-    if (is_active_op(state_index))
-      indices[write_index++] = state_index;
-  }
-  *num_indices = write_index;
-  return;
-}
-#  endif /* WITH_ONEAPI_SYCL_HOST_ENABLED */
 
 template<typename IsActiveOp>
 void gpu_parallel_active_index_array_impl(const uint num_states,
@@ -182,18 +166,11 @@ __device__
                                          num_simd_groups, \
                                          simdgroup_offset)
 #elif defined(__KERNEL_ONEAPI__)
-#  ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-#    define gpu_parallel_active_index_array( \
-        blocksize, num_states, indices, num_indices, is_active_op) \
-      if (ccl_gpu_global_size_x() == 1) \
-        cpu_serial_active_index_array_impl(num_states, indices, num_indices, is_active_op); \
-      else \
-        gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op);
-#  else
-#    define gpu_parallel_active_index_array( \
-        blocksize, num_states, indices, num_indices, is_active_op) \
-      gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
-#  endif
+
+#  define gpu_parallel_active_index_array( \
+      blocksize, num_states, indices, num_indices, is_active_op) \
+    gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
+
 #else
 
 #  define gpu_parallel_active_index_array( \
diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h
index 99cb1e3826e..e75ec9cadec 100644
--- a/intern/cycles/kernel/device/metal/context_begin.h
+++ b/intern/cycles/kernel/device/metal/context_begin.h
@@ -34,21 +34,48 @@ class MetalKernelContext {
       kernel_assert(0);
       return 0;
     }
-
+    
+#ifdef __KERNEL_METAL_INTEL__
+    template<typename TextureType, typename CoordsType>
+    inline __attribute__((__always_inline__))
+    auto ccl_gpu_tex_object_read_intel_workaround(TextureType texture_array,
+                                                  const uint tid, const uint sid,
+                                                  CoordsType coords) const
+    {
+      switch(sid) {
+        default:
+        case 0: return texture_array[tid].tex.sample(sampler(address::repeat, filter::nearest), coords);
+        case 1: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::nearest), coords);
+        case 2: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::nearest), coords);
+        case 3: return texture_array[tid].tex.sample(sampler(address::repeat, filter::linear), coords);
+        case 4: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::linear), coords);
+        case 5: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::linear), coords);
+      }
+    }
+#endif
+    
     // texture2d
     template<>
     inline __attribute__((__always_inline__))
     float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
       const uint tid(tex);
       const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
       return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
+#else
+      return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y));
+#endif
     }
     template<>
     inline __attribute__((__always_inline__))
     float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
       const uint tid(tex);
       const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
       return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
+#else
+      return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y)).x;
+#endif
     }
 
     // texture3d
@@ -57,14 +84,22 @@ class MetalKernelContext {
     float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
       const uint tid(tex);
       const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
       return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
+#else
+      return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z));
+#endif
     }
     template<>
     inline __attribute__((__always_inline__))
     float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
       const uint tid(tex);
       const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
       return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
+#else
+      return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z)).x;
+#endif
     }
 #    include "kernel/device/gpu/image.h"
 
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index 5646c7446db..8b69ee025cd 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -228,7 +228,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
   /* Always use baked shadow transparency for curves. */
   if (type & PRIMITIVE_CURVE) {
     float throughput = payload.throughput;
-    throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u);
+    throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, type, u);
     payload.throughput = throughput;
     payload.num_hits += 1;
 
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index 8ae40b0612e..dfaec65130c 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -55,18 +55,6 @@
 #define ccl_gpu_kernel(block_num_threads, thread_num_registers)
 #define ccl_gpu_kernel_threads(block_num_threads)
 
-#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-#  define KG_ND_ITEMS \
-  kg->nd_item_local_id_0 = item.get_local_id(0); \
-  kg->nd_item_local_range_0 = item.get_local_range(0); \
-  kg->nd_item_group_0 = item.get_group(0); \
-  kg->nd_item_group_range_0 = item.get_group_range(0); \
-  kg->nd_item_global_id_0 = item.get_global_id(0); \
-  kg->nd_item_global_range_0 = item.get_global_range(0);
-#else
-# define KG_ND_ITEMS
-#endif
-
 #define ccl_gpu_kernel_signature(name, ...) \
 void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
                           size_t kernel_global_size, \
@@ -76,8 +64,7 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
       (kg); \
       cgh.parallel_for<class kernel_##name>( \
           sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
-          [=](sycl::nd_item<1> item) { \
-            KG_ND_ITEMS
+          [=](sycl::nd_item<1> item) {
 
 #define ccl_gpu_kernel_postfix \
           }); \
@@ -95,31 +82,17 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
   } ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg)
 
 /* GPU thread, block, grid size and index */
-#ifndef WITH_ONEAPI_SYCL_HOST_ENABLED
-#  define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
-#  define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
-#  define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
-#  define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
-#  define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
-#  define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-#  define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
-#  define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
-#else
-#  define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0)
-#  define ccl_gpu_block_dim_x (kg->nd_item_local_range_0)
-#  define ccl_gpu_block_idx_x (kg->nd_item_group_0)
-#  define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0)
-#  define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
-#  define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-#  define ccl_gpu_global_id_x() (kg->nd_item_global_id_0)
-#  define ccl_gpu_global_size_x() (kg->nd_item_global_range_0)
-#endif
+#define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
+#define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
+#define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
+#define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
+#define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
+#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
 
+#define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
+#define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
 
 /* GPU warp synchronization */
-
 #define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier()
 #define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space)
 #ifdef __SYCL_DEVICE_ONLY__
diff --git a/intern/cycles/kernel/device/oneapi/globals.h b/intern/cycles/kernel/device/oneapi/globals.h
index d60f4f135ba..116620eb725 100644
--- a/intern/cycles/kernel/device/oneapi/globals.h
+++ b/intern/cycles/kernel/device/oneapi/globals.h
@@ -23,15 +23,6 @@ typedef struct KernelGlobalsGPU {
 #undef KERNEL_DATA_ARRAY
   IntegratorStateGPU *integrator_state;
   const KernelData *__data;
-#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-  size_t nd_item_local_id_0;
-  size_t nd_item_local_range_0;
-  size_t nd_item_group_0;
-  size_t nd_item_group_range_0;
-
-  size_t nd_item_global_id_0;
-  size_t nd_item_global_range_0;
-#endif
 } KernelGlobalsGPU;
 
 typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp
index 40e0b1f0b2b..525ae288f0c 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -8,7 +8,7 @@
 #  include <map>
 #  include <set>
 
-#  include <CL/sycl.hpp>
+#  include <sycl/sycl.hpp>
 
 #  include "kernel/device/oneapi/compat.h"
 #  include "kernel/device/oneapi/globals.h"
@@ -144,6 +144,10 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
 
 bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
 {
+#  ifdef SYCL_SKIP_KERNELS_PRELOAD
+  (void)queue_;
+  (void)requested_features;
+#  else
   assert(queue_);
   sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
 
@@ -175,7 +179,7 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
 
       sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
           sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
-      sycl::build(one_kernel_bundle, {queue->get_device()}, sycl::property::queue::in_order());
+      sycl::build(one_kernel_bundle);
     }
   }
   catch (sycl::exception const &e) {
@@ -184,7 +188,7 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
     }
     return false;
   }
-
+#  endif
   return true;
 }
 
@@ -226,13 +230,6 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
     /* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices,
      * we extend work size to fit uniformity requirements. */
     global_size = groups_count * local_size;
-
-#  ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-    if (queue->get_device().is_host()) {
-      global_size = 1;
-      local_size = 1;
-    }
-#  endif
   }
 
   /* Let the compiler throw an error if there are any kernels missing in this implementation. */
diff --git a/intern/cycles/kernel/device/optix/bvh.h b/intern/cycles/kernel/device/optix/bvh.h
index fb9907709ce..6d81b44660c 100644
--- a/intern/cycles/kernel/device/optix/bvh.h
+++ b/intern/cycles/kernel/device/optix/bvh.h
@@ -202,7 +202,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
   /* Always use baked shadow transparency for curves. */
   if (type & PRIMITIVE_CURVE) {
     float throughput = __uint_as_float(optixGetPayload_1());
-    throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u);
+    throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, type, u);
     optixSetPayload_1(__float_as_uint(throughput));
     optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
 
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 8f7cfd19169..24c5a6a4540 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -490,6 +490,7 @@ enum PanoramaType {
   PANORAMA_FISHEYE_EQUISOLID = 2,
   PANORAMA_MIRRORBALL = 3,
   PANORAMA_FISHEYE_LENS_POLYNOMIAL = 4,
+  PANORAMA_EQUIANGULAR_CUBEMAP_FACE = 5,
 
   PANORAMA_NUM_TYPES,
 };