15 files changed, 201 insertions, 133 deletions
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h
index e1c20169582..3bfc37e98ee 100644
--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@@ -35,20 +35,6 @@ CCL_NAMESPACE_BEGIN
 
 #define kernel_assert(cond) assert(cond)
 
-/* Texture types to be compatible with CUDA textures. These are really just
- * simple arrays and after inlining fetch hopefully revert to being a simple
- * pointer lookup. */
-template<typename T> struct texture {
-  ccl_always_inline const T &fetch(int index) const
-  {
-    kernel_assert(index >= 0 && index < width);
-    return data[index];
-  }
-
-  T *data;
-  int width;
-};
-
 /* Macros to handle different memory storage on different devices */
 
 #ifdef __KERNEL_SSE2__
diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h
index 7e080d428ea..309afae412e 100644
--- a/intern/cycles/kernel/device/cpu/globals.h
+++ b/intern/cycles/kernel/device/cpu/globals.h
@@ -12,7 +12,7 @@
 CCL_NAMESPACE_BEGIN
 
 /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
- * the kernel, to access constant data. These are all stored as "textures", but
+ * the kernel, to access constant data. These are all stored as flat arrays.
  * these are really just standard arrays. We can't use actually globals because
  * multiple renders may be running inside the same process. */
 
@@ -22,11 +22,23 @@ struct OSLThreadData;
 struct OSLShadingSystem;
 #endif
 
+/* Array for kernel data, with size to be able to assert on invalid data access. */
+template<typename T> struct kernel_array {
+  ccl_always_inline const T &fetch(int index) const
+  {
+    kernel_assert(index >= 0 && index < width);
+    return data[index];
+  }
+
+  T *data;
+  int width;
+};
+
 typedef struct KernelGlobalsCPU {
-#define KERNEL_TEX(type, name) texture<type> name;
-#include "kernel/textures.h"
+#define KERNEL_DATA_ARRAY(type, name) kernel_array<type> name;
+#include "kernel/data_arrays.h"
 
-  KernelData __data;
+  KernelData data;
 
 #ifdef __OSL__
   /* On the CPU, we also have the OSL globals here. Most data structures are shared
@@ -44,8 +56,8 @@ typedef struct KernelGlobalsCPU {
 typedef const KernelGlobalsCPU *ccl_restrict KernelGlobals;
 
 /* Abstraction macros */
-#define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
-#define kernel_tex_array(tex) (kg->tex.data)
-#define kernel_data (kg->__data)
+#define kernel_data_fetch(name, index) (kg->name.fetch(index))
+#define kernel_data_array(name) (kg->name.data)
+#define kernel_data (kg->data)
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h
index 3b714a3e580..320e6309128 100644
--- a/intern/cycles/kernel/device/cpu/image.h
+++ b/intern/cycles/kernel/device/cpu/image.h
@@ -733,7 +733,7 @@ template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator {
 
 ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
 {
-  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  const TextureInfo &info = kernel_data_fetch(texture_info, id);
 
   if (UNLIKELY(!info.data)) {
     return zero_float4();
@@ -776,7 +776,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
                                              float3 P,
                                              InterpolationType interp)
 {
-  const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  const TextureInfo &info = kernel_data_fetch(texture_info, id);
 
   if (UNLIKELY(!info.data)) {
     return zero_float4();
@@ -817,6 +817,16 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
     }
     case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
       return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
+    case IMAGE_DATA_TYPE_NANOVDB_FPN: {
+      const float f = NanoVDBInterpolator<nanovdb::FpN, float>::interp_3d(
+          info, P.x, P.y, P.z, interp);
+      return make_float4(f, f, f, 1.0f);
+    }
+    case IMAGE_DATA_TYPE_NANOVDB_FP16: {
+      const float f = NanoVDBInterpolator<nanovdb::Fp16, float>::interp_3d(
+          info, P.x, P.y, P.z, interp);
+      return make_float4(f, f, f, 1.0f);
+    }
 #endif
     default:
       assert(0);
diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp
index b12e3089378..01087c96dd6 100644
--- a/intern/cycles/kernel/device/cpu/kernel.cpp
+++ b/intern/cycles/kernel/device/cpu/kernel.cpp
@@ -53,8 +53,8 @@ CCL_NAMESPACE_BEGIN
 
 void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t)
 {
-  if (strcmp(name, "__data") == 0) {
-    kg->__data = *(KernelData *)host;
+  if (strcmp(name, "data") == 0) {
+    kg->data = *(KernelData *)host;
   }
   else {
     assert(0);
@@ -66,13 +66,13 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem
   if (0) {
   }
 
-#define KERNEL_TEX(type, tname) \
+#define KERNEL_DATA_ARRAY(type, tname) \
   else if (strcmp(name, #tname) == 0) \
   { \
     kg->tname.data = (type *)mem; \
     kg->tname.width = size; \
   }
-#include "kernel/textures.h"
+#include "kernel/data_arrays.h"
   else {
     assert(0);
   }
diff --git a/intern/cycles/kernel/device/cuda/globals.h b/intern/cycles/kernel/device/cuda/globals.h
index e77fcd2b424..f5f7bcf58ee 100644
--- a/intern/cycles/kernel/device/cuda/globals.h
+++ b/intern/cycles/kernel/device/cuda/globals.h
@@ -20,18 +20,24 @@ struct KernelGlobalsGPU {
 };
 typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
 
-/* Global scene data and textures */
-__constant__ KernelData __data;
-#define KERNEL_TEX(type, name) const __constant__ __device__ type *name;
-#include "kernel/textures.h"
+struct KernelParamsCUDA {
+  /* Global scene data and textures */
+  KernelData data;
+#define KERNEL_DATA_ARRAY(type, name) const type *name;
+#include "kernel/data_arrays.h"
+
+  /* Integrator state */
+  IntegratorStateGPU integrator_state;
+};
 
-/* Integrator state */
-__constant__ IntegratorStateGPU __integrator_state;
+#ifdef __KERNEL_GPU__
+__constant__ KernelParamsCUDA kernel_params;
+#endif
 
 /* Abstraction macros */
-#define kernel_data __data
-#define kernel_tex_fetch(t, index) t[(index)]
-#define kernel_tex_array(t) (t)
-#define kernel_integrator_state __integrator_state
+#define kernel_data kernel_params.data
+#define kernel_data_fetch(name, index) kernel_params.name[(index)]
+#define kernel_data_array(name) (kernel_params.name)
+#define kernel_integrator_state kernel_params.integrator_state
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/gpu/image.h b/intern/cycles/kernel/device/gpu/image.h
index c5bc7d88e02..a8c72645569 100644
--- a/intern/cycles/kernel/device/gpu/image.h
+++ b/intern/cycles/kernel/device/gpu/image.h
@@ -125,7 +125,8 @@ kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, fl
 
 #ifdef WITH_NANOVDB
 template<typename T, typename S>
-ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z)
+ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tricubic_nanovdb(
+    S &s, float x, float y, float z)
 {
   float px = floorf(x);
   float py = floorf(y);
@@ -157,7 +158,7 @@ ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, fl
 }
 
 template<typename T>
-ccl_device_noinline T kernel_tex_image_interp_nanovdb(
+ccl_device_noinline typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
     ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
 {
   using namespace nanovdb;
@@ -180,7 +181,7 @@ ccl_device_noinline T kernel_tex_image_interp_nanovdb(
 
 ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
 {
-  ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  ccl_global const TextureInfo &info = kernel_data_fetch(texture_info, id);
 
   /* float4, byte4, ushort4 and half4 */
   const int texture_type = info.data_type;
@@ -215,7 +216,7 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
                                              float3 P,
                                              InterpolationType interp)
 {
-  ccl_global const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
+  ccl_global const TextureInfo &info = kernel_data_fetch(texture_info, id);
 
   if (info.use_transform_3d) {
     P = transform_point(&info.transform_3d, P);
@@ -238,6 +239,14 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
         info, x, y, z, interpolation);
     return make_float4(f[0], f[1], f[2], 1.0f);
   }
+  if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FPN) {
+    float f = kernel_tex_image_interp_nanovdb<nanovdb::FpN>(info, x, y, z, interpolation);
+    return make_float4(f, f, f, 1.0f);
+  }
+  if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
+    float f = kernel_tex_image_interp_nanovdb<nanovdb::Fp16>(info, x, y, z, interpolation);
+    return make_float4(f, f, f, 1.0f);
+  }
 #endif
   if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
       texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) {
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 82b51843864..d657571a5fa 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -241,7 +241,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
 }
 ccl_gpu_kernel_postfix
 
-#ifdef __KERNEL_METAL__
+#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
 constant int __dummy_constant [[function_constant(0)]];
 #endif
 
@@ -256,7 +256,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
   if (global_index < work_size) {
     const int state = (path_index_array) ? path_index_array[global_index] : global_index;
 
-#ifdef __KERNEL_METAL__
+#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
     KernelGlobals kg = NULL;
     /* Workaround Ambient Occlusion and Bevel nodes not working with Metal.
      * Dummy offset should not affect result, but somehow fixes bug! */
@@ -270,6 +270,21 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
 ccl_gpu_kernel_postfix
 
 ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
+    ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
+                             ccl_global const int *path_index_array,
+                             ccl_global float *render_buffer,
+                             const int work_size)
+{
+  const int global_index = ccl_gpu_global_id_x();
+
+  if (global_index < work_size) {
+    const int state = (path_index_array) ? path_index_array[global_index] : global_index;
+    ccl_gpu_kernel_call(integrator_shade_surface_mnee(NULL, state, render_buffer));
+  }
+}
+ccl_gpu_kernel_postfix
+
+ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
     ccl_gpu_kernel_signature(integrator_shade_volume,
                              ccl_global const int *path_index_array,
                              ccl_global float *render_buffer,
@@ -647,8 +662,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
     const int x = render_pixel_index % width; \
     const int y = render_pixel_index / width; \
 \
-    ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \
-                                     y * stride * kfilm_convert.pass_stride; \
+    const uint64_t buffer_pixel_index = x + y * stride; \
+    ccl_global const float *buffer = render_buffer + offset + \
+                                     buffer_pixel_index * kfilm_convert.pass_stride; \
 \
     ccl_global float *pixel = pixels + \
                               (render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
@@ -677,8 +693,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
     const int x = render_pixel_index % width; \
     const int y = render_pixel_index / width; \
 \
-    ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \
-                                     y * stride * kfilm_convert.pass_stride; \
+    const uint64_t buffer_pixel_index = x + y * stride; \
+    ccl_global const float *buffer = render_buffer + offset + \
+                                     buffer_pixel_index * kfilm_convert.pass_stride; \
 \
     float pixel[4]; \
     film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \
diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h
index 50f117038a2..3a334b21a9e 100644
--- a/intern/cycles/kernel/device/hip/globals.h
+++ b/intern/cycles/kernel/device/hip/globals.h
@@ -20,18 +20,24 @@ struct KernelGlobalsGPU {
 };
 typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
 
-/* Global scene data and textures */
-__constant__ KernelData __data;
-#define KERNEL_TEX(type, name) __attribute__((used)) const __constant__ __device__ type *name;
-#include "kernel/textures.h"
+struct KernelParamsHIP {
+  /* Global scene data and textures */
+  KernelData data;
+#define KERNEL_DATA_ARRAY(type, name) const type *name;
+#include "kernel/data_arrays.h"
+
+  /* Integrator state */
+  IntegratorStateGPU integrator_state;
+};
 
-/* Integrator state */
-__constant__ IntegratorStateGPU __integrator_state;
+#ifdef __KERNEL_GPU__
+__constant__ KernelParamsHIP kernel_params;
+#endif
 
 /* Abstraction macros */
-#define kernel_data __data
-#define kernel_tex_fetch(t, index) t[(index)]
-#define kernel_tex_array(t) (t)
-#define kernel_integrator_state __integrator_state
+#define kernel_data kernel_params.data
+#define kernel_data_fetch(name, index) kernel_params.name[(index)]
+#define kernel_data_array(name) (kernel_params.name)
+#define kernel_integrator_state kernel_params.integrator_state
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 4e309f16c08..0ed52074a90 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,10 +29,26 @@ using namespace metal::raytracing;
 
 /* Qualifiers */
 
-#define ccl_device
-#define ccl_device_inline ccl_device
-#define ccl_device_forceinline ccl_device
-#define ccl_device_noinline ccl_device __attribute__((noinline))
+#if defined(__KERNEL_METAL_APPLE__)
+
+/* Inline everything for Apple GPUs.
+ * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
+ * at the cost of longer compile times (~4.5 minutes on M1 Max). */
+
+#  define ccl_device __attribute__((always_inline))
+#  define ccl_device_inline __attribute__((always_inline))
+#  define ccl_device_forceinline __attribute__((always_inline))
+#  define ccl_device_noinline __attribute__((always_inline))
+
+#else
+
+#  define ccl_device
+#  define ccl_device_inline ccl_device
+#  define ccl_device_forceinline ccl_device
+#  define ccl_device_noinline ccl_device __attribute__((noinline))
+
+#endif
+
 #define ccl_device_noinline_cpu ccl_device
 #define ccl_device_inline_method ccl_device
 #define ccl_global device
diff --git a/intern/cycles/kernel/device/metal/context_end.h b/intern/cycles/kernel/device/metal/context_end.h
index b4c8661c401..44ac0478266 100644
--- a/intern/cycles/kernel/device/metal/context_end.h
+++ b/intern/cycles/kernel/device/metal/context_end.h
@@ -7,4 +7,4 @@
 /* NOTE: These macros will need maintaining as entry-points change. */
 
 #undef kernel_integrator_state
-#define kernel_integrator_state context.launch_params_metal.__integrator_state
+#define kernel_integrator_state context.launch_params_metal.integrator_state
diff --git a/intern/cycles/kernel/device/metal/globals.h b/intern/cycles/kernel/device/metal/globals.h
index 1c3e775dbae..a336c096440 100644
--- a/intern/cycles/kernel/device/metal/globals.h
+++ b/intern/cycles/kernel/device/metal/globals.h
@@ -12,11 +12,11 @@ CCL_NAMESPACE_BEGIN
 
 typedef struct KernelParamsMetal {
 
-#define KERNEL_TEX(type, name) ccl_global const type *name;
-#include "kernel/textures.h"
-#undef KERNEL_TEX
+#define KERNEL_DATA_ARRAY(type, name) ccl_global const type *name;
+#include "kernel/data_arrays.h"
+#undef KERNEL_DATA_ARRAY
 
-  const IntegratorStateGPU __integrator_state;
+  const IntegratorStateGPU integrator_state;
   const KernelData data;
 
 } KernelParamsMetal;
@@ -27,12 +27,10 @@ typedef struct KernelGlobalsGPU {
 
 typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
 
+/* Abstraction macros */
 #define kernel_data launch_params_metal.data
-#define kernel_integrator_state launch_params_metal.__integrator_state
-
-/* data lookup defines */
-
-#define kernel_tex_fetch(tex, index) launch_params_metal.tex[index]
-#define kernel_tex_array(tex) launch_params_metal.tex
+#define kernel_data_fetch(name, index) launch_params_metal.name[index]
+#define kernel_data_array(name) launch_params_metal.name
+#define kernel_integrator_state launch_params_metal.integrator_state
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index a7252570e64..3c31dc3354c 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -59,7 +59,7 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
   TReturn result;
   
 #ifdef __BVH_LOCAL__
-  uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+  uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
 
   if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) {
     /* Only intersect with matching object and skip self-intersecton. */
@@ -113,16 +113,16 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
   isect->t = ray_tmax;
   isect->prim = prim;
   isect->object = object;
-  isect->type = kernel_tex_fetch(__objects, object).primitive_type;
+  isect->type = kernel_data_fetch(objects, object).primitive_type;
 
   isect->u = 1.0f - barycentrics.y - barycentrics.x;
   isect->v = barycentrics.x;
 
   /* Record geometric normal */
-  const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect->prim).w;
-  const float3 tri_a = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0));
-  const float3 tri_b = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1));
-  const float3 tri_c = float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
+  const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
+  const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0));
+  const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1));
+  const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2));
   payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
 
   /* Continue tracing (without this the trace call would return after the first hit) */
@@ -168,7 +168,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
 #ifdef __SHADOW_RECORD_ALL__
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = payload.visibility;
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     /* continue search */
     return true;
   }
@@ -184,14 +184,14 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
   if (intersection_type == METALRT_HIT_TRIANGLE) {
     u = 1.0f - barycentrics.y - barycentrics.x;
     v = barycentrics.x;
-    type = kernel_tex_fetch(__objects, object).primitive_type;
+    type = kernel_data_fetch(objects, object).primitive_type;
   }
 #  ifdef __HAIR__
   else {
     u = barycentrics.x;
     v = barycentrics.y;
     
-    const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
     type = segment.type;
     prim = segment.prim;
 
@@ -294,7 +294,7 @@ __anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_p
                                             float2 barycentrics [[barycentric_coord]],
                                             float ray_tmax [[distance]])
 {
-  uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+  uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
 
   TriangleIntersectionResult result;
   result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
@@ -337,7 +337,7 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa
 
   uint visibility = payload.visibility;
 #  ifdef __VISIBILITY_FLAG__
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     result.accept = false;
     result.continue_search = true;
     return result;
@@ -377,12 +377,12 @@ __anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_
                                              unsigned int object [[user_instance_id]],
                                              unsigned int primitive_id [[primitive_id]])
 {
-  uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+  uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
   TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
             launch_params_metal, payload, object, prim, 0.0f);
   if (result.accept) {
     payload.prim = prim;
-    payload.type = kernel_tex_fetch(__objects, object).primitive_type;
+    payload.type = kernel_data_fetch(objects, object).primitive_type;
   }
   return result;
 }
@@ -414,7 +414,7 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
 {
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = payload.visibility;
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     return;
   }
 #  endif
@@ -495,8 +495,8 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
                              const float3 ray_direction [[direction]],
                              const float ray_tmax [[max_distance]])
 {
-  uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
-  const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+  uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
 
   BoundingBoxIntersectionResult result;
   result.accept = false;
@@ -526,8 +526,8 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
                                     const float3 ray_direction [[direction]],
                                     const float ray_tmax [[max_distance]])
 {
-  uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
-  const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+  uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
 
   BoundingBoxIntersectionResult result;
   result.accept = false;
@@ -557,8 +557,8 @@ __intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buff
                           const float3 ray_direction [[direction]],
                           const float ray_tmax [[max_distance]])
 {
-  uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
-  const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+  uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
     
   BoundingBoxIntersectionResult result;
   result.accept = false;
@@ -585,8 +585,8 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
                                  const float3 ray_direction [[direction]],
                                  const float ray_tmax [[max_distance]])
 {
-  uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
-  const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+  uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
 
   BoundingBoxIntersectionResult result;
   result.accept = false;
@@ -620,7 +620,7 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
 {
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = payload.visibility;
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     return;
   }
 #  endif
@@ -701,8 +701,8 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
                              const float3 ray_direction [[direction]],
                              const float ray_tmax [[max_distance]])
 {
-  const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
-  const int type = kernel_tex_fetch(__objects, object).primitive_type;
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const int type = kernel_data_fetch(objects, object).primitive_type;
 
   BoundingBoxIntersectionResult result;
   result.accept = false;
@@ -730,8 +730,8 @@ __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[b
                                     const float3 ray_direction [[direction]],
                                     const float ray_tmax [[max_distance]])
 {
-  const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
-  const int type = kernel_tex_fetch(__objects, object).primitive_type;
+  const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
+  const int type = kernel_data_fetch(objects, object).primitive_type;
 
   BoundingBoxIntersectionResult result;
   result.accept = false;
diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h
index bb752c531f0..7af2e421378 100644
--- a/intern/cycles/kernel/device/optix/globals.h
+++ b/intern/cycles/kernel/device/optix/globals.h
@@ -28,21 +28,21 @@ struct KernelParamsOptiX {
 
   /* Global scene data and textures */
   KernelData data;
-#define KERNEL_TEX(type, name) const type *name;
-#include "kernel/textures.h"
+#define KERNEL_DATA_ARRAY(type, name) const type *name;
+#include "kernel/data_arrays.h"
 
   /* Integrator state */
-  IntegratorStateGPU __integrator_state;
+  IntegratorStateGPU integrator_state;
 };
 
 #ifdef __NVCC__
-extern "C" static __constant__ KernelParamsOptiX __params;
+extern "C" static __constant__ KernelParamsOptiX kernel_params;
 #endif
 
 /* Abstraction macros */
-#define kernel_data __params.data
-#define kernel_tex_array(t) __params.t
-#define kernel_tex_fetch(t, index) __params.t[(index)]
-#define kernel_integrator_state __params.__integrator_state
+#define kernel_data kernel_params.data
+#define kernel_data_array(name) kernel_params.name
+#define kernel_data_fetch(name, index) kernel_params.name[(index)]
+#define kernel_integrator_state kernel_params.integrator_state
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 9843b2e99be..949bf41d171 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -51,15 +51,15 @@ ccl_device_forceinline int get_object_id()
 extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
 {
   const int global_index = optixGetLaunchIndex().x;
-  const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+  const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
                                                        global_index;
-  integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
+  integrator_intersect_closest(nullptr, path_index, kernel_params.render_buffer);
 }
 
 extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
 {
   const int global_index = optixGetLaunchIndex().x;
-  const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+  const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
                                                        global_index;
   integrator_intersect_shadow(nullptr, path_index);
 }
@@ -67,7 +67,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
 extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurface()
 {
   const int global_index = optixGetLaunchIndex().x;
-  const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+  const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
                                                        global_index;
   integrator_intersect_subsurface(nullptr, path_index);
 }
@@ -75,7 +75,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_subsurfac
 extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_stack()
 {
   const int global_index = optixGetLaunchIndex().x;
-  const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+  const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
                                                        global_index;
   integrator_intersect_volume_stack(nullptr, path_index);
 }
@@ -151,17 +151,17 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
   isect->t = optixGetRayTmax();
   isect->prim = prim;
   isect->object = get_object_id();
-  isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type;
+  isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
 
   const float2 barycentrics = optixGetTriangleBarycentrics();
   isect->u = 1.0f - barycentrics.y - barycentrics.x;
   isect->v = barycentrics.x;
 
   /* Record geometric normal. */
-  const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
-  const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0);
-  const float3 tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1);
-  const float3 tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
+  const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
+  const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
+  const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
+  const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
   local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
 
   /* Continue tracing (without this the trace call would return after the first hit). */
@@ -176,7 +176,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
   const uint object = get_object_id();
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = optixGetPayload_4();
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     return optixIgnoreIntersection();
   }
 #  endif
@@ -192,14 +192,14 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
     const float2 barycentrics = optixGetTriangleBarycentrics();
     u = 1.0f - barycentrics.y - barycentrics.x;
     v = barycentrics.x;
-    type = kernel_tex_fetch(__objects, object).primitive_type;
+    type = kernel_data_fetch(objects, object).primitive_type;
   }
 #  ifdef __HAIR__
   else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
     u = __uint_as_float(optixGetAttribute_0());
     v = __uint_as_float(optixGetAttribute_1());
 
-    const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
     type = segment.type;
     prim = segment.prim;
 
@@ -212,7 +212,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
   }
 #  endif
   else {
-    type = kernel_tex_fetch(__objects, object).primitive_type;
+    type = kernel_data_fetch(objects, object).primitive_type;
     u = 0.0f;
     v = 0.0f;
   }
@@ -307,12 +307,12 @@ extern "C" __global__ void __anyhit__kernel_optix_volume_test()
   const uint object = get_object_id();
 #ifdef __VISIBILITY_FLAG__
   const uint visibility = optixGetPayload_4();
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     return optixIgnoreIntersection();
   }
 #endif
 
-  if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
+  if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
     return optixIgnoreIntersection();
   }
 
@@ -340,7 +340,7 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
   const uint object = get_object_id();
   const uint visibility = optixGetPayload_4();
 #ifdef __VISIBILITY_FLAG__
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     return optixIgnoreIntersection();
   }
 #endif
@@ -377,10 +377,10 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
     optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
     optixSetPayload_2(__float_as_uint(barycentrics.x));
     optixSetPayload_3(prim);
-    optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type);
+    optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
   }
   else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
-    const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
+    const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
     optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
     optixSetPayload_2(optixGetAttribute_1());
     optixSetPayload_3(segment.prim);
@@ -390,7 +390,7 @@ extern "C" __global__ void __closesthit__kernel_optix_hit()
     optixSetPayload_1(0);
     optixSetPayload_2(0);
     optixSetPayload_3(prim);
-    optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type);
+    optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
   }
 }
 
@@ -401,7 +401,7 @@ ccl_device_inline void optix_intersection_curve(const int prim, const int type)
 
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = optixGetPayload_4();
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     return;
   }
 #  endif
@@ -436,7 +436,7 @@ ccl_device_inline void optix_intersection_curve(const int prim, const int type)
 
 extern "C" __global__ void __intersection__curve_ribbon()
 {
-  const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, optixGetPrimitiveIndex());
+  const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
   const int prim = segment.prim;
   const int type = segment.type;
   if (type & PRIMITIVE_CURVE_RIBBON) {
@@ -451,11 +451,11 @@ extern "C" __global__ void __intersection__point()
 {
   const int prim = optixGetPrimitiveIndex();
   const int object = get_object_id();
-  const int type = kernel_tex_fetch(__objects, object).primitive_type;
+  const int type = kernel_data_fetch(objects, object).primitive_type;
 
 #  ifdef __VISIBILITY_FLAG__
   const uint visibility = optixGetPayload_4();
-  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+  if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
     return;
   }
 #  endif
diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
index e2c5d2ff024..41e6224f6da 100644
--- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
+++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
@@ -11,7 +11,15 @@
 extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytrace()
 {
   const int global_index = optixGetLaunchIndex().x;
-  const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
+  const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
                                                        global_index;
-  integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer);
+  integrator_shade_surface_raytrace(nullptr, path_index, kernel_params.render_buffer);
+}
+
+extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee()
+{
+  const int global_index = optixGetLaunchIndex().x;
+  const int path_index = (kernel_params.path_index_array) ? kernel_params.path_index_array[global_index] :
+                                                       global_index;
+  integrator_shade_surface_mnee(nullptr, path_index, kernel_params.render_buffer);
 }