From 1df3b51988852fa8ee6b530a64aa23346db9acd4 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel <brecht>
Date: Sun, 17 Oct 2021 16:10:10 +0200
Subject: Cycles: replace integrator state argument macros

* Rename struct KernelGlobals to struct KernelGlobalsCPU.
* Add KernelGlobals, IntegratorState and ConstIntegratorState typedefs
  that every device can define in its own way.
* Remove the INTEGRATOR_STATE_ARGS and INTEGRATOR_STATE_PASS macros and
  replace them with these new typedefs.
* Add an explicit state argument to INTEGRATOR_STATE and similar macros.

This change is in preparation for decoupling the main and shadow paths.

Differential Revision: https://developer.blender.org/D12888
---
 intern/cycles/kernel/bvh/bvh.h                     |  10 +-
 intern/cycles/kernel/bvh/bvh_embree.h              |   8 +-
 intern/cycles/kernel/bvh/bvh_local.h               |   4 +-
 intern/cycles/kernel/bvh/bvh_nodes.h               |  10 +-
 intern/cycles/kernel/bvh/bvh_shadow_all.h          |   4 +-
 intern/cycles/kernel/bvh/bvh_traversal.h           |   4 +-
 intern/cycles/kernel/bvh/bvh_util.h                |  20 +--
 intern/cycles/kernel/bvh/bvh_volume.h              |   4 +-
 intern/cycles/kernel/bvh/bvh_volume_all.h          |   4 +-
 intern/cycles/kernel/closure/bsdf.h                |   8 +-
 .../cycles/kernel/closure/bsdf_hair_principled.h   |   8 +-
 intern/cycles/kernel/closure/bsdf_microfacet.h     |   8 +-
 .../cycles/kernel/closure/bsdf_microfacet_multi.h  |   4 +-
 intern/cycles/kernel/device/cpu/globals.h          |   6 +-
 intern/cycles/kernel/device/cpu/image.h            |   4 +-
 intern/cycles/kernel/device/cpu/kernel.cpp         |   4 +-
 intern/cycles/kernel/device/cpu/kernel.h           |  14 +-
 intern/cycles/kernel/device/cpu/kernel_arch.h      |  20 +--
 intern/cycles/kernel/device/cpu/kernel_arch_impl.h |  20 +--
 intern/cycles/kernel/device/cuda/globals.h         |   3 +-
 intern/cycles/kernel/device/gpu/image.h            |   4 +-
 intern/cycles/kernel/device/gpu/kernel.h           |  24 +--
 intern/cycles/kernel/device/optix/globals.h        |   3 +-
 intern/cycles/kernel/geom/geom_attribute.h         |  12 +-
 intern/cycles/kernel/geom/geom_curve.h             |  17 +-
 intern/cycles/kernel/geom/geom_curve_intersect.h   |   4 +-
 intern/cycles/kernel/geom/geom_motion_curve.h      |  15 +-
 intern/cycles/kernel/geom/geom_motion_triangle.h   |  15 +-
 .../kernel/geom/geom_motion_triangle_intersect.h   |   8 +-
 .../kernel/geom/geom_motion_triangle_shader.h      |   2 +-
 intern/cycles/kernel/geom/geom_object.h            |  89 +++++-----
 intern/cycles/kernel/geom/geom_patch.h             |  18 +-
 intern/cycles/kernel/geom/geom_primitive.h         |  32 ++--
 intern/cycles/kernel/geom/geom_shader_data.h       |  13 +-
 intern/cycles/kernel/geom/geom_subd_triangle.h     |  24 ++-
 intern/cycles/kernel/geom/geom_triangle.h          |  29 ++-
 .../cycles/kernel/geom/geom_triangle_intersect.h   |   8 +-
 intern/cycles/kernel/geom/geom_volume.h            |   4 +-
 .../kernel/integrator/integrator_init_from_bake.h  |  19 +-
 .../integrator/integrator_init_from_camera.h       |  15 +-
 .../integrator/integrator_intersect_closest.h      |  58 +++---
 .../integrator/integrator_intersect_shadow.h       |  42 ++---
 .../integrator/integrator_intersect_subsurface.h   |   4 +-
 .../integrator/integrator_intersect_volume_stack.h |  25 +--
 .../kernel/integrator/integrator_megakernel.h      |  31 ++--
 .../integrator/integrator_shade_background.h       |  74 ++++----
 .../kernel/integrator/integrator_shade_light.h     |  38 ++--
 .../kernel/integrator/integrator_shade_shadow.h    |  72 ++++----
 .../kernel/integrator/integrator_shade_surface.h   | 196 +++++++++++----------
 .../kernel/integrator/integrator_shade_volume.h    | 158 +++++++++--------
 intern/cycles/kernel/integrator/integrator_state.h |  69 +++-----
 .../kernel/integrator/integrator_state_flow.h      |  41 ++---
 .../kernel/integrator/integrator_state_util.h      | 170 +++++++++---------
 .../kernel/integrator/integrator_subsurface.h      |  62 +++----
 .../kernel/integrator/integrator_subsurface_disk.h |  17 +-
 .../integrator/integrator_subsurface_random_walk.h |  25 +--
 .../kernel/integrator/integrator_volume_stack.h    |  37 ++--
 intern/cycles/kernel/kernel_accumulate.h           | 150 ++++++++--------
 intern/cycles/kernel/kernel_adaptive_sampling.h    |  11 +-
 intern/cycles/kernel/kernel_bake.h                 |   8 +-
 intern/cycles/kernel/kernel_camera.h               |  16 +-
 intern/cycles/kernel/kernel_color.h                |   4 +-
 intern/cycles/kernel/kernel_emission.h             |  19 +-
 intern/cycles/kernel/kernel_id_passes.h            |   2 +-
 intern/cycles/kernel/kernel_jitter.h               |   7 +-
 intern/cycles/kernel/kernel_light.h                |  46 +++--
 intern/cycles/kernel/kernel_light_background.h     |  31 ++--
 intern/cycles/kernel/kernel_light_common.h         |   5 +-
 intern/cycles/kernel/kernel_lookup_table.h         |   7 +-
 intern/cycles/kernel/kernel_passes.h               |  59 ++++---
 intern/cycles/kernel/kernel_path_state.h           | 157 +++++++++--------
 intern/cycles/kernel/kernel_random.h               |   8 +-
 intern/cycles/kernel/kernel_shader.h               |  90 +++++-----
 intern/cycles/kernel/kernel_shadow_catcher.h       |  30 ++--
 intern/cycles/kernel/kernel_types.h                |  56 +++---
 intern/cycles/kernel/osl/osl_closures.cpp          |   2 +-
 intern/cycles/kernel/osl/osl_services.cpp          |  38 ++--
 intern/cycles/kernel/osl/osl_services.h            |   6 +-
 intern/cycles/kernel/osl/osl_shader.cpp            |  16 +-
 intern/cycles/kernel/osl/osl_shader.h              |  16 +-
 intern/cycles/kernel/svm/svm.h                     |  86 +++++----
 intern/cycles/kernel/svm/svm_ao.h                  |  18 +-
 intern/cycles/kernel/svm/svm_aov.h                 |  18 +-
 intern/cycles/kernel/svm/svm_attribute.h           |  11 +-
 intern/cycles/kernel/svm/svm_bevel.h               |  26 +--
 intern/cycles/kernel/svm/svm_blackbody.h           |   2 +-
 intern/cycles/kernel/svm/svm_brick.h               |   7 +-
 intern/cycles/kernel/svm/svm_bump.h                |   4 +-
 intern/cycles/kernel/svm/svm_camera.h              |   2 +-
 intern/cycles/kernel/svm/svm_checker.h             |   2 +-
 intern/cycles/kernel/svm/svm_clamp.h               |   2 +-
 intern/cycles/kernel/svm/svm_closure.h             |  40 +++--
 intern/cycles/kernel/svm/svm_convert.h             |   2 +-
 intern/cycles/kernel/svm/svm_displace.h            |  13 +-
 intern/cycles/kernel/svm/svm_geometry.h            |  12 +-
 intern/cycles/kernel/svm/svm_hsv.h                 |   2 +-
 intern/cycles/kernel/svm/svm_ies.h                 |   9 +-
 intern/cycles/kernel/svm/svm_image.h               |  14 +-
 intern/cycles/kernel/svm/svm_light_path.h          |  25 ++-
 intern/cycles/kernel/svm/svm_magic.h               |   7 +-
 intern/cycles/kernel/svm/svm_map_range.h           |   2 +-
 intern/cycles/kernel/svm/svm_mapping.h             |   6 +-
 intern/cycles/kernel/svm/svm_math.h                |   4 +-
 intern/cycles/kernel/svm/svm_mix.h                 |   2 +-
 intern/cycles/kernel/svm/svm_musgrave.h            |   2 +-
 intern/cycles/kernel/svm/svm_noisetex.h            |   2 +-
 intern/cycles/kernel/svm/svm_normal.h              |   2 +-
 intern/cycles/kernel/svm/svm_ramp.h                |  39 ++--
 intern/cycles/kernel/svm/svm_sepcomb_hsv.h         |   4 +-
 intern/cycles/kernel/svm/svm_sky.h                 |  13 +-
 intern/cycles/kernel/svm/svm_tex_coord.h           |  10 +-
 intern/cycles/kernel/svm/svm_value.h               |   4 +-
 intern/cycles/kernel/svm/svm_vector_transform.h    |   2 +-
 intern/cycles/kernel/svm/svm_vertex_color.h        |   6 +-
 intern/cycles/kernel/svm/svm_voronoi.h             |  11 +-
 intern/cycles/kernel/svm/svm_voxel.h               |   7 +-
 intern/cycles/kernel/svm/svm_wave.h                |   7 +-
 intern/cycles/kernel/svm/svm_wavelength.h          |   2 +-
 intern/cycles/kernel/svm/svm_white_noise.h         |   2 +-
 intern/cycles/kernel/svm/svm_wireframe.h           |   4 +-
 120 files changed, 1442 insertions(+), 1416 deletions(-)

(limited to 'intern/cycles/kernel')
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index a501cbe7a4b..bdbd574bf0f 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -154,7 +154,7 @@ ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
   return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
 }
 
-ccl_device_intersect bool scene_intersect(ccl_global const KernelGlobals *kg,
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
                                           ccl_private const Ray *ray,
                                           const uint visibility,
                                           ccl_private Intersection *isect)
@@ -248,7 +248,7 @@ ccl_device_intersect bool scene_intersect(ccl_global const KernelGlobals *kg,
 }
 
 #ifdef __BVH_LOCAL__
-ccl_device_intersect bool scene_intersect_local(ccl_global const KernelGlobals *kg,
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
                                                 ccl_private const Ray *ray,
                                                 ccl_private LocalIntersection *local_isect,
                                                 int local_object,
@@ -360,7 +360,7 @@ ccl_device_intersect bool scene_intersect_local(ccl_global const KernelGlobals *
 #endif
 
 #ifdef __SHADOW_RECORD_ALL__
-ccl_device_intersect bool scene_intersect_shadow_all(ccl_global const KernelGlobals *kg,
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
                                                      ccl_private const Ray *ray,
                                                      ccl_private Intersection *isect,
                                                      uint visibility,
@@ -448,7 +448,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(ccl_global const KernelGlob
 #endif /* __SHADOW_RECORD_ALL__ */
 
 #ifdef __VOLUME__
-ccl_device_intersect bool scene_intersect_volume(ccl_global const KernelGlobals *kg,
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
                                                  ccl_private const Ray *ray,
                                                  ccl_private Intersection *isect,
                                                  const uint visibility)
@@ -510,7 +510,7 @@ ccl_device_intersect bool scene_intersect_volume(ccl_global const KernelGlobals
 #endif /* __VOLUME__ */
 
 #ifdef __VOLUME_RECORD_ALL__
-ccl_device_intersect uint scene_intersect_volume_all(ccl_global const KernelGlobals *kg,
+ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
                                                      ccl_private const Ray *ray,
                                                      ccl_private Intersection *isect,
                                                      const uint max_hits,
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
index d3db6295ea5..7fa0cfdc510 100644
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -35,7 +35,7 @@ struct CCLIntersectContext {
     RAY_VOLUME_ALL = 4,
   } RayType;
 
-  const KernelGlobals *kg;
+  KernelGlobals kg;
   RayType type;
 
   /* for shadow rays */
@@ -50,7 +50,7 @@ struct CCLIntersectContext {
   int local_object_id;
   uint *lcg_state;
 
-  CCLIntersectContext(const KernelGlobals *kg_, RayType type_)
+  CCLIntersectContext(KernelGlobals kg_, RayType type_)
   {
     kg = kg_;
     type = type_;
@@ -101,7 +101,7 @@ ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
   rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
 }
 
-ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg,
+ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
                                                  const RTCRay *ray,
                                                  const RTCHit *hit,
                                                  Intersection *isect)
@@ -137,7 +137,7 @@ ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg,
 }
 
 ccl_device_inline void kernel_embree_convert_sss_hit(
-    const KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
+    KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
 {
   isect->u = 1.0f - hit->v - hit->u;
   isect->v = hit->u;
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 78ad4a34da9..79cde69699e 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -36,7 +36,7 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
                                      ccl_private const Ray *ray,
                                      ccl_private LocalIntersection *local_isect,
                                      int local_object,
@@ -196,7 +196,7 @@ ccl_device_inline
   return false;
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg,
                                          ccl_private const Ray *ray,
                                          ccl_private LocalIntersection *local_isect,
                                          int local_object,
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 49b37f39671..71122085f69 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -16,7 +16,7 @@
 
 // TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and
 // 3-vector which might be faster.
-ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg,
                                                                 int node_addr,
                                                                 int child)
 {
@@ -28,7 +28,7 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(ccl_global const
   return space;
 }
 
-ccl_device_forceinline int bvh_aligned_node_intersect(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg,
                                                       const float3 P,
                                                       const float3 idir,
                                                       const float t,
@@ -76,7 +76,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(ccl_global const KernelGlo
 #endif
 }
 
-ccl_device_forceinline bool bvh_unaligned_node_intersect_child(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg,
                                                                const float3 P,
                                                                const float3 dir,
                                                                const float t,
@@ -102,7 +102,7 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(ccl_global const
   return tnear <= tfar;
 }
 
-ccl_device_forceinline int bvh_unaligned_node_intersect(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg,
                                                         const float3 P,
                                                         const float3 dir,
                                                         const float3 idir,
@@ -134,7 +134,7 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(ccl_global const KernelG
   return mask;
 }
 
-ccl_device_forceinline int bvh_node_intersect(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg,
                                               const float3 P,
                                               const float3 dir,
                                               const float3 idir,
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 7e2edd2684c..42ab9eda37e 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -36,7 +36,7 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
                                      ccl_private const Ray *ray,
                                      ccl_private Intersection *isect_array,
                                      const uint visibility,
@@ -298,7 +298,7 @@ ccl_device_inline
   return false;
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg,
                                          ccl_private const Ray *ray,
                                          ccl_private Intersection *isect_array,
                                          const uint visibility,
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 9f271a4730c..1c17ebf767f 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -31,7 +31,7 @@
  * BVH_MOTION: motion blur rendering
  */
 
-ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
                                                      ccl_private const Ray *ray,
                                                      ccl_private Intersection *isect,
                                                      const uint visibility)
@@ -228,7 +228,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlob
   return (isect->prim != PRIM_NONE);
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg,
                                          ccl_private const Ray *ray,
                                          ccl_private Intersection *isect,
                                          const uint visibility)
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index 31aae389da0..d45eeec4815 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -139,8 +139,9 @@ ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *
 
 /* Utility to quickly get flags from an intersection. */
 
-ccl_device_forceinline int intersection_get_shader_flags(
-    ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int type)
+ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals kg,
+                                                         const int prim,
+                                                         const int type)
 {
   int shader = 0;
 
@@ -159,8 +160,9 @@ ccl_device_forceinline int intersection_get_shader_flags(
   return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
 }
 
-ccl_device_forceinline int intersection_get_shader_from_isect_prim(
-    ccl_global const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
+ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals kg,
+                                                                   const int prim,
+                                                                   const int isect_type)
 {
   int shader = 0;
 
@@ -179,23 +181,21 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim(
   return shader & SHADER_MASK;
 }
 
-ccl_device_forceinline int intersection_get_shader(ccl_global const KernelGlobals *ccl_restrict kg,
-                                                   ccl_private const Intersection *ccl_restrict
-                                                       isect)
+ccl_device_forceinline int intersection_get_shader(
+    KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect)
 {
   return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type);
 }
 
 ccl_device_forceinline int intersection_get_object_flags(
-    ccl_global const KernelGlobals *ccl_restrict kg,
-    ccl_private const Intersection *ccl_restrict isect)
+    KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect)
 {
   return kernel_tex_fetch(__object_flag, isect->object);
 }
 
 /* TODO: find a better (faster) solution for this. Maybe store offset per object for
  * attributes needed in intersection? */
-ccl_device_inline int intersection_find_attribute(ccl_global const KernelGlobals *kg,
+ccl_device_inline int intersection_find_attribute(KernelGlobals kg,
                                                   const int object,
                                                   const uint id)
 {
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index d3bfce2d96b..fa56bd02bef 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -35,7 +35,7 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    bool BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+    bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
                                      ccl_private const Ray *ray,
                                      ccl_private Intersection *isect,
                                      const uint visibility)
@@ -221,7 +221,7 @@ ccl_device_inline
   return (isect->prim != PRIM_NONE);
 }
 
-ccl_device_inline bool BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg,
                                          ccl_private const Ray *ray,
                                          ccl_private Intersection *isect,
                                          const uint visibility)
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index f0fe95924cf..1d7d942e736 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -35,7 +35,7 @@ ccl_device
 #else
 ccl_device_inline
 #endif
-    uint BVH_FUNCTION_FULL_NAME(BVH)(ccl_global const KernelGlobals *kg,
+    uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
                                      ccl_private const Ray *ray,
                                      Intersection *isect_array,
                                      const uint max_hits,
@@ -289,7 +289,7 @@ ccl_device_inline
   return num_hits;
 }
 
-ccl_device_inline uint BVH_FUNCTION_NAME(ccl_global const KernelGlobals *kg,
+ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals kg,
                                          ccl_private const Ray *ray,
                                          Intersection *isect_array,
                                          const uint max_hits,
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index e115bef3170..28c889f2841 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -111,7 +111,7 @@ ccl_device_inline float shift_cos_in(float cos_in, const float frequency_multipl
   return val;
 }
 
-ccl_device_inline int bsdf_sample(ccl_global const KernelGlobals *kg,
+ccl_device_inline int bsdf_sample(KernelGlobals kg,
                                   ccl_private ShaderData *sd,
                                   ccl_private const ShaderClosure *sc,
                                   float randu,
@@ -467,7 +467,7 @@ ccl_device
 ccl_device_inline
 #endif
     float3
-    bsdf_eval(ccl_global const KernelGlobals *kg,
+    bsdf_eval(KernelGlobals kg,
               ccl_private ShaderData *sd,
               ccl_private const ShaderClosure *sc,
               const float3 omega_in,
@@ -652,9 +652,7 @@ ccl_device_inline
   return eval;
 }
 
-ccl_device void bsdf_blur(ccl_global const KernelGlobals *kg,
-                          ccl_private ShaderClosure *sc,
-                          float roughness)
+ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float roughness)
 {
   /* TODO: do we want to blur volume closures? */
 #ifdef __SVM__
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index 17097b0739b..a474c5661b3 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -180,7 +180,7 @@ ccl_device_inline float longitudinal_scattering(
 }
 
 /* Combine the three values using their luminances. */
-ccl_device_inline float4 combine_with_energy(ccl_global const KernelGlobals *kg, float3 c)
+ccl_device_inline float4 combine_with_energy(KernelGlobals kg, float3 c)
 {
   return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c));
 }
@@ -229,7 +229,7 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
 #endif /* __HAIR__ */
 
 /* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */
-ccl_device_inline void hair_attenuation(ccl_global const KernelGlobals *kg,
+ccl_device_inline void hair_attenuation(KernelGlobals kg,
                                         float f,
                                         float3 T,
                                         ccl_private float4 *Ap)
@@ -281,7 +281,7 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i,
 }
 
 /* Evaluation function for our shader. */
-ccl_device float3 bsdf_principled_hair_eval(ccl_global const KernelGlobals *kg,
+ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg,
                                             ccl_private const ShaderData *sd,
                                             ccl_private const ShaderClosure *sc,
                                             const float3 omega_in,
@@ -359,7 +359,7 @@ ccl_device float3 bsdf_principled_hair_eval(ccl_global const KernelGlobals *kg,
 }
 
 /* Sampling function for the hair shader. */
-ccl_device int bsdf_principled_hair_sample(ccl_global const KernelGlobals *kg,
+ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
                                            ccl_private const ShaderClosure *sc,
                                            ccl_private ShaderData *sd,
                                            float randu,
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 41c35867a6b..a4e1b7a491c 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -55,7 +55,7 @@ static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetBsdf), "MicrofacetBsdf i
 
 /* Beckmann and GGX microfacet importance sampling. */
 
-ccl_device_inline void microfacet_beckmann_sample_slopes(ccl_global const KernelGlobals *kg,
+ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals kg,
                                                          const float cos_theta_i,
                                                          const float sin_theta_i,
                                                          float randu,
@@ -195,7 +195,7 @@ ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i,
   *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x) * (*slope_x));
 }
 
-ccl_device_forceinline float3 microfacet_sample_stretched(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline float3 microfacet_sample_stretched(KernelGlobals kg,
                                                           const float3 omega_i,
                                                           const float alpha_x,
                                                           const float alpha_y,
@@ -549,7 +549,7 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClos
   return make_float3(out, out, out);
 }
 
-ccl_device int bsdf_microfacet_ggx_sample(ccl_global const KernelGlobals *kg,
+ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals kg,
                                           ccl_private const ShaderClosure *sc,
                                           float3 Ng,
                                           float3 I,
@@ -977,7 +977,7 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const Shade
   return make_float3(out, out, out);
 }
 
-ccl_device int bsdf_microfacet_beckmann_sample(ccl_global const KernelGlobals *kg,
+ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
                                                ccl_private const ShaderClosure *sc,
                                                float3 Ng,
                                                float3 I,
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
index 6ee1139ddbb..b7bd7faaa54 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h
@@ -476,7 +476,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(ccl_private const Shade
                         bsdf->extra->cspec0);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_sample(ccl_global const KernelGlobals *kg,
+ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals kg,
                                                 ccl_private const ShaderClosure *sc,
                                                 float3 Ng,
                                                 float3 I,
@@ -639,7 +639,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(ccl_private const
                        bsdf->extra->cspec0);
 }
 
-ccl_device int bsdf_microfacet_multi_ggx_glass_sample(ccl_global const KernelGlobals *kg,
+ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals kg,
                                                       ccl_private const ShaderClosure *sc,
                                                       float3 Ng,
                                                       float3 I,
diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h
index 98b036e269d..fb9aae38cfc 100644
--- a/intern/cycles/kernel/device/cpu/globals.h
+++ b/intern/cycles/kernel/device/cpu/globals.h
@@ -34,7 +34,7 @@ struct OSLThreadData;
 struct OSLShadingSystem;
 #endif
 
-typedef struct KernelGlobals {
+typedef struct KernelGlobalsCPU {
 #define KERNEL_TEX(type, name) texture<type> name;
 #include "kernel/kernel_textures.h"
 
@@ -51,7 +51,9 @@ typedef struct KernelGlobals {
   /* **** Run-time data ****  */
 
   ProfilingState profiler;
-} KernelGlobals;
+} KernelGlobalsCPU;
+
+typedef const KernelGlobalsCPU *ccl_restrict KernelGlobals;
 
 /* Abstraction macros */
 #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h
index 57e81ab186d..44c5d7ef065 100644
--- a/intern/cycles/kernel/device/cpu/image.h
+++ b/intern/cycles/kernel/device/cpu/image.h
@@ -583,7 +583,7 @@ template<typename T> struct NanoVDBInterpolator {
 
 #undef SET_CUBIC_SPLINE_WEIGHTS
 
-ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float x, float y)
+ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
 {
   const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
 
@@ -611,7 +611,7 @@ ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float
   }
 }
 
-ccl_device float4 kernel_tex_image_interp_3d(const KernelGlobals *kg,
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
                                              int id,
                                              float3 P,
                                              InterpolationType interp)
diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp
index ac1cdf5fffe..8519b77aa08 100644
--- a/intern/cycles/kernel/device/cpu/kernel.cpp
+++ b/intern/cycles/kernel/device/cpu/kernel.cpp
@@ -64,7 +64,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Memory Copy */
 
-void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t)
+void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t)
 {
   if (strcmp(name, "__data") == 0) {
     kg->__data = *(KernelData *)host;
@@ -74,7 +74,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t)
   }
 }
 
-void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size)
+void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem, size_t size)
 {
   if (0) {
   }
diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h
index ae2a841835a..28337a58898 100644
--- a/intern/cycles/kernel/device/cpu/kernel.h
+++ b/intern/cycles/kernel/device/cpu/kernel.h
@@ -29,17 +29,17 @@ CCL_NAMESPACE_BEGIN
 #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
 
 struct IntegratorStateCPU;
-struct KernelGlobals;
+struct KernelGlobalsCPU;
 struct KernelData;
 
-KernelGlobals *kernel_globals_create();
-void kernel_globals_free(KernelGlobals *kg);
+KernelGlobalsCPU *kernel_globals_create();
+void kernel_globals_free(KernelGlobalsCPU *kg);
 
-void *kernel_osl_memory(const KernelGlobals *kg);
-bool kernel_osl_use(const KernelGlobals *kg);
+void *kernel_osl_memory(const KernelGlobalsCPU *kg);
+bool kernel_osl_use(const KernelGlobalsCPU *kg);
 
-void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
-void kernel_global_memory_copy(KernelGlobals *kg, const char *name, void *mem, size_t size);
+void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t size);
+void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem, size_t size);
 
 #define KERNEL_ARCH cpu
 #include "kernel/device/cpu/kernel_arch.h"
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h
index 8b7b0ec0548..ae7fab65100 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch.h
@@ -21,16 +21,16 @@
  */
 
 #define KERNEL_INTEGRATOR_FUNCTION(name) \
-  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *ccl_restrict kg, \
+  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \
                                                     IntegratorStateCPU *state)
 
 #define KERNEL_INTEGRATOR_SHADE_FUNCTION(name) \
-  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *ccl_restrict kg, \
+  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \
                                                     IntegratorStateCPU *state, \
                                                     ccl_global float *render_buffer)
 
 #define KERNEL_INTEGRATOR_INIT_FUNCTION(name) \
-  bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *ccl_restrict kg, \
+  bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \
                                                     IntegratorStateCPU *state, \
                                                     KernelWorkTile *tile, \
                                                     ccl_global float *render_buffer)
@@ -56,11 +56,11 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel);
  * Shader evaluation.
  */
 
-void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg,
                                                        const KernelShaderEvalInput *input,
                                                        float *output,
                                                        const int offset);
-void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg,
                                                      const KernelShaderEvalInput *input,
                                                      float *output,
                                                      const int offset);
@@ -70,7 +70,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg,
  */
 
 bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
-    const KernelGlobals *kg,
+    const KernelGlobalsCPU *kg,
     ccl_global float *render_buffer,
     int x,
     int y,
@@ -79,14 +79,14 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
     int offset,
     int stride);
 
-void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg,
                                                            ccl_global float *render_buffer,
                                                            int y,
                                                            int start_x,
                                                            int width,
                                                            int offset,
                                                            int stride);
-void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg,
                                                            ccl_global float *render_buffer,
                                                            int x,
                                                            int start_y,
@@ -98,7 +98,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobals *
  * Cryptomatte.
  */
 
-void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg,
                                                         ccl_global float *render_buffer,
                                                         int pixel_index);
 
@@ -108,6 +108,6 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobals *kg,
 /* TODO(sergey): Needs to be re-implemented. Or not? Brecht did it already :) */
 
 void KERNEL_FUNCTION_FULL_NAME(bake)(
-    const KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
+    const KernelGlobalsCPU *kg, float *buffer, int sample, int x, int y, int offset, int stride);
 
 #undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
index 23e371f165f..bf8667ac045 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
@@ -70,7 +70,7 @@ CCL_NAMESPACE_BEGIN
 #endif
 
 #define DEFINE_INTEGRATOR_KERNEL(name) \
-  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *kg, \
+  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
                                                     IntegratorStateCPU *state) \
   { \
     KERNEL_INVOKE(name, kg, state); \
@@ -78,7 +78,7 @@ CCL_NAMESPACE_BEGIN
 
 #define DEFINE_INTEGRATOR_SHADE_KERNEL(name) \
   void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
-      const KernelGlobals *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \
+      const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \
   { \
     KERNEL_INVOKE(name, kg, state, render_buffer); \
   }
@@ -86,7 +86,7 @@ CCL_NAMESPACE_BEGIN
 /* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so
  * that it does not contain unused fields. */
 #define DEFINE_INTEGRATOR_INIT_KERNEL(name) \
-  bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobals *kg, \
+  bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
                                                     IntegratorStateCPU *state, \
                                                     KernelWorkTile *tile, \
                                                     ccl_global float *render_buffer) \
@@ -112,7 +112,7 @@ DEFINE_INTEGRATOR_SHADE_KERNEL(megakernel)
  * Shader evaluation.
  */
 
-void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg,
                                                      const KernelShaderEvalInput *input,
                                                      float *output,
                                                      const int offset)
@@ -124,7 +124,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobals *kg,
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg,
                                                        const KernelShaderEvalInput *input,
                                                        float *output,
                                                        const int offset)
@@ -141,7 +141,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobals *kg,
  */
 
 bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
-    const KernelGlobals *kg,
+    const KernelGlobalsCPU *kg,
     ccl_global float *render_buffer,
     int x,
     int y,
@@ -159,7 +159,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg,
                                                            ccl_global float *render_buffer,
                                                            int y,
                                                            int start_x,
@@ -174,7 +174,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobals *
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg,
                                                            ccl_global float *render_buffer,
                                                            int x,
                                                            int start_y,
@@ -193,7 +193,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobals *
  * Cryptomatte.
  */
 
-void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobals *kg,
+void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg,
                                                         ccl_global float *render_buffer,
                                                         int pixel_index)
 {
@@ -210,7 +210,7 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobals *kg,
 /* TODO(sergey): Needs to be re-implemented. Or not? Brecht did it already :) */
 
 void KERNEL_FUNCTION_FULL_NAME(bake)(
-    const KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
+    const KernelGlobalsCPU *kg, float *buffer, int sample, int x, int y, int offset, int stride)
 {
 #if 0
 #  ifdef KERNEL_STUB
diff --git a/intern/cycles/kernel/device/cuda/globals.h b/intern/cycles/kernel/device/cuda/globals.h
index 169047175f5..2c187cf8a23 100644
--- a/intern/cycles/kernel/device/cuda/globals.h
+++ b/intern/cycles/kernel/device/cuda/globals.h
@@ -27,9 +27,10 @@ CCL_NAMESPACE_BEGIN
 
 /* Not actually used, just a NULL pointer that gets passed everywhere, which we
  * hope gets optimized out by the compiler. */
-struct KernelGlobals {
+struct KernelGlobalsGPU {
   int unused[1];
 };
+typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
 
 /* Global scene data and textures */
 __constant__ KernelData __data;
diff --git a/intern/cycles/kernel/device/gpu/image.h b/intern/cycles/kernel/device/gpu/image.h
index b015c78a8f5..95a37c693ae 100644
--- a/intern/cycles/kernel/device/gpu/image.h
+++ b/intern/cycles/kernel/device/gpu/image.h
@@ -189,7 +189,7 @@ ccl_device_noinline T kernel_tex_image_interp_nanovdb(
 }
 #endif
 
-ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float x, float y)
+ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
 {
   const TextureInfo &info = kernel_tex_fetch(__texture_info, id);
 
@@ -221,7 +221,7 @@ ccl_device float4 kernel_tex_image_interp(const KernelGlobals *kg, int id, float
   }
 }
 
-ccl_device float4 kernel_tex_image_interp_3d(const KernelGlobals *kg,
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
                                              int id,
                                              float3 P,
                                              InterpolationType interp)
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 21901215757..56beaf1fd91 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -51,8 +51,8 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
   const int state = ccl_gpu_global_id_x();
 
   if (state < num_states) {
-    INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0;
-    INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0;
+    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
   }
 }
 
@@ -244,7 +244,7 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B
 {
   gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>(
       num_states, indices, num_indices, [kernel](const int state) {
-        return (INTEGRATOR_STATE(path, queued_kernel) == kernel);
+        return (INTEGRATOR_STATE(state, path, queued_kernel) == kernel);
       });
 }
 
@@ -256,7 +256,7 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B
 {
   gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>(
       num_states, indices, num_indices, [kernel](const int state) {
-        return (INTEGRATOR_STATE(shadow_path, queued_kernel) == kernel);
+        return (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == kernel);
       });
 }
 
@@ -265,8 +265,8 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B
 {
   gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>(
       num_states, indices, num_indices, [](const int state) {
-        return (INTEGRATOR_STATE(path, queued_kernel) != 0) ||
-               (INTEGRATOR_STATE(shadow_path, queued_kernel) != 0);
+        return (INTEGRATOR_STATE(state, path, queued_kernel) != 0) ||
+               (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0);
       });
 }
 
@@ -278,8 +278,8 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B
 {
   gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>(
       num_states, indices + indices_offset, num_indices, [](const int state) {
-        return (INTEGRATOR_STATE(path, queued_kernel) == 0) &&
-               (INTEGRATOR_STATE(shadow_path, queued_kernel) == 0);
+        return (INTEGRATOR_STATE(state, path, queued_kernel) == 0) &&
+               (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0);
       });
 }
 
@@ -289,8 +289,8 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_SORTED_INDEX_DEFAULT_B
 {
   gpu_parallel_sorted_index_array<GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE>(
       num_states, indices, num_indices, key_prefix_sum, [kernel](const int state) {
-        return (INTEGRATOR_STATE(path, queued_kernel) == kernel) ?
-                   INTEGRATOR_STATE(path, shader_sort_key) :
+        return (INTEGRATOR_STATE(state, path, queued_kernel) == kernel) ?
+                   INTEGRATOR_STATE(state, path, shader_sort_key) :
                    GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY;
       });
 }
@@ -304,8 +304,8 @@ extern "C" __global__ void __launch_bounds__(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_B
   gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>(
       num_states, indices, num_indices, [num_active_paths](const int state) {
         return (state >= num_active_paths) &&
-               ((INTEGRATOR_STATE(path, queued_kernel) != 0) ||
-                (INTEGRATOR_STATE(shadow_path, queued_kernel) != 0));
+               ((INTEGRATOR_STATE(state, path, queued_kernel) != 0) ||
+                (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0));
       });
 }
 
diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h
index 7d898ed5d91..7b8ebfe50e6 100644
--- a/intern/cycles/kernel/device/optix/globals.h
+++ b/intern/cycles/kernel/device/optix/globals.h
@@ -27,9 +27,10 @@ CCL_NAMESPACE_BEGIN
 
 /* Not actually used, just a NULL pointer that gets passed everywhere, which we
  * hope gets optimized out by the compiler. */
-struct KernelGlobals {
+struct KernelGlobalsGPU {
   int unused[1];
 };
+typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
 
 /* Launch parameters */
 struct KernelParamsOptiX {
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index 850ac44e6e0..848e0430caa 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -27,11 +27,9 @@ CCL_NAMESPACE_BEGIN
  * Lookup of attributes is different between OSL and SVM, as OSL is ustring
  * based while for SVM we use integer ids. */
 
-ccl_device_inline uint subd_triangle_patch(ccl_global const KernelGlobals *kg,
-                                           ccl_private const ShaderData *sd);
+ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd);
 
-ccl_device_inline uint attribute_primitive_type(ccl_global const KernelGlobals *kg,
-                                                ccl_private const ShaderData *sd)
+ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) {
     return ATTR_PRIM_SUBD;
@@ -50,12 +48,12 @@ ccl_device_inline AttributeDescriptor attribute_not_found()
 
 /* Find attribute based on ID */
 
-ccl_device_inline uint object_attribute_map_offset(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline uint object_attribute_map_offset(KernelGlobals kg, int object)
 {
   return kernel_tex_fetch(__objects, object).attribute_map_offset;
 }
 
-ccl_device_inline AttributeDescriptor find_attribute(ccl_global const KernelGlobals *kg,
+ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg,
                                                      ccl_private const ShaderData *sd,
                                                      uint id)
 {
@@ -102,7 +100,7 @@ ccl_device_inline AttributeDescriptor find_attribute(ccl_global const KernelGlob
 
 /* Transform matrix attribute on meshes */
 
-ccl_device Transform primitive_attribute_matrix(ccl_global const KernelGlobals *kg,
+ccl_device Transform primitive_attribute_matrix(KernelGlobals kg,
                                                 ccl_private const ShaderData *sd,
                                                 const AttributeDescriptor desc)
 {
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 07f218d781b..7271193eef8 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Reading attributes on various curve elements */
 
-ccl_device float curve_attribute_float(ccl_global const KernelGlobals *kg,
+ccl_device float curve_attribute_float(KernelGlobals kg,
                                        ccl_private const ShaderData *sd,
                                        const AttributeDescriptor desc,
                                        ccl_private float *dx,
@@ -69,7 +69,7 @@ ccl_device float curve_attribute_float(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device float2 curve_attribute_float2(ccl_global const KernelGlobals *kg,
+ccl_device float2 curve_attribute_float2(KernelGlobals kg,
                                          ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
                                          ccl_private float2 *dx,
@@ -115,7 +115,7 @@ ccl_device float2 curve_attribute_float2(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device float3 curve_attribute_float3(ccl_global const KernelGlobals *kg,
+ccl_device float3 curve_attribute_float3(KernelGlobals kg,
                                          ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
                                          ccl_private float3 *dx,
@@ -157,7 +157,7 @@ ccl_device float3 curve_attribute_float3(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device float4 curve_attribute_float4(ccl_global const KernelGlobals *kg,
+ccl_device float4 curve_attribute_float4(KernelGlobals kg,
                                          ccl_private const ShaderData *sd,
                                          const AttributeDescriptor desc,
                                          ccl_private float4 *dx,
@@ -201,8 +201,7 @@ ccl_device float4 curve_attribute_float4(ccl_global const KernelGlobals *kg,
 
 /* Curve thickness */
 
-ccl_device float curve_thickness(ccl_global const KernelGlobals *kg,
-                                 ccl_private const ShaderData *sd)
+ccl_device float curve_thickness(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   float r = 0.0f;
 
@@ -230,8 +229,7 @@ ccl_device float curve_thickness(ccl_global const KernelGlobals *kg,
 /* Curve location for motion pass, linear interpolation between keys and
  * ignoring radius because we do the same for the motion keys */
 
-ccl_device float3 curve_motion_center_location(ccl_global const KernelGlobals *kg,
-                                               ccl_private const ShaderData *sd)
+ccl_device float3 curve_motion_center_location(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
   int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
@@ -247,8 +245,7 @@ ccl_device float3 curve_motion_center_location(ccl_global const KernelGlobals *k
 
 /* Curve tangent normal */
 
-ccl_device float3 curve_tangent_normal(ccl_global const KernelGlobals *kg,
-                                       ccl_private const ShaderData *sd)
+ccl_device float3 curve_tangent_normal(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   float3 tgN = make_float3(0.0f, 0.0f, 0.0f);
 
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 04af8ea1421..fb0b80b281f 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -625,7 +625,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
   return false;
 }
 
-ccl_device_forceinline bool curve_intersect(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
                                             ccl_private Intersection *isect,
                                             const float3 P,
                                             const float3 dir,
@@ -679,7 +679,7 @@ ccl_device_forceinline bool curve_intersect(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device_inline void curve_shader_setup(ccl_global const KernelGlobals *kg,
+ccl_device_inline void curve_shader_setup(KernelGlobals kg,
                                           ccl_private ShaderData *sd,
                                           float3 P,
                                           float3 D,
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 5754608a69b..2dd213d43f6 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __HAIR__
 
-ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals kg,
                                                          int offset,
                                                          int numkeys,
                                                          int numsteps,
@@ -54,13 +54,8 @@ ccl_device_inline void motion_curve_keys_for_step_linear(ccl_global const Kernel
 }
 
 /* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals *kg,
-                                                int object,
-                                                int prim,
-                                                float time,
-                                                int k0,
-                                                int k1,
-                                                float4 keys[2])
+ccl_device_inline void motion_curve_keys_linear(
+    KernelGlobals kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
 {
   /* get motion info */
   int numsteps, numkeys;
@@ -86,7 +81,7 @@ ccl_device_inline void motion_curve_keys_linear(ccl_global const KernelGlobals *
   keys[1] = (1.0f - t) * keys[1] + t * next_keys[1];
 }
 
-ccl_device_inline void motion_curve_keys_for_step(ccl_global const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys_for_step(KernelGlobals kg,
                                                   int offset,
                                                   int numkeys,
                                                   int numsteps,
@@ -119,7 +114,7 @@ ccl_device_inline void motion_curve_keys_for_step(ccl_global const KernelGlobals
 }
 
 /* return 2 curve key locations */
-ccl_device_inline void motion_curve_keys(ccl_global const KernelGlobals *kg,
+ccl_device_inline void motion_curve_keys(KernelGlobals kg,
                                          int object,
                                          int prim,
                                          float time,
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 547f03af47c..69d15f950ec 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -33,7 +33,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Time interpolation of vertex positions and normals */
 
-ccl_device_inline void motion_triangle_verts_for_step(ccl_global const KernelGlobals *kg,
+ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg,
                                                       uint4 tri_vindex,
                                                       int offset,
                                                       int numverts,
@@ -60,7 +60,7 @@ ccl_device_inline void motion_triangle_verts_for_step(ccl_global const KernelGlo
   }
 }
 
-ccl_device_inline void motion_triangle_normals_for_step(ccl_global const KernelGlobals *kg,
+ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg,
                                                         uint4 tri_vindex,
                                                         int offset,
                                                         int numverts,
@@ -88,7 +88,7 @@ ccl_device_inline void motion_triangle_normals_for_step(ccl_global const KernelG
 }
 
 ccl_device_inline void motion_triangle_vertices(
-    ccl_global const KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
+    KernelGlobals kg, int object, int prim, float time, float3 verts[3])
 {
   /* get motion info */
   int numsteps, numverts;
@@ -116,13 +116,8 @@ ccl_device_inline void motion_triangle_vertices(
   verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
 }
 
-ccl_device_inline float3 motion_triangle_smooth_normal(ccl_global const KernelGlobals *kg,
-                                                       float3 Ng,
-                                                       int object,
-                                                       int prim,
-                                                       float u,
-                                                       float v,
-                                                       float time)
+ccl_device_inline float3 motion_triangle_smooth_normal(
+    KernelGlobals kg, float3 Ng, int object, int prim, float u, float v, float time)
 {
   /* get motion info */
   int numsteps, numverts;
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index 94d00875f0a..256e7add21e 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
  * a closer distance.
  */
 
-ccl_device_inline float3 motion_triangle_refine(ccl_global const KernelGlobals *kg,
+ccl_device_inline float3 motion_triangle_refine(KernelGlobals kg,
                                                 ccl_private ShaderData *sd,
                                                 float3 P,
                                                 float3 D,
@@ -92,7 +92,7 @@ ccl_device_noinline
 ccl_device_inline
 #  endif
     float3
-    motion_triangle_refine_local(ccl_global const KernelGlobals *kg,
+    motion_triangle_refine_local(KernelGlobals kg,
                                  ccl_private ShaderData *sd,
                                  float3 P,
                                  float3 D,
@@ -145,7 +145,7 @@ ccl_device_inline
  * time and do a ray intersection with the resulting triangle.
  */
 
-ccl_device_inline bool motion_triangle_intersect(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg,
                                                  ccl_private Intersection *isect,
                                                  float3 P,
                                                  float3 dir,
@@ -202,7 +202,7 @@ ccl_device_inline bool motion_triangle_intersect(ccl_global const KernelGlobals
  * Returns whether traversal should be stopped.
  */
 #ifdef __BVH_LOCAL__
-ccl_device_inline bool motion_triangle_intersect_local(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg,
                                                        ccl_private LocalIntersection *local_isect,
                                                        float3 P,
                                                        float3 dir,
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
index 25a68fa7781..fc7c181882e 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h
@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
  * normals */
 
 /* return 3 triangle vertex normals */
-ccl_device_noinline void motion_triangle_shader_setup(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg,
                                                       ccl_private ShaderData *sd,
                                                       const float3 P,
                                                       const float3 D,
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 730c01d4709..34a9d639d9d 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -37,7 +37,7 @@ enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST
 
 /* Object to world space transformation */
 
-ccl_device_inline Transform object_fetch_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_transform(KernelGlobals kg,
                                                    int object,
                                                    enum ObjectTransform type)
 {
@@ -51,9 +51,7 @@ ccl_device_inline Transform object_fetch_transform(ccl_global const KernelGlobal
 
 /* Lamp to world space transformation */
 
-ccl_device_inline Transform lamp_fetch_transform(ccl_global const KernelGlobals *kg,
-                                                 int lamp,
-                                                 bool inverse)
+ccl_device_inline Transform lamp_fetch_transform(KernelGlobals kg, int lamp, bool inverse)
 {
   if (inverse) {
     return kernel_tex_fetch(__lights, lamp).itfm;
@@ -65,7 +63,7 @@ ccl_device_inline Transform lamp_fetch_transform(ccl_global const KernelGlobals
 
 /* Object to world space transformation for motion vectors */
 
-ccl_device_inline Transform object_fetch_motion_pass_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals kg,
                                                                int object,
                                                                enum ObjectVectorTransform type)
 {
@@ -76,9 +74,7 @@ ccl_device_inline Transform object_fetch_motion_pass_transform(ccl_global const
 /* Motion blurred object transformations */
 
 #ifdef __OBJECT_MOTION__
-ccl_device_inline Transform object_fetch_transform_motion(ccl_global const KernelGlobals *kg,
-                                                          int object,
-                                                          float time)
+ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals kg, int object, float time)
 {
   const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset;
   ccl_global const DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
@@ -90,7 +86,7 @@ ccl_device_inline Transform object_fetch_transform_motion(ccl_global const Kerne
   return tfm;
 }
 
-ccl_device_inline Transform object_fetch_transform_motion_test(ccl_global const KernelGlobals *kg,
+ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg,
                                                                int object,
                                                                float time,
                                                                ccl_private Transform *itfm)
@@ -117,7 +113,7 @@ ccl_device_inline Transform object_fetch_transform_motion_test(ccl_global const
 
 /* Get transform matrix for shading point. */
 
-ccl_device_inline Transform object_get_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline Transform object_get_transform(KernelGlobals kg,
                                                  ccl_private const ShaderData *sd)
 {
 #ifdef __OBJECT_MOTION__
@@ -129,7 +125,7 @@ ccl_device_inline Transform object_get_transform(ccl_global const KernelGlobals
 #endif
 }
 
-ccl_device_inline Transform object_get_inverse_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline Transform object_get_inverse_transform(KernelGlobals kg,
                                                          ccl_private const ShaderData *sd)
 {
 #ifdef __OBJECT_MOTION__
@@ -142,7 +138,7 @@ ccl_device_inline Transform object_get_inverse_transform(ccl_global const Kernel
 }
 /* Transform position from object to world space */
 
-ccl_device_inline void object_position_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline void object_position_transform(KernelGlobals kg,
                                                  ccl_private const ShaderData *sd,
                                                  ccl_private float3 *P)
 {
@@ -159,7 +155,7 @@ ccl_device_inline void object_position_transform(ccl_global const KernelGlobals
 
 /* Transform position from world to object space */
 
-ccl_device_inline void object_inverse_position_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline void object_inverse_position_transform(KernelGlobals kg,
                                                          ccl_private const ShaderData *sd,
                                                          ccl_private float3 *P)
 {
@@ -176,7 +172,7 @@ ccl_device_inline void object_inverse_position_transform(ccl_global const Kernel
 
 /* Transform normal from world to object space */
 
-ccl_device_inline void object_inverse_normal_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline void object_inverse_normal_transform(KernelGlobals kg,
                                                        ccl_private const ShaderData *sd,
                                                        ccl_private float3 *N)
 {
@@ -201,7 +197,7 @@ ccl_device_inline void object_inverse_normal_transform(ccl_global const KernelGl
 
 /* Transform normal from object to world space */
 
-ccl_device_inline void object_normal_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline void object_normal_transform(KernelGlobals kg,
                                                ccl_private const ShaderData *sd,
                                                ccl_private float3 *N)
 {
@@ -218,7 +214,7 @@ ccl_device_inline void object_normal_transform(ccl_global const KernelGlobals *k
 
 /* Transform direction vector from object to world space */
 
-ccl_device_inline void object_dir_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline void object_dir_transform(KernelGlobals kg,
                                             ccl_private const ShaderData *sd,
                                             ccl_private float3 *D)
 {
@@ -235,7 +231,7 @@ ccl_device_inline void object_dir_transform(ccl_global const KernelGlobals *kg,
 
 /* Transform direction vector from world to object space */
 
-ccl_device_inline void object_inverse_dir_transform(ccl_global const KernelGlobals *kg,
+ccl_device_inline void object_inverse_dir_transform(KernelGlobals kg,
                                                     ccl_private const ShaderData *sd,
                                                     ccl_private float3 *D)
 {
@@ -252,8 +248,7 @@ ccl_device_inline void object_inverse_dir_transform(ccl_global const KernelGloba
 
 /* Object center position */
 
-ccl_device_inline float3 object_location(ccl_global const KernelGlobals *kg,
-                                         ccl_private const ShaderData *sd)
+ccl_device_inline float3 object_location(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   if (sd->object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -270,7 +265,7 @@ ccl_device_inline float3 object_location(ccl_global const KernelGlobals *kg,
 
 /* Color of the object */
 
-ccl_device_inline float3 object_color(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_color(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -281,7 +276,7 @@ ccl_device_inline float3 object_color(ccl_global const KernelGlobals *kg, int ob
 
 /* Pass ID number of object */
 
-ccl_device_inline float object_pass_id(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float object_pass_id(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -291,7 +286,7 @@ ccl_device_inline float object_pass_id(ccl_global const KernelGlobals *kg, int o
 
 /* Per lamp random number for shader variation */
 
-ccl_device_inline float lamp_random_number(ccl_global const KernelGlobals *kg, int lamp)
+ccl_device_inline float lamp_random_number(KernelGlobals kg, int lamp)
 {
   if (lamp == LAMP_NONE)
     return 0.0f;
@@ -301,7 +296,7 @@ ccl_device_inline float lamp_random_number(ccl_global const KernelGlobals *kg, i
 
 /* Per object random number for shader variation */
 
-ccl_device_inline float object_random_number(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float object_random_number(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -311,7 +306,7 @@ ccl_device_inline float object_random_number(ccl_global const KernelGlobals *kg,
 
 /* Particle ID from which this object was generated */
 
-ccl_device_inline int object_particle_id(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline int object_particle_id(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -321,7 +316,7 @@ ccl_device_inline int object_particle_id(ccl_global const KernelGlobals *kg, int
 
 /* Generated texture coordinate on surface from where object was instanced */
 
-ccl_device_inline float3 object_dupli_generated(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_dupli_generated(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -333,7 +328,7 @@ ccl_device_inline float3 object_dupli_generated(ccl_global const KernelGlobals *
 
 /* UV texture coordinate on surface from where object was instanced */
 
-ccl_device_inline float3 object_dupli_uv(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float3 object_dupli_uv(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return make_float3(0.0f, 0.0f, 0.0f);
@@ -344,7 +339,7 @@ ccl_device_inline float3 object_dupli_uv(ccl_global const KernelGlobals *kg, int
 
 /* Information about mesh for motion blurred triangles and curves */
 
-ccl_device_inline void object_motion_info(ccl_global const KernelGlobals *kg,
+ccl_device_inline void object_motion_info(KernelGlobals kg,
                                           int object,
                                           ccl_private int *numsteps,
                                           ccl_private int *numverts,
@@ -362,7 +357,7 @@ ccl_device_inline void object_motion_info(ccl_global const KernelGlobals *kg,
 
 /* Offset to an objects patch map */
 
-ccl_device_inline uint object_patch_map_offset(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline uint object_patch_map_offset(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -372,7 +367,7 @@ ccl_device_inline uint object_patch_map_offset(ccl_global const KernelGlobals *k
 
 /* Volume step size */
 
-ccl_device_inline float object_volume_density(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float object_volume_density(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE) {
     return 1.0f;
@@ -381,7 +376,7 @@ ccl_device_inline float object_volume_density(ccl_global const KernelGlobals *kg
   return kernel_tex_fetch(__objects, object).volume_density;
 }
 
-ccl_device_inline float object_volume_step_size(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float object_volume_step_size(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE) {
     return kernel_data.background.volume_step_size;
@@ -392,14 +387,14 @@ ccl_device_inline float object_volume_step_size(ccl_global const KernelGlobals *
 
 /* Pass ID for shader */
 
-ccl_device int shader_pass_id(ccl_global const KernelGlobals *kg, ccl_private const ShaderData *sd)
+ccl_device int shader_pass_id(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
 }
 
 /* Cryptomatte ID */
 
-ccl_device_inline float object_cryptomatte_id(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float object_cryptomatte_id(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0.0f;
@@ -407,7 +402,7 @@ ccl_device_inline float object_cryptomatte_id(ccl_global const KernelGlobals *kg
   return kernel_tex_fetch(__objects, object).cryptomatte_object;
 }
 
-ccl_device_inline float object_cryptomatte_asset_id(ccl_global const KernelGlobals *kg, int object)
+ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals kg, int object)
 {
   if (object == OBJECT_NONE)
     return 0;
@@ -417,42 +412,42 @@ ccl_device_inline float object_cryptomatte_asset_id(ccl_global const KernelGloba
 
 /* Particle data from which object was instanced */
 
-ccl_device_inline uint particle_index(ccl_global const KernelGlobals *kg, int particle)
+ccl_device_inline uint particle_index(KernelGlobals kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).index;
 }
 
-ccl_device float particle_age(ccl_global const KernelGlobals *kg, int particle)
+ccl_device float particle_age(KernelGlobals kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).age;
 }
 
-ccl_device float particle_lifetime(ccl_global const KernelGlobals *kg, int particle)
+ccl_device float particle_lifetime(KernelGlobals kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).lifetime;
 }
 
-ccl_device float particle_size(ccl_global const KernelGlobals *kg, int particle)
+ccl_device float particle_size(KernelGlobals kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).size;
 }
 
-ccl_device float4 particle_rotation(ccl_global const KernelGlobals *kg, int particle)
+ccl_device float4 particle_rotation(KernelGlobals kg, int particle)
 {
   return kernel_tex_fetch(__particles, particle).rotation;
 }
 
-ccl_device float3 particle_location(ccl_global const KernelGlobals *kg, int particle)
+ccl_device float3 particle_location(KernelGlobals kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
 }
 
-ccl_device float3 particle_velocity(ccl_global const KernelGlobals *kg, int particle)
+ccl_device float3 particle_velocity(KernelGlobals kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
 }
 
-ccl_device float3 particle_angular_velocity(ccl_global const KernelGlobals *kg, int particle)
+ccl_device float3 particle_angular_velocity(KernelGlobals kg, int particle)
 {
   return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
 }
@@ -474,7 +469,7 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
 
 /* Transform ray into object space to enter static object in BVH */
 
-ccl_device_inline float bvh_instance_push(ccl_global const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_push(KernelGlobals kg,
                                           int object,
                                           ccl_private const Ray *ray,
                                           ccl_private float3 *P,
@@ -494,7 +489,7 @@ ccl_device_inline float bvh_instance_push(ccl_global const KernelGlobals *kg,
 
 /* Transform ray to exit static object in BVH. */
 
-ccl_device_inline float bvh_instance_pop(ccl_global const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_pop(KernelGlobals kg,
                                          int object,
                                          ccl_private const Ray *ray,
                                          ccl_private float3 *P,
@@ -516,7 +511,7 @@ ccl_device_inline float bvh_instance_pop(ccl_global const KernelGlobals *kg,
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
 
-ccl_device_inline void bvh_instance_pop_factor(ccl_global const KernelGlobals *kg,
+ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg,
                                                int object,
                                                ccl_private const Ray *ray,
                                                ccl_private float3 *P,
@@ -535,7 +530,7 @@ ccl_device_inline void bvh_instance_pop_factor(ccl_global const KernelGlobals *k
 #ifdef __OBJECT_MOTION__
 /* Transform ray into object space to enter motion blurred object in BVH */
 
-ccl_device_inline float bvh_instance_motion_push(ccl_global const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg,
                                                  int object,
                                                  ccl_private const Ray *ray,
                                                  ccl_private float3 *P,
@@ -556,7 +551,7 @@ ccl_device_inline float bvh_instance_motion_push(ccl_global const KernelGlobals
 
 /* Transform ray to exit motion blurred object in BVH. */
 
-ccl_device_inline float bvh_instance_motion_pop(ccl_global const KernelGlobals *kg,
+ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg,
                                                 int object,
                                                 ccl_private const Ray *ray,
                                                 ccl_private float3 *P,
@@ -578,7 +573,7 @@ ccl_device_inline float bvh_instance_motion_pop(ccl_global const KernelGlobals *
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
 
-ccl_device_inline void bvh_instance_motion_pop_factor(ccl_global const KernelGlobals *kg,
+ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg,
                                                       int object,
                                                       ccl_private const Ray *ray,
                                                       ccl_private float3 *P,
diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h
index b54eafd6220..bd797ef52ab 100644
--- a/intern/cycles/kernel/geom/geom_patch.h
+++ b/intern/cycles/kernel/geom/geom_patch.h
@@ -64,7 +64,7 @@ ccl_device_inline int patch_map_resolve_quadrant(float median,
 /* retrieve PatchHandle from patch coords */
 
 ccl_device_inline PatchHandle
-patch_map_find_patch(ccl_global const KernelGlobals *kg, int object, int patch, float u, float v)
+patch_map_find_patch(KernelGlobals kg, int object, int patch, float u, float v)
 {
   PatchHandle handle;
 
@@ -201,7 +201,7 @@ ccl_device_inline void patch_eval_normalize_coords(uint patch_bits,
 
 /* retrieve patch control indices */
 
-ccl_device_inline int patch_eval_indices(ccl_global const KernelGlobals *kg,
+ccl_device_inline int patch_eval_indices(KernelGlobals kg,
                                          ccl_private const PatchHandle *handle,
                                          int channel,
                                          int indices[PATCH_MAX_CONTROL_VERTS])
@@ -218,7 +218,7 @@ ccl_device_inline int patch_eval_indices(ccl_global const KernelGlobals *kg,
 
 /* evaluate patch basis functions */
 
-ccl_device_inline void patch_eval_basis(ccl_global const KernelGlobals *kg,
+ccl_device_inline void patch_eval_basis(KernelGlobals kg,
                                         ccl_private const PatchHandle *handle,
                                         float u,
                                         float v,
@@ -257,7 +257,7 @@ ccl_device_inline void patch_eval_basis(ccl_global const KernelGlobals *kg,
 
 /* generic function for evaluating indices and weights from patch coords */
 
-ccl_device_inline int patch_eval_control_verts(ccl_global const KernelGlobals *kg,
+ccl_device_inline int patch_eval_control_verts(KernelGlobals kg,
                                                int object,
                                                int patch,
                                                float u,
@@ -279,7 +279,7 @@ ccl_device_inline int patch_eval_control_verts(ccl_global const KernelGlobals *k
 
 /* functions for evaluating attributes on patches */
 
-ccl_device float patch_eval_float(ccl_global const KernelGlobals *kg,
+ccl_device float patch_eval_float(KernelGlobals kg,
                                   ccl_private const ShaderData *sd,
                                   int offset,
                                   int patch,
@@ -316,7 +316,7 @@ ccl_device float patch_eval_float(ccl_global const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float2 patch_eval_float2(ccl_global const KernelGlobals *kg,
+ccl_device float2 patch_eval_float2(KernelGlobals kg,
                                     ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
@@ -353,7 +353,7 @@ ccl_device float2 patch_eval_float2(ccl_global const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float3 patch_eval_float3(ccl_global const KernelGlobals *kg,
+ccl_device float3 patch_eval_float3(KernelGlobals kg,
                                     ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
@@ -390,7 +390,7 @@ ccl_device float3 patch_eval_float3(ccl_global const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float4 patch_eval_float4(ccl_global const KernelGlobals *kg,
+ccl_device float4 patch_eval_float4(KernelGlobals kg,
                                     ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
@@ -427,7 +427,7 @@ ccl_device float4 patch_eval_float4(ccl_global const KernelGlobals *kg,
   return val;
 }
 
-ccl_device float4 patch_eval_uchar4(ccl_global const KernelGlobals *kg,
+ccl_device float4 patch_eval_uchar4(KernelGlobals kg,
                                     ccl_private const ShaderData *sd,
                                     int offset,
                                     int patch,
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index 869b911f76f..91b29c7f990 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -31,7 +31,7 @@ CCL_NAMESPACE_BEGIN
  * attributes for performance, mainly for GPU performance to avoid bringing in
  * heavy volume interpolation code. */
 
-ccl_device_inline float primitive_surface_attribute_float(ccl_global const KernelGlobals *kg,
+ccl_device_inline float primitive_surface_attribute_float(KernelGlobals kg,
                                                           ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
                                                           ccl_private float *dx,
@@ -57,7 +57,7 @@ ccl_device_inline float primitive_surface_attribute_float(ccl_global const Kerne
   }
 }
 
-ccl_device_inline float2 primitive_surface_attribute_float2(ccl_global const KernelGlobals *kg,
+ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals kg,
                                                             ccl_private const ShaderData *sd,
                                                             const AttributeDescriptor desc,
                                                             ccl_private float2 *dx,
@@ -83,7 +83,7 @@ ccl_device_inline float2 primitive_surface_attribute_float2(ccl_global const Ker
   }
 }
 
-ccl_device_inline float3 primitive_surface_attribute_float3(ccl_global const KernelGlobals *kg,
+ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals kg,
                                                             ccl_private const ShaderData *sd,
                                                             const AttributeDescriptor desc,
                                                             ccl_private float3 *dx,
@@ -109,12 +109,11 @@ ccl_device_inline float3 primitive_surface_attribute_float3(ccl_global const Ker
   }
 }
 
-ccl_device_forceinline float4
-primitive_surface_attribute_float4(ccl_global const KernelGlobals *kg,
-                                   ccl_private const ShaderData *sd,
-                                   const AttributeDescriptor desc,
-                                   ccl_private float4 *dx,
-                                   ccl_private float4 *dy)
+ccl_device_forceinline float4 primitive_surface_attribute_float4(KernelGlobals kg,
+                                                                 ccl_private const ShaderData *sd,
+                                                                 const AttributeDescriptor desc,
+                                                                 ccl_private float4 *dx,
+                                                                 ccl_private float4 *dy)
 {
   if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
     if (subd_triangle_patch(kg, sd) == ~0)
@@ -149,7 +148,7 @@ ccl_device_inline bool primitive_is_volume_attribute(ccl_private const ShaderDat
   return sd->type == PRIMITIVE_VOLUME;
 }
 
-ccl_device_inline float primitive_volume_attribute_float(ccl_global const KernelGlobals *kg,
+ccl_device_inline float primitive_volume_attribute_float(KernelGlobals kg,
                                                          ccl_private const ShaderData *sd,
                                                          const AttributeDescriptor desc)
 {
@@ -161,7 +160,7 @@ ccl_device_inline float primitive_volume_attribute_float(ccl_global const Kernel
   }
 }
 
-ccl_device_inline float3 primitive_volume_attribute_float3(ccl_global const KernelGlobals *kg,
+ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals kg,
                                                            ccl_private const ShaderData *sd,
                                                            const AttributeDescriptor desc)
 {
@@ -173,7 +172,7 @@ ccl_device_inline float3 primitive_volume_attribute_float3(ccl_global const Kern
   }
 }
 
-ccl_device_inline float4 primitive_volume_attribute_float4(ccl_global const KernelGlobals *kg,
+ccl_device_inline float4 primitive_volume_attribute_float4(KernelGlobals kg,
                                                            ccl_private const ShaderData *sd,
                                                            const AttributeDescriptor desc)
 {
@@ -188,8 +187,7 @@ ccl_device_inline float4 primitive_volume_attribute_float4(ccl_global const Kern
 
 /* Default UV coordinate */
 
-ccl_device_inline float3 primitive_uv(ccl_global const KernelGlobals *kg,
-                                      ccl_private const ShaderData *sd)
+ccl_device_inline float3 primitive_uv(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV);
 
@@ -202,7 +200,7 @@ ccl_device_inline float3 primitive_uv(ccl_global const KernelGlobals *kg,
 
 /* Ptex coordinates */
 
-ccl_device bool primitive_ptex(ccl_global const KernelGlobals *kg,
+ccl_device bool primitive_ptex(KernelGlobals kg,
                                ccl_private ShaderData *sd,
                                ccl_private float2 *uv,
                                ccl_private int *face_id)
@@ -225,7 +223,7 @@ ccl_device bool primitive_ptex(ccl_global const KernelGlobals *kg,
 
 /* Surface tangent */
 
-ccl_device float3 primitive_tangent(ccl_global const KernelGlobals *kg, ccl_private ShaderData *sd)
+ccl_device float3 primitive_tangent(KernelGlobals kg, ccl_private ShaderData *sd)
 {
 #ifdef __HAIR__
   if (sd->type & PRIMITIVE_ALL_CURVE)
@@ -257,7 +255,7 @@ ccl_device float3 primitive_tangent(ccl_global const KernelGlobals *kg, ccl_priv
 
 /* Motion vector for motion pass */
 
-ccl_device_inline float4 primitive_motion_vector(ccl_global const KernelGlobals *kg,
+ccl_device_inline float4 primitive_motion_vector(KernelGlobals kg,
                                                  ccl_private const ShaderData *sd)
 {
   /* center position */
diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h
index 2cf60e263c3..e6a5b8f7923 100644
--- a/intern/cycles/kernel/geom/geom_shader_data.h
+++ b/intern/cycles/kernel/geom/geom_shader_data.h
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
 /* ShaderData setup from incoming ray */
 
 #ifdef __OBJECT_MOTION__
-ccl_device void shader_setup_object_transforms(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device void shader_setup_object_transforms(KernelGlobals kg,
                                                ccl_private ShaderData *ccl_restrict sd,
                                                float time)
 {
@@ -38,7 +38,7 @@ ccl_device void shader_setup_object_transforms(ccl_global const KernelGlobals *c
 
 /* TODO: break this up if it helps reduce register pressure to load data from
  * global memory as we write it to shader-data. */
-ccl_device_inline void shader_setup_from_ray(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void shader_setup_from_ray(KernelGlobals kg,
                                              ccl_private ShaderData *ccl_restrict sd,
                                              ccl_private const Ray *ccl_restrict ray,
                                              ccl_private const Intersection *ccl_restrict isect)
@@ -135,7 +135,7 @@ ccl_device_inline void shader_setup_from_ray(ccl_global const KernelGlobals *ccl
 
 /* ShaderData setup from position sampled on mesh */
 
-ccl_device_inline void shader_setup_from_sample(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void shader_setup_from_sample(KernelGlobals kg,
                                                 ccl_private ShaderData *ccl_restrict sd,
                                                 const float3 P,
                                                 const float3 Ng,
@@ -247,7 +247,7 @@ ccl_device_inline void shader_setup_from_sample(ccl_global const KernelGlobals *
 
 /* ShaderData setup for displacement */
 
-ccl_device void shader_setup_from_displace(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device void shader_setup_from_displace(KernelGlobals kg,
                                            ccl_private ShaderData *ccl_restrict sd,
                                            int object,
                                            int prim,
@@ -281,8 +281,7 @@ ccl_device void shader_setup_from_displace(ccl_global const KernelGlobals *ccl_r
 
 /* ShaderData setup from ray into background */
 
-ccl_device_inline void shader_setup_from_background(ccl_global const KernelGlobals *ccl_restrict
-                                                        kg,
+ccl_device_inline void shader_setup_from_background(KernelGlobals kg,
                                                     ccl_private ShaderData *ccl_restrict sd,
                                                     const float3 ray_P,
                                                     const float3 ray_D,
@@ -326,7 +325,7 @@ ccl_device_inline void shader_setup_from_background(ccl_global const KernelGloba
 /* ShaderData setup from point inside volume */
 
 #ifdef __VOLUME__
-ccl_device_inline void shader_setup_from_volume(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
                                                 ccl_private ShaderData *ccl_restrict sd,
                                                 ccl_private const Ray *ccl_restrict ray)
 {
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index 927d630fe91..8a9a3f71231 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -22,15 +22,14 @@ CCL_NAMESPACE_BEGIN
 
 /* Patch index for triangle, -1 if not subdivision triangle */
 
-ccl_device_inline uint subd_triangle_patch(ccl_global const KernelGlobals *kg,
-                                           ccl_private const ShaderData *sd)
+ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0;
 }
 
 /* UV coords of triangle within patch */
 
-ccl_device_inline void subd_triangle_patch_uv(ccl_global const KernelGlobals *kg,
+ccl_device_inline void subd_triangle_patch_uv(KernelGlobals kg,
                                               ccl_private const ShaderData *sd,
                                               float2 uv[3])
 {
@@ -43,7 +42,7 @@ ccl_device_inline void subd_triangle_patch_uv(ccl_global const KernelGlobals *kg
 
 /* Vertex indices of patch */
 
-ccl_device_inline uint4 subd_triangle_patch_indices(ccl_global const KernelGlobals *kg, int patch)
+ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals kg, int patch)
 {
   uint4 indices;
 
@@ -57,24 +56,21 @@ ccl_device_inline uint4 subd_triangle_patch_indices(ccl_global const KernelGloba
 
 /* Originating face for patch */
 
-ccl_device_inline uint subd_triangle_patch_face(ccl_global const KernelGlobals *kg, int patch)
+ccl_device_inline uint subd_triangle_patch_face(KernelGlobals kg, int patch)
 {
   return kernel_tex_fetch(__patches, patch + 4);
 }
 
 /* Number of corners on originating face */
 
-ccl_device_inline uint subd_triangle_patch_num_corners(ccl_global const KernelGlobals *kg,
-                                                       int patch)
+ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals kg, int patch)
 {
   return kernel_tex_fetch(__patches, patch + 5) & 0xffff;
 }
 
 /* Indices of the four corners that are used by the patch */
 
-ccl_device_inline void subd_triangle_patch_corners(ccl_global const KernelGlobals *kg,
-                                                   int patch,
-                                                   int corners[4])
+ccl_device_inline void subd_triangle_patch_corners(KernelGlobals kg, int patch, int corners[4])
 {
   uint4 data;
 
@@ -105,7 +101,7 @@ ccl_device_inline void subd_triangle_patch_corners(ccl_global const KernelGlobal
 
 /* Reading attributes on various subdivision triangle elements */
 
-ccl_device_noinline float subd_triangle_attribute_float(ccl_global const KernelGlobals *kg,
+ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg,
                                                         ccl_private const ShaderData *sd,
                                                         const AttributeDescriptor desc,
                                                         ccl_private float *dx,
@@ -244,7 +240,7 @@ ccl_device_noinline float subd_triangle_attribute_float(ccl_global const KernelG
   }
 }
 
-ccl_device_noinline float2 subd_triangle_attribute_float2(ccl_global const KernelGlobals *kg,
+ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg,
                                                           ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
                                                           ccl_private float2 *dx,
@@ -387,7 +383,7 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(ccl_global const Kerne
   }
 }
 
-ccl_device_noinline float3 subd_triangle_attribute_float3(ccl_global const KernelGlobals *kg,
+ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
                                                           ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
                                                           ccl_private float3 *dx,
@@ -529,7 +525,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(ccl_global const Kerne
   }
 }
 
-ccl_device_noinline float4 subd_triangle_attribute_float4(ccl_global const KernelGlobals *kg,
+ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
                                                           ccl_private const ShaderData *sd,
                                                           const AttributeDescriptor desc,
                                                           ccl_private float4 *dx,
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 17f87b7c570..233e901c7ca 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -25,8 +25,7 @@
 CCL_NAMESPACE_BEGIN
 
 /* Normal on triangle. */
-ccl_device_inline float3 triangle_normal(ccl_global const KernelGlobals *kg,
-                                         ccl_private ShaderData *sd)
+ccl_device_inline float3 triangle_normal(KernelGlobals kg, ccl_private ShaderData *sd)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
@@ -44,7 +43,7 @@ ccl_device_inline float3 triangle_normal(ccl_global const KernelGlobals *kg,
 }
 
 /* Point and normal on triangle. */
-ccl_device_inline void triangle_point_normal(ccl_global const KernelGlobals *kg,
+ccl_device_inline void triangle_point_normal(KernelGlobals kg,
                                              int object,
                                              int prim,
                                              float u,
@@ -76,7 +75,7 @@ ccl_device_inline void triangle_point_normal(ccl_global const KernelGlobals *kg,
 
 /* Triangle vertex locations */
 
-ccl_device_inline void triangle_vertices(ccl_global const KernelGlobals *kg, int prim, float3 P[3])
+ccl_device_inline void triangle_vertices(KernelGlobals kg, int prim, float3 P[3])
 {
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
   P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
@@ -86,7 +85,7 @@ ccl_device_inline void triangle_vertices(ccl_global const KernelGlobals *kg, int
 
 /* Triangle vertex locations and vertex normals */
 
-ccl_device_inline void triangle_vertices_and_normals(ccl_global const KernelGlobals *kg,
+ccl_device_inline void triangle_vertices_and_normals(KernelGlobals kg,
                                                      int prim,
                                                      float3 P[3],
                                                      float3 N[3])
@@ -103,7 +102,7 @@ ccl_device_inline void triangle_vertices_and_normals(ccl_global const KernelGlob
 /* Interpolate smooth vertex normal from vertices */
 
 ccl_device_inline float3
-triangle_smooth_normal(ccl_global const KernelGlobals *kg, float3 Ng, int prim, float u, float v)
+triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -116,12 +115,8 @@ triangle_smooth_normal(ccl_global const KernelGlobals *kg, float3 Ng, int prim,
   return is_zero(N) ? Ng : N;
 }
 
-ccl_device_inline float3 triangle_smooth_normal_unnormalized(ccl_global const KernelGlobals *kg,
-                                                             ccl_private const ShaderData *sd,
-                                                             float3 Ng,
-                                                             int prim,
-                                                             float u,
-                                                             float v)
+ccl_device_inline float3 triangle_smooth_normal_unnormalized(
+    KernelGlobals kg, ccl_private const ShaderData *sd, float3 Ng, int prim, float u, float v)
 {
   /* load triangle vertices */
   const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
@@ -143,7 +138,7 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized(ccl_global const Ke
 
 /* Ray differentials on triangle */
 
-ccl_device_inline void triangle_dPdudv(ccl_global const KernelGlobals *kg,
+ccl_device_inline void triangle_dPdudv(KernelGlobals kg,
                                        int prim,
                                        ccl_private float3 *dPdu,
                                        ccl_private float3 *dPdv)
@@ -161,7 +156,7 @@ ccl_device_inline void triangle_dPdudv(ccl_global const KernelGlobals *kg,
 
 /* Reading attributes on various triangle elements */
 
-ccl_device float triangle_attribute_float(ccl_global const KernelGlobals *kg,
+ccl_device float triangle_attribute_float(KernelGlobals kg,
                                           ccl_private const ShaderData *sd,
                                           const AttributeDescriptor desc,
                                           ccl_private float *dx,
@@ -211,7 +206,7 @@ ccl_device float triangle_attribute_float(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device float2 triangle_attribute_float2(ccl_global const KernelGlobals *kg,
+ccl_device float2 triangle_attribute_float2(KernelGlobals kg,
                                             ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
                                             ccl_private float2 *dx,
@@ -261,7 +256,7 @@ ccl_device float2 triangle_attribute_float2(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device float3 triangle_attribute_float3(ccl_global const KernelGlobals *kg,
+ccl_device float3 triangle_attribute_float3(KernelGlobals kg,
                                             ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
                                             ccl_private float3 *dx,
@@ -311,7 +306,7 @@ ccl_device float3 triangle_attribute_float3(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device float4 triangle_attribute_float4(ccl_global const KernelGlobals *kg,
+ccl_device float4 triangle_attribute_float4(KernelGlobals kg,
                                             ccl_private const ShaderData *sd,
                                             const AttributeDescriptor desc,
                                             ccl_private float4 *dx,
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index f637206da19..fee629cc75a 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -26,7 +26,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline bool triangle_intersect(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool triangle_intersect(KernelGlobals kg,
                                           ccl_private Intersection *isect,
                                           float3 P,
                                           float3 dir,
@@ -85,7 +85,7 @@ ccl_device_inline bool triangle_intersect(ccl_global const KernelGlobals *kg,
  */
 
 #ifdef __BVH_LOCAL__
-ccl_device_inline bool triangle_intersect_local(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
                                                 ccl_private LocalIntersection *local_isect,
                                                 float3 P,
                                                 float3 dir,
@@ -200,7 +200,7 @@ ccl_device_inline bool triangle_intersect_local(ccl_global const KernelGlobals *
  * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf
  */
 
-ccl_device_inline float3 triangle_refine(ccl_global const KernelGlobals *kg,
+ccl_device_inline float3 triangle_refine(KernelGlobals kg,
                                          ccl_private ShaderData *sd,
                                          float3 P,
                                          float3 D,
@@ -256,7 +256,7 @@ ccl_device_inline float3 triangle_refine(ccl_global const KernelGlobals *kg,
 /* Same as above, except that t is assumed to be in object space for
  * instancing.
  */
-ccl_device_inline float3 triangle_refine_local(ccl_global const KernelGlobals *kg,
+ccl_device_inline float3 triangle_refine_local(KernelGlobals kg,
                                                ccl_private ShaderData *sd,
                                                float3 P,
                                                float3 D,
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index c466c3fb07a..4e83ad6acb3 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -31,7 +31,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Return position normalized to 0..1 in mesh bounds */
 
-ccl_device_inline float3 volume_normalized_position(ccl_global const KernelGlobals *kg,
+ccl_device_inline float3 volume_normalized_position(KernelGlobals kg,
                                                     ccl_private const ShaderData *sd,
                                                     float3 P)
 {
@@ -70,7 +70,7 @@ ccl_device float3 volume_attribute_value_to_float3(const float4 value)
   }
 }
 
-ccl_device float4 volume_attribute_float4(ccl_global const KernelGlobals *kg,
+ccl_device float4 volume_attribute_float4(KernelGlobals kg,
                                           ccl_private const ShaderData *sd,
                                           const AttributeDescriptor desc)
 {
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index c822823de9c..df3c2103c5b 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -43,7 +43,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
 /* Return false to indicate that this pixel is finished.
  * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
  * that the pixel did converge. */
-ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrator_init_from_bake(KernelGlobals kg,
+                                          IntegratorState state,
                                           ccl_global const KernelWorkTile *ccl_restrict tile,
                                           ccl_global float *render_buffer,
                                           const int x,
@@ -53,18 +54,18 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
   PROFILING_INIT(kg, PROFILING_RAY_SETUP);
 
   /* Initialize path state to give basic buffer access and allow early outputs. */
-  path_state_init(INTEGRATOR_STATE_PASS, tile, x, y);
+  path_state_init(state, tile, x, y);
 
   /* Check whether the pixel has converged and should not be sampled anymore. */
-  if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) {
+  if (!kernel_need_sample_pixel(kg, state, render_buffer)) {
     return false;
   }
 
   /* Always count the sample, even if the camera sample will reject the ray. */
-  const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample);
+  const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample);
 
   /* Setup render buffers. */
-  const int index = INTEGRATOR_STATE(path, render_pixel_index);
+  const int index = INTEGRATOR_STATE(state, path, render_pixel_index);
   const int pass_stride = kernel_data.film.pass_stride;
   render_buffer += index * pass_stride;
 
@@ -91,7 +92,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
   }
 
   /* Initialize path state for path integration. */
-  path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash);
+  path_state_init_integrator(kg, state, sample, rng_hash);
 
   /* Barycentric UV with sub-pixel offset. */
   float u = primitive[2];
@@ -131,7 +132,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
     ray.time = 0.5f;
     ray.dP = differential_zero_compact();
     ray.dD = differential_zero_compact();
-    integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+    integrator_state_write_ray(kg, state, &ray);
 
     /* Setup next kernel to execute. */
     INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
@@ -169,7 +170,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
     ray.dD = differential_zero_compact();
 
     /* Write ray. */
-    integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+    integrator_state_write_ray(kg, state, &ray);
 
     /* Setup and write intersection. */
     Intersection isect ccl_optional_struct_init;
@@ -182,7 +183,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
 #ifdef __EMBREE__
     isect.Ng = Ng;
 #endif
-    integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect);
+    integrator_state_write_isect(kg, state, &isect);
 
     /* Setup next kernel to execute. */
     const int shader_index = shader & SHADER_MASK;
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
index 291f0f106f0..5bab6b2e2fd 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_camera.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
@@ -25,7 +25,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void integrate_camera_sample(KernelGlobals kg,
                                                const int sample,
                                                const int x,
                                                const int y,
@@ -63,7 +63,8 @@ ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *c
 /* Return false to indicate that this pixel is finished.
  * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
  * that the pixel did converge. */
-ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrator_init_from_camera(KernelGlobals kg,
+                                            IntegratorState state,
                                             ccl_global const KernelWorkTile *ccl_restrict tile,
                                             ccl_global float *render_buffer,
                                             const int x,
@@ -73,10 +74,10 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
   PROFILING_INIT(kg, PROFILING_RAY_SETUP);
 
   /* Initialize path state to give basic buffer access and allow early outputs. */
-  path_state_init(INTEGRATOR_STATE_PASS, tile, x, y);
+  path_state_init(state, tile, x, y);
 
   /* Check whether the pixel has converged and should not be sampled anymore. */
-  if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) {
+  if (!kernel_need_sample_pixel(kg, state, render_buffer)) {
     return false;
   }
 
@@ -85,7 +86,7 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
    * This logic allows to both count actual number of samples per pixel, and to add samples to this
    * pixel after it was converged and samples were added somewhere else (in which case the
    * `scheduled_sample` will be different from actual number of samples in this pixel). */
-  const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample);
+  const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample);
 
   /* Initialize random number seed for path. */
   const uint rng_hash = path_rng_hash_init(kg, sample, x, y);
@@ -99,11 +100,11 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
     }
 
     /* Write camera ray to state. */
-    integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+    integrator_state_write_ray(kg, state, &ray);
   }
 
   /* Initialize path state for path integration. */
-  path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash);
+  path_state_init_integrator(kg, state, sample, rng_hash);
 
   /* Continue with intersect_closest kernel, optionally initializing volume
    * stack before that if the camera may be inside a volume. */
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
index 760c08159e3..e915d984e1d 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -29,7 +29,8 @@
 CCL_NAMESPACE_BEGIN
 
 template<uint32_t current_kernel>
-ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
+                                                           IntegratorState state,
                                                            const int shader_flags)
 {
 
@@ -37,12 +38,12 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
    * We continue evaluating emissive/transparent surfaces and volumes, similar
    * to direct lighting. Only if we know there are none can we terminate the
    * path immediately. */
-  if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+  if (path_state_ao_bounce(kg, state)) {
     if (shader_flags & (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) {
-      INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+      INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
     }
-    else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
-      INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME;
+    else if (!integrator_state_volume_stack_is_empty(kg, state)) {
+      INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME;
     }
     else {
       return true;
@@ -51,14 +52,14 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
 
   /* Load random number state. */
   RNGState rng_state;
-  path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+  path_state_rng_load(state, &rng_state);
 
   /* We perform path termination in this kernel to avoid launching shade_surface
    * and evaluating the shader when not needed. Only for emission and transparent
    * surfaces in front of emission do we need to evaluate the shader, since we
    * perform MIS as part of indirect rays. */
-  const int path_flag = INTEGRATOR_STATE(path, flag);
-  const float probability = path_state_continuation_probability(INTEGRATOR_STATE_PASS, path_flag);
+  const int path_flag = INTEGRATOR_STATE(state, path, flag);
+  const float probability = path_state_continuation_probability(kg, state, path_flag);
 
   if (probability != 1.0f) {
     const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE);
@@ -66,11 +67,11 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
     if (probability == 0.0f || terminate >= probability) {
       if (shader_flags & SD_HAS_EMISSION) {
         /* Mark path to be terminated right after shader evaluation on the surface. */
-        INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE;
+        INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE;
       }
-      else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
+      else if (!integrator_state_volume_stack_is_empty(kg, state)) {
         /* TODO: only do this for emissive volumes. */
-        INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME;
+        INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME;
       }
       else {
         return true;
@@ -85,7 +86,8 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
  * leads to poor performance with CUDA atomics. */
 template<uint32_t current_kernel>
 ccl_device_forceinline void integrator_intersect_shader_next_kernel(
-    INTEGRATOR_STATE_ARGS,
+    KernelGlobals kg,
+    IntegratorState state,
     ccl_private const Intersection *ccl_restrict isect,
     const int shader,
     const int shader_flags)
@@ -122,9 +124,9 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
 
 #ifdef __SHADOW_CATCHER__
   const int object_flags = intersection_get_object_flags(kg, isect);
-  if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) {
+  if (kernel_shadow_catcher_split(kg, state, object_flags)) {
     if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
-      INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
+      INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
 
       INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
     }
@@ -137,7 +139,7 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
 
     /* If the split happened after bounce through a transparent object it's possible to have shadow
      * patch. Make sure it is properly re-scheduled on the split path. */
-    const int shadow_kernel = INTEGRATOR_STATE(shadow_path, queued_kernel);
+    const int shadow_kernel = INTEGRATOR_STATE(state, shadow_path, queued_kernel);
     if (shadow_kernel != 0) {
       INTEGRATOR_SHADOW_PATH_INIT(shadow_kernel);
     }
@@ -145,21 +147,21 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
 #endif
 }
 
-ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
+ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state)
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST);
 
   /* Read ray from integrator state into local memory. */
   Ray ray ccl_optional_struct_init;
-  integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_read_ray(kg, state, &ray);
   kernel_assert(ray.t != 0.0f);
 
-  const uint visibility = path_state_ray_visibility(INTEGRATOR_STATE_PASS);
-  const int last_isect_prim = INTEGRATOR_STATE(isect, prim);
-  const int last_isect_object = INTEGRATOR_STATE(isect, object);
+  const uint visibility = path_state_ray_visibility(state);
+  const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim);
+  const int last_isect_object = INTEGRATOR_STATE(state, isect, object);
 
   /* Trick to use short AO rays to approximate indirect light at the end of the path. */
-  if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+  if (path_state_ao_bounce(kg, state)) {
     ray.t = kernel_data.integrator.ao_bounces_distance;
 
     const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance;
@@ -181,8 +183,8 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
   if (kernel_data.integrator.use_lamp_mis) {
     /* NOTE: if we make lights visible to camera rays, we'll need to initialize
      * these in the path_state_init. */
-    const int last_type = INTEGRATOR_STATE(isect, type);
-    const int path_flag = INTEGRATOR_STATE(path, flag);
+    const int last_type = INTEGRATOR_STATE(state, isect, type);
+    const int path_flag = INTEGRATOR_STATE(state, path, flag);
 
     hit = lights_intersect(
               kg, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) ||
@@ -190,16 +192,16 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
   }
 
   /* Write intersection result into global integrator state memory. */
-  integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect);
+  integrator_state_write_isect(kg, state, &isect);
 
 #ifdef __VOLUME__
-  if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
+  if (!integrator_state_volume_stack_is_empty(kg, state)) {
     const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP);
     const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE;
     const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0;
 
     if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
-            INTEGRATOR_STATE_PASS, flags)) {
+            kg, state, flags)) {
       /* Continue with volume kernel if we are inside a volume, regardless
        * if we hit anything. */
       INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
@@ -225,9 +227,9 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
       const int flags = kernel_tex_fetch(__shaders, shader).flags;
 
       if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
-              INTEGRATOR_STATE_PASS, flags)) {
+              kg, state, flags)) {
         integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
-            INTEGRATOR_STATE_PASS, &isect, shader, flags);
+            kg, state, &isect, shader, flags);
         return;
       }
       else {
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
index 3ebd21e4651..06f58f88bc8 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
@@ -19,19 +19,21 @@
 CCL_NAMESPACE_BEGIN
 
 /* Visibility for the shadow ray. */
-ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_forceinline uint integrate_intersect_shadow_visibility(KernelGlobals kg,
+                                                                  ConstIntegratorState state)
 {
   uint visibility = PATH_RAY_SHADOW;
 
 #ifdef __SHADOW_CATCHER__
-  const uint32_t path_flag = INTEGRATOR_STATE(shadow_path, flag);
+  const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag);
   visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility);
 #endif
 
   return visibility;
 }
 
-ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrate_intersect_shadow_opaque(KernelGlobals kg,
+                                                  IntegratorState state,
                                                   ccl_private const Ray *ray,
                                                   const uint visibility)
 {
@@ -46,22 +48,24 @@ ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS,
   const bool opaque_hit = scene_intersect(kg, ray, visibility & opaque_mask, &isect);
 
   if (!opaque_hit) {
-    INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0;
   }
 
   return opaque_hit;
 }
 
-ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals kg,
+                                                                 ConstIntegratorState state)
 {
   const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
-  const int transparent_bounce = INTEGRATOR_STATE(shadow_path, transparent_bounce);
+  const int transparent_bounce = INTEGRATOR_STATE(state, shadow_path, transparent_bounce);
 
   return max(transparent_max_bounce - transparent_bounce - 1, 0);
 }
 
 #ifdef __TRANSPARENT_SHADOWS__
-ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg,
+                                                       IntegratorState state,
                                                        ccl_private const Ray *ray,
                                                        const uint visibility)
 {
@@ -69,7 +73,7 @@ ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
 
   /* Limit the number hits to the max transparent bounces allowed and the size that we
    * have available in the integrator state. */
-  const uint max_transparent_hits = integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_PASS);
+  const uint max_transparent_hits = integrate_shadow_max_transparent_hits(kg, state);
   const uint max_hits = min(max_transparent_hits, (uint)INTEGRATOR_SHADOW_ISECT_SIZE);
   uint num_hits = 0;
   bool opaque_hit = scene_intersect_shadow_all(kg, ray, isect, visibility, max_hits, &num_hits);
@@ -88,41 +92,39 @@ ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
       /* Write intersection result into global integrator state memory.
        * More efficient may be to do this directly from the intersection kernel. */
       for (int hit = 0; hit < num_recorded_hits; hit++) {
-        integrator_state_write_shadow_isect(INTEGRATOR_STATE_PASS, &isect[hit], hit);
+        integrator_state_write_shadow_isect(state, &isect[hit], hit);
       }
     }
 
-    INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = num_hits;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = num_hits;
   }
   else {
-    INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0;
   }
 
   return opaque_hit;
 }
 #endif
 
-ccl_device void integrator_intersect_shadow(INTEGRATOR_STATE_ARGS)
+ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorState state)
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW);
 
   /* Read ray from integrator state into local memory. */
   Ray ray ccl_optional_struct_init;
-  integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_read_shadow_ray(kg, state, &ray);
 
   /* Compute visibility. */
-  const uint visibility = integrate_intersect_shadow_visibility(INTEGRATOR_STATE_PASS);
+  const uint visibility = integrate_intersect_shadow_visibility(kg, state);
 
 #ifdef __TRANSPARENT_SHADOWS__
   /* TODO: compile different kernels depending on this? Especially for OptiX
    * conditional trace calls are bad. */
-  const bool opaque_hit =
-      (kernel_data.integrator.transparent_shadows) ?
-          integrate_intersect_shadow_transparent(INTEGRATOR_STATE_PASS, &ray, visibility) :
-          integrate_intersect_shadow_opaque(INTEGRATOR_STATE_PASS, &ray, visibility);
+  const bool opaque_hit = (kernel_data.integrator.transparent_shadows) ?
+                              integrate_intersect_shadow_transparent(kg, state, &ray, visibility) :
+                              integrate_intersect_shadow_opaque(kg, state, &ray, visibility);
 #else
-  const bool opaque_hit = integrate_intersect_shadow_opaque(
-      INTEGRATOR_STATE_PASS, &ray, visibility);
+  const bool opaque_hit = integrate_intersect_shadow_opaque(kg, state, &ray, visibility);
 #endif
 
   if (opaque_hit) {
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
index 7c090952dc7..b575e7fd1e6 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
@@ -20,12 +20,12 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void integrator_intersect_subsurface(INTEGRATOR_STATE_ARGS)
+ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorState state)
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT_SUBSURFACE);
 
 #ifdef __SUBSURFACE__
-  if (subsurface_scatter(INTEGRATOR_STATE_PASS)) {
+  if (subsurface_scatter(kg, state)) {
     return;
   }
 #endif
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
index 192e9c6ab43..7def3e2f3f3 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
@@ -23,7 +23,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
+                                                              IntegratorState state,
                                                               const float3 from_P,
                                                               const float3 to_P)
 {
@@ -52,7 +53,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
 
     for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
       shader_setup_from_ray(kg, stack_sd, &volume_ray, isect);
-      volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd);
+      volume_stack_enter_exit(kg, state, stack_sd);
     }
   }
 #else
@@ -61,7 +62,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
   while (step < 2 * volume_stack_size &&
          scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
     shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
-    volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd);
+    volume_stack_enter_exit(kg, state, stack_sd);
 
     /* Move ray forward. */
     volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
@@ -73,7 +74,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
 #endif
 }
 
-ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
+ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state)
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
 
@@ -81,16 +82,16 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
   ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
 
   Ray volume_ray ccl_optional_struct_init;
-  integrator_state_read_ray(INTEGRATOR_STATE_PASS, &volume_ray);
+  integrator_state_read_ray(kg, state, &volume_ray);
   volume_ray.t = FLT_MAX;
 
-  const uint visibility = (INTEGRATOR_STATE(path, flag) & PATH_RAY_ALL_VISIBILITY);
+  const uint visibility = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_ALL_VISIBILITY);
   int stack_index = 0, enclosed_index = 0;
 
   /* Write background shader. */
   if (kernel_data.background.volume_shader != SHADER_NONE) {
     const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader};
-    integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+    integrator_state_write_volume_stack(state, stack_index, new_entry);
     stack_index++;
   }
 
@@ -121,7 +122,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
         }
         for (int i = 0; i < stack_index && need_add; ++i) {
           /* Don't add intersections twice. */
-          VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+          VolumeStack entry = integrator_state_read_volume_stack(state, i);
           if (entry.object == stack_sd->object) {
             need_add = false;
             break;
@@ -129,7 +130,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
         }
         if (need_add && stack_index < volume_stack_size - 1) {
           const VolumeStack new_entry = {stack_sd->object, stack_sd->shader};
-          integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+          integrator_state_write_volume_stack(state, stack_index, new_entry);
           ++stack_index;
         }
       }
@@ -169,7 +170,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
       }
       for (int i = 0; i < stack_index && need_add; ++i) {
         /* Don't add intersections twice. */
-        VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+        VolumeStack entry = integrator_state_read_volume_stack(state, i);
         if (entry.object == stack_sd->object) {
           need_add = false;
           break;
@@ -177,7 +178,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
       }
       if (need_add) {
         const VolumeStack new_entry = {stack_sd->object, stack_sd->shader};
-        integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+        integrator_state_write_volume_stack(state, stack_index, new_entry);
         ++stack_index;
       }
     }
@@ -196,7 +197,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
 
   /* Write terminator. */
   const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE};
-  integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+  integrator_state_write_volume_stack(state, stack_index, new_entry);
 
   INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
                        DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
diff --git a/intern/cycles/kernel/integrator/integrator_megakernel.h b/intern/cycles/kernel/integrator/integrator_megakernel.h
index 91363ea1c7f..a3b2b1f9e90 100644
--- a/intern/cycles/kernel/integrator/integrator_megakernel.h
+++ b/intern/cycles/kernel/integrator/integrator_megakernel.h
@@ -29,7 +29,8 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void integrator_megakernel(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_megakernel(KernelGlobals kg,
+                                      IntegratorState state,
                                       ccl_global float *ccl_restrict render_buffer)
 {
   /* Each kernel indicates the next kernel to execute, so here we simply
@@ -38,46 +39,46 @@ ccl_device void integrator_megakernel(INTEGRATOR_STATE_ARGS,
    * TODO: investigate if we can use device side enqueue for GPUs to avoid
    * having to compile this big kernel. */
   while (true) {
-    if (INTEGRATOR_STATE(shadow_path, queued_kernel)) {
+    if (INTEGRATOR_STATE(state, shadow_path, queued_kernel)) {
       /* First handle any shadow paths before we potentially create more shadow paths. */
-      switch (INTEGRATOR_STATE(shadow_path, queued_kernel)) {
+      switch (INTEGRATOR_STATE(state, shadow_path, queued_kernel)) {
         case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
-          integrator_intersect_shadow(INTEGRATOR_STATE_PASS);
+          integrator_intersect_shadow(kg, state);
           break;
         case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
-          integrator_shade_shadow(INTEGRATOR_STATE_PASS, render_buffer);
+          integrator_shade_shadow(kg, state, render_buffer);
           break;
         default:
           kernel_assert(0);
           break;
       }
     }
-    else if (INTEGRATOR_STATE(path, queued_kernel)) {
+    else if (INTEGRATOR_STATE(state, path, queued_kernel)) {
       /* Then handle regular path kernels. */
-      switch (INTEGRATOR_STATE(path, queued_kernel)) {
+      switch (INTEGRATOR_STATE(state, path, queued_kernel)) {
         case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
-          integrator_intersect_closest(INTEGRATOR_STATE_PASS);
+          integrator_intersect_closest(kg, state);
           break;
         case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
-          integrator_shade_background(INTEGRATOR_STATE_PASS, render_buffer);
+          integrator_shade_background(kg, state, render_buffer);
           break;
         case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
-          integrator_shade_surface(INTEGRATOR_STATE_PASS, render_buffer);
+          integrator_shade_surface(kg, state, render_buffer);
           break;
         case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
-          integrator_shade_volume(INTEGRATOR_STATE_PASS, render_buffer);
+          integrator_shade_volume(kg, state, render_buffer);
           break;
         case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
-          integrator_shade_surface_raytrace(INTEGRATOR_STATE_PASS, render_buffer);
+          integrator_shade_surface_raytrace(kg, state, render_buffer);
           break;
         case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
-          integrator_shade_light(INTEGRATOR_STATE_PASS, render_buffer);
+          integrator_shade_light(kg, state, render_buffer);
           break;
         case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
-          integrator_intersect_subsurface(INTEGRATOR_STATE_PASS);
+          integrator_intersect_subsurface(kg, state);
           break;
         case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
-          integrator_intersect_volume_stack(INTEGRATOR_STATE_PASS);
+          integrator_intersect_volume_stack(kg, state);
           break;
         default:
           kernel_assert(0);
diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h
index a898f3fb2fc..d98e53e6bbf 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_background.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_background.h
@@ -23,12 +23,13 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
+ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
+                                                    IntegratorState state,
                                                     ccl_global float *ccl_restrict render_buffer)
 {
 #ifdef __BACKGROUND__
   const int shader = kernel_data.background.surface_shader;
-  const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
   /* Use visibility flag to skip lights. */
   if (shader & SHADER_EXCLUDE_ANY) {
@@ -54,14 +55,14 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
     PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
     shader_setup_from_background(kg,
                                  emission_sd,
-                                 INTEGRATOR_STATE(ray, P),
-                                 INTEGRATOR_STATE(ray, D),
-                                 INTEGRATOR_STATE(ray, time));
+                                 INTEGRATOR_STATE(state, ray, P),
+                                 INTEGRATOR_STATE(state, ray, D),
+                                 INTEGRATOR_STATE(state, ray, time));
 
     PROFILING_SHADER(emission_sd->object, emission_sd->shader);
     PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL);
     shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
-        INTEGRATOR_STATE_PASS, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
+        kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
 
     L = shader_background_eval(emission_sd);
   }
@@ -69,11 +70,12 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
   /* Background MIS weights. */
 #  ifdef __BACKGROUND_MIS__
   /* Check if background light exists or if we should skip pdf. */
-  if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_MIS_SKIP) && kernel_data.background.use_mis) {
-    const float3 ray_P = INTEGRATOR_STATE(ray, P);
-    const float3 ray_D = INTEGRATOR_STATE(ray, D);
-    const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
-    const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t);
+  if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) &&
+      kernel_data.background.use_mis) {
+    const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+    const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+    const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+    const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
 
     /* multiple importance sampling, get background light pdf for ray
      * direction, and compute weight with respect to BSDF pdf */
@@ -90,7 +92,8 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
 #endif
 }
 
-ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void integrate_background(KernelGlobals kg,
+                                            IntegratorState state,
                                             ccl_global float *ccl_restrict render_buffer)
 {
   /* Accumulate transparency for transparent background. We can skip background
@@ -99,11 +102,11 @@ ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS,
   float transparent = 0.0f;
 
   const bool is_transparent_background_ray = kernel_data.background.transparent &&
-                                             (INTEGRATOR_STATE(path, flag) &
+                                             (INTEGRATOR_STATE(state, path, flag) &
                                               PATH_RAY_TRANSPARENT_BACKGROUND);
 
   if (is_transparent_background_ray) {
-    transparent = average(INTEGRATOR_STATE(path, throughput));
+    transparent = average(INTEGRATOR_STATE(state, path, throughput));
 
 #ifdef __PASSES__
     eval_background = (kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND));
@@ -113,32 +116,31 @@ ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS,
   }
 
   /* Evaluate background shader. */
-  float3 L = (eval_background) ?
-                 integrator_eval_background_shader(INTEGRATOR_STATE_PASS, render_buffer) :
-                 zero_float3();
+  float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) :
+                                 zero_float3();
 
   /* When using the ao bounces approximation, adjust background
    * shader intensity with ao factor. */
-  if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+  if (path_state_ao_bounce(kg, state)) {
     L *= kernel_data.integrator.ao_bounces_factor;
   }
 
   /* Write to render buffer. */
-  kernel_accum_background(
-      INTEGRATOR_STATE_PASS, L, transparent, is_transparent_background_ray, render_buffer);
+  kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer);
 }
 
-ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
+                                                IntegratorState state,
                                                 ccl_global float *ccl_restrict render_buffer)
 {
-  const float3 ray_D = INTEGRATOR_STATE(ray, D);
-  const float ray_time = INTEGRATOR_STATE(ray, time);
+  const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+  const float ray_time = INTEGRATOR_STATE(state, ray, time);
   LightSample ls ccl_optional_struct_init;
   for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
     if (light_sample_from_distant_ray(kg, ray_D, lamp, &ls)) {
       /* Use visibility flag to skip lights. */
 #ifdef __PASSES__
-      const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+      const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
       if (ls.shader & SHADER_EXCLUDE_ANY) {
         if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
@@ -156,8 +158,7 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
       /* TODO: does aliasing like this break automatic SoA in CUDA? */
       ShaderDataTinyStorage emission_sd_storage;
       ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-      float3 light_eval = light_sample_shader_eval(
-          INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
+      float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
       if (is_zero(light_eval)) {
         return;
       }
@@ -166,33 +167,34 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
       if (!(path_flag & PATH_RAY_MIS_SKIP)) {
         /* multiple importance sampling, get regular light pdf,
          * and compute weight with respect to BSDF pdf */
-        const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
+        const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
         const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf);
         light_eval *= mis_weight;
       }
 
       /* Write to render buffer. */
-      const float3 throughput = INTEGRATOR_STATE(path, throughput);
-      kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer);
+      const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+      kernel_accum_emission(kg, state, throughput, light_eval, render_buffer);
     }
   }
 }
 
-ccl_device void integrator_shade_background(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_shade_background(KernelGlobals kg,
+                                            IntegratorState state,
                                             ccl_global float *ccl_restrict render_buffer)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP);
 
   /* TODO: unify these in a single loop to only have a single shader evaluation call. */
-  integrate_distant_lights(INTEGRATOR_STATE_PASS, render_buffer);
-  integrate_background(INTEGRATOR_STATE_PASS, render_buffer);
+  integrate_distant_lights(kg, state, render_buffer);
+  integrate_background(kg, state, render_buffer);
 
 #ifdef __SHADOW_CATCHER__
-  if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
-    INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND;
+  if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
+    INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND;
 
-    const int isect_prim = INTEGRATOR_STATE(isect, prim);
-    const int isect_type = INTEGRATOR_STATE(isect, type);
+    const int isect_prim = INTEGRATOR_STATE(state, isect, prim);
+    const int isect_type = INTEGRATOR_STATE(state, isect, type);
     const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type);
     const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
 
diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h
index d8f8da63023..4f0f5a39756 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_light.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_light.h
@@ -23,29 +23,30 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void integrate_light(KernelGlobals kg,
+                                       IntegratorState state,
                                        ccl_global float *ccl_restrict render_buffer)
 {
   /* Setup light sample. */
   Intersection isect ccl_optional_struct_init;
-  integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+  integrator_state_read_isect(kg, state, &isect);
 
-  float3 ray_P = INTEGRATOR_STATE(ray, P);
-  const float3 ray_D = INTEGRATOR_STATE(ray, D);
-  const float ray_time = INTEGRATOR_STATE(ray, time);
+  float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+  const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+  const float ray_time = INTEGRATOR_STATE(state, ray, time);
 
   /* Advance ray beyond light. */
   /* TODO: can we make this more numerically robust to avoid reintersecting the
    * same light in some cases? */
   const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D);
-  INTEGRATOR_STATE_WRITE(ray, P) = new_ray_P;
-  INTEGRATOR_STATE_WRITE(ray, t) -= isect.t;
+  INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P;
+  INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t;
 
   /* Set position to where the BSDF was sampled, for correct MIS PDF. */
-  const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t);
+  const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
   ray_P -= ray_D * mis_ray_t;
   isect.t += mis_ray_t;
-  INTEGRATOR_STATE_WRITE(path, mis_ray_t) = mis_ray_t + isect.t;
+  INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = mis_ray_t + isect.t;
 
   LightSample ls ccl_optional_struct_init;
   const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls);
@@ -56,7 +57,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
 
   /* Use visibility flag to skip lights. */
 #ifdef __PASSES__
-  const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
   if (ls.shader & SHADER_EXCLUDE_ANY) {
     if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
@@ -73,7 +74,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
   /* TODO: does aliasing like this break automatic SoA in CUDA? */
   ShaderDataTinyStorage emission_sd_storage;
   ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-  float3 light_eval = light_sample_shader_eval(INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
+  float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
   if (is_zero(light_eval)) {
     return;
   }
@@ -82,22 +83,23 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
   if (!(path_flag & PATH_RAY_MIS_SKIP)) {
     /* multiple importance sampling, get regular light pdf,
      * and compute weight with respect to BSDF pdf */
-    const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
+    const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
     const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf);
     light_eval *= mis_weight;
   }
 
   /* Write to render buffer. */
-  const float3 throughput = INTEGRATOR_STATE(path, throughput);
-  kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer);
+  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+  kernel_accum_emission(kg, state, throughput, light_eval, render_buffer);
 }
 
-ccl_device void integrator_shade_light(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_shade_light(KernelGlobals kg,
+                                       IntegratorState state,
                                        ccl_global float *ccl_restrict render_buffer)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP);
 
-  integrate_light(INTEGRATOR_STATE_PASS, render_buffer);
+  integrate_light(kg, state, render_buffer);
 
   /* TODO: we could get stuck in an infinite loop if there are precision issues
    * and the same light is hit again.
@@ -105,8 +107,8 @@ ccl_device void integrator_shade_light(INTEGRATOR_STATE_ARGS,
    * As a workaround count this as a transparent bounce. It makes some sense
    * to interpret lights as transparent surfaces (and support making them opaque),
    * but this needs to be revisited. */
-  uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce) + 1;
-  INTEGRATOR_STATE_WRITE(path, transparent_bounce) = transparent_bounce;
+  uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1;
+  INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce;
 
   if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
     INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
index 3857b522b25..cdbe85f6b8c 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -29,7 +29,9 @@ ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits)
 }
 
 #ifdef __TRANSPARENT_SHADOWS__
-ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_ARGS, const int hit)
+ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg,
+                                                              IntegratorState state,
+                                                              const int hit)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE);
 
@@ -43,22 +45,22 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
 
   /* Setup shader data at surface. */
   Intersection isect ccl_optional_struct_init;
-  integrator_state_read_shadow_isect(INTEGRATOR_STATE_PASS, &isect, hit);
+  integrator_state_read_shadow_isect(state, &isect, hit);
 
   Ray ray ccl_optional_struct_init;
-  integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_read_shadow_ray(kg, state, &ray);
 
   shader_setup_from_ray(kg, shadow_sd, &ray, &isect);
 
   /* Evaluate shader. */
   if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
     shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
-        INTEGRATOR_STATE_PASS, shadow_sd, NULL, PATH_RAY_SHADOW);
+        kg, state, shadow_sd, NULL, PATH_RAY_SHADOW);
   }
 
 #  ifdef __VOLUME__
   /* Exit/enter volume. */
-  shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, shadow_sd);
+  shadow_volume_stack_enter_exit(kg, state, shadow_sd);
 #  endif
 
   /* Compute transparency from closures. */
@@ -66,7 +68,8 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
 }
 
 #  ifdef __VOLUME__
-ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
+                                                           IntegratorState state,
                                                            const int hit,
                                                            const int num_recorded_hits,
                                                            ccl_private float3 *ccl_restrict
@@ -80,26 +83,29 @@ ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS
 
   /* Setup shader data. */
   Ray ray ccl_optional_struct_init;
-  integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_read_shadow_ray(kg, state, &ray);
 
   /* Modify ray position and length to match current segment. */
-  const float start_t = (hit == 0) ? 0.0f : INTEGRATOR_STATE_ARRAY(shadow_isect, hit - 1, t);
-  const float end_t = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(shadow_isect, hit, t) :
-                                                  ray.t;
+  const float start_t = (hit == 0) ? 0.0f :
+                                     INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
+  const float end_t = (hit < num_recorded_hits) ?
+                          INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
+                          ray.t;
   ray.P += start_t * ray.D;
   ray.t = end_t - start_t;
 
   shader_setup_from_volume(kg, shadow_sd, &ray);
 
-  const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
-    return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
-  });
+  const float step_size = volume_stack_step_size(
+      kg, state, [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); });
 
-  volume_shadow_heterogeneous(INTEGRATOR_STATE_PASS, &ray, shadow_sd, throughput, step_size);
+  volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size);
 }
 #  endif
 
-ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const int num_hits)
+ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
+                                                    IntegratorState state,
+                                                    const int num_hits)
 {
   /* Accumulate shadow for transparent surfaces. */
   const int num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
@@ -108,29 +114,28 @@ ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const
     /* Volume shaders. */
     if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) {
 #  ifdef __VOLUME__
-      if (!integrator_state_shadow_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
-        float3 throughput = INTEGRATOR_STATE(shadow_path, throughput);
-        integrate_transparent_volume_shadow(
-            INTEGRATOR_STATE_PASS, hit, num_recorded_hits, &throughput);
+      if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) {
+        float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
+        integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput);
         if (is_zero(throughput)) {
           return true;
         }
 
-        INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
+        INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput;
       }
 #  endif
     }
 
     /* Surface shaders. */
     if (hit < num_recorded_hits) {
-      const float3 shadow = integrate_transparent_surface_shadow(INTEGRATOR_STATE_PASS, hit);
-      const float3 throughput = INTEGRATOR_STATE(shadow_path, throughput) * shadow;
+      const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit);
+      const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow;
       if (is_zero(throughput)) {
         return true;
       }
 
-      INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
-      INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) += 1;
+      INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput;
+      INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) += 1;
     }
 
     /* Note we do not need to check max_transparent_bounce here, the number
@@ -141,26 +146,27 @@ ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const
   if (shadow_intersections_has_remaining(num_hits)) {
-    /* There are more hits that we could not recorded due to memory usage,
+    /* There are more hits that we could not record due to memory usage,
      * adjust ray to intersect again from the last hit. */
-    const float last_hit_t = INTEGRATOR_STATE_ARRAY(shadow_isect, num_recorded_hits - 1, t);
-    const float3 ray_P = INTEGRATOR_STATE(shadow_ray, P);
-    const float3 ray_D = INTEGRATOR_STATE(shadow_ray, D);
-    INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D);
-    INTEGRATOR_STATE_WRITE(shadow_ray, t) -= last_hit_t;
+    const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t);
+    const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P);
+    const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D);
+    INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D);
+    INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t;
   }
 
   return false;
 }
 #endif /* __TRANSPARENT_SHADOWS__ */
 
-ccl_device void integrator_shade_shadow(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_shade_shadow(KernelGlobals kg,
+                                        IntegratorState state,
                                         ccl_global float *ccl_restrict render_buffer)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP);
-  const int num_hits = INTEGRATOR_STATE(shadow_path, num_hits);
+  const int num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits);
 
 #ifdef __TRANSPARENT_SHADOWS__
   /* Evaluate transparent shadows. */
-  const bool opaque = integrate_transparent_shadow(INTEGRATOR_STATE_PASS, num_hits);
+  const bool opaque = integrate_transparent_shadow(kg, state, num_hits);
   if (opaque) {
     INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
     return;
@@ -174,7 +180,7 @@ ccl_device void integrator_shade_shadow(INTEGRATOR_STATE_ARGS,
     return;
   }
   else {
-    kernel_accum_light(INTEGRATOR_STATE_PASS, render_buffer);
+    kernel_accum_light(kg, state, render_buffer);
     INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
     return;
   }
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
index 0d739517592..bc97fde0e4a 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_surface.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ -28,33 +28,35 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg,
+                                                           ConstIntegratorState state,
                                                            ccl_private ShaderData *sd)
 {
   Intersection isect ccl_optional_struct_init;
-  integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+  integrator_state_read_isect(kg, state, &isect);
 
   Ray ray ccl_optional_struct_init;
-  integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_read_ray(kg, state, &ray);
 
   shader_setup_from_ray(kg, sd, &ray, &isect);
 }
 
 #ifdef __HOLDOUT__
-ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg,
+                                                      ConstIntegratorState state,
                                                       ccl_private ShaderData *sd,
                                                       ccl_global float *ccl_restrict render_buffer)
 {
   /* Write holdout transparency to render buffer and stop if fully holdout. */
-  const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
   if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
       (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
     const float3 holdout_weight = shader_holdout_apply(kg, sd);
     if (kernel_data.background.transparent) {
-      const float3 throughput = INTEGRATOR_STATE(path, throughput);
+      const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
       const float transparent = average(holdout_weight * throughput);
-      kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer);
+      kernel_accum_transparent(kg, state, transparent, render_buffer);
     }
     if (isequal_float3(holdout_weight, one_float3())) {
       return false;
@@ -66,12 +68,13 @@ ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARG
 #endif /* __HOLDOUT__ */
 
 #ifdef __EMISSION__
-ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
+                                                       ConstIntegratorState state,
                                                        ccl_private const ShaderData *sd,
                                                        ccl_global float *ccl_restrict
                                                            render_buffer)
 {
-  const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
   /* Evaluate emissive closure. */
   float3 L = shader_emissive_eval(sd);
@@ -83,8 +86,8 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR
   if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
 #  endif
   {
-    const float bsdf_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
-    const float t = sd->ray_length + INTEGRATOR_STATE(path, mis_ray_t);
+    const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+    const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t);
 
     /* Multiple importance sampling, get triangle light pdf,
      * and compute weight with respect to BSDF pdf. */
@@ -94,15 +97,16 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR
     L *= mis_weight;
   }
 
-  const float3 throughput = INTEGRATOR_STATE(path, throughput);
-  kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, L, render_buffer);
+  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+  kernel_accum_emission(kg, state, throughput, L, render_buffer);
 }
 #endif /* __EMISSION__ */
 
 #ifdef __EMISSION__
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
-ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
+                                                           IntegratorState state,
                                                            ccl_private ShaderData *sd,
                                                            ccl_private const RNGState *rng_state)
 {
@@ -114,8 +118,8 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
   /* Sample position on a light. */
   LightSample ls ccl_optional_struct_init;
   {
-    const int path_flag = INTEGRATOR_STATE(path, flag);
-    const uint bounce = INTEGRATOR_STATE(path, bounce);
+    const int path_flag = INTEGRATOR_STATE(state, path, flag);
+    const uint bounce = INTEGRATOR_STATE(state, path, bounce);
     float light_u, light_v;
     path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 
@@ -135,8 +139,7 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
    * non-constant light sources. */
   ShaderDataTinyStorage emission_sd_storage;
   ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-  const float3 light_eval = light_sample_shader_eval(
-      INTEGRATOR_STATE_PASS, emission_sd, &ls, sd->time);
+  const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
   if (is_zero(light_eval)) {
     return;
   }
@@ -165,39 +168,39 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
   const bool is_light = light_sample_is_light(&ls);
 
   /* Copy volume stack and enter/exit volume. */
-  integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS);
+  integrator_state_copy_volume_stack_to_shadow(kg, state);
 
   if (is_transmission) {
 #  ifdef __VOLUME__
-    shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, sd);
+    shadow_volume_stack_enter_exit(kg, state, sd);
 #  endif
   }
 
   /* Write shadow ray and associated state to global memory. */
-  integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_write_shadow_ray(kg, state, &ray);
 
   /* Copy state from main path to shadow path. */
-  const uint16_t bounce = INTEGRATOR_STATE(path, bounce);
-  const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce);
-  uint32_t shadow_flag = INTEGRATOR_STATE(path, flag);
+  const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce);
+  const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
+  uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
   shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
   shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS;
-  const float3 throughput = INTEGRATOR_STATE(path, throughput) * bsdf_eval_sum(&bsdf_eval);
+  const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval);
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
     const float3 diffuse_glossy_ratio = (bounce == 0) ?
                                             bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) :
-                                            INTEGRATOR_STATE(path, diffuse_glossy_ratio);
-    INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
+                                            INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
+    INTEGRATOR_STATE_WRITE(state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
   }
 
-  INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag;
-  INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce;
-  INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce;
-  INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, flag) = shadow_flag;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, bounce) = bounce;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) = transparent_bounce;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput;
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
-    INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, unshadowed_throughput) = throughput;
   }
 
   /* Branch off shadow kernel. */
@@ -207,7 +210,10 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
 
 /* Path tracing: bounce off or through surface with new direction. */
 ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
-    INTEGRATOR_STATE_ARGS, ccl_private ShaderData *sd, ccl_private const RNGState *rng_state)
+    KernelGlobals kg,
+    IntegratorState state,
+    ccl_private ShaderData *sd,
+    ccl_private const RNGState *rng_state)
 {
   /* Sample BSDF or BSSRDF. */
   if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) {
@@ -221,7 +227,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
 #ifdef __SUBSURFACE__
   /* BSSRDF closure, we schedule subsurface intersection kernel. */
   if (CLOSURE_IS_BSSRDF(sc->type)) {
-    return subsurface_bounce(INTEGRATOR_STATE_PASS, sd, sc);
+    return subsurface_bounce(kg, state, sd, sc);
   }
 #endif
 
@@ -240,63 +246,64 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
   }
 
   /* Setup ray. Note that clipping works through transparent bounces. */
-  INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
-  INTEGRATOR_STATE_WRITE(ray, D) = normalize(bsdf_omega_in);
-  INTEGRATOR_STATE_WRITE(ray, t) = (label & LABEL_TRANSPARENT) ?
-                                       INTEGRATOR_STATE(ray, t) - sd->ray_length :
-                                       FLT_MAX;
+  INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P,
+                                                     (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
+  INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
+  INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ?
+                                              INTEGRATOR_STATE(state, ray, t) - sd->ray_length :
+                                              FLT_MAX;
 
 #ifdef __RAY_DIFFERENTIALS__
-  INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
-  INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(bsdf_domega_in);
+  INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
+  INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
 #endif
 
   /* Update throughput. */
-  float3 throughput = INTEGRATOR_STATE(path, throughput);
+  float3 throughput = INTEGRATOR_STATE(state, path, throughput);
   throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf;
-  INTEGRATOR_STATE_WRITE(path, throughput) = throughput;
+  INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
-    if (INTEGRATOR_STATE(path, bounce) == 0) {
-      INTEGRATOR_STATE_WRITE(path,
-                             diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(&bsdf_eval);
+    if (INTEGRATOR_STATE(state, path, bounce) == 0) {
+      INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(
+          &bsdf_eval);
     }
   }
 
   /* Update path state */
   if (label & LABEL_TRANSPARENT) {
-    INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length;
+    INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
   }
   else {
-    INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = bsdf_pdf;
-    INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f;
-    INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(bsdf_pdf,
-                                                      INTEGRATOR_STATE(path, min_ray_pdf));
+    INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
+    INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
+    INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
+        bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
   }
 
-  path_state_next(INTEGRATOR_STATE_PASS, label);
+  path_state_next(kg, state, label);
   return label;
 }
 
 #ifdef __VOLUME__
-ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STATE_ARGS,
-                                                                 ccl_private ShaderData *sd)
+ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState state,
+                                                                ccl_private ShaderData *sd)
 {
-  if (!path_state_volume_next(INTEGRATOR_STATE_PASS)) {
+  if (!path_state_volume_next(state)) {
     return LABEL_NONE;
   }
 
   /* Setup ray position, direction stays unchanged. */
-  INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, -sd->Ng);
+  INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, -sd->Ng);
 
   /* Clipping works through transparent. */
-  INTEGRATOR_STATE_WRITE(ray, t) -= sd->ray_length;
+  INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length;
 
 #  ifdef __RAY_DIFFERENTIALS__
-  INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
+  INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
 #  endif
 
-  INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length;
+  INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
 
   return LABEL_TRANSMIT | LABEL_TRANSPARENT;
 }
@@ -304,17 +311,19 @@ ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STAT
 
 #if defined(__AO__) && defined(__SHADER_RAYTRACE__)
 ccl_device_forceinline void integrate_surface_ao_pass(
-    INTEGRATOR_STATE_CONST_ARGS,
+    KernelGlobals kg,
+    ConstIntegratorState state,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const RNGState *ccl_restrict rng_state,
     ccl_global float *ccl_restrict render_buffer)
 {
 #  ifdef __KERNEL_OPTIX__
-  optixDirectCall<void>(2, INTEGRATOR_STATE_PASS, sd, rng_state, render_buffer);
+  optixDirectCall<void>(2, kg, state, sd, rng_state, render_buffer);
 }
 
 extern "C" __device__ void __direct_callable__ao_pass(
-    INTEGRATOR_STATE_CONST_ARGS,
+    KernelGlobals kg,
+    ConstIntegratorState state,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const RNGState *ccl_restrict rng_state,
     ccl_global float *ccl_restrict render_buffer)
@@ -339,9 +348,8 @@ extern "C" __device__ void __direct_callable__ao_pass(
 
     Intersection isect ccl_optional_struct_init;
     if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) {
-      ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS,
-                                                                 render_buffer);
-      const float3 throughput = INTEGRATOR_STATE(path, throughput);
+      ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
+      const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
       kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, throughput);
     }
   }
@@ -349,7 +357,8 @@ extern "C" __device__ void __direct_callable__ao_pass(
 #endif /* defined(__AO__) && defined(__SHADER_RAYTRACE__) */
 
 template<uint node_feature_mask>
-ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrate_surface(KernelGlobals kg,
+                                  IntegratorState state,
                                   ccl_global float *ccl_restrict render_buffer)
 
 {
@@ -357,7 +366,7 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
 
   /* Setup shader data. */
   ShaderData sd;
-  integrate_surface_shader_setup(INTEGRATOR_STATE_PASS, &sd);
+  integrate_surface_shader_setup(kg, state, &sd);
   PROFILING_SHADER(sd.object, sd.shader);
 
   int continue_path_label = 0;
@@ -366,7 +375,7 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
 #ifdef __VOLUME__
   if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
 #endif
-    const int path_flag = INTEGRATOR_STATE(path, flag);
+    const int path_flag = INTEGRATOR_STATE(state, path, flag);
 
 #ifdef __SUBSURFACE__
     /* Can skip shader evaluation for BSSRDF exit point without bump mapping. */
@@ -375,23 +384,23 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
     {
       /* Evaluate shader. */
       PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL);
-      shader_eval_surface<node_feature_mask>(INTEGRATOR_STATE_PASS, &sd, render_buffer, path_flag);
+      shader_eval_surface<node_feature_mask>(kg, state, &sd, render_buffer, path_flag);
     }
 
 #ifdef __SUBSURFACE__
     if (path_flag & PATH_RAY_SUBSURFACE) {
       /* When coming from inside subsurface scattering, setup a diffuse
        * closure to perform lighting at the exit point. */
-      subsurface_shader_data_setup(INTEGRATOR_STATE_PASS, &sd, path_flag);
-      INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SUBSURFACE;
+      subsurface_shader_data_setup(kg, state, &sd, path_flag);
+      INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SUBSURFACE;
     }
 #endif
 
-    shader_prepare_surface_closures(INTEGRATOR_STATE_PASS, &sd);
+    shader_prepare_surface_closures(kg, state, &sd);
 
 #ifdef __HOLDOUT__
     /* Evaluate holdout. */
-    if (!integrate_surface_holdout(INTEGRATOR_STATE_PASS, &sd, render_buffer)) {
+    if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) {
       return false;
     }
 #endif
@@ -399,19 +408,19 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
 #ifdef __EMISSION__
     /* Write emission. */
     if (sd.flag & SD_EMISSION) {
-      integrate_surface_emission(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+      integrate_surface_emission(kg, state, &sd, render_buffer);
     }
 #endif
 
 #ifdef __PASSES__
     /* Write render passes. */
     PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES);
-    kernel_write_data_passes(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+    kernel_write_data_passes(kg, state, &sd, render_buffer);
 #endif
 
     /* Load random number state. */
     RNGState rng_state;
-    path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+    path_state_rng_load(state, &rng_state);
 
     /* Perform path termination. Most paths have already been terminated in
      * the intersect_closest kernel, this is just for emission and for dividing
@@ -421,52 +430,50 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
     if (!(path_flag & PATH_RAY_SUBSURFACE)) {
       const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ?
                                     0.0f :
-                                    path_state_continuation_probability(INTEGRATOR_STATE_PASS,
-                                                                        path_flag);
+                                    path_state_continuation_probability(kg, state, path_flag);
       if (probability == 0.0f) {
         return false;
       }
       else if (probability != 1.0f) {
-        INTEGRATOR_STATE_WRITE(path, throughput) /= probability;
+        INTEGRATOR_STATE_WRITE(state, path, throughput) /= probability;
       }
     }
 
 #ifdef __DENOISING_FEATURES__
-    kernel_write_denoising_features_surface(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+    kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
 #endif
 
 #ifdef __SHADOW_CATCHER__
-    kernel_write_shadow_catcher_bounce_data(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+    kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer);
 #endif
 
     /* Direct light. */
     PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
-    integrate_surface_direct_light(INTEGRATOR_STATE_PASS, &sd, &rng_state);
+    integrate_surface_direct_light(kg, state, &sd, &rng_state);
 
 #if defined(__AO__) && defined(__SHADER_RAYTRACE__)
     /* Ambient occlusion pass. */
     if (node_feature_mask & KERNEL_FEATURE_NODE_RAYTRACE) {
       if ((kernel_data.film.pass_ao != PASS_UNUSED) &&
-          (INTEGRATOR_STATE(path, flag) & PATH_RAY_CAMERA)) {
+          (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_CAMERA)) {
         PROFILING_EVENT(PROFILING_SHADE_SURFACE_AO);
-        integrate_surface_ao_pass(INTEGRATOR_STATE_PASS, &sd, &rng_state, render_buffer);
+        integrate_surface_ao_pass(kg, state, &sd, &rng_state, render_buffer);
       }
     }
 #endif
 
     PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT);
-    continue_path_label = integrate_surface_bsdf_bssrdf_bounce(
-        INTEGRATOR_STATE_PASS, &sd, &rng_state);
+    continue_path_label = integrate_surface_bsdf_bssrdf_bounce(kg, state, &sd, &rng_state);
 #ifdef __VOLUME__
   }
   else {
     PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT);
-    continue_path_label = integrate_surface_volume_only_bounce(INTEGRATOR_STATE_PASS, &sd);
+    continue_path_label = integrate_surface_volume_only_bounce(state, &sd);
   }
 
   if (continue_path_label & LABEL_TRANSMIT) {
     /* Enter/Exit volume. */
-    volume_stack_enter_exit(INTEGRATOR_STATE_PASS, &sd);
+    volume_stack_enter_exit(kg, state, &sd);
   }
 #endif
 
@@ -475,15 +482,16 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
 
 template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
          int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
-ccl_device_forceinline void integrator_shade_surface(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
+                                                     IntegratorState state,
                                                      ccl_global float *ccl_restrict render_buffer)
 {
-  if (integrate_surface<node_feature_mask>(INTEGRATOR_STATE_PASS, render_buffer)) {
-    if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE) {
+  if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) {
+    if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) {
       INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
     }
     else {
-      kernel_assert(INTEGRATOR_STATE(ray, t) != 0.0f);
+      kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f);
       INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
     }
   }
@@ -493,11 +501,11 @@ ccl_device_forceinline void integrator_shade_surface(INTEGRATOR_STATE_ARGS,
 }
 
 ccl_device_forceinline void integrator_shade_surface_raytrace(
-    INTEGRATOR_STATE_ARGS, ccl_global float *ccl_restrict render_buffer)
+    KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
 {
   integrator_shade_surface<KERNEL_FEATURE_NODE_MASK_SURFACE,
-                           DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE>(INTEGRATOR_STATE_PASS,
-                                                                            render_buffer);
+                           DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE>(
+      kg, state, render_buffer);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index 72c609751f7..e465a993041 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ -70,12 +70,13 @@ typedef struct VolumeShaderCoefficients {
 } VolumeShaderCoefficients;
 
 /* Evaluate shader to get extinction coefficient at P. */
-ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
+                                                   IntegratorState state,
                                                    ccl_private ShaderData *ccl_restrict sd,
                                                    ccl_private float3 *ccl_restrict extinction)
 {
-  shader_eval_volume<true>(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) {
-    return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
+  shader_eval_volume<true>(kg, state, sd, PATH_RAY_SHADOW, [=](const int i) {
+    return integrator_state_read_shadow_volume_stack(state, i);
   });
 
   if (!(sd->flag & SD_EXTINCTION)) {
@@ -88,13 +89,14 @@ ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
 }
 
 /* Evaluate shader to get absorption, scattering and emission at P. */
-ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
+                                            IntegratorState state,
                                             ccl_private ShaderData *ccl_restrict sd,
                                             ccl_private VolumeShaderCoefficients *coeff)
 {
-  const int path_flag = INTEGRATOR_STATE(path, flag);
-  shader_eval_volume<false>(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) {
-    return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+  const int path_flag = INTEGRATOR_STATE(state, path, flag);
+  shader_eval_volume<false>(kg, state, sd, path_flag, [=](const int i) {
+    return integrator_state_read_volume_stack(state, i);
   });
 
   if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) {
@@ -123,7 +125,7 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
   return true;
 }
 
-ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline void volume_step_init(KernelGlobals kg,
                                              ccl_private const RNGState *rng_state,
                                              const float object_step_size,
                                              float t,
@@ -169,14 +171,14 @@ ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg,
 #  if 0
 /* homogeneous volume: assume shader evaluation at the starts gives
  * the extinction coefficient for the entire line segment */
-ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
+ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state,
                                           ccl_private Ray *ccl_restrict ray,
                                           ccl_private ShaderData *ccl_restrict sd,
                                           ccl_global float3 *ccl_restrict throughput)
 {
   float3 sigma_t = zero_float3();
 
-  if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) {
+  if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
     *throughput *= volume_color_transmittance(sigma_t, ray->t);
   }
 }
@@ -184,7 +186,8 @@ ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
 
 /* heterogeneous volume: integrate stepping through the volume until we
  * reach the end, get absorbed entirely, or run out of iterations */
-ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
+ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
+                                            IntegratorState state,
                                             ccl_private Ray *ccl_restrict ray,
                                             ccl_private ShaderData *ccl_restrict sd,
                                             ccl_private float3 *ccl_restrict throughput,
@@ -192,7 +195,7 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
 {
   /* Load random number state. */
   RNGState rng_state;
-  shadow_path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+  shadow_path_state_rng_load(state, &rng_state);
 
   float3 tp = *throughput;
 
@@ -227,7 +230,7 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
 
     /* compute attenuation over segment */
     sd->P = new_P;
-    if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) {
+    if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
       /* Compute `expf()` only for every Nth step, to save some calculations
        * because `exp(a)*exp(b) = exp(a+b)`, also do a quick #VOLUME_THROUGHPUT_EPSILON
        * check then. */
@@ -510,7 +513,8 @@ ccl_device_forceinline void volume_integrate_step_scattering(
  * iterations. this does probabilistically scatter or get transmitted through
  * for path tracing where we don't want to branch. */
 ccl_device_forceinline void volume_integrate_heterogeneous(
-    INTEGRATOR_STATE_ARGS,
+    KernelGlobals kg,
+    IntegratorState state,
     ccl_private Ray *ccl_restrict ray,
     ccl_private ShaderData *ccl_restrict sd,
     ccl_private const RNGState *rng_state,
@@ -560,7 +564,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
   vstate.distance_pdf = 1.0f;
 
   /* Initialize volume integration result. */
-  const float3 throughput = INTEGRATOR_STATE(path, throughput);
+  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
   result.direct_throughput = throughput;
   result.indirect_throughput = throughput;
 
@@ -571,7 +575,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
   }
 
 #  ifdef __DENOISING_FEATURES__
-  const bool write_denoising_features = (INTEGRATOR_STATE(path, flag) &
+  const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) &
                                          PATH_RAY_DENOISING_FEATURES);
   float3 accum_albedo = zero_float3();
 #  endif
@@ -585,7 +589,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 
     /* compute segment */
     VolumeShaderCoefficients coeff ccl_optional_struct_init;
-    if (volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &coeff)) {
+    if (volume_shader_sample(kg, state, sd, &coeff)) {
       const int closure_flag = sd->flag;
 
       /* Evaluate transmittance over segment. */
@@ -654,15 +658,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 
   /* Write accumulated emission. */
   if (!is_zero(accum_emission)) {
-    kernel_accum_emission(
-        INTEGRATOR_STATE_PASS, result.indirect_throughput, accum_emission, render_buffer);
+    kernel_accum_emission(kg, state, result.indirect_throughput, accum_emission, render_buffer);
   }
 
 #  ifdef __DENOISING_FEATURES__
   /* Write denoising features. */
   if (write_denoising_features) {
     kernel_write_denoising_features_volume(
-        INTEGRATOR_STATE_PASS, accum_albedo, result.indirect_scatter, render_buffer);
+        kg, state, accum_albedo, result.indirect_scatter, render_buffer);
   }
 #  endif /* __DENOISING_FEATURES__ */
 }
@@ -671,7 +674,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
 ccl_device_forceinline bool integrate_volume_sample_light(
-    INTEGRATOR_STATE_ARGS,
+    KernelGlobals kg,
+    IntegratorState state,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const RNGState *ccl_restrict rng_state,
     ccl_private LightSample *ccl_restrict ls)
@@ -682,8 +686,8 @@ ccl_device_forceinline bool integrate_volume_sample_light(
   }
 
   /* Sample position on a light. */
-  const int path_flag = INTEGRATOR_STATE(path, flag);
-  const uint bounce = INTEGRATOR_STATE(path, bounce);
+  const int path_flag = INTEGRATOR_STATE(state, path, flag);
+  const uint bounce = INTEGRATOR_STATE(state, path, bounce);
   float light_u, light_v;
   path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 
@@ -700,7 +704,8 @@ ccl_device_forceinline bool integrate_volume_sample_light(
 /* Path tracing: sample point on light and evaluate light shader, then
  * queue shadow ray to be traced. */
 ccl_device_forceinline void integrate_volume_direct_light(
-    INTEGRATOR_STATE_ARGS,
+    KernelGlobals kg,
+    IntegratorState state,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const RNGState *ccl_restrict rng_state,
     const float3 P,
@@ -720,8 +725,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
    * TODO: decorrelate random numbers and use light_sample_new_position to
    * avoid resampling the CDF. */
   {
-    const int path_flag = INTEGRATOR_STATE(path, flag);
-    const uint bounce = INTEGRATOR_STATE(path, bounce);
+    const int path_flag = INTEGRATOR_STATE(state, path, flag);
+    const uint bounce = INTEGRATOR_STATE(state, path, bounce);
     float light_u, light_v;
     path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
 
@@ -743,8 +748,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
    * non-constant light sources. */
   ShaderDataTinyStorage emission_sd_storage;
   ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-  const float3 light_eval = light_sample_shader_eval(
-      INTEGRATOR_STATE_PASS, emission_sd, ls, sd->time);
+  const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
   if (is_zero(light_eval)) {
     return;
   }
@@ -772,12 +776,12 @@ ccl_device_forceinline void integrate_volume_direct_light(
   const bool is_light = light_sample_is_light(ls);
 
   /* Write shadow ray and associated state to global memory. */
-  integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_write_shadow_ray(kg, state, &ray);
 
   /* Copy state from main path to shadow path. */
-  const uint16_t bounce = INTEGRATOR_STATE(path, bounce);
-  const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce);
-  uint32_t shadow_flag = INTEGRATOR_STATE(path, flag);
+  const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce);
+  const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
+  uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
   shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
   shadow_flag |= PATH_RAY_VOLUME_PASS;
   const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
@@ -785,20 +789,20 @@ ccl_device_forceinline void integrate_volume_direct_light(
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
     const float3 diffuse_glossy_ratio = (bounce == 0) ?
                                             one_float3() :
-                                            INTEGRATOR_STATE(path, diffuse_glossy_ratio);
-    INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
+                                            INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
+    INTEGRATOR_STATE_WRITE(state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
   }
 
-  INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag;
-  INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce;
-  INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce;
-  INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput_phase;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, flag) = shadow_flag;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, bounce) = bounce;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) = transparent_bounce;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput_phase;
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
-    INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, unshadowed_throughput) = throughput;
   }
 
-  integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS);
+  integrator_state_copy_volume_stack_to_shadow(kg, state);
 
   /* Branch off shadow kernel. */
   INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
@@ -807,7 +811,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
 
 /* Path tracing: scatter in new direction using phase function */
 ccl_device_forceinline bool integrate_volume_phase_scatter(
-    INTEGRATOR_STATE_ARGS,
+    KernelGlobals kg,
+    IntegratorState state,
     ccl_private ShaderData *sd,
     ccl_private const RNGState *rng_state,
     ccl_private const ShaderVolumePhases *phases)
@@ -838,31 +843,31 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
   }
 
   /* Setup ray. */
-  INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
-  INTEGRATOR_STATE_WRITE(ray, D) = normalize(phase_omega_in);
-  INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX;
+  INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
+  INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in);
+  INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
 
 #  ifdef __RAY_DIFFERENTIALS__
-  INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
-  INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(phase_domega_in);
+  INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
+  INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
 #  endif
 
   /* Update throughput. */
-  const float3 throughput = INTEGRATOR_STATE(path, throughput);
+  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
   const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf;
-  INTEGRATOR_STATE_WRITE(path, throughput) = throughput_phase;
+  INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase;
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
-    INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3();
+    INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3();
   }
 
   /* Update path state */
-  INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = phase_pdf;
-  INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f;
-  INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(phase_pdf,
-                                                    INTEGRATOR_STATE(path, min_ray_pdf));
+  INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
+  INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
+  INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
+      phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
 
-  path_state_next(INTEGRATOR_STATE_PASS, label);
+  path_state_next(kg, state, label);
   return true;
 }
 
@@ -870,7 +875,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
  * ray, with the assumption that there are no surfaces blocking light
  * between the endpoints. distance sampling is used to decide if we will
  * scatter or not. */
-ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
+ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
+                                                 IntegratorState state,
                                                  ccl_private Ray *ccl_restrict ray,
                                                  ccl_global float *ccl_restrict render_buffer)
 {
@@ -879,29 +885,29 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
 
   /* Load random number state. */
   RNGState rng_state;
-  path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+  path_state_rng_load(state, &rng_state);
 
   /* Sample light ahead of volume stepping, for equiangular sampling. */
   /* TODO: distant lights are ignored now, but could instead use even distribution. */
   LightSample ls ccl_optional_struct_init;
-  const bool need_light_sample = !(INTEGRATOR_STATE(path, flag) & PATH_RAY_TERMINATE);
+  const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE);
   const bool have_equiangular_sample = need_light_sample &&
                                        integrate_volume_sample_light(
-                                           INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls) &&
+                                           kg, state, &sd, &rng_state, &ls) &&
                                        (ls.t != FLT_MAX);
 
   VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
-                                                volume_stack_sample_method(INTEGRATOR_STATE_PASS) :
+                                                volume_stack_sample_method(kg, state) :
                                                 VOLUME_SAMPLE_DISTANCE;
 
   /* Step through volume. */
-  const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
-    return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
-  });
+  const float step_size = volume_stack_step_size(
+      kg, state, [=](const int i) { return integrator_state_read_volume_stack(state, i); });
 
   /* TODO: expensive to zero closures? */
   VolumeIntegrateResult result = {};
-  volume_integrate_heterogeneous(INTEGRATOR_STATE_PASS,
+  volume_integrate_heterogeneous(kg,
+                                 state,
                                  ray,
                                  &sd,
                                  &rng_state,
@@ -914,11 +920,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
   /* Perform path termination. The intersect_closest will have already marked this path
    * to be terminated. That will shading evaluating to leave out any scattering closures,
    * but emission and absorption are still handled for multiple importance sampling. */
-  const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
   const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ?
                                 0.0f :
-                                path_state_continuation_probability(INTEGRATOR_STATE_PASS,
-                                                                    path_flag);
+                                path_state_continuation_probability(kg, state, path_flag);
   if (probability == 0.0f) {
     return VOLUME_PATH_MISSED;
   }
@@ -927,7 +932,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
   if (result.direct_scatter) {
     const float3 direct_P = ray->P + result.direct_t * ray->D;
     result.direct_throughput /= probability;
-    integrate_volume_direct_light(INTEGRATOR_STATE_PASS,
+    integrate_volume_direct_light(kg,
+                                  state,
                                   &sd,
                                   &rng_state,
                                   direct_P,
@@ -943,13 +949,12 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
   if (result.indirect_scatter) {
     result.indirect_throughput /= probability;
   }
-  INTEGRATOR_STATE_WRITE(path, throughput) = result.indirect_throughput;
+  INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput;
 
   if (result.indirect_scatter) {
     sd.P = ray->P + result.indirect_t * ray->D;
 
-    if (integrate_volume_phase_scatter(
-            INTEGRATOR_STATE_PASS, &sd, &rng_state, &result.indirect_phases)) {
+    if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) {
       return VOLUME_PATH_SCATTERED;
     }
     else {
@@ -963,7 +968,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
 
 #endif
 
-ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_shade_volume(KernelGlobals kg,
+                                        IntegratorState state,
                                         ccl_global float *ccl_restrict render_buffer)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_SETUP);
@@ -971,20 +977,20 @@ ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS,
 #ifdef __VOLUME__
   /* Setup shader data. */
   Ray ray ccl_optional_struct_init;
-  integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_read_ray(kg, state, &ray);
 
   Intersection isect ccl_optional_struct_init;
-  integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+  integrator_state_read_isect(kg, state, &isect);
 
   /* Set ray length to current segment. */
   ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
 
   /* Clean volume stack for background rays. */
   if (isect.prim == PRIM_NONE) {
-    volume_stack_clean(INTEGRATOR_STATE_PASS);
+    volume_stack_clean(kg, state);
   }
 
-  VolumeIntegrateEvent event = volume_integrate(INTEGRATOR_STATE_PASS, &ray, render_buffer);
+  VolumeIntegrateEvent event = volume_integrate(kg, state, &ray, render_buffer);
 
   if (event == VOLUME_PATH_SCATTERED) {
     /* Queue intersect_closest kernel. */
@@ -1015,7 +1021,7 @@ ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS,
       const int flags = kernel_tex_fetch(__shaders, shader).flags;
 
       integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
-          INTEGRATOR_STATE_PASS, &isect, shader, flags);
+          kg, state, &isect, shader, flags);
       return;
     }
   }
diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h
index 517e2891769..3aab456a021 100644
--- a/intern/cycles/kernel/integrator/integrator_state.h
+++ b/intern/cycles/kernel/integrator/integrator_state.h
@@ -27,24 +27,17 @@
  * to every kernel, or the pointer may exist at program scope or in constant memory. To abstract
  * these differences between devices and experiment with different layouts, macros are used.
  *
- * INTEGRATOR_STATE_ARGS: prepend to argument definitions for every function that accesses
- * path state.
- * INTEGRATOR_STATE_CONST_ARGS: same as INTEGRATOR_STATE_ARGS, when state is read-only
- * INTEGRATOR_STATE_PASS: use to pass along state to other functions access it.
+ * Use IntegratorState to pass a reference to the integrator state for the current path. The type
+ * is defined differently on the CPU and GPU. Use ConstIntegratorState to pass state as read-only:
+ * on the CPU, const IntegratorState makes the pointer const, not the state it points to.
  *
- * INTEGRATOR_STATE(x, y): read nested struct member x.y of IntegratorState
- * INTEGRATOR_STATE_WRITE(x, y): write to nested struct member x.y of IntegratorState
+ * INTEGRATOR_STATE(state, x, y): read nested struct member x.y of IntegratorState
+ * INTEGRATOR_STATE_WRITE(state, x, y): write to nested struct member x.y of IntegratorState
  *
- * INTEGRATOR_STATE_ARRAY(x, index, y): read x[index].y
- * INTEGRATOR_STATE_ARRAY_WRITE(x, index, y): write x[index].y
+ * INTEGRATOR_STATE_ARRAY(state, x, index, y): read x[index].y
+ * INTEGRATOR_STATE_ARRAY_WRITE(state, x, index, y): write x[index].y
  *
- * INTEGRATOR_STATE_COPY(to_x, from_x): copy contents of one nested struct to another
- *
- * INTEGRATOR_STATE_IS_NULL: test if any integrator state is available, for shader evaluation
- * INTEGRATOR_STATE_PASS_NULL: use to pass empty state to other functions.
- *
- * NOTE: if we end up with a device that passes no arguments, the leading comma will be a problem.
- * Can solve it with more macros if we encounter it, but rather ugly so postpone for now.
+ * INTEGRATOR_STATE_NULL: use to pass empty state to other functions.
  */
 
 #include "kernel/kernel_types.h"
@@ -146,50 +139,36 @@ typedef struct IntegratorStateGPU {
 /* Scalar access on CPU. */
 
 typedef IntegratorStateCPU *ccl_restrict IntegratorState;
+typedef const IntegratorStateCPU *ccl_restrict ConstIntegratorState;
 
-#  define INTEGRATOR_STATE_ARGS \
-    ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \
-        IntegratorStateCPU *ccl_restrict state
-#  define INTEGRATOR_STATE_CONST_ARGS \
-    ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \
-        const IntegratorStateCPU *ccl_restrict state
-#  define INTEGRATOR_STATE_PASS kg, state
-
-#  define INTEGRATOR_STATE_PASS_NULL kg, NULL
-#  define INTEGRATOR_STATE_IS_NULL (state == NULL)
+#  define INTEGRATOR_STATE_NULL nullptr
 
-#  define INTEGRATOR_STATE(nested_struct, member) \
-    (((const IntegratorStateCPU *)state)->nested_struct.member)
-#  define INTEGRATOR_STATE_WRITE(nested_struct, member) (state->nested_struct.member)
+#  define INTEGRATOR_STATE(state, nested_struct, member) ((state)->nested_struct.member)
+#  define INTEGRATOR_STATE_WRITE(state, nested_struct, member) ((state)->nested_struct.member)
 
-#  define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \
-    (((const IntegratorStateCPU *)state)->nested_struct[array_index].member)
-#  define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \
+#  define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
+    ((state)->nested_struct[array_index].member)
+#  define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
     ((state)->nested_struct[array_index].member)
 
 #else /* __KERNEL_CPU__ */
 
 /* Array access on GPU with Structure-of-Arrays. */
 
-typedef int IntegratorState;
-
-#  define INTEGRATOR_STATE_ARGS \
-    ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
-#  define INTEGRATOR_STATE_CONST_ARGS \
-    ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
-#  define INTEGRATOR_STATE_PASS kg, state
+typedef const int IntegratorState;
+typedef const int ConstIntegratorState;
 
-#  define INTEGRATOR_STATE_PASS_NULL kg, -1
-#  define INTEGRATOR_STATE_IS_NULL (state == -1)
+#  define INTEGRATOR_STATE_NULL (-1) /* Parenthesized: expands safely inside any expression. */
 
-#  define INTEGRATOR_STATE(nested_struct, member) \
+#  define INTEGRATOR_STATE(state, nested_struct, member) \
     kernel_integrator_state.nested_struct.member[state]
-#  define INTEGRATOR_STATE_WRITE(nested_struct, member) INTEGRATOR_STATE(nested_struct, member)
+#  define INTEGRATOR_STATE_WRITE(state, nested_struct, member) \
+    INTEGRATOR_STATE(state, nested_struct, member)
 
-#  define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \
+#  define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
     kernel_integrator_state.nested_struct[array_index].member[state]
-#  define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \
-    INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member)
+#  define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
+    INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)
 
 #endif /* __KERNEL_CPU__ */
 
diff --git a/intern/cycles/kernel/integrator/integrator_state_flow.h b/intern/cycles/kernel/integrator/integrator_state_flow.h
index 8477efd7b66..9829da875eb 100644
--- a/intern/cycles/kernel/integrator/integrator_state_flow.h
+++ b/intern/cycles/kernel/integrator/integrator_state_flow.h
@@ -42,48 +42,49 @@ CCL_NAMESPACE_BEGIN
  * one of them, and only once.
  */
 
-#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(path, queued_kernel) == 0)
-#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED (INTEGRATOR_STATE(shadow_path, queued_kernel) == 0)
+#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0)
+#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \
+  (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0)
 
 #ifdef __KERNEL_GPU__
 
 #  define INTEGRATOR_PATH_INIT(next_kernel) \
     atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
                                 1); \
-    INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
 #  define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
     atomic_fetch_and_sub_uint32( \
         &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
     atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
                                 1); \
-    INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
 #  define INTEGRATOR_PATH_TERMINATE(current_kernel) \
     atomic_fetch_and_sub_uint32( \
         &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
-    INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0;
+    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
 
 #  define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \
     atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
                                 1); \
-    INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
 #  define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
     atomic_fetch_and_sub_uint32( \
         &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
     atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
                                 1); \
-    INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
 #  define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
     atomic_fetch_and_sub_uint32( \
         &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
-    INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
 
 #  define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
     { \
       const int key_ = key; \
       atomic_fetch_and_add_uint32( \
           &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
-      INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
-      INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \
+      INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
+      INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
       atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
                                   1); \
     }
@@ -94,8 +95,8 @@ CCL_NAMESPACE_BEGIN
           &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
       atomic_fetch_and_add_uint32( \
           &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
-      INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
-      INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \
+      INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
+      INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
       atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
                                   1); \
     }
@@ -103,39 +104,39 @@ CCL_NAMESPACE_BEGIN
 #else
 
 #  define INTEGRATOR_PATH_INIT(next_kernel) \
-    INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+    INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
 #  define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
     { \
-      INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+      INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
       (void)key; \
     }
 #  define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
     { \
-      INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+      INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
       (void)current_kernel; \
     }
 #  define INTEGRATOR_PATH_TERMINATE(current_kernel) \
     { \
-      INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; \
+      INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \
       (void)current_kernel; \
     }
 #  define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
     { \
-      INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+      INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
       (void)key; \
       (void)current_kernel; \
     }
 
 #  define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \
-    INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+    INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
 #  define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
     { \
-      INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; \
+      INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \
       (void)current_kernel; \
     }
 #  define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
     { \
-      INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; \
+      INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \
       (void)current_kernel; \
     }
 
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
index fddd9eb5ac8..fee59e451d9 100644
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -23,145 +23,150 @@ CCL_NAMESPACE_BEGIN
 
 /* Ray */
 
-ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg,
+                                                       IntegratorState state,
                                                        ccl_private const Ray *ccl_restrict ray)
 {
-  INTEGRATOR_STATE_WRITE(ray, P) = ray->P;
-  INTEGRATOR_STATE_WRITE(ray, D) = ray->D;
-  INTEGRATOR_STATE_WRITE(ray, t) = ray->t;
-  INTEGRATOR_STATE_WRITE(ray, time) = ray->time;
-  INTEGRATOR_STATE_WRITE(ray, dP) = ray->dP;
-  INTEGRATOR_STATE_WRITE(ray, dD) = ray->dD;
+  INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P;
+  INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D;
+  INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t;
+  INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time;
+  INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP;
+  INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD;
 }
 
-ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg,
+                                                      ConstIntegratorState state,
                                                       ccl_private Ray *ccl_restrict ray)
 {
-  ray->P = INTEGRATOR_STATE(ray, P);
-  ray->D = INTEGRATOR_STATE(ray, D);
-  ray->t = INTEGRATOR_STATE(ray, t);
-  ray->time = INTEGRATOR_STATE(ray, time);
-  ray->dP = INTEGRATOR_STATE(ray, dP);
-  ray->dD = INTEGRATOR_STATE(ray, dD);
+  ray->P = INTEGRATOR_STATE(state, ray, P);
+  ray->D = INTEGRATOR_STATE(state, ray, D);
+  ray->t = INTEGRATOR_STATE(state, ray, t);
+  ray->time = INTEGRATOR_STATE(state, ray, time);
+  ray->dP = INTEGRATOR_STATE(state, ray, dP);
+  ray->dD = INTEGRATOR_STATE(state, ray, dD);
 }
 
 /* Shadow Ray */
 
 ccl_device_forceinline void integrator_state_write_shadow_ray(
-    INTEGRATOR_STATE_ARGS, ccl_private const Ray *ccl_restrict ray)
+    KernelGlobals kg, IntegratorState state, ccl_private const Ray *ccl_restrict ray)
 {
-  INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray->P;
-  INTEGRATOR_STATE_WRITE(shadow_ray, D) = ray->D;
-  INTEGRATOR_STATE_WRITE(shadow_ray, t) = ray->t;
-  INTEGRATOR_STATE_WRITE(shadow_ray, time) = ray->time;
-  INTEGRATOR_STATE_WRITE(shadow_ray, dP) = ray->dP;
+  INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P;
+  INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D;
+  INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t;
+  INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time;
+  INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP;
 }
 
-ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg,
+                                                             ConstIntegratorState state,
                                                              ccl_private Ray *ccl_restrict ray)
 {
-  ray->P = INTEGRATOR_STATE(shadow_ray, P);
-  ray->D = INTEGRATOR_STATE(shadow_ray, D);
-  ray->t = INTEGRATOR_STATE(shadow_ray, t);
-  ray->time = INTEGRATOR_STATE(shadow_ray, time);
-  ray->dP = INTEGRATOR_STATE(shadow_ray, dP);
+  ray->P = INTEGRATOR_STATE(state, shadow_ray, P);
+  ray->D = INTEGRATOR_STATE(state, shadow_ray, D);
+  ray->t = INTEGRATOR_STATE(state, shadow_ray, t);
+  ray->time = INTEGRATOR_STATE(state, shadow_ray, time);
+  ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP);
   ray->dD = differential_zero_compact();
 }
 
 /* Intersection */
 
 ccl_device_forceinline void integrator_state_write_isect(
-    INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect)
+    KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
 {
-  INTEGRATOR_STATE_WRITE(isect, t) = isect->t;
-  INTEGRATOR_STATE_WRITE(isect, u) = isect->u;
-  INTEGRATOR_STATE_WRITE(isect, v) = isect->v;
-  INTEGRATOR_STATE_WRITE(isect, object) = isect->object;
-  INTEGRATOR_STATE_WRITE(isect, prim) = isect->prim;
-  INTEGRATOR_STATE_WRITE(isect, type) = isect->type;
+  INTEGRATOR_STATE_WRITE(state, isect, t) = isect->t;
+  INTEGRATOR_STATE_WRITE(state, isect, u) = isect->u;
+  INTEGRATOR_STATE_WRITE(state, isect, v) = isect->v;
+  INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object;
+  INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim;
+  INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type;
 #ifdef __EMBREE__
-  INTEGRATOR_STATE_WRITE(isect, Ng) = isect->Ng;
+  INTEGRATOR_STATE_WRITE(state, isect, Ng) = isect->Ng;
 #endif
 }
 
 ccl_device_forceinline void integrator_state_read_isect(
-    INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect)
+    KernelGlobals kg, ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect)
 {
-  isect->prim = INTEGRATOR_STATE(isect, prim);
-  isect->object = INTEGRATOR_STATE(isect, object);
-  isect->type = INTEGRATOR_STATE(isect, type);
-  isect->u = INTEGRATOR_STATE(isect, u);
-  isect->v = INTEGRATOR_STATE(isect, v);
-  isect->t = INTEGRATOR_STATE(isect, t);
+  isect->prim = INTEGRATOR_STATE(state, isect, prim);
+  isect->object = INTEGRATOR_STATE(state, isect, object);
+  isect->type = INTEGRATOR_STATE(state, isect, type);
+  isect->u = INTEGRATOR_STATE(state, isect, u);
+  isect->v = INTEGRATOR_STATE(state, isect, v);
+  isect->t = INTEGRATOR_STATE(state, isect, t);
 #ifdef __EMBREE__
-  isect->Ng = INTEGRATOR_STATE(isect, Ng);
+  isect->Ng = INTEGRATOR_STATE(state, isect, Ng);
 #endif
 }
 
-ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state,
                                                                       int i)
 {
-  VolumeStack entry = {INTEGRATOR_STATE_ARRAY(volume_stack, i, object),
-                       INTEGRATOR_STATE_ARRAY(volume_stack, i, shader)};
+  VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, volume_stack, i, object),
+                       INTEGRATOR_STATE_ARRAY(state, volume_stack, i, shader)};
   return entry;
 }
 
-ccl_device_forceinline void integrator_state_write_volume_stack(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrator_state_write_volume_stack(IntegratorState state,
                                                                 int i,
                                                                 VolumeStack entry)
 {
-  INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, object) = entry.object;
-  INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, shader) = entry.shader;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, object) = entry.object;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, shader) = entry.shader;
 }
 
-ccl_device_forceinline bool integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_forceinline bool integrator_state_volume_stack_is_empty(KernelGlobals kg,
+                                                                   ConstIntegratorState state)
 {
   return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ?
-             INTEGRATOR_STATE_ARRAY(volume_stack, 0, shader) == SHADER_NONE :
+             INTEGRATOR_STATE_ARRAY(state, volume_stack, 0, shader) == SHADER_NONE :
              true;
 }
 
 /* Shadow Intersection */
 
 ccl_device_forceinline void integrator_state_write_shadow_isect(
-    INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect, const int index)
+    IntegratorState state, ccl_private const Intersection *ccl_restrict isect, const int index)
 {
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, t) = isect->t;
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, u) = isect->u;
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, v) = isect->v;
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, object) = isect->object;
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, prim) = isect->prim;
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, type) = isect->type;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, t) = isect->t;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, u) = isect->u;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, v) = isect->v;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type;
 #ifdef __EMBREE__
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, Ng) = isect->Ng;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, Ng) = isect->Ng;
 #endif
 }
 
 ccl_device_forceinline void integrator_state_read_shadow_isect(
-    INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect, const int index)
+    ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect, const int index)
 {
-  isect->prim = INTEGRATOR_STATE_ARRAY(shadow_isect, index, prim);
-  isect->object = INTEGRATOR_STATE_ARRAY(shadow_isect, index, object);
-  isect->type = INTEGRATOR_STATE_ARRAY(shadow_isect, index, type);
-  isect->u = INTEGRATOR_STATE_ARRAY(shadow_isect, index, u);
-  isect->v = INTEGRATOR_STATE_ARRAY(shadow_isect, index, v);
-  isect->t = INTEGRATOR_STATE_ARRAY(shadow_isect, index, t);
+  isect->prim = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, prim);
+  isect->object = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, object);
+  isect->type = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, type);
+  isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u);
+  isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v);
+  isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t);
 #ifdef __EMBREE__
-  isect->Ng = INTEGRATOR_STATE_ARRAY(shadow_isect, index, Ng);
+  isect->Ng = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, Ng);
 #endif
 }
 
-ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_ARGS)
+ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg,
+                                                                         IntegratorState state)
 {
   if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) {
     int index = 0;
     int shader;
     do {
-      shader = INTEGRATOR_STATE_ARRAY(volume_stack, index, shader);
+      shader = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, shader);
 
-      INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, index, object) = INTEGRATOR_STATE_ARRAY(
-          volume_stack, index, object);
-      INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, index, shader) = shader;
+      INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, index, object) =
+          INTEGRATOR_STATE_ARRAY(state, volume_stack, index, object);
+      INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, index, shader) = shader;
 
       ++index;
     } while (shader != OBJECT_NONE);
@@ -169,27 +174,27 @@ ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRA
 }
 
 ccl_device_forceinline VolumeStack
-integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_CONST_ARGS, int i)
+integrator_state_read_shadow_volume_stack(ConstIntegratorState state, int i)
 {
-  VolumeStack entry = {INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, object),
-                       INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, shader)};
+  VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, object),
+                       INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, shader)};
   return entry;
 }
 
 ccl_device_forceinline bool integrator_state_shadow_volume_stack_is_empty(
-    INTEGRATOR_STATE_CONST_ARGS)
+    KernelGlobals kg, ConstIntegratorState state)
 {
   return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ?
-             INTEGRATOR_STATE_ARRAY(shadow_volume_stack, 0, shader) == SHADER_NONE :
+             INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, 0, shader) == SHADER_NONE :
              true;
 }
 
-ccl_device_forceinline void integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrator_state_write_shadow_volume_stack(IntegratorState state,
                                                                        int i,
                                                                        VolumeStack entry)
 {
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, object) = entry.object;
-  INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, shader) = entry.shader;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, object) = entry.object;
+  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, shader) = entry.shader;
 }
 
 #if defined(__KERNEL_GPU__)
@@ -244,15 +249,16 @@ ccl_device_inline void integrator_state_move(const IntegratorState to_state,
 {
   integrator_state_copy_only(to_state, state);
 
-  INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0;
-  INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
 }
 
 #endif
 
 /* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths
  * after this function. */
-ccl_device_inline void integrator_state_shadow_catcher_split(INTEGRATOR_STATE_ARGS)
+ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg,
+                                                             IntegratorState state)
 {
 #if defined(__KERNEL_GPU__)
   const IntegratorState to_state = atomic_fetch_and_add_uint32(
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
index 153f9b79743..448c99765e3 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -36,29 +36,30 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __SUBSURFACE__
 
-ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS,
+ccl_device int subsurface_bounce(KernelGlobals kg,
+                                 IntegratorState state,
                                  ccl_private ShaderData *sd,
                                  ccl_private const ShaderClosure *sc)
 {
   /* We should never have two consecutive BSSRDF bounces, the second one should
    * be converted to a diffuse BSDF to avoid this. */
-  kernel_assert(!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DIFFUSE_ANCESTOR));
+  kernel_assert(!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DIFFUSE_ANCESTOR));
 
   /* Setup path state for intersect_subsurface kernel. */
   ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
 
   /* Setup ray into surface. */
-  INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
-  INTEGRATOR_STATE_WRITE(ray, D) = bssrdf->N;
-  INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX;
-  INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
-  INTEGRATOR_STATE_WRITE(ray, dD) = differential_zero_compact();
+  INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
+  INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N;
+  INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
+  INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
+  INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
 
   /* Pass along object info, reusing isect to save memory. */
-  INTEGRATOR_STATE_WRITE(isect, Ng) = sd->Ng;
-  INTEGRATOR_STATE_WRITE(isect, object) = sd->object;
+  INTEGRATOR_STATE_WRITE(state, isect, Ng) = sd->Ng;
+  INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object;
 
-  uint32_t path_flag = (INTEGRATOR_STATE(path, flag) & ~PATH_RAY_CAMERA) |
+  uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) |
                        ((sc->type == CLOSURE_BSSRDF_BURLEY_ID) ? PATH_RAY_SUBSURFACE_DISK :
                                                                  PATH_RAY_SUBSURFACE_RANDOM_WALK);
 
@@ -70,27 +71,28 @@ ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS,
   }
 #  endif
 
-  INTEGRATOR_STATE_WRITE(path, throughput) *= weight;
-  INTEGRATOR_STATE_WRITE(path, flag) = path_flag;
+  INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight;
+  INTEGRATOR_STATE_WRITE(state, path, flag) = path_flag;
 
   /* Advance random number offset for bounce. */
-  INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM;
+  INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
-    if (INTEGRATOR_STATE(path, bounce) == 0) {
-      INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3();
+    if (INTEGRATOR_STATE(state, path, bounce) == 0) {
+      INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3();
     }
   }
 
   /* Pass BSSRDF parameters. */
-  INTEGRATOR_STATE_WRITE(subsurface, albedo) = bssrdf->albedo;
-  INTEGRATOR_STATE_WRITE(subsurface, radius) = bssrdf->radius;
-  INTEGRATOR_STATE_WRITE(subsurface, anisotropy) = bssrdf->anisotropy;
+  INTEGRATOR_STATE_WRITE(state, subsurface, albedo) = bssrdf->albedo;
+  INTEGRATOR_STATE_WRITE(state, subsurface, radius) = bssrdf->radius;
+  INTEGRATOR_STATE_WRITE(state, subsurface, anisotropy) = bssrdf->anisotropy;
 
   return LABEL_SUBSURFACE_SCATTER;
 }
 
-ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
+ccl_device void subsurface_shader_data_setup(KernelGlobals kg,
+                                             IntegratorState state,
                                              ccl_private ShaderData *sd,
                                              const uint32_t path_flag)
 {
@@ -131,21 +133,21 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
   }
 }
 
-ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS)
+ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState state)
 {
   RNGState rng_state;
-  path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+  path_state_rng_load(state, &rng_state);
 
   Ray ray ccl_optional_struct_init;
   LocalIntersection ss_isect ccl_optional_struct_init;
 
-  if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) {
-    if (!subsurface_random_walk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) {
+  if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) {
+    if (!subsurface_random_walk(kg, state, rng_state, ray, ss_isect)) {
       return false;
     }
   }
   else {
-    if (!subsurface_disk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) {
+    if (!subsurface_disk(kg, state, rng_state, ray, ss_isect)) {
       return false;
     }
   }
@@ -157,11 +159,11 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS)
     const int object_flag = kernel_tex_fetch(__object_flag, object);
 
     if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
-      float3 P = INTEGRATOR_STATE(ray, P);
-      const float3 Ng = INTEGRATOR_STATE(isect, Ng);
+      float3 P = INTEGRATOR_STATE(state, ray, P);
+      const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
       const float3 offset_P = ray_offset(P, -Ng);
 
-      integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_PASS, offset_P, ray.P);
+      integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P);
     }
   }
 #  endif /* __VOLUME__ */
@@ -172,11 +174,11 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS)
   ray.P += ray.D * ray.t * 2.0f;
   ray.D = -ray.D;
 
-  integrator_state_write_isect(INTEGRATOR_STATE_PASS, &ss_isect.hits[0]);
-  integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+  integrator_state_write_isect(kg, state, &ss_isect.hits[0]);
+  integrator_state_write_ray(kg, state, &ray);
 
   /* Advance random number offset for bounce. */
-  INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM;
+  INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
 
   const int shader = intersection_get_shader(kg, &ss_isect.hits[0]);
   const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
index 788a5e9b929..1de05ea2696 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
@@ -31,7 +31,8 @@ ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r,
 
 /* Subsurface scattering step, from a point on the surface to other
  * nearby points on the same object. */
-ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool subsurface_disk(KernelGlobals kg,
+                                       IntegratorState state,
                                        RNGState rng_state,
                                        ccl_private Ray &ray,
                                        ccl_private LocalIntersection &ss_isect)
@@ -41,14 +42,14 @@ ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
   path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v);
 
   /* Read shading point info from integrator state. */
-  const float3 P = INTEGRATOR_STATE(ray, P);
-  const float ray_dP = INTEGRATOR_STATE(ray, dP);
-  const float time = INTEGRATOR_STATE(ray, time);
-  const float3 Ng = INTEGRATOR_STATE(isect, Ng);
-  const int object = INTEGRATOR_STATE(isect, object);
+  const float3 P = INTEGRATOR_STATE(state, ray, P);
+  const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
+  const float time = INTEGRATOR_STATE(state, ray, time);
+  const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+  const int object = INTEGRATOR_STATE(state, isect, object);
 
   /* Read subsurface scattering parameters. */
-  const float3 radius = INTEGRATOR_STATE(subsurface, radius);
+  const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
 
   /* Pick random axis in local frame and point on disk. */
   float3 disk_N, disk_T, disk_B;
@@ -175,7 +176,7 @@ ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
 
     if (r < next_sum) {
       /* Return exit point. */
-      INTEGRATOR_STATE_WRITE(path, throughput) *= weight * sum_weights / sample_weight;
+      INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight * sum_weights / sample_weight;
 
       ss_isect.hits[0] = ss_isect.hits[hit];
       ss_isect.Ng[0] = ss_isect.Ng[hit];
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
index 45a43ea67a9..5365093decf 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
@@ -180,7 +180,8 @@ ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
  * and the value represents the cutoff level */
 #define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9
 
-ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
+                                              IntegratorState state,
                                               RNGState rng_state,
                                               ccl_private Ray &ray,
                                               ccl_private LocalIntersection &ss_isect)
@@ -188,12 +189,12 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
   float bssrdf_u, bssrdf_v;
   path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
 
-  const float3 P = INTEGRATOR_STATE(ray, P);
-  const float3 N = INTEGRATOR_STATE(ray, D);
-  const float ray_dP = INTEGRATOR_STATE(ray, dP);
-  const float time = INTEGRATOR_STATE(ray, time);
-  const float3 Ng = INTEGRATOR_STATE(isect, Ng);
-  const int object = INTEGRATOR_STATE(isect, object);
+  const float3 P = INTEGRATOR_STATE(state, ray, P);
+  const float3 N = INTEGRATOR_STATE(state, ray, D);
+  const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
+  const float time = INTEGRATOR_STATE(state, ray, time);
+  const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+  const int object = INTEGRATOR_STATE(state, isect, object);
 
   /* Sample diffuse surface scatter into the object. */
   float3 D;
@@ -219,12 +220,12 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
 
   /* Convert subsurface to volume coefficients.
    * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */
-  const float3 albedo = INTEGRATOR_STATE(subsurface, albedo);
-  const float3 radius = INTEGRATOR_STATE(subsurface, radius);
-  const float anisotropy = INTEGRATOR_STATE(subsurface, anisotropy);
+  const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo);
+  const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
+  const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy);
 
   float3 sigma_t, alpha;
-  float3 throughput = INTEGRATOR_STATE_WRITE(path, throughput);
+  float3 throughput = INTEGRATOR_STATE(state, path, throughput);
   subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput);
   float3 sigma_s = sigma_t * alpha;
 
@@ -459,7 +460,7 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
 
   if (hit) {
     kernel_assert(isfinite3_safe(throughput));
-    INTEGRATOR_STATE_WRITE(path, throughput) = throughput;
+    INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
   }
 
   return hit;
diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h
index 0c4a723de6f..e3a4546508f 100644
--- a/intern/cycles/kernel/integrator/integrator_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h
@@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN
  * is inside of. */
 
 template<typename StackReadOp, typename StackWriteOp>
-ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
+ccl_device void volume_stack_enter_exit(KernelGlobals kg,
                                         ccl_private const ShaderData *sd,
                                         StackReadOp stack_read,
                                         StackWriteOp stack_write)
@@ -84,28 +84,29 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
   }
 }
 
-ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, ccl_private const ShaderData *sd)
+ccl_device void volume_stack_enter_exit(KernelGlobals kg,
+                                        IntegratorState state,
+                                        ccl_private const ShaderData *sd)
 {
   volume_stack_enter_exit(
-      INTEGRATOR_STATE_PASS,
+      kg,
       sd,
-      [=](const int i) { return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); },
+      [=](const int i) { return integrator_state_read_volume_stack(state, i); },
       [=](const int i, const VolumeStack entry) {
-        integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, i, entry);
+        integrator_state_write_volume_stack(state, i, entry);
       });
 }
 
-ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
+ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg,
+                                               IntegratorState state,
                                                ccl_private const ShaderData *sd)
 {
   volume_stack_enter_exit(
-      INTEGRATOR_STATE_PASS,
+      kg,
       sd,
-      [=](const int i) {
-        return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
-      },
+      [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); },
       [=](const int i, const VolumeStack entry) {
-        integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_PASS, i, entry);
+        integrator_state_write_shadow_volume_stack(state, i, entry);
       });
 }
 
@@ -123,19 +124,21 @@ ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
  * Use this function after the last bounce to get rid of all volumes apart from
- * the world's one after the last bounce to avoid render artifacts.
+ * the world's one, to avoid render artifacts.
  */
-ccl_device_inline void volume_stack_clean(INTEGRATOR_STATE_ARGS)
+ccl_device_inline void volume_stack_clean(KernelGlobals kg, IntegratorState state)
 {
   if (kernel_data.background.volume_shader != SHADER_NONE) {
     /* Keep the world's volume in stack. */
-    INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, shader) = SHADER_NONE;
+    INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE;
   }
   else {
-    INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, shader) = SHADER_NONE;
+    INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, shader) = SHADER_NONE;
   }
 }
 
 template<typename StackReadOp>
-ccl_device float volume_stack_step_size(INTEGRATOR_STATE_ARGS, StackReadOp stack_read)
+ccl_device float volume_stack_step_size(KernelGlobals kg,
+                                        IntegratorState state,
+                                        StackReadOp stack_read)
 {
   float step_size = FLT_MAX;
 
@@ -182,12 +185,12 @@ typedef enum VolumeSampleMethod {
   VOLUME_SAMPLE_MIS = (VOLUME_SAMPLE_DISTANCE | VOLUME_SAMPLE_EQUIANGULAR),
 } VolumeSampleMethod;
 
-ccl_device VolumeSampleMethod volume_stack_sample_method(INTEGRATOR_STATE_ARGS)
+ccl_device VolumeSampleMethod volume_stack_sample_method(KernelGlobals kg, IntegratorState state)
 {
   VolumeSampleMethod method = VOLUME_SAMPLE_NONE;
 
   for (int i = 0;; i++) {
-    VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+    VolumeStack entry = integrator_state_read_volume_stack(state, i);
     if (entry.shader == SHADER_NONE) {
       break;
     }
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index dc0aa9356f7..bc45bbd5b07 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -98,9 +98,7 @@ ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEv
  * to render buffers instead of using per-thread memory, and to avoid the
  * impact of clamping on other contributions. */
 
-ccl_device_forceinline void kernel_accum_clamp(ccl_global const KernelGlobals *kg,
-                                               ccl_private float3 *L,
-                                               int bounce)
+ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce)
 {
 #ifdef __KERNEL_DEBUG_NAN__
   if (!isfinite3_safe(*L)) {
@@ -128,9 +126,9 @@ ccl_device_forceinline void kernel_accum_clamp(ccl_global const KernelGlobals *k
 
 /* Get pointer to pixel in render buffer. */
 ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer(
-    INTEGRATOR_STATE_CONST_ARGS, ccl_global float *ccl_restrict render_buffer)
+    KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
 {
-  const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+  const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
   const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
                                         kernel_data.film.pass_stride;
   return render_buffer + render_buffer_offset;
@@ -140,7 +138,8 @@ ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer(
  * Adaptive sampling.
  */
 
-ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline int kernel_accum_sample(KernelGlobals kg,
+                                          ConstIntegratorState state,
                                           ccl_global float *ccl_restrict render_buffer,
                                           int sample)
 {
@@ -148,13 +147,13 @@ ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS,
     return sample;
   }
 
-  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
-                                                              render_buffer);
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
 
   return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1);
 }
 
-ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg,
+                                             ConstIntegratorState state,
                                              const float3 contribution,
                                              ccl_global float *ccl_restrict buffer)
 {
@@ -167,7 +166,7 @@ ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS,
     return;
   }
 
-  const int sample = INTEGRATOR_STATE(path, sample);
+  const int sample = INTEGRATOR_STATE(state, path, sample);
   if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
     kernel_write_pass_float4(
         buffer + kernel_data.film.pass_adaptive_aux_buffer,
@@ -186,7 +185,8 @@ ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS,
  * Returns truth if the contribution is fully handled here and is not to be added to the other
  * passes (like combined, adaptive sampling). */
 
-ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg,
+                                            ConstIntegratorState state,
                                             const float3 contribution,
                                             ccl_global float *ccl_restrict buffer)
 {
@@ -198,7 +198,7 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS,
   kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
 
   /* Matte pass. */
-  if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+  if (kernel_shadow_catcher_is_matte_path(kg, state)) {
     kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution);
     /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive
      * sampling is based on how noisy the combined pass is as if there were no catchers in the
@@ -206,7 +206,7 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS,
   }
 
   /* Shadow catcher pass. */
-  if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) {
+  if (kernel_shadow_catcher_is_object_pass(kg, state)) {
     kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
     return true;
   }
@@ -214,7 +214,8 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS,
   return false;
 }
 
-ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg,
+                                                        ConstIntegratorState state,
                                                         const float3 contribution,
                                                         const float transparent,
                                                         ccl_global float *ccl_restrict buffer)
@@ -226,12 +227,12 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A
   kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED);
   kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
 
-  if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
+  if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
     return true;
   }
 
   /* Matte pass. */
-  if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+  if (kernel_shadow_catcher_is_matte_path(kg, state)) {
     kernel_write_pass_float4(
         buffer + kernel_data.film.pass_shadow_catcher_matte,
         make_float4(contribution.x, contribution.y, contribution.z, transparent));
@@ -241,7 +242,7 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A
   }
 
   /* Shadow catcher pass. */
-  if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) {
+  if (kernel_shadow_catcher_is_object_pass(kg, state)) {
     /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the
      * calculation and the alpha channel of the pass contains numbers of samples contributed to a
      * pixel of the pass. */
@@ -252,7 +253,8 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A
   return false;
 }
 
-ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg,
+                                                             ConstIntegratorState state,
                                                              const float transparent,
                                                              ccl_global float *ccl_restrict buffer)
 {
@@ -263,7 +265,7 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CO
   kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
 
   /* Matte pass. */
-  if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+  if (kernel_shadow_catcher_is_matte_path(kg, state)) {
     kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent);
   }
 }
@@ -275,12 +277,13 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CO
  */
 
 /* Write combined pass. */
-ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg,
+                                                  ConstIntegratorState state,
                                                   const float3 contribution,
                                                   ccl_global float *ccl_restrict buffer)
 {
 #ifdef __SHADOW_CATCHER__
-  if (kernel_accum_shadow_catcher(INTEGRATOR_STATE_PASS, contribution, buffer)) {
+  if (kernel_accum_shadow_catcher(kg, state, contribution, buffer)) {
     return;
   }
 #endif
@@ -289,19 +292,19 @@ ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS,
     kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution);
   }
 
-  kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer);
+  kernel_accum_adaptive_buffer(kg, state, contribution, buffer);
 }
 
 /* Write combined pass with transparency. */
-ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg,
+                                                              ConstIntegratorState state,
                                                               const float3 contribution,
                                                               const float transparent,
                                                               ccl_global float *ccl_restrict
                                                                   buffer)
 {
 #ifdef __SHADOW_CATCHER__
-  if (kernel_accum_shadow_catcher_transparent(
-          INTEGRATOR_STATE_PASS, contribution, transparent, buffer)) {
+  if (kernel_accum_shadow_catcher_transparent(kg, state, contribution, transparent, buffer)) {
     return;
   }
 #endif
@@ -312,11 +315,12 @@ ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_C
         make_float4(contribution.x, contribution.y, contribution.z, transparent));
   }
 
-  kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer);
+  kernel_accum_adaptive_buffer(kg, state, contribution, buffer);
 }
 
 /* Write background or emission to appropriate pass. */
-ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg,
+                                                                ConstIntegratorState state,
                                                                 float3 contribution,
                                                                 ccl_global float *ccl_restrict
                                                                     buffer,
@@ -327,15 +331,15 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE
   }
 
 #ifdef __PASSES__
-  const int path_flag = INTEGRATOR_STATE(path, flag);
+  const int path_flag = INTEGRATOR_STATE(state, path, flag);
   int pass_offset = PASS_UNUSED;
 
   /* Denoising albedo. */
 #  ifdef __DENOISING_FEATURES__
   if (path_flag & PATH_RAY_DENOISING_FEATURES) {
     if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
-      const float3 denoising_feature_throughput = INTEGRATOR_STATE(path,
-                                                                   denoising_feature_throughput);
+      const float3 denoising_feature_throughput = INTEGRATOR_STATE(
+          state, path, denoising_feature_throughput);
       const float3 denoising_albedo = denoising_feature_throughput * contribution;
       kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
     }
@@ -349,32 +353,34 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE
   else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
     /* Indirectly visible through reflection. */
     const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
-                                       ((INTEGRATOR_STATE(path, bounce) == 1) ?
+                                       ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
                                             kernel_data.film.pass_glossy_direct :
                                             kernel_data.film.pass_glossy_indirect) :
-                                       ((INTEGRATOR_STATE(path, bounce) == 1) ?
+                                       ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
                                             kernel_data.film.pass_transmission_direct :
                                             kernel_data.film.pass_transmission_indirect);
 
     if (glossy_pass_offset != PASS_UNUSED) {
       /* Glossy is a subset of the throughput, reconstruct it here using the
        * diffuse-glossy ratio. */
-      const float3 ratio = INTEGRATOR_STATE(path, diffuse_glossy_ratio);
+      const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
       const float3 glossy_contribution = (one_float3() - ratio) * contribution;
       kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
     }
 
     /* Reconstruct diffuse subset of throughput. */
-    pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_diffuse_direct :
-                                                          kernel_data.film.pass_diffuse_indirect;
+    pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
+                      kernel_data.film.pass_diffuse_direct :
+                      kernel_data.film.pass_diffuse_indirect;
     if (pass_offset != PASS_UNUSED) {
-      contribution *= INTEGRATOR_STATE(path, diffuse_glossy_ratio);
+      contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
     }
   }
   else if (path_flag & PATH_RAY_VOLUME_PASS) {
     /* Indirectly visible through volume. */
-    pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_volume_direct :
-                                                          kernel_data.film.pass_volume_indirect;
+    pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
+                      kernel_data.film.pass_volume_direct :
+                      kernel_data.film.pass_volume_indirect;
   }
 
   /* Single write call for GPU coherence. */
@@ -385,52 +391,52 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE
 }
 
 /* Write light contribution to render buffer. */
-ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void kernel_accum_light(KernelGlobals kg,
+                                          ConstIntegratorState state,
                                           ccl_global float *ccl_restrict render_buffer)
 {
   /* The throughput for shadow paths already contains the light shader evaluation. */
-  float3 contribution = INTEGRATOR_STATE(shadow_path, throughput);
-  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce));
+  float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput);
+  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce));
 
-  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
-                                                              render_buffer);
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
 
-  kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer);
+  kernel_accum_combined_pass(kg, state, contribution, buffer);
 
 #ifdef __PASSES__
   if (kernel_data.film.light_pass_flag & PASS_ANY) {
-    const int path_flag = INTEGRATOR_STATE(shadow_path, flag);
+    const int path_flag = INTEGRATOR_STATE(state, shadow_path, flag);
     int pass_offset = PASS_UNUSED;
 
     if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
       /* Indirectly visible through reflection. */
       const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
-                                         ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+                                         ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
                                               kernel_data.film.pass_glossy_direct :
                                               kernel_data.film.pass_glossy_indirect) :
-                                         ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+                                         ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
                                               kernel_data.film.pass_transmission_direct :
                                               kernel_data.film.pass_transmission_indirect);
 
       if (glossy_pass_offset != PASS_UNUSED) {
         /* Glossy is a subset of the throughput, reconstruct it here using the
          * diffuse-glossy ratio. */
-        const float3 ratio = INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio);
+        const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
         const float3 glossy_contribution = (one_float3() - ratio) * contribution;
         kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
       }
 
       /* Reconstruct diffuse subset of throughput. */
-      pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+      pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
                         kernel_data.film.pass_diffuse_direct :
                         kernel_data.film.pass_diffuse_indirect;
       if (pass_offset != PASS_UNUSED) {
-        contribution *= INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio);
+        contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
       }
     }
     else if (path_flag & PATH_RAY_VOLUME_PASS) {
       /* Indirectly visible through volume. */
-      pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+      pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
                         kernel_data.film.pass_volume_direct :
                         kernel_data.film.pass_volume_indirect;
     }
@@ -443,8 +449,9 @@ ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS,
     /* Write shadow pass. */
     if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
         (path_flag & PATH_RAY_CAMERA)) {
-      const float3 unshadowed_throughput = INTEGRATOR_STATE(shadow_path, unshadowed_throughput);
-      const float3 shadowed_throughput = INTEGRATOR_STATE(shadow_path, throughput);
+      const float3 unshadowed_throughput = INTEGRATOR_STATE(
+          state, shadow_path, unshadowed_throughput);
+      const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
       const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) *
                             kernel_data.film.pass_shadow_scale;
       kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow);
@@ -458,61 +465,60 @@ ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS,
  * Note that we accumulate transparency = 1 - alpha in the render buffer.
  * Otherwise we'd have to write alpha on path termination, which happens
  * in many places. */
-ccl_device_inline void kernel_accum_transparent(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void kernel_accum_transparent(KernelGlobals kg,
+                                                ConstIntegratorState state,
                                                 const float transparent,
                                                 ccl_global float *ccl_restrict render_buffer)
 {
-  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
-                                                              render_buffer);
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
 
   if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
     kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent);
   }
 
-  kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_PASS, transparent, buffer);
+  kernel_accum_shadow_catcher_transparent_only(kg, state, transparent, buffer);
 }
 
 /* Write background contribution to render buffer.
  *
  * Includes transparency, matching kernel_accum_transparent. */
-ccl_device_inline void kernel_accum_background(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void kernel_accum_background(KernelGlobals kg,
+                                               ConstIntegratorState state,
                                                const float3 L,
                                                const float transparent,
                                                const bool is_transparent_background_ray,
                                                ccl_global float *ccl_restrict render_buffer)
 {
-  float3 contribution = INTEGRATOR_STATE(path, throughput) * L;
-  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1);
+  float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L;
+  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
 
-  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
-                                                              render_buffer);
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
 
   if (is_transparent_background_ray) {
-    kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer);
+    kernel_accum_transparent(kg, state, transparent, render_buffer);
   }
   else {
-    kernel_accum_combined_transparent_pass(
-        INTEGRATOR_STATE_PASS, contribution, transparent, buffer);
+    kernel_accum_combined_transparent_pass(kg, state, contribution, transparent, buffer);
   }
   kernel_accum_emission_or_background_pass(
-      INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_background);
+      kg, state, contribution, buffer, kernel_data.film.pass_background);
 }
 
 /* Write emission to render buffer. */
-ccl_device_inline void kernel_accum_emission(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void kernel_accum_emission(KernelGlobals kg,
+                                             ConstIntegratorState state,
                                              const float3 throughput,
                                              const float3 L,
                                              ccl_global float *ccl_restrict render_buffer)
 {
   float3 contribution = throughput * L;
-  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1);
+  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
 
-  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
-                                                              render_buffer);
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
 
-  kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer);
+  kernel_accum_combined_pass(kg, state, contribution, buffer);
   kernel_accum_emission_or_background_pass(
-      INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_emission);
+      kg, state, contribution, buffer, kernel_data.film.pass_emission);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
index cdf2601f6c3..b80853fcc51 100644
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -22,14 +22,15 @@ CCL_NAMESPACE_BEGIN
 
 /* Check whether the pixel has converged and should not be sampled anymore. */
 
-ccl_device_forceinline bool kernel_need_sample_pixel(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg,
+                                                     ConstIntegratorState state,
                                                      ccl_global float *render_buffer)
 {
   if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) {
     return true;
   }
 
-  const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+  const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
   const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
                                         kernel_data.film.pass_stride;
   ccl_global float *buffer = render_buffer + render_buffer_offset;
@@ -40,7 +41,7 @@ ccl_device_forceinline bool kernel_need_sample_pixel(INTEGRATOR_STATE_CONST_ARGS
 
 /* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
 
-ccl_device bool kernel_adaptive_sampling_convergence_check(ccl_global const KernelGlobals *kg,
+ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg,
                                                            ccl_global float *render_buffer,
                                                            int x,
                                                            int y,
@@ -90,7 +91,7 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(ccl_global const Kern
 /* This is a simple box filter in two passes.
  * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
 
-ccl_device void kernel_adaptive_sampling_filter_x(ccl_global const KernelGlobals *kg,
+ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg,
                                                   ccl_global float *render_buffer,
                                                   int y,
                                                   int start_x,
@@ -123,7 +124,7 @@ ccl_device void kernel_adaptive_sampling_filter_x(ccl_global const KernelGlobals
   }
 }
 
-ccl_device void kernel_adaptive_sampling_filter_y(ccl_global const KernelGlobals *kg,
+ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg,
                                                   ccl_global float *render_buffer,
                                                   int x,
                                                   int start_y,
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 6cbb8dcc291..933ee0082c2 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -24,7 +24,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg,
+ccl_device void kernel_displace_evaluate(KernelGlobals kg,
                                          ccl_global const KernelShaderEvalInput *input,
                                          ccl_global float *output,
                                          const int offset)
@@ -37,7 +37,7 @@ ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg,
 
   /* Evaluate displacement shader. */
   const float3 P = sd.P;
-  shader_eval_displacement(INTEGRATOR_STATE_PASS_NULL, &sd);
+  shader_eval_displacement(kg, INTEGRATOR_STATE_NULL, &sd);
   float3 D = sd.P - P;
 
   object_inverse_dir_transform(kg, &sd, &D);
@@ -58,7 +58,7 @@ ccl_device void kernel_displace_evaluate(ccl_global const KernelGlobals *kg,
   output[offset * 3 + 2] += D.z;
 }
 
-ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg,
+ccl_device void kernel_background_evaluate(KernelGlobals kg,
                                            ccl_global const KernelShaderEvalInput *input,
                                            ccl_global float *output,
                                            const int offset)
@@ -77,7 +77,7 @@ ccl_device void kernel_background_evaluate(ccl_global const KernelGlobals *kg,
    * This is being evaluated for all BSDFs, so path flag does not contain a specific type. */
   const int path_flag = PATH_RAY_EMISSION;
   shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
-      INTEGRATOR_STATE_PASS_NULL, &sd, NULL, path_flag);
+      kg, INTEGRATOR_STATE_NULL, &sd, NULL, path_flag);
   float3 color = shader_background_eval(&sd);
 
 #ifdef __KERNEL_DEBUG_NAN__
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 73683a15c5d..58a34668f45 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -46,7 +46,7 @@ ccl_device float2 camera_sample_aperture(ccl_constant KernelCamera *cam, float u
   return bokeh;
 }
 
-ccl_device void camera_sample_perspective(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device void camera_sample_perspective(KernelGlobals kg,
                                           float raster_x,
                                           float raster_y,
                                           float lens_u,
@@ -185,7 +185,7 @@ ccl_device void camera_sample_perspective(ccl_global const KernelGlobals *ccl_re
 }
 
 /* Orthographic Camera */
-ccl_device void camera_sample_orthographic(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device void camera_sample_orthographic(KernelGlobals kg,
                                            float raster_x,
                                            float raster_y,
                                            float lens_u,
@@ -370,7 +370,7 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
 
 /* Common */
 
-ccl_device_inline void camera_sample(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void camera_sample(KernelGlobals kg,
                                      int x,
                                      int y,
                                      float filter_u,
@@ -444,13 +444,13 @@ ccl_device_inline void camera_sample(ccl_global const KernelGlobals *ccl_restric
 
 /* Utilities */
 
-ccl_device_inline float3 camera_position(ccl_global const KernelGlobals *kg)
+ccl_device_inline float3 camera_position(KernelGlobals kg)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
   return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
 }
 
-ccl_device_inline float camera_distance(ccl_global const KernelGlobals *kg, float3 P)
+ccl_device_inline float camera_distance(KernelGlobals kg, float3 P)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
   float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w);
@@ -464,7 +464,7 @@ ccl_device_inline float camera_distance(ccl_global const KernelGlobals *kg, floa
   }
 }
 
-ccl_device_inline float camera_z_depth(ccl_global const KernelGlobals *kg, float3 P)
+ccl_device_inline float camera_z_depth(KernelGlobals kg, float3 P)
 {
   if (kernel_data.cam.type != CAMERA_PANORAMA) {
     Transform worldtocamera = kernel_data.cam.worldtocamera;
@@ -477,7 +477,7 @@ ccl_device_inline float camera_z_depth(ccl_global const KernelGlobals *kg, float
   }
 }
 
-ccl_device_inline float3 camera_direction_from_point(ccl_global const KernelGlobals *kg, float3 P)
+ccl_device_inline float3 camera_direction_from_point(KernelGlobals kg, float3 P)
 {
   Transform cameratoworld = kernel_data.cam.cameratoworld;
 
@@ -491,7 +491,7 @@ ccl_device_inline float3 camera_direction_from_point(ccl_global const KernelGlob
   }
 }
 
-ccl_device_inline float3 camera_world_to_ndc(ccl_global const KernelGlobals *kg,
+ccl_device_inline float3 camera_world_to_ndc(KernelGlobals kg,
                                              ccl_private ShaderData *sd,
                                              float3 P)
 {
diff --git a/intern/cycles/kernel/kernel_color.h b/intern/cycles/kernel/kernel_color.h
index 9e8e0e68b8f..0d7bfecd5f3 100644
--- a/intern/cycles/kernel/kernel_color.h
+++ b/intern/cycles/kernel/kernel_color.h
@@ -20,14 +20,14 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float3 xyz_to_rgb(ccl_global const KernelGlobals *kg, float3 xyz)
+ccl_device float3 xyz_to_rgb(KernelGlobals kg, float3 xyz)
 {
   return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz),
                      dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz),
                      dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz));
 }
 
-ccl_device float linear_rgb_to_gray(ccl_global const KernelGlobals *kg, float3 c)
+ccl_device float linear_rgb_to_gray(KernelGlobals kg, float3 c)
 {
   return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
 }
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 015587ccbbd..8d329b8dac3 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -25,7 +25,8 @@ CCL_NAMESPACE_BEGIN
 
 /* Evaluate shader on light. */
 ccl_device_noinline_cpu float3
-light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
+light_sample_shader_eval(KernelGlobals kg,
+                         IntegratorState state,
                          ccl_private ShaderData *ccl_restrict emission_sd,
                          ccl_private LightSample *ccl_restrict ls,
                          float time)
@@ -73,7 +74,7 @@ light_sample_shader_eval(INTEGRATOR_STATE_ARGS,
     /* No proper path flag, we're evaluating this for all closures. that's
      * weak but we'd have to do multiple evaluations otherwise. */
     shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
-        INTEGRATOR_STATE_PASS, emission_sd, NULL, PATH_RAY_EMISSION);
+        kg, state, emission_sd, NULL, PATH_RAY_EMISSION);
 
     /* Evaluate closures. */
 #ifdef __BACKGROUND_MIS__
@@ -105,7 +106,7 @@ ccl_device_inline bool light_sample_is_light(ccl_private const LightSample *ccl_
 }
 
 /* Early path termination of shadow rays. */
-ccl_device_inline bool light_sample_terminate(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline bool light_sample_terminate(KernelGlobals kg,
                                               ccl_private const LightSample *ccl_restrict ls,
                                               ccl_private BsdfEval *ccl_restrict eval,
                                               const float rand_terminate)
@@ -133,10 +134,8 @@ ccl_device_inline bool light_sample_terminate(ccl_global const KernelGlobals *cc
  * of a triangle. Surface is lifted by amount h along normal n in the incident
  * point. */
 
-ccl_device_inline float3
-shadow_ray_smooth_surface_offset(ccl_global const KernelGlobals *ccl_restrict kg,
-                                 ccl_private const ShaderData *ccl_restrict sd,
-                                 float3 Ng)
+ccl_device_inline float3 shadow_ray_smooth_surface_offset(
+    KernelGlobals kg, ccl_private const ShaderData *ccl_restrict sd, float3 Ng)
 {
   float3 V[3], N[3];
   triangle_vertices_and_normals(kg, sd->prim, V, N);
@@ -180,7 +179,7 @@ shadow_ray_smooth_surface_offset(ccl_global const KernelGlobals *ccl_restrict kg
 
 /* Ray offset to avoid shadow terminator artifact. */
 
-ccl_device_inline float3 shadow_ray_offset(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline float3 shadow_ray_offset(KernelGlobals kg,
                                            ccl_private const ShaderData *ccl_restrict sd,
                                            float3 L)
 {
@@ -247,7 +246,7 @@ ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restri
 
 /* Create shadow ray towards light sample. */
 ccl_device_inline void light_sample_to_surface_shadow_ray(
-    ccl_global const KernelGlobals *ccl_restrict kg,
+    KernelGlobals kg,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const LightSample *ccl_restrict ls,
     ccl_private Ray *ray)
@@ -258,7 +257,7 @@ ccl_device_inline void light_sample_to_surface_shadow_ray(
 
 /* Create shadow ray towards light sample. */
 ccl_device_inline void light_sample_to_volume_shadow_ray(
-    ccl_global const KernelGlobals *ccl_restrict kg,
+    KernelGlobals kg,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const LightSample *ccl_restrict ls,
     const float3 P,
diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h
index 07b96d0e1a8..d5b8c90a828 100644
--- a/intern/cycles/kernel/kernel_id_passes.h
+++ b/intern/cycles/kernel/kernel_id_passes.h
@@ -92,7 +92,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl
 }
 
 /* post-sorting for Cryptomatte */
-ccl_device_inline void kernel_cryptomatte_post(ccl_global const KernelGlobals *kg,
+ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg,
                                                ccl_global float *render_buffer,
                                                int pixel_index)
 {
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 1f745ab1da9..b62ec7fda42 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -72,10 +72,7 @@ ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
   return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF);
 }
 
-ccl_device float pmj_sample_1D(ccl_global const KernelGlobals *kg,
-                               uint sample,
-                               uint rng_hash,
-                               uint dimension)
+ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension)
 {
   /* Perform Owen shuffle of the sample number to reorder the samples. */
 #ifdef _SIMPLE_HASH_
@@ -118,7 +115,7 @@ ccl_device float pmj_sample_1D(ccl_global const KernelGlobals *kg,
   return fx;
 }
 
-ccl_device void pmj_sample_2D(ccl_global const KernelGlobals *kg,
+ccl_device void pmj_sample_2D(KernelGlobals kg,
                               uint sample,
                               uint rng_hash,
                               uint dimension,
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 33d0c09a32a..a7a95918b4e 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -45,7 +45,7 @@ typedef struct LightSample {
 /* Regular Light */
 
 template<bool in_volume_segment>
-ccl_device_inline bool light_sample(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool light_sample(KernelGlobals kg,
                                     const int lamp,
                                     const float randu,
                                     const float randv,
@@ -209,7 +209,7 @@ ccl_device_inline bool light_sample(ccl_global const KernelGlobals *kg,
   return (ls->pdf > 0.0f);
 }
 
-ccl_device bool lights_intersect(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device bool lights_intersect(KernelGlobals kg,
                                  ccl_private const Ray *ccl_restrict ray,
                                  ccl_private Intersection *ccl_restrict isect,
                                  const int last_prim,
@@ -298,7 +298,7 @@ ccl_device bool lights_intersect(ccl_global const KernelGlobals *ccl_restrict kg
   return isect->prim != PRIM_NONE;
 }
 
-ccl_device bool light_sample_from_distant_ray(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device bool light_sample_from_distant_ray(KernelGlobals kg,
                                               const float3 ray_D,
                                               const int lamp,
                                               ccl_private LightSample *ccl_restrict ls)
@@ -362,7 +362,7 @@ ccl_device bool light_sample_from_distant_ray(ccl_global const KernelGlobals *cc
   return true;
 }
 
-ccl_device bool light_sample_from_intersection(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device bool light_sample_from_intersection(KernelGlobals kg,
                                                ccl_private const Intersection *ccl_restrict isect,
                                                const float3 ray_P,
                                                const float3 ray_D,
@@ -464,7 +464,7 @@ ccl_device bool light_sample_from_intersection(ccl_global const KernelGlobals *c
 
 /* returns true if the triangle is has motion blur or an instancing transform applied */
 ccl_device_inline bool triangle_world_space_vertices(
-    ccl_global const KernelGlobals *kg, int object, int prim, float time, float3 V[3])
+    KernelGlobals kg, int object, int prim, float time, float3 V[3])
 {
   bool has_motion = false;
   const int object_flag = kernel_tex_fetch(__object_flag, object);
@@ -492,7 +492,7 @@ ccl_device_inline bool triangle_world_space_vertices(
   return has_motion;
 }
 
-ccl_device_inline float triangle_light_pdf_area(ccl_global const KernelGlobals *kg,
+ccl_device_inline float triangle_light_pdf_area(KernelGlobals kg,
                                                 const float3 Ng,
                                                 const float3 I,
                                                 float t)
@@ -506,7 +506,7 @@ ccl_device_inline float triangle_light_pdf_area(ccl_global const KernelGlobals *
   return t * t * pdf / cos_pi;
 }
 
-ccl_device_forceinline float triangle_light_pdf(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline float triangle_light_pdf(KernelGlobals kg,
                                                 ccl_private const ShaderData *sd,
                                                 float t)
 {
@@ -578,7 +578,7 @@ ccl_device_forceinline float triangle_light_pdf(ccl_global const KernelGlobals *
 }
 
 template<bool in_volume_segment>
-ccl_device_forceinline void triangle_light_sample(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline void triangle_light_sample(KernelGlobals kg,
                                                   int prim,
                                                   int object,
                                                   float randu,
@@ -747,8 +747,7 @@ ccl_device_forceinline void triangle_light_sample(ccl_global const KernelGlobals
 
 /* Light Distribution */
 
-ccl_device int light_distribution_sample(ccl_global const KernelGlobals *kg,
-                                         ccl_private float *randu)
+ccl_device int light_distribution_sample(KernelGlobals kg, ccl_private float *randu)
 {
   /* This is basically std::upper_bound as used by PBRT, to find a point light or
    * triangle to emit from, proportional to area. a good improvement would be to
@@ -786,15 +785,13 @@ ccl_device int light_distribution_sample(ccl_global const KernelGlobals *kg,
 
 /* Generic Light */
 
-ccl_device_inline bool light_select_reached_max_bounces(ccl_global const KernelGlobals *kg,
-                                                        int index,
-                                                        int bounce)
+ccl_device_inline bool light_select_reached_max_bounces(KernelGlobals kg, int index, int bounce)
 {
   return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
 }
 
 template<bool in_volume_segment>
-ccl_device_noinline bool light_distribution_sample(ccl_global const KernelGlobals *kg,
+ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
                                                    float randu,
                                                    const float randv,
                                                    const float time,
@@ -834,20 +831,19 @@ ccl_device_noinline bool light_distribution_sample(ccl_global const KernelGlobal
   return light_sample<in_volume_segment>(kg, lamp, randu, randv, P, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_from_volume_segment(
-    ccl_global const KernelGlobals *kg,
-    float randu,
-    const float randv,
-    const float time,
-    const float3 P,
-    const int bounce,
-    const int path_flag,
-    ccl_private LightSample *ls)
+ccl_device_inline bool light_distribution_sample_from_volume_segment(KernelGlobals kg,
+                                                                     float randu,
+                                                                     const float randv,
+                                                                     const float time,
+                                                                     const float3 P,
+                                                                     const int bounce,
+                                                                     const int path_flag,
+                                                                     ccl_private LightSample *ls)
 {
   return light_distribution_sample<true>(kg, randu, randv, time, P, bounce, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_from_position(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool light_distribution_sample_from_position(KernelGlobals kg,
                                                                float randu,
                                                                const float randv,
                                                                const float time,
@@ -859,7 +855,7 @@ ccl_device_inline bool light_distribution_sample_from_position(ccl_global const
   return light_distribution_sample<false>(kg, randu, randv, time, P, bounce, path_flag, ls);
 }
 
-ccl_device_inline bool light_distribution_sample_new_position(ccl_global const KernelGlobals *kg,
+ccl_device_inline bool light_distribution_sample_new_position(KernelGlobals kg,
                                                               const float randu,
                                                               const float randv,
                                                               const float time,
diff --git a/intern/cycles/kernel/kernel_light_background.h b/intern/cycles/kernel/kernel_light_background.h
index 3669ff50455..2e828b8b765 100644
--- a/intern/cycles/kernel/kernel_light_background.h
+++ b/intern/cycles/kernel/kernel_light_background.h
@@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BACKGROUND_MIS__
 
-ccl_device float3 background_map_sample(ccl_global const KernelGlobals *kg,
+ccl_device float3 background_map_sample(KernelGlobals kg,
                                         float randu,
                                         float randv,
                                         ccl_private float *pdf)
@@ -109,7 +109,7 @@ ccl_device float3 background_map_sample(ccl_global const KernelGlobals *kg,
 /* TODO(sergey): Same as above, after the release we should consider using
  * 'noinline' for all devices.
  */
-ccl_device float background_map_pdf(ccl_global const KernelGlobals *kg, float3 direction)
+ccl_device float background_map_pdf(KernelGlobals kg, float3 direction)
 {
   float2 uv = direction_to_equirectangular(direction);
   int res_x = kernel_data.background.map_res_x;
@@ -143,11 +143,7 @@ ccl_device float background_map_pdf(ccl_global const KernelGlobals *kg, float3 d
 }
 
 ccl_device_inline bool background_portal_data_fetch_and_check_side(
-    ccl_global const KernelGlobals *kg,
-    float3 P,
-    int index,
-    ccl_private float3 *lightpos,
-    ccl_private float3 *dir)
+    KernelGlobals kg, float3 P, int index, ccl_private float3 *lightpos, ccl_private float3 *dir)
 {
   int portal = kernel_data.background.portal_offset + index;
   const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
@@ -162,11 +158,8 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(
   return false;
 }
 
-ccl_device_inline float background_portal_pdf(ccl_global const KernelGlobals *kg,
-                                              float3 P,
-                                              float3 direction,
-                                              int ignore_portal,
-                                              ccl_private bool *is_possible)
+ccl_device_inline float background_portal_pdf(
+    KernelGlobals kg, float3 P, float3 direction, int ignore_portal, ccl_private bool *is_possible)
 {
   float portal_pdf = 0.0f;
 
@@ -226,7 +219,7 @@ ccl_device_inline float background_portal_pdf(ccl_global const KernelGlobals *kg
   return (num_possible > 0) ? portal_pdf / num_possible : 0.0f;
 }
 
-ccl_device int background_num_possible_portals(ccl_global const KernelGlobals *kg, float3 P)
+ccl_device int background_num_possible_portals(KernelGlobals kg, float3 P)
 {
   int num_possible_portals = 0;
   for (int p = 0; p < kernel_data.background.num_portals; p++) {
@@ -237,7 +230,7 @@ ccl_device int background_num_possible_portals(ccl_global const KernelGlobals *k
   return num_possible_portals;
 }
 
-ccl_device float3 background_portal_sample(ccl_global const KernelGlobals *kg,
+ccl_device float3 background_portal_sample(KernelGlobals kg,
                                            float3 P,
                                            float randu,
                                            float randv,
@@ -292,7 +285,7 @@ ccl_device float3 background_portal_sample(ccl_global const KernelGlobals *kg,
   return zero_float3();
 }
 
-ccl_device_inline float3 background_sun_sample(ccl_global const KernelGlobals *kg,
+ccl_device_inline float3 background_sun_sample(KernelGlobals kg,
                                                float randu,
                                                float randv,
                                                ccl_private float *pdf)
@@ -304,7 +297,7 @@ ccl_device_inline float3 background_sun_sample(ccl_global const KernelGlobals *k
   return D;
 }
 
-ccl_device_inline float background_sun_pdf(ccl_global const KernelGlobals *kg, float3 D)
+ccl_device_inline float background_sun_pdf(KernelGlobals kg, float3 D)
 {
   const float3 N = float4_to_float3(kernel_data.background.sun);
   const float angle = kernel_data.background.sun.w;
@@ -312,7 +305,7 @@ ccl_device_inline float background_sun_pdf(ccl_global const KernelGlobals *kg, f
 }
 
 ccl_device_inline float3 background_light_sample(
-    ccl_global const KernelGlobals *kg, float3 P, float randu, float randv, ccl_private float *pdf)
+    KernelGlobals kg, float3 P, float randu, float randv, ccl_private float *pdf)
 {
   float portal_method_pdf = kernel_data.background.portal_weight;
   float sun_method_pdf = kernel_data.background.sun_weight;
@@ -412,9 +405,7 @@ ccl_device_inline float3 background_light_sample(
   return D;
 }
 
-ccl_device float background_light_pdf(ccl_global const KernelGlobals *kg,
-                                      float3 P,
-                                      float3 direction)
+ccl_device float background_light_pdf(KernelGlobals kg, float3 P, float3 direction)
 {
   float portal_method_pdf = kernel_data.background.portal_weight;
   float sun_method_pdf = kernel_data.background.sun_weight;
diff --git a/intern/cycles/kernel/kernel_light_common.h b/intern/cycles/kernel/kernel_light_common.h
index 9421ac462e2..9e2b738f376 100644
--- a/intern/cycles/kernel/kernel_light_common.h
+++ b/intern/cycles/kernel/kernel_light_common.h
@@ -214,10 +214,7 @@ ccl_device bool light_spread_clamp_area_light(const float3 P,
   return true;
 }
 
-ccl_device float lamp_light_pdf(ccl_global const KernelGlobals *kg,
-                                const float3 Ng,
-                                const float3 I,
-                                float t)
+ccl_device float lamp_light_pdf(KernelGlobals kg, const float3 Ng, const float3 I, float t)
 {
   float cos_pi = dot(Ng, I);
 
diff --git a/intern/cycles/kernel/kernel_lookup_table.h b/intern/cycles/kernel/kernel_lookup_table.h
index 3c8577af417..2c26e668d7b 100644
--- a/intern/cycles/kernel/kernel_lookup_table.h
+++ b/intern/cycles/kernel/kernel_lookup_table.h
@@ -20,10 +20,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Interpolated lookup table access */
 
-ccl_device float lookup_table_read(ccl_global const KernelGlobals *kg,
-                                   float x,
-                                   int offset,
-                                   int size)
+ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, int size)
 {
   x = saturate(x) * (size - 1);
 
@@ -40,7 +37,7 @@ ccl_device float lookup_table_read(ccl_global const KernelGlobals *kg,
 }
 
 ccl_device float lookup_table_read_2D(
-    ccl_global const KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize)
+    KernelGlobals kg, float x, float y, int offset, int xsize, int ysize)
 {
   y = saturate(y) * (ysize - 1);
 
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index b981e750dda..4d05b63bfbd 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -25,9 +25,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Get pointer to pixel in render buffer. */
 ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer(
-    INTEGRATOR_STATE_CONST_ARGS, ccl_global float *ccl_restrict render_buffer)
+    KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
 {
-  const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+  const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
   const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
                                         kernel_data.film.pass_stride;
   return render_buffer + render_buffer_offset;
@@ -36,11 +36,12 @@ ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer(
 #ifdef __DENOISING_FEATURES__
 
 ccl_device_forceinline void kernel_write_denoising_features_surface(
-    INTEGRATOR_STATE_ARGS,
+    KernelGlobals kg,
+    IntegratorState state,
     ccl_private const ShaderData *sd,
     ccl_global float *ccl_restrict render_buffer)
 {
-  if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DENOISING_FEATURES)) {
+  if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) {
     return;
   }
 
@@ -49,7 +50,7 @@ ccl_device_forceinline void kernel_write_denoising_features_surface(
     return;
   }
 
-  ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, render_buffer);
+  ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
 
   float3 normal = zero_float3();
   float3 diffuse_albedo = zero_float3();
@@ -109,32 +110,34 @@ ccl_device_forceinline void kernel_write_denoising_features_surface(
     }
 
     if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
-      const float3 denoising_feature_throughput = INTEGRATOR_STATE(path,
-                                                                   denoising_feature_throughput);
+      const float3 denoising_feature_throughput = INTEGRATOR_STATE(
+          state, path, denoising_feature_throughput);
       const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput *
                                                      diffuse_albedo);
       kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
     }
 
-    INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_DENOISING_FEATURES;
+    INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES;
   }
   else {
-    INTEGRATOR_STATE_WRITE(path, denoising_feature_throughput) *= specular_albedo;
+    INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo;
   }
 }
 
-ccl_device_forceinline void kernel_write_denoising_features_volume(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals kg,
+                                                                   IntegratorState state,
                                                                    const float3 albedo,
                                                                    const bool scatter,
                                                                    ccl_global float *ccl_restrict
                                                                        render_buffer)
 {
-  ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, render_buffer);
-  const float3 denoising_feature_throughput = INTEGRATOR_STATE(path, denoising_feature_throughput);
+  ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
+  const float3 denoising_feature_throughput = INTEGRATOR_STATE(
+      state, path, denoising_feature_throughput);
 
   if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) {
     /* Assume scatter is sufficiently diffuse to stop writing denoising features. */
-    INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_DENOISING_FEATURES;
+    INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES;
 
     /* Write view direction as normal. */
     const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f);
@@ -153,7 +156,8 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(INTEGRATOR_ST
 
 /* Write shadow catcher passes on a bounce from the shadow catcher object. */
 ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
-    INTEGRATOR_STATE_ARGS,
+    KernelGlobals kg,
+    IntegratorState state,
     ccl_private const ShaderData *sd,
     ccl_global float *ccl_restrict render_buffer)
 {
@@ -164,18 +168,18 @@ ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
   kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
   kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
 
-  if (!kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STATE_PASS, sd->object_flag)) {
+  if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
     return;
   }
 
-  ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, render_buffer);
+  ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
 
   /* Count sample for the shadow catcher object. */
   kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
 
   /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
    * transparency to the matte. */
-  const float3 throughput = INTEGRATOR_STATE(path, throughput);
+  const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
   kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
                           average(throughput));
 }
@@ -191,12 +195,13 @@ ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buf
   return depth * 4;
 }
 
-ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void kernel_write_data_passes(KernelGlobals kg,
+                                                IntegratorState state,
                                                 ccl_private const ShaderData *sd,
                                                 ccl_global float *ccl_restrict render_buffer)
 {
 #ifdef __PASSES__
-  const int path_flag = INTEGRATOR_STATE(path, flag);
+  const int path_flag = INTEGRATOR_STATE(state, path, flag);
 
   if (!(path_flag & PATH_RAY_CAMERA)) {
     return;
@@ -208,12 +213,12 @@ ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS,
     return;
   }
 
-  ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, render_buffer);
+  ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
 
   if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
     if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
         average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
-      if (INTEGRATOR_STATE(path, sample) == 0) {
+      if (INTEGRATOR_STATE(state, path, sample) == 0) {
         if (flag & PASSMASK(DEPTH)) {
           const float depth = camera_z_depth(kg, sd->P);
           kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
@@ -250,12 +255,12 @@ ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS,
         kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
       }
 
-      INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SINGLE_PASS_DONE;
+      INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE;
     }
   }
 
   if (kernel_data.film.cryptomatte_passes) {
-    const float3 throughput = INTEGRATOR_STATE(path, throughput);
+    const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
     const float matte_weight = average(throughput) *
                                (1.0f - average(shader_bsdf_transparency(kg, sd)));
     if (matte_weight > 0.0f) {
@@ -279,17 +284,17 @@ ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS,
   }
 
   if (flag & PASSMASK(DIFFUSE_COLOR)) {
-    const float3 throughput = INTEGRATOR_STATE(path, throughput);
+    const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
     kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color,
                              shader_bsdf_diffuse(kg, sd) * throughput);
   }
   if (flag & PASSMASK(GLOSSY_COLOR)) {
-    const float3 throughput = INTEGRATOR_STATE(path, throughput);
+    const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
     kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color,
                              shader_bsdf_glossy(kg, sd) * throughput);
   }
   if (flag & PASSMASK(TRANSMISSION_COLOR)) {
-    const float3 throughput = INTEGRATOR_STATE(path, throughput);
+    const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
     kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color,
                              shader_bsdf_transmission(kg, sd) * throughput);
   }
@@ -314,7 +319,7 @@ ccl_device_inline void kernel_write_data_passes(INTEGRATOR_STATE_ARGS,
       mist = powf(mist, mist_falloff);
 
     /* Modulate by transparency */
-    const float3 throughput = INTEGRATOR_STATE(path, throughput);
+    const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
     const float3 alpha = shader_bsdf_alpha(kg, sd);
     const float mist_output = (1.0f - mist) * average(throughput * alpha);
 
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index e04ed5b1cc1..66eb468fdca 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -23,71 +23,73 @@ CCL_NAMESPACE_BEGIN
 /* Initialize queues, so that the this path is considered terminated.
  * Used for early outputs in the camera ray initialization, as well as initialization of split
  * states for shadow catcher. */
-ccl_device_inline void path_state_init_queues(INTEGRATOR_STATE_ARGS)
+ccl_device_inline void path_state_init_queues(IntegratorState state)
 {
-  INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0;
-  INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
 }
 
 /* Minimalistic initialization of the path state, which is needed for early outputs in the
  * integrator initialization to work. */
-ccl_device_inline void path_state_init(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void path_state_init(IntegratorState state,
                                        ccl_global const KernelWorkTile *ccl_restrict tile,
                                        const int x,
                                        const int y)
 {
   const uint render_pixel_index = (uint)tile->offset + x + y * tile->stride;
 
-  INTEGRATOR_STATE_WRITE(path, render_pixel_index) = render_pixel_index;
+  INTEGRATOR_STATE_WRITE(state, path, render_pixel_index) = render_pixel_index;
 
-  path_state_init_queues(INTEGRATOR_STATE_PASS);
+  path_state_init_queues(state);
 }
 
 /* Initialize the rest of the path state needed to continue the path integration. */
-ccl_device_inline void path_state_init_integrator(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
+                                                  IntegratorState state,
                                                   const int sample,
                                                   const uint rng_hash)
 {
-  INTEGRATOR_STATE_WRITE(path, sample) = sample;
-  INTEGRATOR_STATE_WRITE(path, bounce) = 0;
-  INTEGRATOR_STATE_WRITE(path, diffuse_bounce) = 0;
-  INTEGRATOR_STATE_WRITE(path, glossy_bounce) = 0;
-  INTEGRATOR_STATE_WRITE(path, transmission_bounce) = 0;
-  INTEGRATOR_STATE_WRITE(path, transparent_bounce) = 0;
-  INTEGRATOR_STATE_WRITE(path, volume_bounce) = 0;
-  INTEGRATOR_STATE_WRITE(path, volume_bounds_bounce) = 0;
-  INTEGRATOR_STATE_WRITE(path, rng_hash) = rng_hash;
-  INTEGRATOR_STATE_WRITE(path, rng_offset) = PRNG_BASE_NUM;
-  INTEGRATOR_STATE_WRITE(path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP |
-                                       PATH_RAY_TRANSPARENT_BACKGROUND;
-  INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = 0.0f;
-  INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f;
-  INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = FLT_MAX;
-  INTEGRATOR_STATE_WRITE(path, throughput) = make_float3(1.0f, 1.0f, 1.0f);
+  INTEGRATOR_STATE_WRITE(state, path, sample) = sample;
+  INTEGRATOR_STATE_WRITE(state, path, bounce) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0;
+  INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash;
+  INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM;
+  INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP |
+                                              PATH_RAY_TRANSPARENT_BACKGROUND;
+  INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f;
+  INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
+  INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX;
+  INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);
 
   if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) {
-    INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, object) = OBJECT_NONE;
-    INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, shader) = kernel_data.background.volume_shader;
-    INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, object) = OBJECT_NONE;
-    INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, shader) = SHADER_NONE;
+    INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, object) = OBJECT_NONE;
+    INTEGRATOR_STATE_ARRAY_WRITE(
+        state, volume_stack, 0, shader) = kernel_data.background.volume_shader;
+    INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, object) = OBJECT_NONE;
+    INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE;
   }
 
 #ifdef __DENOISING_FEATURES__
   if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) {
-    INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_DENOISING_FEATURES;
-    INTEGRATOR_STATE_WRITE(path, denoising_feature_throughput) = one_float3();
+    INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES;
+    INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3();
   }
 #endif
 }
 
-ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label)
+ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state, int label)
 {
-  uint32_t flag = INTEGRATOR_STATE(path, flag);
+  uint32_t flag = INTEGRATOR_STATE(state, path, flag);
 
   /* ray through transparent keeps same flags from previous ray and is
    * not counted as a regular bounce, transparent has separate max */
   if (label & LABEL_TRANSPARENT) {
-    uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce) + 1;
+    uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1;
 
     flag |= PATH_RAY_TRANSPARENT;
     if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
@@ -97,14 +99,14 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label)
     if (!kernel_data.integrator.transparent_shadows)
       flag |= PATH_RAY_MIS_SKIP;
 
-    INTEGRATOR_STATE_WRITE(path, flag) = flag;
-    INTEGRATOR_STATE_WRITE(path, transparent_bounce) = transparent_bounce;
+    INTEGRATOR_STATE_WRITE(state, path, flag) = flag;
+    INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce;
     /* Random number generator next bounce. */
-    INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM;
+    INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
     return;
   }
 
-  uint32_t bounce = INTEGRATOR_STATE(path, bounce) + 1;
+  uint32_t bounce = INTEGRATOR_STATE(state, path, bounce) + 1;
   if (bounce >= kernel_data.integrator.max_bounce) {
     flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
   }
@@ -120,8 +122,8 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label)
       flag |= PATH_RAY_VOLUME_PASS;
     }
 
-    const int volume_bounce = INTEGRATOR_STATE(path, volume_bounce) + 1;
-    INTEGRATOR_STATE_WRITE(path, volume_bounce) = volume_bounce;
+    const int volume_bounce = INTEGRATOR_STATE(state, path, volume_bounce) + 1;
+    INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = volume_bounce;
     if (volume_bounce >= kernel_data.integrator.max_volume_bounce) {
       flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
     }
@@ -135,15 +137,15 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label)
       flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
 
       if (label & LABEL_DIFFUSE) {
-        const int diffuse_bounce = INTEGRATOR_STATE(path, diffuse_bounce) + 1;
-        INTEGRATOR_STATE_WRITE(path, diffuse_bounce) = diffuse_bounce;
+        const int diffuse_bounce = INTEGRATOR_STATE(state, path, diffuse_bounce) + 1;
+        INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = diffuse_bounce;
         if (diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) {
           flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
         }
       }
       else {
-        const int glossy_bounce = INTEGRATOR_STATE(path, glossy_bounce) + 1;
-        INTEGRATOR_STATE_WRITE(path, glossy_bounce) = glossy_bounce;
+        const int glossy_bounce = INTEGRATOR_STATE(state, path, glossy_bounce) + 1;
+        INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = glossy_bounce;
         if (glossy_bounce >= kernel_data.integrator.max_glossy_bounce) {
           flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
         }
@@ -158,8 +160,8 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label)
         flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND;
       }
 
-      const int transmission_bounce = INTEGRATOR_STATE(path, transmission_bounce) + 1;
-      INTEGRATOR_STATE_WRITE(path, transmission_bounce) = transmission_bounce;
+      const int transmission_bounce = INTEGRATOR_STATE(state, path, transmission_bounce) + 1;
+      INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = transmission_bounce;
       if (transmission_bounce >= kernel_data.integrator.max_transmission_bounce) {
         flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
       }
@@ -183,36 +185,36 @@ ccl_device_inline void path_state_next(INTEGRATOR_STATE_ARGS, int label)
     }
   }
 
-  INTEGRATOR_STATE_WRITE(path, flag) = flag;
-  INTEGRATOR_STATE_WRITE(path, bounce) = bounce;
+  INTEGRATOR_STATE_WRITE(state, path, flag) = flag;
+  INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce;
 
   /* Random number generator next bounce. */
-  INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM;
+  INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
 }
 
 #ifdef __VOLUME__
-ccl_device_inline bool path_state_volume_next(INTEGRATOR_STATE_ARGS)
+ccl_device_inline bool path_state_volume_next(IntegratorState state)
 {
   /* For volume bounding meshes we pass through without counting transparent
    * bounces, only sanity check in case self intersection gets us stuck. */
-  uint32_t volume_bounds_bounce = INTEGRATOR_STATE(path, volume_bounds_bounce) + 1;
-  INTEGRATOR_STATE_WRITE(path, volume_bounds_bounce) = volume_bounds_bounce;
+  uint32_t volume_bounds_bounce = INTEGRATOR_STATE(state, path, volume_bounds_bounce) + 1;
+  INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = volume_bounds_bounce;
   if (volume_bounds_bounce > VOLUME_BOUNDS_MAX) {
     return false;
   }
 
   /* Random number generator next bounce. */
   if (volume_bounds_bounce > 1) {
-    INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM;
+    INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
   }
 
   return true;
 }
 #endif
 
-ccl_device_inline uint path_state_ray_visibility(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_inline uint path_state_ray_visibility(ConstIntegratorState state)
 {
-  const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
 
   uint32_t visibility = path_flag & PATH_RAY_ALL_VISIBILITY;
 
@@ -231,18 +233,19 @@ ccl_device_inline uint path_state_ray_visibility(INTEGRATOR_STATE_CONST_ARGS)
   return visibility;
 }
 
-ccl_device_inline float path_state_continuation_probability(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline float path_state_continuation_probability(KernelGlobals kg,
+                                                            ConstIntegratorState state,
                                                             const uint32_t path_flag)
 {
   if (path_flag & PATH_RAY_TRANSPARENT) {
-    const uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce);
+    const uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
     /* Do at least specified number of bounces without RR. */
     if (transparent_bounce <= kernel_data.integrator.transparent_min_bounce) {
       return 1.0f;
     }
   }
   else {
-    const uint32_t bounce = INTEGRATOR_STATE(path, bounce);
+    const uint32_t bounce = INTEGRATOR_STATE(state, path, bounce);
     /* Do at least specified number of bounces without RR. */
     if (bounce <= kernel_data.integrator.min_bounce) {
       return 1.0f;
@@ -251,17 +254,18 @@ ccl_device_inline float path_state_continuation_probability(INTEGRATOR_STATE_CON
 
   /* Probabilistic termination: use sqrt() to roughly match typical view
    * transform and do path termination a bit later on average. */
-  return min(sqrtf(max3(fabs(INTEGRATOR_STATE(path, throughput)))), 1.0f);
+  return min(sqrtf(max3(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f);
 }
 
-ccl_device_inline bool path_state_ao_bounce(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state)
 {
   if (!kernel_data.integrator.ao_bounces) {
     return false;
   }
 
-  const int bounce = INTEGRATOR_STATE(path, bounce) - INTEGRATOR_STATE(path, transmission_bounce) -
-                     (INTEGRATOR_STATE(path, glossy_bounce) > 0) + 1;
+  const int bounce = INTEGRATOR_STATE(state, path, bounce) -
+                     INTEGRATOR_STATE(state, path, transmission_bounce) -
+                     (INTEGRATOR_STATE(state, path, glossy_bounce) > 0) + 1;
   return (bounce > kernel_data.integrator.ao_bounces);
 }
 
@@ -281,26 +285,27 @@ typedef struct RNGState {
   int sample;
 } RNGState;
 
-ccl_device_inline void path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void path_state_rng_load(ConstIntegratorState state,
                                            ccl_private RNGState *rng_state)
 {
-  rng_state->rng_hash = INTEGRATOR_STATE(path, rng_hash);
-  rng_state->rng_offset = INTEGRATOR_STATE(path, rng_offset);
-  rng_state->sample = INTEGRATOR_STATE(path, sample);
+  rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash);
+  rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset);
+  rng_state->sample = INTEGRATOR_STATE(state, path, sample);
 }
 
-ccl_device_inline void shadow_path_state_rng_load(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorState state,
                                                   ccl_private RNGState *rng_state)
 {
-  const uint shadow_bounces = INTEGRATOR_STATE(shadow_path, transparent_bounce) -
-                              INTEGRATOR_STATE(path, transparent_bounce);
+  const uint shadow_bounces = INTEGRATOR_STATE(state, shadow_path, transparent_bounce) -
+                              INTEGRATOR_STATE(state, path, transparent_bounce);
 
-  rng_state->rng_hash = INTEGRATOR_STATE(path, rng_hash);
-  rng_state->rng_offset = INTEGRATOR_STATE(path, rng_offset) + PRNG_BOUNCE_NUM * shadow_bounces;
-  rng_state->sample = INTEGRATOR_STATE(path, sample);
+  rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash);
+  rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset) +
+                          PRNG_BOUNCE_NUM * shadow_bounces;
+  rng_state->sample = INTEGRATOR_STATE(state, path, sample);
 }
 
-ccl_device_inline float path_state_rng_1D(ccl_global const KernelGlobals *kg,
+ccl_device_inline float path_state_rng_1D(KernelGlobals kg,
                                           ccl_private const RNGState *rng_state,
                                           int dimension)
 {
@@ -308,7 +313,7 @@ ccl_device_inline float path_state_rng_1D(ccl_global const KernelGlobals *kg,
       kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_state_rng_2D(ccl_global const KernelGlobals *kg,
+ccl_device_inline void path_state_rng_2D(KernelGlobals kg,
                                          ccl_private const RNGState *rng_state,
                                          int dimension,
                                          ccl_private float *fx,
@@ -318,7 +323,7 @@ ccl_device_inline void path_state_rng_2D(ccl_global const KernelGlobals *kg,
       kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy);
 }
 
-ccl_device_inline float path_state_rng_1D_hash(ccl_global const KernelGlobals *kg,
+ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg,
                                                ccl_private const RNGState *rng_state,
                                                uint hash)
 {
@@ -329,7 +334,7 @@ ccl_device_inline float path_state_rng_1D_hash(ccl_global const KernelGlobals *k
       kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset);
 }
 
-ccl_device_inline float path_branched_rng_1D(ccl_global const KernelGlobals *kg,
+ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
                                              ccl_private const RNGState *rng_state,
                                              int branch,
                                              int num_branches,
@@ -341,7 +346,7 @@ ccl_device_inline float path_branched_rng_1D(ccl_global const KernelGlobals *kg,
                      rng_state->rng_offset + dimension);
 }
 
-ccl_device_inline void path_branched_rng_2D(ccl_global const KernelGlobals *kg,
+ccl_device_inline void path_branched_rng_2D(KernelGlobals kg,
                                             ccl_private const RNGState *rng_state,
                                             int branch,
                                             int num_branches,
@@ -360,7 +365,7 @@ ccl_device_inline void path_branched_rng_2D(ccl_global const KernelGlobals *kg,
 /* Utility functions to get light termination value,
  * since it might not be needed in many cases.
  */
-ccl_device_inline float path_state_rng_light_termination(ccl_global const KernelGlobals *kg,
+ccl_device_inline float path_state_rng_light_termination(KernelGlobals kg,
                                                          ccl_private const RNGState *state)
 {
   if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 7db4289acec..e5e87453611 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -38,7 +38,7 @@ CCL_NAMESPACE_BEGIN
  */
 #  define SOBOL_SKIP 64
 
-ccl_device uint sobol_dimension(ccl_global const KernelGlobals *kg, int index, int dimension)
+ccl_device uint sobol_dimension(KernelGlobals kg, int index, int dimension)
 {
   uint result = 0;
   uint i = index + SOBOL_SKIP;
@@ -51,7 +51,7 @@ ccl_device uint sobol_dimension(ccl_global const KernelGlobals *kg, int index, i
 
 #endif /* __SOBOL__ */
 
-ccl_device_forceinline float path_rng_1D(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
                                          uint rng_hash,
                                          int sample,
                                          int dimension)
@@ -85,7 +85,7 @@ ccl_device_forceinline float path_rng_1D(ccl_global const KernelGlobals *kg,
 #endif
 }
 
-ccl_device_forceinline void path_rng_2D(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline void path_rng_2D(KernelGlobals kg,
                                         uint rng_hash,
                                         int sample,
                                         int dimension,
@@ -141,7 +141,7 @@ ccl_device_inline uint hash_iqnt2d(const uint x, const uint y)
   return n;
 }
 
-ccl_device_inline uint path_rng_hash_init(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline uint path_rng_hash_init(KernelGlobals kg,
                                           const int sample,
                                           const int x,
                                           const int y)
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index b5a52ff866d..4a57d22775a 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -104,7 +104,8 @@ ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases
 }
 #endif /* __VOLUME__ */
 
-ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg,
+                                                       ConstIntegratorState state,
                                                        ccl_private ShaderData *sd)
 {
   /* Defensive sampling.
@@ -112,7 +113,8 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR
    * We can likely also do defensive sampling at deeper bounces, particularly
    * for cases like a perfect mirror but possibly also others. This will need
    * a good heuristic. */
-  if (INTEGRATOR_STATE(path, bounce) + INTEGRATOR_STATE(path, transparent_bounce) == 0 &&
+  if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) ==
+          0 &&
       sd->num_closure > 1) {
     float sum = 0.0f;
 
@@ -136,7 +138,8 @@ ccl_device_inline void shader_prepare_surface_closures(INTEGRATOR_STATE_CONST_AR
    * Blurring of bsdf after bounces, for rays that have a small likelihood
    * of following this particular path (diffuse, rough glossy) */
   if (kernel_data.integrator.filter_glossy != FLT_MAX) {
-    float blur_pdf = kernel_data.integrator.filter_glossy * INTEGRATOR_STATE(path, min_ray_pdf);
+    float blur_pdf = kernel_data.integrator.filter_glossy *
+                     INTEGRATOR_STATE(state, path, min_ray_pdf);
 
     if (blur_pdf < 1.0f) {
       float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
@@ -182,7 +185,7 @@ ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_sh
   return false;
 }
 
-ccl_device_inline float _shader_bsdf_multi_eval(ccl_global const KernelGlobals *kg,
+ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg,
                                                 ccl_private ShaderData *sd,
                                                 const float3 omega_in,
                                                 const bool is_transmission,
@@ -226,7 +229,7 @@ ccl_device
 ccl_device_inline
 #endif
     float
-    shader_bsdf_eval(ccl_global const KernelGlobals *kg,
+    shader_bsdf_eval(KernelGlobals kg,
                      ccl_private ShaderData *sd,
                      const float3 omega_in,
                      const bool is_transmission,
@@ -306,7 +309,7 @@ shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd,
 
 /* Sample direction for picked BSDF, and return evaluation and pdf for all
  * BSDFs combined using MIS. */
-ccl_device int shader_bsdf_sample_closure(ccl_global const KernelGlobals *kg,
+ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
                                           ccl_private ShaderData *sd,
                                           ccl_private const ShaderClosure *sc,
                                           float randu,
@@ -360,8 +363,7 @@ ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd)
   return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
 }
 
-ccl_device float3 shader_bsdf_transparency(ccl_global const KernelGlobals *kg,
-                                           ccl_private const ShaderData *sd)
+ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   if (sd->flag & SD_HAS_ONLY_VOLUME) {
     return one_float3();
@@ -374,8 +376,7 @@ ccl_device float3 shader_bsdf_transparency(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device void shader_bsdf_disable_transparency(ccl_global const KernelGlobals *kg,
-                                                 ccl_private ShaderData *sd)
+ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd)
 {
   if (sd->flag & SD_TRANSPARENT) {
     for (int i = 0; i < sd->num_closure; i++) {
@@ -391,8 +392,7 @@ ccl_device void shader_bsdf_disable_transparency(ccl_global const KernelGlobals
   }
 }
 
-ccl_device float3 shader_bsdf_alpha(ccl_global const KernelGlobals *kg,
-                                    ccl_private const ShaderData *sd)
+ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd);
 
@@ -402,8 +402,7 @@ ccl_device float3 shader_bsdf_alpha(ccl_global const KernelGlobals *kg,
   return alpha;
 }
 
-ccl_device float3 shader_bsdf_diffuse(ccl_global const KernelGlobals *kg,
-                                      ccl_private const ShaderData *sd)
+ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
@@ -417,8 +416,7 @@ ccl_device float3 shader_bsdf_diffuse(ccl_global const KernelGlobals *kg,
   return eval;
 }
 
-ccl_device float3 shader_bsdf_glossy(ccl_global const KernelGlobals *kg,
-                                     ccl_private const ShaderData *sd)
+ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
@@ -432,8 +430,7 @@ ccl_device float3 shader_bsdf_glossy(ccl_global const KernelGlobals *kg,
   return eval;
 }
 
-ccl_device float3 shader_bsdf_transmission(ccl_global const KernelGlobals *kg,
-                                           ccl_private const ShaderData *sd)
+ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   float3 eval = zero_float3();
 
@@ -447,8 +444,7 @@ ccl_device float3 shader_bsdf_transmission(ccl_global const KernelGlobals *kg,
   return eval;
 }
 
-ccl_device float3 shader_bsdf_average_normal(ccl_global const KernelGlobals *kg,
-                                             ccl_private const ShaderData *sd)
+ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   float3 N = zero_float3();
 
@@ -461,8 +457,7 @@ ccl_device float3 shader_bsdf_average_normal(ccl_global const KernelGlobals *kg,
   return (is_zero(N)) ? sd->N : normalize(N);
 }
 
-ccl_device float3 shader_bsdf_ao_normal(ccl_global const KernelGlobals *kg,
-                                        ccl_private const ShaderData *sd)
+ccl_device float3 shader_bsdf_ao_normal(KernelGlobals kg, ccl_private const ShaderData *sd)
 {
   float3 N = zero_float3();
 
@@ -499,7 +494,7 @@ ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd)
 
 /* Constant emission optimization */
 
-ccl_device bool shader_constant_emission_eval(ccl_global const KernelGlobals *kg,
+ccl_device bool shader_constant_emission_eval(KernelGlobals kg,
                                               int shader,
                                               ccl_private float3 *eval)
 {
@@ -543,8 +538,7 @@ ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd)
 
 /* Holdout */
 
-ccl_device float3 shader_holdout_apply(ccl_global const KernelGlobals *kg,
-                                       ccl_private ShaderData *sd)
+ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd)
 {
   float3 weight = zero_float3();
 
@@ -582,7 +576,8 @@ ccl_device float3 shader_holdout_apply(ccl_global const KernelGlobals *kg,
 /* Surface Evaluation */
 
 template<uint node_feature_mask>
-ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device void shader_eval_surface(KernelGlobals kg,
+                                    ConstIntegratorState state,
                                     ccl_private ShaderData *ccl_restrict sd,
                                     ccl_global float *ccl_restrict buffer,
                                     int path_flag)
@@ -604,18 +599,17 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
 #ifdef __OSL__
   if (kg->osl) {
     if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
-      OSLShader::eval_background(INTEGRATOR_STATE_PASS, sd, path_flag);
+      OSLShader::eval_background(kg, state, sd, path_flag);
     }
     else {
-      OSLShader::eval_surface(INTEGRATOR_STATE_PASS, sd, path_flag);
+      OSLShader::eval_surface(kg, state, sd, path_flag);
     }
   }
   else
 #endif
   {
 #ifdef __SVM__
-    svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(
-        INTEGRATOR_STATE_PASS, sd, buffer, path_flag);
+    svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag);
 #else
     if (sd->object == OBJECT_NONE) {
       sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f);
@@ -632,11 +626,14 @@ ccl_device void shader_eval_surface(INTEGRATOR_STATE_CONST_ARGS,
 #endif
   }
 
-  if (KERNEL_NODES_FEATURE(BSDF) && (sd->flag & SD_BSDF_NEEDS_LCG)) {
-    sd->lcg_state = lcg_state_init(INTEGRATOR_STATE(path, rng_hash),
-                                   INTEGRATOR_STATE(path, rng_offset),
-                                   INTEGRATOR_STATE(path, sample),
-                                   0xb4bc3953);
+  IF_KERNEL_NODES_FEATURE(BSDF)
+  {
+    if (sd->flag & SD_BSDF_NEEDS_LCG) {
+      sd->lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash),
+                                     INTEGRATOR_STATE(state, path, rng_offset),
+                                     INTEGRATOR_STATE(state, path, sample),
+                                     0xb4bc3953);
+    }
   }
 }
 
@@ -672,7 +669,7 @@ ccl_device_inline float _shader_volume_phase_multi_eval(
   return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
 }
 
-ccl_device float shader_volume_phase_eval(ccl_global const KernelGlobals *kg,
+ccl_device float shader_volume_phase_eval(KernelGlobals kg,
                                           ccl_private const ShaderData *sd,
                                           ccl_private const ShaderVolumePhases *phases,
                                           const float3 omega_in,
@@ -683,7 +680,7 @@ ccl_device float shader_volume_phase_eval(ccl_global const KernelGlobals *kg,
   return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
 }
 
-ccl_device int shader_volume_phase_sample(ccl_global const KernelGlobals *kg,
+ccl_device int shader_volume_phase_sample(KernelGlobals kg,
                                           ccl_private const ShaderData *sd,
                                           ccl_private const ShaderVolumePhases *phases,
                                           float randu,
@@ -742,7 +739,7 @@ ccl_device int shader_volume_phase_sample(ccl_global const KernelGlobals *kg,
   return label;
 }
 
-ccl_device int shader_phase_sample_closure(ccl_global const KernelGlobals *kg,
+ccl_device int shader_phase_sample_closure(KernelGlobals kg,
                                            ccl_private const ShaderData *sd,
                                            ccl_private const ShaderVolumeClosure *sc,
                                            float randu,
@@ -767,7 +764,8 @@ ccl_device int shader_phase_sample_closure(ccl_global const KernelGlobals *kg,
 /* Volume Evaluation */
 
 template<const bool shadow, typename StackReadOp>
-ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_inline void shader_eval_volume(KernelGlobals kg,
+                                          ConstIntegratorState state,
                                           ccl_private ShaderData *ccl_restrict sd,
                                           const int path_flag,
                                           StackReadOp stack_read)
@@ -820,13 +818,13 @@ ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
 #  ifdef __SVM__
 #    ifdef __OSL__
     if (kg->osl) {
-      OSLShader::eval_volume(INTEGRATOR_STATE_PASS, sd, path_flag);
+      OSLShader::eval_volume(kg, state, sd, path_flag);
     }
     else
 #    endif
     {
       svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>(
-          INTEGRATOR_STATE_PASS, sd, NULL, path_flag);
+          kg, state, sd, NULL, path_flag);
     }
 #  endif
 
@@ -843,7 +841,9 @@ ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
 
 /* Displacement Evaluation */
 
-ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ccl_private ShaderData *sd)
+ccl_device void shader_eval_displacement(KernelGlobals kg,
+                                         ConstIntegratorState state,
+                                         ccl_private ShaderData *sd)
 {
   sd->num_closure = 0;
   sd->num_closure_left = 0;
@@ -852,19 +852,19 @@ ccl_device void shader_eval_displacement(INTEGRATOR_STATE_CONST_ARGS, ccl_privat
 #ifdef __SVM__
 #  ifdef __OSL__
   if (kg->osl)
-    OSLShader::eval_displacement(INTEGRATOR_STATE_PASS, sd);
+    OSLShader::eval_displacement(kg, state, sd);
   else
 #  endif
   {
     svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>(
-        INTEGRATOR_STATE_PASS, sd, NULL, 0);
+        kg, state, sd, NULL, 0);
   }
 #endif
 }
 
 /* Cryptomatte */
 
-ccl_device float shader_cryptomatte_id(ccl_global const KernelGlobals *kg, int shader)
+ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader)
 {
   return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
 }
diff --git a/intern/cycles/kernel/kernel_shadow_catcher.h b/intern/cycles/kernel/kernel_shadow_catcher.h
index 824749818a4..8dc7a568b33 100644
--- a/intern/cycles/kernel/kernel_shadow_catcher.h
+++ b/intern/cycles/kernel/kernel_shadow_catcher.h
@@ -22,7 +22,8 @@
 CCL_NAMESPACE_BEGIN
 
 /* Check whether current surface bounce is where path is to be split for the shadow catcher. */
-ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals kg,
+                                                                  IntegratorState state,
                                                                   const int object_flag)
 {
 #ifdef __SHADOW_CATCHER__
@@ -38,7 +39,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STA
     return false;
   }
 
-  const int path_flag = INTEGRATOR_STATE(path, flag);
+  const int path_flag = INTEGRATOR_STATE(state, path, flag);
 
   if ((path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) == 0) {
     /* Split only on primary rays, secondary bounces are to treat shadow catcher as a regular
@@ -58,13 +59,14 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STA
 }
 
 /* Check whether the current path can still split. */
-ccl_device_inline bool kernel_shadow_catcher_path_can_split(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg,
+                                                            ConstIntegratorState state)
 {
   if (INTEGRATOR_PATH_IS_TERMINATED && INTEGRATOR_SHADOW_PATH_IS_TERMINATED) {
     return false;
   }
 
-  const int path_flag = INTEGRATOR_STATE(path, flag);
+  const int path_flag = INTEGRATOR_STATE(state, path, flag);
 
   if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) {
     /* Shadow catcher was already hit and the state was split. No further split is allowed. */
@@ -76,21 +78,23 @@ ccl_device_inline bool kernel_shadow_catcher_path_can_split(INTEGRATOR_STATE_CON
 
 /* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths
  * after this function. */
-ccl_device_inline bool kernel_shadow_catcher_split(INTEGRATOR_STATE_ARGS, const int object_flags)
+ccl_device_inline bool kernel_shadow_catcher_split(KernelGlobals kg,
+                                                   IntegratorState state,
+                                                   const int object_flags)
 {
 #ifdef __SHADOW_CATCHER__
 
-  if (!kernel_shadow_catcher_is_path_split_bounce(INTEGRATOR_STATE_PASS, object_flags)) {
+  if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) {
     return false;
   }
 
   /* The split is to be done. Mark the current state as such, so that it stops contributing to the
    * shadow catcher matte pass, but keeps contributing to the combined pass. */
-  INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
+  INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
 
   /* Split new state from the current one. This new state will only track contribution of shadow
    * catcher objects ignoring non-catcher objects. */
-  integrator_state_shadow_catcher_split(INTEGRATOR_STATE_PASS);
+  integrator_state_shadow_catcher_split(kg, state);
 
   return true;
 #else
@@ -101,14 +105,16 @@ ccl_device_inline bool kernel_shadow_catcher_split(INTEGRATOR_STATE_ARGS, const
 
 #ifdef __SHADOW_CATCHER__
 
-ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(KernelGlobals kg,
+                                                                ConstIntegratorState state)
 {
-  return (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_HIT) == 0;
+  return (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_HIT) == 0;
 }
 
-ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(KernelGlobals kg,
+                                                                 ConstIntegratorState state)
 {
-  return INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_PASS;
+  return INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_PASS;
 }
 
 #endif /* __SHADOW_CATCHER__ */
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 3a5a11d2c10..5625c0e4d19 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -813,7 +813,7 @@ typedef struct ccl_align(16) ShaderData
   float ray_dP;
 
 #ifdef __OSL__
-  const struct KernelGlobals *osl_globals;
+  const struct KernelGlobalsCPU *osl_globals;
   const struct IntegratorStateCPU *osl_path_state;
 #endif
 
@@ -1505,63 +1505,77 @@ enum KernelFeatureFlag : unsigned int {
   KERNEL_FEATURE_NODE_BUMP_STATE = (1U << 5U),
   KERNEL_FEATURE_NODE_VORONOI_EXTRA = (1U << 6U),
   KERNEL_FEATURE_NODE_RAYTRACE = (1U << 7U),
+  KERNEL_FEATURE_NODE_AOV = (1U << 8U),
+  KERNEL_FEATURE_NODE_LIGHT_PATH = (1U << 9U),
 
   /* Use denoising kernels and output denoising passes. */
-  KERNEL_FEATURE_DENOISING = (1U << 8U),
+  KERNEL_FEATURE_DENOISING = (1U << 10U),
 
   /* Use path tracing kernels. */
-  KERNEL_FEATURE_PATH_TRACING = (1U << 9U),
+  KERNEL_FEATURE_PATH_TRACING = (1U << 11U),
 
   /* BVH/sampling kernel features. */
-  KERNEL_FEATURE_HAIR = (1U << 10U),
-  KERNEL_FEATURE_HAIR_THICK = (1U << 11U),
-  KERNEL_FEATURE_OBJECT_MOTION = (1U << 12U),
-  KERNEL_FEATURE_CAMERA_MOTION = (1U << 13U),
+  KERNEL_FEATURE_HAIR = (1U << 12U),
+  KERNEL_FEATURE_HAIR_THICK = (1U << 13U),
+  KERNEL_FEATURE_OBJECT_MOTION = (1U << 14U),
+  KERNEL_FEATURE_CAMERA_MOTION = (1U << 15U),
 
   /* Denotes whether baking functionality is needed. */
-  KERNEL_FEATURE_BAKING = (1U << 14U),
+  KERNEL_FEATURE_BAKING = (1U << 16U),
 
   /* Use subsurface scattering materials. */
-  KERNEL_FEATURE_SUBSURFACE = (1U << 15U),
+  KERNEL_FEATURE_SUBSURFACE = (1U << 17U),
 
   /* Use volume materials. */
-  KERNEL_FEATURE_VOLUME = (1U << 16U),
+  KERNEL_FEATURE_VOLUME = (1U << 18U),
 
   /* Use OpenSubdiv patch evaluation */
-  KERNEL_FEATURE_PATCH_EVALUATION = (1U << 17U),
+  KERNEL_FEATURE_PATCH_EVALUATION = (1U << 19U),
 
   /* Use Transparent shadows */
-  KERNEL_FEATURE_TRANSPARENT = (1U << 18U),
+  KERNEL_FEATURE_TRANSPARENT = (1U << 20U),
 
   /* Use shadow catcher. */
-  KERNEL_FEATURE_SHADOW_CATCHER = (1U << 19U),
+  KERNEL_FEATURE_SHADOW_CATCHER = (1U << 21U),
 
   /* Per-uber shader usage flags. */
-  KERNEL_FEATURE_PRINCIPLED = (1U << 20U),
+  KERNEL_FEATURE_PRINCIPLED = (1U << 22U),
 
   /* Light render passes. */
-  KERNEL_FEATURE_LIGHT_PASSES = (1U << 21U),
+  KERNEL_FEATURE_LIGHT_PASSES = (1U << 23U),
 
   /* Shadow render pass. */
-  KERNEL_FEATURE_SHADOW_PASS = (1U << 22U),
+  KERNEL_FEATURE_SHADOW_PASS = (1U << 24U),
 };
 
 /* Shader node feature mask, to specialize shader evaluation for kernels. */
 
 #define KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT \
-  (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VORONOI_EXTRA)
+  (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VORONOI_EXTRA | \
+   KERNEL_FEATURE_NODE_LIGHT_PATH)
 #define KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW \
   (KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \
    KERNEL_FEATURE_NODE_HAIR | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE | \
-   KERNEL_FEATURE_NODE_VORONOI_EXTRA)
+   KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH)
 #define KERNEL_FEATURE_NODE_MASK_SURFACE \
-  (KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW | KERNEL_FEATURE_NODE_RAYTRACE)
+  (KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW | KERNEL_FEATURE_NODE_RAYTRACE | \
+   KERNEL_FEATURE_NODE_AOV | KERNEL_FEATURE_NODE_LIGHT_PATH)
 #define KERNEL_FEATURE_NODE_MASK_VOLUME \
-  (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | KERNEL_FEATURE_NODE_VORONOI_EXTRA)
+  (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \
+   KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH)
 #define KERNEL_FEATURE_NODE_MASK_DISPLACEMENT \
   (KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE)
 #define KERNEL_FEATURE_NODE_MASK_BUMP KERNEL_FEATURE_NODE_MASK_DISPLACEMENT
 
-#define KERNEL_NODES_FEATURE(feature) ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
+/* Must be constexpr on the CPU to avoid compile errors because the state types
+ * are different depending on the main, shadow or null path. For GPU we don't have
+ * C++17 everywhere so can't use it. */
+#ifdef __KERNEL_CPU__
+#  define IF_KERNEL_NODES_FEATURE(feature) \
+    if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
+#else
+#  define IF_KERNEL_NODES_FEATURE(feature) \
+    if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
+#endif
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index e814fcca246..94712a4dd13 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -500,7 +500,7 @@ bool CBSDFClosure::skip(const ShaderData *sd, int path_flag, int scattering)
 {
   /* caustic options */
   if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
-    const KernelGlobals *kg = sd->osl_globals;
+    const KernelGlobalsCPU *kg = sd->osl_globals;
 
     if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
         (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) {
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 2c7f5eb4948..bb7655fbe9a 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -149,7 +149,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
    * a concept of shader space, so we just use object space for both. */
   if (xform) {
     const ShaderData *sd = (const ShaderData *)xform;
-    const KernelGlobals *kg = sd->osl_globals;
+    const KernelGlobalsCPU *kg = sd->osl_globals;
     int object = sd->object;
 
     if (object != OBJECT_NONE) {
@@ -187,7 +187,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
    * a concept of shader space, so we just use object space for both. */
   if (xform) {
     const ShaderData *sd = (const ShaderData *)xform;
-    const KernelGlobals *kg = sd->osl_globals;
+    const KernelGlobalsCPU *kg = sd->osl_globals;
     int object = sd->object;
 
     if (object != OBJECT_NONE) {
@@ -222,7 +222,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
                                    float time)
 {
   ShaderData *sd = (ShaderData *)(sg->renderstate);
-  const KernelGlobals *kg = sd->osl_globals;
+  const KernelGlobalsCPU *kg = sd->osl_globals;
 
   if (from == u_ndc) {
     copy_matrix(result, kernel_data.cam.ndctoworld);
@@ -254,7 +254,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
                                            float time)
 {
   ShaderData *sd = (ShaderData *)(sg->renderstate);
-  const KernelGlobals *kg = sd->osl_globals;
+  const KernelGlobalsCPU *kg = sd->osl_globals;
 
   if (to == u_ndc) {
     copy_matrix(result, kernel_data.cam.worldtondc);
@@ -288,7 +288,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
    * a concept of shader space, so we just use object space for both. */
   if (xform) {
     const ShaderData *sd = (const ShaderData *)xform;
-    const KernelGlobals *kg = sd->osl_globals;
+    const KernelGlobalsCPU *kg = sd->osl_globals;
     int object = sd->object;
 
     if (object != OBJECT_NONE) {
@@ -316,7 +316,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
    * a concept of shader space, so we just use object space for both. */
   if (xform) {
     const ShaderData *sd = (const ShaderData *)xform;
-    const KernelGlobals *kg = sd->osl_globals;
+    const KernelGlobalsCPU *kg = sd->osl_globals;
     int object = sd->object;
 
     if (object != OBJECT_NONE) {
@@ -339,7 +339,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
 bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from)
 {
   ShaderData *sd = (ShaderData *)(sg->renderstate);
-  const KernelGlobals *kg = sd->osl_globals;
+  const KernelGlobalsCPU *kg = sd->osl_globals;
 
   if (from == u_ndc) {
     copy_matrix(result, kernel_data.cam.ndctoworld);
@@ -366,7 +366,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
                                            ustring to)
 {
   ShaderData *sd = (ShaderData *)(sg->renderstate);
-  const KernelGlobals *kg = sd->osl_globals;
+  const KernelGlobalsCPU *kg = sd->osl_globals;
 
   if (to == u_ndc) {
     copy_matrix(result, kernel_data.cam.worldtondc);
@@ -745,7 +745,7 @@ static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val)
   return false;
 }
 
-static bool get_primitive_attribute(const KernelGlobals *kg,
+static bool get_primitive_attribute(const KernelGlobalsCPU *kg,
                                     const ShaderData *sd,
                                     const OSLGlobals::Attribute &attr,
                                     const TypeDesc &type,
@@ -806,7 +806,7 @@ static bool get_primitive_attribute(const KernelGlobals *kg,
   }
 }
 
-static bool get_mesh_attribute(const KernelGlobals *kg,
+static bool get_mesh_attribute(const KernelGlobalsCPU *kg,
                                const ShaderData *sd,
                                const OSLGlobals::Attribute &attr,
                                const TypeDesc &type,
@@ -855,7 +855,7 @@ static bool get_object_attribute(const OSLGlobals::Attribute &attr,
   }
 }
 
-bool OSLRenderServices::get_object_standard_attribute(const KernelGlobals *kg,
+bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg,
                                                       ShaderData *sd,
                                                       ustring name,
                                                       TypeDesc type,
@@ -1000,7 +1000,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobals *kg,
   }
 }
 
-bool OSLRenderServices::get_background_attribute(const KernelGlobals *kg,
+bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg,
                                                  ShaderData *sd,
                                                  ustring name,
                                                  TypeDesc type,
@@ -1091,7 +1091,7 @@ bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg,
 bool OSLRenderServices::get_attribute(
     ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val)
 {
-  const KernelGlobals *kg = sd->osl_globals;
+  const KernelGlobalsCPU *kg = sd->osl_globals;
   int prim_type = 0;
   int object;
 
@@ -1220,7 +1220,7 @@ bool OSLRenderServices::texture(ustring filename,
   OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
   OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO;
   ShaderData *sd = (ShaderData *)(sg->renderstate);
-  const KernelGlobals *kernel_globals = sd->osl_globals;
+  KernelGlobals kernel_globals = sd->osl_globals;
   bool status = false;
 
   switch (texture_type) {
@@ -1367,7 +1367,7 @@ bool OSLRenderServices::texture3d(ustring filename,
     case OSLTextureHandle::SVM: {
       /* Packed texture. */
       ShaderData *sd = (ShaderData *)(sg->renderstate);
-      const KernelGlobals *kernel_globals = sd->osl_globals;
+      KernelGlobals kernel_globals = sd->osl_globals;
       int slot = handle->svm_slot;
       float3 P_float3 = make_float3(P.x, P.y, P.z);
       float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE);
@@ -1389,7 +1389,7 @@ bool OSLRenderServices::texture3d(ustring filename,
       if (handle && handle->oiio_handle) {
         if (texture_thread_info == NULL) {
           ShaderData *sd = (ShaderData *)(sg->renderstate);
-          const KernelGlobals *kernel_globals = sd->osl_globals;
+          KernelGlobals kernel_globals = sd->osl_globals;
           OSLThreadData *tdata = kernel_globals->osl_tdata;
           texture_thread_info = tdata->oiio_thread_info;
         }
@@ -1474,7 +1474,7 @@ bool OSLRenderServices::environment(ustring filename,
   if (handle && handle->oiio_handle) {
     if (thread_info == NULL) {
       ShaderData *sd = (ShaderData *)(sg->renderstate);
-      const KernelGlobals *kernel_globals = sd->osl_globals;
+      KernelGlobals kernel_globals = sd->osl_globals;
       OSLThreadData *tdata = kernel_globals->osl_tdata;
       thread_info = tdata->oiio_thread_info;
     }
@@ -1629,7 +1629,7 @@ bool OSLRenderServices::trace(TraceOpt &options,
   tracedata->hit = false;
   tracedata->sd.osl_globals = sd->osl_globals;
 
-  const KernelGlobals *kg = sd->osl_globals;
+  const KernelGlobalsCPU *kg = sd->osl_globals;
 
   /* Can't raytrace from shaders like displacement, before BVH exists. */
   if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
@@ -1662,7 +1662,7 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
       }
       else {
         ShaderData *sd = &tracedata->sd;
-        const KernelGlobals *kg = sd->osl_globals;
+        const KernelGlobalsCPU *kg = sd->osl_globals;
 
         if (!tracedata->setup) {
           /* lazy shader data setup */
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index a9671485eda..d9f57c642ad 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -40,7 +40,7 @@ class Scene;
 class Shader;
 struct ShaderData;
 struct float3;
-struct KernelGlobals;
+struct KernelGlobalsCPU;
 
 /* OSL Texture Handle
  *
@@ -250,13 +250,13 @@ class OSLRenderServices : public OSL::RendererServices {
                         void *data) override;
 #endif
 
-  static bool get_background_attribute(const KernelGlobals *kg,
+  static bool get_background_attribute(const KernelGlobalsCPU *kg,
                                        ShaderData *sd,
                                        ustring name,
                                        TypeDesc type,
                                        bool derivatives,
                                        void *val);
-  static bool get_object_standard_attribute(const KernelGlobals *kg,
+  static bool get_object_standard_attribute(const KernelGlobalsCPU *kg,
                                             ShaderData *sd,
                                             ustring name,
                                             TypeDesc type,
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index 880ef635c76..a1df63ca8ff 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -41,7 +41,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Threads */
 
-void OSLShader::thread_init(KernelGlobals *kg, OSLGlobals *osl_globals)
+void OSLShader::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals)
 {
   /* no osl used? */
   if (!osl_globals->use) {
@@ -67,7 +67,7 @@ void OSLShader::thread_init(KernelGlobals *kg, OSLGlobals *osl_globals)
   kg->osl_tdata = tdata;
 }
 
-void OSLShader::thread_free(KernelGlobals *kg)
+void OSLShader::thread_free(KernelGlobalsCPU *kg)
 {
   if (!kg->osl)
     return;
@@ -87,7 +87,7 @@ void OSLShader::thread_free(KernelGlobals *kg)
 
 /* Globals */
 
-static void shaderdata_to_shaderglobals(const KernelGlobals *kg,
+static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
                                         ShaderData *sd,
                                         const IntegratorStateCPU *state,
                                         int path_flag,
@@ -174,7 +174,7 @@ static void flatten_surface_closure_tree(ShaderData *sd,
   }
 }
 
-void OSLShader::eval_surface(const KernelGlobals *kg,
+void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
                              const IntegratorStateCPU *state,
                              ShaderData *sd,
                              int path_flag)
@@ -282,7 +282,7 @@ static void flatten_background_closure_tree(ShaderData *sd,
   }
 }
 
-void OSLShader::eval_background(const KernelGlobals *kg,
+void OSLShader::eval_background(const KernelGlobalsCPU *kg,
                                 const IntegratorStateCPU *state,
                                 ShaderData *sd,
                                 int path_flag)
@@ -340,7 +340,7 @@ static void flatten_volume_closure_tree(ShaderData *sd,
   }
 }
 
-void OSLShader::eval_volume(const KernelGlobals *kg,
+void OSLShader::eval_volume(const KernelGlobalsCPU *kg,
                             const IntegratorStateCPU *state,
                             ShaderData *sd,
                             int path_flag)
@@ -366,7 +366,7 @@ void OSLShader::eval_volume(const KernelGlobals *kg,
 
 /* Displacement */
 
-void OSLShader::eval_displacement(const KernelGlobals *kg,
+void OSLShader::eval_displacement(const KernelGlobalsCPU *kg,
                                   const IntegratorStateCPU *state,
                                   ShaderData *sd)
 {
@@ -391,7 +391,7 @@ void OSLShader::eval_displacement(const KernelGlobals *kg,
 
 /* Attributes */
 
-int OSLShader::find_attribute(const KernelGlobals *kg,
+int OSLShader::find_attribute(const KernelGlobalsCPU *kg,
                               const ShaderData *sd,
                               uint id,
                               AttributeDescriptor *desc)
diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h
index f1f17b141eb..686a1e1374a 100644
--- a/intern/cycles/kernel/osl/osl_shader.h
+++ b/intern/cycles/kernel/osl/osl_shader.h
@@ -39,7 +39,7 @@ struct ShaderClosure;
 struct ShaderData;
 struct IntegratorStateCPU;
 struct differential3;
-struct KernelGlobals;
+struct KernelGlobalsCPU;
 
 struct OSLGlobals;
 struct OSLShadingSystem;
@@ -50,28 +50,28 @@ class OSLShader {
   static void register_closures(OSLShadingSystem *ss);
 
   /* per thread data */
-  static void thread_init(KernelGlobals *kg, OSLGlobals *osl_globals);
-  static void thread_free(KernelGlobals *kg);
+  static void thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals);
+  static void thread_free(KernelGlobalsCPU *kg);
 
   /* eval */
-  static void eval_surface(const KernelGlobals *kg,
+  static void eval_surface(const KernelGlobalsCPU *kg,
                            const IntegratorStateCPU *state,
                            ShaderData *sd,
                            int path_flag);
-  static void eval_background(const KernelGlobals *kg,
+  static void eval_background(const KernelGlobalsCPU *kg,
                               const IntegratorStateCPU *state,
                               ShaderData *sd,
                               int path_flag);
-  static void eval_volume(const KernelGlobals *kg,
+  static void eval_volume(const KernelGlobalsCPU *kg,
                           const IntegratorStateCPU *state,
                           ShaderData *sd,
                           int path_flag);
-  static void eval_displacement(const KernelGlobals *kg,
+  static void eval_displacement(const KernelGlobalsCPU *kg,
                                 const IntegratorStateCPU *state,
                                 ShaderData *sd);
 
   /* attributes */
-  static int find_attribute(const KernelGlobals *kg,
+  static int find_attribute(const KernelGlobalsCPU *kg,
                             const ShaderData *sd,
                             uint id,
                             AttributeDescriptor *desc);
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 871e370123e..9692308c496 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -107,15 +107,14 @@ ccl_device_inline bool stack_valid(uint a)
 
 /* Reading Nodes */
 
-ccl_device_inline uint4 read_node(ccl_global const KernelGlobals *kg, ccl_private int *offset)
+ccl_device_inline uint4 read_node(KernelGlobals kg, ccl_private int *offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
   (*offset)++;
   return node;
 }
 
-ccl_device_inline float4 read_node_float(ccl_global const KernelGlobals *kg,
-                                         ccl_private int *offset)
+ccl_device_inline float4 read_node_float(KernelGlobals kg, ccl_private int *offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, *offset);
   float4 f = make_float4(__uint_as_float(node.x),
@@ -126,7 +125,7 @@ ccl_device_inline float4 read_node_float(ccl_global const KernelGlobals *kg,
   return f;
 }
 
-ccl_device_inline float4 fetch_node_float(ccl_global const KernelGlobals *kg, int offset)
+ccl_device_inline float4 fetch_node_float(KernelGlobals kg, int offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, offset);
   return make_float4(__uint_as_float(node.x),
@@ -227,7 +226,8 @@ CCL_NAMESPACE_BEGIN
 
 /* Main Interpreter Loop */
 template<uint node_feature_mask, ShaderType type>
-ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device void svm_eval_nodes(KernelGlobals kg,
+                               ConstIntegratorState state,
                                ShaderData *sd,
                                ccl_global float *render_buffer,
                                int path_flag)
@@ -257,12 +257,14 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
             kg, sd, stack, node, path_flag, offset);
         break;
       case NODE_CLOSURE_EMISSION:
-        if (KERNEL_NODES_FEATURE(EMISSION)) {
+        IF_KERNEL_NODES_FEATURE(EMISSION)
+        {
           svm_node_closure_emission(sd, stack, node);
         }
         break;
       case NODE_CLOSURE_BACKGROUND:
-        if (KERNEL_NODES_FEATURE(EMISSION)) {
+        IF_KERNEL_NODES_FEATURE(EMISSION)
+        {
           svm_node_closure_background(sd, stack, node);
         }
         break;
@@ -273,7 +275,8 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
         svm_node_closure_weight(sd, stack, node.y);
         break;
       case NODE_EMISSION_WEIGHT:
-        if (KERNEL_NODES_FEATURE(EMISSION)) {
+        IF_KERNEL_NODES_FEATURE(EMISSION)
+        {
           svm_node_emission_weight(kg, sd, stack, node);
         }
         break;
@@ -310,27 +313,32 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
         svm_node_vertex_color(kg, sd, stack, node.y, node.z, node.w);
         break;
       case NODE_GEOMETRY_BUMP_DX:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z);
         }
         break;
       case NODE_GEOMETRY_BUMP_DY:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z);
         }
         break;
       case NODE_SET_DISPLACEMENT:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_set_displacement(kg, sd, stack, node.y);
         }
         break;
       case NODE_DISPLACEMENT:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_displacement(kg, sd, stack, node);
         }
         break;
       case NODE_VECTOR_DISPLACEMENT:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           offset = svm_node_vector_displacement(kg, sd, stack, node, offset);
         }
         break;
@@ -344,52 +352,62 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
         offset = svm_node_tex_noise(kg, sd, stack, node.y, node.z, node.w, offset);
         break;
       case NODE_SET_BUMP:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_set_bump(kg, sd, stack, node);
         }
         break;
       case NODE_ATTR_BUMP_DX:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_attr_bump_dx(kg, sd, stack, node);
         }
         break;
       case NODE_ATTR_BUMP_DY:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_attr_bump_dy(kg, sd, stack, node);
         }
         break;
       case NODE_VERTEX_COLOR_BUMP_DX:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_vertex_color_bump_dx(kg, sd, stack, node.y, node.z, node.w);
         }
         break;
       case NODE_VERTEX_COLOR_BUMP_DY:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_vertex_color_bump_dy(kg, sd, stack, node.y, node.z, node.w);
         }
         break;
       case NODE_TEX_COORD_BUMP_DX:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           offset = svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, offset);
         }
         break;
       case NODE_TEX_COORD_BUMP_DY:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           offset = svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, offset);
         }
         break;
       case NODE_CLOSURE_SET_NORMAL:
-        if (KERNEL_NODES_FEATURE(BUMP)) {
+        IF_KERNEL_NODES_FEATURE(BUMP)
+        {
           svm_node_set_normal(kg, sd, stack, node.y, node.z);
         }
         break;
       case NODE_ENTER_BUMP_EVAL:
-        if (KERNEL_NODES_FEATURE(BUMP_STATE)) {
+        IF_KERNEL_NODES_FEATURE(BUMP_STATE)
+        {
           svm_node_enter_bump_eval(kg, sd, stack, node.y);
         }
         break;
       case NODE_LEAVE_BUMP_EVAL:
-        if (KERNEL_NODES_FEATURE(BUMP_STATE)) {
+        IF_KERNEL_NODES_FEATURE(BUMP_STATE)
+        {
           svm_node_leave_bump_eval(kg, sd, stack, node.y);
         }
         break;
@@ -407,12 +425,14 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
         svm_node_layer_weight(sd, stack, node);
         break;
       case NODE_CLOSURE_VOLUME:
-        if (KERNEL_NODES_FEATURE(VOLUME)) {
+        IF_KERNEL_NODES_FEATURE(VOLUME)
+        {
           svm_node_closure_volume<type>(kg, sd, stack, node);
         }
         break;
       case NODE_PRINCIPLED_VOLUME:
-        if (KERNEL_NODES_FEATURE(VOLUME)) {
+        IF_KERNEL_NODES_FEATURE(VOLUME)
+        {
           offset = svm_node_principled_volume<type>(kg, sd, stack, node, path_flag, offset);
         }
         break;
@@ -432,7 +452,7 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
         svm_node_brightness(sd, stack, node.y, node.z, node.w);
         break;
       case NODE_LIGHT_PATH:
-        svm_node_light_path(INTEGRATOR_STATE_PASS, sd, stack, node.y, node.z, path_flag);
+        svm_node_light_path<node_feature_mask>(kg, state, sd, stack, node.y, node.z, path_flag);
         break;
       case NODE_OBJECT_INFO:
         svm_node_object_info(kg, sd, stack, node.y, node.z);
@@ -442,7 +462,8 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
         break;
 #if defined(__HAIR__)
       case NODE_HAIR_INFO:
-        if (KERNEL_NODES_FEATURE(HAIR)) {
+        IF_KERNEL_NODES_FEATURE(HAIR)
+        {
           svm_node_hair_info(kg, sd, stack, node.y, node.z);
         }
         break;
@@ -554,15 +575,16 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
         break;
 #ifdef __SHADER_RAYTRACE__
       case NODE_BEVEL:
-        svm_node_bevel<node_feature_mask>(INTEGRATOR_STATE_PASS, sd, stack, node);
+        svm_node_bevel<node_feature_mask>(kg, state, sd, stack, node);
         break;
       case NODE_AMBIENT_OCCLUSION:
-        svm_node_ao<node_feature_mask>(INTEGRATOR_STATE_PASS, sd, stack, node);
+        svm_node_ao<node_feature_mask>(kg, state, sd, stack, node);
         break;
 #endif
 
       case NODE_TEX_VOXEL:
-        if (KERNEL_NODES_FEATURE(VOLUME)) {
+        IF_KERNEL_NODES_FEATURE(VOLUME)
+        {
           offset = svm_node_tex_voxel(kg, sd, stack, node, offset);
         }
         break;
@@ -572,10 +594,10 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
         }
         break;
       case NODE_AOV_COLOR:
-        svm_node_aov_color(INTEGRATOR_STATE_PASS, sd, stack, node, render_buffer);
+        svm_node_aov_color<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
         break;
       case NODE_AOV_VALUE:
-        svm_node_aov_value(INTEGRATOR_STATE_PASS, sd, stack, node, render_buffer);
+        svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
         break;
       default:
         kernel_assert(!"Unknown node type was passed to the SVM machine");
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 092f3817fd8..18d60c43b12 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -21,9 +21,11 @@ CCL_NAMESPACE_BEGIN
 #ifdef __SHADER_RAYTRACE__
 
 #  ifdef __KERNEL_OPTIX__
-extern "C" __device__ float __direct_callable__svm_node_ao(INTEGRATOR_STATE_CONST_ARGS,
+extern "C" __device__ float __direct_callable__svm_node_ao(KernelGlobals kg,
+                                                           ConstIntegratorState state,
 #  else
-ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device float svm_ao(KernelGlobals kg,
+                        ConstIntegratorState state,
 #  endif
                                                            ccl_private ShaderData *sd,
                                                            float3 N,
@@ -54,7 +56,7 @@ ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS,
 
   /* TODO: support ray-tracing in shadow shader evaluation? */
   RNGState rng_state;
-  path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+  path_state_rng_load(state, &rng_state);
 
   int unoccluded = 0;
   for (int sample = 0; sample < num_samples; sample++) {
@@ -96,7 +98,8 @@ ccl_device_inline
 ccl_device_noinline
 #  endif
     void
-    svm_node_ao(INTEGRATOR_STATE_CONST_ARGS,
+    svm_node_ao(KernelGlobals kg,
+                ConstIntegratorState state,
                 ccl_private ShaderData *sd,
                 ccl_private float *stack,
                 uint4 node)
@@ -112,11 +115,12 @@ ccl_device_noinline
 
   float ao = 1.0f;
 
-  if (KERNEL_NODES_FEATURE(RAYTRACE)) {
+  IF_KERNEL_NODES_FEATURE(RAYTRACE)
+  {
 #  ifdef __KERNEL_OPTIX__
-    ao = optixDirectCall<float>(0, INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags);
+    ao = optixDirectCall<float>(0, kg, state, sd, normal, dist, samples, flags);
 #  else
-    ao = svm_ao(INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags);
+    ao = svm_ao(kg, state, sd, normal, dist, samples, flags);
 #  endif
   }
 
diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h
index 640bec87ac9..d09eaa61cc0 100644
--- a/intern/cycles/kernel/svm/svm_aov.h
+++ b/intern/cycles/kernel/svm/svm_aov.h
@@ -25,7 +25,9 @@ ccl_device_inline bool svm_node_aov_check(const int path_flag, ccl_global float
   return ((render_buffer != NULL) && is_primary);
 }
 
-ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS,
+template<uint node_feature_mask>
+ccl_device void svm_node_aov_color(KernelGlobals kg,
+                                   ConstIntegratorState state,
                                    ccl_private ShaderData *sd,
                                    ccl_private float *stack,
                                    uint4 node,
@@ -33,8 +35,9 @@ ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS,
 {
   float3 val = stack_load_float3(stack, node.y);
 
-  if (render_buffer && !INTEGRATOR_STATE_IS_NULL) {
-    const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+  IF_KERNEL_NODES_FEATURE(AOV)
+  {
+    const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
     const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
                                           kernel_data.film.pass_stride;
     ccl_global float *buffer = render_buffer + render_buffer_offset +
@@ -43,7 +46,9 @@ ccl_device void svm_node_aov_color(INTEGRATOR_STATE_CONST_ARGS,
   }
 }
 
-ccl_device void svm_node_aov_value(INTEGRATOR_STATE_CONST_ARGS,
+template<uint node_feature_mask>
+ccl_device void svm_node_aov_value(KernelGlobals kg,
+                                   ConstIntegratorState state,
                                    ccl_private ShaderData *sd,
                                    ccl_private float *stack,
                                    uint4 node,
@@ -51,8 +56,9 @@ ccl_device void svm_node_aov_value(INTEGRATOR_STATE_CONST_ARGS,
 {
   float val = stack_load_float(stack, node.y);
 
-  if (render_buffer && !INTEGRATOR_STATE_IS_NULL) {
-    const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+  IF_KERNEL_NODES_FEATURE(AOV)
+  {
+    const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
     const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
                                           kernel_data.film.pass_stride;
     ccl_global float *buffer = render_buffer + render_buffer_offset +
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index 9fd401ba1c3..b3c66d29f5c 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Attribute Node */
 
-ccl_device AttributeDescriptor svm_node_attr_init(ccl_global const KernelGlobals *kg,
+ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals kg,
                                                   ccl_private ShaderData *sd,
                                                   uint4 node,
                                                   ccl_private NodeAttributeOutputType *type,
@@ -48,7 +48,7 @@ ccl_device AttributeDescriptor svm_node_attr_init(ccl_global const KernelGlobals
 }
 
 template<uint node_feature_mask>
-ccl_device_noinline void svm_node_attr(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_attr(KernelGlobals kg,
                                        ccl_private ShaderData *sd,
                                        ccl_private float *stack,
                                        uint4 node)
@@ -58,7 +58,8 @@ ccl_device_noinline void svm_node_attr(ccl_global const KernelGlobals *kg,
   AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
 
 #ifdef __VOLUME__
-  if (KERNEL_NODES_FEATURE(VOLUME)) {
+  IF_KERNEL_NODES_FEATURE(VOLUME)
+  {
     /* Volumes
      * NOTE: moving this into its own node type might help improve performance. */
     if (primitive_is_volume_attribute(sd, desc)) {
@@ -148,7 +149,7 @@ ccl_device_noinline void svm_node_attr(ccl_global const KernelGlobals *kg,
   }
 }
 
-ccl_device_noinline void svm_node_attr_bump_dx(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg,
                                                ccl_private ShaderData *sd,
                                                ccl_private float *stack,
                                                uint4 node)
@@ -244,7 +245,7 @@ ccl_device_noinline void svm_node_attr_bump_dx(ccl_global const KernelGlobals *k
   }
 }
 
-ccl_device_noinline void svm_node_attr_bump_dy(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg,
                                                ccl_private ShaderData *sd,
                                                ccl_private float *stack,
                                                uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index a76584e6bc8..197562434f9 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -99,9 +99,11 @@ ccl_device void svm_bevel_cubic_sample(const float radius,
  */
 
 #  ifdef __KERNEL_OPTIX__
-extern "C" __device__ float3 __direct_callable__svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS,
+extern "C" __device__ float3 __direct_callable__svm_node_bevel(KernelGlobals kg,
+                                                               ConstIntegratorState state,
 #  else
-ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device float3 svm_bevel(KernelGlobals kg,
+                            ConstIntegratorState state,
 #  endif
                                                                ccl_private ShaderData *sd,
                                                                float radius,
@@ -118,15 +120,15 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
   }
 
   /* Don't bevel for blurry indirect rays. */
-  if (INTEGRATOR_STATE(path, min_ray_pdf) < 8.0f) {
+  if (INTEGRATOR_STATE(state, path, min_ray_pdf) < 8.0f) {
     return sd->N;
   }
 
   /* Setup for multi intersection. */
   LocalIntersection isect;
-  uint lcg_state = lcg_state_init(INTEGRATOR_STATE(path, rng_hash),
-                                  INTEGRATOR_STATE(path, rng_offset),
-                                  INTEGRATOR_STATE(path, sample),
+  uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash),
+                                  INTEGRATOR_STATE(state, path, rng_offset),
+                                  INTEGRATOR_STATE(state, path, sample),
                                   0x64c6a40e);
 
   /* Sample normals from surrounding points on surface. */
@@ -134,7 +136,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
 
   /* TODO: support ray-tracing in shadow shader evaluation? */
   RNGState rng_state;
-  path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+  path_state_rng_load(state, &rng_state);
 
   for (int sample = 0; sample < num_samples; sample++) {
     float disk_u, disk_v;
@@ -287,7 +289,8 @@ ccl_device_inline
 ccl_device_noinline
 #  endif
     void
-    svm_node_bevel(INTEGRATOR_STATE_CONST_ARGS,
+    svm_node_bevel(KernelGlobals kg,
+                   ConstIntegratorState state,
                    ccl_private ShaderData *sd,
                    ccl_private float *stack,
                    uint4 node)
@@ -299,11 +302,12 @@ ccl_device_noinline
 
   float3 bevel_N = sd->N;
 
-  if (KERNEL_NODES_FEATURE(RAYTRACE)) {
+  IF_KERNEL_NODES_FEATURE(RAYTRACE)
+  {
 #  ifdef __KERNEL_OPTIX__
-    bevel_N = optixDirectCall<float3>(1, INTEGRATOR_STATE_PASS, sd, radius, num_samples);
+    bevel_N = optixDirectCall<float3>(1, kg, state, sd, radius, num_samples);
 #  else
-    bevel_N = svm_bevel(INTEGRATOR_STATE_PASS, sd, radius, num_samples);
+    bevel_N = svm_bevel(kg, state, sd, radius, num_samples);
 #  endif
 
     if (stack_valid(normal_offset)) {
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index 521afb42adc..f1adb0e76af 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Blackbody Node */
 
-ccl_device_noinline void svm_node_blackbody(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_blackbody(KernelGlobals kg,
                                             ccl_private ShaderData *sd,
                                             ccl_private float *stack,
                                             uint temperature_offset,
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index 29a8350f1c1..9dc31ef37ec 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -72,11 +72,8 @@ ccl_device_noinline_cpu float2 svm_brick(float3 p,
   return make_float2(tint, mortar);
 }
 
-ccl_device_noinline int svm_node_tex_brick(ccl_global const KernelGlobals *kg,
-                                           ccl_private ShaderData *sd,
-                                           ccl_private float *stack,
-                                           uint4 node,
-                                           int offset)
+ccl_device_noinline int svm_node_tex_brick(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint4 node2 = read_node(kg, &offset);
   uint4 node3 = read_node(kg, &offset);
diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h
index 70935c730f4..66e5b665532 100644
--- a/intern/cycles/kernel/svm/svm_bump.h
+++ b/intern/cycles/kernel/svm/svm_bump.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Bump Eval Nodes */
 
-ccl_device_noinline void svm_node_enter_bump_eval(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg,
                                                   ccl_private ShaderData *sd,
                                                   ccl_private float *stack,
                                                   uint offset)
@@ -45,7 +45,7 @@ ccl_device_noinline void svm_node_enter_bump_eval(ccl_global const KernelGlobals
   }
 }
 
-ccl_device_noinline void svm_node_leave_bump_eval(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg,
                                                   ccl_private ShaderData *sd,
                                                   ccl_private float *stack,
                                                   uint offset)
diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h
index 2b786757af8..787f11f38b5 100644
--- a/intern/cycles/kernel/svm/svm_camera.h
+++ b/intern/cycles/kernel/svm/svm_camera.h
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_camera(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_camera(KernelGlobals kg,
                                          ccl_private ShaderData *sd,
                                          ccl_private float *stack,
                                          uint out_vector,
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index e22367f4f59..9251d90c0e1 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -32,7 +32,7 @@ ccl_device float svm_checker(float3 p)
   return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f;
 }
 
-ccl_device_noinline void svm_node_tex_checker(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_tex_checker(KernelGlobals kg,
                                               ccl_private ShaderData *sd,
                                               ccl_private float *stack,
                                               uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h
index cb5224aebb2..5b5ea784f4a 100644
--- a/intern/cycles/kernel/svm/svm_clamp.h
+++ b/intern/cycles/kernel/svm/svm_clamp.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Clamp Node */
 
-ccl_device_noinline int svm_node_clamp(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_clamp(KernelGlobals kg,
                                        ccl_private ShaderData *sd,
                                        ccl_private float *stack,
                                        uint value_stack_offset,
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 87be73bb2cc..fb10288da72 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -61,8 +61,21 @@ ccl_device void svm_node_glass_setup(ccl_private ShaderData *sd,
   }
 }
 
+ccl_device_inline int svm_node_closure_bsdf_skip(KernelGlobals kg, int offset, uint type)
+{
+  if (type == CLOSURE_BSDF_PRINCIPLED_ID) {
+    /* Read (and discard) the four principled BSDF extra data nodes to get the right offset. */
+    read_node(kg, &offset);
+    read_node(kg, &offset);
+    read_node(kg, &offset);
+    read_node(kg, &offset);
+  }
+
+  return offset;
+}
+
 template<uint node_feature_mask, ShaderType shader_type>
-ccl_device_noinline int svm_node_closure_bsdf(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
                                               ccl_private ShaderData *sd,
                                               ccl_private float *stack,
                                               uint4 node,
@@ -80,16 +93,15 @@ ccl_device_noinline int svm_node_closure_bsdf(ccl_global const KernelGlobals *kg
   uint4 data_node = read_node(kg, &offset);
 
   /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */
-  if ((!KERNEL_NODES_FEATURE(BSDF) || shader_type != SHADER_TYPE_SURFACE) || mix_weight == 0.0f) {
-    if (type == CLOSURE_BSDF_PRINCIPLED_ID) {
-      /* Read all principled BSDF extra data to get the right offset. */
-      read_node(kg, &offset);
-      read_node(kg, &offset);
-      read_node(kg, &offset);
-      read_node(kg, &offset);
+  IF_KERNEL_NODES_FEATURE(BSDF)
+  {
+    if ((shader_type != SHADER_TYPE_SURFACE) || mix_weight == 0.0f) {
+      return svm_node_closure_bsdf_skip(kg, offset, type);
     }
-
-    return offset;
+  }
+  else
+  {
+    return svm_node_closure_bsdf_skip(kg, offset, type);
   }
 
   float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N;
@@ -944,7 +956,7 @@ ccl_device_noinline int svm_node_closure_bsdf(ccl_global const KernelGlobals *kg
 }
 
 template<ShaderType shader_type>
-ccl_device_noinline void svm_node_closure_volume(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_closure_volume(KernelGlobals kg,
                                                  ccl_private ShaderData *sd,
                                                  ccl_private float *stack,
                                                  uint4 node)
@@ -999,7 +1011,7 @@ ccl_device_noinline void svm_node_closure_volume(ccl_global const KernelGlobals
 }
 
 template<ShaderType shader_type>
-ccl_device_noinline int svm_node_principled_volume(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg,
                                                    ccl_private ShaderData *sd,
                                                    ccl_private float *stack,
                                                    uint4 node,
@@ -1194,7 +1206,7 @@ ccl_device void svm_node_closure_weight(ccl_private ShaderData *sd,
   svm_node_closure_store_weight(sd, weight);
 }
 
-ccl_device_noinline void svm_node_emission_weight(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_emission_weight(KernelGlobals kg,
                                                   ccl_private ShaderData *sd,
                                                   ccl_private float *stack,
                                                   uint4 node)
@@ -1232,7 +1244,7 @@ ccl_device_noinline void svm_node_mix_closure(ccl_private ShaderData *sd,
 
 /* (Bump) normal */
 
-ccl_device void svm_node_set_normal(ccl_global const KernelGlobals *kg,
+ccl_device void svm_node_set_normal(KernelGlobals kg,
                                     ccl_private ShaderData *sd,
                                     ccl_private float *stack,
                                     uint in_direction,
diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h
index 0d53779a5c8..ec5745dc78a 100644
--- a/intern/cycles/kernel/svm/svm_convert.h
+++ b/intern/cycles/kernel/svm/svm_convert.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Conversion Nodes */
 
-ccl_device_noinline void svm_node_convert(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_convert(KernelGlobals kg,
                                           ccl_private ShaderData *sd,
                                           ccl_private float *stack,
                                           uint type,
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index 7a3c8a6d36d..f2446c3b3ef 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Bump Node */
 
-ccl_device_noinline void svm_node_set_bump(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
                                            ccl_private ShaderData *sd,
                                            ccl_private float *stack,
                                            uint4 node)
@@ -88,7 +88,7 @@ ccl_device_noinline void svm_node_set_bump(ccl_global const KernelGlobals *kg,
 
 /* Displacement Node */
 
-ccl_device void svm_node_set_displacement(ccl_global const KernelGlobals *kg,
+ccl_device void svm_node_set_displacement(KernelGlobals kg,
                                           ccl_private ShaderData *sd,
                                           ccl_private float *stack,
                                           uint fac_offset)
@@ -97,7 +97,7 @@ ccl_device void svm_node_set_displacement(ccl_global const KernelGlobals *kg,
   sd->P += dP;
 }
 
-ccl_device_noinline void svm_node_displacement(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_displacement(KernelGlobals kg,
                                                ccl_private ShaderData *sd,
                                                ccl_private float *stack,
                                                uint4 node)
@@ -127,11 +127,8 @@ ccl_device_noinline void svm_node_displacement(ccl_global const KernelGlobals *k
   stack_store_float3(stack, node.z, dP);
 }
 
-ccl_device_noinline int svm_node_vector_displacement(ccl_global const KernelGlobals *kg,
-                                                     ccl_private ShaderData *sd,
-                                                     ccl_private float *stack,
-                                                     uint4 node,
-                                                     int offset)
+ccl_device_noinline int svm_node_vector_displacement(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint4 data_node = read_node(kg, &offset);
   uint space = data_node.x;
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index a94464d3a52..b29bfdbed07 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Geometry Node */
 
-ccl_device_noinline void svm_node_geometry(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_geometry(KernelGlobals kg,
                                            ccl_private ShaderData *sd,
                                            ccl_private float *stack,
                                            uint type,
@@ -54,7 +54,7 @@ ccl_device_noinline void svm_node_geometry(ccl_global const KernelGlobals *kg,
   stack_store_float3(stack, out_offset, data);
 }
 
-ccl_device_noinline void svm_node_geometry_bump_dx(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg,
                                                    ccl_private ShaderData *sd,
                                                    ccl_private float *stack,
                                                    uint type,
@@ -81,7 +81,7 @@ ccl_device_noinline void svm_node_geometry_bump_dx(ccl_global const KernelGlobal
 #endif
 }
 
-ccl_device_noinline void svm_node_geometry_bump_dy(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg,
                                                    ccl_private ShaderData *sd,
                                                    ccl_private float *stack,
                                                    uint type,
@@ -110,7 +110,7 @@ ccl_device_noinline void svm_node_geometry_bump_dy(ccl_global const KernelGlobal
 
 /* Object Info */
 
-ccl_device_noinline void svm_node_object_info(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_object_info(KernelGlobals kg,
                                               ccl_private ShaderData *sd,
                                               ccl_private float *stack,
                                               uint type,
@@ -152,7 +152,7 @@ ccl_device_noinline void svm_node_object_info(ccl_global const KernelGlobals *kg
 
 /* Particle Info */
 
-ccl_device_noinline void svm_node_particle_info(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_particle_info(KernelGlobals kg,
                                                 ccl_private ShaderData *sd,
                                                 ccl_private float *stack,
                                                 uint type,
@@ -214,7 +214,7 @@ ccl_device_noinline void svm_node_particle_info(ccl_global const KernelGlobals *
 
 /* Hair Info */
 
-ccl_device_noinline void svm_node_hair_info(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_hair_info(KernelGlobals kg,
                                             ccl_private ShaderData *sd,
                                             ccl_private float *stack,
                                             uint type,
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index feb85eda122..978c4c2d781 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -19,7 +19,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_hsv(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_hsv(KernelGlobals kg,
                                       ccl_private ShaderData *sd,
                                       ccl_private float *stack,
                                       uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h
index 7d41205c9ef..0215670d062 100644
--- a/intern/cycles/kernel/svm/svm_ies.h
+++ b/intern/cycles/kernel/svm/svm_ies.h
@@ -19,7 +19,7 @@ CCL_NAMESPACE_BEGIN
 /* IES Light */
 
 ccl_device_inline float interpolate_ies_vertical(
-    ccl_global const KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h)
+    KernelGlobals kg, int ofs, int v, int v_num, float v_frac, int h)
 {
   /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end
    * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing
@@ -39,10 +39,7 @@ ccl_device_inline float interpolate_ies_vertical(
   return cubic_interp(a, b, c, d, v_frac);
 }
 
-ccl_device_inline float kernel_ies_interp(ccl_global const KernelGlobals *kg,
-                                          int slot,
-                                          float h_angle,
-                                          float v_angle)
+ccl_device_inline float kernel_ies_interp(KernelGlobals kg, int slot, float h_angle, float v_angle)
 {
   /* Find offset of the IES data in the table. */
   int ofs = __float_as_int(kernel_tex_fetch(__ies, slot));
@@ -98,7 +95,7 @@ ccl_device_inline float kernel_ies_interp(ccl_global const KernelGlobals *kg,
   return max(cubic_interp(a, b, c, d, h_frac), 0.0f);
 }
 
-ccl_device_noinline void svm_node_ies(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_ies(KernelGlobals kg,
                                       ccl_private ShaderData *sd,
                                       ccl_private float *stack,
                                       uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 2de80d5fc29..68374fcfb0d 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,8 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float4
-svm_image_texture(ccl_global const KernelGlobals *kg, int id, float x, float y, uint flags)
+ccl_device float4 svm_image_texture(KernelGlobals kg, int id, float x, float y, uint flags)
 {
   if (id == -1) {
     return make_float4(
@@ -45,11 +44,8 @@ ccl_device_inline float3 texco_remap_square(float3 co)
   return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
 }
 
-ccl_device_noinline int svm_node_tex_image(ccl_global const KernelGlobals *kg,
-                                           ccl_private ShaderData *sd,
-                                           ccl_private float *stack,
-                                           uint4 node,
-                                           int offset)
+ccl_device_noinline int svm_node_tex_image(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint co_offset, out_offset, alpha_offset, flags;
 
@@ -121,7 +117,7 @@ ccl_device_noinline int svm_node_tex_image(ccl_global const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline void svm_node_tex_image_box(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_tex_image_box(KernelGlobals kg,
                                                 ccl_private ShaderData *sd,
                                                 ccl_private float *stack,
                                                 uint4 node)
@@ -223,7 +219,7 @@ ccl_device_noinline void svm_node_tex_image_box(ccl_global const KernelGlobals *
     stack_store_float(stack, alpha_offset, f.w);
 }
 
-ccl_device_noinline void svm_node_tex_environment(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_tex_environment(KernelGlobals kg,
                                                   ccl_private ShaderData *sd,
                                                   ccl_private float *stack,
                                                   uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index aaff8376c7c..955a1f23379 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -18,7 +18,9 @@ CCL_NAMESPACE_BEGIN
 
 /* Light Path Node */
 
-ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS,
+template<uint node_feature_mask>
+ccl_device_noinline void svm_node_light_path(KernelGlobals kg,
+                                             ConstIntegratorState state,
                                              ccl_private const ShaderData *sd,
                                              ccl_private float *stack,
                                              uint type,
@@ -62,9 +64,12 @@ ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS,
       /* Read bounce from difference location depending if this is a shadow
        * path. It's a bit dubious to have integrate state details leak into
        * this function but hard to avoid currently. */
-      int bounce = (INTEGRATOR_STATE_IS_NULL)    ? 0 :
-                   (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(shadow_path, bounce) :
-                                                   INTEGRATOR_STATE(path, bounce);
+      int bounce = 0;
+      IF_KERNEL_NODES_FEATURE(LIGHT_PATH)
+      {
+        bounce = (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, bounce) :
+                                                 INTEGRATOR_STATE(state, path, bounce);
+      }
 
       /* For background, light emission and shadow evaluation we from a
        * surface or volume we are effective one bounce further. */
@@ -77,11 +82,13 @@ ccl_device_noinline void svm_node_light_path(INTEGRATOR_STATE_CONST_ARGS,
     }
       /* TODO */
     case NODE_LP_ray_transparent: {
-      const int bounce = (INTEGRATOR_STATE_IS_NULL) ?
-                             0 :
-                         (path_flag & PATH_RAY_SHADOW) ?
-                             INTEGRATOR_STATE(shadow_path, transparent_bounce) :
-                             INTEGRATOR_STATE(path, transparent_bounce);
+      int bounce = 0;
+      IF_KERNEL_NODES_FEATURE(LIGHT_PATH)
+      {
+        bounce = (path_flag & PATH_RAY_SHADOW) ?
+                     INTEGRATOR_STATE(state, shadow_path, transparent_bounce) :
+                     INTEGRATOR_STATE(state, path, transparent_bounce);
+      }
 
       info = (float)bounce;
       break;
diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h
index 4c4f3bcf523..d3a429fec56 100644
--- a/intern/cycles/kernel/svm/svm_magic.h
+++ b/intern/cycles/kernel/svm/svm_magic.h
@@ -87,11 +87,8 @@ ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion)
   return make_float3(0.5f - x, 0.5f - y, 0.5f - z);
 }
 
-ccl_device_noinline int svm_node_tex_magic(ccl_global const KernelGlobals *kg,
-                                           ccl_private ShaderData *sd,
-                                           ccl_private float *stack,
-                                           uint4 node,
-                                           int offset)
+ccl_device_noinline int svm_node_tex_magic(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint depth;
   uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h
index f4f7d3ca76f..5e89947c6c7 100644
--- a/intern/cycles/kernel/svm/svm_map_range.h
+++ b/intern/cycles/kernel/svm/svm_map_range.h
@@ -24,7 +24,7 @@ ccl_device_inline float smootherstep(float edge0, float edge1, float x)
   return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f);
 }
 
-ccl_device_noinline int svm_node_map_range(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_map_range(KernelGlobals kg,
                                            ccl_private ShaderData *sd,
                                            ccl_private float *stack,
                                            uint value_stack_offset,
diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h
index 8102afc637e..ed420e5bc3d 100644
--- a/intern/cycles/kernel/svm/svm_mapping.h
+++ b/intern/cycles/kernel/svm/svm_mapping.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Mapping Node */
 
-ccl_device_noinline void svm_node_mapping(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_mapping(KernelGlobals kg,
                                           ccl_private ShaderData *sd,
                                           ccl_private float *stack,
                                           uint type,
@@ -43,7 +43,7 @@ ccl_device_noinline void svm_node_mapping(ccl_global const KernelGlobals *kg,
 
 /* Texture Mapping */
 
-ccl_device_noinline int svm_node_texture_mapping(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_texture_mapping(KernelGlobals kg,
                                                  ccl_private ShaderData *sd,
                                                  ccl_private float *stack,
                                                  uint vec_offset,
@@ -62,7 +62,7 @@ ccl_device_noinline int svm_node_texture_mapping(ccl_global const KernelGlobals
   return offset;
 }
 
-ccl_device_noinline int svm_node_min_max(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_min_max(KernelGlobals kg,
                                          ccl_private ShaderData *sd,
                                          ccl_private float *stack,
                                          uint vec_offset,
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index 3897a453873..97f7d486c09 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_math(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_math(KernelGlobals kg,
                                        ccl_private ShaderData *sd,
                                        ccl_private float *stack,
                                        uint type,
@@ -34,7 +34,7 @@ ccl_device_noinline void svm_node_math(ccl_global const KernelGlobals *kg,
   stack_store_float(stack, result_stack_offset, result);
 }
 
-ccl_device_noinline int svm_node_vector_math(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_vector_math(KernelGlobals kg,
                                              ccl_private ShaderData *sd,
                                              ccl_private float *stack,
                                              uint type,
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index 0064c5e643c..568dda3dddc 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Node */
 
-ccl_device_noinline int svm_node_mix(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_mix(KernelGlobals kg,
                                      ccl_private ShaderData *sd,
                                      ccl_private float *stack,
                                      uint fac_offset,
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 8523f45b95f..decd29bbe13 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -700,7 +700,7 @@ ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d(
   return value;
 }
 
-ccl_device_noinline int svm_node_tex_musgrave(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_tex_musgrave(KernelGlobals kg,
                                               ccl_private ShaderData *sd,
                                               ccl_private float *stack,
                                               uint offsets1,
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index 61da8227efa..3fe33f72b59 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -140,7 +140,7 @@ ccl_device void noise_texture_4d(float4 co,
   }
 }
 
-ccl_device_noinline int svm_node_tex_noise(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_tex_noise(KernelGlobals kg,
                                            ccl_private ShaderData *sd,
                                            ccl_private float *stack,
                                            uint dimensions,
diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h
index 0d1b4200d54..9bf64ed8823 100644
--- a/intern/cycles/kernel/svm/svm_normal.h
+++ b/intern/cycles/kernel/svm/svm_normal.h
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline int svm_node_normal(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_normal(KernelGlobals kg,
                                         ccl_private ShaderData *sd,
                                         ccl_private float *stack,
                                         uint in_normal_offset,
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index ef8b0d103c1..d2dddf4c6eb 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -21,18 +21,14 @@ CCL_NAMESPACE_BEGIN
 
 /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
 
-ccl_device_inline float fetch_float(ccl_global const KernelGlobals *kg, int offset)
+ccl_device_inline float fetch_float(KernelGlobals kg, int offset)
 {
   uint4 node = kernel_tex_fetch(__svm_nodes, offset);
   return __uint_as_float(node.x);
 }
 
-ccl_device_inline float float_ramp_lookup(ccl_global const KernelGlobals *kg,
-                                          int offset,
-                                          float f,
-                                          bool interpolate,
-                                          bool extrapolate,
-                                          int table_size)
+ccl_device_inline float float_ramp_lookup(
+    KernelGlobals kg, int offset, float f, bool interpolate, bool extrapolate, int table_size)
 {
   if ((f < 0.0f || f > 1.0f) && extrapolate) {
     float t0, dy;
@@ -63,12 +59,8 @@ ccl_device_inline float float_ramp_lookup(ccl_global const KernelGlobals *kg,
   return a;
 }
 
-ccl_device_inline float4 rgb_ramp_lookup(ccl_global const KernelGlobals *kg,
-                                         int offset,
-                                         float f,
-                                         bool interpolate,
-                                         bool extrapolate,
-                                         int table_size)
+ccl_device_inline float4 rgb_ramp_lookup(
+    KernelGlobals kg, int offset, float f, bool interpolate, bool extrapolate, int table_size)
 {
   if ((f < 0.0f || f > 1.0f) && extrapolate) {
     float4 t0, dy;
@@ -99,11 +91,8 @@ ccl_device_inline float4 rgb_ramp_lookup(ccl_global const KernelGlobals *kg,
   return a;
 }
 
-ccl_device_noinline int svm_node_rgb_ramp(ccl_global const KernelGlobals *kg,
-                                          ccl_private ShaderData *sd,
-                                          ccl_private float *stack,
-                                          uint4 node,
-                                          int offset)
+ccl_device_noinline int svm_node_rgb_ramp(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint fac_offset, color_offset, alpha_offset;
   uint interpolate = node.z;
@@ -124,11 +113,8 @@ ccl_device_noinline int svm_node_rgb_ramp(ccl_global const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline int svm_node_curves(ccl_global const KernelGlobals *kg,
-                                        ccl_private ShaderData *sd,
-                                        ccl_private float *stack,
-                                        uint4 node,
-                                        int offset)
+ccl_device_noinline int svm_node_curves(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint fac_offset, color_offset, out_offset;
   svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset);
@@ -153,11 +139,8 @@ ccl_device_noinline int svm_node_curves(ccl_global const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline int svm_node_curve(ccl_global const KernelGlobals *kg,
-                                       ccl_private ShaderData *sd,
-                                       ccl_private float *stack,
-                                       uint4 node,
-                                       int offset)
+ccl_device_noinline int svm_node_curve(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint fac_offset, value_in_offset, out_offset;
   svm_unpack_node_uchar3(node.y, &fac_offset, &value_in_offset, &out_offset);
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
index 3cd4ba87a55..bafa0456342 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline int svm_node_combine_hsv(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_combine_hsv(KernelGlobals kg,
                                              ccl_private ShaderData *sd,
                                              ccl_private float *stack,
                                              uint hue_in,
@@ -39,7 +39,7 @@ ccl_device_noinline int svm_node_combine_hsv(ccl_global const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline int svm_node_separate_hsv(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_separate_hsv(KernelGlobals kg,
                                               ccl_private ShaderData *sd,
                                               ccl_private float *stack,
                                               uint color_in,
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index 04db8109170..3ab7bc89c66 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -37,7 +37,7 @@ ccl_device float sky_perez_function(ccl_private float *lam, float theta, float g
          (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma);
 }
 
-ccl_device float3 sky_radiance_preetham(ccl_global const KernelGlobals *kg,
+ccl_device float3 sky_radiance_preetham(KernelGlobals kg,
                                         float3 dir,
                                         float sunphi,
                                         float suntheta,
@@ -90,7 +90,7 @@ ccl_device float sky_radiance_internal(ccl_private float *configuration, float t
           configuration[6] * mieM + configuration[7] * zenith);
 }
 
-ccl_device float3 sky_radiance_hosek(ccl_global const KernelGlobals *kg,
+ccl_device float3 sky_radiance_hosek(KernelGlobals kg,
                                      float3 dir,
                                      float sunphi,
                                      float suntheta,
@@ -127,7 +127,7 @@ ccl_device float3 geographical_to_direction(float lat, float lon)
   return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat));
 }
 
-ccl_device float3 sky_radiance_nishita(ccl_global const KernelGlobals *kg,
+ccl_device float3 sky_radiance_nishita(KernelGlobals kg,
                                        float3 dir,
                                        ccl_private float *nishita_data,
                                        uint texture_id)
@@ -209,11 +209,8 @@ ccl_device float3 sky_radiance_nishita(ccl_global const KernelGlobals *kg,
   return xyz_to_rgb(kg, xyz);
 }
 
-ccl_device_noinline int svm_node_tex_sky(ccl_global const KernelGlobals *kg,
-                                         ccl_private ShaderData *sd,
-                                         ccl_private float *stack,
-                                         uint4 node,
-                                         int offset)
+ccl_device_noinline int svm_node_tex_sky(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   /* Load data */
   uint dir_offset = node.y;
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 295d5e9f65b..657a4bb32a8 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Texture Coordinate Node */
 
-ccl_device_noinline int svm_node_tex_coord(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_tex_coord(KernelGlobals kg,
                                            ccl_private ShaderData *sd,
                                            int path_flag,
                                            ccl_private float *stack,
@@ -103,7 +103,7 @@ ccl_device_noinline int svm_node_tex_coord(ccl_global const KernelGlobals *kg,
   return offset;
 }
 
-ccl_device_noinline int svm_node_tex_coord_bump_dx(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
                                                    ccl_private ShaderData *sd,
                                                    int path_flag,
                                                    ccl_private float *stack,
@@ -188,7 +188,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(ccl_global const KernelGlobal
 #endif
 }
 
-ccl_device_noinline int svm_node_tex_coord_bump_dy(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
                                                    ccl_private ShaderData *sd,
                                                    int path_flag,
                                                    ccl_private float *stack,
@@ -273,7 +273,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(ccl_global const KernelGlobal
 #endif
 }
 
-ccl_device_noinline void svm_node_normal_map(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_normal_map(KernelGlobals kg,
                                              ccl_private ShaderData *sd,
                                              ccl_private float *stack,
                                              uint4 node)
@@ -366,7 +366,7 @@ ccl_device_noinline void svm_node_normal_map(ccl_global const KernelGlobals *kg,
   stack_store_float3(stack, normal_offset, N);
 }
 
-ccl_device_noinline void svm_node_tangent(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_tangent(KernelGlobals kg,
                                           ccl_private ShaderData *sd,
                                           ccl_private float *stack,
                                           uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h
index d1038bc072d..cc72961d0f6 100644
--- a/intern/cycles/kernel/svm/svm_value.h
+++ b/intern/cycles/kernel/svm/svm_value.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Value Nodes */
 
-ccl_device void svm_node_value_f(ccl_global const KernelGlobals *kg,
+ccl_device void svm_node_value_f(KernelGlobals kg,
                                  ccl_private ShaderData *sd,
                                  ccl_private float *stack,
                                  uint ivalue,
@@ -27,7 +27,7 @@ ccl_device void svm_node_value_f(ccl_global const KernelGlobals *kg,
   stack_store_float(stack, out_offset, __uint_as_float(ivalue));
 }
 
-ccl_device int svm_node_value_v(ccl_global const KernelGlobals *kg,
+ccl_device int svm_node_value_v(KernelGlobals kg,
                                 ccl_private ShaderData *sd,
                                 ccl_private float *stack,
                                 uint out_offset,
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index b6c898c3952..4e0d36647da 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Vector Transform */
 
-ccl_device_noinline void svm_node_vector_transform(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_vector_transform(KernelGlobals kg,
                                                    ccl_private ShaderData *sd,
                                                    ccl_private float *stack,
                                                    uint4 node)
diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h
index 3641f05ca43..a5fa15ee085 100644
--- a/intern/cycles/kernel/svm/svm_vertex_color.h
+++ b/intern/cycles/kernel/svm/svm_vertex_color.h
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_vertex_color(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_vertex_color(KernelGlobals kg,
                                                ccl_private ShaderData *sd,
                                                ccl_private float *stack,
                                                uint layer_id,
@@ -35,7 +35,7 @@ ccl_device_noinline void svm_node_vertex_color(ccl_global const KernelGlobals *k
   }
 }
 
-ccl_device_noinline void svm_node_vertex_color_bump_dx(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_vertex_color_bump_dx(KernelGlobals kg,
                                                        ccl_private ShaderData *sd,
                                                        ccl_private float *stack,
                                                        uint layer_id,
@@ -56,7 +56,7 @@ ccl_device_noinline void svm_node_vertex_color_bump_dx(ccl_global const KernelGl
   }
 }
 
-ccl_device_noinline void svm_node_vertex_color_bump_dy(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_vertex_color_bump_dy(KernelGlobals kg,
                                                        ccl_private ShaderData *sd,
                                                        ccl_private float *stack,
                                                        uint layer_id,
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 062a8bde415..b8067520770 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -917,7 +917,7 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord,
 }
 
 template<uint node_feature_mask>
-ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg,
+ccl_device_noinline int svm_node_tex_voronoi(KernelGlobals kg,
                                              ccl_private ShaderData *sd,
                                              ccl_private float *stack,
                                              uint dimensions,
@@ -1013,7 +1013,8 @@ ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg,
                         &position_out_2d);
           break;
         case NODE_VORONOI_SMOOTH_F1:
-          if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) {
+          IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA)
+          {
             voronoi_smooth_f1_2d(coord_2d,
                                  smoothness,
                                  exponent,
@@ -1058,7 +1059,8 @@ ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg,
                         &position_out);
           break;
         case NODE_VORONOI_SMOOTH_F1:
-          if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) {
+          IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA)
+          {
             voronoi_smooth_f1_3d(coord,
                                  smoothness,
                                  exponent,
@@ -1092,7 +1094,8 @@ ccl_device_noinline int svm_node_tex_voronoi(ccl_global const KernelGlobals *kg,
     }
 
     case 4: {
-      if (KERNEL_NODES_FEATURE(VORONOI_EXTRA)) {
+      IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA)
+      {
         float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w);
         float4 position_out_4d;
         switch (voronoi_feature) {
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 764fb71ba72..be4bb315145 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -19,11 +19,8 @@ CCL_NAMESPACE_BEGIN
 /* TODO(sergey): Think of making it more generic volume-type attribute
  * sampler.
  */
-ccl_device_noinline int svm_node_tex_voxel(ccl_global const KernelGlobals *kg,
-                                           ccl_private ShaderData *sd,
-                                           ccl_private float *stack,
-                                           uint4 node,
-                                           int offset)
+ccl_device_noinline int svm_node_tex_voxel(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint co_offset, density_out_offset, color_out_offset, space;
   svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 1ac130e2006..d04b7aa3476 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -82,11 +82,8 @@ ccl_device_noinline_cpu float svm_wave(NodeWaveType type,
   }
 }
 
-ccl_device_noinline int svm_node_tex_wave(ccl_global const KernelGlobals *kg,
-                                          ccl_private ShaderData *sd,
-                                          ccl_private float *stack,
-                                          uint4 node,
-                                          int offset)
+ccl_device_noinline int svm_node_tex_wave(
+    KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset)
 {
   uint4 node2 = read_node(kg, &offset);
   uint4 node3 = read_node(kg, &offset);
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index e891744f276..4ef041f68d5 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Wavelength to RGB */
 
-ccl_device_noinline void svm_node_wavelength(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_wavelength(KernelGlobals kg,
                                              ccl_private ShaderData *sd,
                                              ccl_private float *stack,
                                              uint wavelength,
diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h
index ccc49bf1a7c..6c2c3d6a683 100644
--- a/intern/cycles/kernel/svm/svm_white_noise.h
+++ b/intern/cycles/kernel/svm/svm_white_noise.h
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_noinline void svm_node_tex_white_noise(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_tex_white_noise(KernelGlobals kg,
                                                   ccl_private ShaderData *sd,
                                                   ccl_private float *stack,
                                                   uint dimensions,
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 70d1211aa4a..d75976d23e1 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Wireframe Node */
 
-ccl_device_inline float wireframe(ccl_global const KernelGlobals *kg,
+ccl_device_inline float wireframe(KernelGlobals kg,
                                   ccl_private ShaderData *sd,
                                   float size,
                                   int pixel_size,
@@ -91,7 +91,7 @@ ccl_device_inline float wireframe(ccl_global const KernelGlobals *kg,
   return 0.0f;
 }
 
-ccl_device_noinline void svm_node_wireframe(ccl_global const KernelGlobals *kg,
+ccl_device_noinline void svm_node_wireframe(KernelGlobals kg,
                                             ccl_private ShaderData *sd,
                                             ccl_private float *stack,
                                             uint4 node)
-- 
cgit v1.2.3