Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht>2021-10-17 17:10:10 +0300
committerBrecht Van Lommel <brecht@blender.org>2021-10-18 20:02:10 +0300
commit1df3b51988852fa8ee6b530a64aa23346db9acd4 (patch)
treedd79dba4c8ff8bb8474cc399e9d1b308d845e0cb /intern/cycles/kernel/integrator
parent44c3bb729be42d6d67eaf8918d7cbcb2ff0b315d (diff)
Cycles: replace integrator state argument macros
* Rename struct KernelGlobals to struct KernelGlobalsCPU
* Add KernelGlobals, IntegratorState and ConstIntegratorState typedefs
  that every device can define in its own way.
* Remove INTEGRATOR_STATE_ARGS and INTEGRATOR_STATE_PASS macros and
  replace with these new typedefs.
* Add explicit state argument to INTEGRATOR_STATE and similar macros

In preparation for decoupling main and shadow paths.

Differential Revision: https://developer.blender.org/D12888
Diffstat (limited to 'intern/cycles/kernel/integrator')
-rw-r--r--intern/cycles/kernel/integrator/integrator_init_from_bake.h19
-rw-r--r--intern/cycles/kernel/integrator/integrator_init_from_camera.h15
-rw-r--r--intern/cycles/kernel/integrator/integrator_intersect_closest.h58
-rw-r--r--intern/cycles/kernel/integrator/integrator_intersect_shadow.h42
-rw-r--r--intern/cycles/kernel/integrator/integrator_intersect_subsurface.h4
-rw-r--r--intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h25
-rw-r--r--intern/cycles/kernel/integrator/integrator_megakernel.h31
-rw-r--r--intern/cycles/kernel/integrator/integrator_shade_background.h74
-rw-r--r--intern/cycles/kernel/integrator/integrator_shade_light.h38
-rw-r--r--intern/cycles/kernel/integrator/integrator_shade_shadow.h72
-rw-r--r--intern/cycles/kernel/integrator/integrator_shade_surface.h196
-rw-r--r--intern/cycles/kernel/integrator/integrator_shade_volume.h158
-rw-r--r--intern/cycles/kernel/integrator/integrator_state.h69
-rw-r--r--intern/cycles/kernel/integrator/integrator_state_flow.h41
-rw-r--r--intern/cycles/kernel/integrator/integrator_state_util.h170
-rw-r--r--intern/cycles/kernel/integrator/integrator_subsurface.h62
-rw-r--r--intern/cycles/kernel/integrator/integrator_subsurface_disk.h17
-rw-r--r--intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h25
-rw-r--r--intern/cycles/kernel/integrator/integrator_volume_stack.h37
19 files changed, 589 insertions, 564 deletions
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index c822823de9c..df3c2103c5b 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -43,7 +43,8 @@ ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
/* Return false to indicate that this pixel is finished.
* Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
* that the pixel did converge. */
-ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrator_init_from_bake(KernelGlobals kg,
+ IntegratorState state,
ccl_global const KernelWorkTile *ccl_restrict tile,
ccl_global float *render_buffer,
const int x,
@@ -53,18 +54,18 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
PROFILING_INIT(kg, PROFILING_RAY_SETUP);
/* Initialize path state to give basic buffer access and allow early outputs. */
- path_state_init(INTEGRATOR_STATE_PASS, tile, x, y);
+ path_state_init(state, tile, x, y);
/* Check whether the pixel has converged and should not be sampled anymore. */
- if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) {
+ if (!kernel_need_sample_pixel(kg, state, render_buffer)) {
return false;
}
/* Always count the sample, even if the camera sample will reject the ray. */
- const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample);
+ const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample);
/* Setup render buffers. */
- const int index = INTEGRATOR_STATE(path, render_pixel_index);
+ const int index = INTEGRATOR_STATE(state, path, render_pixel_index);
const int pass_stride = kernel_data.film.pass_stride;
render_buffer += index * pass_stride;
@@ -91,7 +92,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
}
/* Initialize path state for path integration. */
- path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash);
+ path_state_init_integrator(kg, state, sample, rng_hash);
/* Barycentric UV with sub-pixel offset. */
float u = primitive[2];
@@ -131,7 +132,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
ray.time = 0.5f;
ray.dP = differential_zero_compact();
ray.dD = differential_zero_compact();
- integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_write_ray(kg, state, &ray);
/* Setup next kernel to execute. */
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
@@ -169,7 +170,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
ray.dD = differential_zero_compact();
/* Write ray. */
- integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_write_ray(kg, state, &ray);
/* Setup and write intersection. */
Intersection isect ccl_optional_struct_init;
@@ -182,7 +183,7 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
#ifdef __EMBREE__
isect.Ng = Ng;
#endif
- integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect);
+ integrator_state_write_isect(kg, state, &isect);
/* Setup next kernel to execute. */
const int shader_index = shader & SHADER_MASK;
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
index 291f0f106f0..5bab6b2e2fd 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_camera.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
@@ -25,7 +25,7 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *ccl_restrict kg,
+ccl_device_inline void integrate_camera_sample(KernelGlobals kg,
const int sample,
const int x,
const int y,
@@ -63,7 +63,8 @@ ccl_device_inline void integrate_camera_sample(ccl_global const KernelGlobals *c
/* Return false to indicate that this pixel is finished.
* Used by CPU implementation to not attempt to sample pixel for multiple samples once its known
* that the pixel did converge. */
-ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrator_init_from_camera(KernelGlobals kg,
+ IntegratorState state,
ccl_global const KernelWorkTile *ccl_restrict tile,
ccl_global float *render_buffer,
const int x,
@@ -73,10 +74,10 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
PROFILING_INIT(kg, PROFILING_RAY_SETUP);
/* Initialize path state to give basic buffer access and allow early outputs. */
- path_state_init(INTEGRATOR_STATE_PASS, tile, x, y);
+ path_state_init(state, tile, x, y);
/* Check whether the pixel has converged and should not be sampled anymore. */
- if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) {
+ if (!kernel_need_sample_pixel(kg, state, render_buffer)) {
return false;
}
@@ -85,7 +86,7 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
* This logic allows to both count actual number of samples per pixel, and to add samples to this
* pixel after it was converged and samples were added somewhere else (in which case the
* `scheduled_sample` will be different from actual number of samples in this pixel). */
- const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample);
+ const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample);
/* Initialize random number seed for path. */
const uint rng_hash = path_rng_hash_init(kg, sample, x, y);
@@ -99,11 +100,11 @@ ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
}
/* Write camera ray to state. */
- integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_write_ray(kg, state, &ray);
}
/* Initialize path state for path integration. */
- path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash);
+ path_state_init_integrator(kg, state, sample, rng_hash);
/* Continue with intersect_closest kernel, optionally initializing volume
* stack before that if the camera may be inside a volume. */
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
index 760c08159e3..e915d984e1d 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -29,7 +29,8 @@
CCL_NAMESPACE_BEGIN
template<uint32_t current_kernel>
-ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
+ IntegratorState state,
const int shader_flags)
{
@@ -37,12 +38,12 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
* We continue evaluating emissive/transparent surfaces and volumes, similar
* to direct lighting. Only if we know there are none can we terminate the
* path immediately. */
- if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+ if (path_state_ao_bounce(kg, state)) {
if (shader_flags & (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) {
- INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
}
- else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
- INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME;
+ else if (!integrator_state_volume_stack_is_empty(kg, state)) {
+ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME;
}
else {
return true;
@@ -51,14 +52,14 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
/* Load random number state. */
RNGState rng_state;
- path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+ path_state_rng_load(state, &rng_state);
/* We perform path termination in this kernel to avoid launching shade_surface
* and evaluating the shader when not needed. Only for emission and transparent
* surfaces in front of emission do we need to evaluate the shader, since we
* perform MIS as part of indirect rays. */
- const int path_flag = INTEGRATOR_STATE(path, flag);
- const float probability = path_state_continuation_probability(INTEGRATOR_STATE_PASS, path_flag);
+ const int path_flag = INTEGRATOR_STATE(state, path, flag);
+ const float probability = path_state_continuation_probability(kg, state, path_flag);
if (probability != 1.0f) {
const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE);
@@ -66,11 +67,11 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
if (probability == 0.0f || terminate >= probability) {
if (shader_flags & SD_HAS_EMISSION) {
/* Mark path to be terminated right after shader evaluation on the surface. */
- INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE;
+ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE;
}
- else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
+ else if (!integrator_state_volume_stack_is_empty(kg, state)) {
/* TODO: only do this for emissive volumes. */
- INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME;
+ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME;
}
else {
return true;
@@ -85,7 +86,8 @@ ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS
* leads to poor performance with CUDA atomics. */
template<uint32_t current_kernel>
ccl_device_forceinline void integrator_intersect_shader_next_kernel(
- INTEGRATOR_STATE_ARGS,
+ KernelGlobals kg,
+ IntegratorState state,
ccl_private const Intersection *ccl_restrict isect,
const int shader,
const int shader_flags)
@@ -122,9 +124,9 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
#ifdef __SHADOW_CATCHER__
const int object_flags = intersection_get_object_flags(kg, isect);
- if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) {
+ if (kernel_shadow_catcher_split(kg, state, object_flags)) {
if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
- INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
+ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
@@ -137,7 +139,7 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
/* If the split happened after bounce through a transparent object it's possible to have shadow
* patch. Make sure it is properly re-scheduled on the split path. */
- const int shadow_kernel = INTEGRATOR_STATE(shadow_path, queued_kernel);
+ const int shadow_kernel = INTEGRATOR_STATE(state, shadow_path, queued_kernel);
if (shadow_kernel != 0) {
INTEGRATOR_SHADOW_PATH_INIT(shadow_kernel);
}
@@ -145,21 +147,21 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
#endif
}
-ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
+ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST);
/* Read ray from integrator state into local memory. */
Ray ray ccl_optional_struct_init;
- integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_read_ray(kg, state, &ray);
kernel_assert(ray.t != 0.0f);
- const uint visibility = path_state_ray_visibility(INTEGRATOR_STATE_PASS);
- const int last_isect_prim = INTEGRATOR_STATE(isect, prim);
- const int last_isect_object = INTEGRATOR_STATE(isect, object);
+ const uint visibility = path_state_ray_visibility(state);
+ const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim);
+ const int last_isect_object = INTEGRATOR_STATE(state, isect, object);
/* Trick to use short AO rays to approximate indirect light at the end of the path. */
- if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+ if (path_state_ao_bounce(kg, state)) {
ray.t = kernel_data.integrator.ao_bounces_distance;
const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance;
@@ -181,8 +183,8 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
if (kernel_data.integrator.use_lamp_mis) {
/* NOTE: if we make lights visible to camera rays, we'll need to initialize
* these in the path_state_init. */
- const int last_type = INTEGRATOR_STATE(isect, type);
- const int path_flag = INTEGRATOR_STATE(path, flag);
+ const int last_type = INTEGRATOR_STATE(state, isect, type);
+ const int path_flag = INTEGRATOR_STATE(state, path, flag);
hit = lights_intersect(
kg, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) ||
@@ -190,16 +192,16 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
}
/* Write intersection result into global integrator state memory. */
- integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect);
+ integrator_state_write_isect(kg, state, &isect);
#ifdef __VOLUME__
- if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
+ if (!integrator_state_volume_stack_is_empty(kg, state)) {
const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP);
const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE;
const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0;
if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
- INTEGRATOR_STATE_PASS, flags)) {
+ kg, state, flags)) {
/* Continue with volume kernel if we are inside a volume, regardless
* if we hit anything. */
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
@@ -225,9 +227,9 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
const int flags = kernel_tex_fetch(__shaders, shader).flags;
if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
- INTEGRATOR_STATE_PASS, flags)) {
+ kg, state, flags)) {
integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
- INTEGRATOR_STATE_PASS, &isect, shader, flags);
+ kg, state, &isect, shader, flags);
return;
}
else {
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
index 3ebd21e4651..06f58f88bc8 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
@@ -19,19 +19,21 @@
CCL_NAMESPACE_BEGIN
/* Visibility for the shadow ray. */
-ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_forceinline uint integrate_intersect_shadow_visibility(KernelGlobals kg,
+ ConstIntegratorState state)
{
uint visibility = PATH_RAY_SHADOW;
#ifdef __SHADOW_CATCHER__
- const uint32_t path_flag = INTEGRATOR_STATE(shadow_path, flag);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag);
visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility);
#endif
return visibility;
}
-ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrate_intersect_shadow_opaque(KernelGlobals kg,
+ IntegratorState state,
ccl_private const Ray *ray,
const uint visibility)
{
@@ -46,22 +48,24 @@ ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS,
const bool opaque_hit = scene_intersect(kg, ray, visibility & opaque_mask, &isect);
if (!opaque_hit) {
- INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0;
}
return opaque_hit;
}
-ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals kg,
+ ConstIntegratorState state)
{
const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
- const int transparent_bounce = INTEGRATOR_STATE(shadow_path, transparent_bounce);
+ const int transparent_bounce = INTEGRATOR_STATE(state, shadow_path, transparent_bounce);
return max(transparent_max_bounce - transparent_bounce - 1, 0);
}
#ifdef __TRANSPARENT_SHADOWS__
-ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg,
+ IntegratorState state,
ccl_private const Ray *ray,
const uint visibility)
{
@@ -69,7 +73,7 @@ ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
/* Limit the number hits to the max transparent bounces allowed and the size that we
* have available in the integrator state. */
- const uint max_transparent_hits = integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_PASS);
+ const uint max_transparent_hits = integrate_shadow_max_transparent_hits(kg, state);
const uint max_hits = min(max_transparent_hits, (uint)INTEGRATOR_SHADOW_ISECT_SIZE);
uint num_hits = 0;
bool opaque_hit = scene_intersect_shadow_all(kg, ray, isect, visibility, max_hits, &num_hits);
@@ -88,41 +92,39 @@ ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
/* Write intersection result into global integrator state memory.
* More efficient may be to do this directly from the intersection kernel. */
for (int hit = 0; hit < num_recorded_hits; hit++) {
- integrator_state_write_shadow_isect(INTEGRATOR_STATE_PASS, &isect[hit], hit);
+ integrator_state_write_shadow_isect(state, &isect[hit], hit);
}
}
- INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = num_hits;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = num_hits;
}
else {
- INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0;
}
return opaque_hit;
}
#endif
-ccl_device void integrator_intersect_shadow(INTEGRATOR_STATE_ARGS)
+ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorState state)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW);
/* Read ray from integrator state into local memory. */
Ray ray ccl_optional_struct_init;
- integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_read_shadow_ray(kg, state, &ray);
/* Compute visibility. */
- const uint visibility = integrate_intersect_shadow_visibility(INTEGRATOR_STATE_PASS);
+ const uint visibility = integrate_intersect_shadow_visibility(kg, state);
#ifdef __TRANSPARENT_SHADOWS__
/* TODO: compile different kernels depending on this? Especially for OptiX
* conditional trace calls are bad. */
- const bool opaque_hit =
- (kernel_data.integrator.transparent_shadows) ?
- integrate_intersect_shadow_transparent(INTEGRATOR_STATE_PASS, &ray, visibility) :
- integrate_intersect_shadow_opaque(INTEGRATOR_STATE_PASS, &ray, visibility);
+ const bool opaque_hit = (kernel_data.integrator.transparent_shadows) ?
+ integrate_intersect_shadow_transparent(kg, state, &ray, visibility) :
+ integrate_intersect_shadow_opaque(kg, state, &ray, visibility);
#else
- const bool opaque_hit = integrate_intersect_shadow_opaque(
- INTEGRATOR_STATE_PASS, &ray, visibility);
+ const bool opaque_hit = integrate_intersect_shadow_opaque(kg, state, &ray, visibility);
#endif
if (opaque_hit) {
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
index 7c090952dc7..b575e7fd1e6 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
@@ -20,12 +20,12 @@
CCL_NAMESPACE_BEGIN
-ccl_device void integrator_intersect_subsurface(INTEGRATOR_STATE_ARGS)
+ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorState state)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_SUBSURFACE);
#ifdef __SUBSURFACE__
- if (subsurface_scatter(INTEGRATOR_STATE_PASS)) {
+ if (subsurface_scatter(kg, state)) {
return;
}
#endif
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
index 192e9c6ab43..7def3e2f3f3 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
@@ -23,7 +23,8 @@
CCL_NAMESPACE_BEGIN
-ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
+ IntegratorState state,
const float3 from_P,
const float3 to_P)
{
@@ -52,7 +53,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
shader_setup_from_ray(kg, stack_sd, &volume_ray, isect);
- volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd);
+ volume_stack_enter_exit(kg, state, stack_sd);
}
}
#else
@@ -61,7 +62,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
while (step < 2 * volume_stack_size &&
scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
- volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd);
+ volume_stack_enter_exit(kg, state, stack_sd);
/* Move ray forward. */
volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
@@ -73,7 +74,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
#endif
}
-ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
+ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
@@ -81,16 +82,16 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
Ray volume_ray ccl_optional_struct_init;
- integrator_state_read_ray(INTEGRATOR_STATE_PASS, &volume_ray);
+ integrator_state_read_ray(kg, state, &volume_ray);
volume_ray.t = FLT_MAX;
- const uint visibility = (INTEGRATOR_STATE(path, flag) & PATH_RAY_ALL_VISIBILITY);
+ const uint visibility = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_ALL_VISIBILITY);
int stack_index = 0, enclosed_index = 0;
/* Write background shader. */
if (kernel_data.background.volume_shader != SHADER_NONE) {
const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader};
- integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+ integrator_state_write_volume_stack(state, stack_index, new_entry);
stack_index++;
}
@@ -121,7 +122,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
}
for (int i = 0; i < stack_index && need_add; ++i) {
/* Don't add intersections twice. */
- VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ VolumeStack entry = integrator_state_read_volume_stack(state, i);
if (entry.object == stack_sd->object) {
need_add = false;
break;
@@ -129,7 +130,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
}
if (need_add && stack_index < volume_stack_size - 1) {
const VolumeStack new_entry = {stack_sd->object, stack_sd->shader};
- integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+ integrator_state_write_volume_stack(state, stack_index, new_entry);
++stack_index;
}
}
@@ -169,7 +170,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
}
for (int i = 0; i < stack_index && need_add; ++i) {
/* Don't add intersections twice. */
- VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ VolumeStack entry = integrator_state_read_volume_stack(state, i);
if (entry.object == stack_sd->object) {
need_add = false;
break;
@@ -177,7 +178,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
}
if (need_add) {
const VolumeStack new_entry = {stack_sd->object, stack_sd->shader};
- integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+ integrator_state_write_volume_stack(state, stack_index, new_entry);
++stack_index;
}
}
@@ -196,7 +197,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
/* Write terminator. */
const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE};
- integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+ integrator_state_write_volume_stack(state, stack_index, new_entry);
INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
diff --git a/intern/cycles/kernel/integrator/integrator_megakernel.h b/intern/cycles/kernel/integrator/integrator_megakernel.h
index 91363ea1c7f..a3b2b1f9e90 100644
--- a/intern/cycles/kernel/integrator/integrator_megakernel.h
+++ b/intern/cycles/kernel/integrator/integrator_megakernel.h
@@ -29,7 +29,8 @@
CCL_NAMESPACE_BEGIN
-ccl_device void integrator_megakernel(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_megakernel(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
/* Each kernel indicates the next kernel to execute, so here we simply
@@ -38,46 +39,46 @@ ccl_device void integrator_megakernel(INTEGRATOR_STATE_ARGS,
* TODO: investigate if we can use device side enqueue for GPUs to avoid
* having to compile this big kernel. */
while (true) {
- if (INTEGRATOR_STATE(shadow_path, queued_kernel)) {
+ if (INTEGRATOR_STATE(state, shadow_path, queued_kernel)) {
/* First handle any shadow paths before we potentially create more shadow paths. */
- switch (INTEGRATOR_STATE(shadow_path, queued_kernel)) {
+ switch (INTEGRATOR_STATE(state, shadow_path, queued_kernel)) {
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
- integrator_intersect_shadow(INTEGRATOR_STATE_PASS);
+ integrator_intersect_shadow(kg, state);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
- integrator_shade_shadow(INTEGRATOR_STATE_PASS, render_buffer);
+ integrator_shade_shadow(kg, state, render_buffer);
break;
default:
kernel_assert(0);
break;
}
}
- else if (INTEGRATOR_STATE(path, queued_kernel)) {
+ else if (INTEGRATOR_STATE(state, path, queued_kernel)) {
/* Then handle regular path kernels. */
- switch (INTEGRATOR_STATE(path, queued_kernel)) {
+ switch (INTEGRATOR_STATE(state, path, queued_kernel)) {
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
- integrator_intersect_closest(INTEGRATOR_STATE_PASS);
+ integrator_intersect_closest(kg, state);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
- integrator_shade_background(INTEGRATOR_STATE_PASS, render_buffer);
+ integrator_shade_background(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
- integrator_shade_surface(INTEGRATOR_STATE_PASS, render_buffer);
+ integrator_shade_surface(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
- integrator_shade_volume(INTEGRATOR_STATE_PASS, render_buffer);
+ integrator_shade_volume(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
- integrator_shade_surface_raytrace(INTEGRATOR_STATE_PASS, render_buffer);
+ integrator_shade_surface_raytrace(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
- integrator_shade_light(INTEGRATOR_STATE_PASS, render_buffer);
+ integrator_shade_light(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
- integrator_intersect_subsurface(INTEGRATOR_STATE_PASS);
+ integrator_intersect_subsurface(kg, state);
break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
- integrator_intersect_volume_stack(INTEGRATOR_STATE_PASS);
+ integrator_intersect_volume_stack(kg, state);
break;
default:
kernel_assert(0);
diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h
index a898f3fb2fc..d98e53e6bbf 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_background.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_background.h
@@ -23,12 +23,13 @@
CCL_NAMESPACE_BEGIN
-ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
+ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
#ifdef __BACKGROUND__
const int shader = kernel_data.background.surface_shader;
- const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
/* Use visibility flag to skip lights. */
if (shader & SHADER_EXCLUDE_ANY) {
@@ -54,14 +55,14 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
shader_setup_from_background(kg,
emission_sd,
- INTEGRATOR_STATE(ray, P),
- INTEGRATOR_STATE(ray, D),
- INTEGRATOR_STATE(ray, time));
+ INTEGRATOR_STATE(state, ray, P),
+ INTEGRATOR_STATE(state, ray, D),
+ INTEGRATOR_STATE(state, ray, time));
PROFILING_SHADER(emission_sd->object, emission_sd->shader);
PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL);
shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
- INTEGRATOR_STATE_PASS, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
+ kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
L = shader_background_eval(emission_sd);
}
@@ -69,11 +70,12 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
/* Background MIS weights. */
# ifdef __BACKGROUND_MIS__
/* Check if background light exists or if we should skip pdf. */
- if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_MIS_SKIP) && kernel_data.background.use_mis) {
- const float3 ray_P = INTEGRATOR_STATE(ray, P);
- const float3 ray_D = INTEGRATOR_STATE(ray, D);
- const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
- const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t);
+ if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) &&
+ kernel_data.background.use_mis) {
+ const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+ const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
/* multiple importance sampling, get background light pdf for ray
* direction, and compute weight with respect to BSDF pdf */
@@ -90,7 +92,8 @@ ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
#endif
}
-ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void integrate_background(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
/* Accumulate transparency for transparent background. We can skip background
@@ -99,11 +102,11 @@ ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS,
float transparent = 0.0f;
const bool is_transparent_background_ray = kernel_data.background.transparent &&
- (INTEGRATOR_STATE(path, flag) &
+ (INTEGRATOR_STATE(state, path, flag) &
PATH_RAY_TRANSPARENT_BACKGROUND);
if (is_transparent_background_ray) {
- transparent = average(INTEGRATOR_STATE(path, throughput));
+ transparent = average(INTEGRATOR_STATE(state, path, throughput));
#ifdef __PASSES__
eval_background = (kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND));
@@ -113,32 +116,31 @@ ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS,
}
/* Evaluate background shader. */
- float3 L = (eval_background) ?
- integrator_eval_background_shader(INTEGRATOR_STATE_PASS, render_buffer) :
- zero_float3();
+ float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) :
+ zero_float3();
/* When using the ao bounces approximation, adjust background
* shader intensity with ao factor. */
- if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+ if (path_state_ao_bounce(kg, state)) {
L *= kernel_data.integrator.ao_bounces_factor;
}
/* Write to render buffer. */
- kernel_accum_background(
- INTEGRATOR_STATE_PASS, L, transparent, is_transparent_background_ray, render_buffer);
+ kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer);
}
-ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
- const float3 ray_D = INTEGRATOR_STATE(ray, D);
- const float ray_time = INTEGRATOR_STATE(ray, time);
+ const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+ const float ray_time = INTEGRATOR_STATE(state, ray, time);
LightSample ls ccl_optional_struct_init;
for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
if (light_sample_from_distant_ray(kg, ray_D, lamp, &ls)) {
/* Use visibility flag to skip lights. */
#ifdef __PASSES__
- const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
if (ls.shader & SHADER_EXCLUDE_ANY) {
if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
@@ -156,8 +158,7 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- float3 light_eval = light_sample_shader_eval(
- INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
+ float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) {
return;
}
@@ -166,33 +167,34 @@ ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
if (!(path_flag & PATH_RAY_MIS_SKIP)) {
/* multiple importance sampling, get regular light pdf,
* and compute weight with respect to BSDF pdf */
- const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
+ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf);
light_eval *= mis_weight;
}
/* Write to render buffer. */
- const float3 throughput = INTEGRATOR_STATE(path, throughput);
- kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer);
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_accum_emission(kg, state, throughput, light_eval, render_buffer);
}
}
}
-ccl_device void integrator_shade_background(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_shade_background(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP);
/* TODO: unify these in a single loop to only have a single shader evaluation call. */
- integrate_distant_lights(INTEGRATOR_STATE_PASS, render_buffer);
- integrate_background(INTEGRATOR_STATE_PASS, render_buffer);
+ integrate_distant_lights(kg, state, render_buffer);
+ integrate_background(kg, state, render_buffer);
#ifdef __SHADOW_CATCHER__
- if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
- INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND;
+ if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
+ INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND;
- const int isect_prim = INTEGRATOR_STATE(isect, prim);
- const int isect_type = INTEGRATOR_STATE(isect, type);
+ const int isect_prim = INTEGRATOR_STATE(state, isect, prim);
+ const int isect_type = INTEGRATOR_STATE(state, isect, type);
const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type);
const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h
index d8f8da63023..4f0f5a39756 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_light.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_light.h
@@ -23,29 +23,30 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void integrate_light(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
/* Setup light sample. */
Intersection isect ccl_optional_struct_init;
- integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+ integrator_state_read_isect(kg, state, &isect);
- float3 ray_P = INTEGRATOR_STATE(ray, P);
- const float3 ray_D = INTEGRATOR_STATE(ray, D);
- const float ray_time = INTEGRATOR_STATE(ray, time);
+ float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+ const float ray_time = INTEGRATOR_STATE(state, ray, time);
/* Advance ray beyond light. */
/* TODO: can we make this more numerically robust to avoid reintersecting the
* same light in some cases? */
const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D);
- INTEGRATOR_STATE_WRITE(ray, P) = new_ray_P;
- INTEGRATOR_STATE_WRITE(ray, t) -= isect.t;
+ INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P;
+ INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t;
/* Set position to where the BSDF was sampled, for correct MIS PDF. */
- const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t);
+ const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
ray_P -= ray_D * mis_ray_t;
isect.t += mis_ray_t;
- INTEGRATOR_STATE_WRITE(path, mis_ray_t) = mis_ray_t + isect.t;
+ INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = mis_ray_t + isect.t;
LightSample ls ccl_optional_struct_init;
const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls);
@@ -56,7 +57,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
/* Use visibility flag to skip lights. */
#ifdef __PASSES__
- const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
if (ls.shader & SHADER_EXCLUDE_ANY) {
if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
@@ -73,7 +74,7 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- float3 light_eval = light_sample_shader_eval(INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
+ float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) {
return;
}
@@ -82,22 +83,23 @@ ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
if (!(path_flag & PATH_RAY_MIS_SKIP)) {
/* multiple importance sampling, get regular light pdf,
* and compute weight with respect to BSDF pdf */
- const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
+ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf);
light_eval *= mis_weight;
}
/* Write to render buffer. */
- const float3 throughput = INTEGRATOR_STATE(path, throughput);
- kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer);
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_accum_emission(kg, state, throughput, light_eval, render_buffer);
}
-ccl_device void integrator_shade_light(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_shade_light(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP);
- integrate_light(INTEGRATOR_STATE_PASS, render_buffer);
+ integrate_light(kg, state, render_buffer);
/* TODO: we could get stuck in an infinite loop if there are precision issues
* and the same light is hit again.
@@ -105,8 +107,8 @@ ccl_device void integrator_shade_light(INTEGRATOR_STATE_ARGS,
* As a workaround count this as a transparent bounce. It makes some sense
* to interpret lights as transparent surfaces (and support making them opaque),
* but this needs to be revisited. */
- uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce) + 1;
- INTEGRATOR_STATE_WRITE(path, transparent_bounce) = transparent_bounce;
+ uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1;
+ INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce;
if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
index 3857b522b25..cdbe85f6b8c 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -29,7 +29,9 @@ ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits)
}
#ifdef __TRANSPARENT_SHADOWS__
-ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_ARGS, const int hit)
+ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg,
+ IntegratorState state,
+ const int hit)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE);
@@ -43,22 +45,22 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
/* Setup shader data at surface. */
Intersection isect ccl_optional_struct_init;
- integrator_state_read_shadow_isect(INTEGRATOR_STATE_PASS, &isect, hit);
+ integrator_state_read_shadow_isect(state, &isect, hit);
Ray ray ccl_optional_struct_init;
- integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_read_shadow_ray(kg, state, &ray);
shader_setup_from_ray(kg, shadow_sd, &ray, &isect);
/* Evaluate shader. */
if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
- INTEGRATOR_STATE_PASS, shadow_sd, NULL, PATH_RAY_SHADOW);
+ kg, state, shadow_sd, NULL, PATH_RAY_SHADOW);
}
# ifdef __VOLUME__
/* Exit/enter volume. */
- shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, shadow_sd);
+ shadow_volume_stack_enter_exit(kg, state, shadow_sd);
# endif
/* Compute transparency from closures. */
@@ -66,7 +68,8 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_A
}
# ifdef __VOLUME__
-ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS,
+ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
+ IntegratorState state,
const int hit,
const int num_recorded_hits,
ccl_private float3 *ccl_restrict
@@ -80,26 +83,29 @@ ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS
/* Setup shader data. */
Ray ray ccl_optional_struct_init;
- integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_read_shadow_ray(kg, state, &ray);
/* Modify ray position and length to match current segment. */
- const float start_t = (hit == 0) ? 0.0f : INTEGRATOR_STATE_ARRAY(shadow_isect, hit - 1, t);
- const float end_t = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(shadow_isect, hit, t) :
- ray.t;
+ const float start_t = (hit == 0) ? 0.0f :
+ INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
+ const float end_t = (hit < num_recorded_hits) ?
+ INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
+ ray.t;
ray.P += start_t * ray.D;
ray.t = end_t - start_t;
shader_setup_from_volume(kg, shadow_sd, &ray);
- const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
- return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
- });
+ const float step_size = volume_stack_step_size(
+ kg, state, [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); });
- volume_shadow_heterogeneous(INTEGRATOR_STATE_PASS, &ray, shadow_sd, throughput, step_size);
+ volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size);
}
# endif
-ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const int num_hits)
+ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
+ IntegratorState state,
+ const int num_hits)
{
/* Accumulate shadow for transparent surfaces. */
const int num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
@@ -108,29 +114,28 @@ ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const
/* Volume shaders. */
if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) {
# ifdef __VOLUME__
- if (!integrator_state_shadow_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
- float3 throughput = INTEGRATOR_STATE(shadow_path, throughput);
- integrate_transparent_volume_shadow(
- INTEGRATOR_STATE_PASS, hit, num_recorded_hits, &throughput);
+ if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) {
+ float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
+ integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput);
if (is_zero(throughput)) {
return true;
}
- INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput;
}
# endif
}
/* Surface shaders. */
if (hit < num_recorded_hits) {
- const float3 shadow = integrate_transparent_surface_shadow(INTEGRATOR_STATE_PASS, hit);
- const float3 throughput = INTEGRATOR_STATE(shadow_path, throughput) * shadow;
+ const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit);
+ const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow;
if (is_zero(throughput)) {
return true;
}
- INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
- INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) += 1;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) += 1;
}
/* Note we do not need to check max_transparent_bounce here, the number
@@ -141,26 +146,27 @@ ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const
if (shadow_intersections_has_remaining(num_hits)) {
/* There are more hits that we could not recorded due to memory usage,
* adjust ray to intersect again from the last hit. */
- const float last_hit_t = INTEGRATOR_STATE_ARRAY(shadow_isect, num_recorded_hits - 1, t);
- const float3 ray_P = INTEGRATOR_STATE(shadow_ray, P);
- const float3 ray_D = INTEGRATOR_STATE(shadow_ray, D);
- INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D);
- INTEGRATOR_STATE_WRITE(shadow_ray, t) -= last_hit_t;
+ const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t);
+ const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D);
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D);
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t;
}
return false;
}
#endif /* __TRANSPARENT_SHADOWS__ */
-ccl_device void integrator_shade_shadow(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_shade_shadow(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP);
- const int num_hits = INTEGRATOR_STATE(shadow_path, num_hits);
+ const int num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits);
#ifdef __TRANSPARENT_SHADOWS__
/* Evaluate transparent shadows. */
- const bool opaque = integrate_transparent_shadow(INTEGRATOR_STATE_PASS, num_hits);
+ const bool opaque = integrate_transparent_shadow(kg, state, num_hits);
if (opaque) {
INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
@@ -174,7 +180,7 @@ ccl_device void integrator_shade_shadow(INTEGRATOR_STATE_ARGS,
return;
}
else {
- kernel_accum_light(INTEGRATOR_STATE_PASS, render_buffer);
+ kernel_accum_light(kg, state, render_buffer);
INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
index 0d739517592..bc97fde0e4a 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_surface.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ -28,33 +28,35 @@
CCL_NAMESPACE_BEGIN
-ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg,
+ ConstIntegratorState state,
ccl_private ShaderData *sd)
{
Intersection isect ccl_optional_struct_init;
- integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+ integrator_state_read_isect(kg, state, &isect);
Ray ray ccl_optional_struct_init;
- integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_read_ray(kg, state, &ray);
shader_setup_from_ray(kg, sd, &ray, &isect);
}
#ifdef __HOLDOUT__
-ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg,
+ ConstIntegratorState state,
ccl_private ShaderData *sd,
ccl_global float *ccl_restrict render_buffer)
{
/* Write holdout transparency to render buffer and stop if fully holdout. */
- const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
const float3 holdout_weight = shader_holdout_apply(kg, sd);
if (kernel_data.background.transparent) {
- const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
const float transparent = average(holdout_weight * throughput);
- kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer);
+ kernel_accum_transparent(kg, state, transparent, render_buffer);
}
if (isequal_float3(holdout_weight, one_float3())) {
return false;
@@ -66,12 +68,13 @@ ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARG
#endif /* __HOLDOUT__ */
#ifdef __EMISSION__
-ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
+ ConstIntegratorState state,
ccl_private const ShaderData *sd,
ccl_global float *ccl_restrict
render_buffer)
{
- const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
/* Evaluate emissive closure. */
float3 L = shader_emissive_eval(sd);
@@ -83,8 +86,8 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR
if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
# endif
{
- const float bsdf_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
- const float t = sd->ray_length + INTEGRATOR_STATE(path, mis_ray_t);
+ const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+ const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t);
/* Multiple importance sampling, get triangle light pdf,
* and compute weight with respect to BSDF pdf. */
@@ -94,15 +97,16 @@ ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_AR
L *= mis_weight;
}
- const float3 throughput = INTEGRATOR_STATE(path, throughput);
- kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, L, render_buffer);
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_accum_emission(kg, state, throughput, L, render_buffer);
}
#endif /* __EMISSION__ */
#ifdef __EMISSION__
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
-ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
+ IntegratorState state,
ccl_private ShaderData *sd,
ccl_private const RNGState *rng_state)
{
@@ -114,8 +118,8 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
/* Sample position on a light. */
LightSample ls ccl_optional_struct_init;
{
- const int path_flag = INTEGRATOR_STATE(path, flag);
- const uint bounce = INTEGRATOR_STATE(path, bounce);
+ const int path_flag = INTEGRATOR_STATE(state, path, flag);
+ const uint bounce = INTEGRATOR_STATE(state, path, bounce);
float light_u, light_v;
path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
@@ -135,8 +139,7 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
* non-constant light sources. */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- const float3 light_eval = light_sample_shader_eval(
- INTEGRATOR_STATE_PASS, emission_sd, &ls, sd->time);
+ const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
if (is_zero(light_eval)) {
return;
}
@@ -165,39 +168,39 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
const bool is_light = light_sample_is_light(&ls);
/* Copy volume stack and enter/exit volume. */
- integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS);
+ integrator_state_copy_volume_stack_to_shadow(kg, state);
if (is_transmission) {
# ifdef __VOLUME__
- shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, sd);
+ shadow_volume_stack_enter_exit(kg, state, sd);
# endif
}
/* Write shadow ray and associated state to global memory. */
- integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_write_shadow_ray(kg, state, &ray);
/* Copy state from main path to shadow path. */
- const uint16_t bounce = INTEGRATOR_STATE(path, bounce);
- const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce);
- uint32_t shadow_flag = INTEGRATOR_STATE(path, flag);
+ const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce);
+ const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
+ uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS;
- const float3 throughput = INTEGRATOR_STATE(path, throughput) * bsdf_eval_sum(&bsdf_eval);
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval);
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
const float3 diffuse_glossy_ratio = (bounce == 0) ?
bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) :
- INTEGRATOR_STATE(path, diffuse_glossy_ratio);
- INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
+ INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
+ INTEGRATOR_STATE_WRITE(state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
}
- INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag;
- INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce;
- INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce;
- INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, flag) = shadow_flag;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, bounce) = bounce;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) = transparent_bounce;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput;
if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
- INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, unshadowed_throughput) = throughput;
}
/* Branch off shadow kernel. */
@@ -207,7 +210,10 @@ ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS
/* Path tracing: bounce off or through surface with new direction. */
ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
- INTEGRATOR_STATE_ARGS, ccl_private ShaderData *sd, ccl_private const RNGState *rng_state)
+ KernelGlobals kg,
+ IntegratorState state,
+ ccl_private ShaderData *sd,
+ ccl_private const RNGState *rng_state)
{
/* Sample BSDF or BSSRDF. */
if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) {
@@ -221,7 +227,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
#ifdef __SUBSURFACE__
/* BSSRDF closure, we schedule subsurface intersection kernel. */
if (CLOSURE_IS_BSSRDF(sc->type)) {
- return subsurface_bounce(INTEGRATOR_STATE_PASS, sd, sc);
+ return subsurface_bounce(kg, state, sd, sc);
}
#endif
@@ -240,63 +246,64 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
}
/* Setup ray. Note that clipping works through transparent bounces. */
- INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
- INTEGRATOR_STATE_WRITE(ray, D) = normalize(bsdf_omega_in);
- INTEGRATOR_STATE_WRITE(ray, t) = (label & LABEL_TRANSPARENT) ?
- INTEGRATOR_STATE(ray, t) - sd->ray_length :
- FLT_MAX;
+ INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P,
+ (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
+ INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ?
+ INTEGRATOR_STATE(state, ray, t) - sd->ray_length :
+ FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
- INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
- INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(bsdf_domega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
+ INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
#endif
/* Update throughput. */
- float3 throughput = INTEGRATOR_STATE(path, throughput);
+ float3 throughput = INTEGRATOR_STATE(state, path, throughput);
throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf;
- INTEGRATOR_STATE_WRITE(path, throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- if (INTEGRATOR_STATE(path, bounce) == 0) {
- INTEGRATOR_STATE_WRITE(path,
- diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(&bsdf_eval);
+ if (INTEGRATOR_STATE(state, path, bounce) == 0) {
+ INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(
+ &bsdf_eval);
}
}
/* Update path state */
if (label & LABEL_TRANSPARENT) {
- INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length;
+ INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
}
else {
- INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = bsdf_pdf;
- INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f;
- INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(bsdf_pdf,
- INTEGRATOR_STATE(path, min_ray_pdf));
+ INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
+ INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
+ bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
}
- path_state_next(INTEGRATOR_STATE_PASS, label);
+ path_state_next(kg, state, label);
return label;
}
#ifdef __VOLUME__
-ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline bool integrate_surface_volume_only_bounce(IntegratorState state,
ccl_private ShaderData *sd)
{
- if (!path_state_volume_next(INTEGRATOR_STATE_PASS)) {
+ if (!path_state_volume_next(state)) {
return LABEL_NONE;
}
/* Setup ray position, direction stays unchanged. */
- INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, -sd->Ng);
+ INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, -sd->Ng);
/* Clipping works through transparent. */
- INTEGRATOR_STATE_WRITE(ray, t) -= sd->ray_length;
+ INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length;
# ifdef __RAY_DIFFERENTIALS__
- INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
+ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
# endif
- INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length;
+ INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
return LABEL_TRANSMIT | LABEL_TRANSPARENT;
}
@@ -304,17 +311,19 @@ ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STAT
#if defined(__AO__) && defined(__SHADER_RAYTRACE__)
ccl_device_forceinline void integrate_surface_ao_pass(
- INTEGRATOR_STATE_CONST_ARGS,
+ KernelGlobals kg,
+ ConstIntegratorState state,
ccl_private const ShaderData *ccl_restrict sd,
ccl_private const RNGState *ccl_restrict rng_state,
ccl_global float *ccl_restrict render_buffer)
{
# ifdef __KERNEL_OPTIX__
- optixDirectCall<void>(2, INTEGRATOR_STATE_PASS, sd, rng_state, render_buffer);
+ optixDirectCall<void>(2, kg, state, sd, rng_state, render_buffer);
}
extern "C" __device__ void __direct_callable__ao_pass(
- INTEGRATOR_STATE_CONST_ARGS,
+ KernelGlobals kg,
+ ConstIntegratorState state,
ccl_private const ShaderData *ccl_restrict sd,
ccl_private const RNGState *ccl_restrict rng_state,
ccl_global float *ccl_restrict render_buffer)
@@ -339,9 +348,8 @@ extern "C" __device__ void __direct_callable__ao_pass(
Intersection isect ccl_optional_struct_init;
if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) {
- ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS,
- render_buffer);
- const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, throughput);
}
}
@@ -349,7 +357,8 @@ extern "C" __device__ void __direct_callable__ao_pass(
#endif /* defined(__AO__) && defined(__SHADER_RAYTRACE__) */
template<uint node_feature_mask>
-ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
+ccl_device bool integrate_surface(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
@@ -357,7 +366,7 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
/* Setup shader data. */
ShaderData sd;
- integrate_surface_shader_setup(INTEGRATOR_STATE_PASS, &sd);
+ integrate_surface_shader_setup(kg, state, &sd);
PROFILING_SHADER(sd.object, sd.shader);
int continue_path_label = 0;
@@ -366,7 +375,7 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
#ifdef __VOLUME__
if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
#endif
- const int path_flag = INTEGRATOR_STATE(path, flag);
+ const int path_flag = INTEGRATOR_STATE(state, path, flag);
#ifdef __SUBSURFACE__
/* Can skip shader evaluation for BSSRDF exit point without bump mapping. */
@@ -375,23 +384,23 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
{
/* Evaluate shader. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL);
- shader_eval_surface<node_feature_mask>(INTEGRATOR_STATE_PASS, &sd, render_buffer, path_flag);
+ shader_eval_surface<node_feature_mask>(kg, state, &sd, render_buffer, path_flag);
}
#ifdef __SUBSURFACE__
if (path_flag & PATH_RAY_SUBSURFACE) {
/* When coming from inside subsurface scattering, setup a diffuse
* closure to perform lighting at the exit point. */
- subsurface_shader_data_setup(INTEGRATOR_STATE_PASS, &sd, path_flag);
- INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SUBSURFACE;
+ subsurface_shader_data_setup(kg, state, &sd, path_flag);
+ INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SUBSURFACE;
}
#endif
- shader_prepare_surface_closures(INTEGRATOR_STATE_PASS, &sd);
+ shader_prepare_surface_closures(kg, state, &sd);
#ifdef __HOLDOUT__
/* Evaluate holdout. */
- if (!integrate_surface_holdout(INTEGRATOR_STATE_PASS, &sd, render_buffer)) {
+ if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) {
return false;
}
#endif
@@ -399,19 +408,19 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
#ifdef __EMISSION__
/* Write emission. */
if (sd.flag & SD_EMISSION) {
- integrate_surface_emission(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+ integrate_surface_emission(kg, state, &sd, render_buffer);
}
#endif
#ifdef __PASSES__
/* Write render passes. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES);
- kernel_write_data_passes(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+ kernel_write_data_passes(kg, state, &sd, render_buffer);
#endif
/* Load random number state. */
RNGState rng_state;
- path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+ path_state_rng_load(state, &rng_state);
/* Perform path termination. Most paths have already been terminated in
* the intersect_closest kernel, this is just for emission and for dividing
@@ -421,52 +430,50 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
if (!(path_flag & PATH_RAY_SUBSURFACE)) {
const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ?
0.0f :
- path_state_continuation_probability(INTEGRATOR_STATE_PASS,
- path_flag);
+ path_state_continuation_probability(kg, state, path_flag);
if (probability == 0.0f) {
return false;
}
else if (probability != 1.0f) {
- INTEGRATOR_STATE_WRITE(path, throughput) /= probability;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) /= probability;
}
}
#ifdef __DENOISING_FEATURES__
- kernel_write_denoising_features_surface(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+ kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
#endif
#ifdef __SHADOW_CATCHER__
- kernel_write_shadow_catcher_bounce_data(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+ kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer);
#endif
/* Direct light. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
- integrate_surface_direct_light(INTEGRATOR_STATE_PASS, &sd, &rng_state);
+ integrate_surface_direct_light(kg, state, &sd, &rng_state);
#if defined(__AO__) && defined(__SHADER_RAYTRACE__)
/* Ambient occlusion pass. */
if (node_feature_mask & KERNEL_FEATURE_NODE_RAYTRACE) {
if ((kernel_data.film.pass_ao != PASS_UNUSED) &&
- (INTEGRATOR_STATE(path, flag) & PATH_RAY_CAMERA)) {
+ (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_CAMERA)) {
PROFILING_EVENT(PROFILING_SHADE_SURFACE_AO);
- integrate_surface_ao_pass(INTEGRATOR_STATE_PASS, &sd, &rng_state, render_buffer);
+ integrate_surface_ao_pass(kg, state, &sd, &rng_state, render_buffer);
}
}
#endif
PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT);
- continue_path_label = integrate_surface_bsdf_bssrdf_bounce(
- INTEGRATOR_STATE_PASS, &sd, &rng_state);
+ continue_path_label = integrate_surface_bsdf_bssrdf_bounce(kg, state, &sd, &rng_state);
#ifdef __VOLUME__
}
else {
PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT);
- continue_path_label = integrate_surface_volume_only_bounce(INTEGRATOR_STATE_PASS, &sd);
+ continue_path_label = integrate_surface_volume_only_bounce(state, &sd);
}
if (continue_path_label & LABEL_TRANSMIT) {
/* Enter/Exit volume. */
- volume_stack_enter_exit(INTEGRATOR_STATE_PASS, &sd);
+ volume_stack_enter_exit(kg, state, &sd);
}
#endif
@@ -475,15 +482,16 @@ ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
-ccl_device_forceinline void integrator_shade_surface(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
- if (integrate_surface<node_feature_mask>(INTEGRATOR_STATE_PASS, render_buffer)) {
- if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE) {
+ if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) {
+ if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) {
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
}
else {
- kernel_assert(INTEGRATOR_STATE(ray, t) != 0.0f);
+ kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f);
INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
}
@@ -493,11 +501,11 @@ ccl_device_forceinline void integrator_shade_surface(INTEGRATOR_STATE_ARGS,
}
ccl_device_forceinline void integrator_shade_surface_raytrace(
- INTEGRATOR_STATE_ARGS, ccl_global float *ccl_restrict render_buffer)
+ KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
{
integrator_shade_surface<KERNEL_FEATURE_NODE_MASK_SURFACE,
- DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE>(INTEGRATOR_STATE_PASS,
- render_buffer);
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE>(
+ kg, state, render_buffer);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
index 72c609751f7..e465a993041 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_volume.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ -70,12 +70,13 @@ typedef struct VolumeShaderCoefficients {
} VolumeShaderCoefficients;
/* Evaluate shader to get extinction coefficient at P. */
-ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
+ IntegratorState state,
ccl_private ShaderData *ccl_restrict sd,
ccl_private float3 *ccl_restrict extinction)
{
- shader_eval_volume<true>(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) {
- return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
+ shader_eval_volume<true>(kg, state, sd, PATH_RAY_SHADOW, [=](const int i) {
+ return integrator_state_read_shadow_volume_stack(state, i);
});
if (!(sd->flag & SD_EXTINCTION)) {
@@ -88,13 +89,14 @@ ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
}
/* Evaluate shader to get absorption, scattering and emission at P. */
-ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
+ IntegratorState state,
ccl_private ShaderData *ccl_restrict sd,
ccl_private VolumeShaderCoefficients *coeff)
{
- const int path_flag = INTEGRATOR_STATE(path, flag);
- shader_eval_volume<false>(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) {
- return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ const int path_flag = INTEGRATOR_STATE(state, path, flag);
+ shader_eval_volume<false>(kg, state, sd, path_flag, [=](const int i) {
+ return integrator_state_read_volume_stack(state, i);
});
if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) {
@@ -123,7 +125,7 @@ ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
return true;
}
-ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg,
+ccl_device_forceinline void volume_step_init(KernelGlobals kg,
ccl_private const RNGState *rng_state,
const float object_step_size,
float t,
@@ -169,14 +171,14 @@ ccl_device_forceinline void volume_step_init(ccl_global const KernelGlobals *kg,
# if 0
/* homogeneous volume: assume shader evaluation at the starts gives
* the extinction coefficient for the entire line segment */
-ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
+ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state,
ccl_private Ray *ccl_restrict ray,
ccl_private ShaderData *ccl_restrict sd,
ccl_global float3 *ccl_restrict throughput)
{
float3 sigma_t = zero_float3();
- if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) {
+ if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
*throughput *= volume_color_transmittance(sigma_t, ray->t);
}
}
@@ -184,7 +186,8 @@ ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
/* heterogeneous volume: integrate stepping through the volume until we
* reach the end, get absorbed entirely, or run out of iterations */
-ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
+ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
+ IntegratorState state,
ccl_private Ray *ccl_restrict ray,
ccl_private ShaderData *ccl_restrict sd,
ccl_private float3 *ccl_restrict throughput,
@@ -192,7 +195,7 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
{
/* Load random number state. */
RNGState rng_state;
- shadow_path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+ shadow_path_state_rng_load(state, &rng_state);
float3 tp = *throughput;
@@ -227,7 +230,7 @@ ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
/* compute attenuation over segment */
sd->P = new_P;
- if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) {
+ if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
/* Compute `expf()` only for every Nth step, to save some calculations
* because `exp(a)*exp(b) = exp(a+b)`, also do a quick #VOLUME_THROUGHPUT_EPSILON
* check then. */
@@ -510,7 +513,8 @@ ccl_device_forceinline void volume_integrate_step_scattering(
* iterations. this does probabilistically scatter or get transmitted through
* for path tracing where we don't want to branch. */
ccl_device_forceinline void volume_integrate_heterogeneous(
- INTEGRATOR_STATE_ARGS,
+ KernelGlobals kg,
+ IntegratorState state,
ccl_private Ray *ccl_restrict ray,
ccl_private ShaderData *ccl_restrict sd,
ccl_private const RNGState *rng_state,
@@ -560,7 +564,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
vstate.distance_pdf = 1.0f;
/* Initialize volume integration result. */
- const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
result.direct_throughput = throughput;
result.indirect_throughput = throughput;
@@ -571,7 +575,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
}
# ifdef __DENOISING_FEATURES__
- const bool write_denoising_features = (INTEGRATOR_STATE(path, flag) &
+ const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) &
PATH_RAY_DENOISING_FEATURES);
float3 accum_albedo = zero_float3();
# endif
@@ -585,7 +589,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* compute segment */
VolumeShaderCoefficients coeff ccl_optional_struct_init;
- if (volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &coeff)) {
+ if (volume_shader_sample(kg, state, sd, &coeff)) {
const int closure_flag = sd->flag;
/* Evaluate transmittance over segment. */
@@ -654,15 +658,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Write accumulated emission. */
if (!is_zero(accum_emission)) {
- kernel_accum_emission(
- INTEGRATOR_STATE_PASS, result.indirect_throughput, accum_emission, render_buffer);
+ kernel_accum_emission(kg, state, result.indirect_throughput, accum_emission, render_buffer);
}
# ifdef __DENOISING_FEATURES__
/* Write denoising features. */
if (write_denoising_features) {
kernel_write_denoising_features_volume(
- INTEGRATOR_STATE_PASS, accum_albedo, result.indirect_scatter, render_buffer);
+ kg, state, accum_albedo, result.indirect_scatter, render_buffer);
}
# endif /* __DENOISING_FEATURES__ */
}
@@ -671,7 +674,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
ccl_device_forceinline bool integrate_volume_sample_light(
- INTEGRATOR_STATE_ARGS,
+ KernelGlobals kg,
+ IntegratorState state,
ccl_private const ShaderData *ccl_restrict sd,
ccl_private const RNGState *ccl_restrict rng_state,
ccl_private LightSample *ccl_restrict ls)
@@ -682,8 +686,8 @@ ccl_device_forceinline bool integrate_volume_sample_light(
}
/* Sample position on a light. */
- const int path_flag = INTEGRATOR_STATE(path, flag);
- const uint bounce = INTEGRATOR_STATE(path, bounce);
+ const int path_flag = INTEGRATOR_STATE(state, path, flag);
+ const uint bounce = INTEGRATOR_STATE(state, path, bounce);
float light_u, light_v;
path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
@@ -700,7 +704,8 @@ ccl_device_forceinline bool integrate_volume_sample_light(
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
ccl_device_forceinline void integrate_volume_direct_light(
- INTEGRATOR_STATE_ARGS,
+ KernelGlobals kg,
+ IntegratorState state,
ccl_private const ShaderData *ccl_restrict sd,
ccl_private const RNGState *ccl_restrict rng_state,
const float3 P,
@@ -720,8 +725,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
* TODO: decorrelate random numbers and use light_sample_new_position to
* avoid resampling the CDF. */
{
- const int path_flag = INTEGRATOR_STATE(path, flag);
- const uint bounce = INTEGRATOR_STATE(path, bounce);
+ const int path_flag = INTEGRATOR_STATE(state, path, flag);
+ const uint bounce = INTEGRATOR_STATE(state, path, bounce);
float light_u, light_v;
path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
@@ -743,8 +748,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
* non-constant light sources. */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- const float3 light_eval = light_sample_shader_eval(
- INTEGRATOR_STATE_PASS, emission_sd, ls, sd->time);
+ const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
if (is_zero(light_eval)) {
return;
}
@@ -772,12 +776,12 @@ ccl_device_forceinline void integrate_volume_direct_light(
const bool is_light = light_sample_is_light(ls);
/* Write shadow ray and associated state to global memory. */
- integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_write_shadow_ray(kg, state, &ray);
/* Copy state from main path to shadow path. */
- const uint16_t bounce = INTEGRATOR_STATE(path, bounce);
- const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce);
- uint32_t shadow_flag = INTEGRATOR_STATE(path, flag);
+ const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce);
+ const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
+ uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
shadow_flag |= PATH_RAY_VOLUME_PASS;
const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
@@ -785,20 +789,20 @@ ccl_device_forceinline void integrate_volume_direct_light(
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
const float3 diffuse_glossy_ratio = (bounce == 0) ?
one_float3() :
- INTEGRATOR_STATE(path, diffuse_glossy_ratio);
- INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
+ INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
+ INTEGRATOR_STATE_WRITE(state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
}
- INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag;
- INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce;
- INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce;
- INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput_phase;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, flag) = shadow_flag;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, bounce) = bounce;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) = transparent_bounce;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput_phase;
if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
- INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, unshadowed_throughput) = throughput;
}
- integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS);
+ integrator_state_copy_volume_stack_to_shadow(kg, state);
/* Branch off shadow kernel. */
INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
@@ -807,7 +811,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
/* Path tracing: scatter in new direction using phase function */
ccl_device_forceinline bool integrate_volume_phase_scatter(
- INTEGRATOR_STATE_ARGS,
+ KernelGlobals kg,
+ IntegratorState state,
ccl_private ShaderData *sd,
ccl_private const RNGState *rng_state,
ccl_private const ShaderVolumePhases *phases)
@@ -838,31 +843,31 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
}
/* Setup ray. */
- INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
- INTEGRATOR_STATE_WRITE(ray, D) = normalize(phase_omega_in);
- INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX;
+ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
+ INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
# ifdef __RAY_DIFFERENTIALS__
- INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
- INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(phase_domega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
+ INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
# endif
/* Update throughput. */
- const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf;
- INTEGRATOR_STATE_WRITE(path, throughput) = throughput_phase;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase;
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3();
+ INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3();
}
/* Update path state */
- INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = phase_pdf;
- INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f;
- INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(phase_pdf,
- INTEGRATOR_STATE(path, min_ray_pdf));
+ INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
+ INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
+ phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
- path_state_next(INTEGRATOR_STATE_PASS, label);
+ path_state_next(kg, state, label);
return true;
}
@@ -870,7 +875,8 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
* ray, with the assumption that there are no surfaces blocking light
* between the endpoints. distance sampling is used to decide if we will
* scatter or not. */
-ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
+ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
+ IntegratorState state,
ccl_private Ray *ccl_restrict ray,
ccl_global float *ccl_restrict render_buffer)
{
@@ -879,29 +885,29 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
/* Load random number state. */
RNGState rng_state;
- path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+ path_state_rng_load(state, &rng_state);
/* Sample light ahead of volume stepping, for equiangular sampling. */
/* TODO: distant lights are ignored now, but could instead use even distribution. */
LightSample ls ccl_optional_struct_init;
- const bool need_light_sample = !(INTEGRATOR_STATE(path, flag) & PATH_RAY_TERMINATE);
+ const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE);
const bool have_equiangular_sample = need_light_sample &&
integrate_volume_sample_light(
- INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls) &&
+ kg, state, &sd, &rng_state, &ls) &&
(ls.t != FLT_MAX);
VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
- volume_stack_sample_method(INTEGRATOR_STATE_PASS) :
+ volume_stack_sample_method(kg, state) :
VOLUME_SAMPLE_DISTANCE;
/* Step through volume. */
- const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
- return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
- });
+ const float step_size = volume_stack_step_size(
+ kg, state, [=](const int i) { return integrator_state_read_volume_stack(state, i); });
/* TODO: expensive to zero closures? */
VolumeIntegrateResult result = {};
- volume_integrate_heterogeneous(INTEGRATOR_STATE_PASS,
+ volume_integrate_heterogeneous(kg,
+ state,
ray,
&sd,
&rng_state,
@@ -914,11 +920,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
/* Perform path termination. The intersect_closest will have already marked this path
* to be terminated. That will shading evaluating to leave out any scattering closures,
* but emission and absorption are still handled for multiple importance sampling. */
- const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ?
0.0f :
- path_state_continuation_probability(INTEGRATOR_STATE_PASS,
- path_flag);
+ path_state_continuation_probability(kg, state, path_flag);
if (probability == 0.0f) {
return VOLUME_PATH_MISSED;
}
@@ -927,7 +932,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
if (result.direct_scatter) {
const float3 direct_P = ray->P + result.direct_t * ray->D;
result.direct_throughput /= probability;
- integrate_volume_direct_light(INTEGRATOR_STATE_PASS,
+ integrate_volume_direct_light(kg,
+ state,
&sd,
&rng_state,
direct_P,
@@ -943,13 +949,12 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
if (result.indirect_scatter) {
result.indirect_throughput /= probability;
}
- INTEGRATOR_STATE_WRITE(path, throughput) = result.indirect_throughput;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput;
if (result.indirect_scatter) {
sd.P = ray->P + result.indirect_t * ray->D;
- if (integrate_volume_phase_scatter(
- INTEGRATOR_STATE_PASS, &sd, &rng_state, &result.indirect_phases)) {
+ if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) {
return VOLUME_PATH_SCATTERED;
}
else {
@@ -963,7 +968,8 @@ ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
#endif
-ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS,
+ccl_device void integrator_shade_volume(KernelGlobals kg,
+ IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_SETUP);
@@ -971,20 +977,20 @@ ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS,
#ifdef __VOLUME__
/* Setup shader data. */
Ray ray ccl_optional_struct_init;
- integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_read_ray(kg, state, &ray);
Intersection isect ccl_optional_struct_init;
- integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+ integrator_state_read_isect(kg, state, &isect);
/* Set ray length to current segment. */
ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
/* Clean volume stack for background rays. */
if (isect.prim == PRIM_NONE) {
- volume_stack_clean(INTEGRATOR_STATE_PASS);
+ volume_stack_clean(kg, state);
}
- VolumeIntegrateEvent event = volume_integrate(INTEGRATOR_STATE_PASS, &ray, render_buffer);
+ VolumeIntegrateEvent event = volume_integrate(kg, state, &ray, render_buffer);
if (event == VOLUME_PATH_SCATTERED) {
/* Queue intersect_closest kernel. */
@@ -1015,7 +1021,7 @@ ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS,
const int flags = kernel_tex_fetch(__shaders, shader).flags;
integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
- INTEGRATOR_STATE_PASS, &isect, shader, flags);
+ kg, state, &isect, shader, flags);
return;
}
}
diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h
index 517e2891769..3aab456a021 100644
--- a/intern/cycles/kernel/integrator/integrator_state.h
+++ b/intern/cycles/kernel/integrator/integrator_state.h
@@ -27,24 +27,17 @@
* to every kernel, or the pointer may exist at program scope or in constant memory. To abstract
* these differences between devices and experiment with different layouts, macros are used.
*
- * INTEGRATOR_STATE_ARGS: prepend to argument definitions for every function that accesses
- * path state.
- * INTEGRATOR_STATE_CONST_ARGS: same as INTEGRATOR_STATE_ARGS, when state is read-only
- * INTEGRATOR_STATE_PASS: use to pass along state to other functions access it.
+ * Use IntegratorState to pass a reference to the integrator state for the current path. These are
+ * defined differently on the CPU and GPU. Use ConstIntegratorState instead of const
+ * IntegratorState for passing state as read-only, to avoid oddities in typedef behavior.
*
- * INTEGRATOR_STATE(x, y): read nested struct member x.y of IntegratorState
- * INTEGRATOR_STATE_WRITE(x, y): write to nested struct member x.y of IntegratorState
+ * INTEGRATOR_STATE(state, x, y): read nested struct member x.y of IntegratorState
+ * INTEGRATOR_STATE_WRITE(state, x, y): write to nested struct member x.y of IntegratorState
*
- * INTEGRATOR_STATE_ARRAY(x, index, y): read x[index].y
- * INTEGRATOR_STATE_ARRAY_WRITE(x, index, y): write x[index].y
+ * INTEGRATOR_STATE_ARRAY(state, x, index, y): read x[index].y
+ * INTEGRATOR_STATE_ARRAY_WRITE(state, x, index, y): write x[index].y
*
- * INTEGRATOR_STATE_COPY(to_x, from_x): copy contents of one nested struct to another
- *
- * INTEGRATOR_STATE_IS_NULL: test if any integrator state is available, for shader evaluation
- * INTEGRATOR_STATE_PASS_NULL: use to pass empty state to other functions.
- *
- * NOTE: if we end up with a device that passes no arguments, the leading comma will be a problem.
- * Can solve it with more macros if we encounter it, but rather ugly so postpone for now.
+ * INTEGRATOR_STATE_NULL: use to pass empty state to other functions.
*/
#include "kernel/kernel_types.h"
@@ -146,50 +139,36 @@ typedef struct IntegratorStateGPU {
/* Scalar access on CPU. */
typedef IntegratorStateCPU *ccl_restrict IntegratorState;
+typedef const IntegratorStateCPU *ccl_restrict ConstIntegratorState;
-# define INTEGRATOR_STATE_ARGS \
- ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \
- IntegratorStateCPU *ccl_restrict state
-# define INTEGRATOR_STATE_CONST_ARGS \
- ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \
- const IntegratorStateCPU *ccl_restrict state
-# define INTEGRATOR_STATE_PASS kg, state
-
-# define INTEGRATOR_STATE_PASS_NULL kg, NULL
-# define INTEGRATOR_STATE_IS_NULL (state == NULL)
+# define INTEGRATOR_STATE_NULL nullptr
-# define INTEGRATOR_STATE(nested_struct, member) \
- (((const IntegratorStateCPU *)state)->nested_struct.member)
-# define INTEGRATOR_STATE_WRITE(nested_struct, member) (state->nested_struct.member)
+# define INTEGRATOR_STATE(state, nested_struct, member) ((state)->nested_struct.member)
+# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) ((state)->nested_struct.member)
-# define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \
- (((const IntegratorStateCPU *)state)->nested_struct[array_index].member)
-# define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \
+# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
+ ((state)->nested_struct[array_index].member)
+# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
((state)->nested_struct[array_index].member)
#else /* __KERNEL_CPU__ */
/* Array access on GPU with Structure-of-Arrays. */
-typedef int IntegratorState;
-
-# define INTEGRATOR_STATE_ARGS \
- ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
-# define INTEGRATOR_STATE_CONST_ARGS \
- ccl_global const KernelGlobals *ccl_restrict kg, const IntegratorState state
-# define INTEGRATOR_STATE_PASS kg, state
+typedef const int IntegratorState;
+typedef const int ConstIntegratorState;
-# define INTEGRATOR_STATE_PASS_NULL kg, -1
-# define INTEGRATOR_STATE_IS_NULL (state == -1)
+# define INTEGRATOR_STATE_NULL -1
-# define INTEGRATOR_STATE(nested_struct, member) \
+# define INTEGRATOR_STATE(state, nested_struct, member) \
kernel_integrator_state.nested_struct.member[state]
-# define INTEGRATOR_STATE_WRITE(nested_struct, member) INTEGRATOR_STATE(nested_struct, member)
+# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) \
+ INTEGRATOR_STATE(state, nested_struct, member)
-# define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \
+# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
kernel_integrator_state.nested_struct[array_index].member[state]
-# define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \
- INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member)
+# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
+ INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)
#endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/integrator/integrator_state_flow.h b/intern/cycles/kernel/integrator/integrator_state_flow.h
index 8477efd7b66..9829da875eb 100644
--- a/intern/cycles/kernel/integrator/integrator_state_flow.h
+++ b/intern/cycles/kernel/integrator/integrator_state_flow.h
@@ -42,48 +42,49 @@ CCL_NAMESPACE_BEGIN
* one of them, and only once.
*/
-#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(path, queued_kernel) == 0)
-#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED (INTEGRATOR_STATE(shadow_path, queued_kernel) == 0)
+#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0)
+#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \
+ (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0)
#ifdef __KERNEL_GPU__
# define INTEGRATOR_PATH_INIT(next_kernel) \
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
1); \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
1); \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0;
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
# define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
1); \
- INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
1); \
- INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
atomic_fetch_and_sub_uint32( \
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
{ \
const int key_ = key; \
atomic_fetch_and_add_uint32( \
&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
- INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
+ INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
1); \
}
@@ -94,8 +95,8 @@ CCL_NAMESPACE_BEGIN
&kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
atomic_fetch_and_add_uint32( \
&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
- INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
+ INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
1); \
}
@@ -103,39 +104,39 @@ CCL_NAMESPACE_BEGIN
#else
# define INTEGRATOR_PATH_INIT(next_kernel) \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
{ \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
(void)key; \
}
# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
{ \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
(void)current_kernel; \
}
# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
{ \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; \
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \
(void)current_kernel; \
}
# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
{ \
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
(void)key; \
(void)current_kernel; \
}
# define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \
- INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
{ \
- INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; \
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \
(void)current_kernel; \
}
# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
{ \
- INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; \
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \
(void)current_kernel; \
}
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
index fddd9eb5ac8..fee59e451d9 100644
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -23,145 +23,150 @@ CCL_NAMESPACE_BEGIN
/* Ray */
-ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg,
+ IntegratorState state,
ccl_private const Ray *ccl_restrict ray)
{
- INTEGRATOR_STATE_WRITE(ray, P) = ray->P;
- INTEGRATOR_STATE_WRITE(ray, D) = ray->D;
- INTEGRATOR_STATE_WRITE(ray, t) = ray->t;
- INTEGRATOR_STATE_WRITE(ray, time) = ray->time;
- INTEGRATOR_STATE_WRITE(ray, dP) = ray->dP;
- INTEGRATOR_STATE_WRITE(ray, dD) = ray->dD;
+ INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P;
+ INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D;
+ INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t;
+ INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time;
+ INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP;
+ INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD;
}
-ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg,
+ ConstIntegratorState state,
ccl_private Ray *ccl_restrict ray)
{
- ray->P = INTEGRATOR_STATE(ray, P);
- ray->D = INTEGRATOR_STATE(ray, D);
- ray->t = INTEGRATOR_STATE(ray, t);
- ray->time = INTEGRATOR_STATE(ray, time);
- ray->dP = INTEGRATOR_STATE(ray, dP);
- ray->dD = INTEGRATOR_STATE(ray, dD);
+ ray->P = INTEGRATOR_STATE(state, ray, P);
+ ray->D = INTEGRATOR_STATE(state, ray, D);
+ ray->t = INTEGRATOR_STATE(state, ray, t);
+ ray->time = INTEGRATOR_STATE(state, ray, time);
+ ray->dP = INTEGRATOR_STATE(state, ray, dP);
+ ray->dD = INTEGRATOR_STATE(state, ray, dD);
}
/* Shadow Ray */
ccl_device_forceinline void integrator_state_write_shadow_ray(
- INTEGRATOR_STATE_ARGS, ccl_private const Ray *ccl_restrict ray)
+ KernelGlobals kg, IntegratorState state, ccl_private const Ray *ccl_restrict ray)
{
- INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray->P;
- INTEGRATOR_STATE_WRITE(shadow_ray, D) = ray->D;
- INTEGRATOR_STATE_WRITE(shadow_ray, t) = ray->t;
- INTEGRATOR_STATE_WRITE(shadow_ray, time) = ray->time;
- INTEGRATOR_STATE_WRITE(shadow_ray, dP) = ray->dP;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP;
}
-ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg,
+ ConstIntegratorState state,
ccl_private Ray *ccl_restrict ray)
{
- ray->P = INTEGRATOR_STATE(shadow_ray, P);
- ray->D = INTEGRATOR_STATE(shadow_ray, D);
- ray->t = INTEGRATOR_STATE(shadow_ray, t);
- ray->time = INTEGRATOR_STATE(shadow_ray, time);
- ray->dP = INTEGRATOR_STATE(shadow_ray, dP);
+ ray->P = INTEGRATOR_STATE(state, shadow_ray, P);
+ ray->D = INTEGRATOR_STATE(state, shadow_ray, D);
+ ray->t = INTEGRATOR_STATE(state, shadow_ray, t);
+ ray->time = INTEGRATOR_STATE(state, shadow_ray, time);
+ ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP);
ray->dD = differential_zero_compact();
}
/* Intersection */
ccl_device_forceinline void integrator_state_write_isect(
- INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect)
+ KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
{
- INTEGRATOR_STATE_WRITE(isect, t) = isect->t;
- INTEGRATOR_STATE_WRITE(isect, u) = isect->u;
- INTEGRATOR_STATE_WRITE(isect, v) = isect->v;
- INTEGRATOR_STATE_WRITE(isect, object) = isect->object;
- INTEGRATOR_STATE_WRITE(isect, prim) = isect->prim;
- INTEGRATOR_STATE_WRITE(isect, type) = isect->type;
+ INTEGRATOR_STATE_WRITE(state, isect, t) = isect->t;
+ INTEGRATOR_STATE_WRITE(state, isect, u) = isect->u;
+ INTEGRATOR_STATE_WRITE(state, isect, v) = isect->v;
+ INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object;
+ INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim;
+ INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type;
#ifdef __EMBREE__
- INTEGRATOR_STATE_WRITE(isect, Ng) = isect->Ng;
+ INTEGRATOR_STATE_WRITE(state, isect, Ng) = isect->Ng;
#endif
}
ccl_device_forceinline void integrator_state_read_isect(
- INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect)
+ KernelGlobals kg, ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect)
{
- isect->prim = INTEGRATOR_STATE(isect, prim);
- isect->object = INTEGRATOR_STATE(isect, object);
- isect->type = INTEGRATOR_STATE(isect, type);
- isect->u = INTEGRATOR_STATE(isect, u);
- isect->v = INTEGRATOR_STATE(isect, v);
- isect->t = INTEGRATOR_STATE(isect, t);
+ isect->prim = INTEGRATOR_STATE(state, isect, prim);
+ isect->object = INTEGRATOR_STATE(state, isect, object);
+ isect->type = INTEGRATOR_STATE(state, isect, type);
+ isect->u = INTEGRATOR_STATE(state, isect, u);
+ isect->v = INTEGRATOR_STATE(state, isect, v);
+ isect->t = INTEGRATOR_STATE(state, isect, t);
#ifdef __EMBREE__
- isect->Ng = INTEGRATOR_STATE(isect, Ng);
+ isect->Ng = INTEGRATOR_STATE(state, isect, Ng);
#endif
}
-ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(INTEGRATOR_STATE_CONST_ARGS,
+ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state,
int i)
{
- VolumeStack entry = {INTEGRATOR_STATE_ARRAY(volume_stack, i, object),
- INTEGRATOR_STATE_ARRAY(volume_stack, i, shader)};
+ VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, volume_stack, i, object),
+ INTEGRATOR_STATE_ARRAY(state, volume_stack, i, shader)};
return entry;
}
-ccl_device_forceinline void integrator_state_write_volume_stack(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrator_state_write_volume_stack(IntegratorState state,
int i,
VolumeStack entry)
{
- INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, object) = entry.object;
- INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, shader) = entry.shader;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, object) = entry.object;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, shader) = entry.shader;
}
-ccl_device_forceinline bool integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_CONST_ARGS)
+ccl_device_forceinline bool integrator_state_volume_stack_is_empty(KernelGlobals kg,
+ ConstIntegratorState state)
{
return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ?
- INTEGRATOR_STATE_ARRAY(volume_stack, 0, shader) == SHADER_NONE :
+ INTEGRATOR_STATE_ARRAY(state, volume_stack, 0, shader) == SHADER_NONE :
true;
}
/* Shadow Intersection */
ccl_device_forceinline void integrator_state_write_shadow_isect(
- INTEGRATOR_STATE_ARGS, ccl_private const Intersection *ccl_restrict isect, const int index)
+ IntegratorState state, ccl_private const Intersection *ccl_restrict isect, const int index)
{
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, t) = isect->t;
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, u) = isect->u;
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, v) = isect->v;
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, object) = isect->object;
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, prim) = isect->prim;
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, type) = isect->type;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, t) = isect->t;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, u) = isect->u;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, v) = isect->v;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type;
#ifdef __EMBREE__
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, Ng) = isect->Ng;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, Ng) = isect->Ng;
#endif
}
ccl_device_forceinline void integrator_state_read_shadow_isect(
- INTEGRATOR_STATE_CONST_ARGS, ccl_private Intersection *ccl_restrict isect, const int index)
+ ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect, const int index)
{
- isect->prim = INTEGRATOR_STATE_ARRAY(shadow_isect, index, prim);
- isect->object = INTEGRATOR_STATE_ARRAY(shadow_isect, index, object);
- isect->type = INTEGRATOR_STATE_ARRAY(shadow_isect, index, type);
- isect->u = INTEGRATOR_STATE_ARRAY(shadow_isect, index, u);
- isect->v = INTEGRATOR_STATE_ARRAY(shadow_isect, index, v);
- isect->t = INTEGRATOR_STATE_ARRAY(shadow_isect, index, t);
+ isect->prim = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, prim);
+ isect->object = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, object);
+ isect->type = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, type);
+ isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u);
+ isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v);
+ isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t);
#ifdef __EMBREE__
- isect->Ng = INTEGRATOR_STATE_ARRAY(shadow_isect, index, Ng);
+ isect->Ng = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, Ng);
#endif
}
-ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_ARGS)
+ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg,
+ IntegratorState state)
{
if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) {
int index = 0;
int shader;
do {
- shader = INTEGRATOR_STATE_ARRAY(volume_stack, index, shader);
+ shader = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, shader);
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, index, object) = INTEGRATOR_STATE_ARRAY(
- volume_stack, index, object);
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, index, shader) = shader;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, index, object) =
+ INTEGRATOR_STATE_ARRAY(state, volume_stack, index, object);
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, index, shader) = shader;
++index;
} while (shader != OBJECT_NONE);
@@ -169,27 +174,27 @@ ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRA
}
ccl_device_forceinline VolumeStack
-integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_CONST_ARGS, int i)
+integrator_state_read_shadow_volume_stack(ConstIntegratorState state, int i)
{
- VolumeStack entry = {INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, object),
- INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, shader)};
+ VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, object),
+ INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, shader)};
return entry;
}
ccl_device_forceinline bool integrator_state_shadow_volume_stack_is_empty(
- INTEGRATOR_STATE_CONST_ARGS)
+ KernelGlobals kg, ConstIntegratorState state)
{
return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ?
- INTEGRATOR_STATE_ARRAY(shadow_volume_stack, 0, shader) == SHADER_NONE :
+ INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, 0, shader) == SHADER_NONE :
true;
}
-ccl_device_forceinline void integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_ARGS,
+ccl_device_forceinline void integrator_state_write_shadow_volume_stack(IntegratorState state,
int i,
VolumeStack entry)
{
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, object) = entry.object;
- INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, shader) = entry.shader;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, object) = entry.object;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, shader) = entry.shader;
}
#if defined(__KERNEL_GPU__)
@@ -244,15 +249,16 @@ ccl_device_inline void integrator_state_move(const IntegratorState to_state,
{
integrator_state_copy_only(to_state, state);
- INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0;
- INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0;
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
}
#endif
/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths
* after this function. */
-ccl_device_inline void integrator_state_shadow_catcher_split(INTEGRATOR_STATE_ARGS)
+ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg,
+ IntegratorState state)
{
#if defined(__KERNEL_GPU__)
const IntegratorState to_state = atomic_fetch_and_add_uint32(
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
index 153f9b79743..448c99765e3 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -36,29 +36,30 @@ CCL_NAMESPACE_BEGIN
#ifdef __SUBSURFACE__
-ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS,
+ccl_device int subsurface_bounce(KernelGlobals kg,
+ IntegratorState state,
ccl_private ShaderData *sd,
ccl_private const ShaderClosure *sc)
{
/* We should never have two consecutive BSSRDF bounces, the second one should
* be converted to a diffuse BSDF to avoid this. */
- kernel_assert(!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DIFFUSE_ANCESTOR));
+ kernel_assert(!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DIFFUSE_ANCESTOR));
/* Setup path state for intersect_subsurface kernel. */
ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
/* Setup ray into surface. */
- INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
- INTEGRATOR_STATE_WRITE(ray, D) = bssrdf->N;
- INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX;
- INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
- INTEGRATOR_STATE_WRITE(ray, dD) = differential_zero_compact();
+ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
+ INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N;
+ INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
+ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
+ INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
/* Pass along object info, reusing isect to save memory. */
- INTEGRATOR_STATE_WRITE(isect, Ng) = sd->Ng;
- INTEGRATOR_STATE_WRITE(isect, object) = sd->object;
+ INTEGRATOR_STATE_WRITE(state, isect, Ng) = sd->Ng;
+ INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object;
- uint32_t path_flag = (INTEGRATOR_STATE(path, flag) & ~PATH_RAY_CAMERA) |
+ uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) |
((sc->type == CLOSURE_BSSRDF_BURLEY_ID) ? PATH_RAY_SUBSURFACE_DISK :
PATH_RAY_SUBSURFACE_RANDOM_WALK);
@@ -70,27 +71,28 @@ ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS,
}
# endif
- INTEGRATOR_STATE_WRITE(path, throughput) *= weight;
- INTEGRATOR_STATE_WRITE(path, flag) = path_flag;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight;
+ INTEGRATOR_STATE_WRITE(state, path, flag) = path_flag;
/* Advance random number offset for bounce. */
- INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM;
+ INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- if (INTEGRATOR_STATE(path, bounce) == 0) {
- INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3();
+ if (INTEGRATOR_STATE(state, path, bounce) == 0) {
+ INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3();
}
}
/* Pass BSSRDF parameters. */
- INTEGRATOR_STATE_WRITE(subsurface, albedo) = bssrdf->albedo;
- INTEGRATOR_STATE_WRITE(subsurface, radius) = bssrdf->radius;
- INTEGRATOR_STATE_WRITE(subsurface, anisotropy) = bssrdf->anisotropy;
+ INTEGRATOR_STATE_WRITE(state, subsurface, albedo) = bssrdf->albedo;
+ INTEGRATOR_STATE_WRITE(state, subsurface, radius) = bssrdf->radius;
+ INTEGRATOR_STATE_WRITE(state, subsurface, anisotropy) = bssrdf->anisotropy;
return LABEL_SUBSURFACE_SCATTER;
}
-ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
+ccl_device void subsurface_shader_data_setup(KernelGlobals kg,
+ IntegratorState state,
ccl_private ShaderData *sd,
const uint32_t path_flag)
{
@@ -131,21 +133,21 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS,
}
}
-ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS)
+ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState state)
{
RNGState rng_state;
- path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+ path_state_rng_load(state, &rng_state);
Ray ray ccl_optional_struct_init;
LocalIntersection ss_isect ccl_optional_struct_init;
- if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) {
- if (!subsurface_random_walk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) {
+ if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) {
+ if (!subsurface_random_walk(kg, state, rng_state, ray, ss_isect)) {
return false;
}
}
else {
- if (!subsurface_disk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) {
+ if (!subsurface_disk(kg, state, rng_state, ray, ss_isect)) {
return false;
}
}
@@ -157,11 +159,11 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS)
const int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
- float3 P = INTEGRATOR_STATE(ray, P);
- const float3 Ng = INTEGRATOR_STATE(isect, Ng);
+ float3 P = INTEGRATOR_STATE(state, ray, P);
+ const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
const float3 offset_P = ray_offset(P, -Ng);
- integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_PASS, offset_P, ray.P);
+ integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P);
}
}
# endif /* __VOLUME__ */
@@ -172,11 +174,11 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS)
ray.P += ray.D * ray.t * 2.0f;
ray.D = -ray.D;
- integrator_state_write_isect(INTEGRATOR_STATE_PASS, &ss_isect.hits[0]);
- integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+ integrator_state_write_isect(kg, state, &ss_isect.hits[0]);
+ integrator_state_write_ray(kg, state, &ray);
/* Advance random number offset for bounce. */
- INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM;
+ INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
const int shader = intersection_get_shader(kg, &ss_isect.hits[0]);
const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
index 788a5e9b929..1de05ea2696 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
@@ -31,7 +31,8 @@ ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r,
/* Subsurface scattering step, from a point on the surface to other
* nearby points on the same object. */
-ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool subsurface_disk(KernelGlobals kg,
+ IntegratorState state,
RNGState rng_state,
ccl_private Ray &ray,
ccl_private LocalIntersection &ss_isect)
@@ -41,14 +42,14 @@ ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v);
/* Read shading point info from integrator state. */
- const float3 P = INTEGRATOR_STATE(ray, P);
- const float ray_dP = INTEGRATOR_STATE(ray, dP);
- const float time = INTEGRATOR_STATE(ray, time);
- const float3 Ng = INTEGRATOR_STATE(isect, Ng);
- const int object = INTEGRATOR_STATE(isect, object);
+ const float3 P = INTEGRATOR_STATE(state, ray, P);
+ const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
+ const float time = INTEGRATOR_STATE(state, ray, time);
+ const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+ const int object = INTEGRATOR_STATE(state, isect, object);
/* Read subsurface scattering parameters. */
- const float3 radius = INTEGRATOR_STATE(subsurface, radius);
+ const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
/* Pick random axis in local frame and point on disk. */
float3 disk_N, disk_T, disk_B;
@@ -175,7 +176,7 @@ ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS,
if (r < next_sum) {
/* Return exit point. */
- INTEGRATOR_STATE_WRITE(path, throughput) *= weight * sum_weights / sample_weight;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight * sum_weights / sample_weight;
ss_isect.hits[0] = ss_isect.hits[hit];
ss_isect.Ng[0] = ss_isect.Ng[hit];
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
index 45a43ea67a9..5365093decf 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
@@ -180,7 +180,8 @@ ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
* and the value represents the cutoff level */
#define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9
-ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
+ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
+ IntegratorState state,
RNGState rng_state,
ccl_private Ray &ray,
ccl_private LocalIntersection &ss_isect)
@@ -188,12 +189,12 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
float bssrdf_u, bssrdf_v;
path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- const float3 P = INTEGRATOR_STATE(ray, P);
- const float3 N = INTEGRATOR_STATE(ray, D);
- const float ray_dP = INTEGRATOR_STATE(ray, dP);
- const float time = INTEGRATOR_STATE(ray, time);
- const float3 Ng = INTEGRATOR_STATE(isect, Ng);
- const int object = INTEGRATOR_STATE(isect, object);
+ const float3 P = INTEGRATOR_STATE(state, ray, P);
+ const float3 N = INTEGRATOR_STATE(state, ray, D);
+ const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
+ const float time = INTEGRATOR_STATE(state, ray, time);
+ const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+ const int object = INTEGRATOR_STATE(state, isect, object);
/* Sample diffuse surface scatter into the object. */
float3 D;
@@ -219,12 +220,12 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
/* Convert subsurface to volume coefficients.
* The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */
- const float3 albedo = INTEGRATOR_STATE(subsurface, albedo);
- const float3 radius = INTEGRATOR_STATE(subsurface, radius);
- const float anisotropy = INTEGRATOR_STATE(subsurface, anisotropy);
+ const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo);
+ const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
+ const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy);
float3 sigma_t, alpha;
- float3 throughput = INTEGRATOR_STATE_WRITE(path, throughput);
+ float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput);
subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput);
float3 sigma_s = sigma_t * alpha;
@@ -459,7 +460,7 @@ ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
if (hit) {
kernel_assert(isfinite3_safe(throughput));
- INTEGRATOR_STATE_WRITE(path, throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
}
return hit;
diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h
index 0c4a723de6f..e3a4546508f 100644
--- a/intern/cycles/kernel/integrator/integrator_volume_stack.h
+++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h
@@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN
* is inside of. */
template<typename StackReadOp, typename StackWriteOp>
-ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
+ccl_device void volume_stack_enter_exit(KernelGlobals kg,
ccl_private const ShaderData *sd,
StackReadOp stack_read,
StackWriteOp stack_write)
@@ -84,28 +84,29 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
}
}
-ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, ccl_private const ShaderData *sd)
+ccl_device void volume_stack_enter_exit(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd)
{
volume_stack_enter_exit(
- INTEGRATOR_STATE_PASS,
+ kg,
sd,
- [=](const int i) { return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); },
+ [=](const int i) { return integrator_state_read_volume_stack(state, i); },
[=](const int i, const VolumeStack entry) {
- integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, i, entry);
+ integrator_state_write_volume_stack(state, i, entry);
});
}
-ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
+ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg,
+ IntegratorState state,
ccl_private const ShaderData *sd)
{
volume_stack_enter_exit(
- INTEGRATOR_STATE_PASS,
+ kg,
sd,
- [=](const int i) {
- return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
- },
+ [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); },
[=](const int i, const VolumeStack entry) {
- integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_PASS, i, entry);
+ integrator_state_write_shadow_volume_stack(state, i, entry);
});
}
@@ -123,19 +124,21 @@ ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
* Use this function after the last bounce to get rid of all volumes apart from
* the world's one after the last bounce to avoid render artifacts.
*/
-ccl_device_inline void volume_stack_clean(INTEGRATOR_STATE_ARGS)
+ccl_device_inline void volume_stack_clean(KernelGlobals kg, IntegratorState state)
{
if (kernel_data.background.volume_shader != SHADER_NONE) {
/* Keep the world's volume in stack. */
- INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, shader) = SHADER_NONE;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE;
}
else {
- INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, shader) = SHADER_NONE;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, shader) = SHADER_NONE;
}
}
template<typename StackReadOp>
-ccl_device float volume_stack_step_size(INTEGRATOR_STATE_ARGS, StackReadOp stack_read)
+ccl_device float volume_stack_step_size(KernelGlobals kg,
+ IntegratorState state,
+ StackReadOp stack_read)
{
float step_size = FLT_MAX;
@@ -182,12 +185,12 @@ typedef enum VolumeSampleMethod {
VOLUME_SAMPLE_MIS = (VOLUME_SAMPLE_DISTANCE | VOLUME_SAMPLE_EQUIANGULAR),
} VolumeSampleMethod;
-ccl_device VolumeSampleMethod volume_stack_sample_method(INTEGRATOR_STATE_ARGS)
+ccl_device VolumeSampleMethod volume_stack_sample_method(KernelGlobals kg, IntegratorState state)
{
VolumeSampleMethod method = VOLUME_SAMPLE_NONE;
for (int i = 0;; i++) {
- VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ VolumeStack entry = integrator_state_read_volume_stack(state, i);
if (entry.shader == SHADER_NONE) {
break;
}