1 files changed, 361 insertions, 611 deletions
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 61653d328f1..9e12d24dcf4 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -14,751 +14,501 @@
  * limitations under the License.
  */
 
+#pragma once
+
+#include "kernel_adaptive_sampling.h"
+#include "kernel_random.h"
+#include "kernel_shadow_catcher.h"
+#include "kernel_write_passes.h"
+
 CCL_NAMESPACE_BEGIN
 
-/* BSDF Eval
+/* --------------------------------------------------------------------
+ * BSDF Evaluation
  *
- * BSDF evaluation result, split per BSDF type. This is used to accumulate
- * render passes separately. */
-
-ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd);
+ * BSDF evaluation result, split between diffuse and glossy. This is used to
+ * accumulate render passes separately. Note that reflection, transmission
+ * and volume scattering are written to different render passes, but we assume
+ * that only one of those can happen at a bounce, and so do not need to accumulate
+ * them separately. */
 
-ccl_device_inline void bsdf_eval_init(BsdfEval *eval,
-                                      ClosureType type,
-                                      float3 value,
-                                      int use_light_pass)
+ccl_device_inline void bsdf_eval_init(BsdfEval *eval, const bool is_diffuse, float3 value)
 {
-#ifdef __PASSES__
-  eval->use_light_pass = use_light_pass;
-
-  if (eval->use_light_pass) {
-    eval->diffuse = zero_float3();
-    eval->glossy = zero_float3();
-    eval->transmission = zero_float3();
-    eval->transparent = zero_float3();
-    eval->volume = zero_float3();
-
-    if (type == CLOSURE_BSDF_TRANSPARENT_ID)
-      eval->transparent = value;
-    else if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type))
-      eval->diffuse = value;
-    else if (CLOSURE_IS_BSDF_GLOSSY(type))
-      eval->glossy = value;
-    else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
-      eval->transmission = value;
-    else if (CLOSURE_IS_PHASE(type))
-      eval->volume = value;
-  }
-  else
-#endif
-  {
+  eval->diffuse = zero_float3();
+  eval->glossy = zero_float3();
+
+  if (is_diffuse) {
     eval->diffuse = value;
   }
-#ifdef __SHADOW_TRICKS__
-  eval->sum_no_mis = zero_float3();
-#endif
+  else {
+    eval->glossy = value;
+  }
 }
 
 ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
-                                       ClosureType type,
+                                       const bool is_diffuse,
                                        float3 value,
                                        float mis_weight)
 {
-#ifdef __SHADOW_TRICKS__
-  eval->sum_no_mis += value;
-#endif
   value *= mis_weight;
-#ifdef __PASSES__
-  if (eval->use_light_pass) {
-    if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type))
-      eval->diffuse += value;
-    else if (CLOSURE_IS_BSDF_GLOSSY(type))
-      eval->glossy += value;
-    else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
-      eval->transmission += value;
-    else if (CLOSURE_IS_PHASE(type))
-      eval->volume += value;
-
-    /* skipping transparent, this function is used by for eval(), will be zero then */
-  }
-  else
-#endif
-  {
-    eval->diffuse += value;
-  }
-}
 
-ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
-{
-#ifdef __PASSES__
-  if (eval->use_light_pass) {
-    return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) &&
-           is_zero(eval->transparent) && is_zero(eval->volume);
+  if (is_diffuse) {
+    eval->diffuse += value;
   }
-  else
-#endif
-  {
-    return is_zero(eval->diffuse);
+  else {
+    eval->glossy += value;
   }
 }
 
-ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value)
+ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
 {
-#ifdef __PASSES__
-  if (eval->use_light_pass) {
-    eval->diffuse *= value;
-    eval->glossy *= value;
-    eval->transmission *= value;
-    eval->volume *= value;
-
-    /* skipping transparent, this function is used by for eval(), will be zero then */
-  }
-  else
-#endif
-  {
-    eval->diffuse *= value;
-  }
+  return is_zero(eval->diffuse) && is_zero(eval->glossy);
 }
 
 ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value)
 {
-#ifdef __SHADOW_TRICKS__
-  eval->sum_no_mis *= value;
-#endif
-  bsdf_eval_mis(eval, value);
+  eval->diffuse *= value;
+  eval->glossy *= value;
 }
 
 ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
 {
-#ifdef __SHADOW_TRICKS__
-  eval->sum_no_mis *= value;
-#endif
-#ifdef __PASSES__
-  if (eval->use_light_pass) {
-    eval->diffuse *= value;
-    eval->glossy *= value;
-    eval->transmission *= value;
-    eval->volume *= value;
-
-    /* skipping transparent, this function is used by for eval(), will be zero then */
-  }
-  else
-    eval->diffuse *= value;
-#else
   eval->diffuse *= value;
-#endif
+  eval->glossy *= value;
 }
 
 ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
 {
-#ifdef __PASSES__
-  if (eval->use_light_pass) {
-    return eval->diffuse + eval->glossy + eval->transmission + eval->volume;
-  }
-  else
-#endif
-    return eval->diffuse;
+  return eval->diffuse + eval->glossy;
 }
 
-/* Path Radiance
- *
- * We accumulate different render passes separately. After summing at the end
- * to get the combined result, it should be identical. We definite directly
- * visible as the first non-transparent hit, while indirectly visible are the
- * bounces after that. */
-
-ccl_device_inline void path_radiance_init(KernelGlobals *kg, PathRadiance *L)
+ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(const BsdfEval *eval)
 {
-  /* clear all */
-#ifdef __PASSES__
-  L->use_light_pass = kernel_data.film.use_light_pass;
-
-  if (kernel_data.film.use_light_pass) {
-    L->indirect = zero_float3();
-    L->direct_emission = zero_float3();
-
-    L->color_diffuse = zero_float3();
-    L->color_glossy = zero_float3();
-    L->color_transmission = zero_float3();
-
-    L->direct_diffuse = zero_float3();
-    L->direct_glossy = zero_float3();
-    L->direct_transmission = zero_float3();
-    L->direct_volume = zero_float3();
-
-    L->indirect_diffuse = zero_float3();
-    L->indirect_glossy = zero_float3();
-    L->indirect_transmission = zero_float3();
-    L->indirect_volume = zero_float3();
-
-    L->transparent = 0.0f;
-    L->emission = zero_float3();
-    L->background = zero_float3();
-    L->ao = zero_float3();
-    L->shadow = zero_float3();
-    L->mist = 0.0f;
-
-    L->state.diffuse = zero_float3();
-    L->state.glossy = zero_float3();
-    L->state.transmission = zero_float3();
-    L->state.volume = zero_float3();
-    L->state.direct = zero_float3();
-  }
-  else
-#endif
-  {
-    L->transparent = 0.0f;
-    L->emission = zero_float3();
-  }
-
-#ifdef __SHADOW_TRICKS__
-  L->path_total = zero_float3();
-  L->path_total_shaded = zero_float3();
-  L->shadow_background_color = zero_float3();
-  L->shadow_throughput = 0.0f;
-  L->shadow_transparency = 1.0f;
-  L->has_shadow_catcher = 0;
-#endif
-
-#ifdef __DENOISING_FEATURES__
-  L->denoising_normal = zero_float3();
-  L->denoising_albedo = zero_float3();
-  L->denoising_depth = 0.0f;
-#endif
+  /* Ratio of diffuse and glossy to recover proportions for writing to render pass.
+   * We assume reflection, transmission and volume scatter to be exclusive. */
+  return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy);
 }
 
-ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg,
-                                                 PathRadianceState *L_state,
-                                                 ccl_addr_space float3 *throughput,
-                                                 BsdfEval *bsdf_eval,
-                                                 float bsdf_pdf,
-                                                 int bounce,
-                                                 int bsdf_label)
-{
-  float inverse_pdf = 1.0f / bsdf_pdf;
-
-#ifdef __PASSES__
-  if (kernel_data.film.use_light_pass) {
-    if (bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) {
-      /* first on directly visible surface */
-      float3 value = *throughput * inverse_pdf;
-
-      L_state->diffuse = bsdf_eval->diffuse * value;
-      L_state->glossy = bsdf_eval->glossy * value;
-      L_state->transmission = bsdf_eval->transmission * value;
-      L_state->volume = bsdf_eval->volume * value;
-
-      *throughput = L_state->diffuse + L_state->glossy + L_state->transmission + L_state->volume;
+/* --------------------------------------------------------------------
+ * Clamping
+ *
+ * Clamping is done on a per-contribution basis so that we can write directly
+ * to render buffers instead of using per-thread memory, and to avoid the
+ * impact of clamping on other contributions. */
 
-      L_state->direct = *throughput;
-    }
-    else {
-      /* transparent bounce before first hit, or indirectly visible through BSDF */
-      float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf;
-      *throughput *= sum;
-    }
+ccl_device_forceinline void kernel_accum_clamp(const KernelGlobals *kg, float3 *L, int bounce)
+{
+#ifdef __KERNEL_DEBUG_NAN__
+  if (!isfinite3_safe(*L)) {
+    kernel_assert(!"Cycles sample with non-finite value detected");
   }
-  else
 #endif
-  {
-    *throughput *= bsdf_eval->diffuse * inverse_pdf;
-  }
-}
+  /* Make sure all components are finite, allowing the contribution to be usable by the adaptive
+   * sampling convergence check, and also ensuring the render result never causes issues with
+   * post-processing. */
+  *L = ensure_finite3(*L);
 
 #ifdef __CLAMP_SAMPLE__
-ccl_device_forceinline void path_radiance_clamp(KernelGlobals *kg, float3 *L, int bounce)
-{
   float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect :
                                kernel_data.integrator.sample_clamp_direct;
   float sum = reduce_add(fabs(*L));
   if (sum > limit) {
     *L *= limit / sum;
   }
+#endif
 }
 
-ccl_device_forceinline void path_radiance_clamp_throughput(KernelGlobals *kg,
-                                                           float3 *L,
-                                                           float3 *throughput,
-                                                           int bounce)
-{
-  float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect :
-                               kernel_data.integrator.sample_clamp_direct;
+/* --------------------------------------------------------------------
+ * Pass accumulation utilities.
+ */
 
-  float sum = reduce_add(fabs(*L));
-  if (sum > limit) {
-    float clamp_factor = limit / sum;
-    *L *= clamp_factor;
-    *throughput *= clamp_factor;
-  }
+/* Get pointer to pixel in render buffer. */
+ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer(
+    INTEGRATOR_STATE_CONST_ARGS, ccl_global float *ccl_restrict render_buffer)
+{
+  const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+  const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+                                        kernel_data.film.pass_stride;
+  return render_buffer + render_buffer_offset;
 }
 
-#endif
+/* --------------------------------------------------------------------
+ * Adaptive sampling.
+ */
 
-ccl_device_inline void path_radiance_accum_emission(KernelGlobals *kg,
-                                                    PathRadiance *L,
-                                                    ccl_addr_space PathState *state,
-                                                    float3 throughput,
-                                                    float3 value)
+ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS,
+                                          ccl_global float *ccl_restrict render_buffer,
+                                          int sample)
 {
-#ifdef __SHADOW_TRICKS__
-  if (state->flag & PATH_RAY_SHADOW_CATCHER) {
-    return;
+  if (kernel_data.film.pass_sample_count == PASS_UNUSED) {
+    return sample;
   }
-#endif
 
-  float3 contribution = throughput * value;
-#ifdef __CLAMP_SAMPLE__
-  path_radiance_clamp(kg, &contribution, state->bounce - 1);
-#endif
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+                                                              render_buffer);
 
-#ifdef __PASSES__
-  if (L->use_light_pass) {
-    if (state->bounce == 0)
-      L->emission += contribution;
-    else if (state->bounce == 1)
-      L->direct_emission += contribution;
-    else
-      L->indirect += contribution;
-  }
-  else
-#endif
-  {
-    L->emission += contribution;
-  }
+  return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1);
 }
 
-ccl_device_inline void path_radiance_accum_ao(KernelGlobals *kg,
-                                              PathRadiance *L,
-                                              ccl_addr_space PathState *state,
-                                              float3 throughput,
-                                              float3 alpha,
-                                              float3 bsdf,
-                                              float3 ao)
+ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS,
+                                             const float3 contribution,
+                                             ccl_global float *ccl_restrict buffer)
 {
-#ifdef __PASSES__
-  /* Store AO pass. */
-  if (L->use_light_pass && state->bounce == 0) {
-    L->ao += alpha * throughput * ao;
-  }
-#endif
-
-#ifdef __SHADOW_TRICKS__
-  /* For shadow catcher, accumulate ratio. */
-  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-    float3 light = throughput * bsdf;
-    L->path_total += light;
-    L->path_total_shaded += ao * light;
+  /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping
+   * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte
+   * Carlo global illumination" except that here it is applied per pixel and not in hierarchical
+   * tiles. */
 
-    if (state->flag & PATH_RAY_SHADOW_CATCHER) {
-      return;
-    }
+  if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) {
+    return;
   }
-#endif
-
-  float3 contribution = throughput * bsdf * ao;
 
-#ifdef __PASSES__
-  if (L->use_light_pass) {
-    if (state->bounce == 0) {
-      /* Directly visible lighting. */
-      L->direct_diffuse += contribution;
-    }
-    else {
-      /* Indirectly visible lighting after BSDF bounce. */
-      L->indirect += contribution;
-    }
-  }
-  else
-#endif
-  {
-    L->emission += contribution;
+  const int sample = INTEGRATOR_STATE(path, sample);
+  if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+    kernel_write_pass_float4(
+        buffer + kernel_data.film.pass_adaptive_aux_buffer,
+        make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f));
   }
 }
 
-ccl_device_inline void path_radiance_accum_total_ao(PathRadiance *L,
-                                                    ccl_addr_space PathState *state,
-                                                    float3 throughput,
-                                                    float3 bsdf)
-{
-#ifdef __SHADOW_TRICKS__
-  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-    L->path_total += throughput * bsdf;
-  }
-#else
-  (void)L;
-  (void)state;
-  (void)throughput;
-  (void)bsdf;
-#endif
-}
+/* --------------------------------------------------------------------
+ * Shadow catcher.
+ */
+
+#ifdef __SHADOW_CATCHER__
 
-ccl_device_inline void path_radiance_accum_light(KernelGlobals *kg,
-                                                 PathRadiance *L,
-                                                 ccl_addr_space PathState *state,
-                                                 float3 throughput,
-                                                 BsdfEval *bsdf_eval,
-                                                 float3 shadow,
-                                                 float shadow_fac,
-                                                 bool is_lamp)
+/* Accumulate contribution to the Shadow Catcher pass.
+ *
+ * Returns true if the contribution is fully handled here and is not to be added to the other
+ * passes (like combined, adaptive sampling). */
+
+ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS,
+                                            const float3 contribution,
+                                            ccl_global float *ccl_restrict buffer)
 {
-#ifdef __SHADOW_TRICKS__
-  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-    float3 light = throughput * bsdf_eval->sum_no_mis;
-    L->path_total += light;
-    L->path_total_shaded += shadow * light;
-
-    if (state->flag & PATH_RAY_SHADOW_CATCHER) {
-      return;
-    }
+  if (!kernel_data.integrator.has_shadow_catcher) {
+    return false;
   }
-#endif
 
-  float3 shaded_throughput = throughput * shadow;
+  kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED);
+  kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
 
-#ifdef __PASSES__
-  if (L->use_light_pass) {
-    /* Compute the clamping based on the total contribution.
-     * The resulting scale is then be applied to all individual components. */
-    float3 full_contribution = shaded_throughput * bsdf_eval_sum(bsdf_eval);
-#  ifdef __CLAMP_SAMPLE__
-    path_radiance_clamp_throughput(kg, &full_contribution, &shaded_throughput, state->bounce);
-#  endif
-
-    if (state->bounce == 0) {
-      /* directly visible lighting */
-      L->direct_diffuse += shaded_throughput * bsdf_eval->diffuse;
-      L->direct_glossy += shaded_throughput * bsdf_eval->glossy;
-      L->direct_transmission += shaded_throughput * bsdf_eval->transmission;
-      L->direct_volume += shaded_throughput * bsdf_eval->volume;
-
-      if (is_lamp) {
-        L->shadow += shadow * shadow_fac;
-      }
-    }
-    else {
-      /* indirectly visible lighting after BSDF bounce */
-      L->indirect += full_contribution;
-    }
+  /* Matte pass. */
+  if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution);
+    /* NOTE: Accumulate to the combined pass and to the sample count pass, so that the adaptive
+     * sampling is based on how noisy the combined pass is, as if there were no catchers in the
+     * scene. */
   }
-  else
-#endif
-  {
-    float3 contribution = shaded_throughput * bsdf_eval->diffuse;
-    path_radiance_clamp(kg, &contribution, state->bounce);
-    L->emission += contribution;
+
+  /* Shadow catcher pass. */
+  if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) {
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
+    return true;
   }
-}
 
-ccl_device_inline void path_radiance_accum_total_light(PathRadiance *L,
-                                                       ccl_addr_space PathState *state,
-                                                       float3 throughput,
-                                                       const BsdfEval *bsdf_eval)
-{
-#ifdef __SHADOW_TRICKS__
-  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-    L->path_total += throughput * bsdf_eval->sum_no_mis;
-  }
-#else
-  (void)L;
-  (void)state;
-  (void)throughput;
-  (void)bsdf_eval;
-#endif
+  return false;
 }
 
-ccl_device_inline void path_radiance_accum_background(KernelGlobals *kg,
-                                                      PathRadiance *L,
-                                                      ccl_addr_space PathState *state,
-                                                      float3 throughput,
-                                                      float3 value)
+ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_ARGS,
+                                                        const float3 contribution,
+                                                        const float transparent,
+                                                        ccl_global float *ccl_restrict buffer)
 {
+  if (!kernel_data.integrator.has_shadow_catcher) {
+    return false;
+  }
 
-#ifdef __SHADOW_TRICKS__
-  if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
-    L->path_total += throughput * value;
-    L->path_total_shaded += throughput * value * L->shadow_transparency;
+  kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED);
+  kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
 
-    if (state->flag & PATH_RAY_SHADOW_CATCHER) {
-      return;
-    }
+  if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
+    return true;
   }
-#endif
 
-  float3 contribution = throughput * value;
-#ifdef __CLAMP_SAMPLE__
-  path_radiance_clamp(kg, &contribution, state->bounce - 1);
-#endif
+  /* Matte pass. */
+  if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+    kernel_write_pass_float4(
+        buffer + kernel_data.film.pass_shadow_catcher_matte,
+        make_float4(contribution.x, contribution.y, contribution.z, transparent));
+    /* NOTE: Accumulate to the combined pass and to the sample count pass, so that the adaptive
+     * sampling is based on how noisy the combined pass is, as if there were no catchers in the
+     * scene. */
+  }
 
-#ifdef __PASSES__
-  if (L->use_light_pass) {
-    if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)
-      L->background += contribution;
-    else if (state->bounce == 1)
-      L->direct_emission += contribution;
-    else
-      L->indirect += contribution;
-  }
-  else
-#endif
-  {
-    L->emission += contribution;
+  /* Shadow catcher pass. */
+  if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) {
+    /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the
+     * calculation and the alpha channel of the pass contains the number of samples contributed to
+     * a pixel of the pass. */
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
+    return true;
   }
 
-#ifdef __DENOISING_FEATURES__
-  L->denoising_albedo += state->denoising_feature_weight * state->denoising_feature_throughput *
-                         value;
-#endif /* __DENOISING_FEATURES__ */
+  return false;
 }
 
-ccl_device_inline void path_radiance_accum_transparent(PathRadiance *L,
-                                                       ccl_addr_space PathState *state,
-                                                       float3 throughput)
+ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CONST_ARGS,
+                                                             const float transparent,
+                                                             ccl_global float *ccl_restrict buffer)
 {
-  L->transparent += average(throughput);
-}
+  if (!kernel_data.integrator.has_shadow_catcher) {
+    return;
+  }
 
-#ifdef __SHADOW_TRICKS__
-ccl_device_inline void path_radiance_accum_shadowcatcher(PathRadiance *L,
-                                                         float3 throughput,
-                                                         float3 background)
-{
-  L->shadow_throughput += average(throughput);
-  L->shadow_background_color += throughput * background;
-  L->has_shadow_catcher = 1;
-}
-#endif
+  kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
 
-ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
-{
-#ifdef __PASSES__
-  /* this division is a bit ugly, but means we only have to keep track of
-   * only a single throughput further along the path, here we recover just
-   * the indirect path that is not influenced by any particular BSDF type */
-  if (L->use_light_pass) {
-    L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct);
-    L->direct_diffuse += L->state.diffuse * L->direct_emission;
-    L->direct_glossy += L->state.glossy * L->direct_emission;
-    L->direct_transmission += L->state.transmission * L->direct_emission;
-    L->direct_volume += L->state.volume * L->direct_emission;
-
-    L->indirect = safe_divide_color(L->indirect, L->state.direct);
-    L->indirect_diffuse += L->state.diffuse * L->indirect;
-    L->indirect_glossy += L->state.glossy * L->indirect;
-    L->indirect_transmission += L->state.transmission * L->indirect;
-    L->indirect_volume += L->state.volume * L->indirect;
+  /* Matte pass. */
+  if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+    kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent);
   }
-#endif
 }
 
-ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
-{
-#ifdef __PASSES__
-  if (L->use_light_pass) {
-    L->state.diffuse = zero_float3();
-    L->state.glossy = zero_float3();
-    L->state.transmission = zero_float3();
-    L->state.volume = zero_float3();
+#endif /* __SHADOW_CATCHER__ */
+
+/* --------------------------------------------------------------------
+ * Render passes.
+ */
 
-    L->direct_emission = zero_float3();
-    L->indirect = zero_float3();
+/* Write combined pass. */
+ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS,
+                                                  const float3 contribution,
+                                                  ccl_global float *ccl_restrict buffer)
+{
+#ifdef __SHADOW_CATCHER__
+  if (kernel_accum_shadow_catcher(INTEGRATOR_STATE_PASS, contribution, buffer)) {
+    return;
   }
 #endif
+
+  if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+    kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution);
+  }
+
+  kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer);
 }
 
-ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L, const PathRadiance *L_src)
+/* Write combined pass with transparency. */
+ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_CONST_ARGS,
+                                                              const float3 contribution,
+                                                              const float transparent,
+                                                              ccl_global float *ccl_restrict
+                                                                  buffer)
 {
-#ifdef __PASSES__
-  if (L->use_light_pass) {
-    L->state = L_src->state;
-
-    L->direct_emission = L_src->direct_emission;
-    L->indirect = L_src->indirect;
+#ifdef __SHADOW_CATCHER__
+  if (kernel_accum_shadow_catcher_transparent(
+          INTEGRATOR_STATE_PASS, contribution, transparent, buffer)) {
+    return;
   }
 #endif
+
+  if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+    kernel_write_pass_float4(
+        buffer + kernel_data.film.pass_combined,
+        make_float4(contribution.x, contribution.y, contribution.z, transparent));
+  }
+
+  kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer);
 }
 
-#ifdef __SHADOW_TRICKS__
-ccl_device_inline void path_radiance_sum_shadowcatcher(KernelGlobals *kg,
-                                                       PathRadiance *L,
-                                                       float3 *L_sum,
-                                                       float *alpha)
+/* Write background or emission to appropriate pass. */
+ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE_CONST_ARGS,
+                                                                float3 contribution,
+                                                                ccl_global float *ccl_restrict
+                                                                    buffer,
+                                                                const int pass)
 {
-  /* Calculate current shadow of the path. */
-  float path_total = average(L->path_total);
-  float shadow;
+  if (!(kernel_data.film.light_pass_flag & PASS_ANY)) {
+    return;
+  }
 
-  if (UNLIKELY(!isfinite_safe(path_total))) {
-#  ifdef __KERNEL_DEBUG_NAN__
-    kernel_assert(!"Non-finite total radiance along the path");
-#  endif
-    shadow = 0.0f;
+#ifdef __PASSES__
+  const int path_flag = INTEGRATOR_STATE(path, flag);
+  int pass_offset = PASS_UNUSED;
+
+  /* Denoising albedo. */
+#  ifdef __DENOISING_FEATURES__
+  if (path_flag & PATH_RAY_DENOISING_FEATURES) {
+    if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
+      const float3 denoising_feature_throughput = INTEGRATOR_STATE(path,
+                                                                   denoising_feature_throughput);
+      const float3 denoising_albedo = denoising_feature_throughput * contribution;
+      kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
+    }
   }
-  else if (path_total == 0.0f) {
-    shadow = L->shadow_transparency;
+#  endif /* __DENOISING_FEATURES__ */
+
+  if (!(path_flag & PATH_RAY_ANY_PASS)) {
+    /* Directly visible, write to emission or background pass. */
+    pass_offset = pass;
+  }
+  else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
+    /* Indirectly visible through reflection or transmission. */
+    const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
+                                       ((INTEGRATOR_STATE(path, bounce) == 1) ?
+                                            kernel_data.film.pass_glossy_direct :
+                                            kernel_data.film.pass_glossy_indirect) :
+                                       ((INTEGRATOR_STATE(path, bounce) == 1) ?
+                                            kernel_data.film.pass_transmission_direct :
+                                            kernel_data.film.pass_transmission_indirect);
+
+    if (glossy_pass_offset != PASS_UNUSED) {
+      /* Glossy is a subset of the throughput, reconstruct it here using the
+       * diffuse-glossy ratio. */
+      const float3 ratio = INTEGRATOR_STATE(path, diffuse_glossy_ratio);
+      const float3 glossy_contribution = (one_float3() - ratio) * contribution;
+      kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
+    }
+
+    /* Reconstruct diffuse subset of throughput. */
+    pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_diffuse_direct :
+                                                          kernel_data.film.pass_diffuse_indirect;
+    if (pass_offset != PASS_UNUSED) {
+      contribution *= INTEGRATOR_STATE(path, diffuse_glossy_ratio);
+    }
   }
-  else {
-    float path_total_shaded = average(L->path_total_shaded);
-    shadow = path_total_shaded / path_total;
+  else if (path_flag & PATH_RAY_VOLUME_PASS) {
+    /* Indirectly visible through volume. */
+    pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_volume_direct :
+                                                          kernel_data.film.pass_volume_indirect;
   }
 
-  /* Calculate final light sum and transparency for shadow catcher object. */
-  if (kernel_data.background.transparent) {
-    *alpha -= L->shadow_throughput * shadow;
-  }
-  else {
-    L->shadow_background_color *= shadow;
-    *L_sum += L->shadow_background_color;
+  /* Single write call for GPU coherence. */
+  if (pass_offset != PASS_UNUSED) {
+    kernel_write_pass_float3(buffer + pass_offset, contribution);
   }
+#endif /* __PASSES__ */
 }
-#endif
 
-ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg,
-                                                     PathRadiance *L,
-                                                     float *alpha)
+/* Write shadow path light contribution to the combined, light and shadow passes. */
+ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS,
+                                          ccl_global float *ccl_restrict render_buffer)
 {
-  float3 L_sum;
-  /* Light Passes are used */
+  /* The throughput for shadow paths already contains the light shader evaluation. */
+  float3 contribution = INTEGRATOR_STATE(shadow_path, throughput);
+  /* Shadow paths count bounce 0 as direct light (see pass selection below): no offset. */
+  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce));
+
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+                                                              render_buffer);
+  kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer);
+
 #ifdef __PASSES__
-  float3 L_direct, L_indirect;
-  if (L->use_light_pass) {
-    path_radiance_sum_indirect(L);
-
-    L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_volume +
-               L->emission;
-    L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission +
-                 L->indirect_volume;
-
-    if (!kernel_data.background.transparent)
-      L_direct += L->background;
-
-    L_sum = L_direct + L_indirect;
-    float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
-
-    /* Reject invalid value */
-    if (!isfinite_safe(sum)) {
-#  ifdef __KERNEL_DEBUG_NAN__
-      kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!");
-#  endif
-      L_sum = zero_float3();
-
-      L->direct_diffuse = zero_float3();
-      L->direct_glossy = zero_float3();
-      L->direct_transmission = zero_float3();
-      L->direct_volume = zero_float3();
-
-      L->indirect_diffuse = zero_float3();
-      L->indirect_glossy = zero_float3();
-      L->indirect_transmission = zero_float3();
-      L->indirect_volume = zero_float3();
-
-      L->emission = zero_float3();
+  if (kernel_data.film.light_pass_flag & PASS_ANY) {
+    const int path_flag = INTEGRATOR_STATE(shadow_path, flag);
+    int pass_offset = PASS_UNUSED;
+
+    if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
+      /* Indirectly visible through reflection or transmission. */
+      const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
+                                         ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+                                              kernel_data.film.pass_glossy_direct :
+                                              kernel_data.film.pass_glossy_indirect) :
+                                         ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+                                              kernel_data.film.pass_transmission_direct :
+                                              kernel_data.film.pass_transmission_indirect);
+
+      if (glossy_pass_offset != PASS_UNUSED) {
+        /* Glossy is a subset of the throughput, reconstruct it here using the
+         * diffuse-glossy ratio. */
+        const float3 ratio = INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio);
+        const float3 glossy_contribution = (one_float3() - ratio) * contribution;
+        kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
+      }
+
+      /* Reconstruct diffuse subset of throughput. */
+      pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+                        kernel_data.film.pass_diffuse_direct :
+                        kernel_data.film.pass_diffuse_indirect;
+      if (pass_offset != PASS_UNUSED) {
+        contribution *= INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio);
+      }
+    }
+    else if (path_flag & PATH_RAY_VOLUME_PASS) {
+      /* Indirectly visible through volume. */
+      pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+                        kernel_data.film.pass_volume_direct :
+                        kernel_data.film.pass_volume_indirect;
     }
-  }
 
-  /* No Light Passes */
-  else
-#endif
-  {
-    L_sum = L->emission;
+    /* Single write call for GPU coherence. */
+    if (pass_offset != PASS_UNUSED) {
+      kernel_write_pass_float3(buffer + pass_offset, contribution);
+    }
 
-    /* Reject invalid value */
-    float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
-    if (!isfinite_safe(sum)) {
-#ifdef __KERNEL_DEBUG_NAN__
-      kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!");
-#endif
-      L_sum = zero_float3();
+    /* Write shadow pass: shadowed / unshadowed throughput, scaled by pass_shadow_scale. */
+    if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
+        (path_flag & PATH_RAY_CAMERA)) {
+      const float3 unshadowed_throughput = INTEGRATOR_STATE(shadow_path, unshadowed_throughput);
+      const float3 shadowed_throughput = INTEGRATOR_STATE(shadow_path, throughput);
+      const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) *
+                            kernel_data.film.pass_shadow_scale;
+      kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow);
     }
   }
+#endif
+}
 
-  /* Compute alpha. */
-  *alpha = 1.0f - L->transparent;
+/* Write transparency to render buffer: component 3 of the combined pass (when the
+ * COMBINED pass flag is set), then kernel_accum_shadow_catcher_transparent_only.
+ * Note that we accumulate transparency = 1 - alpha in the render buffer.
+ * Otherwise we'd have to write alpha on path termination, which happens
+ * in many places. */
+ccl_device_inline void kernel_accum_transparent(INTEGRATOR_STATE_CONST_ARGS,
+                                                const float transparent,
+                                                ccl_global float *ccl_restrict render_buffer)
+{
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+                                                              render_buffer);
 
-  /* Add shadow catcher contributions. */
-#ifdef __SHADOW_TRICKS__
-  if (L->has_shadow_catcher) {
-    path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha);
+  if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+    kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent);
   }
-#endif /* __SHADOW_TRICKS__ */
 
-  return L_sum;
+  kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_PASS, transparent, buffer);
 }
 
-ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg,
-                                                     PathRadiance *L,
-                                                     float3 *noisy,
-                                                     float3 *clean)
+/* Write background contribution (path throughput * L, clamped) to render buffer.
+ * Transparent background rays write transparency only, through kernel_accum_transparent;
+ * other rays write color and transparency. Both are passed on for the background pass. */
+ccl_device_inline void kernel_accum_background(INTEGRATOR_STATE_CONST_ARGS,
+                                               const float3 L,
+                                               const float transparent,
+                                               const bool is_transparent_background_ray,
+                                               ccl_global float *ccl_restrict render_buffer)
 {
-#ifdef __PASSES__
-  kernel_assert(L->use_light_pass);
-
-  *clean = L->emission + L->background;
-  *noisy = L->direct_volume + L->indirect_volume;
-
-#  define ADD_COMPONENT(flag, component) \
-    if (kernel_data.film.denoising_flags & flag) \
-      *clean += component; \
-    else \
-      *noisy += component;
-
-  ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR, L->direct_diffuse);
-  ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND, L->indirect_diffuse);
-  ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR, L->direct_glossy);
-  ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy);
-  ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission);
-  ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission);
-#  undef ADD_COMPONENT
-#else
-  *noisy = L->emission;
-  *clean = zero_float3();
-#endif
+  float3 contribution = INTEGRATOR_STATE(path, throughput) * L;
+  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1);
 
-#ifdef __SHADOW_TRICKS__
-  if (L->has_shadow_catcher) {
-    *noisy += L->shadow_background_color;
-  }
-#endif
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+                                                              render_buffer);
 
-  *noisy = ensure_finite3(*noisy);
-  *clean = ensure_finite3(*clean);
+  if (is_transparent_background_ray) {
+    kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer);
+  }
+  else {
+    kernel_accum_combined_transparent_pass(
+        INTEGRATOR_STATE_PASS, contribution, transparent, buffer);
+  }
+  kernel_accum_emission_or_background_pass(
+      INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_background);
 }
 
-ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample)
+/* Write emission (throughput * L, clamped) to the combined pass and emission pass. */
+ccl_device_inline void kernel_accum_emission(INTEGRATOR_STATE_CONST_ARGS,
+                                             const float3 throughput,
+                                             const float3 L,
+                                             ccl_global float *ccl_restrict render_buffer)
 {
-#ifdef __SPLIT_KERNEL__
-#  define safe_float3_add(f, v) \
-    do { \
-      ccl_global float *p = (ccl_global float *)(&(f)); \
-      atomic_add_and_fetch_float(p + 0, (v).x); \
-      atomic_add_and_fetch_float(p + 1, (v).y); \
-      atomic_add_and_fetch_float(p + 2, (v).z); \
-    } while (0)
-#  define safe_float_add(f, v) atomic_add_and_fetch_float(&(f), (v))
-#else
-#  define safe_float3_add(f, v) (f) += (v)
-#  define safe_float_add(f, v) (f) += (v)
-#endif /* __SPLIT_KERNEL__ */
+  float3 contribution = throughput * L;
+  kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1);
 
-#ifdef __PASSES__
-  safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse);
-  safe_float3_add(L->direct_glossy, L_sample->direct_glossy);
-  safe_float3_add(L->direct_transmission, L_sample->direct_transmission);
-  safe_float3_add(L->direct_volume, L_sample->direct_volume);
-
-  safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse);
-  safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy);
-  safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission);
-  safe_float3_add(L->indirect_volume, L_sample->indirect_volume);
-
-  safe_float3_add(L->background, L_sample->background);
-  safe_float3_add(L->ao, L_sample->ao);
-  safe_float3_add(L->shadow, L_sample->shadow);
-  safe_float_add(L->mist, L_sample->mist);
-#endif /* __PASSES__ */
-  safe_float3_add(L->emission, L_sample->emission);
+  ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+                                                              render_buffer);
 
-#undef safe_float_add
-#undef safe_float3_add
+  kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer);
+  kernel_accum_emission_or_background_pass(
+      INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_emission);
 }
 
 CCL_NAMESPACE_END