Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2021-09-20 18:59:20 +0300
committerBrecht Van Lommel <brecht@blender.org>2021-09-21 15:55:54 +0300
commit08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/kernel/kernel_accumulate.h
parentfa6b1007bad065440950cd67deb16a04f368856f (diff)
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more. Some features have also been removed or changed, breaking backwards compatibility. Including the removal of the OpenCL backend, for which alternatives are under development. Release notes and code docs: https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles https://wiki.blender.org/wiki/Source/Render/Cycles Credits: * Sergey Sharybin * Brecht Van Lommel * Patrick Mours (OptiX backend) * Christophe Hery (subsurface scattering anisotropy) * William Leeson (PMJ sampling pattern) * Alaska (various fixes and tweaks) * Thomas Dinges (various fixes) For the full commit history, see the cycles-x branch. This squashes together all the changes since intermediate changes would often fail building or tests. Ref T87839, T87837, T87836 Fixes T90734, T89353, T80267, T77185, T69800
Diffstat (limited to 'intern/cycles/kernel/kernel_accumulate.h')
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h972
1 file changed, 361 insertions, 611 deletions
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 61653d328f1..9e12d24dcf4 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -14,751 +14,501 @@
* limitations under the License.
*/
+#pragma once
+
+#include "kernel_adaptive_sampling.h"
+#include "kernel_random.h"
+#include "kernel_shadow_catcher.h"
+#include "kernel_write_passes.h"
+
CCL_NAMESPACE_BEGIN
-/* BSDF Eval
+/* --------------------------------------------------------------------
+ * BSDF Evaluation
*
- * BSDF evaluation result, split per BSDF type. This is used to accumulate
- * render passes separately. */
-
-ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd);
+ * BSDF evaluation result, split between diffuse and glossy. This is used to
+ * accumulate render passes separately. Note that reflection, transmission
+ * and volume scattering are written to different render passes, but we assume
+ * that only one of those can happen at a bounce, and so do not need to accumulate
+ * them separately. */
-ccl_device_inline void bsdf_eval_init(BsdfEval *eval,
- ClosureType type,
- float3 value,
- int use_light_pass)
+ccl_device_inline void bsdf_eval_init(BsdfEval *eval, const bool is_diffuse, float3 value)
{
-#ifdef __PASSES__
- eval->use_light_pass = use_light_pass;
-
- if (eval->use_light_pass) {
- eval->diffuse = zero_float3();
- eval->glossy = zero_float3();
- eval->transmission = zero_float3();
- eval->transparent = zero_float3();
- eval->volume = zero_float3();
-
- if (type == CLOSURE_BSDF_TRANSPARENT_ID)
- eval->transparent = value;
- else if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type))
- eval->diffuse = value;
- else if (CLOSURE_IS_BSDF_GLOSSY(type))
- eval->glossy = value;
- else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
- eval->transmission = value;
- else if (CLOSURE_IS_PHASE(type))
- eval->volume = value;
- }
- else
-#endif
- {
+ eval->diffuse = zero_float3();
+ eval->glossy = zero_float3();
+
+ if (is_diffuse) {
eval->diffuse = value;
}
-#ifdef __SHADOW_TRICKS__
- eval->sum_no_mis = zero_float3();
-#endif
+ else {
+ eval->glossy = value;
+ }
}
ccl_device_inline void bsdf_eval_accum(BsdfEval *eval,
- ClosureType type,
+ const bool is_diffuse,
float3 value,
float mis_weight)
{
-#ifdef __SHADOW_TRICKS__
- eval->sum_no_mis += value;
-#endif
value *= mis_weight;
-#ifdef __PASSES__
- if (eval->use_light_pass) {
- if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type))
- eval->diffuse += value;
- else if (CLOSURE_IS_BSDF_GLOSSY(type))
- eval->glossy += value;
- else if (CLOSURE_IS_BSDF_TRANSMISSION(type))
- eval->transmission += value;
- else if (CLOSURE_IS_PHASE(type))
- eval->volume += value;
-
- /* skipping transparent, this function is used by for eval(), will be zero then */
- }
- else
-#endif
- {
- eval->diffuse += value;
- }
-}
-ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
-{
-#ifdef __PASSES__
- if (eval->use_light_pass) {
- return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) &&
- is_zero(eval->transparent) && is_zero(eval->volume);
+ if (is_diffuse) {
+ eval->diffuse += value;
}
- else
-#endif
- {
- return is_zero(eval->diffuse);
+ else {
+ eval->glossy += value;
}
}
-ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value)
+ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval)
{
-#ifdef __PASSES__
- if (eval->use_light_pass) {
- eval->diffuse *= value;
- eval->glossy *= value;
- eval->transmission *= value;
- eval->volume *= value;
-
- /* skipping transparent, this function is used by for eval(), will be zero then */
- }
- else
-#endif
- {
- eval->diffuse *= value;
- }
+ return is_zero(eval->diffuse) && is_zero(eval->glossy);
}
ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value)
{
-#ifdef __SHADOW_TRICKS__
- eval->sum_no_mis *= value;
-#endif
- bsdf_eval_mis(eval, value);
+ eval->diffuse *= value;
+ eval->glossy *= value;
}
ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value)
{
-#ifdef __SHADOW_TRICKS__
- eval->sum_no_mis *= value;
-#endif
-#ifdef __PASSES__
- if (eval->use_light_pass) {
- eval->diffuse *= value;
- eval->glossy *= value;
- eval->transmission *= value;
- eval->volume *= value;
-
- /* skipping transparent, this function is used by for eval(), will be zero then */
- }
- else
- eval->diffuse *= value;
-#else
eval->diffuse *= value;
-#endif
+ eval->glossy *= value;
}
ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval)
{
-#ifdef __PASSES__
- if (eval->use_light_pass) {
- return eval->diffuse + eval->glossy + eval->transmission + eval->volume;
- }
- else
-#endif
- return eval->diffuse;
+ return eval->diffuse + eval->glossy;
}
-/* Path Radiance
- *
- * We accumulate different render passes separately. After summing at the end
- * to get the combined result, it should be identical. We definite directly
- * visible as the first non-transparent hit, while indirectly visible are the
- * bounces after that. */
-
-ccl_device_inline void path_radiance_init(KernelGlobals *kg, PathRadiance *L)
+ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(const BsdfEval *eval)
{
- /* clear all */
-#ifdef __PASSES__
- L->use_light_pass = kernel_data.film.use_light_pass;
-
- if (kernel_data.film.use_light_pass) {
- L->indirect = zero_float3();
- L->direct_emission = zero_float3();
-
- L->color_diffuse = zero_float3();
- L->color_glossy = zero_float3();
- L->color_transmission = zero_float3();
-
- L->direct_diffuse = zero_float3();
- L->direct_glossy = zero_float3();
- L->direct_transmission = zero_float3();
- L->direct_volume = zero_float3();
-
- L->indirect_diffuse = zero_float3();
- L->indirect_glossy = zero_float3();
- L->indirect_transmission = zero_float3();
- L->indirect_volume = zero_float3();
-
- L->transparent = 0.0f;
- L->emission = zero_float3();
- L->background = zero_float3();
- L->ao = zero_float3();
- L->shadow = zero_float3();
- L->mist = 0.0f;
-
- L->state.diffuse = zero_float3();
- L->state.glossy = zero_float3();
- L->state.transmission = zero_float3();
- L->state.volume = zero_float3();
- L->state.direct = zero_float3();
- }
- else
-#endif
- {
- L->transparent = 0.0f;
- L->emission = zero_float3();
- }
-
-#ifdef __SHADOW_TRICKS__
- L->path_total = zero_float3();
- L->path_total_shaded = zero_float3();
- L->shadow_background_color = zero_float3();
- L->shadow_throughput = 0.0f;
- L->shadow_transparency = 1.0f;
- L->has_shadow_catcher = 0;
-#endif
-
-#ifdef __DENOISING_FEATURES__
- L->denoising_normal = zero_float3();
- L->denoising_albedo = zero_float3();
- L->denoising_depth = 0.0f;
-#endif
+ /* Ratio of diffuse and glossy to recover proportions for writing to render pass.
+ * We assume reflection, transmission and volume scatter to be exclusive. */
+ return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy);
}
-ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg,
- PathRadianceState *L_state,
- ccl_addr_space float3 *throughput,
- BsdfEval *bsdf_eval,
- float bsdf_pdf,
- int bounce,
- int bsdf_label)
-{
- float inverse_pdf = 1.0f / bsdf_pdf;
-
-#ifdef __PASSES__
- if (kernel_data.film.use_light_pass) {
- if (bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) {
- /* first on directly visible surface */
- float3 value = *throughput * inverse_pdf;
-
- L_state->diffuse = bsdf_eval->diffuse * value;
- L_state->glossy = bsdf_eval->glossy * value;
- L_state->transmission = bsdf_eval->transmission * value;
- L_state->volume = bsdf_eval->volume * value;
-
- *throughput = L_state->diffuse + L_state->glossy + L_state->transmission + L_state->volume;
+/* --------------------------------------------------------------------
+ * Clamping
+ *
+ * Clamping is done on a per-contribution basis so that we can write directly
+ * to render buffers instead of using per-thread memory, and to avoid the
+ * impact of clamping on other contributions. */
- L_state->direct = *throughput;
- }
- else {
- /* transparent bounce before first hit, or indirectly visible through BSDF */
- float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf;
- *throughput *= sum;
- }
+ccl_device_forceinline void kernel_accum_clamp(const KernelGlobals *kg, float3 *L, int bounce)
+{
+#ifdef __KERNEL_DEBUG_NAN__
+ if (!isfinite3_safe(*L)) {
+ kernel_assert(!"Cycles sample with non-finite value detected");
}
- else
#endif
- {
- *throughput *= bsdf_eval->diffuse * inverse_pdf;
- }
-}
+ /* Make sure all components are finite, allowing the contribution to be usable by adaptive
+ * sampling convergence check, but also to make it so render result never causes issues with
+ * post-processing. */
+ *L = ensure_finite3(*L);
#ifdef __CLAMP_SAMPLE__
-ccl_device_forceinline void path_radiance_clamp(KernelGlobals *kg, float3 *L, int bounce)
-{
float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect :
kernel_data.integrator.sample_clamp_direct;
float sum = reduce_add(fabs(*L));
if (sum > limit) {
*L *= limit / sum;
}
+#endif
}
-ccl_device_forceinline void path_radiance_clamp_throughput(KernelGlobals *kg,
- float3 *L,
- float3 *throughput,
- int bounce)
-{
- float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect :
- kernel_data.integrator.sample_clamp_direct;
+/* --------------------------------------------------------------------
+ * Pass accumulation utilities.
+ */
- float sum = reduce_add(fabs(*L));
- if (sum > limit) {
- float clamp_factor = limit / sum;
- *L *= clamp_factor;
- *throughput *= clamp_factor;
- }
+/* Get pointer to pixel in render buffer. */
+ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer(
+ INTEGRATOR_STATE_CONST_ARGS, ccl_global float *ccl_restrict render_buffer)
+{
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ return render_buffer + render_buffer_offset;
}
-#endif
+/* --------------------------------------------------------------------
+ * Adaptive sampling.
+ */
-ccl_device_inline void path_radiance_accum_emission(KernelGlobals *kg,
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- float3 value)
+ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS,
+ ccl_global float *ccl_restrict render_buffer,
+ int sample)
{
-#ifdef __SHADOW_TRICKS__
- if (state->flag & PATH_RAY_SHADOW_CATCHER) {
- return;
+ if (kernel_data.film.pass_sample_count == PASS_UNUSED) {
+ return sample;
}
-#endif
- float3 contribution = throughput * value;
-#ifdef __CLAMP_SAMPLE__
- path_radiance_clamp(kg, &contribution, state->bounce - 1);
-#endif
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+ render_buffer);
-#ifdef __PASSES__
- if (L->use_light_pass) {
- if (state->bounce == 0)
- L->emission += contribution;
- else if (state->bounce == 1)
- L->direct_emission += contribution;
- else
- L->indirect += contribution;
- }
- else
-#endif
- {
- L->emission += contribution;
- }
+ return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1);
}
-ccl_device_inline void path_radiance_accum_ao(KernelGlobals *kg,
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- float3 alpha,
- float3 bsdf,
- float3 ao)
+ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS,
+ const float3 contribution,
+ ccl_global float *ccl_restrict buffer)
{
-#ifdef __PASSES__
- /* Store AO pass. */
- if (L->use_light_pass && state->bounce == 0) {
- L->ao += alpha * throughput * ao;
- }
-#endif
-
-#ifdef __SHADOW_TRICKS__
- /* For shadow catcher, accumulate ratio. */
- if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- float3 light = throughput * bsdf;
- L->path_total += light;
- L->path_total_shaded += ao * light;
+ /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping
+ * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte
+ * Carlo global illumination" except that here it is applied per pixel and not in hierarchical
+ * tiles. */
- if (state->flag & PATH_RAY_SHADOW_CATCHER) {
- return;
- }
+ if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) {
+ return;
}
-#endif
-
- float3 contribution = throughput * bsdf * ao;
-#ifdef __PASSES__
- if (L->use_light_pass) {
- if (state->bounce == 0) {
- /* Directly visible lighting. */
- L->direct_diffuse += contribution;
- }
- else {
- /* Indirectly visible lighting after BSDF bounce. */
- L->indirect += contribution;
- }
- }
- else
-#endif
- {
- L->emission += contribution;
+ const int sample = INTEGRATOR_STATE(path, sample);
+ if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+ kernel_write_pass_float4(
+ buffer + kernel_data.film.pass_adaptive_aux_buffer,
+ make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f));
}
}
-ccl_device_inline void path_radiance_accum_total_ao(PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- float3 bsdf)
-{
-#ifdef __SHADOW_TRICKS__
- if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- L->path_total += throughput * bsdf;
- }
-#else
- (void)L;
- (void)state;
- (void)throughput;
- (void)bsdf;
-#endif
-}
+/* --------------------------------------------------------------------
+ * Shadow catcher.
+ */
+
+#ifdef __SHADOW_CATCHER__
-ccl_device_inline void path_radiance_accum_light(KernelGlobals *kg,
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- BsdfEval *bsdf_eval,
- float3 shadow,
- float shadow_fac,
- bool is_lamp)
+/* Accumulate contribution to the Shadow Catcher pass.
+ *
+ * Returns truth if the contribution is fully handled here and is not to be added to the other
+ * passes (like combined, adaptive sampling). */
+
+ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS,
+ const float3 contribution,
+ ccl_global float *ccl_restrict buffer)
{
-#ifdef __SHADOW_TRICKS__
- if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- float3 light = throughput * bsdf_eval->sum_no_mis;
- L->path_total += light;
- L->path_total_shaded += shadow * light;
-
- if (state->flag & PATH_RAY_SHADOW_CATCHER) {
- return;
- }
+ if (!kernel_data.integrator.has_shadow_catcher) {
+ return false;
}
-#endif
- float3 shaded_throughput = throughput * shadow;
+ kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED);
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-#ifdef __PASSES__
- if (L->use_light_pass) {
- /* Compute the clamping based on the total contribution.
- * The resulting scale is then be applied to all individual components. */
- float3 full_contribution = shaded_throughput * bsdf_eval_sum(bsdf_eval);
-# ifdef __CLAMP_SAMPLE__
- path_radiance_clamp_throughput(kg, &full_contribution, &shaded_throughput, state->bounce);
-# endif
-
- if (state->bounce == 0) {
- /* directly visible lighting */
- L->direct_diffuse += shaded_throughput * bsdf_eval->diffuse;
- L->direct_glossy += shaded_throughput * bsdf_eval->glossy;
- L->direct_transmission += shaded_throughput * bsdf_eval->transmission;
- L->direct_volume += shaded_throughput * bsdf_eval->volume;
-
- if (is_lamp) {
- L->shadow += shadow * shadow_fac;
- }
- }
- else {
- /* indirectly visible lighting after BSDF bounce */
- L->indirect += full_contribution;
- }
+ /* Matte pass. */
+ if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution);
+ /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive
+ * sampling is based on how noisy the combined pass is as if there were no catchers in the
+ * scene. */
}
- else
-#endif
- {
- float3 contribution = shaded_throughput * bsdf_eval->diffuse;
- path_radiance_clamp(kg, &contribution, state->bounce);
- L->emission += contribution;
+
+ /* Shadow catcher pass. */
+ if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) {
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
+ return true;
}
-}
-ccl_device_inline void path_radiance_accum_total_light(PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- const BsdfEval *bsdf_eval)
-{
-#ifdef __SHADOW_TRICKS__
- if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- L->path_total += throughput * bsdf_eval->sum_no_mis;
- }
-#else
- (void)L;
- (void)state;
- (void)throughput;
- (void)bsdf_eval;
-#endif
+ return false;
}
-ccl_device_inline void path_radiance_accum_background(KernelGlobals *kg,
- PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput,
- float3 value)
+ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_ARGS,
+ const float3 contribution,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
{
+ if (!kernel_data.integrator.has_shadow_catcher) {
+ return false;
+ }
-#ifdef __SHADOW_TRICKS__
- if (state->flag & PATH_RAY_STORE_SHADOW_INFO) {
- L->path_total += throughput * value;
- L->path_total_shaded += throughput * value * L->shadow_transparency;
+ kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED);
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
- if (state->flag & PATH_RAY_SHADOW_CATCHER) {
- return;
- }
+ if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
+ return true;
}
-#endif
- float3 contribution = throughput * value;
-#ifdef __CLAMP_SAMPLE__
- path_radiance_clamp(kg, &contribution, state->bounce - 1);
-#endif
+ /* Matte pass. */
+ if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+ kernel_write_pass_float4(
+ buffer + kernel_data.film.pass_shadow_catcher_matte,
+ make_float4(contribution.x, contribution.y, contribution.z, transparent));
+ /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive
+ * sampling is based on how noisy the combined pass is as if there were no catchers in the
+ * scene. */
+ }
-#ifdef __PASSES__
- if (L->use_light_pass) {
- if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)
- L->background += contribution;
- else if (state->bounce == 1)
- L->direct_emission += contribution;
- else
- L->indirect += contribution;
- }
- else
-#endif
- {
- L->emission += contribution;
+ /* Shadow catcher pass. */
+ if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) {
+ /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the
+ * calculation and the alpha channel of the pass contains numbers of samples contributed to a
+ * pixel of the pass. */
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
+ return true;
}
-#ifdef __DENOISING_FEATURES__
- L->denoising_albedo += state->denoising_feature_weight * state->denoising_feature_throughput *
- value;
-#endif /* __DENOISING_FEATURES__ */
+ return false;
}
-ccl_device_inline void path_radiance_accum_transparent(PathRadiance *L,
- ccl_addr_space PathState *state,
- float3 throughput)
+ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CONST_ARGS,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
{
- L->transparent += average(throughput);
-}
+ if (!kernel_data.integrator.has_shadow_catcher) {
+ return;
+ }
-#ifdef __SHADOW_TRICKS__
-ccl_device_inline void path_radiance_accum_shadowcatcher(PathRadiance *L,
- float3 throughput,
- float3 background)
-{
- L->shadow_throughput += average(throughput);
- L->shadow_background_color += throughput * background;
- L->has_shadow_catcher = 1;
-}
-#endif
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L)
-{
-#ifdef __PASSES__
- /* this division is a bit ugly, but means we only have to keep track of
- * only a single throughput further along the path, here we recover just
- * the indirect path that is not influenced by any particular BSDF type */
- if (L->use_light_pass) {
- L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct);
- L->direct_diffuse += L->state.diffuse * L->direct_emission;
- L->direct_glossy += L->state.glossy * L->direct_emission;
- L->direct_transmission += L->state.transmission * L->direct_emission;
- L->direct_volume += L->state.volume * L->direct_emission;
-
- L->indirect = safe_divide_color(L->indirect, L->state.direct);
- L->indirect_diffuse += L->state.diffuse * L->indirect;
- L->indirect_glossy += L->state.glossy * L->indirect;
- L->indirect_transmission += L->state.transmission * L->indirect;
- L->indirect_volume += L->state.volume * L->indirect;
+ /* Matte pass. */
+ if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent);
}
-#endif
}
-ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L)
-{
-#ifdef __PASSES__
- if (L->use_light_pass) {
- L->state.diffuse = zero_float3();
- L->state.glossy = zero_float3();
- L->state.transmission = zero_float3();
- L->state.volume = zero_float3();
+#endif /* __SHADOW_CATCHER__ */
+
+/* --------------------------------------------------------------------
+ * Render passes.
+ */
- L->direct_emission = zero_float3();
- L->indirect = zero_float3();
+/* Write combined pass. */
+ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS,
+ const float3 contribution,
+ ccl_global float *ccl_restrict buffer)
+{
+#ifdef __SHADOW_CATCHER__
+ if (kernel_accum_shadow_catcher(INTEGRATOR_STATE_PASS, contribution, buffer)) {
+ return;
}
#endif
+
+ if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution);
+ }
+
+ kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer);
}
-ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L, const PathRadiance *L_src)
+/* Write combined pass with transparency. */
+ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_CONST_ARGS,
+ const float3 contribution,
+ const float transparent,
+ ccl_global float *ccl_restrict
+ buffer)
{
-#ifdef __PASSES__
- if (L->use_light_pass) {
- L->state = L_src->state;
-
- L->direct_emission = L_src->direct_emission;
- L->indirect = L_src->indirect;
+#ifdef __SHADOW_CATCHER__
+ if (kernel_accum_shadow_catcher_transparent(
+ INTEGRATOR_STATE_PASS, contribution, transparent, buffer)) {
+ return;
}
#endif
+
+ if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+ kernel_write_pass_float4(
+ buffer + kernel_data.film.pass_combined,
+ make_float4(contribution.x, contribution.y, contribution.z, transparent));
+ }
+
+ kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer);
}
-#ifdef __SHADOW_TRICKS__
-ccl_device_inline void path_radiance_sum_shadowcatcher(KernelGlobals *kg,
- PathRadiance *L,
- float3 *L_sum,
- float *alpha)
+/* Write background or emission to appropriate pass. */
+ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE_CONST_ARGS,
+ float3 contribution,
+ ccl_global float *ccl_restrict
+ buffer,
+ const int pass)
{
- /* Calculate current shadow of the path. */
- float path_total = average(L->path_total);
- float shadow;
+ if (!(kernel_data.film.light_pass_flag & PASS_ANY)) {
+ return;
+ }
- if (UNLIKELY(!isfinite_safe(path_total))) {
-# ifdef __KERNEL_DEBUG_NAN__
- kernel_assert(!"Non-finite total radiance along the path");
-# endif
- shadow = 0.0f;
+#ifdef __PASSES__
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+ int pass_offset = PASS_UNUSED;
+
+ /* Denoising albedo. */
+# ifdef __DENOISING_FEATURES__
+ if (path_flag & PATH_RAY_DENOISING_FEATURES) {
+ if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
+ const float3 denoising_feature_throughput = INTEGRATOR_STATE(path,
+ denoising_feature_throughput);
+ const float3 denoising_albedo = denoising_feature_throughput * contribution;
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
+ }
}
- else if (path_total == 0.0f) {
- shadow = L->shadow_transparency;
+# endif /* __DENOISING_FEATURES__ */
+
+ if (!(path_flag & PATH_RAY_ANY_PASS)) {
+ /* Directly visible, write to emission or background pass. */
+ pass_offset = pass;
+ }
+ else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
+ /* Indirectly visible through reflection. */
+ const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
+ ((INTEGRATOR_STATE(path, bounce) == 1) ?
+ kernel_data.film.pass_glossy_direct :
+ kernel_data.film.pass_glossy_indirect) :
+ ((INTEGRATOR_STATE(path, bounce) == 1) ?
+ kernel_data.film.pass_transmission_direct :
+ kernel_data.film.pass_transmission_indirect);
+
+ if (glossy_pass_offset != PASS_UNUSED) {
+ /* Glossy is a subset of the throughput, reconstruct it here using the
+ * diffuse-glossy ratio. */
+ const float3 ratio = INTEGRATOR_STATE(path, diffuse_glossy_ratio);
+ const float3 glossy_contribution = (one_float3() - ratio) * contribution;
+ kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
+ }
+
+ /* Reconstruct diffuse subset of throughput. */
+ pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_diffuse_direct :
+ kernel_data.film.pass_diffuse_indirect;
+ if (pass_offset != PASS_UNUSED) {
+ contribution *= INTEGRATOR_STATE(path, diffuse_glossy_ratio);
+ }
}
- else {
- float path_total_shaded = average(L->path_total_shaded);
- shadow = path_total_shaded / path_total;
+ else if (path_flag & PATH_RAY_VOLUME_PASS) {
+ /* Indirectly visible through volume. */
+ pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_volume_direct :
+ kernel_data.film.pass_volume_indirect;
}
- /* Calculate final light sum and transparency for shadow catcher object. */
- if (kernel_data.background.transparent) {
- *alpha -= L->shadow_throughput * shadow;
- }
- else {
- L->shadow_background_color *= shadow;
- *L_sum += L->shadow_background_color;
+ /* Single write call for GPU coherence. */
+ if (pass_offset != PASS_UNUSED) {
+ kernel_write_pass_float3(buffer + pass_offset, contribution);
}
+#endif /* __PASSES__ */
}
-#endif
-ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg,
- PathRadiance *L,
- float *alpha)
+/* Write light contribution to render buffer. */
+ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
{
- float3 L_sum;
- /* Light Passes are used */
+ /* The throughput for shadow paths already contains the light shader evaluation. */
+ float3 contribution = INTEGRATOR_STATE(shadow_path, throughput);
+ kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce) - 1);
+
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+ render_buffer);
+
+ kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer);
+
#ifdef __PASSES__
- float3 L_direct, L_indirect;
- if (L->use_light_pass) {
- path_radiance_sum_indirect(L);
-
- L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_volume +
- L->emission;
- L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission +
- L->indirect_volume;
-
- if (!kernel_data.background.transparent)
- L_direct += L->background;
-
- L_sum = L_direct + L_indirect;
- float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
-
- /* Reject invalid value */
- if (!isfinite_safe(sum)) {
-# ifdef __KERNEL_DEBUG_NAN__
- kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!");
-# endif
- L_sum = zero_float3();
-
- L->direct_diffuse = zero_float3();
- L->direct_glossy = zero_float3();
- L->direct_transmission = zero_float3();
- L->direct_volume = zero_float3();
-
- L->indirect_diffuse = zero_float3();
- L->indirect_glossy = zero_float3();
- L->indirect_transmission = zero_float3();
- L->indirect_volume = zero_float3();
-
- L->emission = zero_float3();
+ if (kernel_data.film.light_pass_flag & PASS_ANY) {
+ const int path_flag = INTEGRATOR_STATE(shadow_path, flag);
+ int pass_offset = PASS_UNUSED;
+
+ if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
+ /* Indirectly visible through reflection. */
+ const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
+ ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+ kernel_data.film.pass_glossy_direct :
+ kernel_data.film.pass_glossy_indirect) :
+ ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+ kernel_data.film.pass_transmission_direct :
+ kernel_data.film.pass_transmission_indirect);
+
+ if (glossy_pass_offset != PASS_UNUSED) {
+ /* Glossy is a subset of the throughput, reconstruct it here using the
+ * diffuse-glossy ratio. */
+ const float3 ratio = INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio);
+ const float3 glossy_contribution = (one_float3() - ratio) * contribution;
+ kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
+ }
+
+ /* Reconstruct diffuse subset of throughput. */
+ pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+ kernel_data.film.pass_diffuse_direct :
+ kernel_data.film.pass_diffuse_indirect;
+ if (pass_offset != PASS_UNUSED) {
+ contribution *= INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio);
+ }
+ }
+ else if (path_flag & PATH_RAY_VOLUME_PASS) {
+ /* Indirectly visible through volume. */
+ pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ?
+ kernel_data.film.pass_volume_direct :
+ kernel_data.film.pass_volume_indirect;
}
- }
- /* No Light Passes */
- else
-#endif
- {
- L_sum = L->emission;
+ /* Single write call for GPU coherence. */
+ if (pass_offset != PASS_UNUSED) {
+ kernel_write_pass_float3(buffer + pass_offset, contribution);
+ }
- /* Reject invalid value */
- float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z);
- if (!isfinite_safe(sum)) {
-#ifdef __KERNEL_DEBUG_NAN__
- kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!");
-#endif
- L_sum = zero_float3();
+ /* Write shadow pass. */
+ if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
+ (path_flag & PATH_RAY_CAMERA)) {
+ const float3 unshadowed_throughput = INTEGRATOR_STATE(shadow_path, unshadowed_throughput);
+ const float3 shadowed_throughput = INTEGRATOR_STATE(shadow_path, throughput);
+ const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) *
+ kernel_data.film.pass_shadow_scale;
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow);
}
}
+#endif
+}
- /* Compute alpha. */
- *alpha = 1.0f - L->transparent;
+/* Write transparency to render buffer.
+ *
+ * Note that we accumulate transparency = 1 - alpha in the render buffer.
+ * Otherwise we'd have to write alpha on path termination, which happens
+ * in many places. */
+ccl_device_inline void kernel_accum_transparent(INTEGRATOR_STATE_CONST_ARGS,
+ const float transparent,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+ render_buffer);
- /* Add shadow catcher contributions. */
-#ifdef __SHADOW_TRICKS__
- if (L->has_shadow_catcher) {
- path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha);
+ if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent);
}
-#endif /* __SHADOW_TRICKS__ */
- return L_sum;
+ kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_PASS, transparent, buffer);
}
-ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg,
- PathRadiance *L,
- float3 *noisy,
- float3 *clean)
+/* Write background contribution to render buffer.
+ *
+ * Includes transparency, matching kernel_accum_transparent. */
+ccl_device_inline void kernel_accum_background(INTEGRATOR_STATE_CONST_ARGS,
+ const float3 L,
+ const float transparent,
+ const bool is_transparent_background_ray,
+ ccl_global float *ccl_restrict render_buffer)
{
-#ifdef __PASSES__
- kernel_assert(L->use_light_pass);
-
- *clean = L->emission + L->background;
- *noisy = L->direct_volume + L->indirect_volume;
-
-# define ADD_COMPONENT(flag, component) \
- if (kernel_data.film.denoising_flags & flag) \
- *clean += component; \
- else \
- *noisy += component;
-
- ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR, L->direct_diffuse);
- ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND, L->indirect_diffuse);
- ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR, L->direct_glossy);
- ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy);
- ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission);
- ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission);
-# undef ADD_COMPONENT
-#else
- *noisy = L->emission;
- *clean = zero_float3();
-#endif
+ float3 contribution = INTEGRATOR_STATE(path, throughput) * L;
+ kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1);
-#ifdef __SHADOW_TRICKS__
- if (L->has_shadow_catcher) {
- *noisy += L->shadow_background_color;
- }
-#endif
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+ render_buffer);
- *noisy = ensure_finite3(*noisy);
- *clean = ensure_finite3(*clean);
+ if (is_transparent_background_ray) {
+ kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer);
+ }
+ else {
+ kernel_accum_combined_transparent_pass(
+ INTEGRATOR_STATE_PASS, contribution, transparent, buffer);
+ }
+ kernel_accum_emission_or_background_pass(
+ INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_background);
}
-ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample)
+/* Write emission to render buffer. */
+ccl_device_inline void kernel_accum_emission(INTEGRATOR_STATE_CONST_ARGS,
+ const float3 throughput,
+ const float3 L,
+ ccl_global float *ccl_restrict render_buffer)
{
-#ifdef __SPLIT_KERNEL__
-# define safe_float3_add(f, v) \
- do { \
- ccl_global float *p = (ccl_global float *)(&(f)); \
- atomic_add_and_fetch_float(p + 0, (v).x); \
- atomic_add_and_fetch_float(p + 1, (v).y); \
- atomic_add_and_fetch_float(p + 2, (v).z); \
- } while (0)
-# define safe_float_add(f, v) atomic_add_and_fetch_float(&(f), (v))
-#else
-# define safe_float3_add(f, v) (f) += (v)
-# define safe_float_add(f, v) (f) += (v)
-#endif /* __SPLIT_KERNEL__ */
+ float3 contribution = throughput * L;
+ kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1);
-#ifdef __PASSES__
- safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse);
- safe_float3_add(L->direct_glossy, L_sample->direct_glossy);
- safe_float3_add(L->direct_transmission, L_sample->direct_transmission);
- safe_float3_add(L->direct_volume, L_sample->direct_volume);
-
- safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse);
- safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy);
- safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission);
- safe_float3_add(L->indirect_volume, L_sample->indirect_volume);
-
- safe_float3_add(L->background, L_sample->background);
- safe_float3_add(L->ao, L_sample->ao);
- safe_float3_add(L->shadow, L_sample->shadow);
- safe_float_add(L->mist, L_sample->mist);
-#endif /* __PASSES__ */
- safe_float3_add(L->emission, L_sample->emission);
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+ render_buffer);
-#undef safe_float_add
-#undef safe_float3_add
+ kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer);
+ kernel_accum_emission_or_background_pass(
+ INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_emission);
}
CCL_NAMESPACE_END