git.blender.org/blender.git
author     Brecht Van Lommel <brecht@blender.org>   2021-09-20 18:59:20 +0300
committer  Brecht Van Lommel <brecht@blender.org>   2021-09-21 15:55:54 +0300
commit     08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree       6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/kernel/kernel_film.h
parent     fa6b1007bad065440950cd67deb16a04f368856f (diff)
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more.

Some features have also been removed or changed, breaking backwards compatibility, including the removal of the OpenCL backend, for which alternatives are under development.

Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles

Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)

For the full commit history, see the cycles-x branch. This squashes all the changes together, since intermediate changes would often fail to build or pass tests.

Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T77185, T69800
Diffstat (limited to 'intern/cycles/kernel/kernel_film.h')
-rw-r--r--  intern/cycles/kernel/kernel_film.h  567
1 file changed, 482 insertions(+), 85 deletions(-)
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index a6fd4f1dc7e..fa93f4830d1 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -14,119 +14,516 @@
* limitations under the License.
*/
+#pragma once
+
CCL_NAMESPACE_BEGIN
-ccl_device float4 film_get_pass_result(KernelGlobals *kg,
- ccl_global float *buffer,
- float sample_scale,
- int index,
- bool use_display_sample_scale)
-{
- float4 pass_result;
-
- int display_pass_stride = kernel_data.film.display_pass_stride;
- int display_pass_components = kernel_data.film.display_pass_components;
-
- if (display_pass_components == 4) {
- float4 in = *(ccl_global float4 *)(buffer + display_pass_stride +
- index * kernel_data.film.pass_stride);
- float alpha = use_display_sample_scale ?
- (kernel_data.film.use_display_pass_alpha ? in.w : 1.0f / sample_scale) :
- 1.0f;
-
- pass_result = make_float4(in.x, in.y, in.z, alpha);
-
- int display_divide_pass_stride = kernel_data.film.display_divide_pass_stride;
- if (display_divide_pass_stride != -1) {
- ccl_global float4 *divide_in = (ccl_global float4 *)(buffer + display_divide_pass_stride +
- index * kernel_data.film.pass_stride);
- float3 divided = safe_divide_even_color(float4_to_float3(pass_result),
- float4_to_float3(*divide_in));
- pass_result = make_float4(divided.x, divided.y, divided.z, pass_result.w);
- }
+/* --------------------------------------------------------------------
+ * Common utilities.
+ */
- if (kernel_data.film.use_display_exposure) {
- float exposure = kernel_data.film.exposure;
- pass_result *= make_float4(exposure, exposure, exposure, 1.0f);
- }
+/* The input buffer contains transparency = 1 - alpha; this converts it to
+ * alpha. Also clamp, since alpha might end up outside of 0..1 due to Russian
+ * roulette. */
+ccl_device_forceinline float film_transparency_to_alpha(float transparency)
+{
+ return saturate(1.0f - transparency);
+}
+
+ccl_device_inline float film_get_scale(const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+ if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
+ return kfilm_convert->scale;
+ }
+
+ if (kfilm_convert->pass_use_filter) {
+ const uint sample_count = *((const uint *)(buffer + kfilm_convert->pass_sample_count));
+ return 1.0f / sample_count;
+ }
+
+ return 1.0f;
+}
+
+ccl_device_inline float film_get_scale_exposure(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+ if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
+ return kfilm_convert->scale_exposure;
+ }
+
+ const float scale = film_get_scale(kfilm_convert, buffer);
+
+ if (kfilm_convert->pass_use_exposure) {
+ return scale * kfilm_convert->exposure;
+ }
+
+ return scale;
+}
+
+ccl_device_inline bool film_get_scale_and_scale_exposure(
+ const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict scale,
+ float *ccl_restrict scale_exposure)
+{
+ if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
+ *scale = kfilm_convert->scale;
+ *scale_exposure = kfilm_convert->scale_exposure;
+ return true;
+ }
+
+ const uint sample_count = *((const uint *)(buffer + kfilm_convert->pass_sample_count));
+ if (!sample_count) {
+ *scale = 0.0f;
+ *scale_exposure = 0.0f;
+ return false;
+ }
+
+ if (kfilm_convert->pass_use_filter) {
+ *scale = 1.0f / sample_count;
}
- else if (display_pass_components == 1) {
- ccl_global float *in = (ccl_global float *)(buffer + display_pass_stride +
- index * kernel_data.film.pass_stride);
- pass_result = make_float4(*in, *in, *in, 1.0f / sample_scale);
+ else {
+ *scale = 1.0f;
+ }
+
+ if (kfilm_convert->pass_use_exposure) {
+ *scale_exposure = *scale * kfilm_convert->exposure;
+ }
+ else {
+ *scale_exposure = *scale;
+ }
+
+ return true;
+}
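The two helpers above reduce to simple arithmetic once the per-pixel sample count is known. Below is a minimal standalone sketch of that decision tree, using plain floats and made-up values rather than the actual KernelFilmConvert data.

/* Minimal standalone sketch of the scale / scale_exposure rules above.
 * Values are illustrative and not taken from the patch. */
#include <cstdio>

int main()
{
  const float exposure = 2.0f;         /* stands in for kfilm_convert->exposure */
  const float sample_count = 16.0f;    /* per-pixel count from the sample-count pass */
  const bool pass_use_filter = true;   /* filtered passes divide by the sample count */
  const bool pass_use_exposure = true; /* color passes also apply exposure */

  const float scale = pass_use_filter ? 1.0f / sample_count : 1.0f;
  const float scale_exposure = pass_use_exposure ? scale * exposure : scale;

  std::printf("scale=%f scale_exposure=%f\n", scale, scale_exposure);
  return 0;
}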
+
+/* --------------------------------------------------------------------
+ * Float (scalar) passes.
+ */
+
+ccl_device_inline void film_get_pass_pixel_depth(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 1);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
+
+ const float *in = buffer + kfilm_convert->pass_offset;
+ const float f = *in;
+
+ pixel[0] = (f == 0.0f) ? 1e10f : f * scale_exposure;
+}
+
+ccl_device_inline void film_get_pass_pixel_mist(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 1);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
+
+ const float *in = buffer + kfilm_convert->pass_offset;
+ const float f = *in;
+
+ /* Note that we accumulate 1 - mist in the kernel to avoid having to
+ * track the mist values in the integrator state. */
+ pixel[0] = saturate(1.0f - f * scale_exposure);
+}
+
+ccl_device_inline void film_get_pass_pixel_sample_count(
+ const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+  /* TODO(sergey): Consider normalizing into the [0..1] range, so that it is possible to see a
+   * meaningful value when the adaptive sampler stops rendering the image well before the maximum
+   * number of samples is reached (for example, when the number of samples is set to 0 in the
+   * viewport). */
+
+ kernel_assert(kfilm_convert->num_components >= 1);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float *in = buffer + kfilm_convert->pass_offset;
+ const float f = *in;
+
+ pixel[0] = __float_as_uint(f) * kfilm_convert->scale;
+}
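The sample-count pass stores an integer bit pattern inside the float render buffer, which is why the read above reinterprets bits (__float_as_uint) instead of converting the value. A standalone sketch of that round trip follows, using std::memcpy as a host-side stand-in for the device intrinsic.

/* Standalone sketch: an integer sample count stored bit-for-bit in a float
 * buffer and read back via bit reinterpretation (host-side stand-in for
 * __float_as_uint). */
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t float_as_uint(float f)
{
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u)); /* reinterpret the bits, do not convert the value */
  return u;
}

int main()
{
  const uint32_t samples_written = 37; /* what the kernel accumulated */
  float buffer_value;
  std::memcpy(&buffer_value, &samples_written, sizeof(buffer_value)); /* as stored in the pass */

  std::printf("recovered sample count: %u\n", float_as_uint(buffer_value));
  return 0;
}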
+
+ccl_device_inline void film_get_pass_pixel_float(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 1);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
+
+ const float *in = buffer + kfilm_convert->pass_offset;
+ const float f = *in;
+
+ pixel[0] = f * scale_exposure;
+}
+
+/* --------------------------------------------------------------------
+ * Float 3 passes.
+ */
+
+ccl_device_inline void film_get_pass_pixel_light_path(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 3);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ /* Read light pass. */
+ const float *in = buffer + kfilm_convert->pass_offset;
+ float3 f = make_float3(in[0], in[1], in[2]);
+
+ /* Optionally add indirect light pass. */
+ if (kfilm_convert->pass_indirect != PASS_UNUSED) {
+ const float *in_indirect = buffer + kfilm_convert->pass_indirect;
+ const float3 f_indirect = make_float3(in_indirect[0], in_indirect[1], in_indirect[2]);
+ f += f_indirect;
+ }
+
+ /* Optionally divide out color. */
+ if (kfilm_convert->pass_divide != PASS_UNUSED) {
+ const float *in_divide = buffer + kfilm_convert->pass_divide;
+ const float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]);
+ f = safe_divide_even_color(f, f_divide);
+
+ /* Exposure only, sample scale cancels out. */
+ f *= kfilm_convert->exposure;
+ }
+ else {
+ /* Sample scale and exposure. */
+ f *= film_get_scale_exposure(kfilm_convert, buffer);
+ }
+
+ pixel[0] = f.x;
+ pixel[1] = f.y;
+ pixel[2] = f.z;
+}
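When the divide pass is present, both accumulators carry the same per-sample weight, so that weight cancels in the division and only exposure is applied afterwards. A short single-channel numeric sketch of that cancellation, with made-up values:

/* Numeric sketch of the divide-out step above; single channel, made-up values.
 * Both accumulators carry the same 1/num_samples factor, which cancels. */
#include <cstdio>

int main()
{
  const float inv_samples = 1.0f / 8.0f;
  const float exposure = 1.5f;

  /* Accumulated (direct + indirect) light and the divide pass, both still
   * multiplied by the per-sample weight. */
  const float light_accum = 4.0f * inv_samples;
  const float divide_accum = 0.5f * inv_samples;

  /* The division cancels the sample scale; only exposure remains to apply. */
  const float result = (divide_accum != 0.0f) ? (light_accum / divide_accum) * exposure : 0.0f;

  std::printf("demodulated light: %f\n", result); /* (4.0 / 0.5) * 1.5 = 12.0 */
  return 0;
}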
+
+ccl_device_inline void film_get_pass_pixel_float3(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 3);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
+
+ const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float3 f = make_float3(in[0], in[1], in[2]) * scale_exposure;
+
+ pixel[0] = f.x;
+ pixel[1] = f.y;
+ pixel[2] = f.z;
+}
+
+/* --------------------------------------------------------------------
+ * Float4 passes.
+ */
+
+ccl_device_inline void film_get_pass_pixel_motion(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 4);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_motion_weight != PASS_UNUSED);
+
+ const float *in = buffer + kfilm_convert->pass_offset;
+ const float *in_weight = buffer + kfilm_convert->pass_motion_weight;
+
+ const float weight = in_weight[0];
+ const float weight_inv = (weight > 0.0f) ? 1.0f / weight : 0.0f;
+
+ const float4 motion = make_float4(in[0], in[1], in[2], in[3]) * weight_inv;
+
+ pixel[0] = motion.x;
+ pixel[1] = motion.y;
+ pixel[2] = motion.z;
+ pixel[3] = motion.w;
+}
+
+ccl_device_inline void film_get_pass_pixel_cryptomatte(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 4);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale = film_get_scale(kfilm_convert, buffer);
+
+ const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float4 f = make_float4(in[0], in[1], in[2], in[3]);
+
+  /* x and z contain integer IDs; don't rescale them.
+   * y and w contain matte weights; those do get scaled. */
+ pixel[0] = f.x;
+ pixel[1] = f.y * scale;
+ pixel[2] = f.z;
+ pixel[3] = f.w * scale;
+}
+
+ccl_device_inline void film_get_pass_pixel_float4(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 4);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ float scale, scale_exposure;
+ film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
+
+ const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
+ const float alpha = in[3] * scale;
+
+ pixel[0] = color.x;
+ pixel[1] = color.y;
+ pixel[2] = color.z;
+ pixel[3] = alpha;
+}
+
+ccl_device_inline void film_get_pass_pixel_combined(const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+  kernel_assert(kfilm_convert->num_components == 4);
+  kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+  /* Channel 3 contains transparency = 1 - alpha for the combined pass. */
+
+ float scale, scale_exposure;
+ if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) {
+ pixel[0] = 0.0f;
+ pixel[1] = 0.0f;
+ pixel[2] = 0.0f;
+ pixel[3] = 0.0f;
+ return;
}
- return pass_result;
+ const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
+  const float transparency = in[3] * scale;
+
+ pixel[0] = color.x;
+ pixel[1] = color.y;
+ pixel[2] = color.z;
+  pixel[3] = film_transparency_to_alpha(transparency);
}
-ccl_device float4 film_map(KernelGlobals *kg, float4 rgba_in, float scale)
+/* --------------------------------------------------------------------
+ * Shadow catcher.
+ */
+
+ccl_device_inline float3
+film_calculate_shadow_catcher_denoised(const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
{
- float4 result;
+ kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
- /* Conversion to SRGB. */
- result.x = color_linear_to_srgb(rgba_in.x * scale);
- result.y = color_linear_to_srgb(rgba_in.y * scale);
- result.z = color_linear_to_srgb(rgba_in.z * scale);
+ float scale, scale_exposure;
+ film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
- /* Clamp since alpha might be > 1.0 due to Russian roulette. */
- result.w = saturate(rgba_in.w * scale);
+ ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher;
- return result;
+ const float3 pixel = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]) * scale_exposure;
+
+ return pixel;
}
-ccl_device uchar4 film_float_to_byte(float4 color)
+ccl_device_inline float3 safe_divide_shadow_catcher(float3 a, float3 b)
{
- uchar4 result;
+ float x, y, z;
- /* simple float to byte conversion */
- result.x = (uchar)(saturate(color.x) * 255.0f);
- result.y = (uchar)(saturate(color.y) * 255.0f);
- result.z = (uchar)(saturate(color.z) * 255.0f);
- result.w = (uchar)(saturate(color.w) * 255.0f);
+ x = (b.x != 0.0f) ? a.x / b.x : 1.0f;
+ y = (b.y != 0.0f) ? a.y / b.y : 1.0f;
+ z = (b.z != 0.0f) ? a.z / b.z : 1.0f;
- return result;
+ return make_float3(x, y, z);
}
-ccl_device void kernel_film_convert_to_byte(KernelGlobals *kg,
- ccl_global uchar4 *rgba,
- ccl_global float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride)
+ccl_device_inline float3
+film_calculate_shadow_catcher(const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
{
- /* buffer offset */
- int index = offset + x + y * stride;
+  /* For the shadow catcher pass we divide the combined pass by the shadow catcher.
+   * Note that the denoised shadow catcher pass contains a value that only needs to be scaled,
+   * not computed via a division. */
- bool use_display_sample_scale = (kernel_data.film.display_divide_pass_stride == -1);
- float4 rgba_in = film_get_pass_result(kg, buffer, sample_scale, index, use_display_sample_scale);
+ if (kfilm_convert->is_denoised) {
+ return film_calculate_shadow_catcher_denoised(kfilm_convert, buffer);
+ }
- /* map colors */
- float4 float_result = film_map(kg, rgba_in, use_display_sample_scale ? sample_scale : 1.0f);
- uchar4 uchar_result = film_float_to_byte(float_result);
+ kernel_assert(kfilm_convert->pass_shadow_catcher_sample_count != PASS_UNUSED);
- rgba += index;
- *rgba = uchar_result;
+  /* If there is no shadow catcher object in this pixel, no modification of the light is needed,
+   * so return one. */
+ ccl_global const float *in_catcher_sample_count =
+ buffer + kfilm_convert->pass_shadow_catcher_sample_count;
+ const float num_samples = in_catcher_sample_count[0];
+ if (num_samples == 0.0f) {
+ return one_float3();
+ }
+
+ kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
+ ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher;
+
+  /* NOTE: It is possible that the Shadow Catcher pass is requested as an output without actual
+   * shadow catcher objects in the scene. In this case the auxiliary passes required for the
+   * division are not allocated (to save memory), so delay the asserts to this point so that the
+   * number-of-samples check handles such a configuration. */
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_combined != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED);
+
+ ccl_global const float *in_combined = buffer + kfilm_convert->pass_combined;
+ ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte;
+
+  /* No scaling needed. The integration works in such a way that the number of samples in the
+   * combined and shadow catcher passes is the same, and exposure cancels during the division. */
+ const float3 color_catcher = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]);
+ const float3 color_combined = make_float3(in_combined[0], in_combined[1], in_combined[2]);
+ const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]);
+
+  /* The contribution of the matte objects needs to be ignored when doing the division (otherwise
+   * there will be artifacts caused by anti-aliasing). Since the combined pass is used for adaptive
+   * sampling and needs to contain matte objects, we subtract the matte objects' contribution here.
+   * This is the same as if the matte objects were not accumulated into the combined pass. */
+ const float3 combined_no_matte = color_combined - color_matte;
+
+ const float3 shadow_catcher = safe_divide_shadow_catcher(combined_no_matte, color_catcher);
+
+ const float scale = film_get_scale(kfilm_convert, buffer);
+ const float transparency = in_combined[3] * scale;
+ const float alpha = film_transparency_to_alpha(transparency);
+
+  /* Alpha-over on white using the transparency of the combined pass. This eliminates artifacts
+   * which happen on the edges of a shadow catcher when using a transparent film.
+   * Note that we treat the shadow catcher as straight alpha here because alpha got cancelled out
+   * during the division. */
+ const float3 pixel = (1.0f - alpha) * one_float3() + alpha * shadow_catcher;
+
+ return pixel;
}
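Condensed, the pass above is a per-channel ratio of the matte-free combined light to the unshadowed catcher, alpha-overed on white. A single-channel standalone sketch with made-up values (the kernel itself operates on float3 passes):

/* Single-channel standalone sketch of the shadow catcher math above.
 * All values are made up; the kernel operates on float3 passes. */
#include <cstdio>

static float safe_ratio(float a, float b)
{
  return (b != 0.0f) ? a / b : 1.0f; /* no catcher light received -> no darkening */
}

int main()
{
  const float combined = 0.30f; /* combined pass, still including matte objects */
  const float matte = 0.10f;    /* shadow catcher matte pass */
  const float catcher = 0.40f;  /* unshadowed shadow catcher pass */
  const float alpha = 0.80f;    /* 1 - transparency of the combined pass */

  /* Remove the matte contribution, then divide by the unshadowed catcher. */
  const float shadow_catcher = safe_ratio(combined - matte, catcher);

  /* Alpha-over on white hides edge artifacts with a transparent film. */
  const float pixel = (1.0f - alpha) * 1.0f + alpha * shadow_catcher;

  std::printf("shadow catcher value: %f\n", pixel); /* 0.2 + 0.8 * 0.5 = 0.6 */
  return 0;
}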
-ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg,
- ccl_global uchar4 *rgba,
- ccl_global float *buffer,
- float sample_scale,
- int x,
- int y,
- int offset,
- int stride)
+ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
+ const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
{
- /* buffer offset */
- int index = offset + x + y * stride;
+ /* The approximation of the shadow is 1 - average(shadow_catcher_pass). A better approximation
+ * is possible.
+ *
+   * The matte is alpha-overed onto the shadow (which is, in effect, alpha-overing the shadow onto
+   * the footage, and then alpha-overing the synthetic objects on top). */
- bool use_display_sample_scale = (kernel_data.film.display_divide_pass_stride == -1);
- float4 rgba_in = film_get_pass_result(kg, buffer, sample_scale, index, use_display_sample_scale);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED);
+
+ float scale, scale_exposure;
+ if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
+ ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte;
+
+ const float3 shadow_catcher = film_calculate_shadow_catcher(kfilm_convert, buffer);
+ const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]) * scale_exposure;
+
+ const float transparency = in_matte[3] * scale;
+ const float alpha = saturate(1.0f - transparency);
+
+ const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha;
+
+ if (kfilm_convert->use_approximate_shadow_catcher_background) {
+ kernel_assert(kfilm_convert->pass_background != PASS_UNUSED);
+
+ ccl_global const float *in_background = buffer + kfilm_convert->pass_background;
+ const float3 color_background = make_float3(
+ in_background[0], in_background[1], in_background[2]) *
+ scale_exposure;
+ const float3 alpha_over = color_matte + color_background * (1.0f - alpha_matte);
+ return make_float4(alpha_over.x, alpha_over.y, alpha_over.z, 1.0f);
+ }
- ccl_global half *out = (ccl_global half *)rgba + index * 4;
- float4_store_half(out, rgba_in, use_display_sample_scale ? sample_scale : 1.0f);
+ return make_float4(color_matte.x, color_matte.y, color_matte.z, alpha_matte);
+}
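The matte-with-shadow combination above can be followed channel by channel: the shadow is approximated as 1 - average(shadow_catcher), the matte alpha is alpha-overed onto it, and the result is optionally composited over the background pass. A single-channel sketch with made-up inputs:

/* Single-channel sketch of the matte-with-shadow alpha computation above.
 * Inputs are illustrative; the kernel averages an RGB shadow catcher value. */
#include <cstdio>

int main()
{
  const float matte = 0.25f;          /* scaled shadow catcher matte color */
  const float transparency = 0.40f;   /* stored in the matte pass alpha channel */
  const float shadow_catcher = 0.60f; /* result of the shadow catcher computation */
  const float background = 0.90f;     /* background pass, used for the approximate combine */

  const float alpha = 1.0f - transparency;

  /* Shadow approximated as 1 - shadow_catcher, matte alpha-overed onto it. */
  const float alpha_matte = (1.0f - alpha) * (1.0f - shadow_catcher) + alpha;

  /* Optional approximate "with shadow and background" preview. */
  const float alpha_over = matte + background * (1.0f - alpha_matte);

  std::printf("alpha_matte=%f alpha_over=%f\n", alpha_matte, alpha_over);
  return 0;
}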
+
+ccl_device_inline void film_get_pass_pixel_shadow_catcher(
+ const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 3);
+
+ const float3 pixel_value = film_calculate_shadow_catcher(kfilm_convert, buffer);
+
+ pixel[0] = pixel_value.x;
+ pixel[1] = pixel_value.y;
+ pixel[2] = pixel_value.z;
+}
+
+ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow(
+ const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 3 || kfilm_convert->num_components == 4);
+
+ const float4 pixel_value = film_calculate_shadow_catcher_matte_with_shadow(kfilm_convert,
+ buffer);
+
+ pixel[0] = pixel_value.x;
+ pixel[1] = pixel_value.y;
+ pixel[2] = pixel_value.z;
+ if (kfilm_convert->num_components == 4) {
+ pixel[3] = pixel_value.w;
+ }
+}
+
+/* --------------------------------------------------------------------
+ * Compositing and overlays.
+ */
+
+ccl_device_inline void film_apply_pass_pixel_overlays_rgba(
+ const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ float *ccl_restrict pixel)
+{
+ if (kfilm_convert->show_active_pixels &&
+ kfilm_convert->pass_adaptive_aux_buffer != PASS_UNUSED) {
+ if (buffer[kfilm_convert->pass_adaptive_aux_buffer + 3] == 0.0f) {
+ const float3 active_rgb = make_float3(1.0f, 0.0f, 0.0f);
+ const float3 mix_rgb = interp(make_float3(pixel[0], pixel[1], pixel[2]), active_rgb, 0.5f);
+ pixel[0] = mix_rgb.x;
+ pixel[1] = mix_rgb.y;
+ pixel[2] = mix_rgb.z;
+ }
+ }
}
CCL_NAMESPACE_END
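For completeness, the active-pixel overlay near the end is just a 50% mix toward red for pixels whose adaptive-sampling aux value marks them as not yet converged. A standalone sketch of that mix (the pixel values and the aux flag are made up):

/* Standalone sketch of the active-pixel overlay mix above. A pixel whose
 * adaptive-sampling aux alpha is still 0 is blended 50% with red. */
#include <cstdio>

int main()
{
  float pixel[3] = {0.2f, 0.7f, 0.3f}; /* illustrative RGB */
  const float aux_alpha = 0.0f;        /* 0 means the pixel is still being sampled */

  if (aux_alpha == 0.0f) {
    const float active_rgb[3] = {1.0f, 0.0f, 0.0f};
    for (int i = 0; i < 3; i++) {
      pixel[i] = 0.5f * pixel[i] + 0.5f * active_rgb[i]; /* interp(pixel, red, 0.5) */
    }
  }

  std::printf("overlaid pixel: %f %f %f\n", pixel[0], pixel[1], pixel[2]);
  return 0;
}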