git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/kernel/film')
-rw-r--r--  intern/cycles/kernel/film/film_accumulate.h         553
-rw-r--r--  intern/cycles/kernel/film/film_adaptive_sampling.h   160
-rw-r--r--  intern/cycles/kernel/film/film_id_passes.h           106
-rw-r--r--  intern/cycles/kernel/film/film_passes.h              334
-rw-r--r--  intern/cycles/kernel/film/film_read.h                532
-rw-r--r--  intern/cycles/kernel/film/film_write_passes.h         88
6 files changed, 1773 insertions(+), 0 deletions(-)
diff --git a/intern/cycles/kernel/film/film_accumulate.h b/intern/cycles/kernel/film/film_accumulate.h
new file mode 100644
index 00000000000..914e165a9cd
--- /dev/null
+++ b/intern/cycles/kernel/film/film_accumulate.h
@@ -0,0 +1,553 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/film/film_adaptive_sampling.h"
+#include "kernel/film/film_write_passes.h"
+
+#include "kernel/integrator/integrator_shadow_catcher.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* --------------------------------------------------------------------
+ * BSDF Evaluation
+ *
+ * BSDF evaluation result, split between diffuse and glossy. This is used to
+ * accumulate render passes separately. Note that reflection, transmission
+ * and volume scattering are written to different render passes, but we assume
+ * that only one of those can happen at a bounce, and so do not need to accumulate
+ * them separately. */
+
+ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval,
+ const bool is_diffuse,
+ float3 value)
+{
+ eval->diffuse = zero_float3();
+ eval->glossy = zero_float3();
+
+ if (is_diffuse) {
+ eval->diffuse = value;
+ }
+ else {
+ eval->glossy = value;
+ }
+}
+
+ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval,
+ const bool is_diffuse,
+ float3 value,
+ float mis_weight)
+{
+ value *= mis_weight;
+
+ if (is_diffuse) {
+ eval->diffuse += value;
+ }
+ else {
+ eval->glossy += value;
+ }
+}
+
+ccl_device_inline bool bsdf_eval_is_zero(ccl_private BsdfEval *eval)
+{
+ return is_zero(eval->diffuse) && is_zero(eval->glossy);
+}
+
+ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value)
+{
+ eval->diffuse *= value;
+ eval->glossy *= value;
+}
+
+ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value)
+{
+ eval->diffuse *= value;
+ eval->glossy *= value;
+}
+
+ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval)
+{
+ return eval->diffuse + eval->glossy;
+}
+
+ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval)
+{
+ /* Ratio of diffuse and glossy to recover proportions for writing to render pass.
+ * We assume reflection, transmission and volume scatter to be exclusive. */
+ return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy);
+}
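
As an aside (not part of the patch above): the stored diffuse/glossy ratio exists so that a later
pass write, which only knows the total contribution, can reconstruct the split. A minimal,
self-contained C++ sketch of that reconstruction, with plain floats standing in for float3 and all
names illustrative:

#include <cstdio>

int main()
{
  /* A hypothetical single-channel BSDF evaluation split into diffuse and glossy parts,
   * mirroring BsdfEval. */
  const float diffuse = 0.6f;
  const float glossy = 0.2f;

  /* Ratio kept with the path, as in bsdf_eval_diffuse_glossy_ratio(). */
  const float ratio = diffuse / (diffuse + glossy); /* 0.75 */

  /* Later only the total contribution is known; reconstruct the split from the ratio,
   * as done when writing the diffuse and glossy passes. */
  const float contribution = 0.4f;
  const float diffuse_part = ratio * contribution;         /* written to the diffuse pass */
  const float glossy_part = (1.0f - ratio) * contribution; /* written to the glossy pass */

  printf("diffuse %.3f, glossy %.3f\n", diffuse_part, glossy_part);
  return 0;
}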
+
+/* --------------------------------------------------------------------
+ * Clamping
+ *
+ * Clamping is done on a per-contribution basis so that we can write directly
+ * to render buffers instead of using per-thread memory, and to avoid the
+ * impact of clamping on other contributions. */
+
+ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce)
+{
+#ifdef __KERNEL_DEBUG_NAN__
+ if (!isfinite3_safe(*L)) {
+ kernel_assert(!"Cycles sample with non-finite value detected");
+ }
+#endif
+  /* Make sure all components are finite, so that the contribution is usable by the adaptive
+   * sampling convergence check, but also so that the render result never causes issues with
+   * post-processing. */
+ *L = ensure_finite3(*L);
+
+#ifdef __CLAMP_SAMPLE__
+ float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect :
+ kernel_data.integrator.sample_clamp_direct;
+ float sum = reduce_add(fabs(*L));
+ if (sum > limit) {
+ *L *= limit / sum;
+ }
+#endif
+}
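
To make the clamp above concrete (a worked example, not part of the patch): with the clamp limit
set to 10 and a contribution $L = (6, 5, 4)$,

\[ \mathrm{sum} = |6| + |5| + |4| = 15 > 10 \quad\Rightarrow\quad L \leftarrow L \cdot \tfrac{10}{15} = (4,\ 10/3,\ 8/3), \]

so the color of the sample is preserved while its total energy is limited.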
+
+/* --------------------------------------------------------------------
+ * Pass accumulation utilities.
+ */
+
+/* Get pointer to pixel in render buffer. */
+ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer(
+ KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
+{
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ return render_buffer + render_buffer_offset;
+}
+
+/* --------------------------------------------------------------------
+ * Adaptive sampling.
+ */
+
+ccl_device_inline int kernel_accum_sample(KernelGlobals kg,
+ ConstIntegratorState state,
+ ccl_global float *ccl_restrict render_buffer,
+ int sample)
+{
+ if (kernel_data.film.pass_sample_count == PASS_UNUSED) {
+ return sample;
+ }
+
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
+
+ return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1);
+}
+
+ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg,
+ const int sample,
+ const float3 contribution,
+ ccl_global float *ccl_restrict buffer)
+{
+ /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping
+ * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte
+ * Carlo global illumination" except that here it is applied per pixel and not in hierarchical
+ * tiles. */
+
+ if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) {
+ return;
+ }
+
+ if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+ kernel_write_pass_float4(
+ buffer + kernel_data.film.pass_adaptive_aux_buffer,
+ make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f));
+ }
+}
+
+/* --------------------------------------------------------------------
+ * Shadow catcher.
+ */
+
+#ifdef __SHADOW_CATCHER__
+
+/* Accumulate contribution to the Shadow Catcher pass.
+ *
+ * Returns true if the contribution is fully handled here and is not to be added to the other
+ * passes (like combined, adaptive sampling). */
+
+ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg,
+ const uint32_t path_flag,
+ const float3 contribution,
+ ccl_global float *ccl_restrict buffer)
+{
+ if (!kernel_data.integrator.has_shadow_catcher) {
+ return false;
+ }
+
+ kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED);
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
+
+ /* Matte pass. */
+ if (kernel_shadow_catcher_is_matte_path(path_flag)) {
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution);
+    /* NOTE: Accumulate to the combined pass and to the sample count pass, so that adaptive
+     * sampling is based on how noisy the combined pass is, as if there were no catchers in the
+     * scene. */
+ }
+
+ /* Shadow catcher pass. */
+ if (kernel_shadow_catcher_is_object_pass(path_flag)) {
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
+ return true;
+ }
+
+ return false;
+}
+
+ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg,
+ const uint32_t path_flag,
+ const float3 contribution,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
+{
+ if (!kernel_data.integrator.has_shadow_catcher) {
+ return false;
+ }
+
+ kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED);
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
+
+ if (path_flag & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
+ return true;
+ }
+
+ /* Matte pass. */
+ if (kernel_shadow_catcher_is_matte_path(path_flag)) {
+ kernel_write_pass_float4(
+ buffer + kernel_data.film.pass_shadow_catcher_matte,
+ make_float4(contribution.x, contribution.y, contribution.z, transparent));
+    /* NOTE: Accumulate to the combined pass and to the sample count pass, so that adaptive
+     * sampling is based on how noisy the combined pass is, as if there were no catchers in the
+     * scene. */
+ }
+
+ /* Shadow catcher pass. */
+ if (kernel_shadow_catcher_is_object_pass(path_flag)) {
+    /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the
+     * calculation, and the alpha channel of the pass contains the number of samples contributed
+     * to a pixel of the pass. */
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
+ return true;
+ }
+
+ return false;
+}
+
+ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg,
+ const uint32_t path_flag,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
+{
+ if (!kernel_data.integrator.has_shadow_catcher) {
+ return;
+ }
+
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
+
+ /* Matte pass. */
+ if (kernel_shadow_catcher_is_matte_path(path_flag)) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent);
+ }
+}
+
+#endif /* __SHADOW_CATCHER__ */
+
+/* --------------------------------------------------------------------
+ * Render passes.
+ */
+
+/* Write combined pass. */
+ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg,
+ const uint32_t path_flag,
+ const int sample,
+ const float3 contribution,
+ ccl_global float *ccl_restrict buffer)
+{
+#ifdef __SHADOW_CATCHER__
+ if (kernel_accum_shadow_catcher(kg, path_flag, contribution, buffer)) {
+ return;
+ }
+#endif
+
+ if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution);
+ }
+
+ kernel_accum_adaptive_buffer(kg, sample, contribution, buffer);
+}
+
+/* Write combined pass with transparency. */
+ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg,
+ const uint32_t path_flag,
+ const int sample,
+ const float3 contribution,
+ const float transparent,
+ ccl_global float *ccl_restrict
+ buffer)
+{
+#ifdef __SHADOW_CATCHER__
+ if (kernel_accum_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) {
+ return;
+ }
+#endif
+
+ if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+ kernel_write_pass_float4(
+ buffer + kernel_data.film.pass_combined,
+ make_float4(contribution.x, contribution.y, contribution.z, transparent));
+ }
+
+ kernel_accum_adaptive_buffer(kg, sample, contribution, buffer);
+}
+
+/* Write background or emission to appropriate pass. */
+ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg,
+ ConstIntegratorState state,
+ float3 contribution,
+ ccl_global float *ccl_restrict
+ buffer,
+ const int pass)
+{
+ if (!(kernel_data.film.light_pass_flag & PASS_ANY)) {
+ return;
+ }
+
+#ifdef __PASSES__
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
+ int pass_offset = PASS_UNUSED;
+
+ /* Denoising albedo. */
+# ifdef __DENOISING_FEATURES__
+ if (path_flag & PATH_RAY_DENOISING_FEATURES) {
+ if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
+ const float3 denoising_feature_throughput = INTEGRATOR_STATE(
+ state, path, denoising_feature_throughput);
+ const float3 denoising_albedo = denoising_feature_throughput * contribution;
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
+ }
+ }
+# endif /* __DENOISING_FEATURES__ */
+
+ if (!(path_flag & PATH_RAY_ANY_PASS)) {
+ /* Directly visible, write to emission or background pass. */
+ pass_offset = pass;
+ }
+ else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
+    /* Indirectly visible through reflection or transmission. */
+ const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
+ ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
+ kernel_data.film.pass_glossy_direct :
+ kernel_data.film.pass_glossy_indirect) :
+ ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
+ kernel_data.film.pass_transmission_direct :
+ kernel_data.film.pass_transmission_indirect);
+
+ if (glossy_pass_offset != PASS_UNUSED) {
+ /* Glossy is a subset of the throughput, reconstruct it here using the
+ * diffuse-glossy ratio. */
+ const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
+ const float3 glossy_contribution = (one_float3() - ratio) * contribution;
+ kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
+ }
+
+ /* Reconstruct diffuse subset of throughput. */
+ pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
+ kernel_data.film.pass_diffuse_direct :
+ kernel_data.film.pass_diffuse_indirect;
+ if (pass_offset != PASS_UNUSED) {
+ contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio);
+ }
+ }
+ else if (path_flag & PATH_RAY_VOLUME_PASS) {
+ /* Indirectly visible through volume. */
+ pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ?
+ kernel_data.film.pass_volume_direct :
+ kernel_data.film.pass_volume_indirect;
+ }
+
+ /* Single write call for GPU coherence. */
+ if (pass_offset != PASS_UNUSED) {
+ kernel_write_pass_float3(buffer + pass_offset, contribution);
+ }
+#endif /* __PASSES__ */
+}
+
+/* Write light contribution to render buffer. */
+ccl_device_inline void kernel_accum_light(KernelGlobals kg,
+ ConstIntegratorShadowState state,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ /* The throughput for shadow paths already contains the light shader evaluation. */
+ float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput);
+ kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce));
+
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(state, shadow_path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = render_buffer + render_buffer_offset;
+
+ const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag);
+ const int sample = INTEGRATOR_STATE(state, shadow_path, sample);
+
+ /* Ambient occlusion. */
+ if (path_flag & PATH_RAY_SHADOW_FOR_AO) {
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, contribution);
+ return;
+ }
+
+ /* Direct light shadow. */
+ kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer);
+
+#ifdef __PASSES__
+ if (kernel_data.film.light_pass_flag & PASS_ANY) {
+ const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag);
+ int pass_offset = PASS_UNUSED;
+
+ if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) {
+      /* Indirectly visible through reflection or transmission. */
+ const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ?
+ ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+ kernel_data.film.pass_glossy_direct :
+ kernel_data.film.pass_glossy_indirect) :
+ ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+ kernel_data.film.pass_transmission_direct :
+ kernel_data.film.pass_transmission_indirect);
+
+ if (glossy_pass_offset != PASS_UNUSED) {
+ /* Glossy is a subset of the throughput, reconstruct it here using the
+ * diffuse-glossy ratio. */
+ const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
+ const float3 glossy_contribution = (one_float3() - ratio) * contribution;
+ kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution);
+ }
+
+ /* Reconstruct diffuse subset of throughput. */
+ pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+ kernel_data.film.pass_diffuse_direct :
+ kernel_data.film.pass_diffuse_indirect;
+ if (pass_offset != PASS_UNUSED) {
+ contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio);
+ }
+ }
+ else if (path_flag & PATH_RAY_VOLUME_PASS) {
+ /* Indirectly visible through volume. */
+ pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
+ kernel_data.film.pass_volume_direct :
+ kernel_data.film.pass_volume_indirect;
+ }
+
+ /* Single write call for GPU coherence. */
+ if (pass_offset != PASS_UNUSED) {
+ kernel_write_pass_float3(buffer + pass_offset, contribution);
+ }
+
+ /* Write shadow pass. */
+ if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
+ (path_flag & PATH_RAY_CAMERA)) {
+ const float3 unshadowed_throughput = INTEGRATOR_STATE(
+ state, shadow_path, unshadowed_throughput);
+ const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
+ const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) *
+ kernel_data.film.pass_shadow_scale;
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow);
+ }
+ }
+#endif
+}
+
+/* Write transparency to render buffer.
+ *
+ * Note that we accumulate transparency = 1 - alpha in the render buffer.
+ * Otherwise we'd have to write alpha on path termination, which happens
+ * in many places. */
+ccl_device_inline void kernel_accum_transparent(KernelGlobals kg,
+ ConstIntegratorState state,
+ const uint32_t path_flag,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
+{
+ if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
+ kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent);
+ }
+
+ kernel_accum_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer);
+}
+
+/* Write holdout to render buffer. */
+ccl_device_inline void kernel_accum_holdout(KernelGlobals kg,
+ ConstIntegratorState state,
+ const uint32_t path_flag,
+ const float transparent,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
+ kernel_accum_transparent(kg, state, path_flag, transparent, buffer);
+}
+
+/* Write background contribution to render buffer.
+ *
+ * Includes transparency, matching kernel_accum_transparent. */
+ccl_device_inline void kernel_accum_background(KernelGlobals kg,
+ ConstIntegratorState state,
+ const float3 L,
+ const float transparent,
+ const bool is_transparent_background_ray,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L;
+ kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
+
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
+
+ if (is_transparent_background_ray) {
+ kernel_accum_transparent(kg, state, path_flag, transparent, buffer);
+ }
+ else {
+ const int sample = INTEGRATOR_STATE(state, path, sample);
+ kernel_accum_combined_transparent_pass(
+ kg, path_flag, sample, contribution, transparent, buffer);
+ }
+ kernel_accum_emission_or_background_pass(
+ kg, state, contribution, buffer, kernel_data.film.pass_background);
+}
+
+/* Write emission to render buffer. */
+ccl_device_inline void kernel_accum_emission(KernelGlobals kg,
+ ConstIntegratorState state,
+ const float3 throughput,
+ const float3 L,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ float3 contribution = throughput * L;
+ kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
+
+ ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
+ const int sample = INTEGRATOR_STATE(state, path, sample);
+
+ kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer);
+ kernel_accum_emission_or_background_pass(
+ kg, state, contribution, buffer, kernel_data.film.pass_emission);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/film_adaptive_sampling.h b/intern/cycles/kernel/film/film_adaptive_sampling.h
new file mode 100644
index 00000000000..c78b5f6b707
--- /dev/null
+++ b/intern/cycles/kernel/film/film_adaptive_sampling.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/film/film_write_passes.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Check whether the pixel has converged and should not be sampled anymore. */
+
+ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg,
+ ConstIntegratorState state,
+ ccl_global float *render_buffer)
+{
+ if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) {
+ return true;
+ }
+
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = render_buffer + render_buffer_offset;
+
+ const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
+ return buffer[aux_w_offset] == 0.0f;
+}
+
+/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
+
+ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg,
+ ccl_global float *render_buffer,
+ int x,
+ int y,
+ float threshold,
+ bool reset,
+ int offset,
+ int stride)
+{
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
+ kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED);
+
+ const int render_pixel_index = offset + x + y * stride;
+ ccl_global float *buffer = render_buffer +
+ (uint64_t)render_pixel_index * kernel_data.film.pass_stride;
+
+ /* TODO(Stefan): Is this better in linear, sRGB or something else? */
+
+ const float4 A = kernel_read_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ if (!reset && A.w != 0.0f) {
+ /* If the pixel was considered converged, its state will not change in this kernel. Early
+ * output before doing any math.
+ *
+ * TODO(sergey): On a GPU it might be better to keep thread alive for better coherency? */
+ return true;
+ }
+
+ const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined);
+
+ const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
+ const float inv_sample = 1.0f / sample;
+
+ /* The per pixel error as seen in section 2.1 of
+ * "A hierarchical automatic stopping condition for Monte Carlo global illumination" */
+ const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) *
+ inv_sample;
+ const float error_normalize = sqrtf((I.x + I.y + I.z) * inv_sample);
+ /* A small epsilon is added to the divisor to prevent division by zero. */
+ const float error = error_difference / (0.0001f + error_normalize);
+ const bool did_converge = (error < threshold);
+
+ const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
+ buffer[aux_w_offset] = did_converge;
+
+ return did_converge;
+}
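
Restated as a formula (not part of the patch), with $I$ the combined pass, $A$ the auxiliary pass
that accumulates every other sample scaled by two, and $N$ the per-pixel sample count:

\[ \mathrm{error} = \frac{\left(|I_x - A_x| + |I_y - A_y| + |I_z - A_z|\right)/N}{10^{-4} + \sqrt{(I_x + I_y + I_z)/N}}, \qquad \text{converged} \iff \mathrm{error} < \mathrm{threshold}. \]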
+
+/* This is a simple box filter in two passes.
+ * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
+
+ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg,
+ ccl_global float *render_buffer,
+ int y,
+ int start_x,
+ int width,
+ int offset,
+ int stride)
+{
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
+
+ bool prev = false;
+ for (int x = start_x; x < start_x + width; ++x) {
+ int index = offset + x + y * stride;
+ ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+ const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
+
+ if (buffer[aux_w_offset] == 0.0f) {
+ if (x > start_x && !prev) {
+ index = index - 1;
+ buffer = render_buffer + index * kernel_data.film.pass_stride;
+ buffer[aux_w_offset] = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ buffer[aux_w_offset] = 0.0f;
+ }
+ prev = false;
+ }
+ }
+}
+
+ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg,
+ ccl_global float *render_buffer,
+ int x,
+ int start_y,
+ int height,
+ int offset,
+ int stride)
+{
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
+
+ bool prev = false;
+ for (int y = start_y; y < start_y + height; ++y) {
+ int index = offset + x + y * stride;
+ ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+ const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
+
+ if (buffer[aux_w_offset] == 0.0f) {
+ if (y > start_y && !prev) {
+ index = index - stride;
+ buffer = render_buffer + index * kernel_data.film.pass_stride;
+ buffer[aux_w_offset] = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ buffer[aux_w_offset] = 0.0f;
+ }
+ prev = false;
+ }
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/film_id_passes.h b/intern/cycles/kernel/film/film_id_passes.h
new file mode 100644
index 00000000000..d5b8c90a828
--- /dev/null
+++ b/intern/cycles/kernel/film/film_id_passes.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+/* Element of ID pass stored in the render buffers.
+ * It is `float2` semantically, but it must be unaligned since the offset of ID passes in the
+ * render buffers might not meet the alignment expected by the compiler. */
+typedef struct IDPassBufferElement {
+ float x;
+ float y;
+} IDPassBufferElement;
+
+ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
+ int num_slots,
+ float id,
+ float weight)
+{
+ kernel_assert(id != ID_NONE);
+ if (weight == 0.0f) {
+ return;
+ }
+
+ for (int slot = 0; slot < num_slots; slot++) {
+ ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
+#ifdef __ATOMIC_PASS_WRITE__
+ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+ if (id_buffer[slot].x == ID_NONE) {
+ /* Use an atomic to claim this slot.
+ * If a different thread got here first, try again from this slot on. */
+ float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
+ if (old_id != ID_NONE && old_id != id) {
+ continue;
+ }
+ atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
+ break;
+ }
+ /* If there already is a slot for that ID, add the weight.
+ * If no slot was found, add it to the last. */
+ else if (id_buffer[slot].x == id || slot == num_slots - 1) {
+ atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
+ break;
+ }
+#else /* __ATOMIC_PASS_WRITE__ */
+ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+ if (id_buffer[slot].x == ID_NONE) {
+ id_buffer[slot].x = id;
+ id_buffer[slot].y = weight;
+ break;
+ }
+ /* If there already is a slot for that ID, add the weight.
+ * If no slot was found, add it to the last. */
+ else if (id_buffer[slot].x == id || slot == num_slots - 1) {
+ id_buffer[slot].y += weight;
+ break;
+ }
+#endif /* __ATOMIC_PASS_WRITE__ */
+ }
+}
+
+ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
+{
+ ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
+ for (int slot = 1; slot < num_slots; ++slot) {
+ if (id_buffer[slot].x == ID_NONE) {
+ return;
+ }
+ /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
+ int i = slot;
+ while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
+ const IDPassBufferElement swap = id_buffer[i];
+ id_buffer[i] = id_buffer[i - 1];
+ id_buffer[i - 1] = swap;
+ --i;
+ }
+ }
+}
+
+/* post-sorting for Cryptomatte */
+ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg,
+ ccl_global float *render_buffer,
+ int pixel_index)
+{
+ const int pass_stride = kernel_data.film.pass_stride;
+ const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride;
+ ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset +
+ kernel_data.film.pass_cryptomatte;
+ kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+}
+
+CCL_NAMESPACE_END
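
As an aside (not part of the patch): a small host-side C++ sketch of the slot behavior above,
following the non-atomic path. Weights accumulate per ID, an unknown ID claims the first empty
slot, and sorting orders slots by descending weight. ID_NONE is stood in for by 0.0f here, and all
names are illustrative.

#include <algorithm>
#include <cstdio>

struct Slot {
  float id;
  float weight;
};

static const float kIdNone = 0.0f; /* Stand-in for ID_NONE. */

/* Mirrors the non-atomic branch of kernel_write_id_slots(). */
static void write_id_slot(Slot *slots, int num_slots, float id, float weight)
{
  if (weight == 0.0f) {
    return;
  }
  for (int i = 0; i < num_slots; i++) {
    if (slots[i].id == kIdNone) {
      slots[i].id = id;
      slots[i].weight = weight;
      return;
    }
    if (slots[i].id == id || i == num_slots - 1) {
      slots[i].weight += weight;
      return;
    }
  }
}

int main()
{
  Slot slots[4] = {};
  write_id_slot(slots, 4, 7.0f, 0.5f);
  write_id_slot(slots, 4, 3.0f, 0.25f);
  write_id_slot(slots, 4, 7.0f, 0.25f);

  /* Same effect as kernel_sort_id_slots(): order used slots by descending weight. */
  std::sort(slots, slots + 4, [](const Slot &a, const Slot &b) { return a.weight > b.weight; });

  for (const Slot &s : slots) {
    printf("id %.1f weight %.2f\n", s.id, s.weight);
  }
  return 0;
}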
diff --git a/intern/cycles/kernel/film/film_passes.h b/intern/cycles/kernel/film/film_passes.h
new file mode 100644
index 00000000000..40ddb1b5563
--- /dev/null
+++ b/intern/cycles/kernel/film/film_passes.h
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/geom/geom.h"
+
+#include "kernel/film/film_id_passes.h"
+#include "kernel/film/film_write_passes.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Get pointer to pixel in render buffer. */
+ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer(
+ KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
+{
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ return render_buffer + render_buffer_offset;
+}
+
+#ifdef __DENOISING_FEATURES__
+
+ccl_device_forceinline void kernel_write_denoising_features_surface(
+ KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) {
+ return;
+ }
+
+ /* Skip implicitly transparent surfaces. */
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ return;
+ }
+
+ ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
+
+ float3 normal = zero_float3();
+ float3 diffuse_albedo = zero_float3();
+ float3 specular_albedo = zero_float3();
+ float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ continue;
+ }
+
+ /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
+ normal += sc->N * sc->sample_weight;
+ sum_weight += sc->sample_weight;
+
+ float3 closure_albedo = sc->weight;
+ /* Closures that include a Fresnel term typically have weights close to 1 even though their
+ * actual contribution is significantly lower.
+ * To account for this, we scale their weight by the average fresnel factor (the same is also
+ * done for the sample weight in the BSDF setup, so we don't need to scale that here). */
+ if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) {
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
+ closure_albedo *= bsdf->extra->fresnel_color;
+ }
+ else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) {
+ ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc;
+ closure_albedo *= bsdf->avg_value;
+ }
+ else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) {
+ closure_albedo *= bsdf_principled_hair_albedo(sc);
+ }
+
+ if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
+ diffuse_albedo += closure_albedo;
+ sum_nonspecular_weight += sc->sample_weight;
+ }
+ else {
+ specular_albedo += closure_albedo;
+ }
+ }
+
+ /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
+ if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) {
+ if (sum_weight != 0.0f) {
+ normal /= sum_weight;
+ }
+
+ if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) {
+ /* Transform normal into camera space. */
+ const Transform worldtocamera = kernel_data.cam.worldtocamera;
+ normal = transform_direction(&worldtocamera, normal);
+
+ const float3 denoising_normal = ensure_finite3(normal);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal);
+ }
+
+ if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
+ const float3 denoising_feature_throughput = INTEGRATOR_STATE(
+ state, path, denoising_feature_throughput);
+ const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput *
+ diffuse_albedo);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
+ }
+
+ INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES;
+ }
+ else {
+ INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo;
+ }
+}
+
+ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals kg,
+ IntegratorState state,
+ const float3 albedo,
+ const bool scatter,
+ ccl_global float *ccl_restrict
+ render_buffer)
+{
+ ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
+ const float3 denoising_feature_throughput = INTEGRATOR_STATE(
+ state, path, denoising_feature_throughput);
+
+ if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) {
+ /* Assume scatter is sufficiently diffuse to stop writing denoising features. */
+ INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES;
+
+ /* Write view direction as normal. */
+ const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal);
+ }
+
+ if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
+ /* Write albedo. */
+ const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * albedo);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
+ }
+}
+#endif /* __DENOISING_FEATURES__ */
+
+#ifdef __SHADOW_CATCHER__
+
+/* Write shadow catcher passes on a bounce from the shadow catcher object. */
+ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
+ KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ if (!kernel_data.integrator.has_shadow_catcher) {
+ return;
+ }
+
+ kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
+
+ if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
+ return;
+ }
+
+ ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
+
+ /* Count sample for the shadow catcher object. */
+ kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
+
+ /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
+ * transparency to the matte. */
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
+ average(throughput));
+}
+
+#endif /* __SHADOW_CATCHER__ */
+
+ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
+ size_t depth,
+ float id,
+ float matte_weight)
+{
+ kernel_write_id_slots(buffer, depth * 2, id, matte_weight);
+ return depth * 4;
+}
+
+ccl_device_inline void kernel_write_data_passes(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ ccl_global float *ccl_restrict render_buffer)
+{
+#ifdef __PASSES__
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
+
+ if (!(path_flag & PATH_RAY_CAMERA)) {
+ return;
+ }
+
+ const int flag = kernel_data.film.pass_flag;
+
+ if (!(flag & PASS_ANY)) {
+ return;
+ }
+
+ ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
+
+ if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
+ if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
+ average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
+ if (INTEGRATOR_STATE(state, path, sample) == 0) {
+ if (flag & PASSMASK(DEPTH)) {
+ const float depth = camera_z_depth(kg, sd->P);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
+ }
+ if (flag & PASSMASK(OBJECT_ID)) {
+ const float id = object_pass_id(kg, sd->object);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
+ }
+ if (flag & PASSMASK(MATERIAL_ID)) {
+ const float id = shader_pass_id(kg, sd);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
+ }
+ }
+
+ if (flag & PASSMASK(POSITION)) {
+ const float3 position = sd->P;
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_position, position);
+ }
+ if (flag & PASSMASK(NORMAL)) {
+ const float3 normal = shader_bsdf_average_normal(kg, sd);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
+ }
+ if (flag & PASSMASK(ROUGHNESS)) {
+ const float roughness = shader_bsdf_average_roughness(sd);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness);
+ }
+ if (flag & PASSMASK(UV)) {
+ const float3 uv = primitive_uv(kg, sd);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv);
+ }
+ if (flag & PASSMASK(MOTION)) {
+ const float4 speed = primitive_motion_vector(kg, sd);
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
+ }
+
+ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE;
+ }
+ }
+
+ if (kernel_data.film.cryptomatte_passes) {
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ const float matte_weight = average(throughput) *
+ (1.0f - average(shader_bsdf_transparency(kg, sd)));
+ if (matte_weight > 0.0f) {
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+ const float id = object_cryptomatte_id(kg, sd->object);
+ cryptomatte_buffer += kernel_write_id_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+ const float id = shader_cryptomatte_id(kg, sd->shader);
+ cryptomatte_buffer += kernel_write_id_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+ const float id = object_cryptomatte_asset_id(kg, sd->object);
+ cryptomatte_buffer += kernel_write_id_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ }
+ }
+
+ if (flag & PASSMASK(DIFFUSE_COLOR)) {
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color,
+ shader_bsdf_diffuse(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(GLOSSY_COLOR)) {
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color,
+ shader_bsdf_glossy(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(TRANSMISSION_COLOR)) {
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color,
+ shader_bsdf_transmission(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(MIST)) {
+ /* Bring depth into 0..1 range. */
+ const float mist_start = kernel_data.film.mist_start;
+ const float mist_inv_depth = kernel_data.film.mist_inv_depth;
+
+ const float depth = camera_distance(kg, sd->P);
+ float mist = saturate((depth - mist_start) * mist_inv_depth);
+
+ /* Falloff */
+ const float mist_falloff = kernel_data.film.mist_falloff;
+
+ if (mist_falloff == 1.0f)
+ ;
+ else if (mist_falloff == 2.0f)
+ mist = mist * mist;
+ else if (mist_falloff == 0.5f)
+ mist = sqrtf(mist);
+ else
+ mist = powf(mist, mist_falloff);
+
+ /* Modulate by transparency */
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ const float3 alpha = shader_bsdf_alpha(kg, sd);
+ const float mist_output = (1.0f - mist) * average(throughput * alpha);
+
+    /* Note that the final value we want in the render buffer is 1 - mist_output;
+     * to avoid having to track this in the integrator state we do the negation
+     * after rendering. */
+ kernel_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output);
+ }
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/film_read.h b/intern/cycles/kernel/film/film_read.h
new file mode 100644
index 00000000000..a87eff3832e
--- /dev/null
+++ b/intern/cycles/kernel/film/film_read.h
@@ -0,0 +1,532 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+/* --------------------------------------------------------------------
+ * Common utilities.
+ */
+
+/* The input buffer contains transparency = 1 - alpha; this converts it to
+ * alpha. Also clamp, since alpha might end up outside of 0..1 due to Russian
+ * roulette. */
+ccl_device_forceinline float film_transparency_to_alpha(float transparency)
+{
+ return saturate(1.0f - transparency);
+}
+
+ccl_device_inline float film_get_scale(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+ if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
+ return kfilm_convert->scale;
+ }
+
+ if (kfilm_convert->pass_use_filter) {
+ const uint sample_count = *(
+ (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count));
+ return 1.0f / sample_count;
+ }
+
+ return 1.0f;
+}
+
+ccl_device_inline float film_get_scale_exposure(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+ if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
+ return kfilm_convert->scale_exposure;
+ }
+
+ const float scale = film_get_scale(kfilm_convert, buffer);
+
+ if (kfilm_convert->pass_use_exposure) {
+ return scale * kfilm_convert->exposure;
+ }
+
+ return scale;
+}
+
+ccl_device_inline bool film_get_scale_and_scale_exposure(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict scale,
+ ccl_private float *ccl_restrict scale_exposure)
+{
+ if (kfilm_convert->pass_sample_count == PASS_UNUSED) {
+ *scale = kfilm_convert->scale;
+ *scale_exposure = kfilm_convert->scale_exposure;
+ return true;
+ }
+
+ const uint sample_count = *(
+ (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count));
+ if (!sample_count) {
+ *scale = 0.0f;
+ *scale_exposure = 0.0f;
+ return false;
+ }
+
+ if (kfilm_convert->pass_use_filter) {
+ *scale = 1.0f / sample_count;
+ }
+ else {
+ *scale = 1.0f;
+ }
+
+ if (kfilm_convert->pass_use_exposure) {
+ *scale_exposure = *scale * kfilm_convert->exposure;
+ }
+ else {
+ *scale_exposure = *scale;
+ }
+
+ return true;
+}
+
+/* --------------------------------------------------------------------
+ * Float (scalar) passes.
+ */
+
+ccl_device_inline void film_get_pass_pixel_depth(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 1);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+ const float f = *in;
+
+ pixel[0] = (f == 0.0f) ? 1e10f : f * scale_exposure;
+}
+
+ccl_device_inline void film_get_pass_pixel_mist(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 1);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+ const float f = *in;
+
+ /* Note that we accumulate 1 - mist in the kernel to avoid having to
+ * track the mist values in the integrator state. */
+ pixel[0] = saturate(1.0f - f * scale_exposure);
+}
+
+ccl_device_inline void film_get_pass_pixel_sample_count(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+  /* TODO(sergey): Consider normalizing into the [0..1] range, so that it is possible to see a
+   * meaningful value when the adaptive sampler stopped rendering the image well before the
+   * maximum number of samples was reached (for example when the number of samples is set to 0
+   * in the viewport). */
+
+ kernel_assert(kfilm_convert->num_components >= 1);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+ const float f = *in;
+
+ pixel[0] = __float_as_uint(f) * kfilm_convert->scale;
+}
+
+ccl_device_inline void film_get_pass_pixel_float(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 1);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+ const float f = *in;
+
+ pixel[0] = f * scale_exposure;
+}
+
+/* --------------------------------------------------------------------
+ * Float 3 passes.
+ */
+
+ccl_device_inline void film_get_pass_pixel_light_path(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 3);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ /* Read light pass. */
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+ float3 f = make_float3(in[0], in[1], in[2]);
+
+ /* Optionally add indirect light pass. */
+ if (kfilm_convert->pass_indirect != PASS_UNUSED) {
+ ccl_global const float *in_indirect = buffer + kfilm_convert->pass_indirect;
+ const float3 f_indirect = make_float3(in_indirect[0], in_indirect[1], in_indirect[2]);
+ f += f_indirect;
+ }
+
+ /* Optionally divide out color. */
+ if (kfilm_convert->pass_divide != PASS_UNUSED) {
+ ccl_global const float *in_divide = buffer + kfilm_convert->pass_divide;
+ const float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]);
+ f = safe_divide_even_color(f, f_divide);
+
+ /* Exposure only, sample scale cancels out. */
+ f *= kfilm_convert->exposure;
+ }
+ else {
+ /* Sample scale and exposure. */
+ f *= film_get_scale_exposure(kfilm_convert, buffer);
+ }
+
+ pixel[0] = f.x;
+ pixel[1] = f.y;
+ pixel[2] = f.z;
+}
+
+ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 3);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float3 f = make_float3(in[0], in[1], in[2]) * scale_exposure;
+
+ pixel[0] = f.x;
+ pixel[1] = f.y;
+ pixel[2] = f.z;
+}
+
+/* --------------------------------------------------------------------
+ * Float4 passes.
+ */
+
+ccl_device_inline void film_get_pass_pixel_motion(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 4);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_motion_weight != PASS_UNUSED);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+ ccl_global const float *in_weight = buffer + kfilm_convert->pass_motion_weight;
+
+ const float weight = in_weight[0];
+ const float weight_inv = (weight > 0.0f) ? 1.0f / weight : 0.0f;
+
+ const float4 motion = make_float4(in[0], in[1], in[2], in[3]) * weight_inv;
+
+ pixel[0] = motion.x;
+ pixel[1] = motion.y;
+ pixel[2] = motion.z;
+ pixel[3] = motion.w;
+}
+
+ccl_device_inline void film_get_pass_pixel_cryptomatte(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 4);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ const float scale = film_get_scale(kfilm_convert, buffer);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float4 f = make_float4(in[0], in[1], in[2], in[3]);
+
+ /* x and z contain integer IDs, don't rescale them.
+ * y and w contain matte weights, they get scaled. */
+ pixel[0] = f.x;
+ pixel[1] = f.y * scale;
+ pixel[2] = f.z;
+ pixel[3] = f.w * scale;
+}
+
+ccl_device_inline void film_get_pass_pixel_float4(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 4);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ float scale, scale_exposure;
+ film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
+ const float alpha = in[3] * scale;
+
+ pixel[0] = color.x;
+ pixel[1] = color.y;
+ pixel[2] = color.z;
+ pixel[3] = alpha;
+}
+
+ccl_device_inline void film_get_pass_pixel_combined(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+  kernel_assert(kfilm_convert->num_components == 4);
+  kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+  /* The last component (index 3) contains transparency = 1 - alpha for the combined pass. */
+
+ float scale, scale_exposure;
+ if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) {
+ pixel[0] = 0.0f;
+ pixel[1] = 0.0f;
+ pixel[2] = 0.0f;
+ pixel[3] = 0.0f;
+ return;
+ }
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
+ const float alpha = in[3] * scale;
+
+ pixel[0] = color.x;
+ pixel[1] = color.y;
+ pixel[2] = color.z;
+ pixel[3] = film_transparency_to_alpha(alpha);
+}
+
+/* --------------------------------------------------------------------
+ * Shadow catcher.
+ */
+
+ccl_device_inline float3 film_calculate_shadow_catcher_denoised(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+ kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
+
+ float scale, scale_exposure;
+ film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
+
+ ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher;
+
+ const float3 pixel = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]) * scale_exposure;
+
+ return pixel;
+}
+
+ccl_device_inline float3 safe_divide_shadow_catcher(float3 a, float3 b)
+{
+ float x, y, z;
+
+ x = (b.x != 0.0f) ? a.x / b.x : 1.0f;
+ y = (b.y != 0.0f) ? a.y / b.y : 1.0f;
+ z = (b.z != 0.0f) ? a.z / b.z : 1.0f;
+
+ return make_float3(x, y, z);
+}
+
+ccl_device_inline float3
+film_calculate_shadow_catcher(ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+  /* For the shadow catcher pass we divide the combined pass by the shadow catcher.
+   * Note that the denoised shadow catcher pass contains a value which only needs to be scaled
+   * (and does not need to be computed as a division). */
+
+ if (kfilm_convert->is_denoised) {
+ return film_calculate_shadow_catcher_denoised(kfilm_convert, buffer);
+ }
+
+ kernel_assert(kfilm_convert->pass_shadow_catcher_sample_count != PASS_UNUSED);
+
+ /* If there is no shadow catcher object in this pixel, there is no modification of the light
+ * needed, so return one. */
+ ccl_global const float *in_catcher_sample_count =
+ buffer + kfilm_convert->pass_shadow_catcher_sample_count;
+ const float num_samples = in_catcher_sample_count[0];
+ if (num_samples == 0.0f) {
+ return one_float3();
+ }
+
+ kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
+ ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher;
+
+  /* NOTE: It is possible that the Shadow Catcher pass is requested as an output without actual
+   * shadow catcher objects in the scene. In this case no auxiliary passes are required for the
+   * decision (to save memory), so the asserts are delayed until this point so that the sample
+   * count check above handles such a configuration. */
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_combined != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED);
+
+ ccl_global const float *in_combined = buffer + kfilm_convert->pass_combined;
+ ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte;
+
+  /* No scaling needed. The integration works in such a way that the number of samples in the
+   * combined and shadow catcher passes is the same, and exposure is canceled out during the
+   * division. */
+ const float3 color_catcher = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]);
+ const float3 color_combined = make_float3(in_combined[0], in_combined[1], in_combined[2]);
+ const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]);
+
+  /* Need to ignore the contribution of matte objects when doing the division (otherwise there
+   * will be artifacts caused by anti-aliasing). Since the combined pass is used for adaptive
+   * sampling and needs to contain matte objects, we subtract the matte objects' contribution
+   * here. This is the same as if the matte objects had not been accumulated to the combined
+   * pass. */
+ const float3 combined_no_matte = color_combined - color_matte;
+
+ const float3 shadow_catcher = safe_divide_shadow_catcher(combined_no_matte, color_catcher);
+
+ const float scale = film_get_scale(kfilm_convert, buffer);
+ const float transparency = in_combined[3] * scale;
+ const float alpha = film_transparency_to_alpha(transparency);
+
+  /* Alpha-over on white using the transparency of the combined pass. This eliminates artifacts
+   * which happen at the edges of a shadow catcher when using transparent film.
+   * Note that we treat the shadow catcher as straight alpha here because alpha was canceled out
+   * during the division. */
+ const float3 pixel = (1.0f - alpha) * one_float3() + alpha * shadow_catcher;
+
+ return pixel;
+}
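
In formula form (not part of the patch), with $C$ the combined pass, $M$ the shadow catcher matte
pass, $S$ the shadow catcher pass, and $\alpha$ derived from the combined pass transparency:

\[ \mathrm{catcher} = \frac{C - M}{S} \ \text{(component-wise, with 1 where } S = 0\text{)}, \qquad \mathrm{pixel} = (1 - \alpha) \cdot 1 + \alpha \cdot \mathrm{catcher}. \]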
+
+ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+  /* The approximation of the shadow is 1 - average(shadow_catcher_pass). A better approximation
+   * is possible.
+   *
+   * The matte is alpha-overed onto the shadow (which is effectively alpha-overing the shadow
+   * onto the footage, and then alpha-overing the synthetic objects on top). */
+
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
+ kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED);
+
+ float scale, scale_exposure;
+ if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
+ ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte;
+
+ const float3 shadow_catcher = film_calculate_shadow_catcher(kfilm_convert, buffer);
+ const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]) * scale_exposure;
+
+ const float transparency = in_matte[3] * scale;
+ const float alpha = saturate(1.0f - transparency);
+
+ const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha;
+
+ if (kfilm_convert->use_approximate_shadow_catcher_background) {
+ kernel_assert(kfilm_convert->pass_background != PASS_UNUSED);
+
+ ccl_global const float *in_background = buffer + kfilm_convert->pass_background;
+ const float3 color_background = make_float3(
+ in_background[0], in_background[1], in_background[2]) *
+ scale_exposure;
+ const float3 alpha_over = color_matte + color_background * (1.0f - alpha_matte);
+ return make_float4(alpha_over.x, alpha_over.y, alpha_over.z, 1.0f);
+ }
+
+ return make_float4(color_matte.x, color_matte.y, color_matte.z, alpha_matte);
+}
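
Similarly (not part of the patch), with $t$ the accumulated transparency of the matte pass and
$\overline{S}$ the average of the shadow catcher value computed above:

\[ \alpha = \mathrm{saturate}(1 - t), \qquad \alpha_{\mathrm{matte}} = (1 - \alpha)\,(1 - \overline{S}) + \alpha, \]

and when the approximate shadow catcher background is used, the returned color is
$\mathrm{matte} + \mathrm{background} \cdot (1 - \alpha_{\mathrm{matte}})$ with alpha set to 1.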
+
+ccl_device_inline void film_get_pass_pixel_shadow_catcher(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components >= 3);
+
+ const float3 pixel_value = film_calculate_shadow_catcher(kfilm_convert, buffer);
+
+ pixel[0] = pixel_value.x;
+ pixel[1] = pixel_value.y;
+ pixel[2] = pixel_value.z;
+}
+
+ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 3 || kfilm_convert->num_components == 4);
+
+ const float4 pixel_value = film_calculate_shadow_catcher_matte_with_shadow(kfilm_convert,
+ buffer);
+
+ pixel[0] = pixel_value.x;
+ pixel[1] = pixel_value.y;
+ pixel[2] = pixel_value.z;
+ if (kfilm_convert->num_components == 4) {
+ pixel[3] = pixel_value.w;
+ }
+}
+
+/* --------------------------------------------------------------------
+ * Compositing and overlays.
+ */
+
+ccl_device_inline void film_apply_pass_pixel_overlays_rgba(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ if (kfilm_convert->show_active_pixels &&
+ kfilm_convert->pass_adaptive_aux_buffer != PASS_UNUSED) {
+ if (buffer[kfilm_convert->pass_adaptive_aux_buffer + 3] == 0.0f) {
+ const float3 active_rgb = make_float3(1.0f, 0.0f, 0.0f);
+ const float3 mix_rgb = interp(make_float3(pixel[0], pixel[1], pixel[2]), active_rgb, 0.5f);
+ pixel[0] = mix_rgb.x;
+ pixel[1] = mix_rgb.y;
+ pixel[2] = mix_rgb.z;
+ }
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/film_write_passes.h b/intern/cycles/kernel/film/film_write_passes.h
new file mode 100644
index 00000000000..9d379495629
--- /dev/null
+++ b/intern/cycles/kernel/film/film_write_passes.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifdef __KERNEL_GPU__
+# define __ATOMIC_PASS_WRITE__
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
+{
+#ifdef __ATOMIC_PASS_WRITE__
+ atomic_add_and_fetch_float(buffer, value);
+#else
+ *buffer += value;
+#endif
+}
+
+ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict buffer,
+ float3 value)
+{
+#ifdef __ATOMIC_PASS_WRITE__
+ ccl_global float *buf_x = buffer + 0;
+ ccl_global float *buf_y = buffer + 1;
+ ccl_global float *buf_z = buffer + 2;
+
+ atomic_add_and_fetch_float(buf_x, value.x);
+ atomic_add_and_fetch_float(buf_y, value.y);
+ atomic_add_and_fetch_float(buf_z, value.z);
+#else
+ buffer[0] += value.x;
+ buffer[1] += value.y;
+ buffer[2] += value.z;
+#endif
+}
+
+ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict buffer,
+ float4 value)
+{
+#ifdef __ATOMIC_PASS_WRITE__
+ ccl_global float *buf_x = buffer + 0;
+ ccl_global float *buf_y = buffer + 1;
+ ccl_global float *buf_z = buffer + 2;
+ ccl_global float *buf_w = buffer + 3;
+
+ atomic_add_and_fetch_float(buf_x, value.x);
+ atomic_add_and_fetch_float(buf_y, value.y);
+ atomic_add_and_fetch_float(buf_z, value.z);
+ atomic_add_and_fetch_float(buf_w, value.w);
+#else
+ buffer[0] += value.x;
+ buffer[1] += value.y;
+ buffer[2] += value.z;
+ buffer[3] += value.w;
+#endif
+}
+
+ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer)
+{
+ return *buffer;
+}
+
+ccl_device_inline float3 kernel_read_pass_float3(ccl_global float *ccl_restrict buffer)
+{
+ return make_float3(buffer[0], buffer[1], buffer[2]);
+}
+
+ccl_device_inline float4 kernel_read_pass_float4(ccl_global float *ccl_restrict buffer)
+{
+ return make_float4(buffer[0], buffer[1], buffer[2], buffer[3]);
+}
+
+CCL_NAMESPACE_END
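
As a final aside (not part of the patch): on the GPU many paths can accumulate into the same pixel
concurrently, which is why __ATOMIC_PASS_WRITE__ switches the writes above to atomic adds. A
minimal, self-contained C++ sketch of the compare-and-swap loop that an atomic float add such as
atomic_add_and_fetch_float typically reduces to (std::atomic is used purely for illustration, and
all names are assumptions):

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

/* CAS loop implementing an atomic float add. */
static float atomic_add_float(std::atomic<float> *target, float value)
{
  float expected = target->load();
  while (!target->compare_exchange_weak(expected, expected + value)) {
    /* 'expected' now holds the current value; retry with it. */
  }
  return expected + value;
}

int main()
{
  std::atomic<float> pixel{0.0f};

  /* Many "paths" writing to the same pass value at once. */
  std::vector<std::thread> threads;
  for (int i = 0; i < 8; i++) {
    threads.emplace_back([&pixel]() {
      for (int j = 0; j < 1000; j++) {
        atomic_add_float(&pixel, 0.001f);
      }
    });
  }
  for (std::thread &t : threads) {
    t.join();
  }

  /* Without atomics, racing read-modify-writes would silently lose contributions. */
  printf("accumulated: %f (expected ~8.0)\n", pixel.load());
  return 0;
}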