diff options
Diffstat (limited to 'intern/cycles/kernel/film')
-rw-r--r-- | intern/cycles/kernel/film/film_accumulate.h | 559 | ||||
-rw-r--r-- | intern/cycles/kernel/film/film_adaptive_sampling.h | 160 | ||||
-rw-r--r-- | intern/cycles/kernel/film/film_id_passes.h | 106 | ||||
-rw-r--r-- | intern/cycles/kernel/film/film_passes.h | 342 | ||||
-rw-r--r-- | intern/cycles/kernel/film/film_read.h | 532 | ||||
-rw-r--r-- | intern/cycles/kernel/film/film_write_passes.h | 88 |
6 files changed, 1787 insertions, 0 deletions
diff --git a/intern/cycles/kernel/film/film_accumulate.h b/intern/cycles/kernel/film/film_accumulate.h new file mode 100644 index 00000000000..91424fdbe21 --- /dev/null +++ b/intern/cycles/kernel/film/film_accumulate.h @@ -0,0 +1,559 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/film/film_adaptive_sampling.h" +#include "kernel/film/film_write_passes.h" + +#include "kernel/integrator/integrator_shadow_catcher.h" + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * BSDF Evaluation + * + * BSDF evaluation result, split between diffuse and glossy. This is used to + * accumulate render passes separately. Note that reflection, transmission + * and volume scattering are written to different render passes, but we assume + * that only one of those can happen at a bounce, and so do not need to accumulate + * them separately. */ + +ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval, + const bool is_diffuse, + float3 value) +{ + eval->diffuse = zero_float3(); + eval->glossy = zero_float3(); + + if (is_diffuse) { + eval->diffuse = value; + } + else { + eval->glossy = value; + } +} + +ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval, + const bool is_diffuse, + float3 value, + float mis_weight) +{ + value *= mis_weight; + + if (is_diffuse) { + eval->diffuse += value; + } + else { + eval->glossy += value; + } +} + +ccl_device_inline bool bsdf_eval_is_zero(ccl_private BsdfEval *eval) +{ + return is_zero(eval->diffuse) && is_zero(eval->glossy); +} + +ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value) +{ + eval->diffuse *= value; + eval->glossy *= value; +} + +ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value) +{ + eval->diffuse *= value; + eval->glossy *= value; +} + +ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval) +{ + return eval->diffuse + eval->glossy; +} + +ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval) +{ + /* Ratio of diffuse and glossy to recover proportions for writing to render pass. + * We assume reflection, transmission and volume scatter to be exclusive. */ + return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy); +} + +/* -------------------------------------------------------------------- + * Clamping + * + * Clamping is done on a per-contribution basis so that we can write directly + * to render buffers instead of using per-thread memory, and to avoid the + * impact of clamping on other contributions. */ + +ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce) +{ +#ifdef __KERNEL_DEBUG_NAN__ + if (!isfinite3_safe(*L)) { + kernel_assert(!"Cycles sample with non-finite value detected"); + } +#endif + /* Make sure all components are finite, allowing the contribution to be usable by adaptive + * sampling convergence check, but also to make it so render result never causes issues with + * post-processing. */ + *L = ensure_finite3(*L); + +#ifdef __CLAMP_SAMPLE__ + float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect : + kernel_data.integrator.sample_clamp_direct; + float sum = reduce_add(fabs(*L)); + if (sum > limit) { + *L *= limit / sum; + } +#endif +} + +/* -------------------------------------------------------------------- + * Pass accumulation utilities. + */ + +/* Get pointer to pixel in render buffer. */ +ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + return render_buffer + render_buffer_offset; +} + +/* -------------------------------------------------------------------- + * Adaptive sampling. + */ + +ccl_device_inline int kernel_accum_sample(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *ccl_restrict render_buffer, + int sample) +{ + if (kernel_data.film.pass_sample_count == PASS_UNUSED) { + return sample; + } + + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + + return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1); +} + +ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, + const int sample, + const float3 contribution, + ccl_global float *ccl_restrict buffer) +{ + /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping + * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte + * Carlo global illumination" except that here it is applied per pixel and not in hierarchical + * tiles. */ + + if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { + return; + } + + if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_adaptive_aux_buffer, + make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f)); + } +} + +/* -------------------------------------------------------------------- + * Shadow catcher. + */ + +#ifdef __SHADOW_CATCHER__ + +/* Accumulate contribution to the Shadow Catcher pass. + * + * Returns truth if the contribution is fully handled here and is not to be added to the other + * passes (like combined, adaptive sampling). */ + +ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, + const uint32_t path_flag, + const float3 contribution, + ccl_global float *ccl_restrict buffer) +{ + if (!kernel_data.integrator.has_shadow_catcher) { + return false; + } + + kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + /* Matte pass. */ + if (kernel_shadow_catcher_is_matte_path(path_flag)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); + /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive + * sampling is based on how noisy the combined pass is as if there were no catchers in the + * scene. */ + } + + /* Shadow catcher pass. */ + if (kernel_shadow_catcher_is_object_pass(path_flag)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); + return true; + } + + return false; +} + +ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, + const uint32_t path_flag, + const float3 contribution, + const float transparent, + ccl_global float *ccl_restrict buffer) +{ + if (!kernel_data.integrator.has_shadow_catcher) { + return false; + } + + kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + if (path_flag & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + return true; + } + + /* Matte pass. */ + if (kernel_shadow_catcher_is_matte_path(path_flag)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_shadow_catcher_matte, + make_float4(contribution.x, contribution.y, contribution.z, transparent)); + /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive + * sampling is based on how noisy the combined pass is as if there were no catchers in the + * scene. */ + } + + /* Shadow catcher pass. */ + if (kernel_shadow_catcher_is_object_pass(path_flag)) { + /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the + * calculation and the alpha channel of the pass contains numbers of samples contributed to a + * pixel of the pass. */ + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); + return true; + } + + return false; +} + +ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict buffer) +{ + if (!kernel_data.integrator.has_shadow_catcher) { + return; + } + + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + /* Matte pass. */ + if (kernel_shadow_catcher_is_matte_path(path_flag)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); + } +} + +#endif /* __SHADOW_CATCHER__ */ + +/* -------------------------------------------------------------------- + * Render passes. + */ + +/* Write combined pass. */ +ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg, + const uint32_t path_flag, + const int sample, + const float3 contribution, + ccl_global float *ccl_restrict buffer) +{ +#ifdef __SHADOW_CATCHER__ + if (kernel_accum_shadow_catcher(kg, path_flag, contribution, buffer)) { + return; + } +#endif + + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution); + } + + kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); +} + +/* Write combined pass with transparency. */ +ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg, + const uint32_t path_flag, + const int sample, + const float3 contribution, + const float transparent, + ccl_global float *ccl_restrict + buffer) +{ +#ifdef __SHADOW_CATCHER__ + if (kernel_accum_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) { + return; + } +#endif + + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_combined, + make_float4(contribution.x, contribution.y, contribution.z, transparent)); + } + + kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); +} + +/* Write background or emission to appropriate pass. */ +ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg, + ConstIntegratorState state, + float3 contribution, + ccl_global float *ccl_restrict + buffer, + const int pass) +{ + if (!(kernel_data.film.light_pass_flag & PASS_ANY)) { + return; + } + +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + int pass_offset = PASS_UNUSED; + + /* Denoising albedo. */ +# ifdef __DENOISING_FEATURES__ + if (path_flag & PATH_RAY_DENOISING_FEATURES) { + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const float3 denoising_albedo = denoising_feature_throughput * contribution; + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } + } +# endif /* __DENOISING_FEATURES__ */ + + if (!(path_flag & PATH_RAY_ANY_PASS)) { + /* Directly visible, write to emission or background pass. */ + pass_offset = pass; + } + else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { + /* Indirectly visible through reflection. */ + const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? + ((INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_glossy_direct : + kernel_data.film.pass_glossy_indirect) : + ((INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_transmission_direct : + kernel_data.film.pass_transmission_indirect); + + if (glossy_pass_offset != PASS_UNUSED) { + /* Glossy is a subset of the throughput, reconstruct it here using the + * diffuse-glossy ratio. */ + const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); + const float3 glossy_contribution = (one_float3() - ratio) * contribution; + kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); + } + + /* Reconstruct diffuse subset of throughput. */ + pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_diffuse_direct : + kernel_data.film.pass_diffuse_indirect; + if (pass_offset != PASS_UNUSED) { + contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); + } + } + else if (path_flag & PATH_RAY_VOLUME_PASS) { + /* Indirectly visible through volume. */ + pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_volume_direct : + kernel_data.film.pass_volume_indirect; + } + + /* Single write call for GPU coherence. */ + if (pass_offset != PASS_UNUSED) { + kernel_write_pass_float3(buffer + pass_offset, contribution); + } +#endif /* __PASSES__ */ +} + +/* Write light contribution to render buffer. */ +ccl_device_inline void kernel_accum_light(KernelGlobals kg, + ConstIntegratorShadowState state, + ccl_global float *ccl_restrict render_buffer) +{ + /* The throughput for shadow paths already contains the light shader evaluation. */ + float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput); + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); + + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, shadow_path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset; + + const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); + const int sample = INTEGRATOR_STATE(state, shadow_path, sample); + + /* Ambient occlusion. */ + if (path_flag & PATH_RAY_SHADOW_FOR_AO) { + if ((kernel_data.kernel_features & KERNEL_FEATURE_AO_PASS) && (path_flag & PATH_RAY_CAMERA)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, contribution); + } + if (kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) { + const float3 ao_weight = INTEGRATOR_STATE(state, shadow_path, unshadowed_throughput); + kernel_accum_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer); + } + return; + } + + /* Direct light shadow. */ + kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); + +#ifdef __PASSES__ + if (kernel_data.film.light_pass_flag & PASS_ANY) { + const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); + int pass_offset = PASS_UNUSED; + + if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { + /* Indirectly visible through reflection. */ + const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? + ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? + kernel_data.film.pass_glossy_direct : + kernel_data.film.pass_glossy_indirect) : + ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? + kernel_data.film.pass_transmission_direct : + kernel_data.film.pass_transmission_indirect); + + if (glossy_pass_offset != PASS_UNUSED) { + /* Glossy is a subset of the throughput, reconstruct it here using the + * diffuse-glossy ratio. */ + const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); + const float3 glossy_contribution = (one_float3() - ratio) * contribution; + kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); + } + + /* Reconstruct diffuse subset of throughput. */ + pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? + kernel_data.film.pass_diffuse_direct : + kernel_data.film.pass_diffuse_indirect; + if (pass_offset != PASS_UNUSED) { + contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); + } + } + else if (path_flag & PATH_RAY_VOLUME_PASS) { + /* Indirectly visible through volume. */ + pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? + kernel_data.film.pass_volume_direct : + kernel_data.film.pass_volume_indirect; + } + + /* Single write call for GPU coherence. */ + if (pass_offset != PASS_UNUSED) { + kernel_write_pass_float3(buffer + pass_offset, contribution); + } + + /* Write shadow pass. */ + if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) && + (path_flag & PATH_RAY_CAMERA)) { + const float3 unshadowed_throughput = INTEGRATOR_STATE( + state, shadow_path, unshadowed_throughput); + const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput); + const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) * + kernel_data.film.pass_shadow_scale; + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow); + } + } +#endif +} + +/* Write transparency to render buffer. + * + * Note that we accumulate transparency = 1 - alpha in the render buffer. + * Otherwise we'd have to write alpha on path termination, which happens + * in many places. */ +ccl_device_inline void kernel_accum_transparent(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict buffer) +{ + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); + } + + kernel_accum_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer); +} + +/* Write holdout to render buffer. */ +ccl_device_inline void kernel_accum_holdout(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict render_buffer) +{ + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + kernel_accum_transparent(kg, state, path_flag, transparent, buffer); +} + +/* Write background contribution to render buffer. + * + * Includes transparency, matching kernel_accum_transparent. */ +ccl_device_inline void kernel_accum_background(KernelGlobals kg, + ConstIntegratorState state, + const float3 L, + const float transparent, + const bool is_transparent_background_ray, + ccl_global float *ccl_restrict render_buffer) +{ + float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L; + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (is_transparent_background_ray) { + kernel_accum_transparent(kg, state, path_flag, transparent, buffer); + } + else { + const int sample = INTEGRATOR_STATE(state, path, sample); + kernel_accum_combined_transparent_pass( + kg, path_flag, sample, contribution, transparent, buffer); + } + kernel_accum_emission_or_background_pass( + kg, state, contribution, buffer, kernel_data.film.pass_background); +} + +/* Write emission to render buffer. */ +ccl_device_inline void kernel_accum_emission(KernelGlobals kg, + ConstIntegratorState state, + const float3 throughput, + const float3 L, + ccl_global float *ccl_restrict render_buffer) +{ + float3 contribution = throughput * L; + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const int sample = INTEGRATOR_STATE(state, path, sample); + + kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); + kernel_accum_emission_or_background_pass( + kg, state, contribution, buffer, kernel_data.film.pass_emission); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_adaptive_sampling.h b/intern/cycles/kernel/film/film_adaptive_sampling.h new file mode 100644 index 00000000000..c78b5f6b707 --- /dev/null +++ b/intern/cycles/kernel/film/film_adaptive_sampling.h @@ -0,0 +1,160 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/film/film_write_passes.h" + +CCL_NAMESPACE_BEGIN + +/* Check whether the pixel has converged and should not be sampled anymore. */ + +ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *render_buffer) +{ + if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { + return true; + } + + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset; + + const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; + return buffer[aux_w_offset] == 0.0f; +} + +/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */ + +ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg, + ccl_global float *render_buffer, + int x, + int y, + float threshold, + bool reset, + int offset, + int stride) +{ + kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED); + + const int render_pixel_index = offset + x + y * stride; + ccl_global float *buffer = render_buffer + + (uint64_t)render_pixel_index * kernel_data.film.pass_stride; + + /* TODO(Stefan): Is this better in linear, sRGB or something else? */ + + const float4 A = kernel_read_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer); + if (!reset && A.w != 0.0f) { + /* If the pixel was considered converged, its state will not change in this kernel. Early + * output before doing any math. + * + * TODO(sergey): On a GPU it might be better to keep thread alive for better coherency? */ + return true; + } + + const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined); + + const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]); + const float inv_sample = 1.0f / sample; + + /* The per pixel error as seen in section 2.1 of + * "A hierarchical automatic stopping condition for Monte Carlo global illumination" */ + const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) * + inv_sample; + const float error_normalize = sqrtf((I.x + I.y + I.z) * inv_sample); + /* A small epsilon is added to the divisor to prevent division by zero. */ + const float error = error_difference / (0.0001f + error_normalize); + const bool did_converge = (error < threshold); + + const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; + buffer[aux_w_offset] = did_converge; + + return did_converge; +} + +/* This is a simple box filter in two passes. + * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */ + +ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg, + ccl_global float *render_buffer, + int y, + int start_x, + int width, + int offset, + int stride) +{ + kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); + + bool prev = false; + for (int x = start_x; x < start_x + width; ++x) { + int index = offset + x + y * stride; + ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; + const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; + + if (buffer[aux_w_offset] == 0.0f) { + if (x > start_x && !prev) { + index = index - 1; + buffer = render_buffer + index * kernel_data.film.pass_stride; + buffer[aux_w_offset] = 0.0f; + } + prev = true; + } + else { + if (prev) { + buffer[aux_w_offset] = 0.0f; + } + prev = false; + } + } +} + +ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg, + ccl_global float *render_buffer, + int x, + int start_y, + int height, + int offset, + int stride) +{ + kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); + + bool prev = false; + for (int y = start_y; y < start_y + height; ++y) { + int index = offset + x + y * stride; + ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; + const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; + + if (buffer[aux_w_offset] == 0.0f) { + if (y > start_y && !prev) { + index = index - stride; + buffer = render_buffer + index * kernel_data.film.pass_stride; + buffer[aux_w_offset] = 0.0f; + } + prev = true; + } + else { + if (prev) { + buffer[aux_w_offset] = 0.0f; + } + prev = false; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_id_passes.h b/intern/cycles/kernel/film/film_id_passes.h new file mode 100644 index 00000000000..d5b8c90a828 --- /dev/null +++ b/intern/cycles/kernel/film/film_id_passes.h @@ -0,0 +1,106 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Element of ID pass stored in the render buffers. + * It is `float2` semantically, but it must be unaligned since the offset of ID passes in the + * render buffers might not meet expected by compiler alignment. */ +typedef struct IDPassBufferElement { + float x; + float y; +} IDPassBufferElement; + +ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, + int num_slots, + float id, + float weight) +{ + kernel_assert(id != ID_NONE); + if (weight == 0.0f) { + return; + } + + for (int slot = 0; slot < num_slots; slot++) { + ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; +#ifdef __ATOMIC_PASS_WRITE__ + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if (id_buffer[slot].x == ID_NONE) { + /* Use an atomic to claim this slot. + * If a different thread got here first, try again from this slot on. */ + float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id); + if (old_id != ID_NONE && old_id != id) { + continue; + } + atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. */ + else if (id_buffer[slot].x == id || slot == num_slots - 1) { + atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); + break; + } +#else /* __ATOMIC_PASS_WRITE__ */ + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if (id_buffer[slot].x == ID_NONE) { + id_buffer[slot].x = id; + id_buffer[slot].y = weight; + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. */ + else if (id_buffer[slot].x == id || slot == num_slots - 1) { + id_buffer[slot].y += weight; + break; + } +#endif /* __ATOMIC_PASS_WRITE__ */ + } +} + +ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) +{ + ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; + for (int slot = 1; slot < num_slots; ++slot) { + if (id_buffer[slot].x == ID_NONE) { + return; + } + /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ + int i = slot; + while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { + const IDPassBufferElement swap = id_buffer[i]; + id_buffer[i] = id_buffer[i - 1]; + id_buffer[i - 1] = swap; + --i; + } + } +} + +/* post-sorting for Cryptomatte */ +ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg, + ccl_global float *render_buffer, + int pixel_index) +{ + const int pass_stride = kernel_data.film.pass_stride; + const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride; + ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset + + kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_passes.h b/intern/cycles/kernel/film/film_passes.h new file mode 100644 index 00000000000..6c124247f89 --- /dev/null +++ b/intern/cycles/kernel/film/film_passes.h @@ -0,0 +1,342 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/geom/geom.h" + +#include "kernel/film/film_id_passes.h" +#include "kernel/film/film_write_passes.h" + +CCL_NAMESPACE_BEGIN + +/* Get pointer to pixel in render buffer. */ +ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer( + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + return render_buffer + render_buffer_offset; +} + +#ifdef __DENOISING_FEATURES__ + +ccl_device_forceinline void kernel_write_denoising_features_surface( + KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) { + return; + } + + /* Skip implicitly transparent surfaces. */ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return; + } + + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + + if (kernel_data.film.pass_denoising_depth != PASS_UNUSED) { + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const float denoising_depth = ensure_finite(average(denoising_feature_throughput) * + sd->ray_length); + kernel_write_pass_float(buffer + kernel_data.film.pass_denoising_depth, denoising_depth); + } + + float3 normal = zero_float3(); + float3 diffuse_albedo = zero_float3(); + float3 specular_albedo = zero_float3(); + float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + continue; + } + + /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ + normal += sc->N * sc->sample_weight; + sum_weight += sc->sample_weight; + + float3 closure_albedo = sc->weight; + /* Closures that include a Fresnel term typically have weights close to 1 even though their + * actual contribution is significantly lower. + * To account for this, we scale their weight by the average fresnel factor (the same is also + * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ + if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc; + closure_albedo *= bsdf->extra->fresnel_color; + } + else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { + ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc; + closure_albedo *= bsdf->avg_value; + } + else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) { + closure_albedo *= bsdf_principled_hair_albedo(sc); + } + + if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { + diffuse_albedo += closure_albedo; + sum_nonspecular_weight += sc->sample_weight; + } + else { + specular_albedo += closure_albedo; + } + } + + /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ + if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { + if (sum_weight != 0.0f) { + normal /= sum_weight; + } + + if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) { + /* Transform normal into camera space. */ + const Transform worldtocamera = kernel_data.cam.worldtocamera; + normal = transform_direction(&worldtocamera, normal); + + const float3 denoising_normal = ensure_finite3(normal); + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); + } + + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * + diffuse_albedo); + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } + + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + } + else { + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo; + } +} + +ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals kg, + IntegratorState state, + const float3 albedo, + const bool scatter, + ccl_global float *ccl_restrict + render_buffer) +{ + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + + if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) { + /* Assume scatter is sufficiently diffuse to stop writing denoising features. */ + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + + /* Write view direction as normal. */ + const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f); + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); + } + + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + /* Write albedo. */ + const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * albedo); + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } +} +#endif /* __DENOISING_FEATURES__ */ + +#ifdef __SHADOW_CATCHER__ + +/* Write shadow catcher passes on a bounce from the shadow catcher object. */ +ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( + KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ + if (!kernel_data.integrator.has_shadow_catcher) { + return; + } + + kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) { + return; + } + + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + + /* Count sample for the shadow catcher object. */ + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); + + /* Since the split is done, the sample does not contribute to the matte, so accumulate it as + * transparency to the matte. */ + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, + average(throughput)); +} + +#endif /* __SHADOW_CATCHER__ */ + +ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer, + size_t depth, + float id, + float matte_weight) +{ + kernel_write_id_slots(buffer, depth * 2, id, matte_weight); + return depth * 4; +} + +ccl_device_inline void kernel_write_data_passes(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (!(path_flag & PATH_RAY_CAMERA)) { + return; + } + + const int flag = kernel_data.film.pass_flag; + + if (!(flag & PASS_ANY)) { + return; + } + + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + + if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { + if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || + average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { + if (INTEGRATOR_STATE(state, path, sample) == 0) { + if (flag & PASSMASK(DEPTH)) { + const float depth = camera_z_depth(kg, sd->P); + kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); + } + if (flag & PASSMASK(OBJECT_ID)) { + const float id = object_pass_id(kg, sd->object); + kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); + } + if (flag & PASSMASK(MATERIAL_ID)) { + const float id = shader_pass_id(kg, sd); + kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); + } + if (flag & PASSMASK(POSITION)) { + const float3 position = sd->P; + kernel_write_pass_float3(buffer + kernel_data.film.pass_position, position); + } + } + + if (flag & PASSMASK(NORMAL)) { + const float3 normal = shader_bsdf_average_normal(kg, sd); + kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); + } + if (flag & PASSMASK(ROUGHNESS)) { + const float roughness = shader_bsdf_average_roughness(sd); + kernel_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness); + } + if (flag & PASSMASK(UV)) { + const float3 uv = primitive_uv(kg, sd); + kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); + } + if (flag & PASSMASK(MOTION)) { + const float4 speed = primitive_motion_vector(kg, sd); + kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); + kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); + } + + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE; + } + } + + if (kernel_data.film.cryptomatte_passes) { + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + const float matte_weight = average(throughput) * + (1.0f - average(shader_bsdf_transparency(kg, sd))); + if (matte_weight > 0.0f) { + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + const float id = object_cryptomatte_id(kg, sd->object); + cryptomatte_buffer += kernel_write_id_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + const float id = shader_cryptomatte_id(kg, sd->shader); + cryptomatte_buffer += kernel_write_id_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + const float id = object_cryptomatte_asset_id(kg, sd->object); + cryptomatte_buffer += kernel_write_id_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + } + } + + if (flag & PASSMASK(DIFFUSE_COLOR)) { + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, + shader_bsdf_diffuse(kg, sd) * throughput); + } + if (flag & PASSMASK(GLOSSY_COLOR)) { + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, + shader_bsdf_glossy(kg, sd) * throughput); + } + if (flag & PASSMASK(TRANSMISSION_COLOR)) { + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, + shader_bsdf_transmission(kg, sd) * throughput); + } + if (flag & PASSMASK(MIST)) { + /* Bring depth into 0..1 range. */ + const float mist_start = kernel_data.film.mist_start; + const float mist_inv_depth = kernel_data.film.mist_inv_depth; + + const float depth = camera_distance(kg, sd->P); + float mist = saturate((depth - mist_start) * mist_inv_depth); + + /* Falloff */ + const float mist_falloff = kernel_data.film.mist_falloff; + + if (mist_falloff == 1.0f) + ; + else if (mist_falloff == 2.0f) + mist = mist * mist; + else if (mist_falloff == 0.5f) + mist = sqrtf(mist); + else + mist = powf(mist, mist_falloff); + + /* Modulate by transparency */ + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + const float3 alpha = shader_bsdf_alpha(kg, sd); + const float mist_output = (1.0f - mist) * average(throughput * alpha); + + /* Note that the final value in the render buffer we want is 1 - mist_output, + * to avoid having to tracking this in the Integrator state we do the negation + * after rendering. */ + kernel_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output); + } +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_read.h b/intern/cycles/kernel/film/film_read.h new file mode 100644 index 00000000000..a87eff3832e --- /dev/null +++ b/intern/cycles/kernel/film/film_read.h @@ -0,0 +1,532 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * Common utilities. + */ + +/* The input buffer contains transparency = 1 - alpha, this converts it to + * alpha. Also clamp since alpha might end up outside of 0..1 due to Russian + * roulette. */ +ccl_device_forceinline float film_transparency_to_alpha(float transparency) +{ + return saturate(1.0f - transparency); +} + +ccl_device_inline float film_get_scale(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer) +{ + if (kfilm_convert->pass_sample_count == PASS_UNUSED) { + return kfilm_convert->scale; + } + + if (kfilm_convert->pass_use_filter) { + const uint sample_count = *( + (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count)); + return 1.0f / sample_count; + } + + return 1.0f; +} + +ccl_device_inline float film_get_scale_exposure(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer) +{ + if (kfilm_convert->pass_sample_count == PASS_UNUSED) { + return kfilm_convert->scale_exposure; + } + + const float scale = film_get_scale(kfilm_convert, buffer); + + if (kfilm_convert->pass_use_exposure) { + return scale * kfilm_convert->exposure; + } + + return scale; +} + +ccl_device_inline bool film_get_scale_and_scale_exposure( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict scale, + ccl_private float *ccl_restrict scale_exposure) +{ + if (kfilm_convert->pass_sample_count == PASS_UNUSED) { + *scale = kfilm_convert->scale; + *scale_exposure = kfilm_convert->scale_exposure; + return true; + } + + const uint sample_count = *( + (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count)); + if (!sample_count) { + *scale = 0.0f; + *scale_exposure = 0.0f; + return false; + } + + if (kfilm_convert->pass_use_filter) { + *scale = 1.0f / sample_count; + } + else { + *scale = 1.0f; + } + + if (kfilm_convert->pass_use_exposure) { + *scale_exposure = *scale * kfilm_convert->exposure; + } + else { + *scale_exposure = *scale; + } + + return true; +} + +/* -------------------------------------------------------------------- + * Float (scalar) passes. + */ + +ccl_device_inline void film_get_pass_pixel_depth(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + const float f = *in; + + pixel[0] = (f == 0.0f) ? 1e10f : f * scale_exposure; +} + +ccl_device_inline void film_get_pass_pixel_mist(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + const float f = *in; + + /* Note that we accumulate 1 - mist in the kernel to avoid having to + * track the mist values in the integrator state. */ + pixel[0] = saturate(1.0f - f * scale_exposure); +} + +ccl_device_inline void film_get_pass_pixel_sample_count( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + /* TODO(sergey): Consider normalizing into the [0..1] range, so that it is possible to see + * meaningful value when adaptive sampler stopped rendering image way before the maximum + * number of samples was reached (for examples when number of samples is set to 0 in + * viewport). */ + + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + const float f = *in; + + pixel[0] = __float_as_uint(f) * kfilm_convert->scale; +} + +ccl_device_inline void film_get_pass_pixel_float(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + const float f = *in; + + pixel[0] = f * scale_exposure; +} + +/* -------------------------------------------------------------------- + * Float 3 passes. + */ + +ccl_device_inline void film_get_pass_pixel_light_path( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 3); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + /* Read light pass. */ + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + float3 f = make_float3(in[0], in[1], in[2]); + + /* Optionally add indirect light pass. */ + if (kfilm_convert->pass_indirect != PASS_UNUSED) { + ccl_global const float *in_indirect = buffer + kfilm_convert->pass_indirect; + const float3 f_indirect = make_float3(in_indirect[0], in_indirect[1], in_indirect[2]); + f += f_indirect; + } + + /* Optionally divide out color. */ + if (kfilm_convert->pass_divide != PASS_UNUSED) { + ccl_global const float *in_divide = buffer + kfilm_convert->pass_divide; + const float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]); + f = safe_divide_even_color(f, f_divide); + + /* Exposure only, sample scale cancels out. */ + f *= kfilm_convert->exposure; + } + else { + /* Sample scale and exposure. */ + f *= film_get_scale_exposure(kfilm_convert, buffer); + } + + pixel[0] = f.x; + pixel[1] = f.y; + pixel[2] = f.z; +} + +ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 3); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + + const float3 f = make_float3(in[0], in[1], in[2]) * scale_exposure; + + pixel[0] = f.x; + pixel[1] = f.y; + pixel[2] = f.z; +} + +/* -------------------------------------------------------------------- + * Float4 passes. + */ + +ccl_device_inline void film_get_pass_pixel_motion(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components == 4); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_motion_weight != PASS_UNUSED); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + ccl_global const float *in_weight = buffer + kfilm_convert->pass_motion_weight; + + const float weight = in_weight[0]; + const float weight_inv = (weight > 0.0f) ? 1.0f / weight : 0.0f; + + const float4 motion = make_float4(in[0], in[1], in[2], in[3]) * weight_inv; + + pixel[0] = motion.x; + pixel[1] = motion.y; + pixel[2] = motion.z; + pixel[3] = motion.w; +} + +ccl_device_inline void film_get_pass_pixel_cryptomatte( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components == 4); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale = film_get_scale(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + + const float4 f = make_float4(in[0], in[1], in[2], in[3]); + + /* x and z contain integer IDs, don't rescale them. + * y and w contain matte weights, they get scaled. */ + pixel[0] = f.x; + pixel[1] = f.y * scale; + pixel[2] = f.z; + pixel[3] = f.w * scale; +} + +ccl_device_inline void film_get_pass_pixel_float4(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components == 4); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + float scale, scale_exposure; + film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + + const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure; + const float alpha = in[3] * scale; + + pixel[0] = color.x; + pixel[1] = color.y; + pixel[2] = color.z; + pixel[3] = alpha; +} + +ccl_device_inline void film_get_pass_pixel_combined( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components == 4); + + /* 3rd channel contains transparency = 1 - alpha for the combined pass. */ + + kernel_assert(kfilm_convert->num_components == 4); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + float scale, scale_exposure; + if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) { + pixel[0] = 0.0f; + pixel[1] = 0.0f; + pixel[2] = 0.0f; + pixel[3] = 0.0f; + return; + } + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + + const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure; + const float alpha = in[3] * scale; + + pixel[0] = color.x; + pixel[1] = color.y; + pixel[2] = color.z; + pixel[3] = film_transparency_to_alpha(alpha); +} + +/* -------------------------------------------------------------------- + * Shadow catcher. + */ + +ccl_device_inline float3 film_calculate_shadow_catcher_denoised( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer) +{ + kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED); + + float scale, scale_exposure; + film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure); + + ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher; + + const float3 pixel = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]) * scale_exposure; + + return pixel; +} + +ccl_device_inline float3 safe_divide_shadow_catcher(float3 a, float3 b) +{ + float x, y, z; + + x = (b.x != 0.0f) ? a.x / b.x : 1.0f; + y = (b.y != 0.0f) ? a.y / b.y : 1.0f; + z = (b.z != 0.0f) ? a.z / b.z : 1.0f; + + return make_float3(x, y, z); +} + +ccl_device_inline float3 +film_calculate_shadow_catcher(ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer) +{ + /* For the shadow catcher pass we divide combined pass by the shadow catcher. + * Note that denoised shadow catcher pass contains value which only needs ot be scaled (but not + * to be calculated as division). */ + + if (kfilm_convert->is_denoised) { + return film_calculate_shadow_catcher_denoised(kfilm_convert, buffer); + } + + kernel_assert(kfilm_convert->pass_shadow_catcher_sample_count != PASS_UNUSED); + + /* If there is no shadow catcher object in this pixel, there is no modification of the light + * needed, so return one. */ + ccl_global const float *in_catcher_sample_count = + buffer + kfilm_convert->pass_shadow_catcher_sample_count; + const float num_samples = in_catcher_sample_count[0]; + if (num_samples == 0.0f) { + return one_float3(); + } + + kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED); + ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher; + + /* NOTE: It is possible that the Shadow Catcher pass is requested as an output without actual + * shadow catcher objects in the scene. In this case there will be no auxiliary passes required + * for the decision (to save up memory). So delay the asserts to this point so that the number of + * samples check handles such configuration. */ + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_combined != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED); + + ccl_global const float *in_combined = buffer + kfilm_convert->pass_combined; + ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte; + + /* No scaling needed. The integration works in way that number of samples in the combined and + * shadow catcher passes are the same, and exposure is canceled during the division. */ + const float3 color_catcher = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]); + const float3 color_combined = make_float3(in_combined[0], in_combined[1], in_combined[2]); + const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]); + + /* Need to ignore contribution of the matte object when doing division (otherwise there will be + * artifacts caused by anti-aliasing). Since combined pass is used for adaptive sampling and need + * to contain matte objects, we subtract matte objects contribution here. This is the same as if + * the matte objects were not accumulated to the combined pass. */ + const float3 combined_no_matte = color_combined - color_matte; + + const float3 shadow_catcher = safe_divide_shadow_catcher(combined_no_matte, color_catcher); + + const float scale = film_get_scale(kfilm_convert, buffer); + const float transparency = in_combined[3] * scale; + const float alpha = film_transparency_to_alpha(transparency); + + /* Alpha-over on white using transparency of the combined pass. This allows to eliminate + * artifacts which are happening on an edge of a shadow catcher when using transparent film. + * Note that we treat shadow catcher as straight alpha here because alpha got canceled out + * during the division. */ + const float3 pixel = (1.0f - alpha) * one_float3() + alpha * shadow_catcher; + + return pixel; +} + +ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer) +{ + /* The approximation of the shadow is 1 - average(shadow_catcher_pass). A better approximation + * is possible. + * + * The matte is alpha-overed onto the shadow (which is kind of alpha-overing shadow onto footage, + * and then alpha-overing synthetic objects on top). */ + + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED); + + float scale, scale_exposure; + if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte; + + const float3 shadow_catcher = film_calculate_shadow_catcher(kfilm_convert, buffer); + const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]) * scale_exposure; + + const float transparency = in_matte[3] * scale; + const float alpha = saturate(1.0f - transparency); + + const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha; + + if (kfilm_convert->use_approximate_shadow_catcher_background) { + kernel_assert(kfilm_convert->pass_background != PASS_UNUSED); + + ccl_global const float *in_background = buffer + kfilm_convert->pass_background; + const float3 color_background = make_float3( + in_background[0], in_background[1], in_background[2]) * + scale_exposure; + const float3 alpha_over = color_matte + color_background * (1.0f - alpha_matte); + return make_float4(alpha_over.x, alpha_over.y, alpha_over.z, 1.0f); + } + + return make_float4(color_matte.x, color_matte.y, color_matte.z, alpha_matte); +} + +ccl_device_inline void film_get_pass_pixel_shadow_catcher( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 3); + + const float3 pixel_value = film_calculate_shadow_catcher(kfilm_convert, buffer); + + pixel[0] = pixel_value.x; + pixel[1] = pixel_value.y; + pixel[2] = pixel_value.z; +} + +ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components == 3 || kfilm_convert->num_components == 4); + + const float4 pixel_value = film_calculate_shadow_catcher_matte_with_shadow(kfilm_convert, + buffer); + + pixel[0] = pixel_value.x; + pixel[1] = pixel_value.y; + pixel[2] = pixel_value.z; + if (kfilm_convert->num_components == 4) { + pixel[3] = pixel_value.w; + } +} + +/* -------------------------------------------------------------------- + * Compositing and overlays. + */ + +ccl_device_inline void film_apply_pass_pixel_overlays_rgba( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + if (kfilm_convert->show_active_pixels && + kfilm_convert->pass_adaptive_aux_buffer != PASS_UNUSED) { + if (buffer[kfilm_convert->pass_adaptive_aux_buffer + 3] == 0.0f) { + const float3 active_rgb = make_float3(1.0f, 0.0f, 0.0f); + const float3 mix_rgb = interp(make_float3(pixel[0], pixel[1], pixel[2]), active_rgb, 0.5f); + pixel[0] = mix_rgb.x; + pixel[1] = mix_rgb.y; + pixel[2] = mix_rgb.z; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_write_passes.h b/intern/cycles/kernel/film/film_write_passes.h new file mode 100644 index 00000000000..9d379495629 --- /dev/null +++ b/intern/cycles/kernel/film/film_write_passes.h @@ -0,0 +1,88 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef __KERNEL_GPU__ +# define __ATOMIC_PASS_WRITE__ +#endif + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value) +{ +#ifdef __ATOMIC_PASS_WRITE__ + atomic_add_and_fetch_float(buffer, value); +#else + *buffer += value; +#endif +} + +ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict buffer, + float3 value) +{ +#ifdef __ATOMIC_PASS_WRITE__ + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); +#else + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; +#endif +} + +ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict buffer, + float4 value) +{ +#ifdef __ATOMIC_PASS_WRITE__ + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + ccl_global float *buf_w = buffer + 3; + + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); + atomic_add_and_fetch_float(buf_w, value.w); +#else + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; + buffer[3] += value.w; +#endif +} + +ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer) +{ + return *buffer; +} + +ccl_device_inline float3 kernel_read_pass_float3(ccl_global float *ccl_restrict buffer) +{ + return make_float3(buffer[0], buffer[1], buffer[2]); +} + +ccl_device_inline float4 kernel_read_pass_float4(ccl_global float *ccl_restrict buffer) +{ + return make_float4(buffer[0], buffer[1], buffer[2], buffer[3]); +} + +CCL_NAMESPACE_END |