diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_accumulate.h')
-rw-r--r-- | intern/cycles/kernel/kernel_accumulate.h | 972 |
1 files changed, 361 insertions, 611 deletions
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 61653d328f1..9e12d24dcf4 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -14,751 +14,501 @@ * limitations under the License. */ +#pragma once + +#include "kernel_adaptive_sampling.h" +#include "kernel_random.h" +#include "kernel_shadow_catcher.h" +#include "kernel_write_passes.h" + CCL_NAMESPACE_BEGIN -/* BSDF Eval +/* -------------------------------------------------------------------- + * BSDF Evaluation * - * BSDF evaluation result, split per BSDF type. This is used to accumulate - * render passes separately. */ - -ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd); + * BSDF evaluation result, split between diffuse and glossy. This is used to + * accumulate render passes separately. Note that reflection, transmission + * and volume scattering are written to different render passes, but we assume + * that only one of those can happen at a bounce, and so do not need to accumulate + * them separately. 
*/ -ccl_device_inline void bsdf_eval_init(BsdfEval *eval, - ClosureType type, - float3 value, - int use_light_pass) +ccl_device_inline void bsdf_eval_init(BsdfEval *eval, const bool is_diffuse, float3 value) { -#ifdef __PASSES__ - eval->use_light_pass = use_light_pass; - - if (eval->use_light_pass) { - eval->diffuse = zero_float3(); - eval->glossy = zero_float3(); - eval->transmission = zero_float3(); - eval->transparent = zero_float3(); - eval->volume = zero_float3(); - - if (type == CLOSURE_BSDF_TRANSPARENT_ID) - eval->transparent = value; - else if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type)) - eval->diffuse = value; - else if (CLOSURE_IS_BSDF_GLOSSY(type)) - eval->glossy = value; - else if (CLOSURE_IS_BSDF_TRANSMISSION(type)) - eval->transmission = value; - else if (CLOSURE_IS_PHASE(type)) - eval->volume = value; - } - else -#endif - { + eval->diffuse = zero_float3(); + eval->glossy = zero_float3(); + + if (is_diffuse) { eval->diffuse = value; } -#ifdef __SHADOW_TRICKS__ - eval->sum_no_mis = zero_float3(); -#endif + else { + eval->glossy = value; + } } ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, - ClosureType type, + const bool is_diffuse, float3 value, float mis_weight) { -#ifdef __SHADOW_TRICKS__ - eval->sum_no_mis += value; -#endif value *= mis_weight; -#ifdef __PASSES__ - if (eval->use_light_pass) { - if (CLOSURE_IS_BSDF_DIFFUSE(type) || CLOSURE_IS_BSDF_BSSRDF(type)) - eval->diffuse += value; - else if (CLOSURE_IS_BSDF_GLOSSY(type)) - eval->glossy += value; - else if (CLOSURE_IS_BSDF_TRANSMISSION(type)) - eval->transmission += value; - else if (CLOSURE_IS_PHASE(type)) - eval->volume += value; - - /* skipping transparent, this function is used by for eval(), will be zero then */ - } - else -#endif - { - eval->diffuse += value; - } -} -ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval) -{ -#ifdef __PASSES__ - if (eval->use_light_pass) { - return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) 
&& - is_zero(eval->transparent) && is_zero(eval->volume); + if (is_diffuse) { + eval->diffuse += value; } - else -#endif - { - return is_zero(eval->diffuse); + else { + eval->glossy += value; } } -ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value) +ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval) { -#ifdef __PASSES__ - if (eval->use_light_pass) { - eval->diffuse *= value; - eval->glossy *= value; - eval->transmission *= value; - eval->volume *= value; - - /* skipping transparent, this function is used by for eval(), will be zero then */ - } - else -#endif - { - eval->diffuse *= value; - } + return is_zero(eval->diffuse) && is_zero(eval->glossy); } ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value) { -#ifdef __SHADOW_TRICKS__ - eval->sum_no_mis *= value; -#endif - bsdf_eval_mis(eval, value); + eval->diffuse *= value; + eval->glossy *= value; } ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value) { -#ifdef __SHADOW_TRICKS__ - eval->sum_no_mis *= value; -#endif -#ifdef __PASSES__ - if (eval->use_light_pass) { - eval->diffuse *= value; - eval->glossy *= value; - eval->transmission *= value; - eval->volume *= value; - - /* skipping transparent, this function is used by for eval(), will be zero then */ - } - else - eval->diffuse *= value; -#else eval->diffuse *= value; -#endif + eval->glossy *= value; } ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval) { -#ifdef __PASSES__ - if (eval->use_light_pass) { - return eval->diffuse + eval->glossy + eval->transmission + eval->volume; - } - else -#endif - return eval->diffuse; + return eval->diffuse + eval->glossy; } -/* Path Radiance - * - * We accumulate different render passes separately. After summing at the end - * to get the combined result, it should be identical. We definite directly - * visible as the first non-transparent hit, while indirectly visible are the - * bounces after that. 
*/ - -ccl_device_inline void path_radiance_init(KernelGlobals *kg, PathRadiance *L) +ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(const BsdfEval *eval) { - /* clear all */ -#ifdef __PASSES__ - L->use_light_pass = kernel_data.film.use_light_pass; - - if (kernel_data.film.use_light_pass) { - L->indirect = zero_float3(); - L->direct_emission = zero_float3(); - - L->color_diffuse = zero_float3(); - L->color_glossy = zero_float3(); - L->color_transmission = zero_float3(); - - L->direct_diffuse = zero_float3(); - L->direct_glossy = zero_float3(); - L->direct_transmission = zero_float3(); - L->direct_volume = zero_float3(); - - L->indirect_diffuse = zero_float3(); - L->indirect_glossy = zero_float3(); - L->indirect_transmission = zero_float3(); - L->indirect_volume = zero_float3(); - - L->transparent = 0.0f; - L->emission = zero_float3(); - L->background = zero_float3(); - L->ao = zero_float3(); - L->shadow = zero_float3(); - L->mist = 0.0f; - - L->state.diffuse = zero_float3(); - L->state.glossy = zero_float3(); - L->state.transmission = zero_float3(); - L->state.volume = zero_float3(); - L->state.direct = zero_float3(); - } - else -#endif - { - L->transparent = 0.0f; - L->emission = zero_float3(); - } - -#ifdef __SHADOW_TRICKS__ - L->path_total = zero_float3(); - L->path_total_shaded = zero_float3(); - L->shadow_background_color = zero_float3(); - L->shadow_throughput = 0.0f; - L->shadow_transparency = 1.0f; - L->has_shadow_catcher = 0; -#endif - -#ifdef __DENOISING_FEATURES__ - L->denoising_normal = zero_float3(); - L->denoising_albedo = zero_float3(); - L->denoising_depth = 0.0f; -#endif + /* Ratio of diffuse and glossy to recover proportions for writing to render pass. + * We assume reflection, transmission and volume scatter to be exclusive. 
*/ + return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy); } -ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg, - PathRadianceState *L_state, - ccl_addr_space float3 *throughput, - BsdfEval *bsdf_eval, - float bsdf_pdf, - int bounce, - int bsdf_label) -{ - float inverse_pdf = 1.0f / bsdf_pdf; - -#ifdef __PASSES__ - if (kernel_data.film.use_light_pass) { - if (bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) { - /* first on directly visible surface */ - float3 value = *throughput * inverse_pdf; - - L_state->diffuse = bsdf_eval->diffuse * value; - L_state->glossy = bsdf_eval->glossy * value; - L_state->transmission = bsdf_eval->transmission * value; - L_state->volume = bsdf_eval->volume * value; - - *throughput = L_state->diffuse + L_state->glossy + L_state->transmission + L_state->volume; +/* -------------------------------------------------------------------- + * Clamping + * + * Clamping is done on a per-contribution basis so that we can write directly + * to render buffers instead of using per-thread memory, and to avoid the + * impact of clamping on other contributions. */ - L_state->direct = *throughput; - } - else { - /* transparent bounce before first hit, or indirectly visible through BSDF */ - float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf; - *throughput *= sum; - } +ccl_device_forceinline void kernel_accum_clamp(const KernelGlobals *kg, float3 *L, int bounce) +{ +#ifdef __KERNEL_DEBUG_NAN__ + if (!isfinite3_safe(*L)) { + kernel_assert(!"Cycles sample with non-finite value detected"); } - else #endif - { - *throughput *= bsdf_eval->diffuse * inverse_pdf; - } -} + /* Make sure all components are finite, allowing the contribution to be usable by adaptive + * sampling convergence check, but also to make it so render result never causes issues with + * post-processing. 
*/ + *L = ensure_finite3(*L); #ifdef __CLAMP_SAMPLE__ -ccl_device_forceinline void path_radiance_clamp(KernelGlobals *kg, float3 *L, int bounce) -{ float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect : kernel_data.integrator.sample_clamp_direct; float sum = reduce_add(fabs(*L)); if (sum > limit) { *L *= limit / sum; } +#endif } -ccl_device_forceinline void path_radiance_clamp_throughput(KernelGlobals *kg, - float3 *L, - float3 *throughput, - int bounce) -{ - float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect : - kernel_data.integrator.sample_clamp_direct; +/* -------------------------------------------------------------------- + * Pass accumulation utilities. + */ - float sum = reduce_add(fabs(*L)); - if (sum > limit) { - float clamp_factor = limit / sum; - *L *= clamp_factor; - *throughput *= clamp_factor; - } +/* Get pointer to pixel in render buffer. */ +ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( + INTEGRATOR_STATE_CONST_ARGS, ccl_global float *ccl_restrict render_buffer) +{ + const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + return render_buffer + render_buffer_offset; } -#endif +/* -------------------------------------------------------------------- + * Adaptive sampling. 
+ */ -ccl_device_inline void path_radiance_accum_emission(KernelGlobals *kg, - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - float3 value) +ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS, + ccl_global float *ccl_restrict render_buffer, + int sample) { -#ifdef __SHADOW_TRICKS__ - if (state->flag & PATH_RAY_SHADOW_CATCHER) { - return; + if (kernel_data.film.pass_sample_count == PASS_UNUSED) { + return sample; } -#endif - float3 contribution = throughput * value; -#ifdef __CLAMP_SAMPLE__ - path_radiance_clamp(kg, &contribution, state->bounce - 1); -#endif + ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, + render_buffer); -#ifdef __PASSES__ - if (L->use_light_pass) { - if (state->bounce == 0) - L->emission += contribution; - else if (state->bounce == 1) - L->direct_emission += contribution; - else - L->indirect += contribution; - } - else -#endif - { - L->emission += contribution; - } + return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1); } -ccl_device_inline void path_radiance_accum_ao(KernelGlobals *kg, - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - float3 alpha, - float3 bsdf, - float3 ao) +ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS, + const float3 contribution, + ccl_global float *ccl_restrict buffer) { -#ifdef __PASSES__ - /* Store AO pass. */ - if (L->use_light_pass && state->bounce == 0) { - L->ao += alpha * throughput * ao; - } -#endif - -#ifdef __SHADOW_TRICKS__ - /* For shadow catcher, accumulate ratio. */ - if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { - float3 light = throughput * bsdf; - L->path_total += light; - L->path_total_shaded += ao * light; + /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping + * criteria. 
This is the heuristic from "A hierarchical automatic stopping condition for Monte + * Carlo global illumination" except that here it is applied per pixel and not in hierarchical + * tiles. */ - if (state->flag & PATH_RAY_SHADOW_CATCHER) { - return; - } + if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { + return; } -#endif - - float3 contribution = throughput * bsdf * ao; -#ifdef __PASSES__ - if (L->use_light_pass) { - if (state->bounce == 0) { - /* Directly visible lighting. */ - L->direct_diffuse += contribution; - } - else { - /* Indirectly visible lighting after BSDF bounce. */ - L->indirect += contribution; - } - } - else -#endif - { - L->emission += contribution; + const int sample = INTEGRATOR_STATE(path, sample); + if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_adaptive_aux_buffer, + make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f)); } } -ccl_device_inline void path_radiance_accum_total_ao(PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - float3 bsdf) -{ -#ifdef __SHADOW_TRICKS__ - if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { - L->path_total += throughput * bsdf; - } -#else - (void)L; - (void)state; - (void)throughput; - (void)bsdf; -#endif -} +/* -------------------------------------------------------------------- + * Shadow catcher. + */ + +#ifdef __SHADOW_CATCHER__ -ccl_device_inline void path_radiance_accum_light(KernelGlobals *kg, - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - BsdfEval *bsdf_eval, - float3 shadow, - float shadow_fac, - bool is_lamp) +/* Accumulate contribution to the Shadow Catcher pass. + * + * Returns truth if the contribution is fully handled here and is not to be added to the other + * passes (like combined, adaptive sampling). 
*/ + +ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, + const float3 contribution, + ccl_global float *ccl_restrict buffer) { -#ifdef __SHADOW_TRICKS__ - if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { - float3 light = throughput * bsdf_eval->sum_no_mis; - L->path_total += light; - L->path_total_shaded += shadow * light; - - if (state->flag & PATH_RAY_SHADOW_CATCHER) { - return; - } + if (!kernel_data.integrator.has_shadow_catcher) { + return false; } -#endif - float3 shaded_throughput = throughput * shadow; + kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); -#ifdef __PASSES__ - if (L->use_light_pass) { - /* Compute the clamping based on the total contribution. - * The resulting scale is then be applied to all individual components. */ - float3 full_contribution = shaded_throughput * bsdf_eval_sum(bsdf_eval); -# ifdef __CLAMP_SAMPLE__ - path_radiance_clamp_throughput(kg, &full_contribution, &shaded_throughput, state->bounce); -# endif - - if (state->bounce == 0) { - /* directly visible lighting */ - L->direct_diffuse += shaded_throughput * bsdf_eval->diffuse; - L->direct_glossy += shaded_throughput * bsdf_eval->glossy; - L->direct_transmission += shaded_throughput * bsdf_eval->transmission; - L->direct_volume += shaded_throughput * bsdf_eval->volume; - - if (is_lamp) { - L->shadow += shadow * shadow_fac; - } - } - else { - /* indirectly visible lighting after BSDF bounce */ - L->indirect += full_contribution; - } + /* Matte pass. */ + if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); + /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive + * sampling is based on how noisy the combined pass is as if there were no catchers in the + * scene. 
*/ } - else -#endif - { - float3 contribution = shaded_throughput * bsdf_eval->diffuse; - path_radiance_clamp(kg, &contribution, state->bounce); - L->emission += contribution; + + /* Shadow catcher pass. */ + if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); + return true; } -} -ccl_device_inline void path_radiance_accum_total_light(PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - const BsdfEval *bsdf_eval) -{ -#ifdef __SHADOW_TRICKS__ - if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { - L->path_total += throughput * bsdf_eval->sum_no_mis; - } -#else - (void)L; - (void)state; - (void)throughput; - (void)bsdf_eval; -#endif + return false; } -ccl_device_inline void path_radiance_accum_background(KernelGlobals *kg, - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - float3 value) +ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_ARGS, + const float3 contribution, + const float transparent, + ccl_global float *ccl_restrict buffer) { + if (!kernel_data.integrator.has_shadow_catcher) { + return false; + } -#ifdef __SHADOW_TRICKS__ - if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { - L->path_total += throughput * value; - L->path_total_shaded += throughput * value * L->shadow_transparency; + kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - if (state->flag & PATH_RAY_SHADOW_CATCHER) { - return; - } + if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + return true; } -#endif - float3 contribution = throughput * value; -#ifdef __CLAMP_SAMPLE__ - path_radiance_clamp(kg, &contribution, state->bounce - 1); -#endif + /* Matte pass. 
*/ + if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_shadow_catcher_matte, + make_float4(contribution.x, contribution.y, contribution.z, transparent)); + /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive + * sampling is based on how noisy the combined pass is as if there were no catchers in the + * scene. */ + } -#ifdef __PASSES__ - if (L->use_light_pass) { - if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) - L->background += contribution; - else if (state->bounce == 1) - L->direct_emission += contribution; - else - L->indirect += contribution; - } - else -#endif - { - L->emission += contribution; + /* Shadow catcher pass. */ + if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) { + /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the + * calculation and the alpha channel of the pass contains numbers of samples contributed to a + * pixel of the pass. 
*/ + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); + return true; } -#ifdef __DENOISING_FEATURES__ - L->denoising_albedo += state->denoising_feature_weight * state->denoising_feature_throughput * - value; -#endif /* __DENOISING_FEATURES__ */ + return false; } -ccl_device_inline void path_radiance_accum_transparent(PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput) +ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CONST_ARGS, + const float transparent, + ccl_global float *ccl_restrict buffer) { - L->transparent += average(throughput); -} + if (!kernel_data.integrator.has_shadow_catcher) { + return; + } -#ifdef __SHADOW_TRICKS__ -ccl_device_inline void path_radiance_accum_shadowcatcher(PathRadiance *L, - float3 throughput, - float3 background) -{ - L->shadow_throughput += average(throughput); - L->shadow_background_color += throughput * background; - L->has_shadow_catcher = 1; -} -#endif + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); -ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L) -{ -#ifdef __PASSES__ - /* this division is a bit ugly, but means we only have to keep track of - * only a single throughput further along the path, here we recover just - * the indirect path that is not influenced by any particular BSDF type */ - if (L->use_light_pass) { - L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct); - L->direct_diffuse += L->state.diffuse * L->direct_emission; - L->direct_glossy += L->state.glossy * L->direct_emission; - L->direct_transmission += L->state.transmission * L->direct_emission; - L->direct_volume += L->state.volume * L->direct_emission; - - L->indirect = safe_divide_color(L->indirect, L->state.direct); - L->indirect_diffuse += L->state.diffuse * L->indirect; - L->indirect_glossy += L->state.glossy * L->indirect; - L->indirect_transmission += L->state.transmission * L->indirect; - 
L->indirect_volume += L->state.volume * L->indirect; + /* Matte pass. */ + if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); } -#endif } -ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L) -{ -#ifdef __PASSES__ - if (L->use_light_pass) { - L->state.diffuse = zero_float3(); - L->state.glossy = zero_float3(); - L->state.transmission = zero_float3(); - L->state.volume = zero_float3(); +#endif /* __SHADOW_CATCHER__ */ + +/* -------------------------------------------------------------------- + * Render passes. + */ - L->direct_emission = zero_float3(); - L->indirect = zero_float3(); +/* Write combined pass. */ +ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS, + const float3 contribution, + ccl_global float *ccl_restrict buffer) +{ +#ifdef __SHADOW_CATCHER__ + if (kernel_accum_shadow_catcher(INTEGRATOR_STATE_PASS, contribution, buffer)) { + return; } #endif + + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution); + } + + kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer); } -ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L, const PathRadiance *L_src) +/* Write combined pass with transparency. 
*/ +ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_CONST_ARGS, + const float3 contribution, + const float transparent, + ccl_global float *ccl_restrict + buffer) { -#ifdef __PASSES__ - if (L->use_light_pass) { - L->state = L_src->state; - - L->direct_emission = L_src->direct_emission; - L->indirect = L_src->indirect; +#ifdef __SHADOW_CATCHER__ + if (kernel_accum_shadow_catcher_transparent( + INTEGRATOR_STATE_PASS, contribution, transparent, buffer)) { + return; } #endif + + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_combined, + make_float4(contribution.x, contribution.y, contribution.z, transparent)); + } + + kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer); } -#ifdef __SHADOW_TRICKS__ -ccl_device_inline void path_radiance_sum_shadowcatcher(KernelGlobals *kg, - PathRadiance *L, - float3 *L_sum, - float *alpha) +/* Write background or emission to appropriate pass. */ +ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE_CONST_ARGS, + float3 contribution, + ccl_global float *ccl_restrict + buffer, + const int pass) { - /* Calculate current shadow of the path. */ - float path_total = average(L->path_total); - float shadow; + if (!(kernel_data.film.light_pass_flag & PASS_ANY)) { + return; + } - if (UNLIKELY(!isfinite_safe(path_total))) { -# ifdef __KERNEL_DEBUG_NAN__ - kernel_assert(!"Non-finite total radiance along the path"); -# endif - shadow = 0.0f; +#ifdef __PASSES__ + const int path_flag = INTEGRATOR_STATE(path, flag); + int pass_offset = PASS_UNUSED; + + /* Denoising albedo. 
*/ +# ifdef __DENOISING_FEATURES__ + if (path_flag & PATH_RAY_DENOISING_FEATURES) { + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + const float3 denoising_feature_throughput = INTEGRATOR_STATE(path, + denoising_feature_throughput); + const float3 denoising_albedo = denoising_feature_throughput * contribution; + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } } - else if (path_total == 0.0f) { - shadow = L->shadow_transparency; +# endif /* __DENOISING_FEATURES__ */ + + if (!(path_flag & PATH_RAY_ANY_PASS)) { + /* Directly visible, write to emission or background pass. */ + pass_offset = pass; + } + else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { + /* Indirectly visible through reflection. */ + const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? + ((INTEGRATOR_STATE(path, bounce) == 1) ? + kernel_data.film.pass_glossy_direct : + kernel_data.film.pass_glossy_indirect) : + ((INTEGRATOR_STATE(path, bounce) == 1) ? + kernel_data.film.pass_transmission_direct : + kernel_data.film.pass_transmission_indirect); + + if (glossy_pass_offset != PASS_UNUSED) { + /* Glossy is a subset of the throughput, reconstruct it here using the + * diffuse-glossy ratio. */ + const float3 ratio = INTEGRATOR_STATE(path, diffuse_glossy_ratio); + const float3 glossy_contribution = (one_float3() - ratio) * contribution; + kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); + } + + /* Reconstruct diffuse subset of throughput. */ + pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_diffuse_direct : + kernel_data.film.pass_diffuse_indirect; + if (pass_offset != PASS_UNUSED) { + contribution *= INTEGRATOR_STATE(path, diffuse_glossy_ratio); + } } - else { - float path_total_shaded = average(L->path_total_shaded); - shadow = path_total_shaded / path_total; + else if (path_flag & PATH_RAY_VOLUME_PASS) { + /* Indirectly visible through volume. 
*/ + pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_volume_direct : + kernel_data.film.pass_volume_indirect; } - /* Calculate final light sum and transparency for shadow catcher object. */ - if (kernel_data.background.transparent) { - *alpha -= L->shadow_throughput * shadow; - } - else { - L->shadow_background_color *= shadow; - *L_sum += L->shadow_background_color; + /* Single write call for GPU coherence. */ + if (pass_offset != PASS_UNUSED) { + kernel_write_pass_float3(buffer + pass_offset, contribution); } +#endif /* __PASSES__ */ } -#endif -ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, - PathRadiance *L, - float *alpha) +/* Write light contribution to render buffer. */ +ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS, + ccl_global float *ccl_restrict render_buffer) { - float3 L_sum; - /* Light Passes are used */ + /* The throughput for shadow paths already contains the light shader evaluation. */ + float3 contribution = INTEGRATOR_STATE(shadow_path, throughput); + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce) - 1); + + ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, + render_buffer); + + kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer); + #ifdef __PASSES__ - float3 L_direct, L_indirect; - if (L->use_light_pass) { - path_radiance_sum_indirect(L); - - L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_volume + - L->emission; - L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + - L->indirect_volume; - - if (!kernel_data.background.transparent) - L_direct += L->background; - - L_sum = L_direct + L_indirect; - float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z); - - /* Reject invalid value */ - if (!isfinite_safe(sum)) { -# ifdef __KERNEL_DEBUG_NAN__ - kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!"); -# 
endif - L_sum = zero_float3(); - - L->direct_diffuse = zero_float3(); - L->direct_glossy = zero_float3(); - L->direct_transmission = zero_float3(); - L->direct_volume = zero_float3(); - - L->indirect_diffuse = zero_float3(); - L->indirect_glossy = zero_float3(); - L->indirect_transmission = zero_float3(); - L->indirect_volume = zero_float3(); - - L->emission = zero_float3(); + if (kernel_data.film.light_pass_flag & PASS_ANY) { + const int path_flag = INTEGRATOR_STATE(shadow_path, flag); + int pass_offset = PASS_UNUSED; + + if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { + /* Indirectly visible through reflection. */ + const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? + ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + kernel_data.film.pass_glossy_direct : + kernel_data.film.pass_glossy_indirect) : + ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + kernel_data.film.pass_transmission_direct : + kernel_data.film.pass_transmission_indirect); + + if (glossy_pass_offset != PASS_UNUSED) { + /* Glossy is a subset of the throughput, reconstruct it here using the + * diffuse-glossy ratio. */ + const float3 ratio = INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio); + const float3 glossy_contribution = (one_float3() - ratio) * contribution; + kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); + } + + /* Reconstruct diffuse subset of throughput. */ + pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + kernel_data.film.pass_diffuse_direct : + kernel_data.film.pass_diffuse_indirect; + if (pass_offset != PASS_UNUSED) { + contribution *= INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio); + } + } + else if (path_flag & PATH_RAY_VOLUME_PASS) { + /* Indirectly visible through volume. */ + pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ? 
+ kernel_data.film.pass_volume_direct : + kernel_data.film.pass_volume_indirect; } - } - /* No Light Passes */ - else -#endif - { - L_sum = L->emission; + /* Single write call for GPU coherence. */ + if (pass_offset != PASS_UNUSED) { + kernel_write_pass_float3(buffer + pass_offset, contribution); + } - /* Reject invalid value */ - float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z); - if (!isfinite_safe(sum)) { -#ifdef __KERNEL_DEBUG_NAN__ - kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!"); -#endif - L_sum = zero_float3(); + /* Write shadow pass. */ + if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) && + (path_flag & PATH_RAY_CAMERA)) { + const float3 unshadowed_throughput = INTEGRATOR_STATE(shadow_path, unshadowed_throughput); + const float3 shadowed_throughput = INTEGRATOR_STATE(shadow_path, throughput); + const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) * + kernel_data.film.pass_shadow_scale; + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow); } } +#endif +} - /* Compute alpha. */ - *alpha = 1.0f - L->transparent; +/* Write transparency to render buffer. + * + * Note that we accumulate transparency = 1 - alpha in the render buffer. + * Otherwise we'd have to write alpha on path termination, which happens + * in many places. + * + * The value is written at offset 3 from the start of the combined pass, and + * only when COMBINED is set in kernel_data.film.light_pass_flag. It is always + * forwarded to kernel_accum_shadow_catcher_transparent_only(). */ +ccl_device_inline void kernel_accum_transparent(INTEGRATOR_STATE_CONST_ARGS, + const float transparent, + ccl_global float *ccl_restrict render_buffer) +{ + ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, + render_buffer); - /* Add shadow catcher contributions. 
*/ -#ifdef __SHADOW_TRICKS__ - if (L->has_shadow_catcher) { - path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha); + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); } -#endif /* __SHADOW_TRICKS__ */ - return L_sum; + kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_PASS, transparent, buffer); } -ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg, - PathRadiance *L, - float3 *noisy, - float3 *clean) +/* Write background contribution to render buffer. + * + * Includes transparency, matching kernel_accum_transparent. + * + * The contribution is INTEGRATOR_STATE(path, throughput) * L, passed through + * kernel_accum_clamp() with bounce - 1. When is_transparent_background_ray is + * set, only kernel_accum_transparent() is called for the combined pass (it + * takes render_buffer because it looks up the pixel buffer itself); otherwise + * contribution and transparent go to kernel_accum_combined_transparent_pass(). + * In both cases the contribution is handed to + * kernel_accum_emission_or_background_pass() with pass_background. */ +ccl_device_inline void kernel_accum_background(INTEGRATOR_STATE_CONST_ARGS, + const float3 L, + const float transparent, + const bool is_transparent_background_ray, + ccl_global float *ccl_restrict render_buffer) { -#ifdef __PASSES__ - kernel_assert(L->use_light_pass); - - *clean = L->emission + L->background; - *noisy = L->direct_volume + L->indirect_volume; - -# define ADD_COMPONENT(flag, component) \ - if (kernel_data.film.denoising_flags & flag) \ - *clean += component; \ - else \ - *noisy += component; - - ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR, L->direct_diffuse); - ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND, L->indirect_diffuse); - ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR, L->direct_glossy); - ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy); - ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission); - ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission); -# undef ADD_COMPONENT -#else - *noisy = L->emission; - *clean = zero_float3(); -#endif + float3 contribution = INTEGRATOR_STATE(path, throughput) * L; + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1); -#ifdef __SHADOW_TRICKS__ - if (L->has_shadow_catcher) { - *noisy += L->shadow_background_color; - } -#endif + ccl_global float *buffer = 
kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, + render_buffer); - *noisy = ensure_finite3(*noisy); - *clean = ensure_finite3(*clean); + if (is_transparent_background_ray) { + kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer); + } + else { + kernel_accum_combined_transparent_pass( + INTEGRATOR_STATE_PASS, contribution, transparent, buffer); + } + kernel_accum_emission_or_background_pass( + INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_background); } -ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample) +/* Write emission to render buffer. + * + * Unlike kernel_accum_background, the throughput is supplied by the caller + * instead of being read from the path state. After kernel_accum_clamp() the + * contribution is handed to kernel_accum_combined_pass() and to + * kernel_accum_emission_or_background_pass() with pass_emission. */ +ccl_device_inline void kernel_accum_emission(INTEGRATOR_STATE_CONST_ARGS, + const float3 throughput, + const float3 L, + ccl_global float *ccl_restrict render_buffer) { -#ifdef __SPLIT_KERNEL__ -# define safe_float3_add(f, v) \ - do { \ - ccl_global float *p = (ccl_global float *)(&(f)); \ - atomic_add_and_fetch_float(p + 0, (v).x); \ - atomic_add_and_fetch_float(p + 1, (v).y); \ - atomic_add_and_fetch_float(p + 2, (v).z); \ - } while (0) -# define safe_float_add(f, v) atomic_add_and_fetch_float(&(f), (v)) -#else -# define safe_float3_add(f, v) (f) += (v) -# define safe_float_add(f, v) (f) += (v) -#endif /* __SPLIT_KERNEL__ */ + float3 contribution = throughput * L; + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1); -#ifdef __PASSES__ - safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse); - safe_float3_add(L->direct_glossy, L_sample->direct_glossy); - safe_float3_add(L->direct_transmission, L_sample->direct_transmission); - safe_float3_add(L->direct_volume, L_sample->direct_volume); - - safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse); - safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy); - safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission); - safe_float3_add(L->indirect_volume, L_sample->indirect_volume); - - safe_float3_add(L->background, 
L_sample->background); - safe_float3_add(L->ao, L_sample->ao); - safe_float3_add(L->shadow, L_sample->shadow); - safe_float_add(L->mist, L_sample->mist); -#endif /* __PASSES__ */ - safe_float3_add(L->emission, L_sample->emission); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, + render_buffer); -#undef safe_float_add -#undef safe_float3_add + kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_emission_or_background_pass( + INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_emission); } CCL_NAMESPACE_END |