diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-10-17 17:10:10 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-18 20:02:10 +0300 |
commit | 1df3b51988852fa8ee6b530a64aa23346db9acd4 (patch) | |
tree | dd79dba4c8ff8bb8474cc399e9d1b308d845e0cb /intern/cycles/kernel/kernel_accumulate.h | |
parent | 44c3bb729be42d6d67eaf8918d7cbcb2ff0b315d (diff) |
Cycles: replace integrator state argument macros
* Rename struct KernelGlobals to struct KernelGlobalsCPU
* Add KernelGlobals, IntegratorState and ConstIntegratorState typedefs
that every device can define in its own way.
* Remove INTEGRATOR_STATE_ARGS and INTEGRATOR_STATE_PASS macros and
replace with these new typedefs.
* Add explicit state argument to INTEGRATOR_STATE and similar macros.
In preparation for decoupling main and shadow paths.
Differential Revision: https://developer.blender.org/D12888
Diffstat (limited to 'intern/cycles/kernel/kernel_accumulate.h')
-rw-r--r-- | intern/cycles/kernel/kernel_accumulate.h | 150 |
1 file changed, 78 insertions, 72 deletions
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index dc0aa9356f7..bc45bbd5b07 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -98,9 +98,7 @@ ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEv * to render buffers instead of using per-thread memory, and to avoid the * impact of clamping on other contributions. */ -ccl_device_forceinline void kernel_accum_clamp(ccl_global const KernelGlobals *kg, - ccl_private float3 *L, - int bounce) +ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce) { #ifdef __KERNEL_DEBUG_NAN__ if (!isfinite3_safe(*L)) { @@ -128,9 +126,9 @@ ccl_device_forceinline void kernel_accum_clamp(ccl_global const KernelGlobals *k /* Get pointer to pixel in render buffer. */ ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( - INTEGRATOR_STATE_CONST_ARGS, ccl_global float *ccl_restrict render_buffer) + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) { - const uint32_t render_pixel_index = INTEGRATOR_STATE(path, render_pixel_index); + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kernel_data.film.pass_stride; return render_buffer + render_buffer_offset; @@ -140,7 +138,8 @@ ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( * Adaptive sampling. 
*/ -ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline int kernel_accum_sample(KernelGlobals kg, + ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer, int sample) { @@ -148,13 +147,13 @@ ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS, return sample; } - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1); } -ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS, +ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, ccl_global float *ccl_restrict buffer) { @@ -167,7 +166,7 @@ ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS, return; } - const int sample = INTEGRATOR_STATE(path, sample); + const int sample = INTEGRATOR_STATE(state, path, sample); if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { kernel_write_pass_float4( buffer + kernel_data.film.pass_adaptive_aux_buffer, @@ -186,7 +185,8 @@ ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS, * Returns truth if the contribution is fully handled here and is not to be added to the other * passes (like combined, adaptive sampling). */ -ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, +ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, ccl_global float *ccl_restrict buffer) { @@ -198,7 +198,7 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); /* Matte pass. 
*/ - if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_matte_path(kg, state)) { kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive * sampling is based on how noisy the combined pass is as if there were no catchers in the @@ -206,7 +206,7 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, } /* Shadow catcher pass. */ - if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_object_pass(kg, state)) { kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); return true; } @@ -214,7 +214,8 @@ ccl_device bool kernel_accum_shadow_catcher(INTEGRATOR_STATE_CONST_ARGS, return false; } -ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_ARGS, +ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, const float transparent, ccl_global float *ccl_restrict buffer) @@ -226,12 +227,12 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { return true; } /* Matte pass. */ - if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_matte_path(kg, state)) { kernel_write_pass_float4( buffer + kernel_data.film.pass_shadow_catcher_matte, make_float4(contribution.x, contribution.y, contribution.z, transparent)); @@ -241,7 +242,7 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A } /* Shadow catcher pass. 
*/ - if (kernel_shadow_catcher_is_object_pass(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_object_pass(kg, state)) { /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the * calculation and the alpha channel of the pass contains numbers of samples contributed to a * pixel of the pass. */ @@ -252,7 +253,8 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(INTEGRATOR_STATE_CONST_A return false; } -ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CONST_ARGS, +ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, + ConstIntegratorState state, const float transparent, ccl_global float *ccl_restrict buffer) { @@ -263,7 +265,7 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CO kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); /* Matte pass. */ - if (kernel_shadow_catcher_is_matte_path(INTEGRATOR_STATE_PASS)) { + if (kernel_shadow_catcher_is_matte_path(kg, state)) { kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); } } @@ -275,12 +277,13 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_CO */ /* Write combined pass. 
*/ -ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, ccl_global float *ccl_restrict buffer) { #ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher(INTEGRATOR_STATE_PASS, contribution, buffer)) { + if (kernel_accum_shadow_catcher(kg, state, contribution, buffer)) { return; } #endif @@ -289,19 +292,19 @@ ccl_device_inline void kernel_accum_combined_pass(INTEGRATOR_STATE_CONST_ARGS, kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution); } - kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_adaptive_buffer(kg, state, contribution, buffer); } /* Write combined pass with transparency. */ -ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg, + ConstIntegratorState state, const float3 contribution, const float transparent, ccl_global float *ccl_restrict buffer) { #ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher_transparent( - INTEGRATOR_STATE_PASS, contribution, transparent, buffer)) { + if (kernel_accum_shadow_catcher_transparent(kg, state, contribution, transparent, buffer)) { return; } #endif @@ -312,11 +315,12 @@ ccl_device_inline void kernel_accum_combined_transparent_pass(INTEGRATOR_STATE_C make_float4(contribution.x, contribution.y, contribution.z, transparent)); } - kernel_accum_adaptive_buffer(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_adaptive_buffer(kg, state, contribution, buffer); } /* Write background or emission to appropriate pass. 
*/ -ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg, + ConstIntegratorState state, float3 contribution, ccl_global float *ccl_restrict buffer, @@ -327,15 +331,15 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE } #ifdef __PASSES__ - const int path_flag = INTEGRATOR_STATE(path, flag); + const int path_flag = INTEGRATOR_STATE(state, path, flag); int pass_offset = PASS_UNUSED; /* Denoising albedo. */ # ifdef __DENOISING_FEATURES__ if (path_flag & PATH_RAY_DENOISING_FEATURES) { if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE(path, - denoising_feature_throughput); + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); const float3 denoising_albedo = denoising_feature_throughput * contribution; kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); } @@ -349,32 +353,34 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { /* Indirectly visible through reflection. */ const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? - ((INTEGRATOR_STATE(path, bounce) == 1) ? + ((INTEGRATOR_STATE(state, path, bounce) == 1) ? kernel_data.film.pass_glossy_direct : kernel_data.film.pass_glossy_indirect) : - ((INTEGRATOR_STATE(path, bounce) == 1) ? + ((INTEGRATOR_STATE(state, path, bounce) == 1) ? kernel_data.film.pass_transmission_direct : kernel_data.film.pass_transmission_indirect); if (glossy_pass_offset != PASS_UNUSED) { /* Glossy is a subset of the throughput, reconstruct it here using the * diffuse-glossy ratio. 
*/ - const float3 ratio = INTEGRATOR_STATE(path, diffuse_glossy_ratio); + const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); const float3 glossy_contribution = (one_float3() - ratio) * contribution; kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); } /* Reconstruct diffuse subset of throughput. */ - pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_diffuse_direct : - kernel_data.film.pass_diffuse_indirect; + pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_diffuse_direct : + kernel_data.film.pass_diffuse_indirect; if (pass_offset != PASS_UNUSED) { - contribution *= INTEGRATOR_STATE(path, diffuse_glossy_ratio); + contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); } } else if (path_flag & PATH_RAY_VOLUME_PASS) { /* Indirectly visible through volume. */ - pass_offset = (INTEGRATOR_STATE(path, bounce) == 1) ? kernel_data.film.pass_volume_direct : - kernel_data.film.pass_volume_indirect; + pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_volume_direct : + kernel_data.film.pass_volume_indirect; } /* Single write call for GPU coherence. */ @@ -385,52 +391,52 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE } /* Write light contribution to render buffer. */ -ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_light(KernelGlobals kg, + ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) { /* The throughput for shadow paths already contains the light shader evaluation. 
*/ - float3 contribution = INTEGRATOR_STATE(shadow_path, throughput); - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(shadow_path, bounce)); + float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput); + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_combined_pass(kg, state, contribution, buffer); #ifdef __PASSES__ if (kernel_data.film.light_pass_flag & PASS_ANY) { - const int path_flag = INTEGRATOR_STATE(shadow_path, flag); + const int path_flag = INTEGRATOR_STATE(state, shadow_path, flag); int pass_offset = PASS_UNUSED; if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { /* Indirectly visible through reflection. */ const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? - ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_glossy_direct : kernel_data.film.pass_glossy_indirect) : - ((INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_transmission_direct : kernel_data.film.pass_transmission_indirect); if (glossy_pass_offset != PASS_UNUSED) { /* Glossy is a subset of the throughput, reconstruct it here using the * diffuse-glossy ratio. */ - const float3 ratio = INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio); + const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); const float3 glossy_contribution = (one_float3() - ratio) * contribution; kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); } /* Reconstruct diffuse subset of throughput. */ - pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ? 
+ pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_diffuse_direct : kernel_data.film.pass_diffuse_indirect; if (pass_offset != PASS_UNUSED) { - contribution *= INTEGRATOR_STATE(shadow_path, diffuse_glossy_ratio); + contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); } } else if (path_flag & PATH_RAY_VOLUME_PASS) { /* Indirectly visible through volume. */ - pass_offset = (INTEGRATOR_STATE(shadow_path, bounce) == 0) ? + pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? kernel_data.film.pass_volume_direct : kernel_data.film.pass_volume_indirect; } @@ -443,8 +449,9 @@ ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS, /* Write shadow pass. */ if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) && (path_flag & PATH_RAY_CAMERA)) { - const float3 unshadowed_throughput = INTEGRATOR_STATE(shadow_path, unshadowed_throughput); - const float3 shadowed_throughput = INTEGRATOR_STATE(shadow_path, throughput); + const float3 unshadowed_throughput = INTEGRATOR_STATE( + state, shadow_path, unshadowed_throughput); + const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput); const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) * kernel_data.film.pass_shadow_scale; kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow); @@ -458,61 +465,60 @@ ccl_device_inline void kernel_accum_light(INTEGRATOR_STATE_CONST_ARGS, * Note that we accumulate transparency = 1 - alpha in the render buffer. * Otherwise we'd have to write alpha on path termination, which happens * in many places. 
*/ -ccl_device_inline void kernel_accum_transparent(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_transparent(KernelGlobals kg, + ConstIntegratorState state, const float transparent, ccl_global float *ccl_restrict render_buffer) { - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); } - kernel_accum_shadow_catcher_transparent_only(INTEGRATOR_STATE_PASS, transparent, buffer); + kernel_accum_shadow_catcher_transparent_only(kg, state, transparent, buffer); } /* Write background contribution to render buffer. * * Includes transparency, matching kernel_accum_transparent. */ -ccl_device_inline void kernel_accum_background(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_background(KernelGlobals kg, + ConstIntegratorState state, const float3 L, const float transparent, const bool is_transparent_background_ray, ccl_global float *ccl_restrict render_buffer) { - float3 contribution = INTEGRATOR_STATE(path, throughput) * L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1); + float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L; + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); if (is_transparent_background_ray) { - kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer); + kernel_accum_transparent(kg, state, transparent, render_buffer); } else { - kernel_accum_combined_transparent_pass( - INTEGRATOR_STATE_PASS, contribution, transparent, buffer); + 
kernel_accum_combined_transparent_pass(kg, state, contribution, transparent, buffer); } kernel_accum_emission_or_background_pass( - INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_background); + kg, state, contribution, buffer, kernel_data.film.pass_background); } /* Write emission to render buffer. */ -ccl_device_inline void kernel_accum_emission(INTEGRATOR_STATE_CONST_ARGS, +ccl_device_inline void kernel_accum_emission(KernelGlobals kg, + ConstIntegratorState state, const float3 throughput, const float3 L, ccl_global float *ccl_restrict render_buffer) { float3 contribution = throughput * L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(path, bounce) - 1); + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS, - render_buffer); + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - kernel_accum_combined_pass(INTEGRATOR_STATE_PASS, contribution, buffer); + kernel_accum_combined_pass(kg, state, contribution, buffer); kernel_accum_emission_or_background_pass( - INTEGRATOR_STATE_PASS, contribution, buffer, kernel_data.film.pass_emission); + kg, state, contribution, buffer, kernel_data.film.pass_emission); } CCL_NAMESPACE_END |