diff options
author | Brecht Van Lommel <brecht@blender.org> | 2022-08-25 19:19:14 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2022-09-02 18:13:28 +0300 |
commit | cf57624764e2890711d2b729a2e22aee8a635679 (patch) | |
tree | 56cd4a56d0d78f41a99f94cc42895d7d49ed1951 /intern/cycles/kernel | |
parent | e72b9ca556c392f5b8810f909209440298f40ed6 (diff) |
Cleanup: refactoring of kernel film function names and organization
Diffstat (limited to 'intern/cycles/kernel')
23 files changed, 658 insertions, 611 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index f32a810786d..c5527ba7716 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -225,12 +225,14 @@ set(SRC_KERNEL_CAMERA_HEADERS ) set(SRC_KERNEL_FILM_HEADERS - film/accumulate.h film/adaptive_sampling.h - film/id_passes.h - film/passes.h + film/aov_passes.h + film/data_passes.h + film/denoising_passes.h + film/cryptomatte_passes.h + film/light_passes.h film/read.h - film/write_passes.h + film/write.h ) set(SRC_KERNEL_INTEGRATOR_HEADERS diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 0e5f7b4a2fd..0d7c06f4fc6 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -34,7 +34,7 @@ # include "kernel/integrator/megakernel.h" # include "kernel/film/adaptive_sampling.h" -# include "kernel/film/id_passes.h" +# include "kernel/film/cryptomatte_passes.h" # include "kernel/film/read.h" # include "kernel/bake/bake.h" @@ -169,7 +169,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_convergence_check); return false; #else - return kernel_adaptive_sampling_convergence_check( + return film_adaptive_sampling_convergence_check( kg, render_buffer, x, y, threshold, reset, offset, stride); #endif } @@ -185,7 +185,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCP #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_x); #else - kernel_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride); + film_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride); #endif } @@ -200,7 +200,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_y); #else - kernel_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride); + film_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride); #endif } @@ -215,7 +215,7 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU * #ifdef KERNEL_STUB STUB_ASSERT(KERNEL_ARCH, cryptomatte_postprocess); #else - kernel_cryptomatte_post(kg, render_buffer, pixel_index); + film_cryptomatte_post(kg, render_buffer, pixel_index); #endif } diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index e1ab802aa80..d7d2000775f 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -526,7 +526,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) bool converged = true; if (x < sw && y < sh) { - converged = ccl_gpu_kernel_call(kernel_adaptive_sampling_convergence_check( + converged = ccl_gpu_kernel_call(film_adaptive_sampling_convergence_check( nullptr, render_buffer, sx + x, sy + y, threshold, reset, offset, stride)); } @@ -553,7 +553,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) if (y < sh) { ccl_gpu_kernel_call( - kernel_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride)); + film_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride)); } } ccl_gpu_kernel_postfix @@ -572,7 +572,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) if (x < sw) { ccl_gpu_kernel_call( - kernel_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride)); + film_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride)); } } ccl_gpu_kernel_postfix @@ -589,7 +589,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) const int pixel_index = ccl_gpu_global_id_x(); if (pixel_index < num_pixels) { - ccl_gpu_kernel_call(kernel_cryptomatte_post(nullptr, render_buffer, pixel_index)); + ccl_gpu_kernel_call(film_cryptomatte_post(nullptr, render_buffer, pixel_index)); } } ccl_gpu_kernel_postfix diff --git a/intern/cycles/kernel/film/adaptive_sampling.h b/intern/cycles/kernel/film/adaptive_sampling.h index 16867c39d99..d28c87747c3 100644 --- a/intern/cycles/kernel/film/adaptive_sampling.h +++ b/intern/cycles/kernel/film/adaptive_sampling.h @@ -3,15 +3,15 @@ #pragma once -#include "kernel/film/write_passes.h" +#include "kernel/film/write.h" CCL_NAMESPACE_BEGIN /* Check whether the pixel has converged and should not be sampled anymore. */ -ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg, - ConstIntegratorState state, - ccl_global float *render_buffer) +ccl_device_forceinline bool film_need_sample_pixel(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *render_buffer) { if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { return true; @@ -28,14 +28,14 @@ ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg, /* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */ -ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg, - ccl_global float *render_buffer, - int x, - int y, - float threshold, - bool reset, - int offset, - int stride) +ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg, + ccl_global float *render_buffer, + int x, + int y, + float threshold, + bool reset, + int offset, + int stride) { kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED); @@ -78,13 +78,13 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg, /* This is a simple box filter in two passes. * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */ -ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg, - ccl_global float *render_buffer, - int y, - int start_x, - int width, - int offset, - int stride) +ccl_device void film_adaptive_sampling_filter_x(KernelGlobals kg, + ccl_global float *render_buffer, + int y, + int start_x, + int width, + int offset, + int stride) { kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); @@ -111,13 +111,13 @@ ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg, } } -ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg, - ccl_global float *render_buffer, - int x, - int start_y, - int height, - int offset, - int stride) +ccl_device void film_adaptive_sampling_filter_y(KernelGlobals kg, + ccl_global float *render_buffer, + int x, + int start_y, + int height, + int offset, + int stride) { kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); diff --git a/intern/cycles/kernel/film/aov_passes.h b/intern/cycles/kernel/film/aov_passes.h new file mode 100644 index 00000000000..3fbb250340f --- /dev/null +++ b/intern/cycles/kernel/film/aov_passes.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/geom/geom.h" + +#include "kernel/film/write.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void film_write_aov_pass_value(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *ccl_restrict render_buffer, + const int aov_id, + const float value) +{ + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + film_write_pass_float(buffer + kernel_data.film.pass_aov_value + aov_id, value); +} + +ccl_device_inline void film_write_aov_pass_color(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *ccl_restrict render_buffer, + const int aov_id, + const float3 color) +{ + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + film_write_pass_float4(buffer + kernel_data.film.pass_aov_color + aov_id, + make_float4(color.x, color.y, color.z, 1.0f)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/id_passes.h b/intern/cycles/kernel/film/cryptomatte_passes.h index c8317512bb2..4765777e7e2 100644 --- a/intern/cycles/kernel/film/id_passes.h +++ b/intern/cycles/kernel/film/cryptomatte_passes.h @@ -8,15 +8,15 @@ CCL_NAMESPACE_BEGIN /* Element of ID pass stored in the render buffers. * It is `float2` semantically, but it must be unaligned since the offset of ID passes in the * render buffers might not meet expected by compiler alignment. */ -typedef struct IDPassBufferElement { +typedef struct CryptoPassBufferElement { float x; float y; -} IDPassBufferElement; +} CryptoPassBufferElement; -ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, - int num_slots, - float id, - float weight) +ccl_device_inline void film_write_cryptomatte_slots(ccl_global float *buffer, + int num_slots, + float id, + float weight) { kernel_assert(id != ID_NONE); if (weight == 0.0f) { @@ -24,7 +24,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, } for (int slot = 0; slot < num_slots; slot++) { - ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; + ccl_global CryptoPassBufferElement *id_buffer = (ccl_global CryptoPassBufferElement *)buffer; #ifdef __ATOMIC_PASS_WRITE__ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ if (id_buffer[slot].x == ID_NONE) { @@ -60,9 +60,9 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, } } -ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) +ccl_device_inline void film_sort_cryptomatte_slots(ccl_global float *buffer, int num_slots) { - ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; + ccl_global CryptoPassBufferElement *id_buffer = (ccl_global CryptoPassBufferElement *)buffer; for (int slot = 1; slot < num_slots; ++slot) { if (id_buffer[slot].x == ID_NONE) { return; @@ -70,7 +70,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ int i = slot; while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { - const IDPassBufferElement swap = id_buffer[i]; + const CryptoPassBufferElement swap = id_buffer[i]; id_buffer[i] = id_buffer[i - 1]; id_buffer[i - 1] = swap; --i; @@ -79,15 +79,15 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl } /* post-sorting for Cryptomatte */ -ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg, - ccl_global float *render_buffer, - int pixel_index) +ccl_device_inline void film_cryptomatte_post(KernelGlobals kg, + ccl_global float *render_buffer, + int pixel_index) { const int pass_stride = kernel_data.film.pass_stride; const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride; ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset + kernel_data.film.pass_cryptomatte; - kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); + film_sort_cryptomatte_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/data_passes.h b/intern/cycles/kernel/film/data_passes.h new file mode 100644 index 00000000000..27721c93c1d --- /dev/null +++ b/intern/cycles/kernel/film/data_passes.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/geom/geom.h" + +#include "kernel/film/cryptomatte_passes.h" +#include "kernel/film/write.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline size_t film_write_cryptomatte_pass(ccl_global float *ccl_restrict buffer, + size_t depth, + float id, + float matte_weight) +{ + film_write_cryptomatte_slots(buffer, depth * 2, id, matte_weight); + return depth * 4; +} + +ccl_device_inline void film_write_data_passes(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { + return; + } + + const int flag = kernel_data.film.pass_flag; + + if (!(flag & PASS_ANY)) { + return; + } + + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + + if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { + if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || + average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { + if (INTEGRATOR_STATE(state, path, sample) == 0) { + if (flag & PASSMASK(DEPTH)) { + const float depth = camera_z_depth(kg, sd->P); + film_write_pass_float(buffer + kernel_data.film.pass_depth, depth); + } + if (flag & PASSMASK(OBJECT_ID)) { + const float id = object_pass_id(kg, sd->object); + film_write_pass_float(buffer + kernel_data.film.pass_object_id, id); + } + if (flag & PASSMASK(MATERIAL_ID)) { + const float id = shader_pass_id(kg, sd); + film_write_pass_float(buffer + kernel_data.film.pass_material_id, id); + } + if (flag & PASSMASK(POSITION)) { + const float3 position = sd->P; + film_write_pass_float3(buffer + kernel_data.film.pass_position, position); + } + } + + if (flag & PASSMASK(NORMAL)) { + const float3 normal = shader_bsdf_average_normal(kg, sd); + film_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); + } + if (flag & PASSMASK(ROUGHNESS)) { + const float roughness = shader_bsdf_average_roughness(sd); + film_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness); + } + if (flag & PASSMASK(UV)) { + const float3 uv = primitive_uv(kg, sd); + film_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); + } + if (flag & PASSMASK(MOTION)) { + const float4 speed = primitive_motion_vector(kg, sd); + film_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); + film_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); + } + + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE; + } + } + + if (kernel_data.film.cryptomatte_passes) { + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const float matte_weight = average(throughput) * + (1.0f - average(shader_bsdf_transparency(kg, sd))); + if (matte_weight > 0.0f) { + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + const float id = object_cryptomatte_id(kg, sd->object); + cryptomatte_buffer += film_write_cryptomatte_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + const float id = shader_cryptomatte_id(kg, sd->shader); + cryptomatte_buffer += film_write_cryptomatte_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + const float id = object_cryptomatte_asset_id(kg, sd->object); + cryptomatte_buffer += film_write_cryptomatte_pass( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); + } + } + } + + if (flag & PASSMASK(DIFFUSE_COLOR)) { + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + film_write_pass_spectrum(buffer + kernel_data.film.pass_diffuse_color, + shader_bsdf_diffuse(kg, sd) * throughput); + } + if (flag & PASSMASK(GLOSSY_COLOR)) { + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + film_write_pass_spectrum(buffer + kernel_data.film.pass_glossy_color, + shader_bsdf_glossy(kg, sd) * throughput); + } + if (flag & PASSMASK(TRANSMISSION_COLOR)) { + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + film_write_pass_spectrum(buffer + kernel_data.film.pass_transmission_color, + shader_bsdf_transmission(kg, sd) * throughput); + } + if (flag & PASSMASK(MIST)) { + /* Bring depth into 0..1 range. */ + const float mist_start = kernel_data.film.mist_start; + const float mist_inv_depth = kernel_data.film.mist_inv_depth; + + const float depth = camera_distance(kg, sd->P); + float mist = saturatef((depth - mist_start) * mist_inv_depth); + + /* Falloff */ + const float mist_falloff = kernel_data.film.mist_falloff; + + if (mist_falloff == 1.0f) + ; + else if (mist_falloff == 2.0f) + mist = mist * mist; + else if (mist_falloff == 0.5f) + mist = sqrtf(mist); + else + mist = powf(mist, mist_falloff); + + /* Modulate by transparency */ + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum alpha = shader_bsdf_alpha(kg, sd); + const float mist_output = (1.0f - mist) * average(throughput * alpha); + + /* Note that the final value in the render buffer we want is 1 - mist_output, + * to avoid having to tracking this in the Integrator state we do the negation + * after rendering. */ + film_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output); + } +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/denoising_passes.h b/intern/cycles/kernel/film/denoising_passes.h new file mode 100644 index 00000000000..1517e8bad56 --- /dev/null +++ b/intern/cycles/kernel/film/denoising_passes.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/geom/geom.h" + +#include "kernel/film/write.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __DENOISING_FEATURES__ +ccl_device_forceinline void film_write_denoising_features_surface(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict + render_buffer) +{ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) { + return; + } + + /* Skip implicitly transparent surfaces. */ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return; + } + + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + + if (kernel_data.film.pass_denoising_depth != PASS_UNUSED) { + const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const float denoising_depth = ensure_finite(average(denoising_feature_throughput) * + sd->ray_length); + film_write_pass_float(buffer + kernel_data.film.pass_denoising_depth, denoising_depth); + } + + float3 normal = zero_float3(); + Spectrum diffuse_albedo = zero_spectrum(); + Spectrum specular_albedo = zero_spectrum(); + float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + continue; + } + + /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ + normal += sc->N * sc->sample_weight; + sum_weight += sc->sample_weight; + + Spectrum closure_albedo = sc->weight; + /* Closures that include a Fresnel term typically have weights close to 1 even though their + * actual contribution is significantly lower. + * To account for this, we scale their weight by the average fresnel factor (the same is also + * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ + if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc; + closure_albedo *= bsdf->extra->fresnel_color; + } + else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { + ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc; + closure_albedo *= bsdf->avg_value; + } + else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) { + closure_albedo *= bsdf_principled_hair_albedo(sc); + } + else if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) { + /* BSSRDF already accounts for weight, retro-reflection would double up. */ + ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *) + sc; + if (bsdf->components == PRINCIPLED_DIFFUSE_RETRO_REFLECTION) { + continue; + } + } + + if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { + diffuse_albedo += closure_albedo; + sum_nonspecular_weight += sc->sample_weight; + } + else { + specular_albedo += closure_albedo; + } + } + + /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ + if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { + if (sum_weight != 0.0f) { + normal /= sum_weight; + } + + if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) { + /* Transform normal into camera space. */ + const Transform worldtocamera = kernel_data.cam.worldtocamera; + normal = transform_direction(&worldtocamera, normal); + + const float3 denoising_normal = ensure_finite(normal); + film_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); + } + + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const Spectrum denoising_albedo = ensure_finite(denoising_feature_throughput * + diffuse_albedo); + film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } + + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + } + else { + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo; + } +} + +ccl_device_forceinline void film_write_denoising_features_volume(KernelGlobals kg, + IntegratorState state, + const Spectrum albedo, + const bool scatter, + ccl_global float *ccl_restrict + render_buffer) +{ + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + + if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) { + /* Assume scatter is sufficiently diffuse to stop writing denoising features. */ + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + + /* Write view direction as normal. */ + const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f); + film_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); + } + + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + /* Write albedo. */ + const Spectrum denoising_albedo = ensure_finite(denoising_feature_throughput * albedo); + film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } +} +#endif /* __DENOISING_FEATURES__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/accumulate.h b/intern/cycles/kernel/film/light_passes.h index 97ec915a8ad..5c306e8f088 100644 --- a/intern/cycles/kernel/film/accumulate.h +++ b/intern/cycles/kernel/film/light_passes.h @@ -4,7 +4,7 @@ #pragma once #include "kernel/film/adaptive_sampling.h" -#include "kernel/film/write_passes.h" +#include "kernel/film/write.h" #include "kernel/integrator/shadow_catcher.h" @@ -95,9 +95,7 @@ ccl_device_inline Spectrum bsdf_eval_pass_glossy_weight(ccl_private const BsdfEv * to render buffers instead of using per-thread memory, and to avoid the * impact of clamping on other contributions. */ -ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, - ccl_private Spectrum *L, - int bounce) +ccl_device_forceinline void film_clamp_light(KernelGlobals kg, ccl_private Spectrum *L, int bounce) { #ifdef __KERNEL_DEBUG_NAN__ if (!isfinite_safe(*L)) { @@ -123,41 +121,31 @@ ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, * Pass accumulation utilities. */ -/* Get pointer to pixel in render buffer. */ -ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( - KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - return render_buffer + render_buffer_offset; -} - /* -------------------------------------------------------------------- * Adaptive sampling. */ -ccl_device_inline int kernel_accum_sample(KernelGlobals kg, - ConstIntegratorState state, - ccl_global float *ccl_restrict render_buffer, - int sample, - int sample_offset) +ccl_device_inline int film_write_sample(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *ccl_restrict render_buffer, + int sample, + int sample_offset) { if (kernel_data.film.pass_sample_count == PASS_UNUSED) { return sample; } - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); return atomic_fetch_and_add_uint32( (ccl_global uint *)(buffer) + kernel_data.film.pass_sample_count, 1) + sample_offset; } -ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, - const int sample, - const Spectrum contribution, - ccl_global float *ccl_restrict buffer) +ccl_device void film_write_adaptive_buffer(KernelGlobals kg, + const int sample, + const Spectrum contribution, + ccl_global float *ccl_restrict buffer) { /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte @@ -171,11 +159,11 @@ ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { const float3 contribution_rgb = spectrum_to_rgb(contribution); - kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer, - make_float4(contribution_rgb.x * 2.0f, - contribution_rgb.y * 2.0f, - contribution_rgb.z * 2.0f, - 0.0f)); + film_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer, + make_float4(contribution_rgb.x * 2.0f, + contribution_rgb.y * 2.0f, + contribution_rgb.z * 2.0f, + 0.0f)); } } @@ -190,10 +178,10 @@ ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, * Returns truth if the contribution is fully handled here and is not to be added to the other * passes (like combined, adaptive sampling). */ -ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, - const uint32_t path_flag, - const Spectrum contribution, - ccl_global float *ccl_restrict buffer) +ccl_device bool film_write_shadow_catcher(KernelGlobals kg, + const uint32_t path_flag, + const Spectrum contribution, + ccl_global float *ccl_restrict buffer) { if (!kernel_data.integrator.has_shadow_catcher) { return false; @@ -204,7 +192,7 @@ ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, /* Matte pass. */ if (kernel_shadow_catcher_is_matte_path(path_flag)) { - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive * sampling is based on how noisy the combined pass is as if there were no catchers in the * scene. */ @@ -212,18 +200,18 @@ ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, /* Shadow catcher pass. */ if (kernel_shadow_catcher_is_object_pass(path_flag)) { - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher, contribution); return true; } return false; } -ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, - const uint32_t path_flag, - const Spectrum contribution, - const float transparent, - ccl_global float *ccl_restrict buffer) +ccl_device bool film_write_shadow_catcher_transparent(KernelGlobals kg, + const uint32_t path_flag, + const Spectrum contribution, + const float transparent, + ccl_global float *ccl_restrict buffer) { if (!kernel_data.integrator.has_shadow_catcher) { return false; @@ -240,7 +228,7 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, if (kernel_shadow_catcher_is_matte_path(path_flag)) { const float3 contribution_rgb = spectrum_to_rgb(contribution); - kernel_write_pass_float4( + film_write_pass_float4( buffer + kernel_data.film.pass_shadow_catcher_matte, make_float4(contribution_rgb.x, contribution_rgb.y, contribution_rgb.z, transparent)); /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive @@ -253,17 +241,17 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the * calculation and the alpha channel of the pass contains numbers of samples contributed to a * pixel of the pass. */ - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher, contribution); return true; } return false; } -ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict buffer) +ccl_device void film_write_shadow_catcher_transparent_only(KernelGlobals kg, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict buffer) { if (!kernel_data.integrator.has_shadow_catcher) { return; @@ -273,10 +261,29 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, /* Matte pass. */ if (kernel_shadow_catcher_is_matte_path(path_flag)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); + film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); } } +/* Write shadow catcher passes on a bounce from the shadow catcher object. */ +ccl_device_forceinline void film_write_shadow_catcher_bounce_data( + KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + + /* Count sample for the shadow catcher object. */ + film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); + + /* Since the split is done, the sample does not contribute to the matte, so accumulate it as + * transparency to the matte. */ + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, + average(throughput)); +} + #endif /* __SHADOW_CATCHER__ */ /* -------------------------------------------------------------------- @@ -284,36 +291,35 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, */ /* Write combined pass. */ -ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg, - const uint32_t path_flag, - const int sample, - const Spectrum contribution, - ccl_global float *ccl_restrict buffer) +ccl_device_inline void film_write_combined_pass(KernelGlobals kg, + const uint32_t path_flag, + const int sample, + const Spectrum contribution, + ccl_global float *ccl_restrict buffer) { #ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher(kg, path_flag, contribution, buffer)) { + if (film_write_shadow_catcher(kg, path_flag, contribution, buffer)) { return; } #endif if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_combined, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_combined, contribution); } - kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); + film_write_adaptive_buffer(kg, sample, contribution, buffer); } /* Write combined pass with transparency. */ -ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg, - const uint32_t path_flag, - const int sample, - const Spectrum contribution, - const float transparent, - ccl_global float *ccl_restrict - buffer) +ccl_device_inline void film_write_combined_transparent_pass(KernelGlobals kg, + const uint32_t path_flag, + const int sample, + const Spectrum contribution, + const float transparent, + ccl_global float *ccl_restrict buffer) { #ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) { + if (film_write_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) { return; } #endif @@ -321,16 +327,16 @@ ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg, if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { const float3 contribution_rgb = spectrum_to_rgb(contribution); - kernel_write_pass_float4( + film_write_pass_float4( buffer + kernel_data.film.pass_combined, make_float4(contribution_rgb.x, contribution_rgb.y, contribution_rgb.z, transparent)); } - kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); + film_write_adaptive_buffer(kg, sample, contribution, buffer); } /* Write background or emission to appropriate pass. */ -ccl_device_inline void kernel_accum_emission_or_background_pass( +ccl_device_inline void film_write_emission_or_background_pass( KernelGlobals kg, ConstIntegratorState state, Spectrum contribution, @@ -353,15 +359,14 @@ ccl_device_inline void kernel_accum_emission_or_background_pass( const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( state, path, denoising_feature_throughput); const Spectrum denoising_albedo = denoising_feature_throughput * contribution; - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, - denoising_albedo); + film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); } } # endif /* __DENOISING_FEATURES__ */ if (lightgroup != LIGHTGROUP_NONE && kernel_data.film.pass_lightgroup != PASS_UNUSED) { - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup, - contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup, + contribution); } if (!(path_flag & PATH_RAY_ANY_PASS)) { @@ -385,7 +390,7 @@ ccl_device_inline void kernel_accum_emission_or_background_pass( kernel_data.film.pass_glossy_direct : kernel_data.film.pass_glossy_indirect); if (glossy_pass_offset != PASS_UNUSED) { - kernel_write_pass_spectrum(buffer + glossy_pass_offset, glossy_weight * contribution); + film_write_pass_spectrum(buffer + glossy_pass_offset, glossy_weight * contribution); } /* Transmission */ @@ -397,8 +402,8 @@ ccl_device_inline void kernel_accum_emission_or_background_pass( /* Transmission is what remains if not diffuse and glossy, not stored explicitly to save * GPU memory. */ const Spectrum transmission_weight = one_spectrum() - diffuse_weight - glossy_weight; - kernel_write_pass_spectrum(buffer + transmission_pass_offset, - transmission_weight * contribution); + film_write_pass_spectrum(buffer + transmission_pass_offset, + transmission_weight * contribution); } /* Reconstruct diffuse subset of throughput. */ @@ -419,19 +424,19 @@ ccl_device_inline void kernel_accum_emission_or_background_pass( /* Single write call for GPU coherence. */ if (pass_offset != PASS_UNUSED) { - kernel_write_pass_spectrum(buffer + pass_offset, contribution); + film_write_pass_spectrum(buffer + pass_offset, contribution); } #endif /* __PASSES__ */ } /* Write light contribution to render buffer. */ -ccl_device_inline void kernel_accum_light(KernelGlobals kg, - ConstIntegratorShadowState state, - ccl_global float *ccl_restrict render_buffer) +ccl_device_inline void film_write_direct_light(KernelGlobals kg, + ConstIntegratorShadowState state, + ccl_global float *ccl_restrict render_buffer) { /* The throughput for shadow paths already contains the light shader evaluation. */ Spectrum contribution = INTEGRATOR_STATE(state, shadow_path, throughput); - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); + film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); const uint32_t render_pixel_index = INTEGRATOR_STATE(state, shadow_path, render_pixel_index); const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * @@ -444,17 +449,17 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, /* Ambient occlusion. */ if (path_flag & PATH_RAY_SHADOW_FOR_AO) { if ((kernel_data.kernel_features & KERNEL_FEATURE_AO_PASS) && (path_flag & PATH_RAY_CAMERA)) { - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_ao, contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_ao, contribution); } if (kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) { const Spectrum ao_weight = INTEGRATOR_STATE(state, shadow_path, unshadowed_throughput); - kernel_accum_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer); + film_write_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer); } return; } /* Direct light shadow. */ - kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); + film_write_combined_pass(kg, path_flag, sample, contribution, buffer); #ifdef __PASSES__ if (kernel_data.film.light_pass_flag & PASS_ANY) { @@ -469,8 +474,8 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, /* Write lightgroup pass. LIGHTGROUP_NONE is ~0 so decode from unsigned to signed */ const int lightgroup = (int)(INTEGRATOR_STATE(state, shadow_path, lightgroup)) - 1; if (lightgroup != LIGHTGROUP_NONE && kernel_data.film.pass_lightgroup != PASS_UNUSED) { - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup, - contribution); + film_write_pass_spectrum(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup, + contribution); } if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { @@ -486,7 +491,7 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, kernel_data.film.pass_glossy_direct : kernel_data.film.pass_glossy_indirect); if (glossy_pass_offset != PASS_UNUSED) { - kernel_write_pass_spectrum(buffer + glossy_pass_offset, glossy_weight * contribution); + film_write_pass_spectrum(buffer + glossy_pass_offset, glossy_weight * contribution); } /* Transmission */ @@ -498,8 +503,8 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, /* Transmission is what remains if not diffuse and glossy, not stored explicitly to save * GPU memory. */ const Spectrum transmission_weight = one_spectrum() - diffuse_weight - glossy_weight; - kernel_write_pass_spectrum(buffer + transmission_pass_offset, - transmission_weight * contribution); + film_write_pass_spectrum(buffer + transmission_pass_offset, + transmission_weight * contribution); } /* Reconstruct diffuse subset of throughput. */ @@ -519,7 +524,7 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, /* Single write call for GPU coherence. */ if (pass_offset != PASS_UNUSED) { - kernel_write_pass_spectrum(buffer + pass_offset, contribution); + film_write_pass_spectrum(buffer + pass_offset, contribution); } } @@ -531,7 +536,7 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, const Spectrum shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput); const Spectrum shadow = safe_divide(shadowed_throughput, unshadowed_throughput) * kernel_data.film.pass_shadow_scale; - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_shadow, shadow); + film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow, shadow); } } #endif @@ -542,78 +547,96 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg, * Note that we accumulate transparency = 1 - alpha in the render buffer. * Otherwise we'd have to write alpha on path termination, which happens * in many places. */ -ccl_device_inline void kernel_accum_transparent(KernelGlobals kg, - ConstIntegratorState state, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict buffer) +ccl_device_inline void film_write_transparent(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict buffer) { if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); + film_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); } - kernel_accum_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer); + film_write_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer); } /* Write holdout to render buffer. */ -ccl_device_inline void kernel_accum_holdout(KernelGlobals kg, - ConstIntegratorState state, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict render_buffer) +ccl_device_inline void film_write_holdout(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict render_buffer) { - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - kernel_accum_transparent(kg, state, path_flag, transparent, buffer); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + film_write_transparent(kg, state, path_flag, transparent, buffer); } /* Write background contribution to render buffer. * - * Includes transparency, matching kernel_accum_transparent. */ -ccl_device_inline void kernel_accum_background(KernelGlobals kg, - ConstIntegratorState state, - const Spectrum L, - const float transparent, - const bool is_transparent_background_ray, - ccl_global float *ccl_restrict render_buffer) + * Includes transparency, matching film_write_transparent. */ +ccl_device_inline void film_write_background(KernelGlobals kg, + ConstIntegratorState state, + const Spectrum L, + const float transparent, + const bool is_transparent_background_ray, + ccl_global float *ccl_restrict render_buffer) { Spectrum contribution = INTEGRATOR_STATE(state, path, throughput) * L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); if (is_transparent_background_ray) { - kernel_accum_transparent(kg, state, path_flag, transparent, buffer); + film_write_transparent(kg, state, path_flag, transparent, buffer); } else { const int sample = INTEGRATOR_STATE(state, path, sample); - kernel_accum_combined_transparent_pass( - kg, path_flag, sample, contribution, transparent, buffer); - } - kernel_accum_emission_or_background_pass(kg, - state, - contribution, - buffer, - kernel_data.film.pass_background, - kernel_data.background.lightgroup); + film_write_combined_transparent_pass(kg, path_flag, sample, contribution, transparent, buffer); + } + film_write_emission_or_background_pass(kg, + state, + contribution, + buffer, + kernel_data.film.pass_background, + kernel_data.background.lightgroup); } /* Write emission to render buffer. */ -ccl_device_inline void kernel_accum_emission(KernelGlobals kg, - ConstIntegratorState state, - const Spectrum L, - ccl_global float *ccl_restrict render_buffer, - const int lightgroup = LIGHTGROUP_NONE) +ccl_device_inline void film_write_volume_emission(KernelGlobals kg, + ConstIntegratorState state, + const Spectrum L, + ccl_global float *ccl_restrict render_buffer, + const int lightgroup = LIGHTGROUP_NONE) { Spectrum contribution = L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const int sample = INTEGRATOR_STATE(state, path, sample); + + film_write_combined_pass(kg, path_flag, sample, contribution, buffer); + film_write_emission_or_background_pass( + kg, state, contribution, buffer, kernel_data.film.pass_emission, lightgroup); +} + +ccl_device_inline void film_write_surface_emission(KernelGlobals kg, + ConstIntegratorState state, + const Spectrum L, + const float mis_weight, + ccl_global float *ccl_restrict render_buffer, + const int lightgroup = LIGHTGROUP_NONE) +{ + Spectrum contribution = INTEGRATOR_STATE(state, path, throughput) * L * mis_weight; + film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer); const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const int sample = INTEGRATOR_STATE(state, path, sample); - kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); - kernel_accum_emission_or_background_pass( + film_write_combined_pass(kg, path_flag, sample, contribution, buffer); + film_write_emission_or_background_pass( kg, state, contribution, buffer, kernel_data.film.pass_emission, lightgroup); } diff --git a/intern/cycles/kernel/film/read.h b/intern/cycles/kernel/film/output.h index 995c20a0053..108f992e29d 100644 --- a/intern/cycles/kernel/film/read.h +++ b/intern/cycles/kernel/film/output.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ +/* Functions to retrieving render passes for display or output. Reading from + * the raw render buffer and normalizing based on the number of samples, + * computing alpha, compositing shadow catchers, etc. */ + #pragma once CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/film/passes.h b/intern/cycles/kernel/film/passes.h deleted file mode 100644 index bea23411000..00000000000 --- a/intern/cycles/kernel/film/passes.h +++ /dev/null @@ -1,304 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -#pragma once - -#include "kernel/geom/geom.h" - -#include "kernel/film/id_passes.h" -#include "kernel/film/write_passes.h" - -CCL_NAMESPACE_BEGIN - -/* Get pointer to pixel in render buffer. */ -ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer( - KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - return render_buffer + render_buffer_offset; -} - -#ifdef __DENOISING_FEATURES__ - -ccl_device_forceinline void kernel_write_denoising_features_surface( - KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ - if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) { - return; - } - - /* Skip implicitly transparent surfaces. */ - if (sd->flag & SD_HAS_ONLY_VOLUME) { - return; - } - - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - - if (kernel_data.film.pass_denoising_depth != PASS_UNUSED) { - const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - const float denoising_depth = ensure_finite(average(denoising_feature_throughput) * - sd->ray_length); - kernel_write_pass_float(buffer + kernel_data.film.pass_denoising_depth, denoising_depth); - } - - float3 normal = zero_float3(); - Spectrum diffuse_albedo = zero_spectrum(); - Spectrum specular_albedo = zero_spectrum(); - float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - continue; - } - - /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ - normal += sc->N * sc->sample_weight; - sum_weight += sc->sample_weight; - - Spectrum closure_albedo = sc->weight; - /* Closures that include a Fresnel term typically have weights close to 1 even though their - * actual contribution is significantly lower. - * To account for this, we scale their weight by the average fresnel factor (the same is also - * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ - if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc; - closure_albedo *= bsdf->extra->fresnel_color; - } - else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { - ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc; - closure_albedo *= bsdf->avg_value; - } - else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) { - closure_albedo *= bsdf_principled_hair_albedo(sc); - } - else if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) { - /* BSSRDF already accounts for weight, retro-reflection would double up. */ - ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *) - sc; - if (bsdf->components == PRINCIPLED_DIFFUSE_RETRO_REFLECTION) { - continue; - } - } - - if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { - diffuse_albedo += closure_albedo; - sum_nonspecular_weight += sc->sample_weight; - } - else { - specular_albedo += closure_albedo; - } - } - - /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ - if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { - if (sum_weight != 0.0f) { - normal /= sum_weight; - } - - if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) { - /* Transform normal into camera space. */ - const Transform worldtocamera = kernel_data.cam.worldtocamera; - normal = transform_direction(&worldtocamera, normal); - - const float3 denoising_normal = ensure_finite(normal); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); - } - - if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - const Spectrum denoising_albedo = ensure_finite(denoising_feature_throughput * - diffuse_albedo); - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, - denoising_albedo); - } - - INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; - } - else { - INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo; - } -} - -ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals kg, - IntegratorState state, - const Spectrum albedo, - const bool scatter, - ccl_global float *ccl_restrict - render_buffer) -{ - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - const Spectrum denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - - if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) { - /* Assume scatter is sufficiently diffuse to stop writing denoising features. */ - INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; - - /* Write view direction as normal. */ - const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); - } - - if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - /* Write albedo. */ - const Spectrum denoising_albedo = ensure_finite(denoising_feature_throughput * albedo); - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); - } -} -#endif /* __DENOISING_FEATURES__ */ - -ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer, - size_t depth, - float id, - float matte_weight) -{ - kernel_write_id_slots(buffer, depth * 2, id, matte_weight); - return depth * 4; -} - -ccl_device_inline void kernel_write_data_passes(KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ -#ifdef __PASSES__ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { - return; - } - - const int flag = kernel_data.film.pass_flag; - - if (!(flag & PASS_ANY)) { - return; - } - - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - - if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { - if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || - average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { - if (INTEGRATOR_STATE(state, path, sample) == 0) { - if (flag & PASSMASK(DEPTH)) { - const float depth = camera_z_depth(kg, sd->P); - kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); - } - if (flag & PASSMASK(OBJECT_ID)) { - const float id = object_pass_id(kg, sd->object); - kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); - } - if (flag & PASSMASK(MATERIAL_ID)) { - const float id = shader_pass_id(kg, sd); - kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); - } - if (flag & PASSMASK(POSITION)) { - const float3 position = sd->P; - kernel_write_pass_float3(buffer + kernel_data.film.pass_position, position); - } - } - - if (flag & PASSMASK(NORMAL)) { - const float3 normal = shader_bsdf_average_normal(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); - } - if (flag & PASSMASK(ROUGHNESS)) { - const float roughness = shader_bsdf_average_roughness(sd); - kernel_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness); - } - if (flag & PASSMASK(UV)) { - const float3 uv = primitive_uv(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); - } - if (flag & PASSMASK(MOTION)) { - const float4 speed = primitive_motion_vector(kg, sd); - kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); - kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); - } - - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE; - } - } - - if (kernel_data.film.cryptomatte_passes) { - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - const float matte_weight = average(throughput) * - (1.0f - average(shader_bsdf_transparency(kg, sd))); - if (matte_weight > 0.0f) { - ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - const float id = object_cryptomatte_id(kg, sd->object); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - const float id = shader_cryptomatte_id(kg, sd->shader); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - const float id = object_cryptomatte_asset_id(kg, sd->object); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - } - } - - if (flag & PASSMASK(DIFFUSE_COLOR)) { - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_diffuse_color, - shader_bsdf_diffuse(kg, sd) * throughput); - } - if (flag & PASSMASK(GLOSSY_COLOR)) { - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_glossy_color, - shader_bsdf_glossy(kg, sd) * throughput); - } - if (flag & PASSMASK(TRANSMISSION_COLOR)) { - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_spectrum(buffer + kernel_data.film.pass_transmission_color, - shader_bsdf_transmission(kg, sd) * throughput); - } - if (flag & PASSMASK(MIST)) { - /* Bring depth into 0..1 range. */ - const float mist_start = kernel_data.film.mist_start; - const float mist_inv_depth = kernel_data.film.mist_inv_depth; - - const float depth = camera_distance(kg, sd->P); - float mist = saturatef((depth - mist_start) * mist_inv_depth); - - /* Falloff */ - const float mist_falloff = kernel_data.film.mist_falloff; - - if (mist_falloff == 1.0f) - ; - else if (mist_falloff == 2.0f) - mist = mist * mist; - else if (mist_falloff == 0.5f) - mist = sqrtf(mist); - else - mist = powf(mist, mist_falloff); - - /* Modulate by transparency */ - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - const Spectrum alpha = shader_bsdf_alpha(kg, sd); - const float mist_output = (1.0f - mist) * average(throughput * alpha); - - /* Note that the final value in the render buffer we want is 1 - mist_output, - * to avoid having to tracking this in the Integrator state we do the negation - * after rendering. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output); - } -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/write_passes.h b/intern/cycles/kernel/film/write.h index c78116cedc6..c630a522ee3 100644 --- a/intern/cycles/kernel/film/write_passes.h +++ b/intern/cycles/kernel/film/write.h @@ -11,7 +11,18 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value) +/* Get pointer to pixel in render buffer. */ +ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer( + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + return render_buffer + render_buffer_offset; +} + +/* Write to pixel. */ +ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, float value) { #ifdef __ATOMIC_PASS_WRITE__ atomic_add_and_fetch_float(buffer, value); @@ -20,8 +31,7 @@ ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict bu #endif } -ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict buffer, - float3 value) +ccl_device_inline void film_write_pass_float3(ccl_global float *ccl_restrict buffer, float3 value) { #ifdef __ATOMIC_PASS_WRITE__ ccl_global float *buf_x = buffer + 0; @@ -38,14 +48,13 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict b #endif } -ccl_device_inline void kernel_write_pass_spectrum(ccl_global float *ccl_restrict buffer, - Spectrum value) +ccl_device_inline void film_write_pass_spectrum(ccl_global float *ccl_restrict buffer, + Spectrum value) { - kernel_write_pass_float3(buffer, spectrum_to_rgb(value)); + film_write_pass_float3(buffer, spectrum_to_rgb(value)); } -ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict buffer, - float4 value) +ccl_device_inline void film_write_pass_float4(ccl_global float *ccl_restrict buffer, float4 value) { #ifdef __ATOMIC_PASS_WRITE__ ccl_global float *buf_x = buffer + 0; diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index 9897bc3d65c..eca2c0b9ffb 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -5,8 +5,8 @@ #include "kernel/camera/camera.h" -#include "kernel/film/accumulate.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/path_state.h" @@ -92,12 +92,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + if (!film_need_sample_pixel(kg, state, render_buffer)) { return false; } /* Always count the sample, even if the camera sample will reject the ray. */ - const int sample = kernel_accum_sample( + const int sample = film_write_sample( kg, state, render_buffer, scheduled_sample, tile->sample_offset); /* Setup render buffers. */ @@ -112,7 +112,7 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, int prim = __float_as_uint(primitive[1]); if (prim == -1) { /* Accumulate transparency for empty pixels. */ - kernel_accum_transparent(kg, state, 0, 1.0f, buffer); + film_write_transparent(kg, state, 0, 1.0f, buffer); return true; } @@ -200,11 +200,11 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Fast path for position and normal passes not affected by shaders. */ if (kernel_data.film.pass_position != PASS_UNUSED) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_position, P); + film_write_pass_float3(buffer + kernel_data.film.pass_position, P); return true; } else if (kernel_data.film.pass_normal != PASS_UNUSED && !(shader_flags & SD_HAS_BUMP)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, N); + film_write_pass_float3(buffer + kernel_data.film.pass_normal, N); return true; } diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h index 67ac3603d4c..8df3e1b9fb3 100644 --- a/intern/cycles/kernel/integrator/init_from_camera.h +++ b/intern/cycles/kernel/integrator/init_from_camera.h @@ -5,8 +5,8 @@ #include "kernel/camera/camera.h" -#include "kernel/film/accumulate.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/shadow_catcher.h" @@ -57,7 +57,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + if (!film_need_sample_pixel(kg, state, render_buffer)) { return false; } @@ -66,7 +66,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, * This logic allows to both count actual number of samples per pixel, and to add samples to this * pixel after it was converged and samples were added somewhere else (in which case the * `scheduled_sample` will be different from actual number of samples in this pixel). */ - const int sample = kernel_accum_sample( + const int sample = film_write_sample( kg, state, render_buffer, scheduled_sample, tile->sample_offset); /* Initialize random number seed for path. */ diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 60299f2cb2f..4ecff56a3fd 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -5,6 +5,8 @@ #include "kernel/camera/projection.h" +#include "kernel/film/light_passes.h" + #include "kernel/integrator/path_state.h" #include "kernel/integrator/shadow_catcher.h" @@ -87,7 +89,7 @@ ccl_device_forceinline void integrator_split_shadow_catcher( return; } - kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer); + film_write_shadow_catcher_bounce_data(kg, state, render_buffer); /* Mark state as having done a shadow catcher split so that it stops contributing to * the shadow catcher matte pass, but keeps contributing to the combined pass. */ diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index 57d060d58df..b3a1b83cf6b 100644 --- a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -3,8 +3,10 @@ #pragma once -#include "kernel/film/accumulate.h" +#include "kernel/film/light_passes.h" + #include "kernel/integrator/shader_eval.h" + #include "kernel/light/light.h" #include "kernel/light/sample.h" @@ -31,47 +33,30 @@ ccl_device Spectrum integrator_eval_background_shader(KernelGlobals kg, /* Use fast constant background color if available. */ Spectrum L = zero_spectrum(); - if (!shader_constant_emission_eval(kg, shader, &L)) { - /* Evaluate background shader. */ - - /* TODO: does aliasing like this break automatic SoA in CUDA? - * Should we instead store closures separate from ShaderData? */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - - PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); - shader_setup_from_background(kg, - emission_sd, - INTEGRATOR_STATE(state, ray, P), - INTEGRATOR_STATE(state, ray, D), - INTEGRATOR_STATE(state, ray, time)); - - PROFILING_SHADER(emission_sd->object, emission_sd->shader); - PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>( - kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); - - L = shader_background_eval(emission_sd); + if (shader_constant_emission_eval(kg, shader, &L)) { + return L; } - /* Background MIS weights. */ -# ifdef __BACKGROUND_MIS__ - /* Check if background light exists or if we should skip pdf. */ - if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && - kernel_data.background.use_mis) { - const float3 ray_P = INTEGRATOR_STATE(state, ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, ray, D); - const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - - /* multiple importance sampling, get background light pdf for ray - * direction, and compute weight with respect to BSDF pdf */ - const float pdf = background_light_pdf(kg, ray_P, ray_D); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); - L *= mis_weight; - } -# endif + /* Evaluate background shader. */ + + /* TODO: does aliasing like this break automatic SoA in CUDA? + * Should we instead store closures separate from ShaderData? */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); + shader_setup_from_background(kg, + emission_sd, + INTEGRATOR_STATE(state, ray, P), + INTEGRATOR_STATE(state, ray, D), + INTEGRATOR_STATE(state, ray, time)); + + PROFILING_SHADER(emission_sd->object, emission_sd->shader); + PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); + shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>( + kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); - return L; + return shader_background_eval(emission_sd); #else return make_spectrum(0.8f); #endif @@ -117,17 +102,39 @@ ccl_device_inline void integrate_background(KernelGlobals kg, #endif /* __MNEE__ */ /* Evaluate background shader. */ - Spectrum L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) : - zero_spectrum(); + Spectrum L = zero_spectrum(); + + if (eval_background) { + L = integrator_eval_background_shader(kg, state, render_buffer); + + /* When using the ao bounces approximation, adjust background + * shader intensity with ao factor. */ + if (path_state_ao_bounce(kg, state)) { + L *= kernel_data.integrator.ao_bounces_factor; + } - /* When using the ao bounces approximation, adjust background - * shader intensity with ao factor. */ - if (path_state_ao_bounce(kg, state)) { - L *= kernel_data.integrator.ao_bounces_factor; + /* Background MIS weights. */ + float mis_weight = 1.0f; +#ifdef __BACKGROUND_MIS__ + /* Check if background light exists or if we should skip pdf. */ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && + kernel_data.background.use_mis) { + const float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + + /* multiple importance sampling, get background light pdf for ray + * direction, and compute weight with respect to BSDF pdf */ + const float pdf = background_light_pdf(kg, ray_P, ray_D); + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); + } +#endif + + L *= mis_weight; } /* Write to render buffer. */ - kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); + film_write_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); } ccl_device_inline void integrate_distant_lights(KernelGlobals kg, @@ -175,18 +182,17 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg, } /* MIS weighting. */ + float mis_weight = 1.0f; if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); - light_eval *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); } /* Write to render buffer. */ - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission( - kg, state, throughput * light_eval, render_buffer, kernel_data.background.lightgroup); + film_write_surface_emission( + kg, state, light_eval, mis_weight, render_buffer, kernel_data.background.lightgroup); } } } diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h index ac9d1415abb..d91fa2a2663 100644 --- a/intern/cycles/kernel/integrator/shade_light.h +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -3,7 +3,7 @@ #pragma once -#include "kernel/film/accumulate.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/shader_eval.h" #include "kernel/light/light.h" #include "kernel/light/sample.h" @@ -57,6 +57,7 @@ ccl_device_inline void integrate_light(KernelGlobals kg, } /* MIS weighting. */ + float mis_weight = 1.0f; if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ @@ -66,8 +67,7 @@ ccl_device_inline void integrate_light(KernelGlobals kg, } /* Write to render buffer. */ - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission(kg, state, throughput * light_eval, render_buffer, ls.group); + film_write_surface_emission(kg, state, light_eval, mis_weight, render_buffer, ls.group); } ccl_device void integrator_shade_light(KernelGlobals kg, diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index a52706a77f1..074125bd200 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -165,7 +165,7 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg, return; } else { - kernel_accum_light(kg, state, render_buffer); + film_write_direct_light(kg, state, render_buffer); integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index f3f8ed67713..be6fc4e0b17 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -3,8 +3,9 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/film/passes.h" +#include "kernel/film/data_passes.h" +#include "kernel/film/denoising_passes.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/mnee.h" @@ -91,7 +92,7 @@ ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, const Spectrum holdout_weight = shader_holdout_apply(kg, sd); const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); const float transparent = average(holdout_weight * throughput); - kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer); + film_write_holdout(kg, state, path_flag, transparent, render_buffer); if (isequal(holdout_weight, one_spectrum())) { return false; } @@ -112,6 +113,7 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, /* Evaluate emissive closure. */ Spectrum L = shader_emissive_eval(sd); + float mis_weight = 1.0f; # ifdef __HAIR__ if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && @@ -126,13 +128,11 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, /* Multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf. */ float pdf = triangle_light_pdf(kg, sd, t); - float mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf); - L *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf); } - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission( - kg, state, throughput * L, render_buffer, object_lightgroup(kg, sd->object)); + film_write_surface_emission( + kg, state, L, mis_weight, render_buffer, object_lightgroup(kg, sd->object)); } #endif /* __EMISSION__ */ @@ -599,11 +599,11 @@ ccl_device bool integrate_surface(KernelGlobals kg, /* Write render passes. */ #ifdef __PASSES__ PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES); - kernel_write_data_passes(kg, state, &sd, render_buffer); + film_write_data_passes(kg, state, &sd, render_buffer); #endif #ifdef __DENOISING_FEATURES__ - kernel_write_denoising_features_surface(kg, state, &sd, render_buffer); + film_write_denoising_features_surface(kg, state, &sd, render_buffer); #endif } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 5e0584d4f98..5373fa2250a 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -3,8 +3,9 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/film/passes.h" +#include "kernel/film/data_passes.h" +#include "kernel/film/denoising_passes.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/intersect_closest.h" #include "kernel/integrator/path_state.h" @@ -663,14 +664,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Write accumulated emission. */ if (!is_zero(accum_emission)) { - kernel_accum_emission( + film_write_volume_emission( kg, state, accum_emission, render_buffer, object_lightgroup(kg, sd->object)); } # ifdef __DENOISING_FEATURES__ /* Write denoising features. */ if (write_denoising_features) { - kernel_write_denoising_features_volume( + film_write_denoising_features_volume( kg, state, accum_albedo, result.indirect_scatter, render_buffer); } # endif /* __DENOISING_FEATURES__ */ diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h index b1316bda9bb..11ecf60d00d 100644 --- a/intern/cycles/kernel/integrator/shader_eval.h +++ b/intern/cycles/kernel/integrator/shader_eval.h @@ -10,7 +10,7 @@ #include "kernel/closure/bsdf_util.h" #include "kernel/closure/emissive.h" -#include "kernel/film/accumulate.h" +#include "kernel/film/light_passes.h" #include "kernel/svm/svm.h" diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h index 7103b6032ac..a620853faea 100644 --- a/intern/cycles/kernel/integrator/shadow_catcher.h +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -3,7 +3,6 @@ #pragma once -#include "kernel/film/write_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/state_util.h" @@ -76,28 +75,6 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t return path_flag & PATH_RAY_SHADOW_CATCHER_PASS; } -/* Write shadow catcher passes on a bounce from the shadow catcher object. */ -ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( - KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset; - - /* Count sample for the shadow catcher object. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); - - /* Since the split is done, the sample does not contribute to the matte, so accumulate it as - * transparency to the matte. */ - const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, - average(throughput)); -} - #endif /* __SHADOW_CATCHER__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/aov.h b/intern/cycles/kernel/svm/aov.h index 9b818f0e6f8..c574b28c078 100644 --- a/intern/cycles/kernel/svm/aov.h +++ b/intern/cycles/kernel/svm/aov.h @@ -3,7 +3,7 @@ #pragma once -#include "kernel/film/write_passes.h" +#include "kernel/film/aov_passes.h" CCL_NAMESPACE_BEGIN @@ -27,12 +27,7 @@ ccl_device void svm_node_aov_color(KernelGlobals kg, IF_KERNEL_NODES_FEATURE(AOV) { const float3 val = stack_load_float3(stack, node.y); - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset + - (kernel_data.film.pass_aov_color + node.z); - kernel_write_pass_float4(buffer, make_float4(val.x, val.y, val.z, 1.0f)); + film_write_aov_pass_color(kg, state, render_buffer, node.z, val); } } @@ -47,12 +42,7 @@ ccl_device void svm_node_aov_value(KernelGlobals kg, IF_KERNEL_NODES_FEATURE(AOV) { const float val = stack_load_float(stack, node.y); - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset + - (kernel_data.film.pass_aov_value + node.z); - kernel_write_pass_float(buffer, val); + film_write_aov_pass_value(kg, state, render_buffer, node.z, val); } } CCL_NAMESPACE_END |