diff options
Diffstat (limited to 'intern/cycles/kernel/integrator')
27 files changed, 2755 insertions, 1492 deletions
diff --git a/intern/cycles/kernel/integrator/displacement_shader.h b/intern/cycles/kernel/integrator/displacement_shader.h new file mode 100644 index 00000000000..839dfe244ac --- /dev/null +++ b/intern/cycles/kernel/integrator/displacement_shader.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Functions to evaluate displacement shader. */ + +#pragma once + +#ifdef __SVM__ +# include "kernel/svm/svm.h" +#endif +#ifdef __OSL__ +# include "kernel/osl/osl.h" +#endif + +CCL_NAMESPACE_BEGIN + +template<typename ConstIntegratorGenericState> +ccl_device void displacement_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd) +{ + sd->num_closure = 0; + sd->num_closure_left = 0; + + /* this will modify sd->P */ +#ifdef __OSL__ + if (kg->osl) { + OSLShader::eval_displacement(kg, state, sd); + } + else +#endif + { +#ifdef __SVM__ + svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>( + kg, state, sd, NULL, 0); +#endif + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/guiding.h b/intern/cycles/kernel/integrator/guiding.h new file mode 100644 index 00000000000..634bba2a9b4 --- /dev/null +++ b/intern/cycles/kernel/integrator/guiding.h @@ -0,0 +1,547 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/film/write.h" + +CCL_NAMESPACE_BEGIN + +/* Utilities. */ + +#if defined(__PATH_GUIDING__) +static pgl_vec3f guiding_vec3f(const float3 v) +{ + return openpgl::cpp::Vector3(v.x, v.y, v.z); +} + +static pgl_point3f guiding_point3f(const float3 v) +{ + return openpgl::cpp::Point3(v.x, v.y, v.z); +} +#endif + +/* Path recording for guiding. */ + +/* Record Surface Interactions */ + +/* Records/Adds a new path segment with the current path vertex on a surface. 
+ * If the path is not terminated this call is usually followed by a call of + * guiding_record_surface_bounce. */ +ccl_device_forceinline void guiding_record_surface_segment(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + + const pgl_vec3f zero = guiding_vec3f(zero_float3()); + const pgl_vec3f one = guiding_vec3f(one_float3()); + + state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment(); + openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(sd->P)); + openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(sd->I)); + openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false); + openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero); + openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero); + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, one); + openpgl::cpp::SetEta(state->guiding.path_segment, 1.0); +#endif +} + +/* Records the surface scattering event at the current vertex position of the segment.*/ +ccl_device_forceinline void guiding_record_surface_bounce(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + const Spectrum weight, + const float pdf, + const float3 N, + const float3 omega_in, + const float2 roughness, + const float eta) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + if (!kernel_data.integrator.train_guiding) { + return; + } + const float min_roughness = safe_sqrtf(fminf(roughness.x, roughness.y)); + const bool is_delta = (min_roughness == 0.0f); + const float3 weight_rgb = spectrum_to_rgb(weight); + const float3 normal = clamp(N, -one_float3(), one_float3()); + + kernel_assert(state->guiding.path_segment != nullptr); + + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(one_float3())); + 
openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false); + openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal)); + openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in)); + openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf); + openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb)); + openpgl::cpp::SetIsDelta(state->guiding.path_segment, is_delta); + openpgl::cpp::SetEta(state->guiding.path_segment, eta); + openpgl::cpp::SetRoughness(state->guiding.path_segment, min_roughness); +#endif +} + +/* Records the emission at the current surface intersection (physical or virtual) */ +ccl_device_forceinline void guiding_record_surface_emission(KernelGlobals kg, + IntegratorState state, + const Spectrum Le, + const float mis_weight) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + const float3 Le_rgb = spectrum_to_rgb(Le); + + openpgl::cpp::SetDirectContribution(state->guiding.path_segment, guiding_vec3f(Le_rgb)); + openpgl::cpp::SetMiWeight(state->guiding.path_segment, mis_weight); +#endif +} + +/* Record BSSRDF Interactions */ + +/* Records/Adds a new path segment where the vertex position is the point of entry + * of the sub surface scattering boundary. + * If the path is not terminated this call is usually followed by a call of + * guiding_record_bssrdf_weight and guiding_record_bssrdf_bounce. 
*/ +ccl_device_forceinline void guiding_record_bssrdf_segment(KernelGlobals kg, + IntegratorState state, + const float3 P, + const float3 I) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + const pgl_vec3f zero = guiding_vec3f(zero_float3()); + const pgl_vec3f one = guiding_vec3f(one_float3()); + + state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment(); + openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(P)); + openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(I)); + openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, true); + openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero); + openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero); + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, one); + openpgl::cpp::SetEta(state->guiding.path_segment, 1.0); +#endif +} + +/* Records the transmission of the path at the point of entry while passing + * the surface boundary.*/ +ccl_device_forceinline void guiding_record_bssrdf_weight(KernelGlobals kg, + IntegratorState state, + const Spectrum weight, + const Spectrum albedo) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + + /* Note albedo left out here, will be included in guiding_record_bssrdf_bounce. 
*/ + const float3 weight_rgb = spectrum_to_rgb(safe_divide_color(weight, albedo)); + + kernel_assert(state->guiding.path_segment != nullptr); + + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(zero_float3())); + openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb)); + openpgl::cpp::SetIsDelta(state->guiding.path_segment, false); + openpgl::cpp::SetEta(state->guiding.path_segment, 1.0f); + openpgl::cpp::SetRoughness(state->guiding.path_segment, 1.0f); +#endif +} + +/* Records the direction at the point of entry the path takes when sampling the SSS contribution. + * If not terminated this function is usually followed by a call of + * guiding_record_volume_transmission to record the transmittance between the point of entry and + * the point of exit.*/ +ccl_device_forceinline void guiding_record_bssrdf_bounce(KernelGlobals kg, + IntegratorState state, + const float pdf, + const float3 N, + const float3 omega_in, + const Spectrum weight, + const Spectrum albedo) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + const float3 normal = clamp(N, -one_float3(), one_float3()); + const float3 weight_rgb = spectrum_to_rgb(weight * albedo); + + kernel_assert(state->guiding.path_segment != nullptr); + + openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false); + openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal)); + openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in)); + openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf); + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb)); +#endif +} + +/* Record Volume Interactions */ + +/* Records/Adds a new path segment with the current path vertex being inside a volume. + * If the path is not terminated this call is usually followed by a call of + * guiding_record_volume_bounce. 
*/ +ccl_device_forceinline void guiding_record_volume_segment(KernelGlobals kg, + IntegratorState state, + const float3 P, + const float3 I) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + const pgl_vec3f zero = guiding_vec3f(zero_float3()); + const pgl_vec3f one = guiding_vec3f(one_float3()); + + state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment(); + + openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(P)); + openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(I)); + openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, true); + openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero); + openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero); + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, one); + openpgl::cpp::SetEta(state->guiding.path_segment, 1.0); +#endif +} + +/* Records the volume scattering event at the current vertex position of the segment.*/ +ccl_device_forceinline void guiding_record_volume_bounce(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + const Spectrum weight, + const float pdf, + const float3 omega_in, + const float roughness) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + if (!kernel_data.integrator.train_guiding) { + return; + } + const float3 weight_rgb = spectrum_to_rgb(weight); + const float3 normal = make_float3(0.0f, 0.0f, 1.0f); + + kernel_assert(state->guiding.path_segment != nullptr); + + openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, true); + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(one_float3())); + openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal)); + openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in)); + openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf); + 
openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb)); + openpgl::cpp::SetIsDelta(state->guiding.path_segment, false); + openpgl::cpp::SetEta(state->guiding.path_segment, 1.f); + openpgl::cpp::SetRoughness(state->guiding.path_segment, roughness); +#endif +} + +/* Records the transmission (a.k.a. transmittance weight) between the current path segment + * and the next one, when the path is inside or passes a volume.*/ +ccl_device_forceinline void guiding_record_volume_transmission(KernelGlobals kg, + IntegratorState state, + const float3 transmittance_weight) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + + if (state->guiding.path_segment) { + // TODO (sherholz): need to find a better way to avoid this check + if ((transmittance_weight[0] < 0.f || !std::isfinite(transmittance_weight[0]) || + std::isnan(transmittance_weight[0])) || + (transmittance_weight[1] < 0.f || !std::isfinite(transmittance_weight[1]) || + std::isnan(transmittance_weight[1])) || + (transmittance_weight[2] < 0.f || !std::isfinite(transmittance_weight[2]) || + std::isnan(transmittance_weight[2]))) { + } + else { + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, + guiding_vec3f(transmittance_weight)); + } + } +#endif +} + +/* Records the emission of a volume at the vertex of the current path segment. 
*/ +ccl_device_forceinline void guiding_record_volume_emission(KernelGlobals kg, + IntegratorState state, + const Spectrum Le) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + + if (state->guiding.path_segment) { + const float3 Le_rgb = spectrum_to_rgb(Le); + + openpgl::cpp::SetDirectContribution(state->guiding.path_segment, guiding_vec3f(Le_rgb)); + openpgl::cpp::SetMiWeight(state->guiding.path_segment, 1.0f); + } +#endif +} + +/* Record Light Interactions */ + +/* Adds a pseudo path vertex/segment when intersecting a virtual light source. + * (e.g., area, sphere, or disk light). This call is often followed by + * a call of guiding_record_surface_emission, if the intersected light source + * emits light in the direction of the path. */ +ccl_device_forceinline void guiding_record_light_surface_segment( + KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + const pgl_vec3f zero = guiding_vec3f(zero_float3()); + const pgl_vec3f one = guiding_vec3f(one_float3()); + const float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float3 P = ray_P + isect->t * ray_D; + + state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment(); + openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(P)); + openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(-ray_D)); + openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(-ray_D)); + openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(ray_D)); + openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, 1.0f); + openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false); + openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero); +
openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero); + openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, one); + openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, one); + openpgl::cpp::SetEta(state->guiding.path_segment, 1.0f); +#endif +} + +/* Records/Adds a final path segment when the path leaves the scene and + * intersects with a background light (e.g., background color, + * distant light, or env map). The vertex for this segment is placed along + * the current ray far outside the scene. */ +ccl_device_forceinline void guiding_record_background(KernelGlobals kg, + IntegratorState state, + const Spectrum L, + const float mis_weight) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + + const float3 L_rgb = spectrum_to_rgb(L); + const float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float3 P = ray_P + (1e6f) * ray_D; + const float3 normal = make_float3(0.0f, 0.0f, 1.0f); + + openpgl::cpp::PathSegment background_segment; + openpgl::cpp::SetPosition(&background_segment, guiding_vec3f(P)); + openpgl::cpp::SetNormal(&background_segment, guiding_vec3f(normal)); + openpgl::cpp::SetDirectionOut(&background_segment, guiding_vec3f(-ray_D)); + openpgl::cpp::SetDirectContribution(&background_segment, guiding_vec3f(L_rgb)); + openpgl::cpp::SetMiWeight(&background_segment, mis_weight); + kg->opgl_path_segment_storage->AddSegment(background_segment); +#endif +} + +/* Records the scattered contribution of a next event estimation + * (i.e., a direct light estimate scattered at the current path vertex + * towards the previous vertex).*/ +ccl_device_forceinline void guiding_record_direct_light(KernelGlobals kg, + IntegratorShadowState state) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + if (state->shadow_path.path_segment) { +
const Spectrum Lo = safe_divide_color(INTEGRATOR_STATE(state, shadow_path, throughput), + INTEGRATOR_STATE(state, shadow_path, unlit_throughput)); + + const float3 Lo_rgb = spectrum_to_rgb(Lo); + openpgl::cpp::AddScatteredContribution(state->shadow_path.path_segment, guiding_vec3f(Lo_rgb)); + } +#endif +} + +/* Record Russian Roulette */ +/* Records the probability of continuing the path at the current path segment. */ +ccl_device_forceinline void guiding_record_continuation_probability( + KernelGlobals kg, IntegratorState state, const float continuation_probability) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!kernel_data.integrator.train_guiding) { + return; + } + + if (state->guiding.path_segment) { + openpgl::cpp::SetRussianRouletteProbability(state->guiding.path_segment, + continuation_probability); + } +#endif +} + +/* Path guiding debug render passes. */ + +/* Write a set of path guiding related debug information (e.g., guiding probability at first + * bounce) into separate rendering passes.*/ +ccl_device_forceinline void guiding_write_debug_passes(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict + render_buffer) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 +# ifdef WITH_CYCLES_DEBUG + if (!kernel_data.integrator.train_guiding) { + return; + } + + if (INTEGRATOR_STATE(state, path, bounce) != 0) { + return; + } + + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset; + + if (kernel_data.film.pass_guiding_probability != PASS_UNUSED) { + float guiding_prob = state->guiding.surface_guiding_sampling_prob; + film_write_pass_float(buffer + kernel_data.film.pass_guiding_probability, guiding_prob); + } + + if (kernel_data.film.pass_guiding_avg_roughness != 
PASS_UNUSED) { + float avg_roughness = 0.0f; + float sum_sample_weight = 0.0f; + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + continue; + } + avg_roughness += sc->sample_weight * bsdf_get_specular_roughness_squared(sc); + sum_sample_weight += sc->sample_weight; + } + + avg_roughness = avg_roughness > 0.f ? avg_roughness / sum_sample_weight : 0.f; + + film_write_pass_float(buffer + kernel_data.film.pass_guiding_avg_roughness, avg_roughness); + } +# endif +#endif +} + +/* Guided BSDFs */ + +ccl_device_forceinline bool guiding_bsdf_init(KernelGlobals kg, + IntegratorState state, + const float3 P, + const float3 N, + ccl_private float &rand) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + if (kg->opgl_surface_sampling_distribution->Init( + kg->opgl_guiding_field, guiding_point3f(P), rand, true)) { + kg->opgl_surface_sampling_distribution->ApplyCosineProduct(guiding_point3f(N)); + return true; + } +#endif + + return false; +} + +ccl_device_forceinline float guiding_bsdf_sample(KernelGlobals kg, + IntegratorState state, + const float2 rand_bsdf, + ccl_private float3 *omega_in) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + pgl_vec3f wo; + const pgl_point2f rand = openpgl::cpp::Point2(rand_bsdf.x, rand_bsdf.y); + const float pdf = kg->opgl_surface_sampling_distribution->SamplePDF(rand, wo); + *omega_in = make_float3(wo.x, wo.y, wo.z); + return pdf; +#else + return 0.0f; +#endif +} + +ccl_device_forceinline float guiding_bsdf_pdf(KernelGlobals kg, + IntegratorState state, + const float3 omega_in) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + return kg->opgl_surface_sampling_distribution->PDF(guiding_vec3f(omega_in)); +#else + return 0.0f; +#endif +} + +/* Guided Volume Phases */ + +ccl_device_forceinline bool guiding_phase_init(KernelGlobals kg, + IntegratorState state, + const float3 P, + const float3 D, + const float g, 
+ ccl_private float &rand) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + /* we do not need to guide almost delta phase functions */ + if (fabsf(g) >= 0.99f) { + return false; + } + + if (kg->opgl_volume_sampling_distribution->Init( + kg->opgl_guiding_field, guiding_point3f(P), rand, true)) { + kg->opgl_volume_sampling_distribution->ApplySingleLobeHenyeyGreensteinProduct(guiding_vec3f(D), + g); + return true; + } +#endif + + return false; +} + +ccl_device_forceinline float guiding_phase_sample(KernelGlobals kg, + IntegratorState state, + const float2 rand_phase, + ccl_private float3 *omega_in) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + pgl_vec3f wo; + const pgl_point2f rand = openpgl::cpp::Point2(rand_phase.x, rand_phase.y); + const float pdf = kg->opgl_volume_sampling_distribution->SamplePDF(rand, wo); + *omega_in = make_float3(wo.x, wo.y, wo.z); + return pdf; +#else + return 0.0f; +#endif +} + +ccl_device_forceinline float guiding_phase_pdf(KernelGlobals kg, + IntegratorState state, + const float3 omega_in) +{ +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + return kg->opgl_volume_sampling_distribution->PDF(guiding_vec3f(omega_in)); +#else + return 0.0f; +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index bf3f41b52b9..667ba949760 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -5,8 +5,8 @@ #include "kernel/camera/camera.h" -#include "kernel/film/accumulate.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/path_state.h" @@ -92,12 +92,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. 
*/ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + if (!film_need_sample_pixel(kg, state, render_buffer)) { return false; } /* Always count the sample, even if the camera sample will reject the ray. */ - const int sample = kernel_accum_sample( + const int sample = film_write_sample( kg, state, render_buffer, scheduled_sample, tile->sample_offset); /* Setup render buffers. */ @@ -112,8 +112,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, int prim = __float_as_uint(primitive[1]); if (prim == -1) { /* Accumulate transparency for empty pixels. */ - kernel_accum_transparent(kg, state, 0, 1.0f, buffer); - return false; + film_write_transparent(kg, state, 0, 1.0f, buffer); + return true; } prim += kernel_data.bake.tri_offset; @@ -121,13 +121,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Random number generator. */ const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed; - float filter_x, filter_y; - if (sample == 0) { - filter_x = filter_y = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y); - } + const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) : + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER); /* Initialize path state for path integration. */ path_state_init_integrator(kg, state, sample, rng_hash); @@ -150,11 +145,17 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Sub-pixel offset. */ if (sample > 0) { - u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); - v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), + u = bake_clamp_mirror_repeat(u + dudx * (rand_filter.x - 0.5f) + dudy * (rand_filter.y - 0.5f), + 1.0f); + v = bake_clamp_mirror_repeat(v + dvdx * (rand_filter.x - 0.5f) + dvdy * (rand_filter.y - 0.5f), 1.0f - u); } + /* Convert from Blender to Cycles/Embree/OptiX barycentric convention. 
*/ + const float tmp = u; + u = v; + v = 1.0f - tmp - v; + /* Position and normal on triangle. */ const int object = kernel_data.bake.object_index; float3 P, Ng; @@ -199,18 +200,61 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Fast path for position and normal passes not affected by shaders. */ if (kernel_data.film.pass_position != PASS_UNUSED) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_position, P); + film_write_pass_float3(buffer + kernel_data.film.pass_position, P); return true; } else if (kernel_data.film.pass_normal != PASS_UNUSED && !(shader_flags & SD_HAS_BUMP)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, N); + film_write_pass_float3(buffer + kernel_data.film.pass_normal, N); return true; } /* Setup ray. */ Ray ray ccl_optional_struct_init; - ray.P = P + N; - ray.D = -N; + + if (kernel_data.bake.use_camera) { + float3 D = camera_direction_from_point(kg, P); + + const float DN = dot(D, N); + + /* Nudge camera direction, so that the faces facing away from the camera still have + * somewhat usable shading. (Otherwise, glossy faces would be simply black.) + * + * The surface normal offset affects smooth surfaces. Lower values will make + * smooth surfaces more faceted, but higher values may show up from the camera + * at grazing angles. + * + * This value can actually be pretty high before it's noticeably wrong. */ + const float surface_normal_offset = 0.2f; + + /* Keep the ray direction at least `surface_normal_offset` "above" the smooth normal. */ + if (DN <= surface_normal_offset) { + D -= N * (DN - surface_normal_offset); + D = normalize(D); + } + + /* On the backside, just lerp towards the surface normal for the ray direction, + * as DN goes from 0.0 to -1.0. */ + if (DN <= 0.0f) { + D = normalize(mix(D, N, -DN)); + } + + /* We don't want to bake the back face, so make sure the ray direction never + * goes behind the geometry (flat) normal. This is a fail-safe, and should rarely happen. 
*/ + const float true_normal_epsilon = 0.00001f; + + if (dot(D, Ng) <= true_normal_epsilon) { + D -= Ng * (dot(D, Ng) - true_normal_epsilon); + D = normalize(D); + } + + ray.P = P + D; + ray.D = -D; + } + else { + ray.P = P + N; + ray.D = -N; + } + ray.tmin = 0.0f; ray.tmax = FLT_MAX; ray.time = 0.5f; diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h index e89ab3991c7..8df3e1b9fb3 100644 --- a/intern/cycles/kernel/integrator/init_from_camera.h +++ b/intern/cycles/kernel/integrator/init_from_camera.h @@ -5,8 +5,8 @@ #include "kernel/camera/camera.h" -#include "kernel/film/accumulate.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/shadow_catcher.h" @@ -23,31 +23,21 @@ ccl_device_inline void integrate_camera_sample(KernelGlobals kg, ccl_private Ray *ray) { /* Filter sampling. */ - float filter_u, filter_v; - - if (sample == 0) { - filter_u = 0.5f; - filter_v = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v); - } + const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) : + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER); /* Depth of field sampling. */ - float lens_u = 0.0f, lens_v = 0.0f; - if (kernel_data.cam.aperturesize > 0.0f) { - path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v); - } + const float2 rand_lens = (kernel_data.cam.aperturesize > 0.0f) ? + path_rng_2D(kg, rng_hash, sample, PRNG_LENS) : + zero_float2(); /* Motion blur time sampling. */ - float time = 0.0f; -#ifdef __CAMERA_MOTION__ - if (kernel_data.cam.shuttertime != -1.0f) - time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME); -#endif + const float rand_time = (kernel_data.cam.shuttertime != -1.0f) ? + path_rng_1D(kg, rng_hash, sample, PRNG_TIME) : + 0.0f; /* Generate camera ray. 
*/ - camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); + camera_sample(kg, x, y, rand_filter.x, rand_filter.y, rand_lens.x, rand_lens.y, rand_time, ray); } /* Return false to indicate that this pixel is finished. @@ -67,7 +57,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + if (!film_need_sample_pixel(kg, state, render_buffer)) { return false; } @@ -76,7 +66,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, * This logic allows to both count actual number of samples per pixel, and to add samples to this * pixel after it was converged and samples were added somewhere else (in which case the * `scheduled_sample` will be different from actual number of samples in this pixel). */ - const int sample = kernel_accum_sample( + const int sample = film_write_sample( kg, state, render_buffer, scheduled_sample, tile->sample_offset); /* Initialize random number seed for path. */ diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 60299f2cb2f..b9a81e25bcc 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -5,13 +5,14 @@ #include "kernel/camera/projection.h" +#include "kernel/film/light_passes.h" + +#include "kernel/integrator/guiding.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/shadow_catcher.h" #include "kernel/light/light.h" -#include "kernel/util/differential.h" - #include "kernel/geom/geom.h" #include "kernel/bvh/bvh.h" @@ -48,13 +49,15 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg, * surfaces in front of emission do we need to evaluate the shader, since we * perform MIS as part of indirect rays. 
*/ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - const float probability = path_state_continuation_probability(kg, state, path_flag); - INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = probability; + const float continuation_probability = path_state_continuation_probability(kg, state, path_flag); + INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = continuation_probability; + + guiding_record_continuation_probability(kg, state, continuation_probability); - if (probability != 1.0f) { + if (continuation_probability != 1.0f) { const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE); - if (probability == 0.0f || terminate >= probability) { + if (continuation_probability == 0.0f || terminate >= continuation_probability) { if (shader_flags & SD_HAS_EMISSION) { /* Mark path to be terminated right after shader evaluation on the surface. */ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; @@ -87,7 +90,7 @@ ccl_device_forceinline void integrator_split_shadow_catcher( return; } - kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer); + film_write_shadow_catcher_bounce_data(kg, state, render_buffer); /* Mark state as having done a shadow catcher split so that it stops contributing to * the shadow catcher matte pass, but keeps contributing to the combined pass. 
*/ diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h index 1b48b360858..25ff3d5b23f 100644 --- a/intern/cycles/kernel/integrator/intersect_shadow.h +++ b/intern/cycles/kernel/integrator/intersect_shadow.h @@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k } #ifdef __TRANSPARENT_SHADOWS__ -# if defined(__KERNEL_CPU__) +# ifndef __KERNEL_GPU__ ccl_device int shadow_intersections_compare(const void *a, const void *b) { const Intersection *isect_a = (const Intersection *)a; diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index 9ba4a0a3964..c2490581e4d 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -5,7 +5,6 @@ #include "kernel/bvh/bvh.h" #include "kernel/geom/geom.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/volume_stack.h" CCL_NAMESPACE_BEGIN @@ -38,8 +37,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility); if (num_hits > 0) { Intersection *isect = hits; @@ -108,8 +106,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility); if (num_hits > 0) { int enclosed_volumes[MAX_VOLUME_STACK_SIZE]; Intersection *isect = hits; diff --git 
a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h index f5d2bcfe9f2..142977f1ac7 100644 --- a/intern/cycles/kernel/integrator/mnee.h +++ b/intern/cycles/kernel/integrator/mnee.h @@ -186,7 +186,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, triangle_vertices_and_normals(kg, sd_vtx->prim, verts, normals); /* Compute refined position (same code as in triangle_point_from_uv). */ - sd_vtx->P = isect->u * verts[0] + isect->v * verts[1] + (1.f - isect->u - isect->v) * verts[2]; + sd_vtx->P = (1.f - isect->u - isect->v) * verts[0] + isect->u * verts[1] + isect->v * verts[2]; if (!(sd_vtx->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd_vtx); sd_vtx->P = transform_point(&tfm, sd_vtx->P); @@ -213,8 +213,8 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, } /* Tangent space (position derivatives) WRT barycentric (u, v). */ - float3 dp_du = verts[0] - verts[2]; - float3 dp_dv = verts[1] - verts[2]; + float3 dp_du = verts[1] - verts[0]; + float3 dp_dv = verts[2] - verts[0]; /* Geometric normal. */ vtx->ng = normalize(cross(dp_du, dp_dv)); @@ -223,16 +223,16 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, /* Shading normals: Interpolate normals between vertices. */ float n_len; - vtx->n = normalize_len(normals[0] * sd_vtx->u + normals[1] * sd_vtx->v + - normals[2] * (1.0f - sd_vtx->u - sd_vtx->v), + vtx->n = normalize_len(normals[0] * (1.0f - sd_vtx->u - sd_vtx->v) + normals[1] * sd_vtx->u + + normals[2] * sd_vtx->v, &n_len); /* Shading normal derivatives WRT barycentric (u, v) * we calculate the derivative of n = |(1-u-v)*n0 + u*n1 + v*n2| using: * d/du [f(u)/|f(u)|] = [d/du f(u)]/|f(u)| - f(u)/|f(u)|^3 <f(u), d/du f(u)>. 
*/ const float inv_n_len = 1.f / n_len; - float3 dn_du = inv_n_len * (normals[0] - normals[2]); - float3 dn_dv = inv_n_len * (normals[1] - normals[2]); + float3 dn_du = inv_n_len * (normals[1] - normals[0]); + float3 dn_dv = inv_n_len * (normals[2] - normals[0]); dn_du -= vtx->n * dot(vtx->n, dn_du); dn_dv -= vtx->n * dot(vtx->n, dn_dv); @@ -279,7 +279,15 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, } /* Compute constraint derivatives. */ -ccl_device_forceinline bool mnee_compute_constraint_derivatives( + +# if defined(__KERNEL_METAL__) +/* Temporary workaround for front-end compilation bug (incorrect MNEE rendering when this is + * inlined). */ +__attribute__((noinline)) +# else +ccl_device_forceinline +# endif +bool mnee_compute_constraint_derivatives( int vertex_count, ccl_private ManifoldVertex *vertices, ccl_private const float3 &surface_sample_pos, @@ -392,7 +400,7 @@ ccl_device_forceinline bool mnee_compute_constraint_derivatives( /* Invert (block) constraint derivative matrix and solve linear system so we can map dh back to dx: * dh / dx = A * dx = inverse(A) x dh - * to use for specular specular manifold walk + * to use for specular manifold walk * (See for example http://faculty.washington.edu/finlayso/ebook/algebraic/advanced/LUtri.htm * for block tridiagonal matrix based linear system solve) */ ccl_device_forceinline bool mnee_solve_matrix_h_to_x(int vertex_count, @@ -634,9 +642,9 @@ mnee_sample_bsdf_dh(ClosureType type, float alpha_x, float alpha_y, float sample * We assume here that the pdf (in half-vector measure) is the same as * the one calculation when sampling the microfacet normals from the * specular chain above: this allows us to simplify the bsdf weight */ -ccl_device_forceinline float3 mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure, - float3 wi, - float3 wo) +ccl_device_forceinline Spectrum mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure, + float3 wi, + float3 wo) { ccl_private 
MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)closure; @@ -808,7 +816,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, float3 wo = normalize_len(vertices[0].p - sd->P, &wo_len); /* Initialize throughput and evaluate receiver bsdf * |n.wo|. */ - shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader); + surface_shader_bsdf_eval(kg, state, sd, wo, throughput, ls->shader); /* Update light sample with new position / direction * and keep pdf in vertex area measure */ @@ -836,7 +844,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, 1; INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + vertex_count; - float3 light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time); + Spectrum light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time); bsdf_eval_mul(throughput, light_eval / ls->pdf); /* Generalized geometry term. */ @@ -914,7 +922,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + 1 + vi; /* Evaluate shader nodes at solution vi. */ - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true); /* Set light looking dir. */ @@ -925,7 +933,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, /* Evaluate product term inside eq.6 at solution interface. vi * divided by corresponding sampled pdf: * fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h| */ - float3 bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo); + Spectrum bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo); bsdf_eval_mul(throughput, bsdf_contribution); } @@ -1007,7 +1015,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, return 0; /* Last bool argument is the MNEE flag (for TINY_MAX_CLOSURE cap in kernel_shader.h). 
*/ - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true); /* Get and sample refraction bsdf */ @@ -1034,10 +1042,12 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, float2 h = zero_float2(); if (microfacet_bsdf->alpha_x > 0.f && microfacet_bsdf->alpha_y > 0.f) { /* Sample transmissive microfacet bsdf. */ - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - h = mnee_sample_bsdf_dh( - bsdf->type, microfacet_bsdf->alpha_x, microfacet_bsdf->alpha_y, bsdf_u, bsdf_v); + const float2 bsdf_uv = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); + h = mnee_sample_bsdf_dh(bsdf->type, + microfacet_bsdf->alpha_x, + microfacet_bsdf->alpha_y, + bsdf_uv.x, + bsdf_uv.y); } /* Setup differential geometry on vertex. */ diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h index 912c380cdb6..7197f0f2f3a 100644 --- a/intern/cycles/kernel/integrator/path_state.h +++ b/intern/cycles/kernel/integrator/path_state.h @@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void path_state_init_queues(IntegratorState state) { INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0; INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0; #endif @@ -48,13 +48,25 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0; INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0; INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash; - INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BOUNCE_NUM; INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | 
PATH_RAY_TRANSPARENT_BACKGROUND; INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f; - INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); + INTEGRATOR_STATE_WRITE(state, path, throughput) = one_spectrum(); + +#ifdef __PATH_GUIDING__ + INTEGRATOR_STATE_WRITE(state, path, unguided_throughput) = 1.0f; + INTEGRATOR_STATE_WRITE(state, guiding, path_segment) = nullptr; + INTEGRATOR_STATE_WRITE(state, guiding, use_surface_guiding) = false; + INTEGRATOR_STATE_WRITE(state, guiding, sample_surface_guiding_rand) = 0.5f; + INTEGRATOR_STATE_WRITE(state, guiding, surface_guiding_sampling_prob) = 0.0f; + INTEGRATOR_STATE_WRITE(state, guiding, bssrdf_sampling_prob) = 0.0f; + INTEGRATOR_STATE_WRITE(state, guiding, use_volume_guiding) = false; + INTEGRATOR_STATE_WRITE(state, guiding, sample_volume_guiding_rand) = 0.5f; + INTEGRATOR_STATE_WRITE(state, guiding, volume_guiding_sampling_prob) = 0.0f; +#endif #ifdef __MNEE__ INTEGRATOR_STATE_WRITE(state, path, mnee) = 0; @@ -74,7 +86,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, #ifdef __DENOISING_FEATURES__ if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) { INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES; - INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3(); + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_spectrum(); } #endif } @@ -249,7 +261,11 @@ ccl_device_inline float path_state_continuation_probability(KernelGlobals kg, /* Probabilistic termination: use sqrt() to roughly match typical view * transform and do path termination a bit later on average. 
*/ - return min(sqrtf(reduce_max(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); + Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + throughput *= INTEGRATOR_STATE(state, path, unguided_throughput); +#endif + return min(sqrtf(reduce_max(fabs(throughput))), 1.0f); } ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state) @@ -298,38 +314,25 @@ ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState sta ccl_device_inline float path_state_rng_1D(KernelGlobals kg, ccl_private const RNGState *rng_state, - int dimension) + const int dimension) { return path_rng_1D( kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); } -ccl_device_inline void path_state_rng_2D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int dimension, - ccl_private float *fx, - ccl_private float *fy) -{ - path_rng_2D( - kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy); -} - -ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg, - ccl_private const RNGState *rng_state, - uint hash) +ccl_device_inline float2 path_state_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + const int dimension) { - /* Use a hash instead of dimension, this is not great but avoids adding - * more dimensions to each bounce which reduces quality of dimensions we - * are already using. 
*/ - return path_rng_1D( - kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset); + return path_rng_2D( + kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); } ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, ccl_private const RNGState *rng_state, - int branch, - int num_branches, - int dimension) + const int branch, + const int num_branches, + const int dimension) { return path_rng_1D(kg, rng_state->rng_hash, @@ -337,20 +340,16 @@ ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, rng_state->rng_offset + dimension); } -ccl_device_inline void path_branched_rng_2D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int branch, - int num_branches, - int dimension, - ccl_private float *fx, - ccl_private float *fy) +ccl_device_inline float2 path_branched_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + const int branch, + const int num_branches, + const int dimension) { - path_rng_2D(kg, - rng_state->rng_hash, - rng_state->sample * num_branches + branch, - rng_state->rng_offset + dimension, - fx, - fy); + return path_rng_2D(kg, + rng_state->rng_hash, + rng_state->sample * num_branches + branch, + rng_state->rng_offset + dimension); } /* Utility functions to get light termination value, diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index a7edfffd175..8fc5689683a 100644 --- a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -3,18 +3,20 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/film/light_passes.h" + +#include "kernel/integrator/guiding.h" +#include "kernel/integrator/surface_shader.h" + #include "kernel/light/light.h" #include "kernel/light/sample.h" CCL_NAMESPACE_BEGIN -ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, - 
IntegratorState state, - ccl_global float *ccl_restrict render_buffer) +ccl_device Spectrum integrator_eval_background_shader(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) { -#ifdef __BACKGROUND__ const int shader = kernel_data.background.surface_shader; const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); @@ -26,55 +28,35 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || ((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) - return zero_float3(); + return zero_spectrum(); } /* Use fast constant background color if available. */ - float3 L = zero_float3(); - if (!shader_constant_emission_eval(kg, shader, &L)) { - /* Evaluate background shader. */ - - /* TODO: does aliasing like this break automatic SoA in CUDA? - * Should we instead store closures separate from ShaderData? */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - - PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); - shader_setup_from_background(kg, - emission_sd, - INTEGRATOR_STATE(state, ray, P), - INTEGRATOR_STATE(state, ray, D), - INTEGRATOR_STATE(state, ray, time)); - - PROFILING_SHADER(emission_sd->object, emission_sd->shader); - PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>( - kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); - - L = shader_background_eval(emission_sd); + Spectrum L = zero_spectrum(); + if (surface_shader_constant_emission(kg, shader, &L)) { + return L; } - /* Background MIS weights. */ -# ifdef __BACKGROUND_MIS__ - /* Check if background light exists or if we should skip pdf. 
*/ - if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && - kernel_data.background.use_mis) { - const float3 ray_P = INTEGRATOR_STATE(state, ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, ray, D); - const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - - /* multiple importance sampling, get background light pdf for ray - * direction, and compute weight with respect to BSDF pdf */ - const float pdf = background_light_pdf(kg, ray_P, ray_D); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); - L *= mis_weight; - } -# endif + /* Evaluate background shader. */ - return L; -#else - return make_float3(0.8f, 0.8f, 0.8f); -#endif + /* TODO: does aliasing like this break automatic SoA in CUDA? + * Should we instead store closures separate from ShaderData? */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); + shader_setup_from_background(kg, + emission_sd, + INTEGRATOR_STATE(state, ray, P), + INTEGRATOR_STATE(state, ray, D), + INTEGRATOR_STATE(state, ray, time)); + + PROFILING_SHADER(emission_sd->object, emission_sd->shader); + PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>( + kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); + + return surface_shader_background(emission_sd); } ccl_device_inline void integrate_background(KernelGlobals kg, @@ -117,17 +99,38 @@ ccl_device_inline void integrate_background(KernelGlobals kg, #endif /* __MNEE__ */ /* Evaluate background shader. */ - float3 L = (eval_background) ? 
integrator_eval_background_shader(kg, state, render_buffer) : - zero_float3(); + Spectrum L = zero_spectrum(); + + if (eval_background) { + L = integrator_eval_background_shader(kg, state, render_buffer); + + /* When using the ao bounces approximation, adjust background + * shader intensity with ao factor. */ + if (path_state_ao_bounce(kg, state)) { + L *= kernel_data.integrator.ao_bounces_factor; + } + + /* Background MIS weights. */ + float mis_weight = 1.0f; + /* Check if background light exists or if we should skip pdf. */ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && + kernel_data.background.use_mis) { + const float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + + /* multiple importance sampling, get background light pdf for ray + * direction, and compute weight with respect to BSDF pdf */ + const float pdf = background_light_pdf(kg, ray_P, ray_D); + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); + } - /* When using the ao bounces approximation, adjust background - * shader intensity with ao factor. */ - if (path_state_ao_bounce(kg, state)) { - L *= kernel_data.integrator.ao_bounces_factor; + guiding_record_background(kg, state, L, mis_weight); + L *= mis_weight; } /* Write to render buffer. */ - kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); + film_write_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); } ccl_device_inline void integrate_distant_lights(KernelGlobals kg, @@ -169,24 +172,24 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg, /* TODO: does aliasing like this break automatic SoA in CUDA? 
*/ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); + Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); if (is_zero(light_eval)) { return; } /* MIS weighting. */ + float mis_weight = 1.0f; if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); - light_eval *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); } /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission( - kg, state, throughput * light_eval, render_buffer, kernel_data.background.lightgroup); + guiding_record_background(kg, state, light_eval, mis_weight); + film_write_surface_emission( + kg, state, light_eval, mis_weight, render_buffer, kernel_data.background.lightgroup); } } } diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h index 910e3383f51..e0b0500dc78 100644 --- a/intern/cycles/kernel/integrator/shade_light.h +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -3,8 +3,8 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/film/light_passes.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/light/light.h" #include "kernel/light/sample.h" @@ -18,6 +18,8 @@ ccl_device_inline void integrate_light(KernelGlobals kg, Intersection isect ccl_optional_struct_init; integrator_state_read_isect(kg, state, &isect); + guiding_record_light_surface_segment(kg, state, &isect); + float3 ray_P = INTEGRATOR_STATE(state, ray, P); const float3 ray_D = 
INTEGRATOR_STATE(state, ray, D); const float ray_time = INTEGRATOR_STATE(state, ray, time); @@ -51,23 +53,23 @@ ccl_device_inline void integrate_light(KernelGlobals kg, /* TODO: does aliasing like this break automatic SoA in CUDA? */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); + Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); if (is_zero(light_eval)) { return; } /* MIS weighting. */ + float mis_weight = 1.0f; if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); - light_eval *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); } /* Write to render buffer. 
*/ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission(kg, state, throughput * light_eval, render_buffer, ls.group); + guiding_record_surface_emission(kg, state, light_eval, mis_weight); + film_write_surface_emission(kg, state, light_eval, mis_weight, render_buffer, ls.group); } ccl_device void integrator_shade_light(KernelGlobals kg, diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index 4b002a47bee..bedb15ddf89 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -3,8 +3,9 @@ #pragma once +#include "kernel/integrator/guiding.h" #include "kernel/integrator/shade_volume.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/integrator/volume_stack.h" CCL_NAMESPACE_BEGIN @@ -15,9 +16,9 @@ ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits) } #ifdef __TRANSPARENT_SHADOWS__ -ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, - IntegratorShadowState state, - const int hit) +ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg, + IntegratorShadowState state, + const int hit) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE); @@ -40,7 +41,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, /* Evaluate shader. */ if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, shadow_sd, NULL, PATH_RAY_SHADOW); } @@ -50,7 +51,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, # endif /* Compute transparency from closures. 
*/ - return shader_bsdf_transparency(kg, shadow_sd); + return surface_shader_transparency(kg, shadow_sd); } # ifdef __VOLUME__ @@ -58,7 +59,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, IntegratorShadowState state, const int hit, const int num_recorded_hits, - ccl_private float3 *ccl_restrict + ccl_private Spectrum *ccl_restrict throughput) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME); @@ -100,7 +101,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) { # ifdef __VOLUME__ if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) { - float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput); + Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput); integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput); if (is_zero(throughput)) { return true; @@ -113,8 +114,8 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, /* Surface shaders. 
*/ if (hit < num_recorded_hits) { - const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit); - const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; + const Spectrum shadow = integrate_transparent_surface_shadow(kg, state, hit); + const Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; if (is_zero(throughput)) { return true; } @@ -165,7 +166,8 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg, return; } else { - kernel_accum_light(kg, state, render_buffer); + guiding_record_direct_light(kg, state); + film_write_direct_light(kg, state, render_buffer); integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 1514b3956ad..067d35ef9e3 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -3,14 +3,16 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/film/passes.h" +#include "kernel/film/data_passes.h" +#include "kernel/film/denoising_passes.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/mnee.h" +#include "kernel/integrator/guiding.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/subsurface.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/integrator/volume_stack.h" #include "kernel/light/light.h" @@ -31,7 +33,52 @@ ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg, shader_setup_from_ray(kg, sd, &ray, &isect); } -#ifdef __HOLDOUT__ +ccl_device_forceinline float3 integrate_surface_ray_offset(KernelGlobals kg, + const ccl_private ShaderData *sd, + const float3 ray_P, + const float3 ray_D) +{ + /* No ray offset needed for other primitive types. 
*/ + if (!(sd->type & PRIMITIVE_TRIANGLE)) { + return ray_P; + } + + /* Self intersection tests already account for the case where a ray hits the + * same primitive. However precision issues can still cause neighboring + * triangles to be hit. Here we test if the ray-triangle intersection with + * the same primitive would miss, implying that a neighboring triangle would + * be hit instead. + * + * This relies on triangle intersection to be watertight, and the inverse + * object transform to match the one used by ray intersection exactly. + * + * Potential improvements: + * - It appears this happens when either barycentric coordinates are small, + * or dot(sd->Ng, ray_D) is small. Detect such cases and skip test? + * - Instead of ray offset, can we tweak P to lie within the triangle? + */ + const uint tri_vindex = kernel_data_fetch(tri_vindex, sd->prim).w; + const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0), + tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1), + tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); + + float3 local_ray_P = ray_P; + float3 local_ray_D = ray_D; + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform itfm = object_get_inverse_transform(kg, sd); + local_ray_P = transform_point(&itfm, local_ray_P); + local_ray_D = transform_direction(&itfm, local_ray_D); + } + + if (ray_triangle_intersect_self(local_ray_P, local_ray_D, tri_a, tri_b, tri_c)) { + return ray_P; + } + else { + return ray_offset(ray_P, sd->Ng); + } +} + ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, ConstIntegratorState state, ccl_private ShaderData *sd, @@ -42,24 +89,20 @@ ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { - const float3 holdout_weight = shader_holdout_apply(kg, sd); - if (kernel_data.background.transparent) { - const float3 
throughput = INTEGRATOR_STATE(state, path, throughput); - const float transparent = average(holdout_weight * throughput); - kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer); - } - if (isequal(holdout_weight, one_float3())) { + const Spectrum holdout_weight = surface_shader_apply_holdout(kg, sd); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const float transparent = average(holdout_weight * throughput); + film_write_holdout(kg, state, path_flag, transparent, render_buffer); + if (isequal(holdout_weight, one_spectrum())) { return false; } } return true; } -#endif /* __HOLDOUT__ */ -#ifdef __EMISSION__ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, - ConstIntegratorState state, + IntegratorState state, ccl_private const ShaderData *sd, ccl_global float *ccl_restrict render_buffer) @@ -67,14 +110,15 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); /* Evaluate emissive closure. */ - float3 L = shader_emissive_eval(sd); + Spectrum L = surface_shader_emission(sd); + float mis_weight = 1.0f; -# ifdef __HAIR__ +#ifdef __HAIR__ if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_TRIANGLE)) -# else +#else if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) -# endif +#endif { const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); const float t = sd->ray_length; @@ -82,17 +126,14 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, /* Multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf. 
*/ float pdf = triangle_light_pdf(kg, sd, t); - float mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf); - L *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf); } - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission( - kg, state, throughput * L, render_buffer, object_lightgroup(kg, sd->object)); + guiding_record_surface_emission(kg, state, L, mis_weight); + film_write_surface_emission( + kg, state, L, mis_weight, render_buffer, object_lightgroup(kg, sd->object)); } -#endif /* __EMISSION__ */ -#ifdef __EMISSION__ /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ template<uint node_feature_mask> @@ -111,11 +152,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_position( - kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) { + kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, &ls)) { return; } } @@ -133,9 +173,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, Ray ray ccl_optional_struct_init; BsdfEval bsdf_eval ccl_optional_struct_init; - const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D); -# ifdef __MNEE__ + const bool is_transmission = dot(ls.D, sd->N) < 0.0f; + +#ifdef __MNEE__ int mnee_vertex_count = 0; IF_KERNEL_FEATURE(MNEE) { @@ -144,13 +185,15 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, const bool use_caustics = kernel_data_fetch(lights, ls.lamp).use_caustics; if (use_caustics) { /* Are we on a caustic caster? 
*/ - if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER)) + if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER)) { return; + } /* Are we on a caustic receiver? */ - if (!is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_RECEIVER)) + if (!is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_RECEIVER)) { mnee_vertex_count = kernel_path_mnee_sample( kg, state, sd, emission_sd, rng_state, &ls, &bsdf_eval); + } } } } @@ -161,15 +204,15 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, light_sample_to_surface_shadow_ray(kg, emission_sd, &ls, &ray); } else -# endif /* __MNEE__ */ +#endif /* __MNEE__ */ { - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); + const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); if (is_zero(light_eval)) { return; } /* Evaluate BSDF. */ - const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); + const float bsdf_pdf = surface_shader_bsdf_eval(kg, state, sd, ls.D, &bsdf_eval, ls.shader); bsdf_eval_mul(&bsdf_eval, light_eval / ls.pdf); if (ls.shader & SHADER_USE_MIS) { @@ -197,9 +240,13 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); if (is_transmission) { -# ifdef __VOLUME__ +#ifdef __VOLUME__ shadow_volume_stack_enter_exit(kg, shadow_state, sd); -# endif +#endif + } + + if (ray.self.object != OBJECT_NONE) { + ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D); } /* Write shadow ray and associated state to global memory. */ @@ -213,11 +260,12 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, /* Copy state from main path to shadow path. */ uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? 
PATH_RAY_SHADOW_FOR_LIGHT : 0; - const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval); + const Spectrum unlit_throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum throughput = unlit_throughput * bsdf_eval_sum(&bsdf_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - packed_float3 pass_diffuse_weight; - packed_float3 pass_glossy_weight; + PackedSpectrum pass_diffuse_weight; + PackedSpectrum pass_glossy_weight; if (shadow_flag & PATH_RAY_ANY_PASS) { /* Indirect bounce, use weights from earlier surface or volume bounce. */ @@ -227,8 +275,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, else { /* Direct light, use BSDFs at this bounce. */ shadow_flag |= PATH_RAY_SURFACE_PASS; - pass_diffuse_weight = packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval)); - pass_glossy_weight = packed_float3(bsdf_eval_pass_glossy_weight(&bsdf_eval)); + pass_diffuse_weight = PackedSpectrum(bsdf_eval_pass_diffuse_weight(&bsdf_eval)); + pass_glossy_weight = PackedSpectrum(bsdf_eval_pass_glossy_weight(&bsdf_eval)); } INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; @@ -250,7 +298,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( state, path, glossy_bounce); -# ifdef __MNEE__ +#ifdef __MNEE__ if (mnee_vertex_count > 0) { INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE(state, path, transmission_bounce) + mnee_vertex_count - 1; @@ -262,7 +310,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, bounce) = INTEGRATOR_STATE(state, path, bounce) + mnee_vertex_count; } else -# endif +#endif { INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( state, path, transmission_bounce); @@ -283,8 +331,12 @@ 
ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, shadow_state, shadow_path, lightgroup) = (ls.type != LIGHT_BACKGROUND) ? ls.group + 1 : kernel_data.background.lightgroup + 1; -} +#ifdef __PATH_GUIDING__ + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unlit_throughput) = unlit_throughput; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = INTEGRATOR_STATE( + state, guiding, path_segment); #endif +} /* Path tracing: bounce off or through surface with new direction. */ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( @@ -298,9 +350,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( return LABEL_NONE; } - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u); + float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); + ccl_private const ShaderClosure *sc = surface_shader_bsdf_bssrdf_pick(sd, &rand_bsdf); #ifdef __SUBSURFACE__ /* BSSRDF closure, we schedule subsurface intersection kernel. */ @@ -310,17 +361,52 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( #endif /* BSDF closure, sample direction. 
*/ - float bsdf_pdf; + float bsdf_pdf = 0.0f, unguided_bsdf_pdf = 0.0f; BsdfEval bsdf_eval ccl_optional_struct_init; float3 bsdf_omega_in ccl_optional_struct_init; - differential3 bsdf_domega_in ccl_optional_struct_init; int label; - label = shader_bsdf_sample_closure( - kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); + float2 bsdf_sampled_roughness = make_float2(1.0f, 1.0f); + float bsdf_eta = 1.0f; + +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + if (kernel_data.integrator.use_surface_guiding) { + label = surface_shader_bsdf_guided_sample_closure(kg, + state, + sd, + sc, + rand_bsdf, + &bsdf_eval, + &bsdf_omega_in, + &bsdf_pdf, + &unguided_bsdf_pdf, + &bsdf_sampled_roughness, + &bsdf_eta); + + if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) { + return LABEL_NONE; + } - if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) { - return LABEL_NONE; + INTEGRATOR_STATE_WRITE(state, path, unguided_throughput) *= bsdf_pdf / unguided_bsdf_pdf; + } + else +#endif + { + label = surface_shader_bsdf_sample_closure(kg, + sd, + sc, + rand_bsdf, + &bsdf_eval, + &bsdf_omega_in, + &bsdf_pdf, + &bsdf_sampled_roughness, + &bsdf_eta); + + if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) { + return LABEL_NONE; + } + + unguided_bsdf_pdf = bsdf_pdf; } if (label & LABEL_TRANSPARENT) { @@ -329,20 +415,19 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( } else { /* Setup ray with changed origin and direction. 
*/ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); + const float3 D = normalize(bsdf_omega_in); + INTEGRATOR_STATE_WRITE(state, ray, P) = integrate_surface_ray_offset(kg, sd, sd->P, D); + INTEGRATOR_STATE_WRITE(state, ray, D) = D; INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); #endif } /* Update throughput. */ - float3 throughput = INTEGRATOR_STATE(state, path, throughput); - throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf; - INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; + const Spectrum bsdf_weight = bsdf_eval_sum(&bsdf_eval) / bsdf_pdf; + INTEGRATOR_STATE_WRITE(state, path, throughput) *= bsdf_weight; if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { if (INTEGRATOR_STATE(state, path, bounce) == 0) { @@ -357,10 +442,21 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( if (!(label & LABEL_TRANSPARENT)) { INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( - bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); + unguided_bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); } path_state_next(kg, state, label); + + guiding_record_surface_bounce(kg, + state, + sd, + bsdf_weight, + bsdf_pdf, + sd->N, + normalize(bsdf_omega_in), + bsdf_sampled_roughness, + bsdf_eta); + return label; } @@ -382,14 +478,15 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState ccl_device_forceinline bool integrate_surface_terminate(IntegratorState state, const uint32_t path_flag) { - const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ? 
- 0.0f : - INTEGRATOR_STATE(state, path, continuation_probability); - if (probability == 0.0f) { + const float continuation_probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ? + 0.0f : + INTEGRATOR_STATE( + state, path, continuation_probability); + if (continuation_probability == 0.0f) { return true; } - else if (probability != 1.0f) { - INTEGRATOR_STATE_WRITE(state, path, throughput) /= probability; + else if (continuation_probability != 1.0f) { + INTEGRATOR_STATE_WRITE(state, path, throughput) /= continuation_probability; } return false; @@ -408,22 +505,24 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, return; } - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + const float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); float3 ao_N; - const float3 ao_weight = shader_bsdf_ao( + const Spectrum ao_weight = surface_shader_ao( kg, sd, kernel_data.integrator.ao_additive_factor, &ao_N); float3 ao_D; float ao_pdf; - sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + sample_cos_hemisphere(ao_N, rand_bsdf.x, rand_bsdf.y, &ao_D, &ao_pdf); bool skip_self = true; Ray ray ccl_optional_struct_init; ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self); ray.D = ao_D; + if (skip_self) { + ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D); + } ray.tmin = 0.0f; ray.tmax = kernel_data.integrator.ao_bounces_distance; ray.time = sd->time; @@ -452,7 +551,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag) | PATH_RAY_SHADOW_FOR_AO; - const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * shader_bsdf_alpha(kg, sd); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) * + surface_shader_alpha(kg, sd); 
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( state, path, render_pixel_index); @@ -494,6 +594,8 @@ ccl_device bool integrate_surface(KernelGlobals kg, #ifdef __VOLUME__ if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { #endif + guiding_record_surface_segment(kg, state, &sd); + #ifdef __SUBSURFACE__ /* Can skip shader evaluation for BSSRDF exit point without bump mapping. */ if (!(path_flag & PATH_RAY_SUBSURFACE) || ((sd.flag & SD_HAS_BSSRDF_BUMP))) @@ -501,7 +603,7 @@ ccl_device bool integrate_surface(KernelGlobals kg, { /* Evaluate shader. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL); - shader_eval_surface<node_feature_mask>(kg, state, &sd, render_buffer, path_flag); + surface_shader_eval<node_feature_mask>(kg, state, &sd, render_buffer, path_flag); /* Initialize additional RNG for BSDFs. */ if (sd.flag & SD_BSDF_NEEDS_LCG) { @@ -523,21 +625,17 @@ ccl_device bool integrate_surface(KernelGlobals kg, #endif { /* Filter closures. */ - shader_prepare_surface_closures(kg, state, &sd, path_flag); + surface_shader_prepare_closures(kg, state, &sd, path_flag); -#ifdef __HOLDOUT__ /* Evaluate holdout. */ if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) { return false; } -#endif -#ifdef __EMISSION__ /* Write emission. */ if (sd.flag & SD_EMISSION) { integrate_surface_emission(kg, state, &sd, render_buffer); } -#endif /* Perform path termination. Most paths have already been terminated in * the intersect_closest kernel, this is just for emission and for dividing @@ -551,11 +649,11 @@ ccl_device bool integrate_surface(KernelGlobals kg, /* Write render passes. 
*/ #ifdef __PASSES__ PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES); - kernel_write_data_passes(kg, state, &sd, render_buffer); + film_write_data_passes(kg, state, &sd, render_buffer); #endif #ifdef __DENOISING_FEATURES__ - kernel_write_denoising_features_surface(kg, state, &sd, render_buffer); + film_write_denoising_features_surface(kg, state, &sd, render_buffer); #endif } @@ -563,6 +661,10 @@ ccl_device bool integrate_surface(KernelGlobals kg, RNGState rng_state; path_state_rng_load(state, &rng_state); +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + surface_shader_prepare_guiding(kg, state, &sd, &rng_state); + guiding_write_debug_passes(kg, state, &sd, render_buffer); +#endif /* Direct light. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT); integrate_surface_direct_light<node_feature_mask>(kg, state, &sd, &rng_state); diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 4aab097a7d8..a8324cda2dc 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -3,12 +3,14 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/film/passes.h" +#include "kernel/film/data_passes.h" +#include "kernel/film/denoising_passes.h" +#include "kernel/film/light_passes.h" +#include "kernel/integrator/guiding.h" #include "kernel/integrator/intersect_closest.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/volume_shader.h" #include "kernel/integrator/volume_stack.h" #include "kernel/light/light.h" @@ -29,13 +31,13 @@ typedef enum VolumeIntegrateEvent { typedef struct VolumeIntegrateResult { /* Throughput and offset for direct light scattering. */ bool direct_scatter; - float3 direct_throughput; + Spectrum direct_throughput; float direct_t; ShaderVolumePhases direct_phases; /* Throughput and offset for indirect light scattering. 
*/ bool indirect_scatter; - float3 indirect_throughput; + Spectrum indirect_throughput; float indirect_t; ShaderVolumePhases indirect_phases; } VolumeIntegrateResult; @@ -52,19 +54,19 @@ typedef struct VolumeIntegrateResult { * sigma_t = sigma_a + sigma_s */ typedef struct VolumeShaderCoefficients { - float3 sigma_t; - float3 sigma_s; - float3 emission; + Spectrum sigma_t; + Spectrum sigma_s; + Spectrum emission; } VolumeShaderCoefficients; /* Evaluate shader to get extinction coefficient at P. */ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, IntegratorShadowState state, ccl_private ShaderData *ccl_restrict sd, - ccl_private float3 *ccl_restrict extinction) + ccl_private Spectrum *ccl_restrict extinction) { VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i)) - shader_eval_volume<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass); + volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass); if (!(sd->flag & SD_EXTINCTION)) { return false; @@ -83,15 +85,16 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg, { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i)) - shader_eval_volume<false>(kg, state, sd, path_flag, volume_read_lambda_pass); + volume_shader_eval<false>(kg, state, sd, path_flag, volume_read_lambda_pass); if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) { return false; } - coeff->sigma_s = zero_float3(); - coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3(); - coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_float3(); + coeff->sigma_s = zero_spectrum(); + coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : + zero_spectrum(); + coeff->emission = (sd->flag & SD_EMISSION) ? 
sd->closure_emission_background : zero_spectrum(); if (sd->flag & SD_SCATTER) { for (int i = 0; i < sd->num_closure; i++) { @@ -143,11 +146,11 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, /* Perform shading at this offset within a step, to integrate over * over the entire step segment. */ - *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4); + *step_shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET); /* Shift starting point of all segment by this random amount to avoid * banding artifacts from the volume bounding shape. */ - *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3); + *steps_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET); } } @@ -162,9 +165,9 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, - ccl_global float3 *ccl_restrict throughput) + ccl_global Spectrum *ccl_restrict throughput) { - float3 sigma_t = zero_float3(); + Spectrum sigma_t = zero_spectrum(); if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { *throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin); @@ -178,14 +181,14 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, IntegratorShadowState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, - ccl_private float3 *ccl_restrict throughput, + ccl_private Spectrum *ccl_restrict throughput, const float object_step_size) { /* Load random number state. */ RNGState rng_state; shadow_path_state_rng_load(state, &rng_state); - float3 tp = *throughput; + Spectrum tp = *throughput; /* Prepare for stepping. 
* For shadows we do not offset all segments, since the starting point is @@ -207,7 +210,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, /* compute extinction at the start */ float t = ray->tmin; - float3 sum = zero_float3(); + Spectrum sum = zero_spectrum(); for (int i = 0; i < max_steps; i++) { /* advance to new position */ @@ -215,7 +218,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, float dt = new_t - t; float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset); - float3 sigma_t = zero_float3(); + Spectrum sigma_t = zero_spectrum(); /* compute attenuation over segment */ sd->P = new_P; @@ -228,8 +231,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, tp = *throughput * exp(sum); /* stop if nearly all light is blocked */ - if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON && - tp.z < VOLUME_THROUGHPUT_EPSILON) + if (reduce_max(tp) < VOLUME_THROUGHPUT_EPSILON) break; } } @@ -334,22 +336,22 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray, /* Distance sampling */ ccl_device float volume_distance_sample(float max_t, - float3 sigma_t, + Spectrum sigma_t, int channel, float xi, - ccl_private float3 *transmittance, - ccl_private float3 *pdf) + ccl_private Spectrum *transmittance, + ccl_private Spectrum *pdf) { /* xi is [0, 1[ so log(0) should never happen, division by zero is * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ float sample_sigma_t = volume_channel_get(sigma_t, channel); - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t); float sample_transmittance = volume_channel_get(full_transmittance, channel); float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t); *transmittance = volume_color_transmittance(sigma_t, sample_t); - *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - 
full_transmittance); + *pdf = safe_divide_color(sigma_t * *transmittance, one_spectrum() - full_transmittance); /* todo: optimization: when taken together with hit/miss decision, * the full_transmittance cancels out drops out and xi does not @@ -358,33 +360,36 @@ ccl_device float volume_distance_sample(float max_t, return sample_t; } -ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t) +ccl_device Spectrum volume_distance_pdf(float max_t, Spectrum sigma_t, float sample_t) { - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); - float3 transmittance = volume_color_transmittance(sigma_t, sample_t); + Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t); + Spectrum transmittance = volume_color_transmittance(sigma_t, sample_t); - return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance); + return safe_divide_color(sigma_t * transmittance, one_spectrum() - full_transmittance); } /* Emission */ -ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff, - int closure_flag, - float3 transmittance, - float t) +ccl_device Spectrum volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff, + int closure_flag, + Spectrum transmittance, + float t) { /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t * this goes to E * t as sigma_t goes to zero * * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ - float3 emission = coeff->emission; + Spectrum emission = coeff->emission; if (closure_flag & SD_EXTINCTION) { - float3 sigma_t = coeff->sigma_t; + Spectrum sigma_t = coeff->sigma_t; - emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t; - emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t; - emission.z *= (sigma_t.z > 0.0f) ? 
(1.0f - transmittance.z) / sigma_t.z : t; + FOREACH_SPECTRUM_CHANNEL (i) { + GET_SPECTRUM_CHANNEL(emission, i) *= (GET_SPECTRUM_CHANNEL(sigma_t, i) > 0.0f) ? + (1.0f - GET_SPECTRUM_CHANNEL(transmittance, i)) / + GET_SPECTRUM_CHANNEL(sigma_t, i) : + t; + } } else emission *= t; @@ -419,14 +424,14 @@ ccl_device_forceinline void volume_integrate_step_scattering( ccl_private const Ray *ray, const float3 equiangular_light_P, ccl_private const VolumeShaderCoefficients &ccl_restrict coeff, - const float3 transmittance, + const Spectrum transmittance, ccl_private VolumeIntegrateState &ccl_restrict vstate, ccl_private VolumeIntegrateResult &ccl_restrict result) { /* Pick random color channel, we use the Veach one-sample * model with balance heuristic for the channels. */ - const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - float3 channel_pdf; + const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + Spectrum channel_pdf; const int channel = volume_sample_channel( albedo, result.indirect_throughput, vstate.rphase, &channel_pdf); @@ -435,11 +440,11 @@ ccl_device_forceinline void volume_integrate_step_scattering( if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax && vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) { const float new_dt = result.direct_t - vstate.tmin; - const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); result.direct_scatter = true; result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf; - shader_copy_volume_phases(&result.direct_phases, sd); + volume_shader_copy_phases(&result.direct_phases, sd); /* Multiple importance sampling. 
*/ if (vstate.use_mis) { @@ -467,7 +472,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( const float new_t = vstate.tmin + new_dt; /* transmittance and pdf */ - const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance); if (vstate.distance_pdf * distance_pdf > VOLUME_SAMPLE_PDF_CUTOFF) { @@ -475,7 +480,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( result.indirect_scatter = true; result.indirect_t = new_t; result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf; - shader_copy_volume_phases(&result.indirect_phases, sd); + volume_shader_copy_phases(&result.indirect_phases, sd); if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { /* If using distance sampling for direct light, just copy parameters @@ -483,7 +488,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( result.direct_scatter = true; result.direct_t = result.indirect_t; result.direct_throughput = result.indirect_throughput; - shader_copy_volume_phases(&result.direct_phases, sd); + volume_shader_copy_phases(&result.direct_phases, sd); /* Multiple importance sampling. */ if (vstate.use_mis) { @@ -546,8 +551,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( vstate.tmin = ray->tmin; vstate.tmax = ray->tmin; vstate.absorption_only = true; - vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE); - vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL); + vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE); + vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_CHANNEL); /* Multiple importance sampling: pick between equiangular and distance sampling strategy. 
*/ vstate.direct_sample_method = direct_sample_method; @@ -566,7 +571,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( vstate.distance_pdf = 1.0f; /* Initialize volume integration result. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); result.direct_throughput = throughput; result.indirect_throughput = throughput; @@ -579,9 +584,9 @@ ccl_device_forceinline void volume_integrate_heterogeneous( # ifdef __DENOISING_FEATURES__ const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES); - float3 accum_albedo = zero_float3(); + Spectrum accum_albedo = zero_spectrum(); # endif - float3 accum_emission = zero_float3(); + Spectrum accum_emission = zero_spectrum(); for (int i = 0; i < max_steps; i++) { /* Advance to new position */ @@ -596,18 +601,19 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Evaluate transmittance over segment. */ const float dt = (vstate.tmax - vstate.tmin); - const float3 transmittance = (closure_flag & SD_EXTINCTION) ? - volume_color_transmittance(coeff.sigma_t, dt) : - one_float3(); + const Spectrum transmittance = (closure_flag & SD_EXTINCTION) ? + volume_color_transmittance(coeff.sigma_t, dt) : + one_spectrum(); /* Emission. */ if (closure_flag & SD_EMISSION) { /* Only write emission before indirect light scatter position, since we terminate * stepping at that point if we have already found a direct light scatter position. */ if (!result.indirect_scatter) { - const float3 emission = volume_emission_integrate( + const Spectrum emission = volume_emission_integrate( &coeff, closure_flag, transmittance, dt); accum_emission += result.indirect_throughput * emission; + guiding_record_volume_emission(kg, state, emission); } } @@ -616,8 +622,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( # ifdef __DENOISING_FEATURES__ /* Accumulate albedo for denoising features. 
*/ if (write_denoising_features && (closure_flag & SD_SCATTER)) { - const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance); + const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + accum_albedo += result.indirect_throughput * albedo * (one_spectrum() - transmittance); } # endif @@ -634,7 +640,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Stop if nearly all light blocked. */ if (!result.indirect_scatter) { if (reduce_max(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) { - result.indirect_throughput = zero_float3(); + result.indirect_throughput = zero_spectrum(); break; } } @@ -660,20 +666,19 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Write accumulated emission. */ if (!is_zero(accum_emission)) { - kernel_accum_emission( + film_write_volume_emission( kg, state, accum_emission, render_buffer, object_lightgroup(kg, sd->object)); } # ifdef __DENOISING_FEATURES__ /* Write denoising features. */ if (write_denoising_features) { - kernel_write_denoising_features_volume( + film_write_denoising_features_volume( kg, state, accum_albedo, result.indirect_scatter, render_buffer); } # endif /* __DENOISING_FEATURES__ */ } -# ifdef __EMISSION__ /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ ccl_device_forceinline bool integrate_volume_sample_light( @@ -691,11 +696,10 @@ ccl_device_forceinline bool integrate_volume_sample_light( /* Sample position on a light. 
*/ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_volume_segment( - kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls)) { + kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, ls)) { return false; } @@ -715,7 +719,7 @@ ccl_device_forceinline void integrate_volume_direct_light( ccl_private const RNGState *ccl_restrict rng_state, const float3 P, ccl_private const ShaderVolumePhases *ccl_restrict phases, - ccl_private const float3 throughput, + ccl_private const Spectrum throughput, ccl_private LightSample *ccl_restrict ls) { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT); @@ -732,11 +736,10 @@ ccl_device_forceinline void integrate_volume_direct_light( { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_position( - kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) { + kg, rand_light.x, rand_light.y, sd->time, P, bounce, path_flag, ls)) { return; } } @@ -753,14 +756,14 @@ ccl_device_forceinline void integrate_volume_direct_light( * non-constant light sources. */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); + const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); if (is_zero(light_eval)) { return; } /* Evaluate BSDF. 
*/ BsdfEval phase_eval ccl_optional_struct_init; - const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval); + float phase_pdf = volume_shader_phase_eval(kg, state, sd, phases, ls->D, &phase_eval); if (ls->shader & SHADER_USE_MIS) { float mis_weight = light_sample_mis_weight_nee(kg, ls->pdf, phase_pdf); @@ -796,11 +799,11 @@ ccl_device_forceinline void integrate_volume_direct_light( const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); + const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - packed_float3 pass_diffuse_weight; - packed_float3 pass_glossy_weight; + PackedSpectrum pass_diffuse_weight; + PackedSpectrum pass_glossy_weight; if (shadow_flag & PATH_RAY_ANY_PASS) { /* Indirect bounce, use weights from earlier surface or volume bounce. */ @@ -810,8 +813,8 @@ ccl_device_forceinline void integrate_volume_direct_light( else { /* Direct light, no diffuse/glossy distinction needed for volumes. 
*/ shadow_flag |= PATH_RAY_VOLUME_PASS; - pass_diffuse_weight = packed_float3(one_float3()); - pass_glossy_weight = packed_float3(zero_float3()); + pass_diffuse_weight = one_spectrum(); + pass_glossy_weight = zero_spectrum(); } INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; @@ -847,9 +850,14 @@ ccl_device_forceinline void integrate_volume_direct_light( ls->group + 1 : kernel_data.background.lightgroup + 1; +# ifdef __PATH_GUIDING__ + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unlit_throughput) = throughput; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = INTEGRATOR_STATE( + state, guiding, path_segment); +# endif + integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); } -# endif /* Path tracing: scatter in new direction using phase function */ ccl_device_forceinline bool integrate_volume_phase_scatter( @@ -861,27 +869,54 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT); - float phase_u, phase_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v); + float2 rand_phase = path_state_rng_2D(kg, rng_state, PRNG_VOLUME_PHASE); + + ccl_private const ShaderVolumeClosure *svc = volume_shader_phase_pick(phases, &rand_phase); /* Phase closure, sample direction. 
*/ - float phase_pdf; + float phase_pdf = 0.0f, unguided_phase_pdf = 0.0f; BsdfEval phase_eval ccl_optional_struct_init; float3 phase_omega_in ccl_optional_struct_init; - differential3 phase_domega_in ccl_optional_struct_init; - - const int label = shader_volume_phase_sample(kg, - sd, - phases, - phase_u, - phase_v, - &phase_eval, - &phase_omega_in, - &phase_domega_in, - &phase_pdf); - - if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) { - return false; + float sampled_roughness = 1.0f; + int label; + +# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + if (kernel_data.integrator.use_guiding) { + label = volume_shader_phase_guided_sample(kg, + state, + sd, + svc, + rand_phase, + &phase_eval, + &phase_omega_in, + &phase_pdf, + &unguided_phase_pdf, + &sampled_roughness); + + if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) { + return false; + } + + INTEGRATOR_STATE_WRITE(state, path, unguided_throughput) *= phase_pdf / unguided_phase_pdf; + } + else +# endif + { + label = volume_shader_phase_sample(kg, + sd, + phases, + svc, + rand_phase, + &phase_eval, + &phase_omega_in, + &phase_pdf, + &sampled_roughness); + + if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) { + return false; + } + + unguided_phase_pdf = phase_pdf; } /* Setup ray. */ @@ -891,26 +926,31 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; # ifdef __RAY_DIFFERENTIALS__ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in); # endif // Save memory by storing last hit prim and object in isect INTEGRATOR_STATE_WRITE(state, isect, prim) = sd->prim; INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; + const Spectrum phase_weight = bsdf_eval_sum(&phase_eval) / phase_pdf; + + /* Add phase function sampling data to the path segment. 
*/ + guiding_record_volume_bounce( + kg, state, sd, phase_weight, phase_pdf, normalize(phase_omega_in), sampled_roughness); + /* Update throughput. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum throughput_phase = throughput * phase_weight; INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase; if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3(); - INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3(); + INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum(); + INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum(); } /* Update path state */ INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( - phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); + unguided_phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); path_state_next(kg, state, label); return true; @@ -949,6 +989,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i)) const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass); +# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + const float3 initial_throughput = INTEGRATOR_STATE(state, path, throughput); +# endif + /* TODO: expensive to zero closures? */ VolumeIntegrateResult result = {}; volume_integrate_heterogeneous(kg, @@ -966,17 +1010,50 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, * to be terminated. That will shading evaluating to leave out any scattering closures, * but emission and absorption are still handled for multiple importance sampling. 
*/ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ? - 0.0f : - INTEGRATOR_STATE(state, path, continuation_probability); - if (probability == 0.0f) { + const float continuation_probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ? + 0.0f : + INTEGRATOR_STATE( + state, path, continuation_probability); + if (continuation_probability == 0.0f) { return VOLUME_PATH_MISSED; } +# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + bool guiding_generated_new_segment = false; + if (kernel_data.integrator.use_guiding) { + /* Record transmittance using change in throughput. */ + float3 transmittance_weight = spectrum_to_rgb( + safe_divide_color(result.indirect_throughput, initial_throughput)); + guiding_record_volume_transmission(kg, state, transmittance_weight); + + if (result.indirect_scatter) { + const float3 P = ray->P + result.indirect_t * ray->D; + + /* Record volume segment up to direct scatter position. + * TODO: volume segment is wrong when direct_t and indirect_t. */ + if (result.direct_scatter && (result.direct_t == result.indirect_t)) { + guiding_record_volume_segment(kg, state, P, sd.I); + guiding_generated_new_segment = true; + } + +# if PATH_GUIDING_LEVEL >= 4 + /* TODO: this position will be wrong for direct light pdf computation, + * since the direct light position may be different? */ + volume_shader_prepare_guiding( + kg, state, &sd, &rng_state, P, ray->D, &result.direct_phases, direct_sample_method); +# endif + } + else { + /* No guiding if we don't scatter. */ + state->guiding.use_volume_guiding = false; + } + } +# endif + /* Direct light. 
*/ if (result.direct_scatter) { const float3 direct_P = ray->P + result.direct_t * ray->D; - result.direct_throughput /= probability; + result.direct_throughput /= continuation_probability; integrate_volume_direct_light(kg, state, &sd, @@ -989,16 +1066,22 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, /* Indirect light. * - * Only divide throughput by probability if we scatter. For the attenuation + * Only divide throughput by continuation_probability if we scatter. For the attenuation * case the next surface will already do this division. */ if (result.indirect_scatter) { - result.indirect_throughput /= probability; + result.indirect_throughput /= continuation_probability; } INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput; if (result.indirect_scatter) { sd.P = ray->P + result.indirect_t * ray->D; +# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1 + if (!guiding_generated_new_segment) { + guiding_record_volume_segment(kg, state, sd.P, sd.I); + } +# endif + if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) { return VOLUME_PATH_SCATTERED; } diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h deleted file mode 100644 index ed4d973e864..00000000000 --- a/intern/cycles/kernel/integrator/shader_eval.h +++ /dev/null @@ -1,952 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -/* Functions to evaluate shaders and use the resulting shader closures. 
*/ - -#pragma once - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf.h" -#include "kernel/closure/bsdf_util.h" -#include "kernel/closure/emissive.h" - -#include "kernel/film/accumulate.h" - -#include "kernel/svm/svm.h" - -#ifdef __OSL__ -# include "kernel/osl/shader.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Merging */ - -#if defined(__VOLUME__) -ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd) -{ - /* Merge identical closures to save closure space with stacked volumes. */ - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sci = &sd->closure[i]; - - if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - continue; - } - - for (int j = i + 1; j < sd->num_closure; j++) { - ccl_private ShaderClosure *scj = &sd->closure[j]; - if (sci->type != scj->type) { - continue; - } - - ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) - sci; - ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) - scj; - if (!(hgi->g == hgj->g)) { - continue; - } - - sci->weight += scj->weight; - sci->sample_weight += scj->sample_weight; - - int size = sd->num_closure - (j + 1); - if (size > 0) { - for (int k = 0; k < size; k++) { - scj[k] = scj[k + 1]; - } - } - - sd->num_closure--; - kernel_assert(sd->num_closure >= 0); - j--; - } - } -} - -ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict - phases, - ccl_private const ShaderData *ccl_restrict sd) -{ - phases->num_closure = 0; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *from_sc = &sd->closure[i]; - ccl_private const HenyeyGreensteinVolume *from_hg = - (ccl_private const HenyeyGreensteinVolume *)from_sc; - - if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; - - to_sc->weight = from_sc->weight; - to_sc->sample_weight = 
from_sc->sample_weight; - to_sc->g = from_hg->g; - phases->num_closure++; - if (phases->num_closure >= MAX_VOLUME_CLOSURE) { - break; - } - } - } -} -#endif /* __VOLUME__ */ - -ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg, - ConstIntegratorState state, - ccl_private ShaderData *sd, - const uint32_t path_flag) -{ - /* Filter out closures. */ - if (kernel_data.integrator.filter_closures) { - if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) { - sd->closure_emission_background = zero_float3(); - } - - if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) { - sd->flag &= ~SD_BSDF_HAS_EVAL; - } - - if (path_flag & PATH_RAY_CAMERA) { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - - if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) || - (CLOSURE_IS_BSDF_GLOSSY(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) || - (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) { - sc->type = CLOSURE_NONE_ID; - sc->sample_weight = 0.0f; - } - else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) { - sc->type = CLOSURE_HOLDOUT_ID; - sc->sample_weight = 0.0f; - sd->flag |= SD_HOLDOUT; - } - } - } - } - - /* Defensive sampling. - * - * We can likely also do defensive sampling at deeper bounces, particularly - * for cases like a perfect mirror but possibly also others. This will need - * a good heuristic. 
*/ - if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == - 0 && - sd->num_closure > 1) { - float sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sc->sample_weight = max(sc->sample_weight, 0.125f * sum); - } - } - } - - /* Filter glossy. - * - * Blurring of bsdf after bounces, for rays that have a small likelihood - * of following this particular path (diffuse, rough glossy) */ - if (kernel_data.integrator.filter_glossy != FLT_MAX -#ifdef __MNEE__ - && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID) -#endif - ) { - float blur_pdf = kernel_data.integrator.filter_glossy * - INTEGRATOR_STATE(state, path, min_ray_pdf); - - if (blur_pdf < 1.0f) { - float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF(sc->type)) { - bsdf_blur(kg, sc, blur_roughness); - } - } - } - } -} - -/* BSDF */ - -ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd, - const float3 omega_in) -{ - return dot(sd->N, omega_in) < 0.0f; -} - -ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_shader_flags) -{ - if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { - return false; - } - if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { - if (CLOSURE_IS_BSDF_DIFFUSE(type)) { - return true; - } - } - if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { - if (CLOSURE_IS_BSDF_GLOSSY(type)) { - return true; - } - } - if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { - if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { - return true; - } - } - return false; -} - -ccl_device_inline float 
_shader_bsdf_multi_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 omega_in, - const bool is_transmission, - ccl_private const ShaderClosure *skip_sc, - ccl_private BsdfEval *result_eval, - float sum_pdf, - float sum_sample_weight, - const uint light_shader_flags) -{ - /* This is the veach one-sample model with balance heuristic, - * some PDF factors drop out when using balance heuristic weighting. */ - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (sc == skip_sc) { - continue; - } - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - if (CLOSURE_IS_BSDF(sc->type) && !_shader_bsdf_exclude(sc->type, light_shader_flags)) { - float bsdf_pdf = 0.0f; - float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf); - - if (bsdf_pdf != 0.0f) { - bsdf_eval_accum(result_eval, sc->type, eval * sc->weight); - sum_pdf += bsdf_pdf * sc->sample_weight; - } - } - - sum_sample_weight += sc->sample_weight; - } - } - - return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; -} - -#ifndef __KERNEL_CUDA__ -ccl_device -#else -ccl_device_inline -#endif - float - shader_bsdf_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 omega_in, - const bool is_transmission, - ccl_private BsdfEval *bsdf_eval, - const uint light_shader_flags) -{ - bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_float3()); - - return _shader_bsdf_multi_eval( - kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); -} - -/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ -ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick( - ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu) -{ - int sampled = 0; - - if (sd->num_closure > 1) { - /* Pick a BSDF or based on sample weights. 
*/ - float sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - float r = (*randu) * sum; - float partial_sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - float next_sum = partial_sum + sc->sample_weight; - - if (r < next_sum) { - sampled = i; - - /* Rescale to reuse for direction sample, to better preserve stratification. */ - *randu = (r - partial_sum) / sc->sample_weight; - break; - } - - partial_sum = next_sum; - } - } - } - - return &sd->closure[sampled]; -} - -/* Return weight for picked BSSRDF. */ -ccl_device_inline float3 -shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, - ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) -{ - float3 weight = bssrdf_sc->weight; - - if (sd->num_closure > 1) { - float sum = 0.0f; - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - weight *= sum / bssrdf_sc->sample_weight; - } - - return weight; -} - -/* Sample direction for picked BSDF, and return evaluation and pdf for all - * BSDFs combined using MIS. */ -ccl_device int shader_bsdf_sample_closure(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private const ShaderClosure *sc, - float randu, - float randv, - ccl_private BsdfEval *bsdf_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - /* BSSRDF should already have been handled elsewhere. 
*/ - kernel_assert(CLOSURE_IS_BSDF(sc->type)); - - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) { - bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight); - - if (sd->num_closure > 1) { - const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in); - float sweight = sc->sample_weight; - *pdf = _shader_bsdf_multi_eval( - kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0); - } - } - - return label; -} - -ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd) -{ - float roughness = 0.0f; - float sum_weight = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF(sc->type)) { - /* sqrt once to undo the squaring from multiplying roughness on the - * two axes, and once for the squared roughness convention. */ - float weight = fabsf(average(sc->weight)); - roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); - sum_weight += weight; - } - } - - return (sum_weight > 0.0f) ? 
roughness / sum_weight : 0.0f; -} - -ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_HAS_ONLY_VOLUME) { - return one_float3(); - } - else if (sd->flag & SD_TRANSPARENT) { - return sd->closure_transparent_extinction; - } - else { - return zero_float3(); - } -} - -ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) -{ - if (sd->flag & SD_TRANSPARENT) { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - - if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { - sc->sample_weight = 0.0f; - sc->weight = zero_float3(); - } - } - - sd->flag &= ~SD_TRANSPARENT; - } -} - -ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd); - - alpha = max(alpha, zero_float3()); - alpha = min(alpha, one_float3()); - - return alpha; -} - -ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 
shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) - N += sc->N * fabsf(average(sc->weight)); - } - - return (is_zero(N)) ? sd->N : normalize(N); -} - -ccl_device float3 shader_bsdf_ao(KernelGlobals kg, - ccl_private const ShaderData *sd, - const float ao_factor, - ccl_private float3 *N_) -{ - float3 eval = zero_float3(); - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { - ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; - eval += sc->weight * ao_factor; - N += bsdf->N * fabsf(average(sc->weight)); - } - } - - *N_ = (is_zero(N)) ? sd->N : normalize(N); - return eval; -} - -#ifdef __SUBSURFACE__ -ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd) -{ - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSSRDF(sc->type)) { - ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; - float avg_weight = fabsf(average(sc->weight)); - - N += bssrdf->N * avg_weight; - } - } - - return (is_zero(N)) ? 
sd->N : normalize(N); -} -#endif /* __SUBSURFACE__ */ - -/* Constant emission optimization */ - -ccl_device bool shader_constant_emission_eval(KernelGlobals kg, - int shader, - ccl_private float3 *eval) -{ - int shader_index = shader & SHADER_MASK; - int shader_flag = kernel_data_fetch(shaders, shader_index).flags; - - if (shader_flag & SD_HAS_CONSTANT_EMISSION) { - *eval = make_float3(kernel_data_fetch(shaders, shader_index).constant_emission[0], - kernel_data_fetch(shaders, shader_index).constant_emission[1], - kernel_data_fetch(shaders, shader_index).constant_emission[2]); - - return true; - } - - return false; -} - -/* Background */ - -ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_EMISSION) { - return sd->closure_emission_background; - } - else { - return zero_float3(); - } -} - -/* Emission */ - -ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_EMISSION) { - return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; - } - else { - return zero_float3(); - } -} - -/* Holdout */ - -ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd) -{ - float3 weight = zero_float3(); - - /* For objects marked as holdout, preserve transparency and remove all other - * closures, replacing them with a holdout weight. 
*/ - if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { - if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { - weight = one_float3() - sd->closure_transparent_extinction; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { - sc->type = NBUILTIN_CLOSURES; - } - } - - sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); - } - else { - weight = one_float3(); - } - } - else { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_HOLDOUT(sc->type)) { - weight += sc->weight; - } - } - } - - return weight; -} - -/* Surface Evaluation */ - -template<uint node_feature_mask, typename ConstIntegratorGenericState> -ccl_device void shader_eval_surface(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *ccl_restrict sd, - ccl_global float *ccl_restrict buffer, - uint32_t path_flag, - bool use_caustics_storage = false) -{ - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. */ - int max_closures; - if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = use_caustics_storage ? 
CAUSTICS_MAX_CLOSURE : kernel_data.max_closures; - } - - sd->num_closure = 0; - sd->num_closure_left = max_closures; - -#ifdef __OSL__ - if (kg->osl) { - if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { - OSLShader::eval_background(kg, state, sd, path_flag); - } - else { - OSLShader::eval_surface(kg, state, sd, path_flag); - } - } - else -#endif - { -#ifdef __SVM__ - svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag); -#else - if (sd->object == OBJECT_NONE) { - sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); - sd->flag |= SD_EMISSION; - } - else { - ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( - sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); - if (bsdf != NULL) { - bsdf->N = sd->N; - sd->flag |= bsdf_diffuse_setup(bsdf); - } - } -#endif - } -} - -/* Volume */ - -#ifdef __VOLUME__ - -ccl_device_inline float _shader_volume_phase_multi_eval( - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - const float3 omega_in, - int skip_phase, - ccl_private BsdfEval *result_eval, - float sum_pdf, - float sum_sample_weight) -{ - for (int i = 0; i < phases->num_closure; i++) { - if (i == skip_phase) - continue; - - ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; - float phase_pdf = 0.0f; - float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); - - if (phase_pdf != 0.0f) { - bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - sum_pdf += phase_pdf * svc->sample_weight; - } - - sum_sample_weight += svc->sample_weight; - } - - return (sum_sample_weight > 0.0f) ? 
sum_pdf / sum_sample_weight : 0.0f; -} - -ccl_device float shader_volume_phase_eval(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - const float3 omega_in, - ccl_private BsdfEval *phase_eval) -{ - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_float3()); - - return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); -} - -ccl_device int shader_volume_phase_sample(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - float randu, - float randv, - ccl_private BsdfEval *phase_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - int sampled = 0; - - if (phases->num_closure > 1) { - /* pick a phase closure based on sample weights */ - float sum = 0.0f; - - for (sampled = 0; sampled < phases->num_closure; sampled++) { - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - sum += svc->sample_weight; - } - - float r = randu * sum; - float partial_sum = 0.0f; - - for (sampled = 0; sampled < phases->num_closure; sampled++) { - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - float next_sum = partial_sum + svc->sample_weight; - - if (r <= next_sum) { - /* Rescale to reuse for BSDF direction sample. 
*/ - randu = (r - partial_sum) / svc->sample_weight; - break; - } - - partial_sum = next_sum; - } - - if (sampled == phases->num_closure) { - *pdf = 0.0f; - return LABEL_NONE; - } - } - - /* todo: this isn't quite correct, we don't weight anisotropy properly - * depending on color channels, even if this is perhaps not a common case */ - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) { - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - } - - return label; -} - -ccl_device int shader_phase_sample_closure(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumeClosure *sc, - float randu, - float randv, - ccl_private BsdfEval *phase_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - - return label; -} - -/* Volume Evaluation */ - -template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState> -ccl_device_inline void shader_eval_volume(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *ccl_restrict sd, - const uint32_t path_flag, - StackReadOp stack_read) -{ - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. 
*/ - int max_closures; - if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = kernel_data.max_closures; - } - - /* reset closures once at the start, we will be accumulating the closures - * for all volumes in the stack into a single array of closures */ - sd->num_closure = 0; - sd->num_closure_left = max_closures; - sd->flag = 0; - sd->object_flag = 0; - - for (int i = 0;; i++) { - const VolumeStack entry = stack_read(i); - if (entry.shader == SHADER_NONE) { - break; - } - - /* Setup shader-data from stack. it's mostly setup already in - * shader_setup_from_volume, this switching should be quick. */ - sd->object = entry.object; - sd->lamp = LAMP_NONE; - sd->shader = entry.shader; - - sd->flag &= ~SD_SHADER_FLAGS; - sd->flag |= kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag &= ~SD_OBJECT_FLAGS; - - if (sd->object != OBJECT_NONE) { - sd->object_flag |= kernel_data_fetch(object_flag, sd->object); - -# ifdef __OBJECT_MOTION__ - /* todo: this is inefficient for motion blur, we should be - * caching matrices instead of recomputing them each step */ - shader_setup_object_transforms(kg, sd, sd->time); - - if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) != 0) { - AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY); - kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND); - - const float3 P = sd->P; - const float velocity_scale = kernel_data_fetch(objects, sd->object).velocity_scale; - const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? - 0.5f : - 0.0f; - const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ? 
- (1.0f - kernel_data.cam.shuttertime) + sd->time : - sd->time; - - /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity - * existed, or will exist, at the given time: - * - * `phi(x, T) = phi(x - (T - t) * u(x, T), t)` - * - * where - * - * x : position - * T : super-sampled time (or ray time) - * t : current time of the simulation (in rendering we assume this is center frame with - * relative time = 0) - * phi : the volume quantity - * u : the velocity field - * - * But first we need to determine the velocity field `u(x, T)`, which we can estimate also - * using semi-lagrangian advection. - * - * `u(x, T) = u(x - (T - t) * u(x, T), t)` - * - * This is the typical way to model self-advection in fluid dynamics, however, we do not - * account for other forces affecting the velocity during simulation (pressure, buoyancy, - * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better - * results, a higher order interpolation scheme can be used (at the cost of more lookups), - * or an interpolation of the velocity fields for the previous and next frames could also - * be used to estimate `u(x, T)` (which will cost more memory and lookups). - * - * References: - * "Eulerian Motion Blur", Kim and Ko, 2007 - * "Production Volume Rendering", Wreninge et al., 2012 - */ - - /* Find velocity. */ - float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc); - object_dir_transform(kg, sd, &velocity); - - /* Find advected P. */ - sd->P = P - (time - time_offset) * velocity_scale * velocity; - - /* Find advected velocity. */ - velocity = primitive_volume_attribute_float3(kg, sd, v_desc); - object_dir_transform(kg, sd, &velocity); - - /* Find advected P. 
*/ - sd->P = P - (time - time_offset) * velocity_scale * velocity; - } -# endif - } - - /* evaluate shader */ -# ifdef __SVM__ -# ifdef __OSL__ - if (kg->osl) { - OSLShader::eval_volume(kg, state, sd, path_flag); - } - else -# endif - { - svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>( - kg, state, sd, NULL, path_flag); - } -# endif - - /* Merge closures to avoid exceeding number of closures limit. */ - if (!shadow) { - if (i > 0) { - shader_merge_volume_closures(sd); - } - } - } -} - -#endif /* __VOLUME__ */ - -/* Displacement Evaluation */ - -template<typename ConstIntegratorGenericState> -ccl_device void shader_eval_displacement(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *sd) -{ - sd->num_closure = 0; - sd->num_closure_left = 0; - - /* this will modify sd->P */ -#ifdef __SVM__ -# ifdef __OSL__ - if (kg->osl) - OSLShader::eval_displacement(kg, state, sd); - else -# endif - { - svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>( - kg, state, sd, NULL, 0); - } -#endif -} - -/* Cryptomatte */ - -ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader) -{ - return kernel_data_fetch(shaders, (shader & SHADER_MASK)).cryptomatte_id; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h index ff63625aceb..a620853faea 100644 --- a/intern/cycles/kernel/integrator/shadow_catcher.h +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -3,7 +3,6 @@ #pragma once -#include "kernel/film/write_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/state_util.h" @@ -76,28 +75,6 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t return path_flag & PATH_RAY_SHADOW_CATCHER_PASS; } -/* Write shadow catcher passes on a bounce from the shadow catcher object. 
*/ -ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( - KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset; - - /* Count sample for the shadow catcher object. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); - - /* Since the split is done, the sample does not contribute to the matte, so accumulate it as - * transparency to the matte. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, - average(throughput)); -} - #endif /* __SHADOW_CATCHER__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h index c340467606d..d731d1df339 100644 --- a/intern/cycles/kernel/integrator/shadow_state_template.h +++ b/intern/cycles/kernel/integrator/shadow_state_template.h @@ -27,19 +27,29 @@ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_T /* enum PathRayFlag */ KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING) /* Throughput for shadow pass. 
*/ KERNEL_STRUCT_MEMBER(shadow_path, - packed_float3, + PackedSpectrum, unshadowed_throughput, KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE) /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */ -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) /* Number of intersections found by ray-tracing. */ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) /* Light group. */ KERNEL_STRUCT_MEMBER(shadow_path, uint8_t, lightgroup, KERNEL_FEATURE_PATH_TRACING) +/* Path guiding. */ +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, unlit_throughput, KERNEL_FEATURE_PATH_GUIDING) +#ifdef __PATH_GUIDING__ +KERNEL_STRUCT_MEMBER(shadow_path, + openpgl::cpp::PathSegment *, + path_segment, + KERNEL_FEATURE_PATH_GUIDING) +#else +KERNEL_STRUCT_MEMBER(shadow_path, uint64_t, path_segment, KERNEL_FEATURE_PATH_GUIDING) +#endif KERNEL_STRUCT_END(shadow_path) /********************************** Shadow Ray *******************************/ diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h index d10d31e930e..f0fdc6f0d54 100644 --- a/intern/cycles/kernel/integrator/state.h +++ b/intern/cycles/kernel/integrator/state.h @@ -31,6 +31,10 @@ #include "util/types.h" +#ifdef __PATH_GUIDING__ +# include "util/guiding.h" +#endif + #pragma once CCL_NAMESPACE_BEGIN @@ -140,7 +144,7 @@ typedef struct IntegratorStateGPU { * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors * from a kernel which operates on a shadow catcher state will cause bad memory access. 
*/ -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ /* Scalar access on CPU. */ @@ -159,7 +163,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState; # define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ ((state)->nested_struct[array_index].member) -#else /* __KERNEL_CPU__ */ +#else /* !__KERNEL_GPU__ */ /* Array access on GPU with Structure-of-Arrays. */ @@ -180,6 +184,6 @@ typedef int ConstIntegratorShadowState; # define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) -#endif /* __KERNEL_CPU__ */ +#endif /* !__KERNEL_GPU__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h index 4b03c665e17..40961b1c5fb 100644 --- a/intern/cycles/kernel/integrator/state_flow.h +++ b/intern/cycles/kernel/integrator/state_flow.h @@ -76,6 +76,9 @@ ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( &kernel_integrator_state.next_shadow_path_index[0], 1); atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; +# ifdef __PATH_GUIDING__ + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr; +# endif return shadow_state; } @@ -181,6 +184,9 @@ ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( { IntegratorShadowState shadow_state = (is_ao) ? 
&state->ao : &state->shadow; INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; +# ifdef __PATH_GUIDING__ + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr; +# endif return shadow_state; } diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h index 5c2af131945..610621f0abe 100644 --- a/intern/cycles/kernel/integrator/state_template.h +++ b/intern/cycles/kernel/integrator/state_template.h @@ -46,12 +46,15 @@ KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING) /* Continuation probability for path termination. */ KERNEL_STRUCT_MEMBER(path, float, continuation_probability, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING) +/* Factor to multiply with throughput to remove any guiding PDFs. + * Such throughput without guiding PDFs is used for Russian roulette termination. */ +KERNEL_STRUCT_MEMBER(path, float, unguided_throughput, KERNEL_FEATURE_PATH_GUIDING) /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) -KERNEL_STRUCT_MEMBER(path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) /* Denoising. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) /* Shader sorting. */ /* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? 
*/ KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING) @@ -84,8 +87,8 @@ KERNEL_STRUCT_END(isect) /*************** Subsurface closure state for subsurface kernel ***************/ KERNEL_STRUCT_BEGIN(subsurface) -KERNEL_STRUCT_MEMBER(subsurface, packed_float3, albedo, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_MEMBER(subsurface, packed_float3, radius, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, albedo, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, radius, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, packed_float3, Ng, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_END(subsurface) @@ -98,3 +101,33 @@ KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME) KERNEL_STRUCT_END_ARRAY(volume_stack, KERNEL_STRUCT_VOLUME_STACK_SIZE, KERNEL_STRUCT_VOLUME_STACK_SIZE) + +/************************************ Path Guiding *****************************/ +KERNEL_STRUCT_BEGIN(guiding) +#ifdef __PATH_GUIDING__ +/* Current path segment of the random walk/path. */ +KERNEL_STRUCT_MEMBER(guiding, + openpgl::cpp::PathSegment *, + path_segment, + KERNEL_FEATURE_PATH_GUIDING) +#else +/* Current path segment of the random walk/path. 
*/ +KERNEL_STRUCT_MEMBER(guiding, uint64_t, path_segment, KERNEL_FEATURE_PATH_GUIDING) +#endif +/* If surface guiding is enabled */ +KERNEL_STRUCT_MEMBER(guiding, bool, use_surface_guiding, KERNEL_FEATURE_PATH_GUIDING) +/* Random number used for additional guiding decisions (e.g., cache query, selection to use guiding + * or BSDF sampling) */ +KERNEL_STRUCT_MEMBER(guiding, float, sample_surface_guiding_rand, KERNEL_FEATURE_PATH_GUIDING) +/* The probability to use surface guiding (i.e., diffuse sampling prob * guiding prob). */ +KERNEL_STRUCT_MEMBER(guiding, float, surface_guiding_sampling_prob, KERNEL_FEATURE_PATH_GUIDING) +/* Probability of sampling a BSSRDF closure instead of a BSDF closure. */ +KERNEL_STRUCT_MEMBER(guiding, float, bssrdf_sampling_prob, KERNEL_FEATURE_PATH_GUIDING) +/* If volume guiding is enabled */ +KERNEL_STRUCT_MEMBER(guiding, bool, use_volume_guiding, KERNEL_FEATURE_PATH_GUIDING) +/* Random number used for additional guiding decisions (e.g., cache query, selection to use guiding + * or BSDF sampling) */ +KERNEL_STRUCT_MEMBER(guiding, float, sample_volume_guiding_rand, KERNEL_FEATURE_PATH_GUIDING) +/* The probability to use volume guiding. 
*/ +KERNEL_STRUCT_MEMBER(guiding, float, volume_guiding_sampling_prob, KERNEL_FEATURE_PATH_GUIDING) +KERNEL_STRUCT_END(guiding) diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h index 8dd58ad6bcd..168122d3a78 100644 --- a/intern/cycles/kernel/integrator/state_util.h +++ b/intern/cycles/kernel/integrator/state_util.h @@ -338,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl return to_state; } -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int) { return INTEGRATOR_STATE(state, path, bounce); diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index 2f96f215d8a..efd293e4141 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -15,9 +15,9 @@ #include "kernel/integrator/intersect_volume_stack.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/subsurface_disk.h" #include "kernel/integrator/subsurface_random_walk.h" +#include "kernel/integrator/surface_shader.h" CCL_NAMESPACE_BEGIN @@ -51,12 +51,10 @@ ccl_device int subsurface_bounce(KernelGlobals kg, PATH_RAY_SUBSURFACE_RANDOM_WALK); /* Compute weight, optionally including Fresnel from entry point. 
*/ - float3 weight = shader_bssrdf_sample_weight(sd, sc); -# ifdef __PRINCIPLED__ + Spectrum weight = surface_shader_bssrdf_sample_weight(sd, sc); if (bssrdf->roughness != FLT_MAX) { path_flag |= PATH_RAY_SUBSURFACE_USE_FRESNEL; } -# endif if (sd->flag & SD_BACKFACING) { path_flag |= PATH_RAY_SUBSURFACE_BACKFACING; @@ -70,8 +68,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg, if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { if (INTEGRATOR_STATE(state, path, bounce) == 0) { - INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3(); - INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3(); + INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum(); + INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum(); } } @@ -80,6 +78,9 @@ ccl_device int subsurface_bounce(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, subsurface, radius) = bssrdf->radius; INTEGRATOR_STATE_WRITE(state, subsurface, anisotropy) = bssrdf->anisotropy; + /* Path guiding. */ + guiding_record_bssrdf_weight(kg, state, weight, bssrdf->albedo); + return LABEL_SUBSURFACE_SCATTER; } @@ -91,7 +92,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, /* Get bump mapped normal from shader evaluation at exit point. */ float3 N = sd->N; if (sd->flag & SD_HAS_BSSRDF_BUMP) { - N = shader_bssrdf_normal(sd); + N = surface_shader_bssrdf_normal(sd); } /* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. 
*/ @@ -99,9 +100,8 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, sd->num_closure = 0; sd->num_closure_left = kernel_data.max_closures; - const float3 weight = one_float3(); + const Spectrum weight = one_spectrum(); -# ifdef __PRINCIPLED__ if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) { ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( sd, sizeof(PrincipledDiffuseBsdf), weight); @@ -112,9 +112,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT_EXIT); } } - else -# endif /* __PRINCIPLED__ */ - { + else { ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( sd, sizeof(DiffuseBsdf), weight); diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h index 2836934f6dd..16fb45392f4 100644 --- a/intern/cycles/kernel/integrator/subsurface_disk.h +++ b/intern/cycles/kernel/integrator/subsurface_disk.h @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ +#include "kernel/integrator/guiding.h" + CCL_NAMESPACE_BEGIN /* BSSRDF using disk based importance sampling. @@ -9,11 +11,11 @@ CCL_NAMESPACE_BEGIN * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf */ -ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, float r) +ccl_device_inline Spectrum subsurface_disk_eval(const Spectrum radius, float disk_r, float r) { - const float3 eval = bssrdf_eval(radius, r); + const Spectrum eval = bssrdf_eval(radius, r); const float pdf = bssrdf_pdf(radius, disk_r); - return (pdf > 0.0f) ? eval / pdf : zero_float3(); + return (pdf > 0.0f) ? 
eval / pdf : zero_spectrum(); } /* Subsurface scattering step, from a point on the surface to other @@ -25,8 +27,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, ccl_private LocalIntersection &ss_isect) { - float disk_u, disk_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v); + float2 rand_disk = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_DISK); /* Read shading point info from integrator state. */ const float3 P = INTEGRATOR_STATE(state, ray, P); @@ -37,7 +38,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); /* Read subsurface scattering parameters. */ - const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius); /* Pick random axis in local frame and point on disk. */ float3 disk_N, disk_T, disk_B; @@ -46,20 +47,20 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, disk_N = Ng; make_orthonormals(disk_N, &disk_T, &disk_B); - if (disk_v < 0.5f) { + if (rand_disk.y < 0.5f) { pick_pdf_N = 0.5f; pick_pdf_T = 0.25f; pick_pdf_B = 0.25f; - disk_v *= 2.0f; + rand_disk.y *= 2.0f; } - else if (disk_v < 0.75f) { + else if (rand_disk.y < 0.75f) { float3 tmp = disk_N; disk_N = disk_T; disk_T = tmp; pick_pdf_N = 0.25f; pick_pdf_T = 0.5f; pick_pdf_B = 0.25f; - disk_v = (disk_v - 0.5f) * 4.0f; + rand_disk.y = (rand_disk.y - 0.5f) * 4.0f; } else { float3 tmp = disk_N; @@ -68,14 +69,14 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, pick_pdf_N = 0.25f; pick_pdf_T = 0.25f; pick_pdf_B = 0.5f; - disk_v = (disk_v - 0.75f) * 4.0f; + rand_disk.y = (rand_disk.y - 0.75f) * 4.0f; } /* Sample point on disk. 
*/ - float phi = M_2PI_F * disk_v; + float phi = M_2PI_F * rand_disk.y; float disk_height, disk_r; - bssrdf_sample(radius, disk_u, &disk_r, &disk_height); + bssrdf_sample(radius, rand_disk.x, &disk_r, &disk_height); float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; @@ -108,7 +109,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, * traversal algorithm. */ sort_intersections_and_normals(ss_isect.hits, ss_isect.Ng, num_eval_hits); - float3 weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ + Spectrum weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ float sum_weights = 0.0f; for (int hit = 0; hit < num_eval_hits; hit++) { @@ -126,17 +127,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { /* Transform normal to world space. */ Transform itfm; - Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm); + object_fetch_transform_motion_test(kg, object, time, &itfm); hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng)); - - /* Transform t to world space, except for OptiX and MetalRT where it already is. */ -#ifdef __KERNEL_GPU_RAYTRACING__ - (void)tfm; -#else - float3 D = transform_direction(&itfm, ray.D); - D = normalize(D) * ss_isect.hits[hit].t; - ss_isect.hits[hit].t = len(transform_direction(&tfm, D)); -#endif } /* Quickly retrieve P and Ng without setting up ShaderData. */ @@ -159,7 +151,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, const float r = len(hit_P - P); /* Evaluate profiles. */ - const float3 weight = subsurface_disk_eval(radius, disk_r, r) * w; + const Spectrum weight = subsurface_disk_eval(radius, disk_r, r) * w; /* Store result. */ ss_isect.Ng[hit] = hit_Ng; @@ -172,18 +164,19 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, } /* Use importance resampling, sampling one of the hits proportional to weight. 
*/ - const float r = lcg_step_float(&lcg_state) * sum_weights; + const float rand_resample = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_DISK_RESAMPLE); + const float r = rand_resample * sum_weights; float partial_sum = 0.0f; for (int hit = 0; hit < num_eval_hits; hit++) { - const float3 weight = weights[hit]; + const Spectrum weight = weights[hit]; const float sample_weight = average(fabs(weight)); float next_sum = partial_sum + sample_weight; if (r < next_sum) { /* Return exit point. */ - INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight * sum_weights / sample_weight; - + const Spectrum resampled_weight = weight * sum_weights / sample_weight; + INTEGRATOR_STATE_WRITE(state, path, throughput) *= resampled_weight; ss_isect.hits[0] = ss_isect.hits[hit]; ss_isect.Ng[0] = ss_isect.Ng[hit]; @@ -191,6 +184,9 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, ray.D = ss_isect.Ng[hit]; ray.tmin = 0.0f; ray.tmax = 1.0f; + + guiding_record_bssrdf_bounce( + kg, state, 1.0f, Ng, -Ng, resampled_weight, INTEGRATOR_STATE(state, subsurface, albedo)); return true; } diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index c1691030817..fdcb66c32f5 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -5,6 +5,8 @@ #include "kernel/bvh/bvh.h" +#include "kernel/integrator/guiding.h" + CCL_NAMESPACE_BEGIN /* Random walk subsurface scattering. 
@@ -65,19 +67,20 @@ ccl_device void subsurface_random_walk_remap(const float albedo, *sigma_t = sigma_t_prime / (1.0f - g); } -ccl_device void subsurface_random_walk_coefficients(const float3 albedo, - const float3 radius, +ccl_device void subsurface_random_walk_coefficients(const Spectrum albedo, + const Spectrum radius, const float anisotropy, - ccl_private float3 *sigma_t, - ccl_private float3 *alpha, - ccl_private float3 *throughput) + ccl_private Spectrum *sigma_t, + ccl_private Spectrum *alpha, + ccl_private Spectrum *throughput) { - float sigma_t_x, sigma_t_y, sigma_t_z; - float alpha_x, alpha_y, alpha_z; - - subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x); - subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y); - subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z); + FOREACH_SPECTRUM_CHANNEL (i) { + subsurface_random_walk_remap(GET_SPECTRUM_CHANNEL(albedo, i), + GET_SPECTRUM_CHANNEL(radius, i), + anisotropy, + &GET_SPECTRUM_CHANNEL(*sigma_t, i), + &GET_SPECTRUM_CHANNEL(*alpha, i)); + } /* Throughput already contains closure weight at this point, which includes the * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo @@ -88,21 +91,12 @@ ccl_device void subsurface_random_walk_coefficients(const float3 albedo, * infinite phase functions. To avoid a sharp discontinuity as we go from * such values to 0.0, increase alpha and reduce the throughput to compensate. 
*/ const float min_alpha = 0.2f; - if (alpha_x < min_alpha) { - (*throughput).x *= alpha_x / min_alpha; - alpha_x = min_alpha; - } - if (alpha_y < min_alpha) { - (*throughput).y *= alpha_y / min_alpha; - alpha_y = min_alpha; - } - if (alpha_z < min_alpha) { - (*throughput).z *= alpha_z / min_alpha; - alpha_z = min_alpha; + FOREACH_SPECTRUM_CHANNEL (i) { + if (GET_SPECTRUM_CHANNEL(*alpha, i) < min_alpha) { + GET_SPECTRUM_CHANNEL(*throughput, i) *= GET_SPECTRUM_CHANNEL(*alpha, i) / min_alpha; + GET_SPECTRUM_CHANNEL(*alpha, i) = min_alpha; + } } - - *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); - *alpha = make_float3(alpha_x, alpha_y, alpha_z); } /* References for Dwivedi sampling: @@ -151,12 +145,12 @@ ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, f return dir.x * T + dir.y * B + dir.z * D; } -ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, - float t, - bool hit, - ccl_private float3 *transmittance) +ccl_device_forceinline Spectrum subsurface_random_walk_pdf(Spectrum sigma_t, + float t, + bool hit, + ccl_private Spectrum *transmittance) { - float3 T = volume_color_transmittance(sigma_t, t); + Spectrum T = volume_color_transmittance(sigma_t, t); if (transmittance) { *transmittance = T; } @@ -173,8 +167,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, ccl_private Ray &ray, ccl_private LocalIntersection &ss_isect) { - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + const float2 rand_bsdf = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF); const float3 P = INTEGRATOR_STATE(state, ray, P); const float3 N = INTEGRATOR_STATE(state, ray, D); @@ -187,7 +180,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* Sample diffuse surface scatter into the object. 
*/ float3 D; float pdf; - sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf); + sample_cos_hemisphere(-N, rand_bsdf.x, rand_bsdf.y, &D, &pdf); if (dot(-Ng, D) <= 0.0f) { return false; } @@ -205,22 +198,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, ray.self.light_object = OBJECT_NONE; ray.self.light_prim = PRIM_NONE; -#ifndef __KERNEL_GPU_RAYTRACING__ - /* Compute or fetch object transforms. */ - Transform ob_itfm ccl_optional_struct_init; - Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); -#endif - /* Convert subsurface to volume coefficients. * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ - const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo); - const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + const Spectrum albedo = INTEGRATOR_STATE(state, subsurface, albedo); + const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius); const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy); - float3 sigma_t, alpha; - float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput); + Spectrum sigma_t, alpha; + Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); - float3 sigma_s = sigma_t * alpha; + Spectrum sigma_s = sigma_t * alpha; /* Theoretically it should be better to use the exact alpha for the channel we're sampling at * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange @@ -243,7 +230,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); /* Modify state for RNGs, decorrelated from other paths. 
*/ - rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); + rng_state.rng_hash = hash_hp_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); /* Random walk until we hit the surface again. */ bool hit = false; @@ -255,10 +242,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f)); #ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL - float3 sigma_s_star = sigma_s * (1.0f - anisotropy); - float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star; - float3 sigma_t_org = sigma_t; - float3 sigma_s_org = sigma_s; + Spectrum sigma_s_star = sigma_s * (1.0f - anisotropy); + Spectrum sigma_t_star = sigma_t - sigma_s + sigma_s_star; + Spectrum sigma_t_org = sigma_t; + Spectrum sigma_s_org = sigma_s; const float anisotropy_org = anisotropy; const float guided_fraction_org = guided_fraction; #endif @@ -270,7 +257,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, #ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL // shadow with local variables according to depth float anisotropy, guided_fraction; - float3 sigma_s, sigma_t; + Spectrum sigma_s, sigma_t; if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) { anisotropy = anisotropy_org; guided_fraction = guided_fraction_org; @@ -286,11 +273,11 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, #endif /* Sample color channel, use MIS with balance heuristic. 
*/ - float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL); - float3 channel_pdf; + float rphase = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_PHASE_CHANNEL); + Spectrum channel_pdf; int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf); float sample_sigma_t = volume_channel_get(sigma_t, channel); - float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE); + float randt = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_SCATTER_DISTANCE); /* We need the result of the ray-cast to compute the full guided PDF, so just remember the * relevant terms to avoid recomputing them later. */ @@ -303,7 +290,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* For the initial ray, we already know the direction, so just do classic distance sampling. */ if (bounce > 0) { /* Decide whether we should use guided or classic sampling. */ - bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction); + bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_STRATEGY) < + guided_fraction); /* Determine if we want to sample away from the incoming interface. * This only happens if we found a nearby opposite interface, and the probability for it @@ -317,27 +305,28 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance); backward_fraction = 1.0f / (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length)); - guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction; + guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_DIRECTION) < + backward_fraction; } /* Sample scattering direction. 
*/ - float scatter_u, scatter_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v); + const float2 rand_scatter = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF); float cos_theta; float hg_pdf; if (guided) { - cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); + cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, rand_scatter.x); /* The backwards guiding distribution is just mirrored along `sd->N`, so swapping the * sign here is enough to sample from that instead. */ if (guide_backward) { cos_theta = -cos_theta; } - float3 newD = direction_from_cosine(N, cos_theta, scatter_v); + float3 newD = direction_from_cosine(N, cos_theta, rand_scatter.y); hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy); ray.D = newD; } else { - float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf); + float3 newD = henyey_greenstrein_sample( + ray.D, anisotropy, rand_scatter.x, rand_scatter.y, &hg_pdf); cos_theta = dot(newD, N); ray.D = newD; } @@ -363,7 +352,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, } } - /* Sample direction along ray. */ + /* Sample distance along ray. */ float t = -logf(1.0f - randt) / sample_sigma_t; /* On the first bounce, we use the ray-cast to check if the opposite side is nearby. @@ -383,15 +372,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, hit = (ss_isect.num_hits > 0); if (hit) { -#ifdef __KERNEL_GPU_RAYTRACING__ - /* t is always in world space with OptiX and MetalRT. */ ray.tmax = ss_isect.hits[0].t; -#else - /* Compute world space distance to surface hit. */ - float3 D = transform_direction(&ob_itfm, ray.D); - D = normalize(D) * ss_isect.hits[0].t; - ray.tmax = len(transform_direction(&ob_tfm, D)); -#endif } if (bounce == 0) { @@ -413,16 +394,17 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* Advance to new scatter location. 
*/ ray.P += t * ray.D; - float3 transmittance; - float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); + Spectrum transmittance; + Spectrum pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); if (bounce > 0) { /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */ - float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); + Spectrum guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); if (have_opposite_interface) { /* First step of MIS: Depending on geometry we might have two methods for guided * sampling, so perform MIS between them. */ - float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL); + Spectrum back_pdf = subsurface_random_walk_pdf( + backward_stretching * sigma_t, t, hit, NULL); guided_pdf = mix( guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction); } @@ -444,9 +426,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* If we hit the surface, we are done. */ break; } - else if (throughput.x < VOLUME_THROUGHPUT_EPSILON && - throughput.y < VOLUME_THROUGHPUT_EPSILON && - throughput.z < VOLUME_THROUGHPUT_EPSILON) { + else if (reduce_max(throughput) < VOLUME_THROUGHPUT_EPSILON) { /* Avoid unnecessary work and precision issue when throughput gets really small. 
*/ break; } @@ -454,6 +434,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, if (hit) { kernel_assert(isfinite_safe(throughput)); + + guiding_record_bssrdf_bounce( + kg, + state, + pdf, + N, + D, + safe_divide_color(throughput, INTEGRATOR_STATE(state, path, throughput)), + albedo); + INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; } diff --git a/intern/cycles/kernel/integrator/surface_shader.h b/intern/cycles/kernel/integrator/surface_shader.h new file mode 100644 index 00000000000..6c0097b11bd --- /dev/null +++ b/intern/cycles/kernel/integrator/surface_shader.h @@ -0,0 +1,860 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Functions to evaluate shaders. */ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/emissive.h" + +#include "kernel/integrator/guiding.h" + +#ifdef __SVM__ +# include "kernel/svm/svm.h" +#endif +#ifdef __OSL__ +# include "kernel/osl/osl.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Guiding */ + +#ifdef __PATH_GUIDING__ +ccl_device_inline void surface_shader_prepare_guiding(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private const RNGState *rng_state) +{ + /* Have any BSDF to guide? 
*/ + if (!(kernel_data.integrator.use_surface_guiding && (sd->flag & SD_BSDF_HAS_EVAL))) { + state->guiding.use_surface_guiding = false; + return; + } + + const float surface_guiding_probability = kernel_data.integrator.surface_guiding_probability; + float rand_bsdf_guiding = path_state_rng_1D(kg, rng_state, PRNG_SURFACE_BSDF_GUIDING); + + /* Compute proportion of diffuse BSDFs and BSSRDFs. */ + float diffuse_sampling_fraction = 0.0f; + float bssrdf_sampling_fraction = 0.0f; + float bsdf_bssrdf_sampling_sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + const float sweight = sc->sample_weight; + kernel_assert(sweight >= 0.0f); + + bsdf_bssrdf_sampling_sum += sweight; + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) && sc->type < CLOSURE_BSDF_TRANSLUCENT_ID) { + diffuse_sampling_fraction += sweight; + } + if (CLOSURE_IS_BSSRDF(sc->type)) { + bssrdf_sampling_fraction += sweight; + } + } + } + + if (bsdf_bssrdf_sampling_sum > 0.0f) { + diffuse_sampling_fraction /= bsdf_bssrdf_sampling_sum; + bssrdf_sampling_fraction /= bsdf_bssrdf_sampling_sum; + } + + /* Init guiding (diffuse BSDFs only for now). */ + if (!(diffuse_sampling_fraction > 0.0f && + guiding_bsdf_init(kg, state, sd->P, sd->N, rand_bsdf_guiding))) { + state->guiding.use_surface_guiding = false; + return; + } + + state->guiding.use_surface_guiding = true; + state->guiding.surface_guiding_sampling_prob = surface_guiding_probability * + diffuse_sampling_fraction; + state->guiding.bssrdf_sampling_prob = bssrdf_sampling_fraction; + state->guiding.sample_surface_guiding_rand = rand_bsdf_guiding; + + kernel_assert(state->guiding.surface_guiding_sampling_prob > 0.0f && + state->guiding.surface_guiding_sampling_prob <= 1.0f); +} +#endif + +ccl_device_inline void surface_shader_prepare_closures(KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + const uint32_t path_flag) +{ + /* Filter out closures. 
*/ + if (kernel_data.integrator.filter_closures) { + if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) { + sd->closure_emission_background = zero_spectrum(); + } + + if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) { + sd->flag &= ~SD_BSDF_HAS_EVAL; + } + + if (path_flag & PATH_RAY_CAMERA) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + + if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) || + (CLOSURE_IS_BSDF_GLOSSY(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) || + (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) { + sc->type = CLOSURE_NONE_ID; + sc->sample_weight = 0.0f; + } + else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) { + sc->type = CLOSURE_HOLDOUT_ID; + sc->sample_weight = 0.0f; + sd->flag |= SD_HOLDOUT; + } + } + } + } + + /* Defensive sampling. + * + * We can likely also do defensive sampling at deeper bounces, particularly + * for cases like a perfect mirror but possibly also others. This will need + * a good heuristic. */ + if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == + 0 && + sd->num_closure > 1) { + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sc->sample_weight = max(sc->sample_weight, 0.125f * sum); + } + } + } + + /* Filter glossy. 
+ * + * Blurring of bsdf after bounces, for rays that have a small likelihood + * of following this particular path (diffuse, rough glossy) */ + if (kernel_data.integrator.filter_glossy != FLT_MAX +#ifdef __MNEE__ + && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID) +#endif + ) { + float blur_pdf = kernel_data.integrator.filter_glossy * + INTEGRATOR_STATE(state, path, min_ray_pdf); + + if (blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF(sc->type)) { + bsdf_blur(kg, sc, blur_roughness); + } + } + } + } +} + +/* BSDF */ +#if 0 +ccl_device_inline void surface_shader_validate_bsdf_sample(const KernelGlobals kg, + const ShaderClosure *sc, + const float3 omega_in, + const int org_label, + const float2 org_roughness, + const float org_eta) +{ + /* Validate the the bsdf_label and bsdf_roughness_eta functions + * by estimating the values after a bsdf sample. 
*/ + const int comp_label = bsdf_label(kg, sc, omega_in); + kernel_assert(org_label == comp_label); + + float2 comp_roughness; + float comp_eta; + bsdf_roughness_eta(kg, sc, &comp_roughness, &comp_eta); + kernel_assert(org_eta == comp_eta); + kernel_assert(org_roughness.x == comp_roughness.x); + kernel_assert(org_roughness.y == comp_roughness.y); +} +#endif + +ccl_device_forceinline bool _surface_shader_exclude(ClosureType type, uint light_shader_flags) +{ + if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { + return false; + } + if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { + if (CLOSURE_IS_BSDF_DIFFUSE(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { + if (CLOSURE_IS_BSDF_GLOSSY(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { + if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { + return true; + } + } + return false; +} + +ccl_device_inline float _surface_shader_bsdf_eval_mis(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + ccl_private const ShaderClosure *skip_sc, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight, + const uint light_shader_flags) +{ + /* This is the veach one-sample model with balance heuristic, + * some PDF factors drop out when using balance heuristic weighting. */ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (sc == skip_sc) { + continue; + } + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) { + float bsdf_pdf = 0.0f; + Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); + + if (bsdf_pdf != 0.0f) { + bsdf_eval_accum(result_eval, sc->type, eval * sc->weight); + sum_pdf += bsdf_pdf * sc->sample_weight; + } + } + + sum_sample_weight += sc->sample_weight; + } + } + + return (sum_sample_weight > 0.0f) ? 
sum_pdf / sum_sample_weight : 0.0f; +} + +ccl_device_inline float surface_shader_bsdf_eval_pdfs(const KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + ccl_private BsdfEval *result_eval, + ccl_private float *pdfs, + const uint light_shader_flags) +{ + /* This is the veach one-sample model with balance heuristic, some pdf + * factors drop out when using balance heuristic weighting. */ + float sum_pdf = 0.0f; + float sum_sample_weight = 0.0f; + bsdf_eval_init(result_eval, CLOSURE_NONE_ID, zero_spectrum()); + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) { + float bsdf_pdf = 0.0f; + Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); + kernel_assert(bsdf_pdf >= 0.0f); + if (bsdf_pdf != 0.0f) { + bsdf_eval_accum(result_eval, sc->type, eval * sc->weight); + sum_pdf += bsdf_pdf * sc->sample_weight; + kernel_assert(bsdf_pdf * sc->sample_weight >= 0.0f); + pdfs[i] = bsdf_pdf * sc->sample_weight; + } + else { + pdfs[i] = 0.0f; + } + } + else { + pdfs[i] = 0.0f; + } + + sum_sample_weight += sc->sample_weight; + } + else { + pdfs[i] = 0.0f; + } + } + if (sum_pdf > 0.0f) { + for (int i = 0; i < sd->num_closure; i++) { + pdfs[i] /= sum_pdf; + } + } + + return (sum_sample_weight > 0.0f) ? 
sum_pdf / sum_sample_weight : 0.0f; +} + +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif + float + surface_shader_bsdf_eval(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + const float3 omega_in, + ccl_private BsdfEval *bsdf_eval, + const uint light_shader_flags) +{ + bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_spectrum()); + + float pdf = _surface_shader_bsdf_eval_mis( + kg, sd, omega_in, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); + +#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + if (state->guiding.use_surface_guiding) { + const float guiding_sampling_prob = state->guiding.surface_guiding_sampling_prob; + const float bssrdf_sampling_prob = state->guiding.bssrdf_sampling_prob; + const float guide_pdf = guiding_bsdf_pdf(kg, state, omega_in); + pdf = (guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob)) + + (1.0f - guiding_sampling_prob) * pdf; + } +#endif + + return pdf; +} + +/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ +ccl_device_inline ccl_private const ShaderClosure *surface_shader_bsdf_bssrdf_pick( + ccl_private const ShaderData *ccl_restrict sd, ccl_private float2 *rand_bsdf) +{ + int sampled = 0; + + if (sd->num_closure > 1) { + /* Pick a BSDF or based on sample weights. */ + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + float r = (*rand_bsdf).x * sum; + float partial_sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + float next_sum = partial_sum + sc->sample_weight; + + if (r < next_sum) { + sampled = i; + + /* Rescale to reuse for direction sample, to better preserve stratification. 
*/ + (*rand_bsdf).x = (r - partial_sum) / sc->sample_weight; + break; + } + + partial_sum = next_sum; + } + } + } + + return &sd->closure[sampled]; +} + +/* Return weight for picked BSSRDF. */ +ccl_device_inline Spectrum +surface_shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, + ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) +{ + Spectrum weight = bssrdf_sc->weight; + + if (sd->num_closure > 1) { + float sum = 0.0f; + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + weight *= sum / bssrdf_sc->sample_weight; + } + + return weight; +} + +#ifdef __PATH_GUIDING__ +/* Sample direction for picked BSDF, and return evaluation and pdf for all + * BSDFs combined using MIS. */ + +ccl_device int surface_shader_bsdf_guided_sample_closure(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private const ShaderClosure *sc, + const float2 rand_bsdf, + ccl_private BsdfEval *bsdf_eval, + ccl_private float3 *omega_in, + ccl_private float *bsdf_pdf, + ccl_private float *unguided_bsdf_pdf, + ccl_private float2 *sampled_rougness, + ccl_private float *eta) +{ + /* BSSRDF should already have been handled elsewhere. */ + kernel_assert(CLOSURE_IS_BSDF(sc->type)); + + const bool use_surface_guiding = state->guiding.use_surface_guiding; + const float guiding_sampling_prob = state->guiding.surface_guiding_sampling_prob; + const float bssrdf_sampling_prob = state->guiding.bssrdf_sampling_prob; + + /* Decide between sampling guiding distribution and BSDF. 
*/ + bool sample_guiding = false; + float rand_bsdf_guiding = state->guiding.sample_surface_guiding_rand; + + if (use_surface_guiding && rand_bsdf_guiding < guiding_sampling_prob) { + sample_guiding = true; + rand_bsdf_guiding /= guiding_sampling_prob; + } + else { + rand_bsdf_guiding -= guiding_sampling_prob; + rand_bsdf_guiding /= (1.0f - guiding_sampling_prob); + } + + /* Initialize to zero. */ + int label = LABEL_NONE; + Spectrum eval = zero_spectrum(); + bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, eval); + + *unguided_bsdf_pdf = 0.0f; + float guide_pdf = 0.0f; + + if (sample_guiding) { + /* Sample guiding distribution. */ + guide_pdf = guiding_bsdf_sample(kg, state, rand_bsdf, omega_in); + *bsdf_pdf = 0.0f; + + if (guide_pdf != 0.0f) { + float unguided_bsdf_pdfs[MAX_CLOSURE]; + + *unguided_bsdf_pdf = surface_shader_bsdf_eval_pdfs( + kg, sd, *omega_in, bsdf_eval, unguided_bsdf_pdfs, 0); + *bsdf_pdf = (guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob)) + + ((1.0f - guiding_sampling_prob) * (*unguided_bsdf_pdf)); + float sum_pdfs = 0.0f; + + if (*unguided_bsdf_pdf > 0.0f) { + int idx = -1; + for (int i = 0; i < sd->num_closure; i++) { + sum_pdfs += unguided_bsdf_pdfs[i]; + if (rand_bsdf_guiding <= sum_pdfs) { + idx = i; + break; + } + } + + kernel_assert(idx >= 0); + /* Set the default idx to the last in the list. + * in case of numerical problems and rand_bsdf_guiding is just >=1.0f and + * the sum of all unguided_bsdf_pdfs is just < 1.0f. */ + idx = (rand_bsdf_guiding > sum_pdfs) ? sd->num_closure - 1 : idx; + + label = bsdf_label(kg, &sd->closure[idx], *omega_in); + } + } + + kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f); + + *sampled_rougness = make_float2(1.0f, 1.0f); + *eta = 1.0f; + } + else { + /* Sample BSDF. 
*/ + *bsdf_pdf = 0.0f; + label = bsdf_sample(kg, + sd, + sc, + rand_bsdf.x, + rand_bsdf.y, + &eval, + omega_in, + unguided_bsdf_pdf, + sampled_rougness, + eta); +# if 0 + if (*unguided_bsdf_pdf > 0.0f) { + surface_shader_validate_bsdf_sample(kg, sc, *omega_in, label, sampled_roughness, eta); + } +# endif + + if (*unguided_bsdf_pdf != 0.0f) { + bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight); + + kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f); + + if (sd->num_closure > 1) { + float sweight = sc->sample_weight; + *unguided_bsdf_pdf = _surface_shader_bsdf_eval_mis( + kg, sd, *omega_in, sc, bsdf_eval, (*unguided_bsdf_pdf) * sweight, sweight, 0); + kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f); + } + *bsdf_pdf = *unguided_bsdf_pdf; + + if (use_surface_guiding) { + guide_pdf = guiding_bsdf_pdf(kg, state, *omega_in); + *bsdf_pdf *= 1.0f - guiding_sampling_prob; + *bsdf_pdf += guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob); + } + } + + kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f); + } + + return label; +} +#endif + +/* Sample direction for picked BSDF, and return evaluation and pdf for all + * BSDFs combined using MIS. */ +ccl_device int surface_shader_bsdf_sample_closure(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private const ShaderClosure *sc, + const float2 rand_bsdf, + ccl_private BsdfEval *bsdf_eval, + ccl_private float3 *omega_in, + ccl_private float *pdf, + ccl_private float2 *sampled_roughness, + ccl_private float *eta) +{ + /* BSSRDF should already have been handled elsewhere. 
*/ + kernel_assert(CLOSURE_IS_BSDF(sc->type)); + + int label; + Spectrum eval = zero_spectrum(); + + *pdf = 0.0f; + label = bsdf_sample( + kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, omega_in, pdf, sampled_roughness, eta); + + if (*pdf != 0.0f) { + bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight); + + if (sd->num_closure > 1) { + float sweight = sc->sample_weight; + *pdf = _surface_shader_bsdf_eval_mis( + kg, sd, *omega_in, sc, bsdf_eval, *pdf * sweight, sweight, 0); + } + } + else { + bsdf_eval_init(bsdf_eval, sc->type, zero_spectrum()); + } + + return label; +} + +ccl_device float surface_shader_average_roughness(ccl_private const ShaderData *sd) +{ + float roughness = 0.0f; + float sum_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF(sc->type)) { + /* sqrt once to undo the squaring from multiplying roughness on the + * two axes, and once for the squared roughness convention. */ + float weight = fabsf(average(sc->weight)); + roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); + sum_weight += weight; + } + } + + return (sum_weight > 0.0f) ? 
roughness / sum_weight : 0.0f; +} + +ccl_device Spectrum surface_shader_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return one_spectrum(); + } + else if (sd->flag & SD_TRANSPARENT) { + return sd->closure_transparent_extinction; + } + else { + return zero_spectrum(); + } +} + +ccl_device void surface_shader_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) +{ + if (sd->flag & SD_TRANSPARENT) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + + if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { + sc->sample_weight = 0.0f; + sc->weight = zero_spectrum(); + } + } + + sd->flag &= ~SD_TRANSPARENT; + } +} + +ccl_device Spectrum surface_shader_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum alpha = one_spectrum() - surface_shader_transparency(kg, sd); + + alpha = saturate(alpha); + + return alpha; +} + +ccl_device Spectrum surface_shader_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device Spectrum surface_shader_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device Spectrum surface_shader_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device 
float3 surface_shader_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) + N += sc->N * fabsf(average(sc->weight)); + } + + return (is_zero(N)) ? sd->N : normalize(N); +} + +ccl_device Spectrum surface_shader_ao(KernelGlobals kg, + ccl_private const ShaderData *sd, + const float ao_factor, + ccl_private float3 *N_) +{ + Spectrum eval = zero_spectrum(); + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { + ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; + eval += sc->weight * ao_factor; + N += bsdf->N * fabsf(average(sc->weight)); + } + } + + *N_ = (is_zero(N)) ? sd->N : normalize(N); + return eval; +} + +#ifdef __SUBSURFACE__ +ccl_device float3 surface_shader_bssrdf_normal(ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSSRDF(sc->type)) { + ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; + float avg_weight = fabsf(average(sc->weight)); + + N += bssrdf->N * avg_weight; + } + } + + return (is_zero(N)) ? 
sd->N : normalize(N); +} +#endif /* __SUBSURFACE__ */ + +/* Constant emission optimization */ + +ccl_device bool surface_shader_constant_emission(KernelGlobals kg, + int shader, + ccl_private Spectrum *eval) +{ + int shader_index = shader & SHADER_MASK; + int shader_flag = kernel_data_fetch(shaders, shader_index).flags; + + if (shader_flag & SD_HAS_CONSTANT_EMISSION) { + const float3 emission_rgb = make_float3( + kernel_data_fetch(shaders, shader_index).constant_emission[0], + kernel_data_fetch(shaders, shader_index).constant_emission[1], + kernel_data_fetch(shaders, shader_index).constant_emission[2]); + *eval = rgb_to_spectrum(emission_rgb); + + return true; + } + + return false; +} + +/* Background */ + +ccl_device Spectrum surface_shader_background(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return sd->closure_emission_background; + } + else { + return zero_spectrum(); + } +} + +/* Emission */ + +ccl_device Spectrum surface_shader_emission(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; + } + else { + return zero_spectrum(); + } +} + +/* Holdout */ + +ccl_device Spectrum surface_shader_apply_holdout(KernelGlobals kg, ccl_private ShaderData *sd) +{ + Spectrum weight = zero_spectrum(); + + /* For objects marked as holdout, preserve transparency and remove all other + * closures, replacing them with a holdout weight. 
*/ + if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { + if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { + weight = one_spectrum() - sd->closure_transparent_extinction; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { + sc->type = NBUILTIN_CLOSURES; + } + } + + sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); + } + else { + weight = one_spectrum(); + } + } + else { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_HOLDOUT(sc->type)) { + weight += sc->weight; + } + } + } + + return weight; +} + +/* Surface Evaluation */ + +template<uint node_feature_mask, typename ConstIntegratorGenericState> +ccl_device void surface_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + ccl_global float *ccl_restrict buffer, + uint32_t path_flag, + bool use_caustics_storage = false) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = use_caustics_storage ? 
CAUSTICS_MAX_CLOSURE : kernel_data.max_closures; + } + + sd->num_closure = 0; + sd->num_closure_left = max_closures; + +#ifdef __OSL__ + if (kg->osl) { + if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { + OSLShader::eval_background(kg, state, sd, path_flag); + } + else { + OSLShader::eval_surface(kg, state, sd, path_flag); + } + } + else +#endif + { +#ifdef __SVM__ + svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag); +#else + if (sd->object == OBJECT_NONE) { + sd->closure_emission_background = make_spectrum(0.8f); + sd->flag |= SD_EMISSION; + } + else { + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), make_spectrum(0.8f)); + if (bsdf != NULL) { + bsdf->N = sd->N; + sd->flag |= bsdf_diffuse_setup(bsdf); + } + } +#endif + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/volume_shader.h b/intern/cycles/kernel/integrator/volume_shader.h new file mode 100644 index 00000000000..0ff968723a1 --- /dev/null +++ b/intern/cycles/kernel/integrator/volume_shader.h @@ -0,0 +1,519 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Volume shader evaluation and sampling. */ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/emissive.h" + +#ifdef __SVM__ +# include "kernel/svm/svm.h" +#endif +#ifdef __OSL__ +# include "kernel/osl/osl.h" +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef __VOLUME__ + +/* Merging */ + +ccl_device_inline void volume_shader_merge_closures(ccl_private ShaderData *sd) +{ + /* Merge identical closures to save closure space with stacked volumes. 
*/ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sci = &sd->closure[i]; + + if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + continue; + } + + for (int j = i + 1; j < sd->num_closure; j++) { + ccl_private ShaderClosure *scj = &sd->closure[j]; + if (sci->type != scj->type) { + continue; + } + + ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) + sci; + ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) + scj; + if (!(hgi->g == hgj->g)) { + continue; + } + + sci->weight += scj->weight; + sci->sample_weight += scj->sample_weight; + + int size = sd->num_closure - (j + 1); + if (size > 0) { + for (int k = 0; k < size; k++) { + scj[k] = scj[k + 1]; + } + } + + sd->num_closure--; + kernel_assert(sd->num_closure >= 0); + j--; + } + } +} + +ccl_device_inline void volume_shader_copy_phases(ccl_private ShaderVolumePhases *ccl_restrict + phases, + ccl_private const ShaderData *ccl_restrict sd) +{ + phases->num_closure = 0; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *from_sc = &sd->closure[i]; + ccl_private const HenyeyGreensteinVolume *from_hg = + (ccl_private const HenyeyGreensteinVolume *)from_sc; + + if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; + + to_sc->weight = from_sc->weight; + to_sc->sample_weight = from_sc->sample_weight; + to_sc->g = from_hg->g; + phases->num_closure++; + if (phases->num_closure >= MAX_VOLUME_CLOSURE) { + break; + } + } + } +} + +/* Guiding */ + +# ifdef __PATH_GUIDING__ +ccl_device_inline void volume_shader_prepare_guiding(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private const RNGState *rng_state, + const float3 P, + const float3 D, + ccl_private ShaderVolumePhases *phases, + const VolumeSampleMethod direct_sample_method) +{ + /* Have any phase functions to 
guide? */ + const int num_phases = phases->num_closure; + if (!kernel_data.integrator.use_volume_guiding || num_phases == 0) { + state->guiding.use_volume_guiding = false; + return; + } + + const float volume_guiding_probability = kernel_data.integrator.volume_guiding_probability; + float rand_phase_guiding = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_GUIDING); + + /* If we have more than one phase function we select one random based on its + * sample weight to calculate the product distribution for guiding. */ + int phase_id = 0; + float phase_weight = 1.0f; + + if (num_phases > 1) { + /* Pick a phase closure based on sample weights. */ + float sum = 0.0f; + + for (phase_id = 0; phase_id < num_phases; phase_id++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id]; + sum += svc->sample_weight; + } + + float r = rand_phase_guiding * sum; + float partial_sum = 0.0f; + + for (phase_id = 0; phase_id < num_phases; phase_id++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id]; + float next_sum = partial_sum + svc->sample_weight; + + if (r <= next_sum) { + /* Rescale to reuse. */ + rand_phase_guiding = (r - partial_sum) / svc->sample_weight; + phase_weight = svc->sample_weight / sum; + break; + } + + partial_sum = next_sum; + } + + /* Adjust the sample weight of the component used for guiding. */ + phases->closure[phase_id].sample_weight *= volume_guiding_probability; + } + + /* Init guiding for selected phase function. 
*/ + ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id]; + if (!guiding_phase_init(kg, state, P, D, svc->g, rand_phase_guiding)) { + state->guiding.use_volume_guiding = false; + return; + } + + state->guiding.use_volume_guiding = true; + state->guiding.sample_volume_guiding_rand = rand_phase_guiding; + state->guiding.volume_guiding_sampling_prob = volume_guiding_probability * phase_weight; + + kernel_assert(state->guiding.volume_guiding_sampling_prob > 0.0f && + state->guiding.volume_guiding_sampling_prob <= 1.0f); +} +# endif + +/* Phase Evaluation & Sampling */ + +/* Randomly sample a volume phase function proportional to ShaderClosure.sample_weight. + * + * Returns a pointer to the picked entry of phases->closure; entry 0 is the default when + * nothing else is picked. When a closure is picked from several, rand_phase->x is replaced + * by its position inside that closure's weight interval so the caller can reuse it. */ +ccl_device_inline ccl_private const ShaderVolumeClosure *volume_shader_phase_pick( + ccl_private const ShaderVolumePhases *phases, ccl_private float2 *rand_phase) +{ + int sampled = 0; + + if (phases->num_closure > 1) { + /* pick a phase closure based on sample weights */ + float sum = 0.0f; + + /* Total sample weight over all phase closures. Each closure must be read by its own + * index: indexing with `sampled` (still 0 here) would sum the first weight repeatedly. */ + for (int i = 0; i < phases->num_closure; i++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; + sum += svc->sample_weight; + } + + float r = (*rand_phase).x * sum; + float partial_sum = 0.0f; + + for (int i = 0; i < phases->num_closure; i++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; + float next_sum = partial_sum + svc->sample_weight; + + if (r <= next_sum) { + /* Rescale to reuse for volume phase direction sample. 
*/ + sampled = i; + (*rand_phase).x = (r - partial_sum) / svc->sample_weight; + break; + } + + partial_sum = next_sum; + } + } + + /* todo: this isn't quite correct, we don't weight anisotropy properly + * depending on color channels, even if this is perhaps not a common case */ + return &phases->closure[sampled]; +} + +ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + int skip_phase, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight) +{ + for (int i = 0; i < phases->num_closure; i++) { + if (i == skip_phase) + continue; + + ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; + float phase_pdf = 0.0f; + Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); + + if (phase_pdf != 0.0f) { + bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + sum_pdf += phase_pdf * svc->sample_weight; + } + + sum_sample_weight += svc->sample_weight; + } + + return (sum_sample_weight > 0.0f) ? 
sum_pdf / sum_sample_weight : 0.0f; +} + +ccl_device float volume_shader_phase_eval(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumeClosure *svc, + const float3 omega_in, + ccl_private BsdfEval *phase_eval) +{ + float phase_pdf = 0.0f; + Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); + + if (phase_pdf != 0.0f) { + bsdf_eval_accum(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + } + + return phase_pdf; +} + +ccl_device float volume_shader_phase_eval(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + ccl_private BsdfEval *phase_eval) +{ + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum()); + + float pdf = _volume_shader_phase_eval_mis(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); + +# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4 + if (state->guiding.use_volume_guiding) { + const float guiding_sampling_prob = state->guiding.volume_guiding_sampling_prob; + const float guide_pdf = guiding_phase_pdf(kg, state, omega_in); + pdf = (guiding_sampling_prob * guide_pdf) + (1.0f - guiding_sampling_prob) * pdf; + } +# endif + + return pdf; +} + +# ifdef __PATH_GUIDING__ +ccl_device int volume_shader_phase_guided_sample(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumeClosure *svc, + const float2 rand_phase, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private float *phase_pdf, + ccl_private float *unguided_phase_pdf, + ccl_private float *sampled_roughness) +{ + const bool use_volume_guiding = state->guiding.use_volume_guiding; + const float guiding_sampling_prob = state->guiding.volume_guiding_sampling_prob; + + /* Decide between sampling guiding distribution and phase. 
*/ + float rand_phase_guiding = state->guiding.sample_volume_guiding_rand; + bool sample_guiding = false; + if (use_volume_guiding && rand_phase_guiding < guiding_sampling_prob) { + sample_guiding = true; + rand_phase_guiding /= guiding_sampling_prob; + } + else { + rand_phase_guiding -= guiding_sampling_prob; + rand_phase_guiding /= (1.0f - guiding_sampling_prob); + } + + /* Initialize to zero. */ + int label = LABEL_NONE; + Spectrum eval = zero_spectrum(); + + *unguided_phase_pdf = 0.0f; + float guide_pdf = 0.0f; + *sampled_roughness = 1.0f - fabsf(svc->g); + + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum()); + + if (sample_guiding) { + /* Sample guiding distribution. */ + guide_pdf = guiding_phase_sample(kg, state, rand_phase, omega_in); + *phase_pdf = 0.0f; + + if (guide_pdf != 0.0f) { + *unguided_phase_pdf = volume_shader_phase_eval(kg, sd, svc, *omega_in, phase_eval); + *phase_pdf = (guiding_sampling_prob * guide_pdf) + + ((1.0f - guiding_sampling_prob) * (*unguided_phase_pdf)); + label = LABEL_VOLUME_SCATTER; + } + } + else { + /* Sample phase. 
*/ + *phase_pdf = 0.0f; + label = volume_phase_sample( + sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, unguided_phase_pdf); + + if (*unguided_phase_pdf != 0.0f) { + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + + *phase_pdf = *unguided_phase_pdf; + if (use_volume_guiding) { + guide_pdf = guiding_phase_pdf(kg, state, *omega_in); + *phase_pdf *= 1.0f - guiding_sampling_prob; + *phase_pdf += guiding_sampling_prob * guide_pdf; + } + + kernel_assert(reduce_min(bsdf_eval_sum(phase_eval)) >= 0.0f); + } + else { + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum()); + } + + kernel_assert(reduce_min(bsdf_eval_sum(phase_eval)) >= 0.0f); + } + + return label; +} +# endif + +ccl_device int volume_shader_phase_sample(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + ccl_private const ShaderVolumeClosure *svc, + float2 rand_phase, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private float *pdf, + ccl_private float *sampled_roughness) +{ + *sampled_roughness = 1.0f - fabsf(svc->g); + Spectrum eval = zero_spectrum(); + + *pdf = 0.0f; + int label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf); + + if (*pdf != 0.0f) { + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + } + + return label; +} + +/* Motion Blur */ + +# ifdef __OBJECT_MOTION__ +ccl_device_inline void volume_shader_motion_blur(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd) +{ + if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) == 0) { + return; + } + + AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY); + kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND); + + const float3 P = sd->P; + const float velocity_scale = kernel_data_fetch(objects, sd->object).velocity_scale; + const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? 
0.5f : + 0.0f; + const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ? + (1.0f - kernel_data.cam.shuttertime) + sd->time : + sd->time; +/* For MOTION_POSITION_END the time is shifted by `1 - shuttertime`; for any other motion + * position `sd->time` is used unchanged. */ + /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity + * existed, or will exist, at the given time: + * + * `phi(x, T) = phi(x - (T - t) * u(x, T), t)` + * + * where + * + * x : position + * T : super-sampled time (or ray time) + * t : current time of the simulation (in rendering we assume this is center frame with + * relative time = 0) + * phi : the volume quantity + * u : the velocity field + * + * But first we need to determine the velocity field `u(x, T)`, which we can also estimate + * using semi-lagrangian advection. + * + * `u(x, T) = u(x - (T - t) * u(x, T), t)` + * + * This is the typical way to model self-advection in fluid dynamics. However, we do not + * account for other forces affecting the velocity during simulation (pressure, buoyancy, + * etc.): this gives a linear interpolation, whereas fluids are mostly "curvy". For better + * results, a higher order interpolation scheme can be used (at the cost of more lookups), + * or an interpolation of the velocity fields for the previous and next frames could also + * be used to estimate `u(x, T)` (which will cost more memory and lookups). + * + * References: + * "Eulerian Motion Blur", Kim and Ko, 2007 + * "Production Volume Rendering", Wrenninge et al., 2012 + */ + + /* Find velocity at the original P. */ + float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc); + object_dir_transform(kg, sd, &velocity); + + /* Find advected P (first estimate, using the velocity found above). */ + sd->P = P - (time - time_offset) * velocity_scale * velocity; + + /* Find advected velocity: look the attribute up again now that sd->P has moved. */ + velocity = primitive_volume_attribute_float3(kg, sd, v_desc); + object_dir_transform(kg, sd, &velocity); + + /* Find advected P. 
*/ + sd->P = P - (time - time_offset) * velocity_scale * velocity; +} +# endif + +/* Volume Evaluation */ +/* Evaluate the volume shaders of all entries provided by `stack_read` and accumulate their + * closures in `sd`. + * + * Entries are read with `stack_read(i)` for increasing `i` until one has SHADER_NONE as its + * shader. No closure storage is made available (`num_closure_left` is 0) when `path_flag` + * contains PATH_RAY_TERMINATE, PATH_RAY_SHADOW or PATH_RAY_EMISSION. When the `shadow` + * template argument is false, closures are merged after every entry except the first one. */ +template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState> +ccl_device_inline void volume_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + const uint32_t path_flag, + StackReadOp stack_read) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.max_closures; + } + + /* Reset closures and flags once at the start: the closures of all volumes in the + * stack are accumulated into a single array of closures. */ + sd->num_closure = 0; + sd->num_closure_left = max_closures; + sd->flag = 0; + sd->object_flag = 0; + + for (int i = 0;; i++) { + const VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + /* Set up shader data from the stack entry. It is mostly set up already in + * shader_setup_from_volume, so this switching should be quick. 
*/ + sd->object = entry.object; + sd->lamp = LAMP_NONE; + sd->shader = entry.shader; + + sd->flag &= ~SD_SHADER_FLAGS; + sd->flag |= kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag &= ~SD_OBJECT_FLAGS; + + if (sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_data_fetch(object_flag, sd->object); + +# ifdef __OBJECT_MOTION__ + /* TODO: this is inefficient for motion blur, we should be + * caching matrices instead of recomputing them each step. */ + shader_setup_object_transforms(kg, sd, sd->time); + + volume_shader_motion_blur(kg, sd); +# endif + } + + /* Evaluate the shader: through OSL when `kg->osl` is set, through SVM nodes otherwise. */ +# ifdef __OSL__ + if (kg->osl) { + OSLShader::eval_volume(kg, state, sd, path_flag); + } + else +# endif + { +# ifdef __SVM__ + svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>( + kg, state, sd, NULL, path_flag); +# endif + } + + /* Merge closures to avoid exceeding number of closures limit. Not done for shadow + * evaluation, nor for the first stack entry. */ + if (!shadow) { + if (i > 0) { + volume_shader_merge_closures(sd); + } + } + } +} + +#endif /* __VOLUME__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/volume_stack.h b/intern/cycles/kernel/integrator/volume_stack.h index 97a0f0f386c..675e1927fc0 100644 --- a/intern/cycles/kernel/integrator/volume_stack.h +++ b/intern/cycles/kernel/integrator/volume_stack.h @@ -39,7 +39,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, break; } - if (entry.object == sd->object) { + if (entry.object == sd->object && entry.shader == sd->shader) { /* Shift back next stack entries. */ do { entry = stack_read(i + 1); @@ -61,7 +61,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, } /* Already in the stack? then we have nothing to do. */ - if (entry.object == sd->object) { + if (entry.object == sd->object && entry.shader == sd->shader) { return; } } |