Diffstat (limited to 'intern/cycles/kernel/integrator')
18 files changed, 4671 insertions, 0 deletions
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h new file mode 100644 index 00000000000..4898ff936c6 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h @@ -0,0 +1,181 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/kernel_accumulate.h" +#include "kernel/kernel_adaptive_sampling.h" +#include "kernel/kernel_camera.h" +#include "kernel/kernel_path_state.h" +#include "kernel/kernel_random.h" + +#include "kernel/geom/geom.h" + +CCL_NAMESPACE_BEGIN + +/* This helps with AA but it's not the real solution as it does not AA the geometry + * but it's better than nothing, thus committed. */ +ccl_device_inline float bake_clamp_mirror_repeat(float u, float max) +{ + /* use mirror repeat (like opengl texture) so that if the barycentric + * coordinate goes past the end of the triangle it is not always clamped + * to the same value, gives ugly patterns */ + u /= max; + float fu = floorf(u); + u = u - fu; + + return ((((int)fu) & 1) ? 1.0f - u : u) * max; +} + +/* Return false to indicate that this pixel is finished. + * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known + * that the pixel did converge. */ +ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS, + const ccl_global KernelWorkTile *ccl_restrict tile, + ccl_global float *render_buffer, + const int x, + const int y, + const int scheduled_sample) +{ + PROFILING_INIT(kg, PROFILING_RAY_SETUP); + + /* Initialize path state to give basic buffer access and allow early outputs. */ + path_state_init(INTEGRATOR_STATE_PASS, tile, x, y); + + /* Check whether the pixel has converged and should not be sampled anymore. */ + if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) { + return false; + } + + /* Always count the sample, even if the camera sample will reject the ray. */ + const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample); + + /* Setup render buffers. */ + const int index = INTEGRATOR_STATE(path, render_pixel_index); + const int pass_stride = kernel_data.film.pass_stride; + render_buffer += index * pass_stride; + + ccl_global float *primitive = render_buffer + kernel_data.film.pass_bake_primitive; + ccl_global float *differential = render_buffer + kernel_data.film.pass_bake_differential; + + const int seed = __float_as_uint(primitive[0]); + int prim = __float_as_uint(primitive[1]); + if (prim == -1) { + return false; + } + + prim += kernel_data.bake.tri_offset; + + /* Random number generator. */ + const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed; + + float filter_x, filter_y; + if (sample == 0) { + filter_x = filter_y = 0.5f; + } + else { + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y); + } + + /* Initialize path state for path integration. 
*/ + path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash); + + /* Barycentric UV with sub-pixel offset. */ + float u = primitive[2]; + float v = primitive[3]; + + float dudx = differential[0]; + float dudy = differential[1]; + float dvdx = differential[2]; + float dvdy = differential[3]; + + if (sample > 0) { + u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); + v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), + 1.0f - u); + } + + /* Position and normal on triangle. */ + float3 P, Ng; + int shader; + triangle_point_normal(kg, kernel_data.bake.object_index, prim, u, v, &P, &Ng, &shader); + shader &= SHADER_MASK; + + if (kernel_data.film.pass_background != PASS_UNUSED) { + /* Environment baking. */ + + /* Setup and write ray. */ + Ray ray ccl_optional_struct_init; + ray.P = zero_float3(); + ray.D = normalize(P); + ray.t = FLT_MAX; + ray.time = 0.5f; + ray.dP = differential_zero_compact(); + ray.dD = differential_zero_compact(); + integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray); + + /* Setup next kernel to execute. */ + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + } + else { + /* Surface baking. */ + + /* Setup ray. */ + Ray ray ccl_optional_struct_init; + ray.P = P + Ng; + ray.D = -Ng; + ray.t = FLT_MAX; + ray.time = 0.5f; + + /* Setup differentials. */ + float3 dPdu, dPdv; + triangle_dPdudv(kg, prim, &dPdu, &dPdv); + differential3 dP; + dP.dx = dPdu * dudx + dPdv * dvdx; + dP.dy = dPdu * dudy + dPdv * dvdy; + ray.dP = differential_make_compact(dP); + ray.dD = differential_zero_compact(); + + /* Write ray. */ + integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray); + + /* Setup and write intersection. */ + Intersection isect ccl_optional_struct_init; + isect.object = kernel_data.bake.object_index; + isect.prim = prim; + isect.u = u; + isect.v = v; + isect.t = 1.0f; + isect.type = PRIMITIVE_TRIANGLE; +#ifdef __EMBREE__ + isect.Ng = Ng; +#endif + integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect); + + /* Setup next kernel to execute. */ + const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; + if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + } + else { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + } + } + + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h new file mode 100644 index 00000000000..58e7bde4c94 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h @@ -0,0 +1,120 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/kernel_accumulate.h" +#include "kernel/kernel_adaptive_sampling.h" +#include "kernel/kernel_camera.h" +#include "kernel/kernel_path_state.h" +#include "kernel/kernel_random.h" +#include "kernel/kernel_shadow_catcher.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void integrate_camera_sample(const KernelGlobals *ccl_restrict kg, + const int sample, + const int x, + const int y, + const uint rng_hash, + Ray *ray) +{ + /* Filter sampling. */ + float filter_u, filter_v; + + if (sample == 0) { + filter_u = 0.5f; + filter_v = 0.5f; + } + else { + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v); + } + + /* Depth of field sampling. */ + float lens_u = 0.0f, lens_v = 0.0f; + if (kernel_data.cam.aperturesize > 0.0f) { + path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v); + } + + /* Motion blur time sampling. */ + float time = 0.0f; +#ifdef __CAMERA_MOTION__ + if (kernel_data.cam.shuttertime != -1.0f) + time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME); +#endif + + /* Generate camera ray. */ + camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); +} + +/* Return false to indicate that this pixel is finished. + * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known + * that the pixel did converge. */ +ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS, + const ccl_global KernelWorkTile *ccl_restrict tile, + ccl_global float *render_buffer, + const int x, + const int y, + const int scheduled_sample) +{ + PROFILING_INIT(kg, PROFILING_RAY_SETUP); + + /* Initialize path state to give basic buffer access and allow early outputs. */ + path_state_init(INTEGRATOR_STATE_PASS, tile, x, y); + + /* Check whether the pixel has converged and should not be sampled anymore. */ + if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) { + return false; + } + + /* Count the sample and get an effective sample for this pixel. + * + * This logic allows to both count actual number of samples per pixel, and to add samples to this + * pixel after it was converged and samples were added somewhere else (in which case the + * `scheduled_sample` will be different from actual number of samples in this pixel). */ + const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample); + + /* Initialize random number seed for path. */ + const uint rng_hash = path_rng_hash_init(kg, sample, x, y); + + { + /* Generate camera ray. */ + Ray ray; + integrate_camera_sample(kg, sample, x, y, rng_hash, &ray); + if (ray.t == 0.0f) { + return true; + } + + /* Write camera ray to state. */ + integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray); + } + + /* Initialize path state for path integration. */ + path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash); + + /* Continue with intersect_closest kernel, optionally initializing volume + * stack before that if the camera may be inside a volume. 
*/ + if (kernel_data.cam.is_inside_volume) { + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + } + else { + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + } + + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h new file mode 100644 index 00000000000..34ca6814534 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h @@ -0,0 +1,248 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/kernel_differential.h" +#include "kernel/kernel_light.h" +#include "kernel/kernel_path_state.h" +#include "kernel/kernel_projection.h" +#include "kernel/kernel_shadow_catcher.h" + +#include "kernel/geom/geom.h" + +#include "kernel/bvh/bvh.h" + +CCL_NAMESPACE_BEGIN + +template<uint32_t current_kernel> +ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS, + const int shader_flags) +{ + + /* Optional AO bounce termination. + * We continue evaluating emissive/transparent surfaces and volumes, similar + * to direct lighting. Only if we know there are none can we terminate the + * path immediately. */ + if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) { + if (shader_flags & (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) { + INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) { + INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME; + } + else { + return true; + } + } + + /* Load random number state. */ + RNGState rng_state; + path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + + /* We perform path termination in this kernel to avoid launching shade_surface + * and evaluating the shader when not needed. Only for emission and transparent + * surfaces in front of emission do we need to evaluate the shader, since we + * perform MIS as part of indirect rays. */ + const int path_flag = INTEGRATOR_STATE(path, flag); + const float probability = path_state_continuation_probability(INTEGRATOR_STATE_PASS, path_flag); + + if (probability != 1.0f) { + const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE); + + if (probability == 0.0f || terminate >= probability) { + if (shader_flags & SD_HAS_EMISSION) { + /* Mark path to be terminated right after shader evaluation on the surface. */ + INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; + } + else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) { + /* TODO: only do this for emissive volumes. */ + INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME; + } + else { + return true; + } + } + } + + return false; +} + +/* Note that current_kernel is a template value since making this a variable + * leads to poor performance with CUDA atomics. 
*/ +template<uint32_t current_kernel> +ccl_device_forceinline void integrator_intersect_shader_next_kernel( + INTEGRATOR_STATE_ARGS, + const Intersection *ccl_restrict isect, + const int shader, + const int shader_flags) +{ + /* Note on scheduling. + * + * When there is no shadow catcher split the scheduling is simple: schedule surface shading with + * or without raytrace support, depending on the shader used. + * + * When there is a shadow catcher split the general idea is to have the following configuration: + * + * - Schedule surface shading kernel (with corresponding raytrace support) for the ray which + * will trace shadow catcher object. + * + * - When no alpha-over of approximate shadow catcher is needed, schedule surface shading for + * the matte ray. + * + * - Otherwise schedule background shading kernel, so that we have a background to alpha-over + * on. The background kernel will then schedule surface shading for the matte ray. + * + * Note that the splitting leaves kernel and sorting counters as-is, so use INIT semantic for + * the matte path. */ + + const bool use_raytrace_kernel = ((shader_flags & SD_HAS_RAYTRACE) || + (kernel_data.film.pass_ao != PASS_UNUSED)); + + if (use_raytrace_kernel) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + } + else { + INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + } + +#ifdef __SHADOW_CATCHER__ + const int object_flags = intersection_get_object_flags(kg, isect); + if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) { + if (kernel_data.film.use_approximate_shadow_catcher && !kernel_data.background.transparent) { + INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; + + if (use_raytrace_kernel) { + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + } + else { + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + } + } + else if (use_raytrace_kernel) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + } + else { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + } + } +#endif +} + +ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST); + + /* Read ray from integrator state into local memory. */ + Ray ray ccl_optional_struct_init; + integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray); + kernel_assert(ray.t != 0.0f); + + const uint visibility = path_state_ray_visibility(INTEGRATOR_STATE_PASS); + const int last_isect_prim = INTEGRATOR_STATE(isect, prim); + const int last_isect_object = INTEGRATOR_STATE(isect, object); + + /* Trick to use short AO rays to approximate indirect light at the end of the path. */ + if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) { + ray.t = kernel_data.integrator.ao_bounces_distance; + + const int last_object = last_isect_object != OBJECT_NONE ? + last_isect_object : + kernel_tex_fetch(__prim_object, last_isect_prim); + const float object_ao_distance = kernel_tex_fetch(__objects, last_object).ao_distance; + if (object_ao_distance != 0.0f) { + ray.t = object_ao_distance; + } + } + + /* Scene Intersection. */ + Intersection isect ccl_optional_struct_init; + bool hit = scene_intersect(kg, &ray, visibility, &isect); + + /* TODO: remove this and do it in the various intersection functions instead. */ + if (!hit) { + isect.prim = PRIM_NONE; + } + + /* Light intersection for MIS. 
*/ + if (kernel_data.integrator.use_lamp_mis) { + /* NOTE: if we make lights visible to camera rays, we'll need to initialize + * these in the path_state_init. */ + const int last_type = INTEGRATOR_STATE(isect, type); + const int path_flag = INTEGRATOR_STATE(path, flag); + + hit = lights_intersect( + kg, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) || + hit; + } + + /* Write intersection result into global integrator state memory. */ + integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect); + +#ifdef __VOLUME__ + if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) { + const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP); + const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE; + const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0; + + if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>( + INTEGRATOR_STATE_PASS, flags)) { + /* Continue with volume kernel if we are inside a volume, regardless + * if we hit anything. */ + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, + DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + } + else { + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + } + return; + } +#endif + + if (hit) { + /* Hit a surface, continue with light or surface kernel. */ + if (isect.type & PRIMITIVE_LAMP) { + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, + DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + return; + } + else { + /* Hit a surface, continue with surface kernel unless terminated. */ + const int shader = intersection_get_shader(kg, &isect); + const int flags = kernel_tex_fetch(__shaders, shader).flags; + + if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>( + INTEGRATOR_STATE_PASS, flags)) { + integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>( + INTEGRATOR_STATE_PASS, &isect, shader, flags); + return; + } + else { + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + return; + } + } + } + else { + /* Nothing hit, continue with background kernel. */ + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, + DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + return; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h new file mode 100644 index 00000000000..5bd9cfda4a4 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h @@ -0,0 +1,144 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Visibility for the shadow ray. 
*/ +ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STATE_CONST_ARGS) +{ + uint visibility = PATH_RAY_SHADOW; + +#ifdef __SHADOW_CATCHER__ + const uint32_t path_flag = INTEGRATOR_STATE(shadow_path, flag); + visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility); +#endif + + return visibility; +} + +ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS, + const Ray *ray, + const uint visibility) +{ + /* Mask which will pick only opaque visibility bits from the `visibility`. + * Calculate the mask at compile time: the visibility will either be a high bits for the shadow + * catcher objects, or lower bits for the regular objects (there is no need to check the path + * state here again). */ + constexpr const uint opaque_mask = SHADOW_CATCHER_VISIBILITY_SHIFT(PATH_RAY_SHADOW_OPAQUE) | + PATH_RAY_SHADOW_OPAQUE; + + Intersection isect; + const bool opaque_hit = scene_intersect(kg, ray, visibility & opaque_mask, &isect); + + if (!opaque_hit) { + INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0; + } + + return opaque_hit; +} + +ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_CONST_ARGS) +{ + const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; + const int transparent_bounce = INTEGRATOR_STATE(shadow_path, transparent_bounce); + + return max(transparent_max_bounce - transparent_bounce - 1, 0); +} + +#ifdef __TRANSPARENT_SHADOWS__ +ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS, + const Ray *ray, + const uint visibility) +{ + Intersection isect[INTEGRATOR_SHADOW_ISECT_SIZE]; + + /* Limit the number hits to the max transparent bounces allowed and the size that we + * have available in the integrator state. */ + const uint max_transparent_hits = integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_PASS); + const uint max_hits = min(max_transparent_hits, (uint)INTEGRATOR_SHADOW_ISECT_SIZE); + uint num_hits = 0; + bool opaque_hit = scene_intersect_shadow_all(kg, ray, isect, visibility, max_hits, &num_hits); + + /* If number of hits exceed the transparent bounces limit, make opaque. */ + if (num_hits > max_transparent_hits) { + opaque_hit = true; + } + + if (!opaque_hit) { + uint num_recorded_hits = min(num_hits, max_hits); + + if (num_recorded_hits > 0) { + sort_intersections(isect, num_recorded_hits); + + /* Write intersection result into global integrator state memory. */ + for (int hit = 0; hit < num_recorded_hits; hit++) { + integrator_state_write_shadow_isect(INTEGRATOR_STATE_PASS, &isect[hit], hit); + } + } + + INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = num_hits; + } + else { + INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0; + } + + return opaque_hit; +} +#endif + +ccl_device void integrator_intersect_shadow(INTEGRATOR_STATE_ARGS) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW); + + /* Read ray from integrator state into local memory. */ + Ray ray ccl_optional_struct_init; + integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + + /* Compute visibility. */ + const uint visibility = integrate_intersect_shadow_visibility(INTEGRATOR_STATE_PASS); + +#ifdef __TRANSPARENT_SHADOWS__ + /* TODO: compile different kernels depending on this? Especially for OptiX + * conditional trace calls are bad. */ + const bool opaque_hit = + (kernel_data.integrator.transparent_shadows) ? 
+ integrate_intersect_shadow_transparent(INTEGRATOR_STATE_PASS, &ray, visibility) : + integrate_intersect_shadow_opaque(INTEGRATOR_STATE_PASS, &ray, visibility); +#else + const bool opaque_hit = integrate_intersect_shadow_opaque( + INTEGRATOR_STATE_PASS, &ray, visibility); +#endif + + if (opaque_hit) { + /* Hit an opaque surface, shadow path ends here. */ + INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + return; + } + else { + /* Hit nothing or transparent surfaces, continue to shadow kernel + * for shading and render buffer output. + * + * TODO: could also write to render buffer directly if no transparent shadows? + * Could save a kernel execution for the common case. */ + INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, + DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + return; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h new file mode 100644 index 00000000000..7c090952dc7 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/integrator/integrator_subsurface.h" + +CCL_NAMESPACE_BEGIN + +ccl_device void integrator_intersect_subsurface(INTEGRATOR_STATE_ARGS) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_SUBSURFACE); + +#ifdef __SUBSURFACE__ + if (subsurface_scatter(INTEGRATOR_STATE_PASS)) { + return; + } +#endif + + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h new file mode 100644 index 00000000000..60d8a8e3e54 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h @@ -0,0 +1,198 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/bvh/bvh.h" +#include "kernel/geom/geom.h" +#include "kernel/integrator/integrator_volume_stack.h" +#include "kernel/kernel_shader.h" + +CCL_NAMESPACE_BEGIN + +ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_ARGS, + const float3 from_P, + const float3 to_P) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK); + + ShaderDataTinyStorage stack_sd_storage; + ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage); + + kernel_assert(kernel_data.integrator.use_volumes); + + Ray volume_ray ccl_optional_struct_init; + volume_ray.P = from_P; + volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t); + +#ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2 * VOLUME_STACK_SIZE + 1]; + uint num_hits = scene_intersect_volume_all( + kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY); + if (num_hits > 0) { + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for (uint hit = 0; hit < num_hits; ++hit, ++isect) { + shader_setup_from_ray(kg, stack_sd, &volume_ray, isect); + volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd); + } + } +#else + Intersection isect; + int step = 0; + while (step < 2 * VOLUME_STACK_SIZE && + scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) { + shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); + volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd); + + /* Move ray forward. */ + volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + if (volume_ray.t != FLT_MAX) { + volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t); + } + ++step; + } +#endif +} + +ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK); + + ShaderDataTinyStorage stack_sd_storage; + ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage); + + Ray volume_ray ccl_optional_struct_init; + integrator_state_read_ray(INTEGRATOR_STATE_PASS, &volume_ray); + volume_ray.t = FLT_MAX; + + const uint visibility = (INTEGRATOR_STATE(path, flag) & PATH_RAY_ALL_VISIBILITY); + int stack_index = 0, enclosed_index = 0; + + /* Write background shader. */ + if (kernel_data.background.volume_shader != SHADER_NONE) { + const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader}; + integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry); + stack_index++; + } + +#ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2 * VOLUME_STACK_SIZE + 1]; + uint num_hits = scene_intersect_volume_all( + kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility); + if (num_hits > 0) { + int enclosed_volumes[VOLUME_STACK_SIZE]; + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for (uint hit = 0; hit < num_hits; ++hit, ++isect) { + shader_setup_from_ray(kg, stack_sd, &volume_ray, isect); + if (stack_sd->flag & SD_BACKFACING) { + bool need_add = true; + for (int i = 0; i < enclosed_index && need_add; ++i) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + if (enclosed_volumes[i] == stack_sd->object) { + need_add = false; + } + } + for (int i = 0; i < stack_index && need_add; ++i) { + /* Don't add intersections twice. 
*/ + VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + if (entry.object == stack_sd->object) { + need_add = false; + break; + } + } + if (need_add && stack_index < VOLUME_STACK_SIZE - 1) { + const VolumeStack new_entry = {stack_sd->object, stack_sd->shader}; + integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry); + ++stack_index; + } + } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = stack_sd->object; + } + } + } +#else + int enclosed_volumes[VOLUME_STACK_SIZE]; + int step = 0; + + while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 && + step < 2 * VOLUME_STACK_SIZE) { + Intersection isect; + if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) { + break; + } + + shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); + if (stack_sd->flag & SD_BACKFACING) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + bool need_add = true; + for (int i = 0; i < enclosed_index && need_add; ++i) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + if (enclosed_volumes[i] == stack_sd->object) { + need_add = false; + } + } + for (int i = 0; i < stack_index && need_add; ++i) { + /* Don't add intersections twice. */ + VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + if (entry.object == stack_sd->object) { + need_add = false; + break; + } + } + if (need_add) { + const VolumeStack new_entry = {stack_sd->object, stack_sd->shader}; + integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry); + ++stack_index; + } + } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = stack_sd->object; + } + + /* Move ray forward. */ + volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + ++step; + } +#endif + + /* Write terminator. */ + const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE}; + integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry); + + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_megakernel.h b/intern/cycles/kernel/integrator/integrator_megakernel.h new file mode 100644 index 00000000000..91363ea1c7f --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_megakernel.h @@ -0,0 +1,93 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/integrator/integrator_init_from_camera.h" +#include "kernel/integrator/integrator_intersect_closest.h" +#include "kernel/integrator/integrator_intersect_shadow.h" +#include "kernel/integrator/integrator_intersect_subsurface.h" +#include "kernel/integrator/integrator_intersect_volume_stack.h" +#include "kernel/integrator/integrator_shade_background.h" +#include "kernel/integrator/integrator_shade_light.h" +#include "kernel/integrator/integrator_shade_shadow.h" +#include "kernel/integrator/integrator_shade_surface.h" +#include "kernel/integrator/integrator_shade_volume.h" + +CCL_NAMESPACE_BEGIN + +ccl_device void integrator_megakernel(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + /* Each kernel indicates the next kernel to execute, so here we simply + * have to check what that kernel is and execute it. + * + * TODO: investigate if we can use device side enqueue for GPUs to avoid + * having to compile this big kernel. */ + while (true) { + if (INTEGRATOR_STATE(shadow_path, queued_kernel)) { + /* First handle any shadow paths before we potentially create more shadow paths. */ + switch (INTEGRATOR_STATE(shadow_path, queued_kernel)) { + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: + integrator_intersect_shadow(INTEGRATOR_STATE_PASS); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: + integrator_shade_shadow(INTEGRATOR_STATE_PASS, render_buffer); + break; + default: + kernel_assert(0); + break; + } + } + else if (INTEGRATOR_STATE(path, queued_kernel)) { + /* Then handle regular path kernels. */ + switch (INTEGRATOR_STATE(path, queued_kernel)) { + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: + integrator_intersect_closest(INTEGRATOR_STATE_PASS); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: + integrator_shade_background(INTEGRATOR_STATE_PASS, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: + integrator_shade_surface(INTEGRATOR_STATE_PASS, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: + integrator_shade_volume(INTEGRATOR_STATE_PASS, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: + integrator_shade_surface_raytrace(INTEGRATOR_STATE_PASS, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: + integrator_shade_light(INTEGRATOR_STATE_PASS, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: + integrator_intersect_subsurface(INTEGRATOR_STATE_PASS); + break; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: + integrator_intersect_volume_stack(INTEGRATOR_STATE_PASS); + break; + default: + kernel_assert(0); + break; + } + } + else { + break; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h new file mode 100644 index 00000000000..3e4cc837e9b --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_shade_background.h @@ -0,0 +1,215 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/kernel_accumulate.h" +#include "kernel/kernel_emission.h" +#include "kernel/kernel_light.h" +#include "kernel/kernel_shader.h" + +CCL_NAMESPACE_BEGIN + +ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ +#ifdef __BACKGROUND__ + const int shader = kernel_data.background.surface_shader; + const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + + /* Use visibility flag to skip lights. */ + if (shader & SHADER_EXCLUDE_ANY) { + if (((shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || + ((shader & SHADER_EXCLUDE_GLOSSY) && ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == + (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || + ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || + ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || + ((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) + return zero_float3(); + } + + /* Use fast constant background color if available. */ + float3 L = zero_float3(); + if (!shader_constant_emission_eval(kg, shader, &L)) { + /* Evaluate background shader. */ + + /* TODO: does aliasing like this break automatic SoA in CUDA? + * Should we instead store closures separate from ShaderData? */ + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); + shader_setup_from_background(kg, + emission_sd, + INTEGRATOR_STATE(ray, P), + INTEGRATOR_STATE(ray, D), + INTEGRATOR_STATE(ray, time)); + + PROFILING_SHADER(emission_sd->object, emission_sd->shader); + PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); + shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>( + INTEGRATOR_STATE_PASS, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); + + L = shader_background_eval(emission_sd); + } + + /* Background MIS weights. */ +# ifdef __BACKGROUND_MIS__ + /* Check if background light exists or if we should skip pdf. */ + if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_MIS_SKIP) && kernel_data.background.use_mis) { + const float3 ray_P = INTEGRATOR_STATE(ray, P); + const float3 ray_D = INTEGRATOR_STATE(ray, D); + const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf); + const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t); + + /* multiple importance sampling, get background light pdf for ray + * direction, and compute weight with respect to BSDF pdf */ + const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D); + const float mis_weight = power_heuristic(mis_ray_pdf, pdf); + + L *= mis_weight; + } +# endif + + return L; +#else + return make_float3(0.8f, 0.8f, 0.8f); +#endif +} + +ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + /* Accumulate transparency for transparent background. We can skip background + * shader evaluation unless a background pass is used. 
*/ + bool eval_background = true; + float transparent = 0.0f; + + const bool is_transparent_background_ray = kernel_data.background.transparent && + (INTEGRATOR_STATE(path, flag) & + PATH_RAY_TRANSPARENT_BACKGROUND); + + if (is_transparent_background_ray) { + transparent = average(INTEGRATOR_STATE(path, throughput)); + +#ifdef __PASSES__ + eval_background = (kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)); +#else + eval_background = false; +#endif + } + + /* Evaluate background shader. */ + float3 L = (eval_background) ? + integrator_eval_background_shader(INTEGRATOR_STATE_PASS, render_buffer) : + zero_float3(); + + /* When using the ao bounces approximation, adjust background + * shader intensity with ao factor. */ + if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) { + L *= kernel_data.integrator.ao_bounces_factor; + } + + /* Write to render buffer. */ + kernel_accum_background( + INTEGRATOR_STATE_PASS, L, transparent, is_transparent_background_ray, render_buffer); +} + +ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + const float3 ray_D = INTEGRATOR_STATE(ray, D); + const float ray_time = INTEGRATOR_STATE(ray, time); + LightSample ls ccl_optional_struct_init; + for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { + if (light_sample_from_distant_ray(kg, ray_D, lamp, &ls)) { + /* Use visibility flag to skip lights. */ +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + + if (ls.shader & SHADER_EXCLUDE_ANY) { + if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || + ((ls.shader & SHADER_EXCLUDE_GLOSSY) && + ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == + (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || + ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || + ((ls.shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || + ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) + return; + } +#endif + + /* Evaluate light shader. */ + /* TODO: does aliasing like this break automatic SoA in CUDA? */ + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + float3 light_eval = light_sample_shader_eval( + INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time); + if (is_zero(light_eval)) { + return; + } + + /* MIS weighting. */ + if (!(path_flag & PATH_RAY_MIS_SKIP)) { + /* multiple importance sampling, get regular light pdf, + * and compute weight with respect to BSDF pdf */ + const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf); + const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf); + light_eval *= mis_weight; + } + + /* Write to render buffer. */ + const float3 throughput = INTEGRATOR_STATE(path, throughput); + kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer); + } + } +} + +ccl_device void integrator_shade_background(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP); + + /* TODO: unify these in a single loop to only have a single shader evaluation call. 
*/ + integrate_distant_lights(INTEGRATOR_STATE_PASS, render_buffer); + integrate_background(INTEGRATOR_STATE_PASS, render_buffer); + +#ifdef __SHADOW_CATCHER__ + if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND; + + const int isect_prim = INTEGRATOR_STATE(isect, prim); + const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim); + const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; + + if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, + shader); + } + else { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, + shader); + } + return; + } +#endif + + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h new file mode 100644 index 00000000000..05b530f9665 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_shade_light.h @@ -0,0 +1,126 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/kernel_accumulate.h" +#include "kernel/kernel_emission.h" +#include "kernel/kernel_light.h" +#include "kernel/kernel_shader.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + /* Setup light sample. */ + Intersection isect ccl_optional_struct_init; + integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect); + + float3 ray_P = INTEGRATOR_STATE(ray, P); + const float3 ray_D = INTEGRATOR_STATE(ray, D); + const float ray_time = INTEGRATOR_STATE(ray, time); + + /* Advance ray beyond light. */ + /* TODO: can we make this more numerically robust to avoid reintersecting the + * same light in some cases? */ + const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D); + INTEGRATOR_STATE_WRITE(ray, P) = new_ray_P; + INTEGRATOR_STATE_WRITE(ray, t) -= isect.t; + + /* Set position to where the BSDF was sampled, for correct MIS PDF. */ + const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t); + ray_P -= ray_D * mis_ray_t; + isect.t += mis_ray_t; + INTEGRATOR_STATE_WRITE(path, mis_ray_t) = mis_ray_t + isect.t; + + LightSample ls ccl_optional_struct_init; + const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls); + + if (!use_light_sample) { + return; + } + + /* Use visibility flag to skip lights. 
*/ +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + + if (ls.shader & SHADER_EXCLUDE_ANY) { + if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || + ((ls.shader & SHADER_EXCLUDE_GLOSSY) && + ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == + (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || + ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || + ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) + return; + } +#endif + + /* Evaluate light shader. */ + /* TODO: does aliasing like this break automatic SoA in CUDA? */ + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + float3 light_eval = light_sample_shader_eval(INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time); + if (is_zero(light_eval)) { + return; + } + + /* MIS weighting. */ + if (!(path_flag & PATH_RAY_MIS_SKIP)) { + /* multiple importance sampling, get regular light pdf, + * and compute weight with respect to BSDF pdf */ + const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf); + const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf); + light_eval *= mis_weight; + } + + /* Write to render buffer. */ + const float3 throughput = INTEGRATOR_STATE(path, throughput); + kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer); +} + +ccl_device void integrator_shade_light(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP); + + integrate_light(INTEGRATOR_STATE_PASS, render_buffer); + + /* TODO: we could get stuck in an infinite loop if there are precision issues + * and the same light is hit again. + * + * As a workaround count this as a transparent bounce. It makes some sense + * to interpret lights as transparent surfaces (and support making them opaque), + * but this needs to be revisited. */ + uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce) + 1; + INTEGRATOR_STATE_WRITE(path, transparent_bounce) = transparent_bounce; + + if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + return; + } + else { + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + return; + } + + /* TODO: in some cases we could continue directly to SHADE_BACKGROUND, but + * probably that optimization is probably not practical if we add lights to + * scene geometry. */ +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h new file mode 100644 index 00000000000..fd3c3ae1653 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h @@ -0,0 +1,182 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/integrator/integrator_shade_volume.h" +#include "kernel/integrator/integrator_volume_stack.h" + +#include "kernel/kernel_shader.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits) +{ + return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE; +} + +#ifdef __TRANSPARENT_SHADOWS__ +ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_ARGS, const int hit) +{ + PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE); + + /* TODO: does aliasing like this break automatic SoA in CUDA? + * Should we instead store closures separate from ShaderData? + * + * TODO: is it better to declare this outside the loop or keep it local + * so the compiler can see there is no dependency between iterations? */ + ShaderDataTinyStorage shadow_sd_storage; + ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage); + + /* Setup shader data at surface. */ + Intersection isect ccl_optional_struct_init; + integrator_state_read_shadow_isect(INTEGRATOR_STATE_PASS, &isect, hit); + + Ray ray ccl_optional_struct_init; + integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + + shader_setup_from_ray(kg, shadow_sd, &ray, &isect); + + /* Evaluate shader. */ + if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { + shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + INTEGRATOR_STATE_PASS, shadow_sd, NULL, PATH_RAY_SHADOW); + } + +# ifdef __VOLUME__ + /* Exit/enter volume. */ + shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, shadow_sd); +# endif + + /* Compute transparency from closures. */ + return shader_bsdf_transparency(kg, shadow_sd); +} + +# ifdef __VOLUME__ +ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS, + const int hit, + const int num_recorded_hits, + float3 *ccl_restrict throughput) +{ + PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME); + + /* TODO: deduplicate with surface, or does it not matter for memory usage? */ + ShaderDataTinyStorage shadow_sd_storage; + ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage); + + /* Setup shader data. */ + Ray ray ccl_optional_struct_init; + integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + + /* Modify ray position and length to match current segment. */ + const float start_t = (hit == 0) ? 0.0f : INTEGRATOR_STATE_ARRAY(shadow_isect, hit - 1, t); + const float end_t = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(shadow_isect, hit, t) : + ray.t; + ray.P += start_t * ray.D; + ray.t = end_t - start_t; + + shader_setup_from_volume(kg, shadow_sd, &ray); + + const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) { + return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i); + }); + + volume_shadow_heterogeneous(INTEGRATOR_STATE_PASS, &ray, shadow_sd, throughput, step_size); +} +# endif + +ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const int num_hits) +{ + /* Accumulate shadow for transparent surfaces. */ + const int num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + + for (int hit = 0; hit < num_recorded_hits + 1; hit++) { + /* Volume shaders. 
*/ + if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) { +# ifdef __VOLUME__ + if (!integrator_state_shadow_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) { + float3 throughput = INTEGRATOR_STATE(shadow_path, throughput); + integrate_transparent_volume_shadow( + INTEGRATOR_STATE_PASS, hit, num_recorded_hits, &throughput); + if (is_zero(throughput)) { + return true; + } + + INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput; + } +# endif + } + + /* Surface shaders. */ + if (hit < num_recorded_hits) { + const float3 shadow = integrate_transparent_surface_shadow(INTEGRATOR_STATE_PASS, hit); + const float3 throughput = INTEGRATOR_STATE(shadow_path, throughput) * shadow; + if (is_zero(throughput)) { + return true; + } + + INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) += 1; + } + + /* Note we do not need to check max_transparent_bounce here, the number + * of intersections is already limited and made opaque in the + * INTERSECT_SHADOW kernel. */ + } + + if (shadow_intersections_has_remaining(num_hits)) { + /* There are more hits that we could not recorded due to memory usage, + * adjust ray to intersect again from the last hit. */ + const float last_hit_t = INTEGRATOR_STATE_ARRAY(shadow_isect, num_recorded_hits - 1, t); + const float3 ray_P = INTEGRATOR_STATE(shadow_ray, P); + const float3 ray_D = INTEGRATOR_STATE(shadow_ray, D); + INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D); + INTEGRATOR_STATE_WRITE(shadow_ray, t) -= last_hit_t; + } + + return false; +} +#endif /* __TRANSPARENT_SHADOWS__ */ + +ccl_device void integrator_shade_shadow(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP); + const int num_hits = INTEGRATOR_STATE(shadow_path, num_hits); + +#ifdef __TRANSPARENT_SHADOWS__ + /* Evaluate transparent shadows. */ + const bool opaque = integrate_transparent_shadow(INTEGRATOR_STATE_PASS, num_hits); + if (opaque) { + INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + return; + } +#endif + + if (shadow_intersections_has_remaining(num_hits)) { + /* More intersections to find, continue shadow ray. */ + INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + return; + } + else { + kernel_accum_light(INTEGRATOR_STATE_PASS, render_buffer); + INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + return; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h new file mode 100644 index 00000000000..73b7cad32be --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h @@ -0,0 +1,502 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/kernel_accumulate.h" +#include "kernel/kernel_emission.h" +#include "kernel/kernel_light.h" +#include "kernel/kernel_passes.h" +#include "kernel/kernel_path_state.h" +#include "kernel/kernel_shader.h" + +#include "kernel/integrator/integrator_subsurface.h" +#include "kernel/integrator/integrator_volume_stack.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONST_ARGS, + ShaderData *sd) +{ + Intersection isect ccl_optional_struct_init; + integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect); + + Ray ray ccl_optional_struct_init; + integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray); + + shader_setup_from_ray(kg, sd, &ray, &isect); +} + +#ifdef __HOLDOUT__ +ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARGS, + ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ + /* Write holdout transparency to render buffer and stop if fully holdout. */ + const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + + if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && + (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { + const float3 holdout_weight = shader_holdout_apply(kg, sd); + if (kernel_data.background.transparent) { + const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float transparent = average(holdout_weight * throughput); + kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer); + } + if (isequal_float3(holdout_weight, one_float3())) { + return false; + } + } + + return true; +} +#endif /* __HOLDOUT__ */ + +#ifdef __EMISSION__ +ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_ARGS, + const ShaderData *sd, + ccl_global float *ccl_restrict + render_buffer) +{ + const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + + /* Evaluate emissive closure. */ + float3 L = shader_emissive_eval(sd); + +# ifdef __HAIR__ + if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && + (sd->type & PRIMITIVE_ALL_TRIANGLE)) +# else + if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) +# endif + { + const float bsdf_pdf = INTEGRATOR_STATE(path, mis_ray_pdf); + const float t = sd->ray_length + INTEGRATOR_STATE(path, mis_ray_t); + + /* Multiple importance sampling, get triangle light pdf, + * and compute weight with respect to BSDF pdf. */ + float pdf = triangle_light_pdf(kg, sd, t); + float mis_weight = power_heuristic(bsdf_pdf, pdf); + + L *= mis_weight; + } + + const float3 throughput = INTEGRATOR_STATE(path, throughput); + kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, L, render_buffer); +} +#endif /* __EMISSION__ */ + +#ifdef __EMISSION__ +/* Path tracing: sample point on light and evaluate light shader, then + * queue shadow ray to be traced. */ +ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS, + ShaderData *sd, + const RNGState *rng_state) +{ + /* Test if there is a light or BSDF that needs direct light. */ + if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) { + return; + } + + /* Sample position on a light. 
*/ + LightSample ls ccl_optional_struct_init; + { + const int path_flag = INTEGRATOR_STATE(path, flag); + const uint bounce = INTEGRATOR_STATE(path, bounce); + float light_u, light_v; + path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + + if (!light_distribution_sample_from_position( + kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) { + return; + } + } + + kernel_assert(ls.pdf != 0.0f); + + /* Evaluate light shader. + * + * TODO: can we reuse sd memory? In theory we can move this after + * integrate_surface_bounce, evaluate the BSDF, and only then evaluate + * the light shader. This could also move to its own kernel, for + * non-constant light sources. */ + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + const float3 light_eval = light_sample_shader_eval( + INTEGRATOR_STATE_PASS, emission_sd, &ls, sd->time); + if (is_zero(light_eval)) { + return; + } + + /* Evaluate BSDF. */ + const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D); + + BsdfEval bsdf_eval ccl_optional_struct_init; + const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); + bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf); + + if (ls.shader & SHADER_USE_MIS) { + const float mis_weight = power_heuristic(ls.pdf, bsdf_pdf); + bsdf_eval_mul(&bsdf_eval, mis_weight); + } + + /* Path termination. */ + const float terminate = path_state_rng_light_termination(kg, rng_state); + if (light_sample_terminate(kg, &ls, &bsdf_eval, terminate)) { + return; + } + + /* Create shadow ray. */ + Ray ray ccl_optional_struct_init; + light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray); + const bool is_light = light_sample_is_light(&ls); + + /* Copy volume stack and enter/exit volume. */ + integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS); + + if (is_transmission) { +# ifdef __VOLUME__ + shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, sd); +# endif + } + + /* Write shadow ray and associated state to global memory. */ + integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + + /* Copy state from main path to shadow path. */ + const uint16_t bounce = INTEGRATOR_STATE(path, bounce); + const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce); + uint32_t shadow_flag = INTEGRATOR_STATE(path, flag); + shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; + shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS; + const float3 throughput = INTEGRATOR_STATE(path, throughput) * bsdf_eval_sum(&bsdf_eval); + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + const float3 diffuse_glossy_ratio = (bounce == 0) ? + bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) : + INTEGRATOR_STATE(path, diffuse_glossy_ratio); + INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; + } + + INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput; + + if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { + INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput; + } + + /* Branch off shadow kernel. */ + INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); +} +#endif + +/* Path tracing: bounce off or through surface with new direction. 
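/* For reference, the MIS weights used above and in integrate_surface_emission follow the
 * standard power heuristic: each strategy (light sampling here, BSDF sampling when a
 * bounce ray happens to hit the light) is weighted by the square of its own pdf so the
 * two contributions add up to one unbiased, low-variance estimate. A minimal sketch,
 * assuming the same two-argument form Cycles uses elsewhere: */
ccl_device_inline float power_heuristic_sketch(const float a_pdf, const float b_pdf)
{
  return (a_pdf * a_pdf) / (a_pdf * a_pdf + b_pdf * b_pdf);
}

/* Light-sample half of the estimator (schematic):
 *   bsdf_eval * light_eval / light_pdf * power_heuristic_sketch(light_pdf, bsdf_pdf)
 * BSDF-sample half, applied when emission is hit by a bounce ray:
 *   throughput * emission * power_heuristic_sketch(bsdf_pdf, light_pdf) */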
*/ +ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE_ARGS, + ShaderData *sd, + const RNGState *rng_state) +{ + /* Sample BSDF or BSSRDF. */ + if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) { + return LABEL_NONE; + } + + float bsdf_u, bsdf_v; + path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u); + +#ifdef __SUBSURFACE__ + /* BSSRDF closure, we schedule subsurface intersection kernel. */ + if (CLOSURE_IS_BSSRDF(sc->type)) { + return subsurface_bounce(INTEGRATOR_STATE_PASS, sd, sc); + } +#endif + + /* BSDF closure, sample direction. */ + float bsdf_pdf; + BsdfEval bsdf_eval ccl_optional_struct_init; + float3 bsdf_omega_in ccl_optional_struct_init; + differential3 bsdf_domega_in ccl_optional_struct_init; + int label; + + label = shader_bsdf_sample_closure( + kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); + + if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) { + return LABEL_NONE; + } + + /* Setup ray. Note that clipping works through transparent bounces. */ + INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng); + INTEGRATOR_STATE_WRITE(ray, D) = normalize(bsdf_omega_in); + INTEGRATOR_STATE_WRITE(ray, t) = (label & LABEL_TRANSPARENT) ? + INTEGRATOR_STATE(ray, t) - sd->ray_length : + FLT_MAX; + +#ifdef __RAY_DIFFERENTIALS__ + INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(bsdf_domega_in); +#endif + + /* Update throughput. */ + float3 throughput = INTEGRATOR_STATE(path, throughput); + throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf; + INTEGRATOR_STATE_WRITE(path, throughput) = throughput; + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + if (INTEGRATOR_STATE(path, bounce) == 0) { + INTEGRATOR_STATE_WRITE(path, + diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(&bsdf_eval); + } + } + + /* Update path state */ + if (label & LABEL_TRANSPARENT) { + INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length; + } + else { + INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = bsdf_pdf; + INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(bsdf_pdf, + INTEGRATOR_STATE(path, min_ray_pdf)); + } + + path_state_next(INTEGRATOR_STATE_PASS, label); + return label; +} + +#ifdef __VOLUME__ +ccl_device_forceinline bool integrate_surface_volume_only_bounce(INTEGRATOR_STATE_ARGS, + ShaderData *sd) +{ + if (!path_state_volume_next(INTEGRATOR_STATE_PASS)) { + return LABEL_NONE; + } + + /* Setup ray position, direction stays unchanged. */ + INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, -sd->Ng); + + /* Clipping works through transparent. 
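/* A worked example of the throughput update above, for the common special case of a
 * Lambertian closure with cosine-weighted sampling (an illustration, not the actual
 * closure code): eval = albedo / pi * cos_theta and pdf = cos_theta / pi, so eval / pdf
 * reduces to the albedo and the path throughput is simply tinted by the surface color. */
ccl_device_inline float3 diffuse_throughput_update_example(const float3 throughput,
                                                           const float3 albedo,
                                                           const float cos_theta)
{
  const float3 eval = albedo * (1.0f / M_PI_F) * cos_theta;
  const float pdf = cos_theta * (1.0f / M_PI_F);
  return throughput * (eval / pdf); /* == throughput * albedo */
}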
*/ + INTEGRATOR_STATE_WRITE(ray, t) -= sd->ray_length; + +# ifdef __RAY_DIFFERENTIALS__ + INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP); +# endif + + INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length; + + return LABEL_TRANSMIT | LABEL_TRANSPARENT; +} +#endif + +#if defined(__AO__) && defined(__SHADER_RAYTRACE__) +ccl_device_forceinline void integrate_surface_ao_pass(INTEGRATOR_STATE_CONST_ARGS, + const ShaderData *ccl_restrict sd, + const RNGState *ccl_restrict rng_state, + ccl_global float *ccl_restrict render_buffer) +{ +# ifdef __KERNEL_OPTIX__ + optixDirectCall<void>(2, INTEGRATOR_STATE_PASS, sd, rng_state, render_buffer); +} + +extern "C" __device__ void __direct_callable__ao_pass(INTEGRATOR_STATE_CONST_ARGS, + const ShaderData *ccl_restrict sd, + const RNGState *ccl_restrict rng_state, + ccl_global float *ccl_restrict render_buffer) +{ +# endif /* __KERNEL_OPTIX__ */ + float bsdf_u, bsdf_v; + path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + + const float3 ao_N = shader_bsdf_ao_normal(kg, sd); + float3 ao_D; + float ao_pdf; + sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + + if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { + Ray ray ccl_optional_struct_init; + ray.P = ray_offset(sd->P, sd->Ng); + ray.D = ao_D; + ray.t = kernel_data.integrator.ao_bounces_distance; + ray.time = sd->time; + ray.dP = differential_zero_compact(); + ray.dD = differential_zero_compact(); + + Intersection isect ccl_optional_struct_init; + if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { + ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS, + render_buffer); + const float3 throughput = INTEGRATOR_STATE(path, throughput); + kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, throughput); + } + } +} +#endif /* defined(__AO__) && defined(__SHADER_RAYTRACE__) */ + +template<uint node_feature_mask> +ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) + +{ + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_SURFACE_SETUP); + + /* Setup shader data. */ + ShaderData sd; + integrate_surface_shader_setup(INTEGRATOR_STATE_PASS, &sd); + PROFILING_SHADER(sd.object, sd.shader); + + int continue_path_label = 0; + + /* Skip most work for volume bounding surface. */ +#ifdef __VOLUME__ + if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { +#endif + + { + const int path_flag = INTEGRATOR_STATE(path, flag); +#ifdef __SUBSURFACE__ + /* Can skip shader evaluation for BSSRDF exit point without bump mapping. */ + if (!(path_flag & PATH_RAY_SUBSURFACE) || ((sd.flag & SD_HAS_BSSRDF_BUMP))) +#endif + { + /* Evaluate shader. */ + PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL); + shader_eval_surface<node_feature_mask>( + INTEGRATOR_STATE_PASS, &sd, render_buffer, path_flag); + } + } + +#ifdef __SUBSURFACE__ + if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE) { + /* When coming from inside subsurface scattering, setup a diffuse + * closure to perform lighting at the exit point. */ + INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SUBSURFACE; + subsurface_shader_data_setup(INTEGRATOR_STATE_PASS, &sd); + } +#endif + + shader_prepare_surface_closures(INTEGRATOR_STATE_PASS, &sd); + +#ifdef __HOLDOUT__ + /* Evaluate holdout. */ + if (!integrate_surface_holdout(INTEGRATOR_STATE_PASS, &sd, render_buffer)) { + return false; + } +#endif + +#ifdef __EMISSION__ + /* Write emission. 
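/* The ambient occlusion pass above is a one-sample Monte Carlo estimator of hemispherical
 * visibility. Because directions are drawn cosine-weighted, the cosine term of the
 * integrand cancels against the pdf, so each sample simply contributes the throughput
 * when the probe ray is unoccluded. A schematic, self-contained estimator over several
 * samples (hypothetical visibility input, not part of the kernel): */
ccl_device_inline float ao_estimate_example(const bool *unoccluded, const int num_samples)
{
  float ao = 0.0f;
  for (int i = 0; i < num_samples; i++) {
    /* Cosine-weighted pdf cancels the cosine in the integrand, leaving plain visibility. */
    ao += unoccluded[i] ? 1.0f : 0.0f;
  }
  return ao / (float)num_samples;
}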
*/ + if (sd.flag & SD_EMISSION) { + integrate_surface_emission(INTEGRATOR_STATE_PASS, &sd, render_buffer); + } +#endif + +#ifdef __PASSES__ + /* Write render passes. */ + PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES); + kernel_write_data_passes(INTEGRATOR_STATE_PASS, &sd, render_buffer); +#endif + + /* Load random number state. */ + RNGState rng_state; + path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + + /* Perform path termination. Most paths have already been terminated in + * the intersect_closest kernel, this is just for emission and for dividing + * throughput by the probability at the right moment. */ + const int path_flag = INTEGRATOR_STATE(path, flag); + const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ? + 0.0f : + path_state_continuation_probability(INTEGRATOR_STATE_PASS, + path_flag); + if (probability == 0.0f) { + return false; + } + else if (probability != 1.0f) { + INTEGRATOR_STATE_WRITE(path, throughput) /= probability; + } + +#ifdef __DENOISING_FEATURES__ + kernel_write_denoising_features_surface(INTEGRATOR_STATE_PASS, &sd, render_buffer); +#endif + +#ifdef __SHADOW_CATCHER__ + kernel_write_shadow_catcher_bounce_data(INTEGRATOR_STATE_PASS, &sd, render_buffer); +#endif + + /* Direct light. */ + PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT); + integrate_surface_direct_light(INTEGRATOR_STATE_PASS, &sd, &rng_state); + +#if defined(__AO__) && defined(__SHADER_RAYTRACE__) + /* Ambient occlusion pass. */ + if (node_feature_mask & KERNEL_FEATURE_NODE_RAYTRACE) { + if ((kernel_data.film.pass_ao != PASS_UNUSED) && + (INTEGRATOR_STATE(path, flag) & PATH_RAY_CAMERA)) { + PROFILING_EVENT(PROFILING_SHADE_SURFACE_AO); + integrate_surface_ao_pass(INTEGRATOR_STATE_PASS, &sd, &rng_state, render_buffer); + } + } +#endif + + PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT); + continue_path_label = integrate_surface_bsdf_bssrdf_bounce( + INTEGRATOR_STATE_PASS, &sd, &rng_state); +#ifdef __VOLUME__ + } + else { + PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT); + continue_path_label = integrate_surface_volume_only_bounce(INTEGRATOR_STATE_PASS, &sd); + } + + if (continue_path_label & LABEL_TRANSMIT) { + /* Enter/Exit volume. 
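/* The continuation-probability handling above is the compensation half of Russian
 * roulette: the survive/terminate decision is made in the intersect_closest kernel, and
 * here a surviving path divides its throughput by the survival probability. That division
 * is what keeps the estimator unbiased, since E[X/p] * p + 0 * (1 - p) = E[X]. A minimal
 * generic sketch with a hypothetical random number already in hand: */
ccl_device_inline bool russian_roulette_example(const float rand_term,
                                                const float probability,
                                                float3 *throughput)
{
  if (probability == 0.0f || rand_term >= probability) {
    return false; /* Terminate the path. */
  }
  *throughput = *throughput / probability; /* Compensate surviving paths. */
  return true;
}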
*/ + volume_stack_enter_exit(INTEGRATOR_STATE_PASS, &sd); + } +#endif + + return continue_path_label != 0; +} + +template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE, + int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> +ccl_device_forceinline void integrator_shade_surface(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + if (integrate_surface<node_feature_mask>(INTEGRATOR_STATE_PASS, render_buffer)) { + if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE) { + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + } + else { + kernel_assert(INTEGRATOR_STATE(ray, t) != 0.0f); + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + } + } + else { + INTEGRATOR_PATH_TERMINATE(current_kernel); + } +} + +ccl_device_forceinline void integrator_shade_surface_raytrace( + INTEGRATOR_STATE_ARGS, ccl_global float *ccl_restrict render_buffer) +{ + integrator_shade_surface<KERNEL_FEATURE_NODE_MASK_SURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE>(INTEGRATOR_STATE_PASS, + render_buffer); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h new file mode 100644 index 00000000000..4a864b1e6ce --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h @@ -0,0 +1,1015 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/kernel_accumulate.h" +#include "kernel/kernel_emission.h" +#include "kernel/kernel_light.h" +#include "kernel/kernel_passes.h" +#include "kernel/kernel_path_state.h" +#include "kernel/kernel_shader.h" + +#include "kernel/integrator/integrator_intersect_closest.h" +#include "kernel/integrator/integrator_volume_stack.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __VOLUME__ + +/* Events for probalistic scattering */ + +typedef enum VolumeIntegrateEvent { + VOLUME_PATH_SCATTERED = 0, + VOLUME_PATH_ATTENUATED = 1, + VOLUME_PATH_MISSED = 2 +} VolumeIntegrateEvent; + +typedef struct VolumeIntegrateResult { + /* Throughput and offset for direct light scattering. */ + bool direct_scatter; + float3 direct_throughput; + float direct_t; + ShaderVolumePhases direct_phases; + + /* Throughput and offset for indirect light scattering. */ + bool indirect_scatter; + float3 indirect_throughput; + float indirect_t; + ShaderVolumePhases indirect_phases; +} VolumeIntegrateResult; + +/* Ignore paths that have volume throughput below this value, to avoid unnecessary work + * and precision issues. + * todo: this value could be tweaked or turned into a probability to avoid unnecessary + * work in volumes and subsurface scattering. 
*/ +# define VOLUME_THROUGHPUT_EPSILON 1e-6f + +/* Volume shader properties + * + * extinction coefficient = absorption coefficient + scattering coefficient + * sigma_t = sigma_a + sigma_s */ + +typedef struct VolumeShaderCoefficients { + float3 sigma_t; + float3 sigma_s; + float3 emission; +} VolumeShaderCoefficients; + +/* Evaluate shader to get extinction coefficient at P. */ +ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS, + ShaderData *ccl_restrict sd, + float3 *ccl_restrict extinction) +{ + shader_eval_volume(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) { + return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i); + }); + + if (!(sd->flag & SD_EXTINCTION)) { + return false; + } + + const float density = object_volume_density(kg, sd->object); + *extinction = sd->closure_transparent_extinction * density; + return true; +} + +/* Evaluate shader to get absorption, scattering and emission at P. */ +ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS, + ShaderData *ccl_restrict sd, + VolumeShaderCoefficients *coeff) +{ + const int path_flag = INTEGRATOR_STATE(path, flag); + shader_eval_volume(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) { + return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + }); + + if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) { + return false; + } + + coeff->sigma_s = zero_float3(); + coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3(); + coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_float3(); + + if (sd->flag & SD_SCATTER) { + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_VOLUME(sc->type)) { + coeff->sigma_s += sc->weight; + } + } + } + + const float density = object_volume_density(kg, sd->object); + coeff->sigma_s *= density; + coeff->sigma_t *= density; + coeff->emission *= density; + + return true; +} + +ccl_device_forceinline void volume_step_init(const KernelGlobals *kg, + const RNGState *rng_state, + const float object_step_size, + float t, + float *step_size, + float *step_shade_offset, + float *steps_offset, + int *max_steps) +{ + if (object_step_size == FLT_MAX) { + /* Homogeneous volume. */ + *step_size = t; + *step_shade_offset = 0.0f; + *steps_offset = 1.0f; + *max_steps = 1; + } + else { + /* Heterogeneous volume. */ + *max_steps = kernel_data.integrator.volume_max_steps; + float step = min(object_step_size, t); + + /* compute exact steps in advance for malloc */ + if (t > *max_steps * step) { + step = t / (float)*max_steps; + } + + *step_size = step; + + /* Perform shading at this offset within a step, to integrate over + * over the entire step segment. */ + *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4); + + /* Shift starting point of all segment by this random amount to avoid + * banding artifacts from the volume bounding shape. */ + *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3); + } +} + +/* Volume Shadows + * + * These functions are used to attenuate shadow rays to lights. Both absorption + * and scattering will block light, represented by the extinction coefficient. 
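/* Attenuation along a segment follows the Beer-Lambert law: transmittance
 * T(t) = exp(-sigma_t * t), evaluated per color channel. This is the relation that
 * volume_color_transmittance() used by the shadow and distance-sampling code below
 * implements; a minimal sketch using the exp3() helper seen in these kernels: */
ccl_device_inline float3 beer_lambert_example(const float3 sigma_t, const float t)
{
  return exp3(-sigma_t * t);
}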
*/ + +# if 0 +/* homogeneous volume: assume shader evaluation at the starts gives + * the extinction coefficient for the entire line segment */ +ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS, + Ray *ccl_restrict ray, + ShaderData *ccl_restrict sd, + float3 *ccl_restrict throughput) +{ + float3 sigma_t = zero_float3(); + + if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) { + *throughput *= volume_color_transmittance(sigma_t, ray->t); + } +} +# endif + +/* heterogeneous volume: integrate stepping through the volume until we + * reach the end, get absorbed entirely, or run out of iterations */ +ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS, + Ray *ccl_restrict ray, + ShaderData *ccl_restrict sd, + float3 *ccl_restrict throughput, + const float object_step_size) +{ + /* Load random number state. */ + RNGState rng_state; + shadow_path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + + float3 tp = *throughput; + + /* Prepare for stepping. + * For shadows we do not offset all segments, since the starting point is + * already a random distance inside the volume. It also appears to create + * banding artifacts for unknown reasons. */ + int max_steps; + float step_size, step_shade_offset, unused; + volume_step_init(kg, + &rng_state, + object_step_size, + ray->t, + &step_size, + &step_shade_offset, + &unused, + &max_steps); + const float steps_offset = 1.0f; + + /* compute extinction at the start */ + float t = 0.0f; + + float3 sum = zero_float3(); + + for (int i = 0; i < max_steps; i++) { + /* advance to new position */ + float new_t = min(ray->t, (i + steps_offset) * step_size); + float dt = new_t - t; + + float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset); + float3 sigma_t = zero_float3(); + + /* compute attenuation over segment */ + sd->P = new_P; + if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) { + /* Compute expf() only for every Nth step, to save some calculations + * because exp(a)*exp(b) = exp(a+b), also do a quick VOLUME_THROUGHPUT_EPSILON + * check then. */ + sum += (-sigma_t * dt); + if ((i & 0x07) == 0) { /* ToDo: Other interval? 
*/ + tp = *throughput * exp3(sum); + + /* stop if nearly all light is blocked */ + if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON && + tp.z < VOLUME_THROUGHPUT_EPSILON) + break; + } + } + + /* stop if at the end of the volume */ + t = new_t; + if (t == ray->t) { + /* Update throughput in case we haven't done it above */ + tp = *throughput * exp3(sum); + break; + } + } + + *throughput = tp; +} + +/* Equi-angular sampling as in: + * "Importance Sampling Techniques for Path Tracing in Participating Media" */ + +ccl_device float volume_equiangular_sample(const Ray *ccl_restrict ray, + const float3 light_P, + const float xi, + float *pdf) +{ + const float t = ray->t; + const float delta = dot((light_P - ray->P), ray->D); + const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + if (UNLIKELY(D == 0.0f)) { + *pdf = 0.0f; + return 0.0f; + } + const float theta_a = -atan2f(delta, D); + const float theta_b = atan2f(t - delta, D); + const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); + if (UNLIKELY(theta_b == theta_a)) { + *pdf = 0.0f; + return 0.0f; + } + *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); + + return min(t, delta + t_); /* min is only for float precision errors */ +} + +ccl_device float volume_equiangular_pdf(const Ray *ccl_restrict ray, + const float3 light_P, + const float sample_t) +{ + const float delta = dot((light_P - ray->P), ray->D); + const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + if (UNLIKELY(D == 0.0f)) { + return 0.0f; + } + + const float t = ray->t; + const float t_ = sample_t - delta; + + const float theta_a = -atan2f(delta, D); + const float theta_b = atan2f(t - delta, D); + if (UNLIKELY(theta_b == theta_a)) { + return 0.0f; + } + + const float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); + + return pdf; +} + +ccl_device float volume_equiangular_cdf(const Ray *ccl_restrict ray, + const float3 light_P, + const float sample_t) +{ + float delta = dot((light_P - ray->P), ray->D); + float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + if (UNLIKELY(D == 0.0f)) { + return 0.0f; + } + + const float t = ray->t; + const float t_ = sample_t - delta; + + const float theta_a = -atan2f(delta, D); + const float theta_b = atan2f(t - delta, D); + if (UNLIKELY(theta_b == theta_a)) { + return 0.0f; + } + + const float theta_sample = atan2f(t_, D); + const float cdf = (theta_sample - theta_a) / (theta_b - theta_a); + + return cdf; +} + +/* Distance sampling */ + +ccl_device float volume_distance_sample( + float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf) +{ + /* xi is [0, 1[ so log(0) should never happen, division by zero is + * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ + float sample_sigma_t = volume_channel_get(sigma_t, channel); + float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + float sample_transmittance = volume_channel_get(full_transmittance, channel); + + float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t); + + *transmittance = volume_color_transmittance(sigma_t, sample_t); + *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - full_transmittance); + + /* todo: optimization: when taken together with hit/miss decision, + * the full_transmittance cancels out drops out and xi does not + * need to be remapped */ + + return sample_t; +} + +ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t) +{ + float3 
full_transmittance = volume_color_transmittance(sigma_t, max_t); + float3 transmittance = volume_color_transmittance(sigma_t, sample_t); + + return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance); +} + +/* Emission */ + +ccl_device float3 volume_emission_integrate(VolumeShaderCoefficients *coeff, + int closure_flag, + float3 transmittance, + float t) +{ + /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t + * this goes to E * t as sigma_t goes to zero + * + * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ + float3 emission = coeff->emission; + + if (closure_flag & SD_EXTINCTION) { + float3 sigma_t = coeff->sigma_t; + + emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t; + emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t; + emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t; + } + else + emission *= t; + + return emission; +} + +/* Volume Integration */ + +typedef struct VolumeIntegrateState { + /* Volume segment extents. */ + float start_t; + float end_t; + + /* If volume is absorption-only up to this point, and no probabilistic + * scattering or termination has been used yet. */ + bool absorption_only; + + /* Random numbers for scattering. */ + float rscatter; + float rphase; + + /* Multiple importance sampling. */ + VolumeSampleMethod direct_sample_method; + bool use_mis; + float distance_pdf; + float equiangular_pdf; +} VolumeIntegrateState; + +ccl_device_forceinline void volume_integrate_step_scattering( + const ShaderData *sd, + const Ray *ray, + const float3 equiangular_light_P, + const VolumeShaderCoefficients &ccl_restrict coeff, + const float3 transmittance, + VolumeIntegrateState &ccl_restrict vstate, + VolumeIntegrateResult &ccl_restrict result) +{ + /* Pick random color channel, we use the Veach one-sample + * model with balance heuristic for the channels. */ + const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + float3 channel_pdf; + const int channel = volume_sample_channel( + albedo, result.indirect_throughput, vstate.rphase, &channel_pdf); + + /* Equiangular sampling for direct lighting. */ + if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) { + if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t) { + const float new_dt = result.direct_t - vstate.start_t; + const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + + result.direct_scatter = true; + result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf; + shader_copy_volume_phases(&result.direct_phases, sd); + + /* Multiple importance sampling. */ + if (vstate.use_mis) { + const float distance_pdf = vstate.distance_pdf * + dot(channel_pdf, coeff.sigma_t * new_transmittance); + const float mis_weight = 2.0f * power_heuristic(vstate.equiangular_pdf, distance_pdf); + result.direct_throughput *= mis_weight; + } + } + else { + result.direct_throughput *= transmittance; + vstate.distance_pdf *= dot(channel_pdf, transmittance); + } + } + + /* Distance sampling for indirect and optional direct lighting. 
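/* The equiangular strategy above importance samples distance along the ray roughly
 * proportional to the inverse squared distance to the light, using the angular
 * parameterization from the paper cited earlier. A small consistency sketch, assuming the
 * two helpers defined above are in scope: the pdf reported by the sampler should match
 * volume_equiangular_pdf() at the sampled distance, up to the clamp against the ray
 * length and float precision. */
ccl_device_inline bool equiangular_consistency_example(const Ray *ray,
                                                       const float3 light_P,
                                                       const float xi)
{
  float sample_pdf;
  const float t = volume_equiangular_sample(ray, light_P, xi, &sample_pdf);
  const float recomputed_pdf = volume_equiangular_pdf(ray, light_P, t);
  return fabsf(sample_pdf - recomputed_pdf) < 1e-4f;
}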
*/ + if (!result.indirect_scatter) { + /* decide if we will scatter or continue */ + const float sample_transmittance = volume_channel_get(transmittance, channel); + + if (1.0f - vstate.rscatter >= sample_transmittance) { + /* compute sampling distance */ + const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel); + const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t; + const float new_t = vstate.start_t + new_dt; + + /* transmittance and pdf */ + const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance); + + /* throughput */ + result.indirect_scatter = true; + result.indirect_t = new_t; + result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf; + shader_copy_volume_phases(&result.indirect_phases, sd); + + if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { + /* If using distance sampling for direct light, just copy parameters + * of indirect light since we scatter at the same point then. */ + result.direct_scatter = true; + result.direct_t = result.indirect_t; + result.direct_throughput = result.indirect_throughput; + shader_copy_volume_phases(&result.direct_phases, sd); + + /* Multiple importance sampling. */ + if (vstate.use_mis) { + const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t); + const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf, + equiangular_pdf); + result.direct_throughput *= 2.0f * mis_weight; + } + } + } + else { + /* throughput */ + const float pdf = dot(channel_pdf, transmittance); + result.indirect_throughput *= transmittance / pdf; + if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { + vstate.distance_pdf *= pdf; + } + + /* remap rscatter so we can reuse it and keep thing stratified */ + vstate.rscatter = 1.0f - (1.0f - vstate.rscatter) / sample_transmittance; + } + } +} + +/* heterogeneous volume distance sampling: integrate stepping through the + * volume until we reach the end, get absorbed entirely, or run out of + * iterations. this does probabilistically scatter or get transmitted through + * for path tracing where we don't want to branch. */ +ccl_device_forceinline void volume_integrate_heterogeneous( + INTEGRATOR_STATE_ARGS, + Ray *ccl_restrict ray, + ShaderData *ccl_restrict sd, + const RNGState *rng_state, + ccl_global float *ccl_restrict render_buffer, + const float object_step_size, + const VolumeSampleMethod direct_sample_method, + const float3 equiangular_light_P, + VolumeIntegrateResult &result) +{ + PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE); + + /* Prepare for stepping. + * Using a different step offset for the first step avoids banding artifacts. */ + int max_steps; + float step_size, step_shade_offset, steps_offset; + volume_step_init(kg, + rng_state, + object_step_size, + ray->t, + &step_size, + &step_shade_offset, + &steps_offset, + &max_steps); + + /* Initialize volume integration state. */ + VolumeIntegrateState vstate ccl_optional_struct_init; + vstate.start_t = 0.0f; + vstate.end_t = 0.0f; + vstate.absorption_only = true; + vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE); + vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL); + + /* Multiple importance sampling: pick between equiangular and distance sampling strategy. 
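/* The scatter decision above inverts the exponential distance CDF within one step: with
 * transmittance T = exp(-sigma_t * dt_max) for the chosen channel, a scatter event occurs
 * when 1 - xi >= T, and the scatter distance is dt = -log(1 - xi) / sigma_t (which is then
 * guaranteed to lie inside the step). Otherwise xi is remapped so the leftover random
 * number stays stratified for the next step. A minimal per-channel sketch of both
 * branches: */
ccl_device_inline bool distance_sample_step_example(const float sigma_t,
                                                    const float dt_max,
                                                    float *xi,
                                                    float *dt)
{
  const float transmittance = expf(-sigma_t * dt_max);
  if (1.0f - *xi >= transmittance) {
    *dt = -logf(1.0f - *xi) / sigma_t; /* Scatter inside this step. */
    return true;
  }
  *xi = 1.0f - (1.0f - *xi) / transmittance; /* Continue; reuse the remapped number. */
  return false;
}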
*/ + vstate.direct_sample_method = direct_sample_method; + vstate.use_mis = (direct_sample_method == VOLUME_SAMPLE_MIS); + if (vstate.use_mis) { + if (vstate.rscatter < 0.5f) { + vstate.rscatter *= 2.0f; + vstate.direct_sample_method = VOLUME_SAMPLE_DISTANCE; + } + else { + vstate.rscatter = (vstate.rscatter - 0.5f) * 2.0f; + vstate.direct_sample_method = VOLUME_SAMPLE_EQUIANGULAR; + } + } + vstate.equiangular_pdf = 0.0f; + vstate.distance_pdf = 1.0f; + + /* Initialize volume integration result. */ + const float3 throughput = INTEGRATOR_STATE(path, throughput); + result.direct_throughput = throughput; + result.indirect_throughput = throughput; + + /* Equiangular sampling: compute distance and PDF in advance. */ + if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) { + result.direct_t = volume_equiangular_sample( + ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf); + } + +# ifdef __DENOISING_FEATURES__ + const bool write_denoising_features = (INTEGRATOR_STATE(path, flag) & + PATH_RAY_DENOISING_FEATURES); + float3 accum_albedo = zero_float3(); +# endif + float3 accum_emission = zero_float3(); + + for (int i = 0; i < max_steps; i++) { + /* Advance to new position */ + vstate.end_t = min(ray->t, (i + steps_offset) * step_size); + const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset; + sd->P = ray->P + ray->D * shade_t; + + /* compute segment */ + VolumeShaderCoefficients coeff ccl_optional_struct_init; + if (volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &coeff)) { + const int closure_flag = sd->flag; + + /* Evaluate transmittance over segment. */ + const float dt = (vstate.end_t - vstate.start_t); + const float3 transmittance = (closure_flag & SD_EXTINCTION) ? + volume_color_transmittance(coeff.sigma_t, dt) : + one_float3(); + + /* Emission. */ + if (closure_flag & SD_EMISSION) { + /* Only write emission before indirect light scatter position, since we terminate + * stepping at that point if we have already found a direct light scatter position. */ + if (!result.indirect_scatter) { + const float3 emission = volume_emission_integrate( + &coeff, closure_flag, transmittance, dt); + accum_emission += emission; + } + } + + if (closure_flag & SD_EXTINCTION) { + if ((closure_flag & SD_SCATTER) || !vstate.absorption_only) { +# ifdef __DENOISING_FEATURES__ + /* Accumulate albedo for denoising features. */ + if (write_denoising_features && (closure_flag & SD_SCATTER)) { + const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance); + } +# endif + + /* Scattering and absorption. */ + volume_integrate_step_scattering( + sd, ray, equiangular_light_P, coeff, transmittance, vstate, result); + } + else { + /* Absorption only. */ + result.indirect_throughput *= transmittance; + result.direct_throughput *= transmittance; + } + + /* Stop if nearly all light blocked. */ + if (!result.indirect_scatter) { + if (max3(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) { + result.indirect_throughput = zero_float3(); + break; + } + } + else if (!result.direct_scatter) { + if (max3(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) { + break; + } + } + } + + /* If we have scattering data for both direct and indirect, we're done. */ + if (result.direct_scatter && result.indirect_scatter) { + break; + } + } + + /* Stop if at the end of the volume. 
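/* Where the 2.0f factor in the MIS weights above comes from: with the one-sample model,
 * the distance and equiangular strategies are each selected with probability 0.5, so the
 * estimator divides by that selection probability in addition to applying the power
 * heuristic. A minimal sketch of the combined weight: */
ccl_device_inline float one_sample_mis_weight_example(const float pdf_used,
                                                      const float pdf_other)
{
  const float selection_probability = 0.5f;
  const float ph = (pdf_used * pdf_used) / (pdf_used * pdf_used + pdf_other * pdf_other);
  return ph / selection_probability; /* == 2.0f * power_heuristic(pdf_used, pdf_other) */
}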
*/ + vstate.start_t = vstate.end_t; + if (vstate.start_t == ray->t) { + break; + } + } + + /* Write accumulated emisison. */ + if (!is_zero(accum_emission)) { + kernel_accum_emission( + INTEGRATOR_STATE_PASS, result.indirect_throughput, accum_emission, render_buffer); + } + +# ifdef __DENOISING_FEATURES__ + /* Write denoising features. */ + if (write_denoising_features) { + kernel_write_denoising_features_volume( + INTEGRATOR_STATE_PASS, accum_albedo, result.indirect_scatter, render_buffer); + } +# endif /* __DENOISING_FEATURES__ */ +} + +# ifdef __EMISSION__ +/* Path tracing: sample point on light and evaluate light shader, then + * queue shadow ray to be traced. */ +ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS, + const ShaderData *ccl_restrict sd, + const RNGState *ccl_restrict rng_state, + LightSample *ccl_restrict ls) +{ + /* Test if there is a light or BSDF that needs direct light. */ + if (!kernel_data.integrator.use_direct_light) { + return false; + } + + /* Sample position on a light. */ + const int path_flag = INTEGRATOR_STATE(path, flag); + const uint bounce = INTEGRATOR_STATE(path, bounce); + float light_u, light_v; + path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + + light_distribution_sample_from_volume_segment( + kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls); + + if (ls->shader & SHADER_EXCLUDE_SCATTER) { + return false; + } + + return true; +} + +/* Path tracing: sample point on light and evaluate light shader, then + * queue shadow ray to be traced. */ +ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS, + const ShaderData *ccl_restrict sd, + const RNGState *ccl_restrict rng_state, + const float3 P, + const ShaderVolumePhases *ccl_restrict + phases, + const float3 throughput, + LightSample *ccl_restrict ls) +{ + PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT); + + if (!kernel_data.integrator.use_direct_light) { + return; + } + + /* Sample position on the same light again, now from the shading + * point where we scattered. + * + * TODO: decorrelate random numbers and use light_sample_new_position to + * avoid resampling the CDF. */ + { + const int path_flag = INTEGRATOR_STATE(path, flag); + const uint bounce = INTEGRATOR_STATE(path, bounce); + float light_u, light_v; + path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + + if (!light_distribution_sample_from_position( + kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) { + return; + } + } + + /* Evaluate light shader. + * + * TODO: can we reuse sd memory? In theory we can move this after + * integrate_surface_bounce, evaluate the BSDF, and only then evaluate + * the light shader. This could also move to its own kernel, for + * non-constant light sources. */ + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + const float3 light_eval = light_sample_shader_eval( + INTEGRATOR_STATE_PASS, emission_sd, ls, sd->time); + if (is_zero(light_eval)) { + return; + } + + /* Evaluate BSDF. */ + BsdfEval phase_eval ccl_optional_struct_init; + const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval); + + if (ls->shader & SHADER_USE_MIS) { + float mis_weight = power_heuristic(ls->pdf, phase_pdf); + bsdf_eval_mul(&phase_eval, mis_weight); + } + + bsdf_eval_mul3(&phase_eval, light_eval / ls->pdf); + + /* Path termination. 
*/ + const float terminate = path_state_rng_light_termination(kg, rng_state); + if (light_sample_terminate(kg, ls, &phase_eval, terminate)) { + return; + } + + /* Create shadow ray. */ + Ray ray ccl_optional_struct_init; + light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray); + const bool is_light = light_sample_is_light(ls); + + /* Write shadow ray and associated state to global memory. */ + integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray); + + /* Copy state from main path to shadow path. */ + const uint16_t bounce = INTEGRATOR_STATE(path, bounce); + const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce); + uint32_t shadow_flag = INTEGRATOR_STATE(path, flag); + shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; + shadow_flag |= PATH_RAY_VOLUME_PASS; + const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + const float3 diffuse_glossy_ratio = (bounce == 0) ? + one_float3() : + INTEGRATOR_STATE(path, diffuse_glossy_ratio); + INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; + } + + INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput_phase; + + if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { + INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput; + } + + integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS); + + /* Branch off shadow kernel. */ + INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); +} +# endif + +/* Path tracing: scatter in new direction using phase function */ +ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS, + ShaderData *sd, + const RNGState *rng_state, + const ShaderVolumePhases *phases) +{ + PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT); + + float phase_u, phase_v; + path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v); + + /* Phase closure, sample direction. */ + float phase_pdf; + BsdfEval phase_eval ccl_optional_struct_init; + float3 phase_omega_in ccl_optional_struct_init; + differential3 phase_domega_in ccl_optional_struct_init; + + const int label = shader_volume_phase_sample(kg, + sd, + phases, + phase_u, + phase_v, + &phase_eval, + &phase_omega_in, + &phase_domega_in, + &phase_pdf); + + if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) { + return false; + } + + /* Setup ray. */ + INTEGRATOR_STATE_WRITE(ray, P) = sd->P; + INTEGRATOR_STATE_WRITE(ray, D) = normalize(phase_omega_in); + INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX; + +# ifdef __RAY_DIFFERENTIALS__ + INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(phase_domega_in); +# endif + + /* Update throughput. 
*/ + const float3 throughput = INTEGRATOR_STATE(path, throughput); + const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; + INTEGRATOR_STATE_WRITE(path, throughput) = throughput_phase; + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3(); + } + + /* Update path state */ + INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = phase_pdf; + INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(phase_pdf, + INTEGRATOR_STATE(path, min_ray_pdf)); + + path_state_next(INTEGRATOR_STATE_PASS, label); + return true; +} + +/* get the volume attenuation and emission over line segment defined by + * ray, with the assumption that there are no surfaces blocking light + * between the endpoints. distance sampling is used to decide if we will + * scatter or not. */ +ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS, + Ray *ccl_restrict ray, + ccl_global float *ccl_restrict render_buffer) +{ + ShaderData sd; + shader_setup_from_volume(kg, &sd, ray); + + /* Load random number state. */ + RNGState rng_state; + path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + + /* Sample light ahead of volume stepping, for equiangular sampling. */ + /* TODO: distant lights are ignored now, but could instead use even distribution. */ + LightSample ls ccl_optional_struct_init; + const bool need_light_sample = !(INTEGRATOR_STATE(path, flag) & PATH_RAY_TERMINATE); + const bool have_equiangular_sample = need_light_sample && + integrate_volume_sample_light( + INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls) && + (ls.t != FLT_MAX); + + VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ? + volume_stack_sample_method(INTEGRATOR_STATE_PASS) : + VOLUME_SAMPLE_DISTANCE; + + /* Step through volume. */ + const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) { + return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + }); + + /* TODO: expensive to zero closures? */ + VolumeIntegrateResult result = {}; + volume_integrate_heterogeneous(INTEGRATOR_STATE_PASS, + ray, + &sd, + &rng_state, + render_buffer, + step_size, + direct_sample_method, + ls.P, + result); + + /* Perform path termination. The intersect_closest will have already marked this path + * to be terminated. That will shading evaluating to leave out any scattering closures, + * but emission and absorption are still handled for multiple importance sampling. */ + const uint32_t path_flag = INTEGRATOR_STATE(path, flag); + const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ? + 0.0f : + path_state_continuation_probability(INTEGRATOR_STATE_PASS, + path_flag); + if (probability == 0.0f) { + return VOLUME_PATH_MISSED; + } + + /* Direct light. */ + if (result.direct_scatter) { + const float3 direct_P = ray->P + result.direct_t * ray->D; + result.direct_throughput /= probability; + integrate_volume_direct_light(INTEGRATOR_STATE_PASS, + &sd, + &rng_state, + direct_P, + &result.direct_phases, + result.direct_throughput, + &ls); + } + + /* Indirect light. + * + * Only divide throughput by probability if we scatter. For the attenuation + * case the next surface will already do this division. 
*/ + if (result.indirect_scatter) { + result.indirect_throughput /= probability; + } + INTEGRATOR_STATE_WRITE(path, throughput) = result.indirect_throughput; + + if (result.indirect_scatter) { + sd.P = ray->P + result.indirect_t * ray->D; + + if (integrate_volume_phase_scatter( + INTEGRATOR_STATE_PASS, &sd, &rng_state, &result.indirect_phases)) { + return VOLUME_PATH_SCATTERED; + } + else { + return VOLUME_PATH_MISSED; + } + } + else { + return VOLUME_PATH_ATTENUATED; + } +} + +#endif + +ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS, + ccl_global float *ccl_restrict render_buffer) +{ + PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_SETUP); + +#ifdef __VOLUME__ + /* Setup shader data. */ + Ray ray ccl_optional_struct_init; + integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray); + + Intersection isect ccl_optional_struct_init; + integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect); + + /* Set ray length to current segment. */ + ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; + + /* Clean volume stack for background rays. */ + if (isect.prim == PRIM_NONE) { + volume_stack_clean(INTEGRATOR_STATE_PASS); + } + + VolumeIntegrateEvent event = volume_integrate(INTEGRATOR_STATE_PASS, &ray, render_buffer); + + if (event == VOLUME_PATH_SCATTERED) { + /* Queue intersect_closest kernel. */ + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + return; + } + else if (event == VOLUME_PATH_MISSED) { + /* End path. */ + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + return; + } + else { + /* Continue to background, light or surface. */ + if (isect.prim == PRIM_NONE) { + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + return; + } + else if (isect.type & PRIMITIVE_LAMP) { + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + return; + } + else { + /* Hit a surface, continue with surface kernel unless terminated. */ + const int shader = intersection_get_shader(kg, &isect); + const int flags = kernel_tex_fetch(__shaders, shader).flags; + + integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>( + INTEGRATOR_STATE_PASS, &isect, shader, flags); + return; + } + } +#endif /* __VOLUME__ */ +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h new file mode 100644 index 00000000000..8cef9cf31e2 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_state.h @@ -0,0 +1,185 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Integrator State + * + * This file defines the data structures that define the state of a path. Any state that is + * preserved and passed between kernel executions is part of this. 
+ * + * The size of this state must be kept as small as possible, to reduce cache misses and keep memory + * usage under control on GPUs that may execute millions of kernels. + * + * Memory may be allocated and passed along in different ways depending on the device. There may + * be a scalar layout, or AoS or SoA layout for batches. The state may be passed along as a pointer + * to every kernel, or the pointer may exist at program scope or in constant memory. To abstract + * these differences between devices and experiment with different layouts, macros are used. + * + * INTEGRATOR_STATE_ARGS: prepend to argument definitions for every function that accesses + * path state. + * INTEGRATOR_STATE_CONST_ARGS: same as INTEGRATOR_STATE_ARGS, when state is read-only + * INTEGRATOR_STATE_PASS: use to pass along state to other functions access it. + * + * INTEGRATOR_STATE(x, y): read nested struct member x.y of IntegratorState + * INTEGRATOR_STATE_WRITE(x, y): write to nested struct member x.y of IntegratorState + * + * INTEGRATOR_STATE_ARRAY(x, index, y): read x[index].y + * INTEGRATOR_STATE_ARRAY_WRITE(x, index, y): write x[index].y + * + * INTEGRATOR_STATE_COPY(to_x, from_x): copy contents of one nested struct to another + * + * INTEGRATOR_STATE_IS_NULL: test if any integrator state is available, for shader evaluation + * INTEGRATOR_STATE_PASS_NULL: use to pass empty state to other functions. + * + * NOTE: if we end up with a device that passes no arguments, the leading comma will be a problem. + * Can solve it with more macros if we encouter it, but rather ugly so postpone for now. + */ + +#include "kernel/kernel_types.h" + +#include "util/util_types.h" + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Constants + * + * TODO: these could be made dynamic depending on the features used in the scene. */ + +#define INTEGRATOR_VOLUME_STACK_SIZE VOLUME_STACK_SIZE +#define INTEGRATOR_SHADOW_ISECT_SIZE 4 + +/* Data structures */ + +/* Integrator State + * + * CPU rendering path state with AoS layout. */ +typedef struct IntegratorStateCPU { +#define KERNEL_STRUCT_BEGIN(name) struct { +#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name; +#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER +#define KERNEL_STRUCT_END(name) \ + } \ + name; +#define KERNEL_STRUCT_END_ARRAY(name, size) \ + } \ + name[size]; +#include "kernel/integrator/integrator_state_template.h" +#undef KERNEL_STRUCT_BEGIN +#undef KERNEL_STRUCT_MEMBER +#undef KERNEL_STRUCT_ARRAY_MEMBER +#undef KERNEL_STRUCT_END +#undef KERNEL_STRUCT_END_ARRAY +} IntegratorStateCPU; + +/* Path Queue + * + * Keep track of which kernels are queued to be executed next in the path + * for GPU rendering. */ +typedef struct IntegratorQueueCounter { + int num_queued[DEVICE_KERNEL_INTEGRATOR_NUM]; +} IntegratorQueueCounter; + +/* Integrator State GPU + * + * GPU rendering path state with SoA layout. */ +typedef struct IntegratorStateGPU { +#define KERNEL_STRUCT_BEGIN(name) struct { +#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type *name; +#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER +#define KERNEL_STRUCT_END(name) \ + } \ + name; +#define KERNEL_STRUCT_END_ARRAY(name, size) \ + } \ + name[size]; +#include "kernel/integrator/integrator_state_template.h" +#undef KERNEL_STRUCT_BEGIN +#undef KERNEL_STRUCT_MEMBER +#undef KERNEL_STRUCT_ARRAY_MEMBER +#undef KERNEL_STRUCT_END +#undef KERNEL_STRUCT_END_ARRAY + + /* Count number of queued kernels. 
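/* To make the X-macro expansion above concrete: for a schematic template entry describing
 * the "path" struct with a "throughput" member (the real integrator_state_template.h is
 * not part of this file, so the entry and feature flag here are placeholders, but a
 * path.throughput member is used throughout these kernels), the two expansions are
 * roughly the following. */

/* AoS expansion used for IntegratorStateCPU: one struct per path. */
struct ExamplePathStateCPU {
  struct {
    float3 throughput;
  } path;
};

/* SoA expansion used for IntegratorStateGPU: one array per member, indexed by the
 * integer state of each path. */
struct ExamplePathStateGPU {
  struct {
    float3 *throughput;
  } path;
};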
*/ + IntegratorQueueCounter *queue_counter; + + /* Count number of kernels queued for specific shaders. */ + int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM]; + + /* Index of path which will be used by a next shadow catcher split. */ + int *next_shadow_catcher_path_index; +} IntegratorStateGPU; + +/* Abstraction + * + * Macros to access data structures on different devices. + * + * Note that there is a special access function for the shadow catcher state. This access is to + * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors + * from a kernel which operates on a shadow catcher state will cause bad memory acces. */ + +#ifdef __KERNEL_CPU__ + +/* Scalar access on CPU. */ + +typedef IntegratorStateCPU *ccl_restrict IntegratorState; + +# define INTEGRATOR_STATE_ARGS \ + ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \ + IntegratorStateCPU *ccl_restrict state +# define INTEGRATOR_STATE_CONST_ARGS \ + ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \ + const IntegratorStateCPU *ccl_restrict state +# define INTEGRATOR_STATE_PASS kg, state + +# define INTEGRATOR_STATE_PASS_NULL kg, NULL +# define INTEGRATOR_STATE_IS_NULL (state == NULL) + +# define INTEGRATOR_STATE(nested_struct, member) \ + (((const IntegratorStateCPU *)state)->nested_struct.member) +# define INTEGRATOR_STATE_WRITE(nested_struct, member) (state->nested_struct.member) + +# define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \ + (((const IntegratorStateCPU *)state)->nested_struct[array_index].member) +# define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \ + ((state)->nested_struct[array_index].member) + +#else /* __KERNEL_CPU__ */ + +/* Array access on GPU with Structure-of-Arrays. */ + +typedef int IntegratorState; + +# define INTEGRATOR_STATE_ARGS const KernelGlobals *ccl_restrict kg, const IntegratorState state +# define INTEGRATOR_STATE_CONST_ARGS \ + const KernelGlobals *ccl_restrict kg, const IntegratorState state +# define INTEGRATOR_STATE_PASS kg, state + +# define INTEGRATOR_STATE_PASS_NULL kg, -1 +# define INTEGRATOR_STATE_IS_NULL (state == -1) + +# define INTEGRATOR_STATE(nested_struct, member) \ + kernel_integrator_state.nested_struct.member[state] +# define INTEGRATOR_STATE_WRITE(nested_struct, member) INTEGRATOR_STATE(nested_struct, member) + +# define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \ + kernel_integrator_state.nested_struct[array_index].member[state] +# define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \ + INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) + +#endif /* __KERNEL_CPU__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_state_flow.h b/intern/cycles/kernel/integrator/integrator_state_flow.h new file mode 100644 index 00000000000..8477efd7b66 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_state_flow.h @@ -0,0 +1,144 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
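/* A minimal usage sketch of the access macros defined above, using members referenced
 * throughout these kernels; the same expression compiles to an AoS access on CPU and a
 * SoA access on GPU. */
ccl_device_inline float state_access_example(INTEGRATOR_STATE_CONST_ARGS)
{
  /* Roughly state->path.flag on CPU, kernel_integrator_state.path.flag[state] on GPU. */
  const uint32_t flag = INTEGRATOR_STATE(path, flag);

  /* Roughly state->shadow_isect[0].t on CPU,
   * kernel_integrator_state.shadow_isect[0].t[state] on GPU. */
  const float first_hit_t = INTEGRATOR_STATE_ARRAY(shadow_isect, 0, t);

  return (flag != 0) ? first_hit_t : 0.0f;
}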
diff --git a/intern/cycles/kernel/integrator/integrator_state_flow.h b/intern/cycles/kernel/integrator/integrator_state_flow.h new file mode 100644 index 00000000000..8477efd7b66 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_state_flow.h @@ -0,0 +1,144 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/kernel_types.h" +#include "util/util_atomic.h" + +CCL_NAMESPACE_BEGIN + +/* Control Flow + * + * Utilities for control flow between kernels. The implementation may differ per device + * or even be handled on the host side. To abstract these differences between devices, to + * allow experimenting with different implementations, and to help debugging, the control + * flow is wrapped in macros. + * + * There is a main path for regular path tracing of camera rays. Shadows for next + * event estimation branch off from this into their own path, which may be computed in + * parallel while the main path continues. + * + * Each kernel on the main path must call one of these functions. These may not be called + * multiple times from the same kernel. + * + * INTEGRATOR_PATH_INIT(next_kernel) + * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) + * INTEGRATOR_PATH_TERMINATE(current_kernel) + * + * For the shadow path similar functions are used, and again each shadow kernel must call + * one of them, and only once. + */ + +#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(path, queued_kernel) == 0) +#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED (INTEGRATOR_STATE(shadow_path, queued_kernel) == 0) + +#ifdef __KERNEL_GPU__ + +# define INTEGRATOR_PATH_INIT(next_kernel) \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ + 1); \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; +# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ + 1); \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; +# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; + +# define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ + 1); \ + INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; +# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ + 1); \ + INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; +# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; + +# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ + { \ + const int key_ = key; \ + atomic_fetch_and_add_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ + 1); \ + } +# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ + { \ + const int key_ = key; \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1);
\ + atomic_fetch_and_add_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ + 1); \ + } + +#else + +# define INTEGRATOR_PATH_INIT(next_kernel) \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; +# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ + { \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ + (void)key; \ + } +# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ + { \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ + (void)current_kernel; \ + } +# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ + { \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; \ + (void)current_kernel; \ + } +# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ + { \ + INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \ + (void)key; \ + (void)current_kernel; \ + } + +# define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \ + INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; +# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ + { \ + INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; \ + (void)current_kernel; \ + } +# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ + { \ + INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; \ + (void)current_kernel; \ + } + +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_state_template.h b/intern/cycles/kernel/integrator/integrator_state_template.h new file mode 100644 index 00000000000..41dd1bfcdbf --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_state_template.h @@ -0,0 +1,163 @@ + +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/************************************ Path State *****************************/ + +KERNEL_STRUCT_BEGIN(path) +/* Index of a pixel within the device render buffer where this path will write its result. + * To get an actual offset within the buffer the value needs to be multiplied by the + * `kernel_data.film.pass_stride`. + * + * The multiplication is delayed for later, so that state can use 32bit integer. */ +KERNEL_STRUCT_MEMBER(path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING) +/* Current sample number. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING) +/* Current ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current diffuse ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current glossy ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, glossy_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transmission ray bounce depth. 
*/ +KERNEL_STRUCT_MEMBER(path, uint16_t, transmission_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current volume ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current volume bounds ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transparent ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) +/* DeviceKernel bit indicating queued kernels. + * TODO: reduce size? */ +KERNEL_STRUCT_MEMBER(path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) +/* Random number generator seed. */ +KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING) +/* Random number dimension offset. */ +KERNEL_STRUCT_MEMBER(path, uint32_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) +/* enum PathRayFlag */ +KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) +/* Multiple importance sampling + * The PDF of BSDF sampling at the last scatter point, and distance to the + * last scatter point minus the last ray segment. This distance lets us + * compute the complete distance through transparent surfaces and volumes. */ +KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING) +/* Filter glossy. */ +KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING) +/* Throughput. */ +KERNEL_STRUCT_MEMBER(path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) +/* Ratio of throughput to distinguish diffuse and glossy render passes. */ +KERNEL_STRUCT_MEMBER(path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES) +/* Denoising. */ +KERNEL_STRUCT_MEMBER(path, float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) +/* Shader sorting. */ +/* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */ +KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(path) + +/************************************** Ray ***********************************/ + +KERNEL_STRUCT_BEGIN(ray) +KERNEL_STRUCT_MEMBER(ray, float3, P, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float3, D, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(ray) + +/*************************** Intersection result ******************************/ + +/* Result from scene intersection. */ +KERNEL_STRUCT_BEGIN(isect) +KERNEL_STRUCT_MEMBER(isect, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, float, u, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING) +/* TODO: exclude for GPU. 
*/ +KERNEL_STRUCT_MEMBER(isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(isect) + +/*************** Subsurface closure state for subsurface kernel ***************/ + +KERNEL_STRUCT_BEGIN(subsurface) +KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, float, roughness, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_END(subsurface) + +/********************************** Volume Stack ******************************/ + +KERNEL_STRUCT_BEGIN(volume_stack) +KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, object, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE) + +/********************************* Shadow Path State **************************/ + +KERNEL_STRUCT_BEGIN(shadow_path) +/* Current ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transparent ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) +/* DeviceKernel bit indicating queued kernels. + * TODO: reduce size? */ +KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) +/* enum PathRayFlag */ +KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) +/* Throughput. */ +KERNEL_STRUCT_MEMBER(shadow_path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) +/* Throughput for shadow pass. */ +KERNEL_STRUCT_MEMBER(shadow_path, float3, unshadowed_throughput, KERNEL_FEATURE_SHADOW_PASS) +/* Ratio of throughput to distinguish diffuse and glossy render passes. */ +KERNEL_STRUCT_MEMBER(shadow_path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES) +/* Number of intersections found by ray-tracing. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(shadow_path) + +/********************************** Shadow Ray *******************************/ + +KERNEL_STRUCT_BEGIN(shadow_ray) +KERNEL_STRUCT_MEMBER(shadow_ray, float3, P, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float3, D, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(shadow_ray) + +/*********************** Shadow Intersection result **************************/ + +/* Result from scene intersection. */ +KERNEL_STRUCT_BEGIN(shadow_isect) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, u, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING) +/* TODO: exclude for GPU. 
*/ +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END_ARRAY(shadow_isect, INTEGRATOR_SHADOW_ISECT_SIZE) + +/**************************** Shadow Volume Stack *****************************/ + +KERNEL_STRUCT_BEGIN(shadow_volume_stack) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, INTEGRATOR_VOLUME_STACK_SIZE) diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h new file mode 100644 index 00000000000..cdf412fe22f --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_state_util.h @@ -0,0 +1,273 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/integrator/integrator_state.h" +#include "kernel/kernel_differential.h" + +CCL_NAMESPACE_BEGIN + +/* Ray */ + +ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS, + const Ray *ccl_restrict ray) +{ + INTEGRATOR_STATE_WRITE(ray, P) = ray->P; + INTEGRATOR_STATE_WRITE(ray, D) = ray->D; + INTEGRATOR_STATE_WRITE(ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(ray, time) = ray->time; + INTEGRATOR_STATE_WRITE(ray, dP) = ray->dP; + INTEGRATOR_STATE_WRITE(ray, dD) = ray->dD; +} + +ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARGS, + Ray *ccl_restrict ray) +{ + ray->P = INTEGRATOR_STATE(ray, P); + ray->D = INTEGRATOR_STATE(ray, D); + ray->t = INTEGRATOR_STATE(ray, t); + ray->time = INTEGRATOR_STATE(ray, time); + ray->dP = INTEGRATOR_STATE(ray, dP); + ray->dD = INTEGRATOR_STATE(ray, dD); +} + +/* Shadow Ray */ + +ccl_device_forceinline void integrator_state_write_shadow_ray(INTEGRATOR_STATE_ARGS, + const Ray *ccl_restrict ray) +{ + INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray->P; + INTEGRATOR_STATE_WRITE(shadow_ray, D) = ray->D; + INTEGRATOR_STATE_WRITE(shadow_ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(shadow_ray, time) = ray->time; + INTEGRATOR_STATE_WRITE(shadow_ray, dP) = ray->dP; +} + +ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CONST_ARGS, + Ray *ccl_restrict ray) +{ + ray->P = INTEGRATOR_STATE(shadow_ray, P); + ray->D = INTEGRATOR_STATE(shadow_ray, D); + ray->t = INTEGRATOR_STATE(shadow_ray, t); + ray->time = INTEGRATOR_STATE(shadow_ray, time); + ray->dP = INTEGRATOR_STATE(shadow_ray, dP); + ray->dD = differential_zero_compact(); +} + +/* Intersection */ + +ccl_device_forceinline void integrator_state_write_isect(INTEGRATOR_STATE_ARGS, + const Intersection *ccl_restrict isect) +{ + INTEGRATOR_STATE_WRITE(isect, t) = isect->t; + INTEGRATOR_STATE_WRITE(isect, u) = isect->u; + INTEGRATOR_STATE_WRITE(isect, v) = isect->v; + INTEGRATOR_STATE_WRITE(isect, object) = isect->object; + INTEGRATOR_STATE_WRITE(isect, prim) = isect->prim; + INTEGRATOR_STATE_WRITE(isect, type) = isect->type; 
+#ifdef __EMBREE__ + INTEGRATOR_STATE_WRITE(isect, Ng) = isect->Ng; +#endif +} + +ccl_device_forceinline void integrator_state_read_isect(INTEGRATOR_STATE_CONST_ARGS, + Intersection *ccl_restrict isect) +{ + isect->prim = INTEGRATOR_STATE(isect, prim); + isect->object = INTEGRATOR_STATE(isect, object); + isect->type = INTEGRATOR_STATE(isect, type); + isect->u = INTEGRATOR_STATE(isect, u); + isect->v = INTEGRATOR_STATE(isect, v); + isect->t = INTEGRATOR_STATE(isect, t); +#ifdef __EMBREE__ + isect->Ng = INTEGRATOR_STATE(isect, Ng); +#endif +} + +ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(INTEGRATOR_STATE_CONST_ARGS, + int i) +{ + VolumeStack entry = {INTEGRATOR_STATE_ARRAY(volume_stack, i, object), + INTEGRATOR_STATE_ARRAY(volume_stack, i, shader)}; + return entry; +} + +ccl_device_forceinline void integrator_state_write_volume_stack(INTEGRATOR_STATE_ARGS, + int i, + VolumeStack entry) +{ + INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, object) = entry.object; + INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, shader) = entry.shader; +} + +ccl_device_forceinline bool integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_CONST_ARGS) +{ + return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ? + INTEGRATOR_STATE_ARRAY(volume_stack, 0, shader) == SHADER_NONE : + true; +} + +/* Shadow Intersection */ + +ccl_device_forceinline void integrator_state_write_shadow_isect( + INTEGRATOR_STATE_ARGS, const Intersection *ccl_restrict isect, const int index) +{ + INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, t) = isect->t; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, u) = isect->u; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, v) = isect->v; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, object) = isect->object; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, prim) = isect->prim; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, type) = isect->type; +#ifdef __EMBREE__ + INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, Ng) = isect->Ng; +#endif +} + +ccl_device_forceinline void integrator_state_read_shadow_isect(INTEGRATOR_STATE_CONST_ARGS, + Intersection *ccl_restrict isect, + const int index) +{ + isect->prim = INTEGRATOR_STATE_ARRAY(shadow_isect, index, prim); + isect->object = INTEGRATOR_STATE_ARRAY(shadow_isect, index, object); + isect->type = INTEGRATOR_STATE_ARRAY(shadow_isect, index, type); + isect->u = INTEGRATOR_STATE_ARRAY(shadow_isect, index, u); + isect->v = INTEGRATOR_STATE_ARRAY(shadow_isect, index, v); + isect->t = INTEGRATOR_STATE_ARRAY(shadow_isect, index, t); +#ifdef __EMBREE__ + isect->Ng = INTEGRATOR_STATE_ARRAY(shadow_isect, index, Ng); +#endif +} + +ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_ARGS) +{ + if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { + for (int i = 0; i < INTEGRATOR_VOLUME_STACK_SIZE; i++) { + INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, object) = INTEGRATOR_STATE_ARRAY( + volume_stack, i, object); + INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, shader) = INTEGRATOR_STATE_ARRAY( + volume_stack, i, shader); + } + } +} + +ccl_device_forceinline VolumeStack +integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_CONST_ARGS, int i) +{ + VolumeStack entry = {INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, object), + INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, shader)}; + return entry; +} + +ccl_device_forceinline bool integrator_state_shadow_volume_stack_is_empty( + INTEGRATOR_STATE_CONST_ARGS) +{ + return (kernel_data.kernel_features & 
KERNEL_FEATURE_VOLUME) ? + INTEGRATOR_STATE_ARRAY(shadow_volume_stack, 0, shader) == SHADER_NONE : + true; +} + +ccl_device_forceinline void integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_ARGS, + int i, + VolumeStack entry) +{ + INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, object) = entry.object; + INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, shader) = entry.shader; +} + +#if defined(__KERNEL_GPU__) +ccl_device_inline void integrator_state_copy_only(const IntegratorState to_state, + const IntegratorState state) +{ + int index; + + /* Rely on the compiler to optimize out unused assignments and `while(false)`'s. */ + +# define KERNEL_STRUCT_BEGIN(name) \ + index = 0; \ + do { + +# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) \ + if (kernel_integrator_state.parent_struct.name != nullptr) { \ + kernel_integrator_state.parent_struct.name[to_state] = \ + kernel_integrator_state.parent_struct.name[state]; \ + } + +# define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \ + if (kernel_integrator_state.parent_struct[index].name != nullptr) { \ + kernel_integrator_state.parent_struct[index].name[to_state] = \ + kernel_integrator_state.parent_struct[index].name[state]; \ + } + +# define KERNEL_STRUCT_END(name) \ + } \ + while (false) \ + ; + +# define KERNEL_STRUCT_END_ARRAY(name, array_size) \ + ++index; \ + } \ + while (index < array_size) \ + ; + +# include "kernel/integrator/integrator_state_template.h" + +# undef KERNEL_STRUCT_BEGIN +# undef KERNEL_STRUCT_MEMBER +# undef KERNEL_STRUCT_ARRAY_MEMBER +# undef KERNEL_STRUCT_END +# undef KERNEL_STRUCT_END_ARRAY +} + +ccl_device_inline void integrator_state_move(const IntegratorState to_state, + const IntegratorState state) +{ + integrator_state_copy_only(to_state, state); + + INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; +} + +#endif + +/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths + * after this function. */ +ccl_device_inline void integrator_state_shadow_catcher_split(INTEGRATOR_STATE_ARGS) +{ +#if defined(__KERNEL_GPU__) + const IntegratorState to_state = atomic_fetch_and_add_uint32( + &kernel_integrator_state.next_shadow_catcher_path_index[0], 1); + + integrator_state_copy_only(to_state, state); + + kernel_integrator_state.path.flag[to_state] |= PATH_RAY_SHADOW_CATCHER_PASS; + + /* Sanity check: expect to split in the intersect-closest kernel, where there is no shadow ray + * and no sorting yet. */ + kernel_assert(INTEGRATOR_STATE(shadow_path, queued_kernel) == 0); + kernel_assert(kernel_integrator_state.sort_key_counter[INTEGRATOR_STATE(path, queued_kernel)] == + nullptr); +#else + + IntegratorStateCPU *ccl_restrict split_state = state + 1; + + *split_state = *state; + + split_state->path.flag |= PATH_RAY_SHADOW_CATCHER_PASS; +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h new file mode 100644 index 00000000000..9490738404e --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_subsurface.h @@ -0,0 +1,623 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/kernel_path_state.h" +#include "kernel/kernel_projection.h" +#include "kernel/kernel_shader.h" + +#include "kernel/bvh/bvh.h" + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf_diffuse.h" +#include "kernel/closure/bsdf_principled_diffuse.h" +#include "kernel/closure/bssrdf.h" +#include "kernel/closure/volume.h" + +#include "kernel/integrator/integrator_intersect_volume_stack.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __SUBSURFACE__ + +ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const ShaderClosure *sc) +{ + /* We should never have two consecutive BSSRDF bounces, the second one should + * be converted to a diffuse BSDF to avoid this. */ + kernel_assert(!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DIFFUSE_ANCESTOR)); + + /* Setup path state for intersect_subsurface kernel. */ + const Bssrdf *bssrdf = (const Bssrdf *)sc; + + /* Setup ray into surface. */ + INTEGRATOR_STATE_WRITE(ray, P) = sd->P; + INTEGRATOR_STATE_WRITE(ray, D) = sd->N; + INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(ray, dD) = differential_zero_compact(); + + /* Pass along object info, reusing isect to save memory. */ + INTEGRATOR_STATE_WRITE(isect, Ng) = sd->Ng; + INTEGRATOR_STATE_WRITE(isect, object) = sd->object; + + /* Pass BSSRDF parameters. */ + const uint32_t path_flag = INTEGRATOR_STATE_WRITE(path, flag); + INTEGRATOR_STATE_WRITE(path, flag) = (path_flag & ~PATH_RAY_CAMERA) | PATH_RAY_SUBSURFACE; + INTEGRATOR_STATE_WRITE(path, throughput) *= shader_bssrdf_sample_weight(sd, sc); + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + if (INTEGRATOR_STATE(path, bounce) == 0) { + INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3(); + } + } + + INTEGRATOR_STATE_WRITE(subsurface, albedo) = bssrdf->albedo; + INTEGRATOR_STATE_WRITE(subsurface, radius) = bssrdf->radius; + INTEGRATOR_STATE_WRITE(subsurface, roughness) = bssrdf->roughness; + INTEGRATOR_STATE_WRITE(subsurface, anisotropy) = bssrdf->anisotropy; + + return LABEL_SUBSURFACE_SCATTER; +} + +ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS, ShaderData *sd) +{ + /* Get bump mapped normal from shader evaluation at exit point. */ + float3 N = sd->N; + if (sd->flag & SD_HAS_BSSRDF_BUMP) { + N = shader_bssrdf_normal(sd); + } + + /* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. 
*/ + sd->flag &= ~SD_CLOSURE_FLAGS; + sd->num_closure = 0; + sd->num_closure_left = kernel_data.max_closures; + + const float3 weight = one_float3(); + const float roughness = INTEGRATOR_STATE(subsurface, roughness); + +# ifdef __PRINCIPLED__ + if (roughness != FLT_MAX) { + PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc( + sd, sizeof(PrincipledDiffuseBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness = roughness; + sd->flag |= bsdf_principled_diffuse_setup(bsdf); + + /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes + * can recognize it as not being a regular Disney principled diffuse closure */ + bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID; + } + } + else +# endif /* __PRINCIPLED__ */ + { + DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight); + + if (bsdf) { + bsdf->N = N; + sd->flag |= bsdf_diffuse_setup(bsdf); + + /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes + * can recognize it as not being a regular diffuse closure */ + bsdf->type = CLOSURE_BSDF_BSSRDF_ID; + } + } +} + +/* Random walk subsurface scattering. + * + * "Practical and Controllable Subsurface Scattering for Production Path + * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ + +/* Support for anisotropy from: + * "Path Traced Subsurface Scattering using Anisotropic Phase Functions + * and Non-Exponential Free Flights". + * Magnus Wrenninge, Ryusuke Villemin, Christophe Hery. + * https://graphics.pixar.com/library/PathTracedSubsurface/ */ + +ccl_device void subsurface_random_walk_remap( + const float albedo, const float d, float g, float *sigma_t, float *alpha) +{ + /* Compute attenuation and scattering coefficients from albedo. */ + const float g2 = g * g; + const float g3 = g2 * g; + const float g4 = g3 * g; + const float g5 = g4 * g; + const float g6 = g5 * g; + const float g7 = g6 * g; + + const float A = 1.8260523782f + -1.28451056436f * g + -1.79904629312f * g2 + + 9.19393289202f * g3 + -22.8215585862f * g4 + 32.0234874259f * g5 + + -23.6264803333f * g6 + 7.21067002658f * g7; + const float B = 4.98511194385f + + 0.127355959438f * + expf(31.1491581433f * g + -201.847017512f * g2 + 841.576016723f * g3 + + -2018.09288505f * g4 + 2731.71560286f * g5 + -1935.41424244f * g6 + + 559.009054474f * g7); + const float C = 1.09686102424f + -0.394704063468f * g + 1.05258115941f * g2 + + -8.83963712726f * g3 + 28.8643230661f * g4 + -46.8802913581f * g5 + + 38.5402837518f * g6 + -12.7181042538f * g7; + const float D = 0.496310210422f + 0.360146581622f * g + -2.15139309747f * g2 + + 17.8896899217f * g3 + -55.2984010333f * g4 + 82.065982243f * g5 + + -58.5106008578f * g6 + 15.8478295021f * g7; + const float E = 4.23190299701f + + 0.00310603949088f * + expf(76.7316253952f * g + -594.356773233f * g2 + 2448.8834203f * g3 + + -5576.68528998f * g4 + 7116.60171912f * g5 + -4763.54467887f * g6 + + 1303.5318055f * g7); + const float F = 2.40602999408f + -2.51814844609f * g + 9.18494908356f * g2 + + -79.2191708682f * g3 + 259.082868209f * g4 + -403.613804597f * g5 + + 302.85712436f * g6 + -87.4370473567f * g7; + + const float blend = powf(albedo, 0.25f); + + *alpha = (1.0f - blend) * A * powf(atanf(B * albedo), C) + + blend * D * powf(atanf(E * albedo), F); + *alpha = clamp(*alpha, 0.0f, 0.999999f); // because of numerical precision + + float sigma_t_prime = 1.0f / fmaxf(d, 1e-16f); + *sigma_t = sigma_t_prime / (1.0f - g); +} + +ccl_device void subsurface_random_walk_coefficients(const float3 
albedo, + const float3 radius, + const float anisotropy, + float3 *sigma_t, + float3 *alpha, + float3 *throughput) +{ + float sigma_t_x, sigma_t_y, sigma_t_z; + float alpha_x, alpha_y, alpha_z; + + subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x); + subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y); + subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z); + + /* Throughput already contains closure weight at this point, which includes the + * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo + * which will be added through scattering. */ + *throughput = safe_divide_color(*throughput, albedo); + + /* With low albedo values (like 0.025) we get diffusion_length 1.0 and + * infinite phase functions. To avoid a sharp discontinuity as we go from + * such values to 0.0, increase alpha and reduce the throughput to compensate. */ + const float min_alpha = 0.2f; + if (alpha_x < min_alpha) { + (*throughput).x *= alpha_x / min_alpha; + alpha_x = min_alpha; + } + if (alpha_y < min_alpha) { + (*throughput).y *= alpha_y / min_alpha; + alpha_y = min_alpha; + } + if (alpha_z < min_alpha) { + (*throughput).z *= alpha_z / min_alpha; + alpha_z = min_alpha; + } + + *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); + *alpha = make_float3(alpha_x, alpha_y, alpha_z); +} + +/* References for Dwivedi sampling: + * + * [1] "A Zero-variance-based Sampling Scheme for Monte Carlo Subsurface Scattering" + * by Jaroslav Křivánek and Eugene d'Eon (SIGGRAPH 2014) + * https://cgg.mff.cuni.cz/~jaroslav/papers/2014-zerovar/ + * + * [2] "Improving the Dwivedi Sampling Scheme" + * by Johannes Meng, Johannes Hanika, and Carsten Dachsbacher (EGSR 2016) + * https://cg.ivd.kit.edu/1951.php + * + * [3] "Zero-Variance Theory for Efficient Subsurface Scattering" + * by Eugene d'Eon and Jaroslav Křivánek (SIGGRAPH 2020) + * https://iliyan.com/publications/RenderingCourse2020 + */ + +ccl_device_forceinline float eval_phase_dwivedi(float v, float phase_log, float cos_theta) +{ + /* Eq. 9 from [2] using precomputed log((v + 1) / (v - 1)) */ + return 1.0f / ((v - cos_theta) * phase_log); +} + +ccl_device_forceinline float sample_phase_dwivedi(float v, float phase_log, float rand) +{ + /* Based on Eq. 10 from [2]: `v - (v + 1) * pow((v - 1) / (v + 1), rand)` + * Since we're already pre-computing `phase_log = log((v + 1) / (v - 1))` for the evaluation, + * we can implement the power function like this. */ + return v - (v + 1.0f) * expf(-rand * phase_log); +} + +ccl_device_forceinline float diffusion_length_dwivedi(float alpha) +{ + /* Eq. 67 from [3] */ + return 1.0f / sqrtf(1.0f - powf(alpha, 2.44294f - 0.0215813f * alpha + 0.578637f / alpha)); +} + +ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, float randv) +{ + float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); + float phi = M_2PI_F * randv; + float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta); + + float3 T, B; + make_orthonormals(D, &T, &B); + return dir.x * T + dir.y * B + dir.z * D; +} + +ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, + float t, + bool hit, + float3 *transmittance) +{ + float3 T = volume_color_transmittance(sigma_t, t); + if (transmittance) { + *transmittance = T; + } + return hit ? 
T : sigma_t * T; +} + +/* Define the below variable to get the similarity code active, + * and the value represents the cutoff level */ +# define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9 + +ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS, + RNGState rng_state, + Ray &ray, + LocalIntersection &ss_isect) +{ + float bssrdf_u, bssrdf_v; + path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + + const float3 P = INTEGRATOR_STATE(ray, P); + const float3 N = INTEGRATOR_STATE(ray, D); + const float ray_dP = INTEGRATOR_STATE(ray, dP); + const float time = INTEGRATOR_STATE(ray, time); + const float3 Ng = INTEGRATOR_STATE(isect, Ng); + const int object = INTEGRATOR_STATE(isect, object); + + /* Sample diffuse surface scatter into the object. */ + float3 D; + float pdf; + sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf); + if (dot(-Ng, D) <= 0.0f) { + return false; + } + + /* Setup ray. */ + ray.P = ray_offset(P, -Ng); + ray.D = D; + ray.t = FLT_MAX; + ray.time = time; + ray.dP = ray_dP; + ray.dD = differential_zero_compact(); + +# ifndef __KERNEL_OPTIX__ + /* Compute or fetch object transforms. */ + Transform ob_itfm ccl_optional_struct_init; + Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); +# endif + + /* Convert subsurface to volume coefficients. + * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ + const float3 albedo = INTEGRATOR_STATE(subsurface, albedo); + const float3 radius = INTEGRATOR_STATE(subsurface, radius); + const float anisotropy = INTEGRATOR_STATE(subsurface, anisotropy); + + float3 sigma_t, alpha; + float3 throughput = INTEGRATOR_STATE_WRITE(path, throughput); + subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); + float3 sigma_s = sigma_t * alpha; + + /* Theoretically it should be better to use the exact alpha for the channel we're sampling at + * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange + * for making the code significantly more complex and slower (if direction sampling depends on + * the sampled channel, we need to compute its PDF per-channel and consider it for MIS later on). + * + * Since the strength of the guided sampling increases as alpha gets lower, using a value that + * is too low results in fireflies while one that's too high just gives a bit more noise. + * Therefore, the code here uses the highest of the three albedos to be safe. */ + const float diffusion_length = diffusion_length_dwivedi(max3(alpha)); + + if (diffusion_length == 1.0f) { + /* With specific values of alpha the length might become 1, which in asymptotic makes phase to + * be infinite. After first bounce it will cause throughput to be 0. Do early output, avoiding + * numerical issues and extra unneeded work. */ + return false; + } + + /* Precompute term for phase sampling. */ + const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); + + /* Modify state for RNGs, decorrelated from other paths. */ + rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); + + /* Random walk until we hit the surface again. */ + bool hit = false; + bool have_opposite_interface = false; + float opposite_distance = 0.0f; + + /* Todo: Disable for alpha>0.999 or so? */ + /* Our heuristic, a compromise between guiding and classic. 
*/ + const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f)); + +# ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL + float3 sigma_s_star = sigma_s * (1.0f - anisotropy); + float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star; + float3 sigma_t_org = sigma_t; + float3 sigma_s_org = sigma_s; + const float anisotropy_org = anisotropy; + const float guided_fraction_org = guided_fraction; +# endif + + for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) { + /* Advance random number offset. */ + rng_state.rng_offset += PRNG_BOUNCE_NUM; + +# ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL + // shadow with local variables according to depth + float anisotropy, guided_fraction; + float3 sigma_s, sigma_t; + if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) { + anisotropy = anisotropy_org; + guided_fraction = guided_fraction_org; + sigma_t = sigma_t_org; + sigma_s = sigma_s_org; + } + else { + anisotropy = 0.0f; + guided_fraction = 0.75f; // back to isotropic heuristic from Blender + sigma_t = sigma_t_star; + sigma_s = sigma_s_star; + } +# endif + + /* Sample color channel, use MIS with balance heuristic. */ + float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL); + float3 channel_pdf; + int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf); + float sample_sigma_t = volume_channel_get(sigma_t, channel); + float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE); + + /* We need the result of the raycast to compute the full guided PDF, so just remember the + * relevant terms to avoid recomputing them later. */ + float backward_fraction = 0.0f; + float forward_pdf_factor = 0.0f; + float forward_stretching = 1.0f; + float backward_pdf_factor = 0.0f; + float backward_stretching = 1.0f; + + /* For the initial ray, we already know the direction, so just do classic distance sampling. */ + if (bounce > 0) { + /* Decide whether we should use guided or classic sampling. */ + bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction); + + /* Determine if we want to sample away from the incoming interface. + * This only happens if we found a nearby opposite interface, and the probability for it + * depends on how close we are to it already. + * This probability term comes from the recorded presentation of [3]. */ + bool guide_backward = false; + if (have_opposite_interface) { + /* Compute distance of the random walk between the tangent plane at the starting point + * and the assumed opposite interface (the parallel plane that contains the point we + * found in our ray query for the opposite side). */ + float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance); + backward_fraction = 1.0f / + (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length)); + guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction; + } + + /* Sample scattering direction. */ + float scatter_u, scatter_v; + path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v); + float cos_theta; + float hg_pdf; + if (guided) { + cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); + /* The backwards guiding distribution is just mirrored along sd->N, so swapping the + * sign here is enough to sample from that instead. 
*/ + if (guide_backward) { + cos_theta = -cos_theta; + } + float3 newD = direction_from_cosine(N, cos_theta, scatter_v); + hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy); + ray.D = newD; + } + else { + float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf); + cos_theta = dot(newD, N); + ray.D = newD; + } + + /* Compute PDF factor caused by phase sampling (as the ratio of guided / classic). + * Since phase sampling is channel-independent, we can get away with applying a factor + * to the guided PDF, which implicitly means pulling out the classic PDF term and letting + * it cancel with an equivalent term in the numerator of the full estimator. + * For the backward PDF, we again reuse the same probability distribution with a sign swap. + */ + forward_pdf_factor = M_1_2PI_F * eval_phase_dwivedi(diffusion_length, phase_log, cos_theta) / + hg_pdf; + backward_pdf_factor = M_1_2PI_F * + eval_phase_dwivedi(diffusion_length, phase_log, -cos_theta) / hg_pdf; + + /* Prepare distance sampling. + * For the backwards case, this also needs the sign swapped since now directions against + * sd->N (and therefore with negative cos_theta) are preferred. */ + forward_stretching = (1.0f - cos_theta / diffusion_length); + backward_stretching = (1.0f + cos_theta / diffusion_length); + if (guided) { + sample_sigma_t *= guide_backward ? backward_stretching : forward_stretching; + } + } + + /* Sample direction along ray. */ + float t = -logf(1.0f - randt) / sample_sigma_t; + + /* On the first bounce, we use the raycast to check if the opposite side is nearby. + * If yes, we will later use backwards guided sampling in order to have a decent + * chance of connecting to it. + * Todo: Maybe use less than 10 times the mean free path? */ + ray.t = (bounce == 0) ? max(t, 10.0f / (min3(sigma_t))) : t; + scene_intersect_local(kg, &ray, &ss_isect, object, NULL, 1); + hit = (ss_isect.num_hits > 0); + + if (hit) { +# ifdef __KERNEL_OPTIX__ + /* t is always in world space with OptiX. */ + ray.t = ss_isect.hits[0].t; +# else + /* Compute world space distance to surface hit. */ + float3 D = transform_direction(&ob_itfm, ray.D); + D = normalize(D) * ss_isect.hits[0].t; + ray.t = len(transform_direction(&ob_tfm, D)); +# endif + } + + if (bounce == 0) { + /* Check if we hit the opposite side. */ + if (hit) { + have_opposite_interface = true; + opposite_distance = dot(ray.P + ray.t * ray.D - P, -N); + } + /* Apart from the opposite side check, we were supposed to only trace up to distance t, + * so check if there would have been a hit in that case. */ + hit = ray.t < t; + } + + /* Use the distance to the exit point for the throughput update if we found one. */ + if (hit) { + t = ray.t; + } + else if (bounce == 0) { + /* Restore original position if nothing was hit after the first bounce, + * without the ray_offset() that was added to avoid self-intersection. + * Otherwise if that offset is relatively large compared to the scattering + * radius, we never go back up high enough to exit the surface. */ + ray.P = P; + } + + /* Advance to new scatter location. */ + ray.P += t * ray.D; + + float3 transmittance; + float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); + if (bounce > 0) { + /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. 
*/ + float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); + + if (have_opposite_interface) { + /* First step of MIS: Depending on geometry we might have two methods for guided + * sampling, so perform MIS between them. */ + float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL); + guided_pdf = mix( + guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction); + } + else { + /* Just include phase sampling factor otherwise. */ + guided_pdf *= forward_pdf_factor; + } + + /* Now we apply the MIS balance heuristic between the classic and guided sampling. */ + pdf = mix(pdf, guided_pdf, guided_fraction); + } + + /* Finally, we're applying MIS again to combine the three color channels. + * Altogether, the MIS computation combines up to nine different estimators: + * {classic, guided, backward_guided} x {r, g, b} */ + throughput *= (hit ? transmittance : sigma_s * transmittance) / dot(channel_pdf, pdf); + + if (hit) { + /* If we hit the surface, we are done. */ + break; + } + else if (throughput.x < VOLUME_THROUGHPUT_EPSILON && + throughput.y < VOLUME_THROUGHPUT_EPSILON && + throughput.z < VOLUME_THROUGHPUT_EPSILON) { + /* Avoid unnecessary work and precision issue when throughput gets really small. */ + break; + } + } + + if (hit) { + kernel_assert(isfinite3_safe(throughput)); + INTEGRATOR_STATE_WRITE(path, throughput) = throughput; + } + + return hit; +} + +ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS) +{ + RNGState rng_state; + path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state); + + Ray ray ccl_optional_struct_init; + LocalIntersection ss_isect ccl_optional_struct_init; + + if (!subsurface_random_walk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) { + return false; + } + +# ifdef __VOLUME__ + /* Update volume stack if needed. */ + if (kernel_data.integrator.use_volumes) { + const int object = intersection_get_object(kg, &ss_isect.hits[0]); + const int object_flag = kernel_tex_fetch(__object_flag, object); + + if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { + float3 P = INTEGRATOR_STATE(ray, P); + const float3 Ng = INTEGRATOR_STATE(isect, Ng); + const float3 offset_P = ray_offset(P, -Ng); + + integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_PASS, offset_P, ray.P); + } + } +# endif /* __VOLUME__ */ + + /* Pretend ray is coming from the outside towards the exit point. This ensures + * correct front/back facing normals. + * TODO: find a more elegant solution? */ + ray.P += ray.D * ray.t * 2.0f; + ray.D = -ray.D; + + integrator_state_write_isect(INTEGRATOR_STATE_PASS, &ss_isect.hits[0]); + integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray); + + /* Advanced random number offset for bounce. 
*/ + INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM; + + const int shader = intersection_get_shader(kg, &ss_isect.hits[0]); + const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; + if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, + shader); + } + else { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, + shader); + } + + return true; +} + +#endif /* __SUBSURFACE__ */ + +CCL_NAMESPACE_END
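A note on the Dwivedi sampling helpers in integrator_subsurface.h above: sample_phase_dwivedi() is the inverse CDF of the density evaluated by eval_phase_dwivedi(), i.e. p(cos_theta) = 1 / ((v - cos_theta) * log((v + 1) / (v - 1))) on [-1, 1], which integrates to one. The standalone check below re-derives that identity numerically; it reimplements only these small formulas outside the kernel, the function names are invented for the illustration, and the chosen albedo is arbitrary.

#include <assert.h>
#include <math.h>
#include <stdio.h>

/* Same formula as diffusion_length_dwivedi() (Eq. 67 from d'Eon and Krivanek). */
static float example_diffusion_length(float alpha)
{
  return 1.0f / sqrtf(1.0f - powf(alpha, 2.44294f - 0.0215813f * alpha + 0.578637f / alpha));
}

int main()
{
  const float alpha = 0.8f; /* Arbitrary single-scattering albedo for the check. */
  const float v = example_diffusion_length(alpha);
  const float phase_log = logf((v + 1.0f) / (v - 1.0f));

  for (float rand = 0.05f; rand < 1.0f; rand += 0.1f) {
    /* Same expression as sample_phase_dwivedi(). */
    const float cos_theta = v - (v + 1.0f) * expf(-rand * phase_log);
    assert(cos_theta >= -1.0f && cos_theta <= 1.0f);

    /* CDF of p(mu) = 1 / ((v - mu) * phase_log), integrated from -1 to cos_theta.
     * Inverting this CDF gives exactly the sampling expression above. */
    const float cdf = logf((v + 1.0f) / (v - cos_theta)) / phase_log;
    assert(fabsf(cdf - rand) < 1e-4f);

    printf("rand=%.2f cos_theta=%+.4f\n", rand, cos_theta);
  }
  return 0;
}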
diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h new file mode 100644 index 00000000000..d53070095f0 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h @@ -0,0 +1,223 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Volume Stack + * + * This is an array of object/shader IDs that the current segment of the path + * is inside of. */ + +template<typename StackReadOp, typename StackWriteOp> +ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, + const ShaderData *sd, + StackReadOp stack_read, + StackWriteOp stack_write) +{ + /* TODO: we should have some way for objects to indicate if they want the + * world shader to work inside them. Excluding it by default is problematic + * because non-volume objects can't be assumed to be closed manifolds. */ + if (!(sd->flag & SD_HAS_VOLUME)) { + return; + } + + if (sd->flag & SD_BACKFACING) { + /* Exit volume object: remove from stack. */ + for (int i = 0;; i++) { + VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + if (entry.object == sd->object) { + /* Shift back next stack entries. */ + do { + entry = stack_read(i + 1); + stack_write(i, entry); + i++; + } while (entry.shader != SHADER_NONE); + + return; + } + } + } + else { + /* Enter volume object: add to stack. */ + int i; + for (i = 0;; i++) { + VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + /* Already in the stack? Then we have nothing to do. */ + if (entry.object == sd->object) { + return; + } + } + + /* If we exceed the stack limit, ignore. */ + if (i >= VOLUME_STACK_SIZE - 1) { + return; + } + + /* Add to the end of the stack. */ + const VolumeStack new_entry = {sd->object, sd->shader}; + const VolumeStack empty_entry = {OBJECT_NONE, SHADER_NONE}; + stack_write(i, new_entry); + stack_write(i + 1, empty_entry); + } +} + +ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd) +{ + volume_stack_enter_exit( + INTEGRATOR_STATE_PASS, + sd, + [=](const int i) { return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); }, + [=](const int i, const VolumeStack entry) { + integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, i, entry); + }); +} + +ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd) +{ + volume_stack_enter_exit( + INTEGRATOR_STATE_PASS, + sd, + [=](const int i) { + return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i); + }, + [=](const int i, const VolumeStack entry) { + integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_PASS, i, entry); + }); +} + +/* Clean stack after the last bounce. + * + * It is expected that all volumes are closed manifolds, so at the time when the ray + * hits nothing (for example, on the last bounce, which goes to the environment) the + * only volume expected in the stack is the world's one. All other volume + * entries should have been exited already. + * + * This isn't always true because of ray intersection precision issues, which + * could leave a non-world volume in the stack indefinitely, causing render + * artifacts. + * + * Use this function after the last bounce to get rid of all volumes apart from + * the world's one, to avoid render artifacts. + */ +ccl_device_inline void volume_stack_clean(INTEGRATOR_STATE_ARGS) +{ + if (kernel_data.background.volume_shader != SHADER_NONE) { + /* Keep the world's volume in stack. */ + INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, shader) = SHADER_NONE; + } + else { + INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, shader) = SHADER_NONE; + } +} + +template<typename StackReadOp> +ccl_device float volume_stack_step_size(INTEGRATOR_STATE_ARGS, StackReadOp stack_read) +{ + float step_size = FLT_MAX; + + for (int i = 0;; i++) { + VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; + + bool heterogeneous = false; + + if (shader_flag & SD_HETEROGENEOUS_VOLUME) { + heterogeneous = true; + } + else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) { + /* We want to render world or objects without any volume grids + * as homogeneous, but can only verify this at run-time since other + * heterogeneous volume objects may be using the same shader.
*/ + int object = entry.object; + if (object != OBJECT_NONE) { + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) { + heterogeneous = true; + } + } + } + + if (heterogeneous) { + float object_step_size = object_volume_step_size(kg, entry.object); + object_step_size *= kernel_data.integrator.volume_step_rate; + step_size = fminf(object_step_size, step_size); + } + } + + return step_size; +} + +typedef enum VolumeSampleMethod { + VOLUME_SAMPLE_NONE = 0, + VOLUME_SAMPLE_DISTANCE = (1 << 0), + VOLUME_SAMPLE_EQUIANGULAR = (1 << 1), + VOLUME_SAMPLE_MIS = (VOLUME_SAMPLE_DISTANCE | VOLUME_SAMPLE_EQUIANGULAR), +} VolumeSampleMethod; + +ccl_device VolumeSampleMethod volume_stack_sample_method(INTEGRATOR_STATE_ARGS) +{ + VolumeSampleMethod method = VOLUME_SAMPLE_NONE; + +  for (int i = 0;; i++) { + VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); + if (entry.shader == SHADER_NONE) { + break; + } + + int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; + + if (shader_flag & SD_VOLUME_MIS) { + /* Multiple importance sampling. */ + return VOLUME_SAMPLE_MIS; + } + else if (shader_flag & SD_VOLUME_EQUIANGULAR) { + /* Distance + equiangular sampling -> multiple importance sampling. */ + if (method == VOLUME_SAMPLE_DISTANCE) { + return VOLUME_SAMPLE_MIS; + } + + /* Only equiangular sampling. */ + method = VOLUME_SAMPLE_EQUIANGULAR; + } + else { + /* Distance + equiangular sampling -> multiple importance sampling. */ + if (method == VOLUME_SAMPLE_EQUIANGULAR) { + return VOLUME_SAMPLE_MIS; + } + + /* Distance sampling only. */ + method = VOLUME_SAMPLE_DISTANCE; + } + } + + return method; +} + +CCL_NAMESPACE_END
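For reference, the sentinel-terminated stack discipline used by volume_stack_enter_exit() above can be exercised in isolation with a small host-side sketch: exiting an object shifts the remaining entries (including the SHADER_NONE terminator) down one slot, and entering appends before the terminator while silently ignoring overflow. ExampleEntry, example_enter and example_exit are invented names and the object/shader IDs are arbitrary, so this illustrates the bookkeeping rather than kernel code.

#include <stdio.h>

enum { EXAMPLE_SHADER_NONE = -1, EXAMPLE_STACK_SIZE = 4 };

struct ExampleEntry {
  int object;
  int shader;
};

/* Enter a volume: append before the terminator, ignore duplicates and overflow. */
static void example_enter(ExampleEntry stack[], int object, int shader)
{
  int i;
  for (i = 0; stack[i].shader != EXAMPLE_SHADER_NONE; i++) {
    if (stack[i].object == object) {
      return; /* Already inside this object. */
    }
  }
  if (i >= EXAMPLE_STACK_SIZE - 1) {
    return; /* Keep the terminator slot; overflow is silently ignored. */
  }
  stack[i] = {object, shader};
  stack[i + 1] = {-1, EXAMPLE_SHADER_NONE};
}

/* Exit a volume: find the entry and shift later entries (and the terminator) down. */
static void example_exit(ExampleEntry stack[], int object)
{
  for (int i = 0; stack[i].shader != EXAMPLE_SHADER_NONE; i++) {
    if (stack[i].object == object) {
      do {
        stack[i] = stack[i + 1];
        i++;
      } while (stack[i - 1].shader != EXAMPLE_SHADER_NONE);
      return;
    }
  }
}

int main()
{
  ExampleEntry stack[EXAMPLE_STACK_SIZE] = {{-1, EXAMPLE_SHADER_NONE}};
  example_enter(stack, 7, 2); /* Enter a glass-like object. */
  example_enter(stack, 9, 3); /* Enter a smoke domain nested inside it. */
  example_exit(stack, 7);     /* Leave the outer object first. */
  for (int i = 0; stack[i].shader != EXAMPLE_SHADER_NONE; i++) {
    printf("object %d shader %d\n", stack[i].object, stack[i].shader);
  }
  return 0;
}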