git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/kernel/integrator')
-rw-r--r--  intern/cycles/kernel/integrator/integrator_init_from_bake.h         |  181
-rw-r--r--  intern/cycles/kernel/integrator/integrator_init_from_camera.h       |  120
-rw-r--r--  intern/cycles/kernel/integrator/integrator_intersect_closest.h      |  248
-rw-r--r--  intern/cycles/kernel/integrator/integrator_intersect_shadow.h       |  144
-rw-r--r--  intern/cycles/kernel/integrator/integrator_intersect_subsurface.h   |   36
-rw-r--r--  intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h |  198
-rw-r--r--  intern/cycles/kernel/integrator/integrator_megakernel.h             |   93
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_background.h       |  215
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_light.h            |  126
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_shadow.h           |  182
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_surface.h          |  502
-rw-r--r--  intern/cycles/kernel/integrator/integrator_shade_volume.h           | 1015
-rw-r--r--  intern/cycles/kernel/integrator/integrator_state.h                  |  185
-rw-r--r--  intern/cycles/kernel/integrator/integrator_state_flow.h             |  144
-rw-r--r--  intern/cycles/kernel/integrator/integrator_state_template.h         |  163
-rw-r--r--  intern/cycles/kernel/integrator/integrator_state_util.h             |  273
-rw-r--r--  intern/cycles/kernel/integrator/integrator_subsurface.h             |  623
-rw-r--r--  intern/cycles/kernel/integrator/integrator_volume_stack.h           |  223
18 files changed, 4671 insertions(+), 0 deletions(-)
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
new file mode 100644
index 00000000000..4898ff936c6
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_accumulate.h"
+#include "kernel/kernel_adaptive_sampling.h"
+#include "kernel/kernel_camera.h"
+#include "kernel/kernel_path_state.h"
+#include "kernel/kernel_random.h"
+
+#include "kernel/geom/geom.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* This helps with anti-aliasing, but it is not a real solution as it does not
+ * anti-alias the geometry itself. Still, it is better than nothing, thus committed. */
+ccl_device_inline float bake_clamp_mirror_repeat(float u, float max)
+{
+  /* Use mirror repeat (like an OpenGL texture) so that a barycentric
+   * coordinate going past the end of the triangle is not always clamped to
+   * the same value, which would give ugly patterns. */
+ u /= max;
+ float fu = floorf(u);
+ u = u - fu;
+
+ return ((((int)fu) & 1) ? 1.0f - u : u) * max;
+}
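+
+/* Worked example (illustrative): with max == 1.0f, a coordinate of 1.25f maps
+ * to 0.75f and -0.25f maps to 0.25f, mirroring around the edge of the domain
+ * instead of clamping to it. */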
+
+/* Return false to indicate that this pixel is finished.
+ * Used by the CPU implementation to stop sampling a pixel once it is known
+ * that the pixel has converged. */
+ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
+ const ccl_global KernelWorkTile *ccl_restrict tile,
+ ccl_global float *render_buffer,
+ const int x,
+ const int y,
+ const int scheduled_sample)
+{
+ PROFILING_INIT(kg, PROFILING_RAY_SETUP);
+
+ /* Initialize path state to give basic buffer access and allow early outputs. */
+ path_state_init(INTEGRATOR_STATE_PASS, tile, x, y);
+
+ /* Check whether the pixel has converged and should not be sampled anymore. */
+ if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) {
+ return false;
+ }
+
+ /* Always count the sample, even if the camera sample will reject the ray. */
+ const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample);
+
+ /* Setup render buffers. */
+ const int index = INTEGRATOR_STATE(path, render_pixel_index);
+ const int pass_stride = kernel_data.film.pass_stride;
+ render_buffer += index * pass_stride;
+
+ ccl_global float *primitive = render_buffer + kernel_data.film.pass_bake_primitive;
+ ccl_global float *differential = render_buffer + kernel_data.film.pass_bake_differential;
+
+ const int seed = __float_as_uint(primitive[0]);
+ int prim = __float_as_uint(primitive[1]);
+ if (prim == -1) {
+ return false;
+ }
+
+ prim += kernel_data.bake.tri_offset;
+
+ /* Random number generator. */
+ const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed;
+
+ float filter_x, filter_y;
+ if (sample == 0) {
+ filter_x = filter_y = 0.5f;
+ }
+ else {
+ path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y);
+ }
+
+ /* Initialize path state for path integration. */
+ path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash);
+
+ /* Barycentric UV with sub-pixel offset. */
+ float u = primitive[2];
+ float v = primitive[3];
+
+ float dudx = differential[0];
+ float dudy = differential[1];
+ float dvdx = differential[2];
+ float dvdy = differential[3];
+
+ if (sample > 0) {
+ u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
+ v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
+ 1.0f - u);
+ }
+
+ /* Position and normal on triangle. */
+ float3 P, Ng;
+ int shader;
+ triangle_point_normal(kg, kernel_data.bake.object_index, prim, u, v, &P, &Ng, &shader);
+ shader &= SHADER_MASK;
+
+ if (kernel_data.film.pass_background != PASS_UNUSED) {
+ /* Environment baking. */
+
+ /* Setup and write ray. */
+ Ray ray ccl_optional_struct_init;
+ ray.P = zero_float3();
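+    /* For environment baking the ray starts at the world origin and points
+     * towards the surface position, so P is used here as a direction. */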
+ ray.D = normalize(P);
+ ray.t = FLT_MAX;
+ ray.time = 0.5f;
+ ray.dP = differential_zero_compact();
+ ray.dD = differential_zero_compact();
+ integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ /* Setup next kernel to execute. */
+ INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ }
+ else {
+ /* Surface baking. */
+
+ /* Setup ray. */
+ Ray ray ccl_optional_struct_init;
+ ray.P = P + Ng;
+ ray.D = -Ng;
+ ray.t = FLT_MAX;
+ ray.time = 0.5f;
+
+ /* Setup differentials. */
+ float3 dPdu, dPdv;
+ triangle_dPdudv(kg, prim, &dPdu, &dPdv);
+ differential3 dP;
+ dP.dx = dPdu * dudx + dPdv * dvdx;
+ dP.dy = dPdu * dudy + dPdv * dvdy;
+ ray.dP = differential_make_compact(dP);
+ ray.dD = differential_zero_compact();
+
+ /* Write ray. */
+ integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ /* Setup and write intersection. */
+ Intersection isect ccl_optional_struct_init;
+ isect.object = kernel_data.bake.object_index;
+ isect.prim = prim;
+ isect.u = u;
+ isect.v = v;
+ isect.t = 1.0f;
+ isect.type = PRIMITIVE_TRIANGLE;
+#ifdef __EMBREE__
+ isect.Ng = Ng;
+#endif
+ integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect);
+
+ /* Setup next kernel to execute. */
+ const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
+ if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) {
+ INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ }
+ else {
+ INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ }
+ }
+
+ return true;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
new file mode 100644
index 00000000000..58e7bde4c94
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_init_from_camera.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_accumulate.h"
+#include "kernel/kernel_adaptive_sampling.h"
+#include "kernel/kernel_camera.h"
+#include "kernel/kernel_path_state.h"
+#include "kernel/kernel_random.h"
+#include "kernel/kernel_shadow_catcher.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void integrate_camera_sample(const KernelGlobals *ccl_restrict kg,
+ const int sample,
+ const int x,
+ const int y,
+ const uint rng_hash,
+ Ray *ray)
+{
+ /* Filter sampling. */
+ float filter_u, filter_v;
+
+ if (sample == 0) {
+ filter_u = 0.5f;
+ filter_v = 0.5f;
+ }
+ else {
+ path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v);
+ }
+
+ /* Depth of field sampling. */
+ float lens_u = 0.0f, lens_v = 0.0f;
+ if (kernel_data.cam.aperturesize > 0.0f) {
+ path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v);
+ }
+
+ /* Motion blur time sampling. */
+ float time = 0.0f;
+#ifdef __CAMERA_MOTION__
+ if (kernel_data.cam.shuttertime != -1.0f)
+ time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME);
+#endif
+
+ /* Generate camera ray. */
+ camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+}
+
+/* Return false to indicate that this pixel is finished.
+ * Used by the CPU implementation to stop sampling a pixel once it is known
+ * that the pixel has converged. */
+ccl_device bool integrator_init_from_camera(INTEGRATOR_STATE_ARGS,
+ const ccl_global KernelWorkTile *ccl_restrict tile,
+ ccl_global float *render_buffer,
+ const int x,
+ const int y,
+ const int scheduled_sample)
+{
+ PROFILING_INIT(kg, PROFILING_RAY_SETUP);
+
+ /* Initialize path state to give basic buffer access and allow early outputs. */
+ path_state_init(INTEGRATOR_STATE_PASS, tile, x, y);
+
+ /* Check whether the pixel has converged and should not be sampled anymore. */
+ if (!kernel_need_sample_pixel(INTEGRATOR_STATE_PASS, render_buffer)) {
+ return false;
+ }
+
+  /* Count the sample and get an effective sample for this pixel.
+   *
+   * This logic allows both counting the actual number of samples per pixel,
+   * and adding samples to this pixel after it has converged while samples are
+   * still being added elsewhere (in which case `scheduled_sample` will differ
+   * from the actual number of samples in this pixel). */
+ const int sample = kernel_accum_sample(INTEGRATOR_STATE_PASS, render_buffer, scheduled_sample);
+
+ /* Initialize random number seed for path. */
+ const uint rng_hash = path_rng_hash_init(kg, sample, x, y);
+
+ {
+ /* Generate camera ray. */
+ Ray ray;
+ integrate_camera_sample(kg, sample, x, y, rng_hash, &ray);
+ if (ray.t == 0.0f) {
+ return true;
+ }
+
+ /* Write camera ray to state. */
+ integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+ }
+
+ /* Initialize path state for path integration. */
+ path_state_init_integrator(INTEGRATOR_STATE_PASS, sample, rng_hash);
+
+ /* Continue with intersect_closest kernel, optionally initializing volume
+ * stack before that if the camera may be inside a volume. */
+ if (kernel_data.cam.is_inside_volume) {
+ INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+ }
+ else {
+ INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ }
+
+ return true;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
new file mode 100644
index 00000000000..34ca6814534
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_differential.h"
+#include "kernel/kernel_light.h"
+#include "kernel/kernel_path_state.h"
+#include "kernel/kernel_projection.h"
+#include "kernel/kernel_shadow_catcher.h"
+
+#include "kernel/geom/geom.h"
+
+#include "kernel/bvh/bvh.h"
+
+CCL_NAMESPACE_BEGIN
+
+template<uint32_t current_kernel>
+ccl_device_forceinline bool integrator_intersect_terminate(INTEGRATOR_STATE_ARGS,
+ const int shader_flags)
+{
+
+ /* Optional AO bounce termination.
+ * We continue evaluating emissive/transparent surfaces and volumes, similar
+ * to direct lighting. Only if we know there are none can we terminate the
+ * path immediately. */
+ if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+ if (shader_flags & (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) {
+ INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ }
+ else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
+ INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME;
+ }
+ else {
+ return true;
+ }
+ }
+
+ /* Load random number state. */
+ RNGState rng_state;
+ path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+
+ /* We perform path termination in this kernel to avoid launching shade_surface
+ * and evaluating the shader when not needed. Only for emission and transparent
+ * surfaces in front of emission do we need to evaluate the shader, since we
+ * perform MIS as part of indirect rays. */
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+ const float probability = path_state_continuation_probability(INTEGRATOR_STATE_PASS, path_flag);
+
+ if (probability != 1.0f) {
+ const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE);
+
+ if (probability == 0.0f || terminate >= probability) {
+ if (shader_flags & SD_HAS_EMISSION) {
+ /* Mark path to be terminated right after shader evaluation on the surface. */
+ INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE;
+ }
+ else if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
+ /* TODO: only do this for emissive volumes. */
+ INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME;
+ }
+ else {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
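+
+/* Note on the termination above (illustrative): this is standard Russian
+ * roulette. A path survives with the continuation probability p; the
+ * estimator stays unbiased because the surviving path's throughput is
+ * divided by p (assumption: the compensation happens where the throughput is
+ * updated in the shading kernels, not in this kernel). */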
+
+/* Note that current_kernel is a template value since making this a variable
+ * leads to poor performance with CUDA atomics. */
+template<uint32_t current_kernel>
+ccl_device_forceinline void integrator_intersect_shader_next_kernel(
+ INTEGRATOR_STATE_ARGS,
+ const Intersection *ccl_restrict isect,
+ const int shader,
+ const int shader_flags)
+{
+ /* Note on scheduling.
+ *
+ * When there is no shadow catcher split the scheduling is simple: schedule surface shading with
+ * or without raytrace support, depending on the shader used.
+ *
+ * When there is a shadow catcher split the general idea is to have the following configuration:
+ *
+ * - Schedule surface shading kernel (with corresponding raytrace support) for the ray which
+ * will trace shadow catcher object.
+ *
+ * - When no alpha-over of approximate shadow catcher is needed, schedule surface shading for
+ * the matte ray.
+ *
+ * - Otherwise schedule background shading kernel, so that we have a background to alpha-over
+ * on. The background kernel will then schedule surface shading for the matte ray.
+ *
+ * Note that the splitting leaves kernel and sorting counters as-is, so use INIT semantic for
+ * the matte path. */
+
+ const bool use_raytrace_kernel = ((shader_flags & SD_HAS_RAYTRACE) ||
+ (kernel_data.film.pass_ao != PASS_UNUSED));
+
+ if (use_raytrace_kernel) {
+ INTEGRATOR_PATH_NEXT_SORTED(
+ current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ }
+ else {
+ INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ }
+
+#ifdef __SHADOW_CATCHER__
+ const int object_flags = intersection_get_object_flags(kg, isect);
+ if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) {
+ if (kernel_data.film.use_approximate_shadow_catcher && !kernel_data.background.transparent) {
+ INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
+
+      INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ }
+ else if (use_raytrace_kernel) {
+ INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ }
+ else {
+ INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ }
+ }
+#endif
+}
+
+ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
+{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST);
+
+ /* Read ray from integrator state into local memory. */
+ Ray ray ccl_optional_struct_init;
+ integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+ kernel_assert(ray.t != 0.0f);
+
+ const uint visibility = path_state_ray_visibility(INTEGRATOR_STATE_PASS);
+ const int last_isect_prim = INTEGRATOR_STATE(isect, prim);
+ const int last_isect_object = INTEGRATOR_STATE(isect, object);
+
+ /* Trick to use short AO rays to approximate indirect light at the end of the path. */
+ if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+ ray.t = kernel_data.integrator.ao_bounces_distance;
+
+ const int last_object = last_isect_object != OBJECT_NONE ?
+ last_isect_object :
+ kernel_tex_fetch(__prim_object, last_isect_prim);
+ const float object_ao_distance = kernel_tex_fetch(__objects, last_object).ao_distance;
+ if (object_ao_distance != 0.0f) {
+ ray.t = object_ao_distance;
+ }
+ }
+
+ /* Scene Intersection. */
+ Intersection isect ccl_optional_struct_init;
+ bool hit = scene_intersect(kg, &ray, visibility, &isect);
+
+ /* TODO: remove this and do it in the various intersection functions instead. */
+ if (!hit) {
+ isect.prim = PRIM_NONE;
+ }
+
+ /* Light intersection for MIS. */
+ if (kernel_data.integrator.use_lamp_mis) {
+ /* NOTE: if we make lights visible to camera rays, we'll need to initialize
+ * these in the path_state_init. */
+ const int last_type = INTEGRATOR_STATE(isect, type);
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+
+ hit = lights_intersect(
+ kg, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) ||
+ hit;
+ }
+
+ /* Write intersection result into global integrator state memory. */
+ integrator_state_write_isect(INTEGRATOR_STATE_PASS, &isect);
+
+#ifdef __VOLUME__
+ if (!integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
+ const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP);
+ const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE;
+ const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0;
+
+ if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
+ INTEGRATOR_STATE_PASS, flags)) {
+ /* Continue with volume kernel if we are inside a volume, regardless
+ * if we hit anything. */
+ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
+ }
+ else {
+ INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ }
+ return;
+ }
+#endif
+
+ if (hit) {
+ /* Hit a surface, continue with light or surface kernel. */
+ if (isect.type & PRIMITIVE_LAMP) {
+ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ return;
+ }
+ else {
+ /* Hit a surface, continue with surface kernel unless terminated. */
+ const int shader = intersection_get_shader(kg, &isect);
+ const int flags = kernel_tex_fetch(__shaders, shader).flags;
+
+ if (!integrator_intersect_terminate<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
+ INTEGRATOR_STATE_PASS, flags)) {
+ integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
+ INTEGRATOR_STATE_PASS, &isect, shader, flags);
+ return;
+ }
+ else {
+ INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ return;
+ }
+ }
+ }
+ else {
+ /* Nothing hit, continue with background kernel. */
+ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ return;
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
new file mode 100644
index 00000000000..5bd9cfda4a4
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+/* Visibility for the shadow ray. */
+ccl_device_forceinline uint integrate_intersect_shadow_visibility(INTEGRATOR_STATE_CONST_ARGS)
+{
+ uint visibility = PATH_RAY_SHADOW;
+
+#ifdef __SHADOW_CATCHER__
+ const uint32_t path_flag = INTEGRATOR_STATE(shadow_path, flag);
+ visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility);
+#endif
+
+ return visibility;
+}
+
+ccl_device bool integrate_intersect_shadow_opaque(INTEGRATOR_STATE_ARGS,
+ const Ray *ray,
+ const uint visibility)
+{
+  /* Mask which picks only the opaque visibility bits from `visibility`.
+   * Calculate the mask at compile time: the visibility will either be the
+   * high bits for shadow catcher objects, or the lower bits for regular
+   * objects (there is no need to check the path state here again). */
+ constexpr const uint opaque_mask = SHADOW_CATCHER_VISIBILITY_SHIFT(PATH_RAY_SHADOW_OPAQUE) |
+ PATH_RAY_SHADOW_OPAQUE;
+
+ Intersection isect;
+ const bool opaque_hit = scene_intersect(kg, ray, visibility & opaque_mask, &isect);
+
+ if (!opaque_hit) {
+ INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0;
+ }
+
+ return opaque_hit;
+}
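+
+/* Note on the mask above (illustrative): SHADOW_CATCHER_VISIBILITY_SHIFT()
+ * moves PATH_RAY_SHADOW_OPAQUE into the high bits used for shadow catcher
+ * objects, so the single compile-time mask covers both object classes and
+ * `visibility` selects which class this ray actually tests. */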
+
+ccl_device_forceinline int integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_CONST_ARGS)
+{
+ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+ const int transparent_bounce = INTEGRATOR_STATE(shadow_path, transparent_bounce);
+
+ return max(transparent_max_bounce - transparent_bounce - 1, 0);
+}
+
+#ifdef __TRANSPARENT_SHADOWS__
+ccl_device bool integrate_intersect_shadow_transparent(INTEGRATOR_STATE_ARGS,
+ const Ray *ray,
+ const uint visibility)
+{
+ Intersection isect[INTEGRATOR_SHADOW_ISECT_SIZE];
+
+  /* Limit the number of hits to the maximum transparent bounces allowed and
+   * the size that we have available in the integrator state. */
+ const uint max_transparent_hits = integrate_shadow_max_transparent_hits(INTEGRATOR_STATE_PASS);
+ const uint max_hits = min(max_transparent_hits, (uint)INTEGRATOR_SHADOW_ISECT_SIZE);
+ uint num_hits = 0;
+ bool opaque_hit = scene_intersect_shadow_all(kg, ray, isect, visibility, max_hits, &num_hits);
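+  /* Note: num_hits counts all transparent intersections along the ray, while
+   * at most max_hits of them are recorded in `isect` (assumption based on
+   * the min() used for num_recorded_hits below). */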
+
+  /* If the number of hits exceeds the transparent bounce limit, make opaque. */
+ if (num_hits > max_transparent_hits) {
+ opaque_hit = true;
+ }
+
+ if (!opaque_hit) {
+ uint num_recorded_hits = min(num_hits, max_hits);
+
+ if (num_recorded_hits > 0) {
+ sort_intersections(isect, num_recorded_hits);
+
+ /* Write intersection result into global integrator state memory. */
+ for (int hit = 0; hit < num_recorded_hits; hit++) {
+ integrator_state_write_shadow_isect(INTEGRATOR_STATE_PASS, &isect[hit], hit);
+ }
+ }
+
+ INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = num_hits;
+ }
+ else {
+ INTEGRATOR_STATE_WRITE(shadow_path, num_hits) = 0;
+ }
+
+ return opaque_hit;
+}
+#endif
+
+ccl_device void integrator_intersect_shadow(INTEGRATOR_STATE_ARGS)
+{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW);
+
+ /* Read ray from integrator state into local memory. */
+ Ray ray ccl_optional_struct_init;
+ integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ /* Compute visibility. */
+ const uint visibility = integrate_intersect_shadow_visibility(INTEGRATOR_STATE_PASS);
+
+#ifdef __TRANSPARENT_SHADOWS__
+ /* TODO: compile different kernels depending on this? Especially for OptiX
+ * conditional trace calls are bad. */
+ const bool opaque_hit =
+ (kernel_data.integrator.transparent_shadows) ?
+ integrate_intersect_shadow_transparent(INTEGRATOR_STATE_PASS, &ray, visibility) :
+ integrate_intersect_shadow_opaque(INTEGRATOR_STATE_PASS, &ray, visibility);
+#else
+ const bool opaque_hit = integrate_intersect_shadow_opaque(
+ INTEGRATOR_STATE_PASS, &ray, visibility);
+#endif
+
+ if (opaque_hit) {
+ /* Hit an opaque surface, shadow path ends here. */
+ INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
+ return;
+ }
+ else {
+ /* Hit nothing or transparent surfaces, continue to shadow kernel
+ * for shading and render buffer output.
+ *
+ * TODO: could also write to render buffer directly if no transparent shadows?
+ * Could save a kernel execution for the common case. */
+ INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+ return;
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
new file mode 100644
index 00000000000..7c090952dc7
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/integrator/integrator_subsurface.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void integrator_intersect_subsurface(INTEGRATOR_STATE_ARGS)
+{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_SUBSURFACE);
+
+#ifdef __SUBSURFACE__
+ if (subsurface_scatter(INTEGRATOR_STATE_PASS)) {
+ return;
+ }
+#endif
+
+ INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
new file mode 100644
index 00000000000..60d8a8e3e54
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/bvh/bvh.h"
+#include "kernel/geom/geom.h"
+#include "kernel/integrator/integrator_volume_stack.h"
+#include "kernel/kernel_shader.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_ARGS,
+ const float3 from_P,
+ const float3 to_P)
+{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
+
+ ShaderDataTinyStorage stack_sd_storage;
+ ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
+
+ kernel_assert(kernel_data.integrator.use_volumes);
+
+ Ray volume_ray ccl_optional_struct_init;
+ volume_ray.P = from_P;
+ volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t);
+
+#ifdef __VOLUME_RECORD_ALL__
+ Intersection hits[2 * VOLUME_STACK_SIZE + 1];
+ uint num_hits = scene_intersect_volume_all(
+ kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY);
+ if (num_hits > 0) {
+ Intersection *isect = hits;
+
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+ for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
+ shader_setup_from_ray(kg, stack_sd, &volume_ray, isect);
+ volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd);
+ }
+ }
+#else
+ Intersection isect;
+ int step = 0;
+ while (step < 2 * VOLUME_STACK_SIZE &&
+ scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
+ shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
+ volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd);
+
+ /* Move ray forward. */
+ volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+ if (volume_ray.t != FLT_MAX) {
+ volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t);
+ }
+ ++step;
+ }
+#endif
+}
+
+ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
+{
+ PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK);
+
+ ShaderDataTinyStorage stack_sd_storage;
+ ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage);
+
+ Ray volume_ray ccl_optional_struct_init;
+ integrator_state_read_ray(INTEGRATOR_STATE_PASS, &volume_ray);
+ volume_ray.t = FLT_MAX;
+
+ const uint visibility = (INTEGRATOR_STATE(path, flag) & PATH_RAY_ALL_VISIBILITY);
+ int stack_index = 0, enclosed_index = 0;
+
+ /* Write background shader. */
+ if (kernel_data.background.volume_shader != SHADER_NONE) {
+ const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader};
+ integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+ stack_index++;
+ }
+
+#ifdef __VOLUME_RECORD_ALL__
+ Intersection hits[2 * VOLUME_STACK_SIZE + 1];
+ uint num_hits = scene_intersect_volume_all(
+ kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility);
+ if (num_hits > 0) {
+ int enclosed_volumes[VOLUME_STACK_SIZE];
+ Intersection *isect = hits;
+
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+
+ for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
+ shader_setup_from_ray(kg, stack_sd, &volume_ray, isect);
+ if (stack_sd->flag & SD_BACKFACING) {
+ bool need_add = true;
+ for (int i = 0; i < enclosed_index && need_add; ++i) {
+          /* If the ray exits a volume that it never entered, the camera
+           * must be inside that volume. */
+ if (enclosed_volumes[i] == stack_sd->object) {
+ need_add = false;
+ }
+ }
+ for (int i = 0; i < stack_index && need_add; ++i) {
+ /* Don't add intersections twice. */
+ VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ if (entry.object == stack_sd->object) {
+ need_add = false;
+ break;
+ }
+ }
+ if (need_add && stack_index < VOLUME_STACK_SIZE - 1) {
+ const VolumeStack new_entry = {stack_sd->object, stack_sd->shader};
+ integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+ ++stack_index;
+ }
+ }
+ else {
+        /* If a camera ray enters a volume, that volume should not be
+         * added to the stack again on exit. */
+ enclosed_volumes[enclosed_index++] = stack_sd->object;
+ }
+ }
+ }
+#else
+ int enclosed_volumes[VOLUME_STACK_SIZE];
+ int step = 0;
+
+ while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 &&
+ step < 2 * VOLUME_STACK_SIZE) {
+ Intersection isect;
+ if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
+ break;
+ }
+
+ shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
+ if (stack_sd->flag & SD_BACKFACING) {
+      /* If the ray exits a volume that it never entered, the camera
+       * must be inside that volume. */
+      bool need_add = true;
+      for (int i = 0; i < enclosed_index && need_add; ++i) {
+ if (enclosed_volumes[i] == stack_sd->object) {
+ need_add = false;
+ }
+ }
+ for (int i = 0; i < stack_index && need_add; ++i) {
+ /* Don't add intersections twice. */
+ VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ if (entry.object == stack_sd->object) {
+ need_add = false;
+ break;
+ }
+ }
+ if (need_add) {
+ const VolumeStack new_entry = {stack_sd->object, stack_sd->shader};
+ integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+ ++stack_index;
+ }
+ }
+ else {
+      /* If a camera ray enters a volume, that volume should not be
+       * added to the stack again on exit. */
+ enclosed_volumes[enclosed_index++] = stack_sd->object;
+ }
+
+ /* Move ray forward. */
+ volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
+ ++step;
+ }
+#endif
+
+ /* Write terminator. */
+ const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE};
+ integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
+
+ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_megakernel.h b/intern/cycles/kernel/integrator/integrator_megakernel.h
new file mode 100644
index 00000000000..91363ea1c7f
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_megakernel.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/integrator/integrator_init_from_camera.h"
+#include "kernel/integrator/integrator_intersect_closest.h"
+#include "kernel/integrator/integrator_intersect_shadow.h"
+#include "kernel/integrator/integrator_intersect_subsurface.h"
+#include "kernel/integrator/integrator_intersect_volume_stack.h"
+#include "kernel/integrator/integrator_shade_background.h"
+#include "kernel/integrator/integrator_shade_light.h"
+#include "kernel/integrator/integrator_shade_shadow.h"
+#include "kernel/integrator/integrator_shade_surface.h"
+#include "kernel/integrator/integrator_shade_volume.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void integrator_megakernel(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ /* Each kernel indicates the next kernel to execute, so here we simply
+ * have to check what that kernel is and execute it.
+ *
+ * TODO: investigate if we can use device side enqueue for GPUs to avoid
+ * having to compile this big kernel. */
+ while (true) {
+ if (INTEGRATOR_STATE(shadow_path, queued_kernel)) {
+ /* First handle any shadow paths before we potentially create more shadow paths. */
+ switch (INTEGRATOR_STATE(shadow_path, queued_kernel)) {
+ case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
+ integrator_intersect_shadow(INTEGRATOR_STATE_PASS);
+ break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
+ integrator_shade_shadow(INTEGRATOR_STATE_PASS, render_buffer);
+ break;
+ default:
+ kernel_assert(0);
+ break;
+ }
+ }
+ else if (INTEGRATOR_STATE(path, queued_kernel)) {
+ /* Then handle regular path kernels. */
+ switch (INTEGRATOR_STATE(path, queued_kernel)) {
+ case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
+ integrator_intersect_closest(INTEGRATOR_STATE_PASS);
+ break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
+ integrator_shade_background(INTEGRATOR_STATE_PASS, render_buffer);
+ break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
+ integrator_shade_surface(INTEGRATOR_STATE_PASS, render_buffer);
+ break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
+ integrator_shade_volume(INTEGRATOR_STATE_PASS, render_buffer);
+ break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
+ integrator_shade_surface_raytrace(INTEGRATOR_STATE_PASS, render_buffer);
+ break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
+ integrator_shade_light(INTEGRATOR_STATE_PASS, render_buffer);
+ break;
+ case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
+ integrator_intersect_subsurface(INTEGRATOR_STATE_PASS);
+ break;
+ case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
+ integrator_intersect_volume_stack(INTEGRATOR_STATE_PASS);
+ break;
+ default:
+ kernel_assert(0);
+ break;
+ }
+ }
+ else {
+ break;
+ }
+ }
+}
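+
+/* Usage sketch (illustrative, assuming a CPU driver loop): initialize a path
+ * and then run the megakernel until all queued kernels for it have executed:
+ *
+ *   if (integrator_init_from_camera(INTEGRATOR_STATE_PASS, tile, buffer, x, y, sample)) {
+ *     integrator_megakernel(INTEGRATOR_STATE_PASS, buffer);
+ *   }
+ */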
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h
new file mode 100644
index 00000000000..3e4cc837e9b
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_shade_background.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_accumulate.h"
+#include "kernel/kernel_emission.h"
+#include "kernel/kernel_light.h"
+#include "kernel/kernel_shader.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device float3 integrator_eval_background_shader(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+#ifdef __BACKGROUND__
+ const int shader = kernel_data.background.surface_shader;
+ const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+
+ /* Use visibility flag to skip lights. */
+ if (shader & SHADER_EXCLUDE_ANY) {
+ if (((shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
+ ((shader & SHADER_EXCLUDE_GLOSSY) && ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) ==
+ (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) ||
+ ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) ||
+ ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) ||
+ ((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER)))
+ return zero_float3();
+ }
+
+ /* Use fast constant background color if available. */
+ float3 L = zero_float3();
+ if (!shader_constant_emission_eval(kg, shader, &L)) {
+ /* Evaluate background shader. */
+
+ /* TODO: does aliasing like this break automatic SoA in CUDA?
+ * Should we instead store closures separate from ShaderData? */
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+
+ PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
+ shader_setup_from_background(kg,
+ emission_sd,
+ INTEGRATOR_STATE(ray, P),
+ INTEGRATOR_STATE(ray, D),
+ INTEGRATOR_STATE(ray, time));
+
+ PROFILING_SHADER(emission_sd->object, emission_sd->shader);
+ PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL);
+ shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
+ INTEGRATOR_STATE_PASS, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
+
+ L = shader_background_eval(emission_sd);
+ }
+
+ /* Background MIS weights. */
+# ifdef __BACKGROUND_MIS__
+ /* Check if background light exists or if we should skip pdf. */
+ if (!(INTEGRATOR_STATE(path, flag) & PATH_RAY_MIS_SKIP) && kernel_data.background.use_mis) {
+ const float3 ray_P = INTEGRATOR_STATE(ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(ray, D);
+ const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
+ const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t);
+
+    /* Multiple importance sampling: get the background light PDF for the ray
+     * direction, and compute the weight with respect to the BSDF PDF. */
+ const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D);
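+    /* power_heuristic() is the beta = 2 power heuristic from Veach:
+     * weight = pdf_bsdf^2 / (pdf_bsdf^2 + pdf_light^2). */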
+ const float mis_weight = power_heuristic(mis_ray_pdf, pdf);
+
+ L *= mis_weight;
+ }
+# endif
+
+ return L;
+#else
+ return make_float3(0.8f, 0.8f, 0.8f);
+#endif
+}
+
+ccl_device_inline void integrate_background(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ /* Accumulate transparency for transparent background. We can skip background
+ * shader evaluation unless a background pass is used. */
+ bool eval_background = true;
+ float transparent = 0.0f;
+
+ const bool is_transparent_background_ray = kernel_data.background.transparent &&
+ (INTEGRATOR_STATE(path, flag) &
+ PATH_RAY_TRANSPARENT_BACKGROUND);
+
+ if (is_transparent_background_ray) {
+ transparent = average(INTEGRATOR_STATE(path, throughput));
+
+#ifdef __PASSES__
+ eval_background = (kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND));
+#else
+ eval_background = false;
+#endif
+ }
+
+ /* Evaluate background shader. */
+ float3 L = (eval_background) ?
+ integrator_eval_background_shader(INTEGRATOR_STATE_PASS, render_buffer) :
+ zero_float3();
+
+ /* When using the ao bounces approximation, adjust background
+ * shader intensity with ao factor. */
+ if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
+ L *= kernel_data.integrator.ao_bounces_factor;
+ }
+
+ /* Write to render buffer. */
+ kernel_accum_background(
+ INTEGRATOR_STATE_PASS, L, transparent, is_transparent_background_ray, render_buffer);
+}
+
+ccl_device_inline void integrate_distant_lights(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ const float3 ray_D = INTEGRATOR_STATE(ray, D);
+ const float ray_time = INTEGRATOR_STATE(ray, time);
+ LightSample ls ccl_optional_struct_init;
+ for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
+ if (light_sample_from_distant_ray(kg, ray_D, lamp, &ls)) {
+ /* Use visibility flag to skip lights. */
+      const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+
+#ifdef __PASSES__
+ if (ls.shader & SHADER_EXCLUDE_ANY) {
+ if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
+ ((ls.shader & SHADER_EXCLUDE_GLOSSY) &&
+ ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) ==
+ (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) ||
+ ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) ||
+ ((ls.shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) ||
+ ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER)))
+ return;
+ }
+#endif
+
+ /* Evaluate light shader. */
+ /* TODO: does aliasing like this break automatic SoA in CUDA? */
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ float3 light_eval = light_sample_shader_eval(
+ INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
+ if (is_zero(light_eval)) {
+ return;
+ }
+
+ /* MIS weighting. */
+ if (!(path_flag & PATH_RAY_MIS_SKIP)) {
+        /* Multiple importance sampling: get the regular light PDF, and
+         * compute the weight with respect to the BSDF PDF. */
+ const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
+ const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf);
+ light_eval *= mis_weight;
+ }
+
+ /* Write to render buffer. */
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer);
+ }
+ }
+}
+
+ccl_device void integrator_shade_background(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP);
+
+ /* TODO: unify these in a single loop to only have a single shader evaluation call. */
+ integrate_distant_lights(INTEGRATOR_STATE_PASS, render_buffer);
+ integrate_background(INTEGRATOR_STATE_PASS, render_buffer);
+
+#ifdef __SHADOW_CATCHER__
+ if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) {
+ INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND;
+
+ const int isect_prim = INTEGRATOR_STATE(isect, prim);
+ const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim);
+ const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
+
+ if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) {
+ INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
+ shader);
+ }
+ else {
+ INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
+ shader);
+ }
+ return;
+ }
+#endif
+
+ INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h
new file mode 100644
index 00000000000..05b530f9665
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_shade_light.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_accumulate.h"
+#include "kernel/kernel_emission.h"
+#include "kernel/kernel_light.h"
+#include "kernel/kernel_shader.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void integrate_light(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ /* Setup light sample. */
+ Intersection isect ccl_optional_struct_init;
+ integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+
+ float3 ray_P = INTEGRATOR_STATE(ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(ray, D);
+ const float ray_time = INTEGRATOR_STATE(ray, time);
+
+ /* Advance ray beyond light. */
+ /* TODO: can we make this more numerically robust to avoid reintersecting the
+ * same light in some cases? */
+ const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D);
+ INTEGRATOR_STATE_WRITE(ray, P) = new_ray_P;
+ INTEGRATOR_STATE_WRITE(ray, t) -= isect.t;
+
+ /* Set position to where the BSDF was sampled, for correct MIS PDF. */
+ const float mis_ray_t = INTEGRATOR_STATE(path, mis_ray_t);
+ ray_P -= ray_D * mis_ray_t;
+ isect.t += mis_ray_t;
+ INTEGRATOR_STATE_WRITE(path, mis_ray_t) = mis_ray_t + isect.t;
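+  /* Illustrative example of the bookkeeping above: if the BSDF was sampled
+   * mis_ray_t = 2.0f before the current ray origin and the light is hit at
+   * isect.t = 3.0f, the light PDF must be evaluated for a distance of 5.0f
+   * from the original sampling point. */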
+
+ LightSample ls ccl_optional_struct_init;
+ const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls);
+
+ if (!use_light_sample) {
+ return;
+ }
+
+ /* Use visibility flag to skip lights. */
+  const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+
+#ifdef __PASSES__
+ if (ls.shader & SHADER_EXCLUDE_ANY) {
+ if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
+ ((ls.shader & SHADER_EXCLUDE_GLOSSY) &&
+ ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) ==
+ (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) ||
+ ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) ||
+ ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER)))
+ return;
+ }
+#endif
+
+ /* Evaluate light shader. */
+ /* TODO: does aliasing like this break automatic SoA in CUDA? */
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ float3 light_eval = light_sample_shader_eval(INTEGRATOR_STATE_PASS, emission_sd, &ls, ray_time);
+ if (is_zero(light_eval)) {
+ return;
+ }
+
+ /* MIS weighting. */
+ if (!(path_flag & PATH_RAY_MIS_SKIP)) {
+    /* Multiple importance sampling: get the regular light PDF, and compute
+     * the weight with respect to the BSDF PDF. */
+ const float mis_ray_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
+ const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf);
+ light_eval *= mis_weight;
+ }
+
+ /* Write to render buffer. */
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, light_eval, render_buffer);
+}
+
+ccl_device void integrator_shade_light(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP);
+
+ integrate_light(INTEGRATOR_STATE_PASS, render_buffer);
+
+ /* TODO: we could get stuck in an infinite loop if there are precision issues
+ * and the same light is hit again.
+ *
+ * As a workaround count this as a transparent bounce. It makes some sense
+ * to interpret lights as transparent surfaces (and support making them opaque),
+ * but this needs to be revisited. */
+ uint32_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce) + 1;
+ INTEGRATOR_STATE_WRITE(path, transparent_bounce) = transparent_bounce;
+
+ if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
+ INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ return;
+ }
+ else {
+ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ return;
+ }
+
+  /* TODO: in some cases we could continue directly to SHADE_BACKGROUND, but
+   * that optimization is probably not practical if we add lights to scene
+   * geometry. */
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
new file mode 100644
index 00000000000..fd3c3ae1653
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/integrator/integrator_shade_volume.h"
+#include "kernel/integrator/integrator_volume_stack.h"
+
+#include "kernel/kernel_shader.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits)
+{
+ return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE;
+}
+
+#ifdef __TRANSPARENT_SHADOWS__
+ccl_device_inline float3 integrate_transparent_surface_shadow(INTEGRATOR_STATE_ARGS, const int hit)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE);
+
+ /* TODO: does aliasing like this break automatic SoA in CUDA?
+ * Should we instead store closures separate from ShaderData?
+ *
+ * TODO: is it better to declare this outside the loop or keep it local
+ * so the compiler can see there is no dependency between iterations? */
+ ShaderDataTinyStorage shadow_sd_storage;
+ ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
+
+ /* Setup shader data at surface. */
+ Intersection isect ccl_optional_struct_init;
+ integrator_state_read_shadow_isect(INTEGRATOR_STATE_PASS, &isect, hit);
+
+ Ray ray ccl_optional_struct_init;
+ integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ shader_setup_from_ray(kg, shadow_sd, &ray, &isect);
+
+ /* Evaluate shader. */
+ if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
+ shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ INTEGRATOR_STATE_PASS, shadow_sd, NULL, PATH_RAY_SHADOW);
+ }
+
+# ifdef __VOLUME__
+ /* Exit/enter volume. */
+ shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, shadow_sd);
+# endif
+
+ /* Compute transparency from closures. */
+ return shader_bsdf_transparency(kg, shadow_sd);
+}
+
+# ifdef __VOLUME__
+ccl_device_inline void integrate_transparent_volume_shadow(INTEGRATOR_STATE_ARGS,
+ const int hit,
+ const int num_recorded_hits,
+ float3 *ccl_restrict throughput)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME);
+
+ /* TODO: deduplicate with surface, or does it not matter for memory usage? */
+ ShaderDataTinyStorage shadow_sd_storage;
+ ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage);
+
+ /* Setup shader data. */
+ Ray ray ccl_optional_struct_init;
+ integrator_state_read_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ /* Modify ray position and length to match current segment. */
+ const float start_t = (hit == 0) ? 0.0f : INTEGRATOR_STATE_ARRAY(shadow_isect, hit - 1, t);
+ const float end_t = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(shadow_isect, hit, t) :
+ ray.t;
+ ray.P += start_t * ray.D;
+ ray.t = end_t - start_t;
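+  /* Illustrative example: with two recorded hits at t = 1.0 and t = 2.5, the
+   * volume segments shaded are [0, 1.0), [1.0, 2.5) and [2.5, ray.t), the
+   * last one produced by the hit == num_recorded_hits iteration of the
+   * calling loop. */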
+
+ shader_setup_from_volume(kg, shadow_sd, &ray);
+
+ const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
+ return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
+ });
+
+ volume_shadow_heterogeneous(INTEGRATOR_STATE_PASS, &ray, shadow_sd, throughput, step_size);
+}
+# endif
+
+ccl_device_inline bool integrate_transparent_shadow(INTEGRATOR_STATE_ARGS, const int num_hits)
+{
+ /* Accumulate shadow for transparent surfaces. */
+ const int num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+
+ for (int hit = 0; hit < num_recorded_hits + 1; hit++) {
+ /* Volume shaders. */
+ if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) {
+# ifdef __VOLUME__
+ if (!integrator_state_shadow_volume_stack_is_empty(INTEGRATOR_STATE_PASS)) {
+ float3 throughput = INTEGRATOR_STATE(shadow_path, throughput);
+ integrate_transparent_volume_shadow(
+ INTEGRATOR_STATE_PASS, hit, num_recorded_hits, &throughput);
+ if (is_zero(throughput)) {
+ return true;
+ }
+
+ INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
+ }
+# endif
+ }
+
+ /* Surface shaders. */
+ if (hit < num_recorded_hits) {
+ const float3 shadow = integrate_transparent_surface_shadow(INTEGRATOR_STATE_PASS, hit);
+ const float3 throughput = INTEGRATOR_STATE(shadow_path, throughput) * shadow;
+ if (is_zero(throughput)) {
+ return true;
+ }
+
+ INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) += 1;
+ }
+
+    /* Note that we do not need to check max_transparent_bounce here; the
+     * number of intersections is already limited and the shadow is made
+     * opaque in the INTERSECT_SHADOW kernel. */
+ }
+
+ if (shadow_intersections_has_remaining(num_hits)) {
+    /* There are more hits than we could record due to limited storage;
+     * adjust the ray to continue intersecting from the last recorded hit. */
+ const float last_hit_t = INTEGRATOR_STATE_ARRAY(shadow_isect, num_recorded_hits - 1, t);
+ const float3 ray_P = INTEGRATOR_STATE(shadow_ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(shadow_ray, D);
+ INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D);
+ INTEGRATOR_STATE_WRITE(shadow_ray, t) -= last_hit_t;
+ }
+
+ return false;
+}
+#endif /* __TRANSPARENT_SHADOWS__ */
+
+ccl_device void integrator_shade_shadow(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP);
+ const int num_hits = INTEGRATOR_STATE(shadow_path, num_hits);
+
+#ifdef __TRANSPARENT_SHADOWS__
+ /* Evaluate transparent shadows. */
+ const bool opaque = integrate_transparent_shadow(INTEGRATOR_STATE_PASS, num_hits);
+ if (opaque) {
+ INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+ return;
+ }
+#endif
+
+ if (shadow_intersections_has_remaining(num_hits)) {
+ /* More intersections to find, continue shadow ray. */
+ INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
+ return;
+ }
+ else {
+ kernel_accum_light(INTEGRATOR_STATE_PASS, render_buffer);
+ INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+ return;
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h
new file mode 100644
index 00000000000..73b7cad32be
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_shade_surface.h
@@ -0,0 +1,502 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_accumulate.h"
+#include "kernel/kernel_emission.h"
+#include "kernel/kernel_light.h"
+#include "kernel/kernel_passes.h"
+#include "kernel/kernel_path_state.h"
+#include "kernel/kernel_shader.h"
+
+#include "kernel/integrator/integrator_subsurface.h"
+#include "kernel/integrator/integrator_volume_stack.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_forceinline void integrate_surface_shader_setup(INTEGRATOR_STATE_CONST_ARGS,
+ ShaderData *sd)
+{
+ Intersection isect ccl_optional_struct_init;
+ integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+
+ Ray ray ccl_optional_struct_init;
+ integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ shader_setup_from_ray(kg, sd, &ray, &isect);
+}
+
+#ifdef __HOLDOUT__
+ccl_device_forceinline bool integrate_surface_holdout(INTEGRATOR_STATE_CONST_ARGS,
+ ShaderData *sd,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ /* Write holdout transparency to render buffer and stop if fully holdout. */
+ const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+
+ if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
+ (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+ const float3 holdout_weight = shader_holdout_apply(kg, sd);
+ if (kernel_data.background.transparent) {
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ const float transparent = average(holdout_weight * throughput);
+ kernel_accum_transparent(INTEGRATOR_STATE_PASS, transparent, render_buffer);
+ }
+ if (isequal_float3(holdout_weight, one_float3())) {
+ return false;
+ }
+ }
+
+ return true;
+}
+#endif /* __HOLDOUT__ */
+
+#ifdef __EMISSION__
+ccl_device_forceinline void integrate_surface_emission(INTEGRATOR_STATE_CONST_ARGS,
+ const ShaderData *sd,
+ ccl_global float *ccl_restrict
+ render_buffer)
+{
+ const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+
+ /* Evaluate emissive closure. */
+ float3 L = shader_emissive_eval(sd);
+
+# ifdef __HAIR__
+ if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) &&
+ (sd->type & PRIMITIVE_ALL_TRIANGLE))
+# else
+ if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
+# endif
+ {
+ const float bsdf_pdf = INTEGRATOR_STATE(path, mis_ray_pdf);
+ const float t = sd->ray_length + INTEGRATOR_STATE(path, mis_ray_t);
+
+ /* Multiple importance sampling, get triangle light pdf,
+ * and compute weight with respect to BSDF pdf. */
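+    /* power_heuristic(a, b) is the standard beta = 2 heuristic, a^2 / (a^2 + b^2). */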
+ float pdf = triangle_light_pdf(kg, sd, t);
+ float mis_weight = power_heuristic(bsdf_pdf, pdf);
+
+ L *= mis_weight;
+ }
+
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ kernel_accum_emission(INTEGRATOR_STATE_PASS, throughput, L, render_buffer);
+}
+#endif /* __EMISSION__ */
+
+#ifdef __EMISSION__
+/* Path tracing: sample point on light and evaluate light shader, then
+ * queue shadow ray to be traced. */
+ccl_device_forceinline void integrate_surface_direct_light(INTEGRATOR_STATE_ARGS,
+ ShaderData *sd,
+ const RNGState *rng_state)
+{
+ /* Test if there is a light or BSDF that needs direct light. */
+ if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) {
+ return;
+ }
+
+ /* Sample position on a light. */
+ LightSample ls ccl_optional_struct_init;
+ {
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+ const uint bounce = INTEGRATOR_STATE(path, bounce);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ if (!light_distribution_sample_from_position(
+ kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) {
+ return;
+ }
+ }
+
+ kernel_assert(ls.pdf != 0.0f);
+
+ /* Evaluate light shader.
+ *
+ * TODO: can we reuse sd memory? In theory we can move this after
+ * integrate_surface_bounce, evaluate the BSDF, and only then evaluate
+ * the light shader. This could also move to its own kernel, for
+ * non-constant light sources. */
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ const float3 light_eval = light_sample_shader_eval(
+ INTEGRATOR_STATE_PASS, emission_sd, &ls, sd->time);
+ if (is_zero(light_eval)) {
+ return;
+ }
+
+ /* Evaluate BSDF. */
+ const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D);
+
+ BsdfEval bsdf_eval ccl_optional_struct_init;
+ const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader);
+ bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf);
+
+ if (ls.shader & SHADER_USE_MIS) {
+ const float mis_weight = power_heuristic(ls.pdf, bsdf_pdf);
+ bsdf_eval_mul(&bsdf_eval, mis_weight);
+ }
+
+ /* Path termination. */
+ const float terminate = path_state_rng_light_termination(kg, rng_state);
+ if (light_sample_terminate(kg, &ls, &bsdf_eval, terminate)) {
+ return;
+ }
+
+ /* Create shadow ray. */
+ Ray ray ccl_optional_struct_init;
+ light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray);
+ const bool is_light = light_sample_is_light(&ls);
+
+ /* Copy volume stack and enter/exit volume. */
+ integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS);
+
+ if (is_transmission) {
+# ifdef __VOLUME__
+ shadow_volume_stack_enter_exit(INTEGRATOR_STATE_PASS, sd);
+# endif
+ }
+
+ /* Write shadow ray and associated state to global memory. */
+ integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ /* Copy state from main path to shadow path. */
+ const uint16_t bounce = INTEGRATOR_STATE(path, bounce);
+ const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce);
+ uint32_t shadow_flag = INTEGRATOR_STATE(path, flag);
+ shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
+ shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS;
+ const float3 throughput = INTEGRATOR_STATE(path, throughput) * bsdf_eval_sum(&bsdf_eval);
+
+ if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
+ const float3 diffuse_glossy_ratio = (bounce == 0) ?
+ bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) :
+ INTEGRATOR_STATE(path, diffuse_glossy_ratio);
+ INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
+ }
+
+ INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag;
+ INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce;
+ INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce;
+ INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput;
+
+ if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
+ INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput;
+ }
+
+ /* Branch off shadow kernel. */
+ INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
+}
+#endif
+
+/* Path tracing: bounce off or through surface with new direction. */
+ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(INTEGRATOR_STATE_ARGS,
+ ShaderData *sd,
+ const RNGState *rng_state)
+{
+ /* Sample BSDF or BSSRDF. */
+ if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) {
+ return LABEL_NONE;
+ }
+
+ float bsdf_u, bsdf_v;
+ path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+ const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
+
+#ifdef __SUBSURFACE__
+ /* BSSRDF closure, we schedule subsurface intersection kernel. */
+ if (CLOSURE_IS_BSSRDF(sc->type)) {
+ return subsurface_bounce(INTEGRATOR_STATE_PASS, sd, sc);
+ }
+#endif
+
+ /* BSDF closure, sample direction. */
+ float bsdf_pdf;
+ BsdfEval bsdf_eval ccl_optional_struct_init;
+ float3 bsdf_omega_in ccl_optional_struct_init;
+ differential3 bsdf_domega_in ccl_optional_struct_init;
+ int label;
+
+ label = shader_bsdf_sample_closure(
+ kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+
+ if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) {
+ return LABEL_NONE;
+ }
+
+ /* Setup ray. Note that clipping works through transparent bounces. */
+ INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng);
+ INTEGRATOR_STATE_WRITE(ray, D) = normalize(bsdf_omega_in);
+ INTEGRATOR_STATE_WRITE(ray, t) = (label & LABEL_TRANSPARENT) ?
+ INTEGRATOR_STATE(ray, t) - sd->ray_length :
+ FLT_MAX;
+
+#ifdef __RAY_DIFFERENTIALS__
+ INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
+ INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(bsdf_domega_in);
+#endif
+
+ /* Update throughput. */
+ float3 throughput = INTEGRATOR_STATE(path, throughput);
+ throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf;
+ INTEGRATOR_STATE_WRITE(path, throughput) = throughput;
+
+ if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
+ if (INTEGRATOR_STATE(path, bounce) == 0) {
+ INTEGRATOR_STATE_WRITE(path,
+ diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio(&bsdf_eval);
+ }
+ }
+
+ /* Update path state */
+ if (label & LABEL_TRANSPARENT) {
+ INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length;
+ }
+ else {
+ INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = bsdf_pdf;
+ INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f;
+ INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(bsdf_pdf,
+ INTEGRATOR_STATE(path, min_ray_pdf));
+ }
+
+ path_state_next(INTEGRATOR_STATE_PASS, label);
+ return label;
+}
+
+#ifdef __VOLUME__
+ccl_device_forceinline int integrate_surface_volume_only_bounce(INTEGRATOR_STATE_ARGS,
+                                                                ShaderData *sd)
+{
+ if (!path_state_volume_next(INTEGRATOR_STATE_PASS)) {
+ return LABEL_NONE;
+ }
+
+ /* Setup ray position, direction stays unchanged. */
+ INTEGRATOR_STATE_WRITE(ray, P) = ray_offset(sd->P, -sd->Ng);
+
+ /* Clipping works through transparent. */
+ INTEGRATOR_STATE_WRITE(ray, t) -= sd->ray_length;
+
+# ifdef __RAY_DIFFERENTIALS__
+ INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
+# endif
+
+ INTEGRATOR_STATE_WRITE(path, mis_ray_t) += sd->ray_length;
+
+ return LABEL_TRANSMIT | LABEL_TRANSPARENT;
+}
+#endif
+
+#if defined(__AO__) && defined(__SHADER_RAYTRACE__)
+ccl_device_forceinline void integrate_surface_ao_pass(INTEGRATOR_STATE_CONST_ARGS,
+ const ShaderData *ccl_restrict sd,
+ const RNGState *ccl_restrict rng_state,
+ ccl_global float *ccl_restrict render_buffer)
+{
+# ifdef __KERNEL_OPTIX__
+ optixDirectCall<void>(2, INTEGRATOR_STATE_PASS, sd, rng_state, render_buffer);
+}
+
+extern "C" __device__ void __direct_callable__ao_pass(INTEGRATOR_STATE_CONST_ARGS,
+ const ShaderData *ccl_restrict sd,
+ const RNGState *ccl_restrict rng_state,
+ ccl_global float *ccl_restrict render_buffer)
+{
+# endif /* __KERNEL_OPTIX__ */
+ float bsdf_u, bsdf_v;
+ path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+ const float3 ao_N = shader_bsdf_ao_normal(kg, sd);
+ float3 ao_D;
+ float ao_pdf;
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+
+ if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
+ Ray ray ccl_optional_struct_init;
+ ray.P = ray_offset(sd->P, sd->Ng);
+ ray.D = ao_D;
+ ray.t = kernel_data.integrator.ao_bounces_distance;
+ ray.time = sd->time;
+ ray.dP = differential_zero_compact();
+ ray.dD = differential_zero_compact();
+
+ Intersection isect ccl_optional_struct_init;
+ if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) {
+ ccl_global float *buffer = kernel_pass_pixel_render_buffer(INTEGRATOR_STATE_PASS,
+ render_buffer);
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, throughput);
+ }
+ }
+}
+#endif /* defined(__AO__) && defined(__SHADER_RAYTRACE__) */
+
+template<uint node_feature_mask>
+ccl_device bool integrate_surface(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+
+{
+ PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_SURFACE_SETUP);
+
+ /* Setup shader data. */
+ ShaderData sd;
+ integrate_surface_shader_setup(INTEGRATOR_STATE_PASS, &sd);
+ PROFILING_SHADER(sd.object, sd.shader);
+
+ int continue_path_label = 0;
+
+ /* Skip most work for volume bounding surface. */
+#ifdef __VOLUME__
+ if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+#endif
+
+ {
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+#ifdef __SUBSURFACE__
+ /* Can skip shader evaluation for BSSRDF exit point without bump mapping. */
+ if (!(path_flag & PATH_RAY_SUBSURFACE) || ((sd.flag & SD_HAS_BSSRDF_BUMP)))
+#endif
+ {
+ /* Evaluate shader. */
+ PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL);
+ shader_eval_surface<node_feature_mask>(
+ INTEGRATOR_STATE_PASS, &sd, render_buffer, path_flag);
+ }
+ }
+
+#ifdef __SUBSURFACE__
+ if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE) {
+ /* When coming from inside subsurface scattering, setup a diffuse
+ * closure to perform lighting at the exit point. */
+ INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SUBSURFACE;
+ subsurface_shader_data_setup(INTEGRATOR_STATE_PASS, &sd);
+ }
+#endif
+
+ shader_prepare_surface_closures(INTEGRATOR_STATE_PASS, &sd);
+
+#ifdef __HOLDOUT__
+ /* Evaluate holdout. */
+ if (!integrate_surface_holdout(INTEGRATOR_STATE_PASS, &sd, render_buffer)) {
+ return false;
+ }
+#endif
+
+#ifdef __EMISSION__
+ /* Write emission. */
+ if (sd.flag & SD_EMISSION) {
+ integrate_surface_emission(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+ }
+#endif
+
+#ifdef __PASSES__
+ /* Write render passes. */
+ PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES);
+ kernel_write_data_passes(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+#endif
+
+ /* Load random number state. */
+ RNGState rng_state;
+ path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+
+ /* Perform path termination. Most paths have already been terminated in
+ * the intersect_closest kernel, this is just for emission and for dividing
+ * throughput by the probability at the right moment. */
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+ const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ?
+ 0.0f :
+ path_state_continuation_probability(INTEGRATOR_STATE_PASS,
+ path_flag);
+ if (probability == 0.0f) {
+ return false;
+ }
+ else if (probability != 1.0f) {
+ INTEGRATOR_STATE_WRITE(path, throughput) /= probability;
+ }
+
+#ifdef __DENOISING_FEATURES__
+ kernel_write_denoising_features_surface(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+#endif
+
+#ifdef __SHADOW_CATCHER__
+ kernel_write_shadow_catcher_bounce_data(INTEGRATOR_STATE_PASS, &sd, render_buffer);
+#endif
+
+ /* Direct light. */
+ PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
+ integrate_surface_direct_light(INTEGRATOR_STATE_PASS, &sd, &rng_state);
+
+#if defined(__AO__) && defined(__SHADER_RAYTRACE__)
+ /* Ambient occlusion pass. */
+ if (node_feature_mask & KERNEL_FEATURE_NODE_RAYTRACE) {
+ if ((kernel_data.film.pass_ao != PASS_UNUSED) &&
+ (INTEGRATOR_STATE(path, flag) & PATH_RAY_CAMERA)) {
+ PROFILING_EVENT(PROFILING_SHADE_SURFACE_AO);
+ integrate_surface_ao_pass(INTEGRATOR_STATE_PASS, &sd, &rng_state, render_buffer);
+ }
+ }
+#endif
+
+ PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT);
+ continue_path_label = integrate_surface_bsdf_bssrdf_bounce(
+ INTEGRATOR_STATE_PASS, &sd, &rng_state);
+#ifdef __VOLUME__
+ }
+ else {
+ PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT);
+ continue_path_label = integrate_surface_volume_only_bounce(INTEGRATOR_STATE_PASS, &sd);
+ }
+
+ if (continue_path_label & LABEL_TRANSMIT) {
+ /* Enter/Exit volume. */
+ volume_stack_enter_exit(INTEGRATOR_STATE_PASS, &sd);
+ }
+#endif
+
+ return continue_path_label != 0;
+}
+
+template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
+ int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
+ccl_device_forceinline void integrator_shade_surface(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ if (integrate_surface<node_feature_mask>(INTEGRATOR_STATE_PASS, render_buffer)) {
+ if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE) {
+ INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+ }
+ else {
+ kernel_assert(INTEGRATOR_STATE(ray, t) != 0.0f);
+ INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ }
+ }
+ else {
+ INTEGRATOR_PATH_TERMINATE(current_kernel);
+ }
+}
+
+ccl_device_forceinline void integrator_shade_surface_raytrace(
+ INTEGRATOR_STATE_ARGS, ccl_global float *ccl_restrict render_buffer)
+{
+ integrator_shade_surface<KERNEL_FEATURE_NODE_MASK_SURFACE,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE>(INTEGRATOR_STATE_PASS,
+ render_buffer);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h
new file mode 100644
index 00000000000..4a864b1e6ce
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_shade_volume.h
@@ -0,0 +1,1015 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_accumulate.h"
+#include "kernel/kernel_emission.h"
+#include "kernel/kernel_light.h"
+#include "kernel/kernel_passes.h"
+#include "kernel/kernel_path_state.h"
+#include "kernel/kernel_shader.h"
+
+#include "kernel/integrator/integrator_intersect_closest.h"
+#include "kernel/integrator/integrator_volume_stack.h"
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __VOLUME__
+
+/* Events for probabilistic scattering */
+
+typedef enum VolumeIntegrateEvent {
+ VOLUME_PATH_SCATTERED = 0,
+ VOLUME_PATH_ATTENUATED = 1,
+ VOLUME_PATH_MISSED = 2
+} VolumeIntegrateEvent;
+
+typedef struct VolumeIntegrateResult {
+ /* Throughput and offset for direct light scattering. */
+ bool direct_scatter;
+ float3 direct_throughput;
+ float direct_t;
+ ShaderVolumePhases direct_phases;
+
+ /* Throughput and offset for indirect light scattering. */
+ bool indirect_scatter;
+ float3 indirect_throughput;
+ float indirect_t;
+ ShaderVolumePhases indirect_phases;
+} VolumeIntegrateResult;
+
+/* Ignore paths that have volume throughput below this value, to avoid unnecessary work
+ * and precision issues.
+ * todo: this value could be tweaked or turned into a probability to avoid unnecessary
+ * work in volumes and subsurface scattering. */
+# define VOLUME_THROUGHPUT_EPSILON 1e-6f
+
+/* Volume shader properties
+ *
+ * extinction coefficient = absorption coefficient + scattering coefficient
+ * sigma_t = sigma_a + sigma_s */
+
+typedef struct VolumeShaderCoefficients {
+ float3 sigma_t;
+ float3 sigma_s;
+ float3 emission;
+} VolumeShaderCoefficients;
+
+/* Evaluate shader to get extinction coefficient at P. */
+ccl_device_inline bool shadow_volume_shader_sample(INTEGRATOR_STATE_ARGS,
+ ShaderData *ccl_restrict sd,
+ float3 *ccl_restrict extinction)
+{
+ shader_eval_volume(INTEGRATOR_STATE_PASS, sd, PATH_RAY_SHADOW, [=](const int i) {
+ return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
+ });
+
+ if (!(sd->flag & SD_EXTINCTION)) {
+ return false;
+ }
+
+ const float density = object_volume_density(kg, sd->object);
+ *extinction = sd->closure_transparent_extinction * density;
+ return true;
+}
+
+/* Evaluate shader to get absorption, scattering and emission at P. */
+ccl_device_inline bool volume_shader_sample(INTEGRATOR_STATE_ARGS,
+ ShaderData *ccl_restrict sd,
+ VolumeShaderCoefficients *coeff)
+{
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+ shader_eval_volume(INTEGRATOR_STATE_PASS, sd, path_flag, [=](const int i) {
+ return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ });
+
+ if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) {
+ return false;
+ }
+
+ coeff->sigma_s = zero_float3();
+ coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3();
+ coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_float3();
+
+ if (sd->flag & SD_SCATTER) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_VOLUME(sc->type)) {
+ coeff->sigma_s += sc->weight;
+ }
+ }
+ }
+
+ const float density = object_volume_density(kg, sd->object);
+ coeff->sigma_s *= density;
+ coeff->sigma_t *= density;
+ coeff->emission *= density;
+
+ return true;
+}
+
+ccl_device_forceinline void volume_step_init(const KernelGlobals *kg,
+ const RNGState *rng_state,
+ const float object_step_size,
+ float t,
+ float *step_size,
+ float *step_shade_offset,
+ float *steps_offset,
+ int *max_steps)
+{
+ if (object_step_size == FLT_MAX) {
+ /* Homogeneous volume. */
+ *step_size = t;
+ *step_shade_offset = 0.0f;
+ *steps_offset = 1.0f;
+ *max_steps = 1;
+ }
+ else {
+ /* Heterogeneous volume. */
+ *max_steps = kernel_data.integrator.volume_max_steps;
+ float step = min(object_step_size, t);
+
+    /* Clamp the step count: if covering t would exceed max_steps steps, enlarge the
+     * step size so the entire distance is still covered. */
+ if (t > *max_steps * step) {
+ step = t / (float)*max_steps;
+ }
+
+ *step_size = step;
+
+ /* Perform shading at this offset within a step, to integrate over
+     * the entire step segment. */
+ *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4);
+
+    /* Shift starting point of all segments by this random amount to avoid
+ * banding artifacts from the volume bounding shape. */
+ *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3);
+ }
+}
+
+/* Volume Shadows
+ *
+ * These functions are used to attenuate shadow rays to lights. Both absorption
+ * and scattering will block light, represented by the extinction coefficient. */
+
+# if 0
+/* homogeneous volume: assume shader evaluation at the start gives
+ * the extinction coefficient for the entire line segment */
+ccl_device void volume_shadow_homogeneous(INTEGRATOR_STATE_ARGS,
+ Ray *ccl_restrict ray,
+ ShaderData *ccl_restrict sd,
+ float3 *ccl_restrict throughput)
+{
+ float3 sigma_t = zero_float3();
+
+ if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) {
+ *throughput *= volume_color_transmittance(sigma_t, ray->t);
+ }
+}
+# endif
+
+/* heterogeneous volume: integrate stepping through the volume until we
+ * reach the end, get absorbed entirely, or run out of iterations */
+ccl_device void volume_shadow_heterogeneous(INTEGRATOR_STATE_ARGS,
+ Ray *ccl_restrict ray,
+ ShaderData *ccl_restrict sd,
+ float3 *ccl_restrict throughput,
+ const float object_step_size)
+{
+ /* Load random number state. */
+ RNGState rng_state;
+ shadow_path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+
+ float3 tp = *throughput;
+
+ /* Prepare for stepping.
+ * For shadows we do not offset all segments, since the starting point is
+ * already a random distance inside the volume. It also appears to create
+ * banding artifacts for unknown reasons. */
+ int max_steps;
+ float step_size, step_shade_offset, unused;
+ volume_step_init(kg,
+ &rng_state,
+ object_step_size,
+ ray->t,
+ &step_size,
+ &step_shade_offset,
+ &unused,
+ &max_steps);
+ const float steps_offset = 1.0f;
+
+ /* compute extinction at the start */
+ float t = 0.0f;
+
+ float3 sum = zero_float3();
+
+ for (int i = 0; i < max_steps; i++) {
+ /* advance to new position */
+ float new_t = min(ray->t, (i + steps_offset) * step_size);
+ float dt = new_t - t;
+
+ float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset);
+ float3 sigma_t = zero_float3();
+
+ /* compute attenuation over segment */
+ sd->P = new_P;
+ if (shadow_volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &sigma_t)) {
+ /* Compute expf() only for every Nth step, to save some calculations
+ * because exp(a)*exp(b) = exp(a+b), also do a quick VOLUME_THROUGHPUT_EPSILON
+ * check then. */
+ sum += (-sigma_t * dt);
+ if ((i & 0x07) == 0) { /* ToDo: Other interval? */
+ tp = *throughput * exp3(sum);
+
+ /* stop if nearly all light is blocked */
+ if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON &&
+ tp.z < VOLUME_THROUGHPUT_EPSILON)
+ break;
+ }
+ }
+
+ /* stop if at the end of the volume */
+ t = new_t;
+ if (t == ray->t) {
+ /* Update throughput in case we haven't done it above */
+ tp = *throughput * exp3(sum);
+ break;
+ }
+ }
+
+ *throughput = tp;
+}
+
+/* Equi-angular sampling as in:
+ * "Importance Sampling Techniques for Path Tracing in Participating Media" */
+
+ccl_device float volume_equiangular_sample(const Ray *ccl_restrict ray,
+ const float3 light_P,
+ const float xi,
+ float *pdf)
+{
+ const float t = ray->t;
+ const float delta = dot((light_P - ray->P), ray->D);
+ const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ if (UNLIKELY(D == 0.0f)) {
+ *pdf = 0.0f;
+ return 0.0f;
+ }
+ const float theta_a = -atan2f(delta, D);
+ const float theta_b = atan2f(t - delta, D);
+ const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
+ if (UNLIKELY(theta_b == theta_a)) {
+ *pdf = 0.0f;
+ return 0.0f;
+ }
+ *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+
+ return min(t, delta + t_); /* min is only for float precision errors */
+}
+
+ccl_device float volume_equiangular_pdf(const Ray *ccl_restrict ray,
+ const float3 light_P,
+ const float sample_t)
+{
+ const float delta = dot((light_P - ray->P), ray->D);
+ const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ if (UNLIKELY(D == 0.0f)) {
+ return 0.0f;
+ }
+
+ const float t = ray->t;
+ const float t_ = sample_t - delta;
+
+ const float theta_a = -atan2f(delta, D);
+ const float theta_b = atan2f(t - delta, D);
+ if (UNLIKELY(theta_b == theta_a)) {
+ return 0.0f;
+ }
+
+ const float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+
+ return pdf;
+}
+
+ccl_device float volume_equiangular_cdf(const Ray *ccl_restrict ray,
+ const float3 light_P,
+ const float sample_t)
+{
+ float delta = dot((light_P - ray->P), ray->D);
+ float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ if (UNLIKELY(D == 0.0f)) {
+ return 0.0f;
+ }
+
+ const float t = ray->t;
+ const float t_ = sample_t - delta;
+
+ const float theta_a = -atan2f(delta, D);
+ const float theta_b = atan2f(t - delta, D);
+ if (UNLIKELY(theta_b == theta_a)) {
+ return 0.0f;
+ }
+
+ const float theta_sample = atan2f(t_, D);
+ const float cdf = (theta_sample - theta_a) / (theta_b - theta_a);
+
+ return cdf;
+}
+
+/* Distance sampling */
+
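+/* Sketch: along the sampled color channel the transmittance is T(t) = exp(-sigma_t * t).
+ * Inverting the CDF restricted to [0, max_t] gives
+ *   t = -log(1 - xi * (1 - T(max_t))) / sigma_t
+ * with pdf(t) = sigma_t * T(t) / (1 - T(max_t)), which is what volume_distance_sample
+ * computes per channel below. */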
+ccl_device float volume_distance_sample(
+ float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
+{
+ /* xi is [0, 1[ so log(0) should never happen, division by zero is
+ * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
+ float sample_sigma_t = volume_channel_get(sigma_t, channel);
+ float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ float sample_transmittance = volume_channel_get(full_transmittance, channel);
+
+ float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t);
+
+ *transmittance = volume_color_transmittance(sigma_t, sample_t);
+ *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - full_transmittance);
+
+  /* todo: optimization: when taken together with the hit/miss decision,
+   * the full_transmittance cancels out and xi does not
+   * need to be remapped */
+
+ return sample_t;
+}
+
+ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
+{
+ float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
+
+ return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance);
+}
+
+/* Emission */
+
+ccl_device float3 volume_emission_integrate(VolumeShaderCoefficients *coeff,
+ int closure_flag,
+ float3 transmittance,
+ float t)
+{
+ /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
+ * this goes to E * t as sigma_t goes to zero
+ *
+ * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
+ float3 emission = coeff->emission;
+
+ if (closure_flag & SD_EXTINCTION) {
+ float3 sigma_t = coeff->sigma_t;
+
+ emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t;
+ emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t;
+ emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t;
+ }
+ else
+ emission *= t;
+
+ return emission;
+}
+
+/* Volume Integration */
+
+typedef struct VolumeIntegrateState {
+ /* Volume segment extents. */
+ float start_t;
+ float end_t;
+
+  /* True when the volume has been absorption-only so far, and no probabilistic
+   * scattering or termination has been used yet. */
+ bool absorption_only;
+
+ /* Random numbers for scattering. */
+ float rscatter;
+ float rphase;
+
+ /* Multiple importance sampling. */
+ VolumeSampleMethod direct_sample_method;
+ bool use_mis;
+ float distance_pdf;
+ float equiangular_pdf;
+} VolumeIntegrateState;
+
+ccl_device_forceinline void volume_integrate_step_scattering(
+ const ShaderData *sd,
+ const Ray *ray,
+ const float3 equiangular_light_P,
+ const VolumeShaderCoefficients &ccl_restrict coeff,
+ const float3 transmittance,
+ VolumeIntegrateState &ccl_restrict vstate,
+ VolumeIntegrateResult &ccl_restrict result)
+{
+ /* Pick random color channel, we use the Veach one-sample
+ * model with balance heuristic for the channels. */
+ const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ float3 channel_pdf;
+ const int channel = volume_sample_channel(
+ albedo, result.indirect_throughput, vstate.rphase, &channel_pdf);
+
+ /* Equiangular sampling for direct lighting. */
+ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) {
+ if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t) {
+ const float new_dt = result.direct_t - vstate.start_t;
+ const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+
+ result.direct_scatter = true;
+ result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf;
+ shader_copy_volume_phases(&result.direct_phases, sd);
+
+ /* Multiple importance sampling. */
+ if (vstate.use_mis) {
+ const float distance_pdf = vstate.distance_pdf *
+ dot(channel_pdf, coeff.sigma_t * new_transmittance);
+ const float mis_weight = 2.0f * power_heuristic(vstate.equiangular_pdf, distance_pdf);
+ result.direct_throughput *= mis_weight;
+ }
+ }
+ else {
+ result.direct_throughput *= transmittance;
+ vstate.distance_pdf *= dot(channel_pdf, transmittance);
+ }
+ }
+
+ /* Distance sampling for indirect and optional direct lighting. */
+ if (!result.indirect_scatter) {
+ /* decide if we will scatter or continue */
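+    /* With rscatter uniform in [0, 1), scattering happens with probability
+     * 1 - transmittance along the sampled channel; rscatter is then reused as the
+     * inversion random number for the sampled distance below. */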
+ const float sample_transmittance = volume_channel_get(transmittance, channel);
+
+ if (1.0f - vstate.rscatter >= sample_transmittance) {
+ /* compute sampling distance */
+ const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
+ const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t;
+ const float new_t = vstate.start_t + new_dt;
+
+ /* transmittance and pdf */
+ const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+ const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance);
+
+ /* throughput */
+ result.indirect_scatter = true;
+ result.indirect_t = new_t;
+ result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf;
+ shader_copy_volume_phases(&result.indirect_phases, sd);
+
+ if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) {
+ /* If using distance sampling for direct light, just copy parameters
+ * of indirect light since we scatter at the same point then. */
+ result.direct_scatter = true;
+ result.direct_t = result.indirect_t;
+ result.direct_throughput = result.indirect_throughput;
+ shader_copy_volume_phases(&result.direct_phases, sd);
+
+ /* Multiple importance sampling. */
+ if (vstate.use_mis) {
+ const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t);
+ const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf,
+ equiangular_pdf);
+ result.direct_throughput *= 2.0f * mis_weight;
+ }
+ }
+ }
+ else {
+ /* throughput */
+ const float pdf = dot(channel_pdf, transmittance);
+ result.indirect_throughput *= transmittance / pdf;
+ if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) {
+ vstate.distance_pdf *= pdf;
+ }
+
+      /* remap rscatter so we can reuse it and keep things stratified */
+ vstate.rscatter = 1.0f - (1.0f - vstate.rscatter) / sample_transmittance;
+ }
+ }
+}
+
+/* heterogeneous volume distance sampling: integrate stepping through the
+ * volume until we reach the end, get absorbed entirely, or run out of
+ * iterations. this does probabilistically scatter or get transmitted through
+ * for path tracing where we don't want to branch. */
+ccl_device_forceinline void volume_integrate_heterogeneous(
+ INTEGRATOR_STATE_ARGS,
+ Ray *ccl_restrict ray,
+ ShaderData *ccl_restrict sd,
+ const RNGState *rng_state,
+ ccl_global float *ccl_restrict render_buffer,
+ const float object_step_size,
+ const VolumeSampleMethod direct_sample_method,
+ const float3 equiangular_light_P,
+ VolumeIntegrateResult &result)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE);
+
+ /* Prepare for stepping.
+ * Using a different step offset for the first step avoids banding artifacts. */
+ int max_steps;
+ float step_size, step_shade_offset, steps_offset;
+ volume_step_init(kg,
+ rng_state,
+ object_step_size,
+ ray->t,
+ &step_size,
+ &step_shade_offset,
+ &steps_offset,
+ &max_steps);
+
+ /* Initialize volume integration state. */
+ VolumeIntegrateState vstate ccl_optional_struct_init;
+ vstate.start_t = 0.0f;
+ vstate.end_t = 0.0f;
+ vstate.absorption_only = true;
+ vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
+ vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
+
+ /* Multiple importance sampling: pick between equiangular and distance sampling strategy. */
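+  /* One-sample model: each strategy is picked with probability 0.5 and the random
+   * number is remapped to [0, 1) again, which is why the MIS weights applied during
+   * stepping carry a factor of 2 to remain unbiased. */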
+ vstate.direct_sample_method = direct_sample_method;
+ vstate.use_mis = (direct_sample_method == VOLUME_SAMPLE_MIS);
+ if (vstate.use_mis) {
+ if (vstate.rscatter < 0.5f) {
+ vstate.rscatter *= 2.0f;
+ vstate.direct_sample_method = VOLUME_SAMPLE_DISTANCE;
+ }
+ else {
+ vstate.rscatter = (vstate.rscatter - 0.5f) * 2.0f;
+ vstate.direct_sample_method = VOLUME_SAMPLE_EQUIANGULAR;
+ }
+ }
+ vstate.equiangular_pdf = 0.0f;
+ vstate.distance_pdf = 1.0f;
+
+ /* Initialize volume integration result. */
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ result.direct_throughput = throughput;
+ result.indirect_throughput = throughput;
+
+ /* Equiangular sampling: compute distance and PDF in advance. */
+ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) {
+ result.direct_t = volume_equiangular_sample(
+ ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf);
+ }
+
+# ifdef __DENOISING_FEATURES__
+ const bool write_denoising_features = (INTEGRATOR_STATE(path, flag) &
+ PATH_RAY_DENOISING_FEATURES);
+ float3 accum_albedo = zero_float3();
+# endif
+ float3 accum_emission = zero_float3();
+
+ for (int i = 0; i < max_steps; i++) {
+ /* Advance to new position */
+ vstate.end_t = min(ray->t, (i + steps_offset) * step_size);
+ const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset;
+ sd->P = ray->P + ray->D * shade_t;
+
+ /* compute segment */
+ VolumeShaderCoefficients coeff ccl_optional_struct_init;
+ if (volume_shader_sample(INTEGRATOR_STATE_PASS, sd, &coeff)) {
+ const int closure_flag = sd->flag;
+
+ /* Evaluate transmittance over segment. */
+ const float dt = (vstate.end_t - vstate.start_t);
+ const float3 transmittance = (closure_flag & SD_EXTINCTION) ?
+ volume_color_transmittance(coeff.sigma_t, dt) :
+ one_float3();
+
+ /* Emission. */
+ if (closure_flag & SD_EMISSION) {
+ /* Only write emission before indirect light scatter position, since we terminate
+ * stepping at that point if we have already found a direct light scatter position. */
+ if (!result.indirect_scatter) {
+ const float3 emission = volume_emission_integrate(
+ &coeff, closure_flag, transmittance, dt);
+ accum_emission += emission;
+ }
+ }
+
+ if (closure_flag & SD_EXTINCTION) {
+ if ((closure_flag & SD_SCATTER) || !vstate.absorption_only) {
+# ifdef __DENOISING_FEATURES__
+ /* Accumulate albedo for denoising features. */
+ if (write_denoising_features && (closure_flag & SD_SCATTER)) {
+ const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance);
+ }
+# endif
+
+ /* Scattering and absorption. */
+ volume_integrate_step_scattering(
+ sd, ray, equiangular_light_P, coeff, transmittance, vstate, result);
+ }
+ else {
+ /* Absorption only. */
+ result.indirect_throughput *= transmittance;
+ result.direct_throughput *= transmittance;
+ }
+
+ /* Stop if nearly all light blocked. */
+ if (!result.indirect_scatter) {
+ if (max3(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) {
+ result.indirect_throughput = zero_float3();
+ break;
+ }
+ }
+ else if (!result.direct_scatter) {
+ if (max3(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) {
+ break;
+ }
+ }
+ }
+
+ /* If we have scattering data for both direct and indirect, we're done. */
+ if (result.direct_scatter && result.indirect_scatter) {
+ break;
+ }
+ }
+
+ /* Stop if at the end of the volume. */
+ vstate.start_t = vstate.end_t;
+ if (vstate.start_t == ray->t) {
+ break;
+ }
+ }
+
+  /* Write accumulated emission. */
+ if (!is_zero(accum_emission)) {
+ kernel_accum_emission(
+ INTEGRATOR_STATE_PASS, result.indirect_throughput, accum_emission, render_buffer);
+ }
+
+# ifdef __DENOISING_FEATURES__
+ /* Write denoising features. */
+ if (write_denoising_features) {
+ kernel_write_denoising_features_volume(
+ INTEGRATOR_STATE_PASS, accum_albedo, result.indirect_scatter, render_buffer);
+ }
+# endif /* __DENOISING_FEATURES__ */
+}
+
+# ifdef __EMISSION__
+/* Path tracing: sample a point on a light ahead of volume stepping, to find the
+ * pivot for equiangular sampling. */
+ccl_device_forceinline bool integrate_volume_sample_light(INTEGRATOR_STATE_ARGS,
+ const ShaderData *ccl_restrict sd,
+ const RNGState *ccl_restrict rng_state,
+ LightSample *ccl_restrict ls)
+{
+ /* Test if there is a light or BSDF that needs direct light. */
+ if (!kernel_data.integrator.use_direct_light) {
+ return false;
+ }
+
+ /* Sample position on a light. */
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+ const uint bounce = INTEGRATOR_STATE(path, bounce);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ light_distribution_sample_from_volume_segment(
+ kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls);
+
+ if (ls->shader & SHADER_EXCLUDE_SCATTER) {
+ return false;
+ }
+
+ return true;
+}
+
+/* Path tracing: sample point on light and evaluate light shader, then
+ * queue shadow ray to be traced. */
+ccl_device_forceinline void integrate_volume_direct_light(INTEGRATOR_STATE_ARGS,
+ const ShaderData *ccl_restrict sd,
+ const RNGState *ccl_restrict rng_state,
+ const float3 P,
+ const ShaderVolumePhases *ccl_restrict
+ phases,
+ const float3 throughput,
+ LightSample *ccl_restrict ls)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
+
+ if (!kernel_data.integrator.use_direct_light) {
+ return;
+ }
+
+ /* Sample position on the same light again, now from the shading
+ * point where we scattered.
+ *
+ * TODO: decorrelate random numbers and use light_sample_new_position to
+ * avoid resampling the CDF. */
+ {
+ const int path_flag = INTEGRATOR_STATE(path, flag);
+ const uint bounce = INTEGRATOR_STATE(path, bounce);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ if (!light_distribution_sample_from_position(
+ kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) {
+ return;
+ }
+ }
+
+ /* Evaluate light shader.
+ *
+ * TODO: can we reuse sd memory? In theory we can move this after
+ * integrate_surface_bounce, evaluate the BSDF, and only then evaluate
+ * the light shader. This could also move to its own kernel, for
+ * non-constant light sources. */
+ ShaderDataTinyStorage emission_sd_storage;
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+ const float3 light_eval = light_sample_shader_eval(
+ INTEGRATOR_STATE_PASS, emission_sd, ls, sd->time);
+ if (is_zero(light_eval)) {
+ return;
+ }
+
+  /* Evaluate phase function. */
+ BsdfEval phase_eval ccl_optional_struct_init;
+ const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval);
+
+ if (ls->shader & SHADER_USE_MIS) {
+ float mis_weight = power_heuristic(ls->pdf, phase_pdf);
+ bsdf_eval_mul(&phase_eval, mis_weight);
+ }
+
+ bsdf_eval_mul3(&phase_eval, light_eval / ls->pdf);
+
+ /* Path termination. */
+ const float terminate = path_state_rng_light_termination(kg, rng_state);
+ if (light_sample_terminate(kg, ls, &phase_eval, terminate)) {
+ return;
+ }
+
+ /* Create shadow ray. */
+ Ray ray ccl_optional_struct_init;
+ light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray);
+ const bool is_light = light_sample_is_light(ls);
+
+ /* Write shadow ray and associated state to global memory. */
+ integrator_state_write_shadow_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ /* Copy state from main path to shadow path. */
+ const uint16_t bounce = INTEGRATOR_STATE(path, bounce);
+ const uint16_t transparent_bounce = INTEGRATOR_STATE(path, transparent_bounce);
+ uint32_t shadow_flag = INTEGRATOR_STATE(path, flag);
+ shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
+ shadow_flag |= PATH_RAY_VOLUME_PASS;
+ const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
+
+ if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
+ const float3 diffuse_glossy_ratio = (bounce == 0) ?
+ one_float3() :
+ INTEGRATOR_STATE(path, diffuse_glossy_ratio);
+ INTEGRATOR_STATE_WRITE(shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio;
+ }
+
+ INTEGRATOR_STATE_WRITE(shadow_path, flag) = shadow_flag;
+ INTEGRATOR_STATE_WRITE(shadow_path, bounce) = bounce;
+ INTEGRATOR_STATE_WRITE(shadow_path, transparent_bounce) = transparent_bounce;
+ INTEGRATOR_STATE_WRITE(shadow_path, throughput) = throughput_phase;
+
+ if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) {
+ INTEGRATOR_STATE_WRITE(shadow_path, unshadowed_throughput) = throughput;
+ }
+
+ integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_PASS);
+
+ /* Branch off shadow kernel. */
+ INTEGRATOR_SHADOW_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
+}
+# endif
+
+/* Path tracing: scatter in new direction using phase function */
+ccl_device_forceinline bool integrate_volume_phase_scatter(INTEGRATOR_STATE_ARGS,
+ ShaderData *sd,
+ const RNGState *rng_state,
+ const ShaderVolumePhases *phases)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT);
+
+ float phase_u, phase_v;
+ path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v);
+
+ /* Phase closure, sample direction. */
+ float phase_pdf;
+ BsdfEval phase_eval ccl_optional_struct_init;
+ float3 phase_omega_in ccl_optional_struct_init;
+ differential3 phase_domega_in ccl_optional_struct_init;
+
+ const int label = shader_volume_phase_sample(kg,
+ sd,
+ phases,
+ phase_u,
+ phase_v,
+ &phase_eval,
+ &phase_omega_in,
+ &phase_domega_in,
+ &phase_pdf);
+
+ if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) {
+ return false;
+ }
+
+ /* Setup ray. */
+ INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
+ INTEGRATOR_STATE_WRITE(ray, D) = normalize(phase_omega_in);
+ INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX;
+
+# ifdef __RAY_DIFFERENTIALS__
+ INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
+ INTEGRATOR_STATE_WRITE(ray, dD) = differential_make_compact(phase_domega_in);
+# endif
+
+ /* Update throughput. */
+ const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf;
+ INTEGRATOR_STATE_WRITE(path, throughput) = throughput_phase;
+
+ if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
+ INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3();
+ }
+
+ /* Update path state */
+ INTEGRATOR_STATE_WRITE(path, mis_ray_pdf) = phase_pdf;
+ INTEGRATOR_STATE_WRITE(path, mis_ray_t) = 0.0f;
+ INTEGRATOR_STATE_WRITE(path, min_ray_pdf) = fminf(phase_pdf,
+ INTEGRATOR_STATE(path, min_ray_pdf));
+
+ path_state_next(INTEGRATOR_STATE_PASS, label);
+ return true;
+}
+
+/* get the volume attenuation and emission over line segment defined by
+ * ray, with the assumption that there are no surfaces blocking light
+ * between the endpoints. distance sampling is used to decide if we will
+ * scatter or not. */
+ccl_device VolumeIntegrateEvent volume_integrate(INTEGRATOR_STATE_ARGS,
+ Ray *ccl_restrict ray,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ ShaderData sd;
+ shader_setup_from_volume(kg, &sd, ray);
+
+ /* Load random number state. */
+ RNGState rng_state;
+ path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+
+ /* Sample light ahead of volume stepping, for equiangular sampling. */
+ /* TODO: distant lights are ignored now, but could instead use even distribution. */
+ LightSample ls ccl_optional_struct_init;
+ const bool need_light_sample = !(INTEGRATOR_STATE(path, flag) & PATH_RAY_TERMINATE);
+ const bool have_equiangular_sample = need_light_sample &&
+ integrate_volume_sample_light(
+ INTEGRATOR_STATE_PASS, &sd, &rng_state, &ls) &&
+ (ls.t != FLT_MAX);
+
+ VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
+ volume_stack_sample_method(INTEGRATOR_STATE_PASS) :
+ VOLUME_SAMPLE_DISTANCE;
+
+ /* Step through volume. */
+ const float step_size = volume_stack_step_size(INTEGRATOR_STATE_PASS, [=](const int i) {
+ return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ });
+
+ /* TODO: expensive to zero closures? */
+ VolumeIntegrateResult result = {};
+ volume_integrate_heterogeneous(INTEGRATOR_STATE_PASS,
+ ray,
+ &sd,
+ &rng_state,
+ render_buffer,
+ step_size,
+ direct_sample_method,
+ ls.P,
+ result);
+
+  /* Perform path termination. The intersect_closest kernel will have already marked this
+   * path to be terminated. That causes the shader evaluation to leave out any scattering
+   * closures, but emission and absorption are still handled for multiple importance
+   * sampling. */
+ const uint32_t path_flag = INTEGRATOR_STATE(path, flag);
+ const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ?
+ 0.0f :
+ path_state_continuation_probability(INTEGRATOR_STATE_PASS,
+ path_flag);
+ if (probability == 0.0f) {
+ return VOLUME_PATH_MISSED;
+ }
+
+ /* Direct light. */
+ if (result.direct_scatter) {
+ const float3 direct_P = ray->P + result.direct_t * ray->D;
+ result.direct_throughput /= probability;
+ integrate_volume_direct_light(INTEGRATOR_STATE_PASS,
+ &sd,
+ &rng_state,
+ direct_P,
+ &result.direct_phases,
+ result.direct_throughput,
+ &ls);
+ }
+
+ /* Indirect light.
+ *
+ * Only divide throughput by probability if we scatter. For the attenuation
+ * case the next surface will already do this division. */
+ if (result.indirect_scatter) {
+ result.indirect_throughput /= probability;
+ }
+ INTEGRATOR_STATE_WRITE(path, throughput) = result.indirect_throughput;
+
+ if (result.indirect_scatter) {
+ sd.P = ray->P + result.indirect_t * ray->D;
+
+ if (integrate_volume_phase_scatter(
+ INTEGRATOR_STATE_PASS, &sd, &rng_state, &result.indirect_phases)) {
+ return VOLUME_PATH_SCATTERED;
+ }
+ else {
+ return VOLUME_PATH_MISSED;
+ }
+ }
+ else {
+ return VOLUME_PATH_ATTENUATED;
+ }
+}
+
+#endif
+
+ccl_device void integrator_shade_volume(INTEGRATOR_STATE_ARGS,
+ ccl_global float *ccl_restrict render_buffer)
+{
+ PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_SETUP);
+
+#ifdef __VOLUME__
+ /* Setup shader data. */
+ Ray ray ccl_optional_struct_init;
+ integrator_state_read_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ Intersection isect ccl_optional_struct_init;
+ integrator_state_read_isect(INTEGRATOR_STATE_PASS, &isect);
+
+ /* Set ray length to current segment. */
+ ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
+
+ /* Clean volume stack for background rays. */
+ if (isect.prim == PRIM_NONE) {
+ volume_stack_clean(INTEGRATOR_STATE_PASS);
+ }
+
+ VolumeIntegrateEvent event = volume_integrate(INTEGRATOR_STATE_PASS, &ray, render_buffer);
+
+ if (event == VOLUME_PATH_SCATTERED) {
+ /* Queue intersect_closest kernel. */
+ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ return;
+ }
+ else if (event == VOLUME_PATH_MISSED) {
+ /* End path. */
+ INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
+ return;
+ }
+ else {
+ /* Continue to background, light or surface. */
+ if (isect.prim == PRIM_NONE) {
+ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ return;
+ }
+ else if (isect.type & PRIMITIVE_LAMP) {
+ INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ return;
+ }
+ else {
+ /* Hit a surface, continue with surface kernel unless terminated. */
+ const int shader = intersection_get_shader(kg, &isect);
+ const int flags = kernel_tex_fetch(__shaders, shader).flags;
+
+ integrator_intersect_shader_next_kernel<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
+ INTEGRATOR_STATE_PASS, &isect, shader, flags);
+ return;
+ }
+ }
+#endif /* __VOLUME__ */
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h
new file mode 100644
index 00000000000..8cef9cf31e2
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_state.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Integrator State
+ *
+ * This file defines the data structures that define the state of a path. Any state that is
+ * preserved and passed between kernel executions is part of this.
+ *
+ * The size of this state must be kept as small as possible, to reduce cache misses and keep memory
+ * usage under control on GPUs that may execute millions of kernels.
+ *
+ * Memory may be allocated and passed along in different ways depending on the device. There may
+ * be a scalar layout, or AoS or SoA layout for batches. The state may be passed along as a pointer
+ * to every kernel, or the pointer may exist at program scope or in constant memory. To abstract
+ * these differences between devices and experiment with different layouts, macros are used.
+ *
+ * INTEGRATOR_STATE_ARGS: prepend to argument definitions for every function that accesses
+ * path state.
+ * INTEGRATOR_STATE_CONST_ARGS: same as INTEGRATOR_STATE_ARGS, when state is read-only
+ * INTEGRATOR_STATE_PASS: use to pass along state to other functions that access it.
+ *
+ * INTEGRATOR_STATE(x, y): read nested struct member x.y of IntegratorState
+ * INTEGRATOR_STATE_WRITE(x, y): write to nested struct member x.y of IntegratorState
+ *
+ * INTEGRATOR_STATE_ARRAY(x, index, y): read x[index].y
+ * INTEGRATOR_STATE_ARRAY_WRITE(x, index, y): write x[index].y
+ *
+ * INTEGRATOR_STATE_COPY(to_x, from_x): copy contents of one nested struct to another
+ *
+ * INTEGRATOR_STATE_IS_NULL: test if any integrator state is available, for shader evaluation
+ * INTEGRATOR_STATE_PASS_NULL: use to pass empty state to other functions.
+ *
+ * NOTE: if we end up with a device that passes no arguments, the leading comma will be a problem.
+ * Can solve it with more macros if we encounter it, but rather ugly so postpone for now.
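+ *
+ * A minimal illustrative sketch (hypothetical helper, not part of the kernels),
+ * assuming the path struct exposes a float3 throughput member:
+ *
+ *   ccl_device_inline void example_scale_throughput(INTEGRATOR_STATE_ARGS, float factor)
+ *   {
+ *     const float3 throughput = INTEGRATOR_STATE(path, throughput);
+ *     INTEGRATOR_STATE_WRITE(path, throughput) = throughput * factor;
+ *   }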
+ */
+
+#include "kernel/kernel_types.h"
+
+#include "util/util_types.h"
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+/* Constants
+ *
+ * TODO: these could be made dynamic depending on the features used in the scene. */
+
+#define INTEGRATOR_VOLUME_STACK_SIZE VOLUME_STACK_SIZE
+#define INTEGRATOR_SHADOW_ISECT_SIZE 4
+
+/* Data structures */
+
+/* Integrator State
+ *
+ * CPU rendering path state with AoS layout. */
+typedef struct IntegratorStateCPU {
+#define KERNEL_STRUCT_BEGIN(name) struct {
+#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name;
+#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
+#define KERNEL_STRUCT_END(name) \
+ } \
+ name;
+#define KERNEL_STRUCT_END_ARRAY(name, size) \
+ } \
+ name[size];
+#include "kernel/integrator/integrator_state_template.h"
+#undef KERNEL_STRUCT_BEGIN
+#undef KERNEL_STRUCT_MEMBER
+#undef KERNEL_STRUCT_ARRAY_MEMBER
+#undef KERNEL_STRUCT_END
+#undef KERNEL_STRUCT_END_ARRAY
+} IntegratorStateCPU;
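+
+/* Illustrative expansion (hypothetical template entry, for clarity only; the actual
+ * entries live in integrator_state_template.h): an entry like
+ *
+ *   KERNEL_STRUCT_BEGIN(path)
+ *   KERNEL_STRUCT_MEMBER(path, float3, throughput, feature)
+ *   KERNEL_STRUCT_END(path)
+ *
+ * expands in this AoS layout to:
+ *
+ *   struct {
+ *     float3 throughput;
+ *   } path;
+ */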
+
+/* Path Queue
+ *
+ * Keep track of which kernels are queued to be executed next in the path
+ * for GPU rendering. */
+typedef struct IntegratorQueueCounter {
+ int num_queued[DEVICE_KERNEL_INTEGRATOR_NUM];
+} IntegratorQueueCounter;
+
+/* Integrator State GPU
+ *
+ * GPU rendering path state with SoA layout. */
+typedef struct IntegratorStateGPU {
+#define KERNEL_STRUCT_BEGIN(name) struct {
+#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type *name;
+#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
+#define KERNEL_STRUCT_END(name) \
+ } \
+ name;
+#define KERNEL_STRUCT_END_ARRAY(name, size) \
+ } \
+ name[size];
+#include "kernel/integrator/integrator_state_template.h"
+#undef KERNEL_STRUCT_BEGIN
+#undef KERNEL_STRUCT_MEMBER
+#undef KERNEL_STRUCT_ARRAY_MEMBER
+#undef KERNEL_STRUCT_END
+#undef KERNEL_STRUCT_END_ARRAY
+
+ /* Count number of queued kernels. */
+ IntegratorQueueCounter *queue_counter;
+
+ /* Count number of kernels queued for specific shaders. */
+ int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM];
+
+ /* Index of path which will be used by a next shadow catcher split. */
+ int *next_shadow_catcher_path_index;
+} IntegratorStateGPU;
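+
+/* In this SoA layout the same template entry instead expands to one array pointer per
+ * member (float3 *throughput;), which the access macros below index by the path's
+ * integer state: kernel_integrator_state.path.throughput[state]. */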
+
+/* Abstraction
+ *
+ * Macros to access data structures on different devices.
+ *
+ * Note that there is a special access function for the shadow catcher state. This access
+ * is meant to happen from a kernel which operates on a "main" path. Attempting to use
+ * shadow catcher accessors from a kernel which operates on a shadow catcher state will
+ * cause bad memory access. */
+
+#ifdef __KERNEL_CPU__
+
+/* Scalar access on CPU. */
+
+typedef IntegratorStateCPU *ccl_restrict IntegratorState;
+
+# define INTEGRATOR_STATE_ARGS \
+ ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \
+ IntegratorStateCPU *ccl_restrict state
+# define INTEGRATOR_STATE_CONST_ARGS \
+ ccl_attr_maybe_unused const KernelGlobals *ccl_restrict kg, \
+ const IntegratorStateCPU *ccl_restrict state
+# define INTEGRATOR_STATE_PASS kg, state
+
+# define INTEGRATOR_STATE_PASS_NULL kg, NULL
+# define INTEGRATOR_STATE_IS_NULL (state == NULL)
+
+# define INTEGRATOR_STATE(nested_struct, member) \
+ (((const IntegratorStateCPU *)state)->nested_struct.member)
+# define INTEGRATOR_STATE_WRITE(nested_struct, member) (state->nested_struct.member)
+
+# define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \
+ (((const IntegratorStateCPU *)state)->nested_struct[array_index].member)
+# define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \
+ ((state)->nested_struct[array_index].member)
+
+#else /* __KERNEL_CPU__ */
+
+/* Array access on GPU with Structure-of-Arrays. */
+
+typedef int IntegratorState;
+
+# define INTEGRATOR_STATE_ARGS const KernelGlobals *ccl_restrict kg, const IntegratorState state
+# define INTEGRATOR_STATE_CONST_ARGS \
+ const KernelGlobals *ccl_restrict kg, const IntegratorState state
+# define INTEGRATOR_STATE_PASS kg, state
+
+# define INTEGRATOR_STATE_PASS_NULL kg, -1
+# define INTEGRATOR_STATE_IS_NULL (state == -1)
+
+# define INTEGRATOR_STATE(nested_struct, member) \
+ kernel_integrator_state.nested_struct.member[state]
+# define INTEGRATOR_STATE_WRITE(nested_struct, member) INTEGRATOR_STATE(nested_struct, member)
+
+# define INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member) \
+ kernel_integrator_state.nested_struct[array_index].member[state]
+# define INTEGRATOR_STATE_ARRAY_WRITE(nested_struct, array_index, member) \
+ INTEGRATOR_STATE_ARRAY(nested_struct, array_index, member)
+
+#endif /* __KERNEL_CPU__ */
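+
+/* Usage sketch (illustrative only, not part of this patch): the macros make state access
+ * identical on both devices. A hypothetical helper that advances the bounce counter:
+ *
+ *   ccl_device_inline void example_next_bounce(INTEGRATOR_STATE_ARGS)
+ *   {
+ *     const uint16_t bounce = INTEGRATOR_STATE(path, bounce);
+ *     INTEGRATOR_STATE_WRITE(path, bounce) = bounce + 1;
+ *   }
+ */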
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_state_flow.h b/intern/cycles/kernel/integrator/integrator_state_flow.h
new file mode 100644
index 00000000000..8477efd7b66
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_state_flow.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_types.h"
+#include "util/util_atomic.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Control Flow
+ *
+ * Utilities for control flow between kernels. The implementation may differ per device or
+ * even be handled on the host side. To abstract such differences, to allow experimenting
+ * with different implementations, and to aid debugging, this is hidden behind macros.
+ *
+ * There is a main path for regular path tracing from the camera. Shadow rays for next
+ * event estimation branch off from it into their own path, which may be computed in
+ * parallel while the main path continues.
+ *
+ * Each kernel on the main path must call one of these functions. These may not be called
+ * multiple times from the same kernel.
+ *
+ * INTEGRATOR_PATH_INIT(next_kernel)
+ * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel)
+ * INTEGRATOR_PATH_TERMINATE(current_kernel)
+ *
+ * For the shadow path similar functions are used, and again each shadow kernel must call
+ * one of them, and only once.
+ */
+
+#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(path, queued_kernel) == 0)
+#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED (INTEGRATOR_STATE(shadow_path, queued_kernel) == 0)
+
+#ifdef __KERNEL_GPU__
+
+# define INTEGRATOR_PATH_INIT(next_kernel) \
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
+ 1); \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
+ atomic_fetch_and_sub_uint32( \
+ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
+ 1); \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
+ atomic_fetch_and_sub_uint32( \
+ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0;
+
+# define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
+ 1); \
+ INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
+ atomic_fetch_and_sub_uint32( \
+ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
+ 1); \
+ INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
+ atomic_fetch_and_sub_uint32( \
+ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
+ INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0;
+
+# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
+ { \
+ const int key_ = key; \
+ atomic_fetch_and_add_uint32( \
+ &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+ INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
+ 1); \
+ }
+# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
+ { \
+ const int key_ = key; \
+ atomic_fetch_and_sub_uint32( \
+ &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
+ atomic_fetch_and_add_uint32( \
+ &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+ INTEGRATOR_STATE_WRITE(path, shader_sort_key) = key_; \
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
+ 1); \
+ }
+
+#else
+
+# define INTEGRATOR_PATH_INIT(next_kernel) \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel;
+# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
+ { \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+ (void)key; \
+ }
+# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
+ { \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+ (void)current_kernel; \
+ }
+# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
+ { \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0; \
+ (void)current_kernel; \
+ }
+# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
+ { \
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = next_kernel; \
+ (void)key; \
+ (void)current_kernel; \
+ }
+
+# define INTEGRATOR_SHADOW_PATH_INIT(next_kernel) \
+ INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel;
+# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
+ { \
+ INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = next_kernel; \
+ (void)current_kernel; \
+ }
+# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
+ { \
+ INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0; \
+ (void)current_kernel; \
+ }
+
+#endif
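+
+/* Usage sketch (illustrative only, not part of this patch): each kernel on the main path
+ * makes exactly one transition, e.g. a hypothetical surface shading kernel either
+ * terminating the path or queueing the next intersection:
+ *
+ *   if (terminate) {
+ *     INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE);
+ *   }
+ *   else {
+ *     INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
+ *                          DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ *   }
+ */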
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_state_template.h b/intern/cycles/kernel/integrator/integrator_state_template.h
new file mode 100644
index 00000000000..41dd1bfcdbf
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_state_template.h
@@ -0,0 +1,163 @@
+
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/************************************ Path State *****************************/
+
+KERNEL_STRUCT_BEGIN(path)
+/* Index of a pixel within the device render buffer where this path will write its result.
+ * To get an actual offset within the buffer the value needs to be multiplied by the
+ * `kernel_data.film.pass_stride`.
+ *
+ * The multiplication is delayed for later, so that the state can use a 32-bit integer. */
+KERNEL_STRUCT_MEMBER(path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING)
+/* Current sample number. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING)
+/* Current ray bounce depth. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING)
+/* Current diffuse ray bounce depth. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING)
+/* Current glossy ray bounce depth. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, glossy_bounce, KERNEL_FEATURE_PATH_TRACING)
+/* Current transmission ray bounce depth. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, transmission_bounce, KERNEL_FEATURE_PATH_TRACING)
+/* Current volume ray bounce depth. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING)
+/* Current volume bounds ray bounce depth. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING)
+/* Current transparent ray bounce depth. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
+/* DeviceKernel bit indicating queued kernels.
+ * TODO: reduce size? */
+KERNEL_STRUCT_MEMBER(path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
+/* Random number generator seed. */
+KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
+/* Random number dimension offset. */
+KERNEL_STRUCT_MEMBER(path, uint32_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
+/* enum PathRayFlag */
+KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
+/* Multiple importance sampling
+ * The PDF of BSDF sampling at the last scatter point, and distance to the
+ * last scatter point minus the last ray segment. This distance lets us
+ * compute the complete distance through transparent surfaces and volumes. */
+KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING)
+/* Filter glossy. */
+KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
+/* Throughput. */
+KERNEL_STRUCT_MEMBER(path, float3, throughput, KERNEL_FEATURE_PATH_TRACING)
+/* Ratio of throughput to distinguish diffuse and glossy render passes. */
+KERNEL_STRUCT_MEMBER(path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES)
+/* Denoising. */
+KERNEL_STRUCT_MEMBER(path, float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING)
+/* Shader sorting. */
+/* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */
+KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_END(path)
+
+/************************************** Ray ***********************************/
+
+KERNEL_STRUCT_BEGIN(ray)
+KERNEL_STRUCT_MEMBER(ray, float3, P, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float3, D, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_END(ray)
+
+/*************************** Intersection result ******************************/
+
+/* Result from scene intersection. */
+KERNEL_STRUCT_BEGIN(isect)
+KERNEL_STRUCT_MEMBER(isect, float, t, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(isect, float, u, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING)
+/* TODO: exclude for GPU. */
+KERNEL_STRUCT_MEMBER(isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_END(isect)
+
+/*************** Subsurface closure state for subsurface kernel ***************/
+
+KERNEL_STRUCT_BEGIN(subsurface)
+KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, float, roughness, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_END(subsurface)
+
+/********************************** Volume Stack ******************************/
+
+KERNEL_STRUCT_BEGIN(volume_stack)
+KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, object, KERNEL_FEATURE_VOLUME)
+KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
+KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE)
+
+/********************************* Shadow Path State **************************/
+
+KERNEL_STRUCT_BEGIN(shadow_path)
+/* Current ray bounce depth. */
+KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING)
+/* Current transparent ray bounce depth. */
+KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
+/* DeviceKernel bit indicating queued kernels.
+ * TODO: reduce size? */
+KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
+/* enum PathRayFlag */
+KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
+/* Throughput. */
+KERNEL_STRUCT_MEMBER(shadow_path, float3, throughput, KERNEL_FEATURE_PATH_TRACING)
+/* Throughput for shadow pass. */
+KERNEL_STRUCT_MEMBER(shadow_path, float3, unshadowed_throughput, KERNEL_FEATURE_SHADOW_PASS)
+/* Ratio of throughput to distinguish diffuse and glossy render passes. */
+KERNEL_STRUCT_MEMBER(shadow_path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES)
+/* Number of intersections found by ray-tracing. */
+KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_END(shadow_path)
+
+/********************************** Shadow Ray *******************************/
+
+KERNEL_STRUCT_BEGIN(shadow_ray)
+KERNEL_STRUCT_MEMBER(shadow_ray, float3, P, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_ray, float3, D, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_END(shadow_ray)
+
+/*********************** Shadow Intersection result **************************/
+
+/* Result from scene intersection. */
+KERNEL_STRUCT_BEGIN(shadow_isect)
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, t, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, u, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING)
+/* TODO: exclude for GPU. */
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_END_ARRAY(shadow_isect, INTEGRATOR_SHADOW_ISECT_SIZE)
+
+/**************************** Shadow Volume Stack *****************************/
+
+KERNEL_STRUCT_BEGIN(shadow_volume_stack)
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME)
+KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
+KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, INTEGRATOR_VOLUME_STACK_SIZE)
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
new file mode 100644
index 00000000000..cdf412fe22f
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/integrator/integrator_state.h"
+#include "kernel/kernel_differential.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Ray */
+
+ccl_device_forceinline void integrator_state_write_ray(INTEGRATOR_STATE_ARGS,
+ const Ray *ccl_restrict ray)
+{
+ INTEGRATOR_STATE_WRITE(ray, P) = ray->P;
+ INTEGRATOR_STATE_WRITE(ray, D) = ray->D;
+ INTEGRATOR_STATE_WRITE(ray, t) = ray->t;
+ INTEGRATOR_STATE_WRITE(ray, time) = ray->time;
+ INTEGRATOR_STATE_WRITE(ray, dP) = ray->dP;
+ INTEGRATOR_STATE_WRITE(ray, dD) = ray->dD;
+}
+
+ccl_device_forceinline void integrator_state_read_ray(INTEGRATOR_STATE_CONST_ARGS,
+ Ray *ccl_restrict ray)
+{
+ ray->P = INTEGRATOR_STATE(ray, P);
+ ray->D = INTEGRATOR_STATE(ray, D);
+ ray->t = INTEGRATOR_STATE(ray, t);
+ ray->time = INTEGRATOR_STATE(ray, time);
+ ray->dP = INTEGRATOR_STATE(ray, dP);
+ ray->dD = INTEGRATOR_STATE(ray, dD);
+}
+
+/* Shadow Ray */
+
+ccl_device_forceinline void integrator_state_write_shadow_ray(INTEGRATOR_STATE_ARGS,
+ const Ray *ccl_restrict ray)
+{
+ INTEGRATOR_STATE_WRITE(shadow_ray, P) = ray->P;
+ INTEGRATOR_STATE_WRITE(shadow_ray, D) = ray->D;
+ INTEGRATOR_STATE_WRITE(shadow_ray, t) = ray->t;
+ INTEGRATOR_STATE_WRITE(shadow_ray, time) = ray->time;
+ INTEGRATOR_STATE_WRITE(shadow_ray, dP) = ray->dP;
+}
+
+ccl_device_forceinline void integrator_state_read_shadow_ray(INTEGRATOR_STATE_CONST_ARGS,
+ Ray *ccl_restrict ray)
+{
+ ray->P = INTEGRATOR_STATE(shadow_ray, P);
+ ray->D = INTEGRATOR_STATE(shadow_ray, D);
+ ray->t = INTEGRATOR_STATE(shadow_ray, t);
+ ray->time = INTEGRATOR_STATE(shadow_ray, time);
+ ray->dP = INTEGRATOR_STATE(shadow_ray, dP);
+ ray->dD = differential_zero_compact();
+}
+
+/* Intersection */
+
+ccl_device_forceinline void integrator_state_write_isect(INTEGRATOR_STATE_ARGS,
+ const Intersection *ccl_restrict isect)
+{
+ INTEGRATOR_STATE_WRITE(isect, t) = isect->t;
+ INTEGRATOR_STATE_WRITE(isect, u) = isect->u;
+ INTEGRATOR_STATE_WRITE(isect, v) = isect->v;
+ INTEGRATOR_STATE_WRITE(isect, object) = isect->object;
+ INTEGRATOR_STATE_WRITE(isect, prim) = isect->prim;
+ INTEGRATOR_STATE_WRITE(isect, type) = isect->type;
+#ifdef __EMBREE__
+ INTEGRATOR_STATE_WRITE(isect, Ng) = isect->Ng;
+#endif
+}
+
+ccl_device_forceinline void integrator_state_read_isect(INTEGRATOR_STATE_CONST_ARGS,
+ Intersection *ccl_restrict isect)
+{
+ isect->prim = INTEGRATOR_STATE(isect, prim);
+ isect->object = INTEGRATOR_STATE(isect, object);
+ isect->type = INTEGRATOR_STATE(isect, type);
+ isect->u = INTEGRATOR_STATE(isect, u);
+ isect->v = INTEGRATOR_STATE(isect, v);
+ isect->t = INTEGRATOR_STATE(isect, t);
+#ifdef __EMBREE__
+ isect->Ng = INTEGRATOR_STATE(isect, Ng);
+#endif
+}
+
+ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(INTEGRATOR_STATE_CONST_ARGS,
+ int i)
+{
+ VolumeStack entry = {INTEGRATOR_STATE_ARRAY(volume_stack, i, object),
+ INTEGRATOR_STATE_ARRAY(volume_stack, i, shader)};
+ return entry;
+}
+
+ccl_device_forceinline void integrator_state_write_volume_stack(INTEGRATOR_STATE_ARGS,
+ int i,
+ VolumeStack entry)
+{
+ INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, object) = entry.object;
+ INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, i, shader) = entry.shader;
+}
+
+ccl_device_forceinline bool integrator_state_volume_stack_is_empty(INTEGRATOR_STATE_CONST_ARGS)
+{
+ return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ?
+ INTEGRATOR_STATE_ARRAY(volume_stack, 0, shader) == SHADER_NONE :
+ true;
+}
+
+/* Shadow Intersection */
+
+ccl_device_forceinline void integrator_state_write_shadow_isect(
+ INTEGRATOR_STATE_ARGS, const Intersection *ccl_restrict isect, const int index)
+{
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, t) = isect->t;
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, u) = isect->u;
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, v) = isect->v;
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, object) = isect->object;
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, prim) = isect->prim;
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, type) = isect->type;
+#ifdef __EMBREE__
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_isect, index, Ng) = isect->Ng;
+#endif
+}
+
+ccl_device_forceinline void integrator_state_read_shadow_isect(INTEGRATOR_STATE_CONST_ARGS,
+ Intersection *ccl_restrict isect,
+ const int index)
+{
+ isect->prim = INTEGRATOR_STATE_ARRAY(shadow_isect, index, prim);
+ isect->object = INTEGRATOR_STATE_ARRAY(shadow_isect, index, object);
+ isect->type = INTEGRATOR_STATE_ARRAY(shadow_isect, index, type);
+ isect->u = INTEGRATOR_STATE_ARRAY(shadow_isect, index, u);
+ isect->v = INTEGRATOR_STATE_ARRAY(shadow_isect, index, v);
+ isect->t = INTEGRATOR_STATE_ARRAY(shadow_isect, index, t);
+#ifdef __EMBREE__
+ isect->Ng = INTEGRATOR_STATE_ARRAY(shadow_isect, index, Ng);
+#endif
+}
+
+ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_ARGS)
+{
+ if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) {
+ for (int i = 0; i < INTEGRATOR_VOLUME_STACK_SIZE; i++) {
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, object) = INTEGRATOR_STATE_ARRAY(
+ volume_stack, i, object);
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, shader) = INTEGRATOR_STATE_ARRAY(
+ volume_stack, i, shader);
+ }
+ }
+}
+
+ccl_device_forceinline VolumeStack
+integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_CONST_ARGS, int i)
+{
+ VolumeStack entry = {INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, object),
+ INTEGRATOR_STATE_ARRAY(shadow_volume_stack, i, shader)};
+ return entry;
+}
+
+ccl_device_forceinline bool integrator_state_shadow_volume_stack_is_empty(
+ INTEGRATOR_STATE_CONST_ARGS)
+{
+ return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ?
+ INTEGRATOR_STATE_ARRAY(shadow_volume_stack, 0, shader) == SHADER_NONE :
+ true;
+}
+
+ccl_device_forceinline void integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_ARGS,
+ int i,
+ VolumeStack entry)
+{
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, object) = entry.object;
+ INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, shader) = entry.shader;
+}
+
+#if defined(__KERNEL_GPU__)
+ccl_device_inline void integrator_state_copy_only(const IntegratorState to_state,
+ const IntegratorState state)
+{
+ int index;
+
+ /* Rely on the compiler to optimize out unused assignments and `while(false)`'s. */
+
+# define KERNEL_STRUCT_BEGIN(name) \
+ index = 0; \
+ do {
+
+# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) \
+ if (kernel_integrator_state.parent_struct.name != nullptr) { \
+ kernel_integrator_state.parent_struct.name[to_state] = \
+ kernel_integrator_state.parent_struct.name[state]; \
+ }
+
+# define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \
+ if (kernel_integrator_state.parent_struct[index].name != nullptr) { \
+ kernel_integrator_state.parent_struct[index].name[to_state] = \
+ kernel_integrator_state.parent_struct[index].name[state]; \
+ }
+
+# define KERNEL_STRUCT_END(name) \
+ } \
+ while (false) \
+ ;
+
+# define KERNEL_STRUCT_END_ARRAY(name, array_size) \
+ ++index; \
+ } \
+ while (index < array_size) \
+ ;
+
+# include "kernel/integrator/integrator_state_template.h"
+
+# undef KERNEL_STRUCT_BEGIN
+# undef KERNEL_STRUCT_MEMBER
+# undef KERNEL_STRUCT_ARRAY_MEMBER
+# undef KERNEL_STRUCT_END
+# undef KERNEL_STRUCT_END_ARRAY
+}
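+
+/* For illustration only (not compiled): for an array member such as `volume_stack`, the
+ * macros above expand into roughly:
+ *
+ *   index = 0;
+ *   do {
+ *     if (kernel_integrator_state.volume_stack[index].object != nullptr) {
+ *       kernel_integrator_state.volume_stack[index].object[to_state] =
+ *           kernel_integrator_state.volume_stack[index].object[state];
+ *     }
+ *     // ... same for `shader` ...
+ *     ++index;
+ *   } while (index < INTEGRATOR_VOLUME_STACK_SIZE);
+ */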
+
+ccl_device_inline void integrator_state_move(const IntegratorState to_state,
+ const IntegratorState state)
+{
+ integrator_state_copy_only(to_state, state);
+
+ INTEGRATOR_STATE_WRITE(path, queued_kernel) = 0;
+ INTEGRATOR_STATE_WRITE(shadow_path, queued_kernel) = 0;
+}
+
+#endif
+
+/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths
+ * after this function. */
+ccl_device_inline void integrator_state_shadow_catcher_split(INTEGRATOR_STATE_ARGS)
+{
+#if defined(__KERNEL_GPU__)
+ const IntegratorState to_state = atomic_fetch_and_add_uint32(
+ &kernel_integrator_state.next_shadow_catcher_path_index[0], 1);
+
+ integrator_state_copy_only(to_state, state);
+
+ kernel_integrator_state.path.flag[to_state] |= PATH_RAY_SHADOW_CATCHER_PASS;
+
+ /* Sanity check: expect to split in the intersect-closest kernel, where there is no shadow ray
+ * and no sorting yet. */
+ kernel_assert(INTEGRATOR_STATE(shadow_path, queued_kernel) == 0);
+ kernel_assert(kernel_integrator_state.sort_key_counter[INTEGRATOR_STATE(path, queued_kernel)] ==
+ nullptr);
+#else
+
+ IntegratorStateCPU *ccl_restrict split_state = state + 1;
+
+ *split_state = *state;
+
+ split_state->path.flag |= PATH_RAY_SHADOW_CATCHER_PASS;
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
new file mode 100644
index 00000000000..9490738404e
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -0,0 +1,623 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "kernel/kernel_path_state.h"
+#include "kernel/kernel_projection.h"
+#include "kernel/kernel_shader.h"
+
+#include "kernel/bvh/bvh.h"
+
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf_diffuse.h"
+#include "kernel/closure/bsdf_principled_diffuse.h"
+#include "kernel/closure/bssrdf.h"
+#include "kernel/closure/volume.h"
+
+#include "kernel/integrator/integrator_intersect_volume_stack.h"
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __SUBSURFACE__
+
+ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const ShaderClosure *sc)
+{
+ /* We should never have two consecutive BSSRDF bounces, the second one should
+ * be converted to a diffuse BSDF to avoid this. */
+ kernel_assert(!(INTEGRATOR_STATE(path, flag) & PATH_RAY_DIFFUSE_ANCESTOR));
+
+ /* Setup path state for intersect_subsurface kernel. */
+ const Bssrdf *bssrdf = (const Bssrdf *)sc;
+
+ /* Setup ray into surface. */
+ INTEGRATOR_STATE_WRITE(ray, P) = sd->P;
+ INTEGRATOR_STATE_WRITE(ray, D) = sd->N;
+ INTEGRATOR_STATE_WRITE(ray, t) = FLT_MAX;
+ INTEGRATOR_STATE_WRITE(ray, dP) = differential_make_compact(sd->dP);
+ INTEGRATOR_STATE_WRITE(ray, dD) = differential_zero_compact();
+
+ /* Pass along object info, reusing isect to save memory. */
+ INTEGRATOR_STATE_WRITE(isect, Ng) = sd->Ng;
+ INTEGRATOR_STATE_WRITE(isect, object) = sd->object;
+
+ /* Pass BSSRDF parameters. */
+ const uint32_t path_flag = INTEGRATOR_STATE_WRITE(path, flag);
+ INTEGRATOR_STATE_WRITE(path, flag) = (path_flag & ~PATH_RAY_CAMERA) | PATH_RAY_SUBSURFACE;
+ INTEGRATOR_STATE_WRITE(path, throughput) *= shader_bssrdf_sample_weight(sd, sc);
+
+ if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
+ if (INTEGRATOR_STATE(path, bounce) == 0) {
+ INTEGRATOR_STATE_WRITE(path, diffuse_glossy_ratio) = one_float3();
+ }
+ }
+
+ INTEGRATOR_STATE_WRITE(subsurface, albedo) = bssrdf->albedo;
+ INTEGRATOR_STATE_WRITE(subsurface, radius) = bssrdf->radius;
+ INTEGRATOR_STATE_WRITE(subsurface, roughness) = bssrdf->roughness;
+ INTEGRATOR_STATE_WRITE(subsurface, anisotropy) = bssrdf->anisotropy;
+
+ return LABEL_SUBSURFACE_SCATTER;
+}
+
+ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS, ShaderData *sd)
+{
+ /* Get bump mapped normal from shader evaluation at exit point. */
+ float3 N = sd->N;
+ if (sd->flag & SD_HAS_BSSRDF_BUMP) {
+ N = shader_bssrdf_normal(sd);
+ }
+
+ /* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. */
+ sd->flag &= ~SD_CLOSURE_FLAGS;
+ sd->num_closure = 0;
+ sd->num_closure_left = kernel_data.max_closures;
+
+ const float3 weight = one_float3();
+ const float roughness = INTEGRATOR_STATE(subsurface, roughness);
+
+# ifdef __PRINCIPLED__
+ if (roughness != FLT_MAX) {
+ PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(PrincipledDiffuseBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ bsdf->roughness = roughness;
+ sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+
+ /* Replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
+ * can recognize it as not being a regular Disney principled diffuse closure. */
+ bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
+ }
+ }
+ else
+# endif /* __PRINCIPLED__ */
+ {
+ DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+
+ if (bsdf) {
+ bsdf->N = N;
+ sd->flag |= bsdf_diffuse_setup(bsdf);
+
+ /* Replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
+ * can recognize it as not being a regular diffuse closure. */
+ bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
+ }
+ }
+}
+
+/* Random walk subsurface scattering.
+ *
+ * "Practical and Controllable Subsurface Scattering for Production Path
+ * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
+
+/* Support for anisotropy from:
+ * "Path Traced Subsurface Scattering using Anisotropic Phase Functions
+ * and Non-Exponential Free Flights".
+ * Magnus Wrenninge, Ryusuke Villemin, Christophe Hery.
+ * https://graphics.pixar.com/library/PathTracedSubsurface/ */
+
+ccl_device void subsurface_random_walk_remap(
+ const float albedo, const float d, float g, float *sigma_t, float *alpha)
+{
+ /* Compute attenuation and scattering coefficients from albedo. */
+ const float g2 = g * g;
+ const float g3 = g2 * g;
+ const float g4 = g3 * g;
+ const float g5 = g4 * g;
+ const float g6 = g5 * g;
+ const float g7 = g6 * g;
+
+ const float A = 1.8260523782f + -1.28451056436f * g + -1.79904629312f * g2 +
+ 9.19393289202f * g3 + -22.8215585862f * g4 + 32.0234874259f * g5 +
+ -23.6264803333f * g6 + 7.21067002658f * g7;
+ const float B = 4.98511194385f +
+ 0.127355959438f *
+ expf(31.1491581433f * g + -201.847017512f * g2 + 841.576016723f * g3 +
+ -2018.09288505f * g4 + 2731.71560286f * g5 + -1935.41424244f * g6 +
+ 559.009054474f * g7);
+ const float C = 1.09686102424f + -0.394704063468f * g + 1.05258115941f * g2 +
+ -8.83963712726f * g3 + 28.8643230661f * g4 + -46.8802913581f * g5 +
+ 38.5402837518f * g6 + -12.7181042538f * g7;
+ const float D = 0.496310210422f + 0.360146581622f * g + -2.15139309747f * g2 +
+ 17.8896899217f * g3 + -55.2984010333f * g4 + 82.065982243f * g5 +
+ -58.5106008578f * g6 + 15.8478295021f * g7;
+ const float E = 4.23190299701f +
+ 0.00310603949088f *
+ expf(76.7316253952f * g + -594.356773233f * g2 + 2448.8834203f * g3 +
+ -5576.68528998f * g4 + 7116.60171912f * g5 + -4763.54467887f * g6 +
+ 1303.5318055f * g7);
+ const float F = 2.40602999408f + -2.51814844609f * g + 9.18494908356f * g2 +
+ -79.2191708682f * g3 + 259.082868209f * g4 + -403.613804597f * g5 +
+ 302.85712436f * g6 + -87.4370473567f * g7;
+
+ const float blend = powf(albedo, 0.25f);
+
+ *alpha = (1.0f - blend) * A * powf(atanf(B * albedo), C) +
+ blend * D * powf(atanf(E * albedo), F);
+ *alpha = clamp(*alpha, 0.0f, 0.999999f); // because of numerical precision
+
+ float sigma_t_prime = 1.0f / fmaxf(d, 1e-16f);
+ *sigma_t = sigma_t_prime / (1.0f - g);
+}
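+
+/* Sanity sketch (illustrative only): for an isotropic medium (g = 0) the (1 - g) stretch is
+ * a no-op, so sigma_t = 1 / d exactly, while the fitted polynomials map the surface albedo
+ * to a single-scattering alpha that is clamped just below 1. */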
+
+ccl_device void subsurface_random_walk_coefficients(const float3 albedo,
+ const float3 radius,
+ const float anisotropy,
+ float3 *sigma_t,
+ float3 *alpha,
+ float3 *throughput)
+{
+ float sigma_t_x, sigma_t_y, sigma_t_z;
+ float alpha_x, alpha_y, alpha_z;
+
+ subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x);
+ subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y);
+ subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z);
+
+ /* Throughput already contains closure weight at this point, which includes the
+ * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo
+ * which will be added through scattering. */
+ *throughput = safe_divide_color(*throughput, albedo);
+
+ /* With low albedo values (like 0.025) we get diffusion_length 1.0 and
+ * infinite phase functions. To avoid a sharp discontinuity as we go from
+ * such values to 0.0, increase alpha and reduce the throughput to compensate. */
+ const float min_alpha = 0.2f;
+ if (alpha_x < min_alpha) {
+ (*throughput).x *= alpha_x / min_alpha;
+ alpha_x = min_alpha;
+ }
+ if (alpha_y < min_alpha) {
+ (*throughput).y *= alpha_y / min_alpha;
+ alpha_y = min_alpha;
+ }
+ if (alpha_z < min_alpha) {
+ (*throughput).z *= alpha_z / min_alpha;
+ alpha_z = min_alpha;
+ }
+
+ *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
+ *alpha = make_float3(alpha_x, alpha_y, alpha_z);
+}
+
+/* References for Dwivedi sampling:
+ *
+ * [1] "A Zero-variance-based Sampling Scheme for Monte Carlo Subsurface Scattering"
+ * by Jaroslav Křivánek and Eugene d'Eon (SIGGRAPH 2014)
+ * https://cgg.mff.cuni.cz/~jaroslav/papers/2014-zerovar/
+ *
+ * [2] "Improving the Dwivedi Sampling Scheme"
+ * by Johannes Meng, Johannes Hanika, and Carsten Dachsbacher (EGSR 2016)
+ * https://cg.ivd.kit.edu/1951.php
+ *
+ * [3] "Zero-Variance Theory for Efficient Subsurface Scattering"
+ * by Eugene d'Eon and Jaroslav Křivánek (SIGGRAPH 2020)
+ * https://iliyan.com/publications/RenderingCourse2020
+ */
+
+ccl_device_forceinline float eval_phase_dwivedi(float v, float phase_log, float cos_theta)
+{
+ /* Eq. 9 from [2] using precomputed log((v + 1) / (v - 1)) */
+ return 1.0f / ((v - cos_theta) * phase_log);
+}
+
+ccl_device_forceinline float sample_phase_dwivedi(float v, float phase_log, float rand)
+{
+ /* Based on Eq. 10 from [2]: `v - (v + 1) * pow((v - 1) / (v + 1), rand)`
+ * Since we're already pre-computing `phase_log = log((v + 1) / (v - 1))` for the evaluation,
+ * we can implement the power function like this. */
+ return v - (v + 1.0f) * expf(-rand * phase_log);
+}
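+
+/* Sanity sketch (illustrative only): substituting rand = 0 above gives v - (v + 1) = -1 and
+ * rand = 1 gives v - (v - 1) = 1, so the sampled cos_theta covers the full [-1, 1] range,
+ * with density proportional to eval_phase_dwivedi(v, phase_log, cos_theta). */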
+
+ccl_device_forceinline float diffusion_length_dwivedi(float alpha)
+{
+ /* Eq. 67 from [3] */
+ return 1.0f / sqrtf(1.0f - powf(alpha, 2.44294f - 0.0215813f * alpha + 0.578637f / alpha));
+}
+
+ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, float randv)
+{
+ float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
+ float phi = M_2PI_F * randv;
+ float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta);
+
+ float3 T, B;
+ make_orthonormals(D, &T, &B);
+ return dir.x * T + dir.y * B + dir.z * D;
+}
+
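+/* PDF of classic distance sampling: with transmittance T = volume_color_transmittance(sigma_t,
+ * t), the PDF is T itself for travelling past distance t without scattering (a surface hit),
+ * and sigma_t * T for a scatter event at distance t. */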
+ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
+ float t,
+ bool hit,
+ float3 *transmittance)
+{
+ float3 T = volume_color_transmittance(sigma_t, t);
+ if (transmittance) {
+ *transmittance = T;
+ }
+ return hit ? T : sigma_t * T;
+}
+
+/* Define the variable below to activate the similarity code;
+ * its value represents the cutoff level. */
+# define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9
+
+ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS,
+ RNGState rng_state,
+ Ray &ray,
+ LocalIntersection &ss_isect)
+{
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+
+ const float3 P = INTEGRATOR_STATE(ray, P);
+ const float3 N = INTEGRATOR_STATE(ray, D);
+ const float ray_dP = INTEGRATOR_STATE(ray, dP);
+ const float time = INTEGRATOR_STATE(ray, time);
+ const float3 Ng = INTEGRATOR_STATE(isect, Ng);
+ const int object = INTEGRATOR_STATE(isect, object);
+
+ /* Sample diffuse surface scatter into the object. */
+ float3 D;
+ float pdf;
+ sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf);
+ if (dot(-Ng, D) <= 0.0f) {
+ return false;
+ }
+
+ /* Setup ray. */
+ ray.P = ray_offset(P, -Ng);
+ ray.D = D;
+ ray.t = FLT_MAX;
+ ray.time = time;
+ ray.dP = ray_dP;
+ ray.dD = differential_zero_compact();
+
+# ifndef __KERNEL_OPTIX__
+ /* Compute or fetch object transforms. */
+ Transform ob_itfm ccl_optional_struct_init;
+ Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm);
+# endif
+
+ /* Convert subsurface to volume coefficients.
+ * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */
+ const float3 albedo = INTEGRATOR_STATE(subsurface, albedo);
+ const float3 radius = INTEGRATOR_STATE(subsurface, radius);
+ const float anisotropy = INTEGRATOR_STATE(subsurface, anisotropy);
+
+ float3 sigma_t, alpha;
+ float3 throughput = INTEGRATOR_STATE_WRITE(path, throughput);
+ subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput);
+ float3 sigma_s = sigma_t * alpha;
+
+ /* Theoretically it should be better to use the exact alpha for the channel we're sampling at
+ * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange
+ * for making the code significantly more complex and slower (if direction sampling depends on
+ * the sampled channel, we need to compute its PDF per-channel and consider it for MIS later on).
+ *
+ * Since the strength of the guided sampling increases as alpha gets lower, using a value that
+ * is too low results in fireflies while one that's too high just gives a bit more noise.
+ * Therefore, the code here uses the highest of the three albedos to be safe. */
+ const float diffusion_length = diffusion_length_dwivedi(max3(alpha));
+
+ if (diffusion_length == 1.0f) {
+ /* With specific values of alpha the length might become 1, which in the asymptotic case
+ * makes the phase function infinite. After the first bounce it would cause the throughput
+ * to be 0. Exit early, avoiding numerical issues and extra unneeded work. */
+ return false;
+ }
+
+ /* Precompute term for phase sampling. */
+ const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f));
+
+ /* Modify the RNG state so that it is decorrelated from other paths. */
+ rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);
+
+ /* Random walk until we hit the surface again. */
+ bool hit = false;
+ bool have_opposite_interface = false;
+ float opposite_distance = 0.0f;
+
+ /* Todo: Disable for alpha>0.999 or so? */
+ /* Our heuristic, a compromise between guiding and classic. */
+ const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f));
+
+# ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL
+ float3 sigma_s_star = sigma_s * (1.0f - anisotropy);
+ float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star;
+ float3 sigma_t_org = sigma_t;
+ float3 sigma_s_org = sigma_s;
+ const float anisotropy_org = anisotropy;
+ const float guided_fraction_org = guided_fraction;
+# endif
+
+ for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) {
+ /* Advance random number offset. */
+ rng_state.rng_offset += PRNG_BOUNCE_NUM;
+
+# ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL
+ /* Shadow the outer variables with local ones according to the bounce depth. */
+ float anisotropy, guided_fraction;
+ float3 sigma_s, sigma_t;
+ if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) {
+ anisotropy = anisotropy_org;
+ guided_fraction = guided_fraction_org;
+ sigma_t = sigma_t_org;
+ sigma_s = sigma_s_org;
+ }
+ else {
+ anisotropy = 0.0f;
+ guided_fraction = 0.75f; // back to isotropic heuristic from Blender
+ sigma_t = sigma_t_star;
+ sigma_s = sigma_s_star;
+ }
+# endif
+
+ /* Sample color channel, use MIS with balance heuristic. */
+ float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL);
+ float3 channel_pdf;
+ int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf);
+ float sample_sigma_t = volume_channel_get(sigma_t, channel);
+ float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE);
+
+ /* We need the result of the raycast to compute the full guided PDF, so just remember the
+ * relevant terms to avoid recomputing them later. */
+ float backward_fraction = 0.0f;
+ float forward_pdf_factor = 0.0f;
+ float forward_stretching = 1.0f;
+ float backward_pdf_factor = 0.0f;
+ float backward_stretching = 1.0f;
+
+ /* For the initial ray, we already know the direction, so just do classic distance sampling. */
+ if (bounce > 0) {
+ /* Decide whether we should use guided or classic sampling. */
+ bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction);
+
+ /* Determine if we want to sample away from the incoming interface.
+ * This only happens if we found a nearby opposite interface, and the probability for it
+ * depends on how close we are to it already.
+ * This probability term comes from the recorded presentation of [3]. */
+ bool guide_backward = false;
+ if (have_opposite_interface) {
+ /* Compute distance of the random walk between the tangent plane at the starting point
+ * and the assumed opposite interface (the parallel plane that contains the point we
+ * found in our ray query for the opposite side). */
+ float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance);
+ backward_fraction = 1.0f /
+ (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length));
+ guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction;
+ }
+
+ /* Sample scattering direction. */
+ float scatter_u, scatter_v;
+ path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v);
+ float cos_theta;
+ float hg_pdf;
+ if (guided) {
+ cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u);
+ /* The backwards guiding distribution is just mirrored along sd->N, so swapping the
+ * sign here is enough to sample from that instead. */
+ if (guide_backward) {
+ cos_theta = -cos_theta;
+ }
+ float3 newD = direction_from_cosine(N, cos_theta, scatter_v);
+ hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy);
+ ray.D = newD;
+ }
+ else {
+ float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf);
+ cos_theta = dot(newD, N);
+ ray.D = newD;
+ }
+
+ /* Compute PDF factor caused by phase sampling (as the ratio of guided / classic).
+ * Since phase sampling is channel-independent, we can get away with applying a factor
+ * to the guided PDF, which implicitly means pulling out the classic PDF term and letting
+ * it cancel with an equivalent term in the numerator of the full estimator.
+ * For the backward PDF, we again reuse the same probability distribution with a sign swap.
+ */
+ forward_pdf_factor = M_1_2PI_F * eval_phase_dwivedi(diffusion_length, phase_log, cos_theta) /
+ hg_pdf;
+ backward_pdf_factor = M_1_2PI_F *
+ eval_phase_dwivedi(diffusion_length, phase_log, -cos_theta) / hg_pdf;
+
+ /* Prepare distance sampling.
+ * For the backwards case, this also needs the sign swapped since now directions against
+ * sd->N (and therefore with negative cos_theta) are preferred. */
+ forward_stretching = (1.0f - cos_theta / diffusion_length);
+ backward_stretching = (1.0f + cos_theta / diffusion_length);
+ if (guided) {
+ sample_sigma_t *= guide_backward ? backward_stretching : forward_stretching;
+ }
+ }
+
+ /* Sample direction along ray. */
+ float t = -logf(1.0f - randt) / sample_sigma_t;
+
+ /* On the first bounce, we use the raycast to check if the opposite side is nearby.
+ * If yes, we will later use backwards guided sampling in order to have a decent
+ * chance of connecting to it.
+ * Todo: Maybe use less than 10 times the mean free path? */
+ ray.t = (bounce == 0) ? max(t, 10.0f / (min3(sigma_t))) : t;
+ scene_intersect_local(kg, &ray, &ss_isect, object, NULL, 1);
+ hit = (ss_isect.num_hits > 0);
+
+ if (hit) {
+# ifdef __KERNEL_OPTIX__
+ /* t is always in world space with OptiX. */
+ ray.t = ss_isect.hits[0].t;
+# else
+ /* Compute world space distance to surface hit. */
+ float3 D = transform_direction(&ob_itfm, ray.D);
+ D = normalize(D) * ss_isect.hits[0].t;
+ ray.t = len(transform_direction(&ob_tfm, D));
+# endif
+ }
+
+ if (bounce == 0) {
+ /* Check if we hit the opposite side. */
+ if (hit) {
+ have_opposite_interface = true;
+ opposite_distance = dot(ray.P + ray.t * ray.D - P, -N);
+ }
+ /* Apart from the opposite side check, we were supposed to only trace up to distance t,
+ * so check if there would have been a hit in that case. */
+ hit = ray.t < t;
+ }
+
+ /* Use the distance to the exit point for the throughput update if we found one. */
+ if (hit) {
+ t = ray.t;
+ }
+ else if (bounce == 0) {
+ /* Restore original position if nothing was hit after the first bounce,
+ * without the ray_offset() that was added to avoid self-intersection.
+ * Otherwise if that offset is relatively large compared to the scattering
+ * radius, we never go back up high enough to exit the surface. */
+ ray.P = P;
+ }
+
+ /* Advance to new scatter location. */
+ ray.P += t * ray.D;
+
+ float3 transmittance;
+ float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance);
+ if (bounce > 0) {
+ /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */
+ float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL);
+
+ if (have_opposite_interface) {
+ /* First step of MIS: Depending on geometry we might have two methods for guided
+ * sampling, so perform MIS between them. */
+ float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL);
+ guided_pdf = mix(
+ guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction);
+ }
+ else {
+ /* Just include phase sampling factor otherwise. */
+ guided_pdf *= forward_pdf_factor;
+ }
+
+ /* Now we apply the MIS balance heuristic between the classic and guided sampling. */
+ pdf = mix(pdf, guided_pdf, guided_fraction);
+ }
+
+ /* Finally, we're applying MIS again to combine the three color channels.
+ * Altogether, the MIS computation combines up to nine different estimators:
+ * {classic, guided, backward_guided} x {r, g, b} */
+ throughput *= (hit ? transmittance : sigma_s * transmittance) / dot(channel_pdf, pdf);
+
+ if (hit) {
+ /* If we hit the surface, we are done. */
+ break;
+ }
+ else if (throughput.x < VOLUME_THROUGHPUT_EPSILON &&
+ throughput.y < VOLUME_THROUGHPUT_EPSILON &&
+ throughput.z < VOLUME_THROUGHPUT_EPSILON) {
+ /* Avoid unnecessary work and precision issue when throughput gets really small. */
+ break;
+ }
+ }
+
+ if (hit) {
+ kernel_assert(isfinite3_safe(throughput));
+ INTEGRATOR_STATE_WRITE(path, throughput) = throughput;
+ }
+
+ return hit;
+}
+
+ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS)
+{
+ RNGState rng_state;
+ path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
+
+ Ray ray ccl_optional_struct_init;
+ LocalIntersection ss_isect ccl_optional_struct_init;
+
+ if (!subsurface_random_walk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) {
+ return false;
+ }
+
+# ifdef __VOLUME__
+ /* Update volume stack if needed. */
+ if (kernel_data.integrator.use_volumes) {
+ const int object = intersection_get_object(kg, &ss_isect.hits[0]);
+ const int object_flag = kernel_tex_fetch(__object_flag, object);
+
+ if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
+ float3 P = INTEGRATOR_STATE(ray, P);
+ const float3 Ng = INTEGRATOR_STATE(isect, Ng);
+ const float3 offset_P = ray_offset(P, -Ng);
+
+ integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_PASS, offset_P, ray.P);
+ }
+ }
+# endif /* __VOLUME__ */
+
+ /* Pretend ray is coming from the outside towards the exit point. This ensures
+ * correct front/back facing normals.
+ * TODO: find a more elegant solution? */
+ ray.P += ray.D * ray.t * 2.0f;
+ ray.D = -ray.D;
+
+ integrator_state_write_isect(INTEGRATOR_STATE_PASS, &ss_isect.hits[0]);
+ integrator_state_write_ray(INTEGRATOR_STATE_PASS, &ray);
+
+ /* Advance random number offset for bounce. */
+ INTEGRATOR_STATE_WRITE(path, rng_offset) += PRNG_BOUNCE_NUM;
+
+ const int shader = intersection_get_shader(kg, &ss_isect.hits[0]);
+ const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
+ if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) {
+ INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
+ shader);
+ }
+ else {
+ INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
+ shader);
+ }
+
+ return true;
+}
+
+#endif /* __SUBSURFACE__ */
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h
new file mode 100644
index 00000000000..d53070095f0
--- /dev/null
+++ b/intern/cycles/kernel/integrator/integrator_volume_stack.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+/* Volume Stack
+ *
+ * This is an array of object/shader IDs that the current segment of the path
+ * is inside of. */
+
+template<typename StackReadOp, typename StackWriteOp>
+ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
+ const ShaderData *sd,
+ StackReadOp stack_read,
+ StackWriteOp stack_write)
+{
+ /* TODO: We should have some way for objects to indicate if they want the
+ * world shader to work inside them. Excluding it by default is problematic
+ * because non-volume objects can't be assumed to be closed manifolds. */
+ if (!(sd->flag & SD_HAS_VOLUME)) {
+ return;
+ }
+
+ if (sd->flag & SD_BACKFACING) {
+ /* Exit volume object: remove from stack. */
+ for (int i = 0;; i++) {
+ VolumeStack entry = stack_read(i);
+ if (entry.shader == SHADER_NONE) {
+ break;
+ }
+
+ if (entry.object == sd->object) {
+ /* Shift back next stack entries. */
+ do {
+ entry = stack_read(i + 1);
+ stack_write(i, entry);
+ i++;
+ } while (entry.shader != SHADER_NONE);
+
+ return;
+ }
+ }
+ }
+ else {
+ /* Enter volume object: add to stack. */
+ int i;
+ for (i = 0;; i++) {
+ VolumeStack entry = stack_read(i);
+ if (entry.shader == SHADER_NONE) {
+ break;
+ }
+
+ /* Already in the stack? Then we have nothing to do. */
+ if (entry.object == sd->object) {
+ return;
+ }
+ }
+
+ /* If we exceed the stack limit, ignore. */
+ if (i >= VOLUME_STACK_SIZE - 1) {
+ return;
+ }
+
+ /* Add to the end of the stack. */
+ const VolumeStack new_entry = {sd->object, sd->shader};
+ const VolumeStack empty_entry = {OBJECT_NONE, SHADER_NONE};
+ stack_write(i, new_entry);
+ stack_write(i + 1, empty_entry);
+ }
+}
+
+ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd)
+{
+ volume_stack_enter_exit(
+ INTEGRATOR_STATE_PASS,
+ sd,
+ [=](const int i) { return integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i); },
+ [=](const int i, const VolumeStack entry) {
+ integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, i, entry);
+ });
+}
+
+ccl_device void shadow_volume_stack_enter_exit(INTEGRATOR_STATE_ARGS, const ShaderData *sd)
+{
+ volume_stack_enter_exit(
+ INTEGRATOR_STATE_PASS,
+ sd,
+ [=](const int i) {
+ return integrator_state_read_shadow_volume_stack(INTEGRATOR_STATE_PASS, i);
+ },
+ [=](const int i, const VolumeStack entry) {
+ integrator_state_write_shadow_volume_stack(INTEGRATOR_STATE_PASS, i, entry);
+ });
+}
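+
+/* Usage note (illustrative): these wrappers would typically be called when a path crosses a
+ * surface with a volume shader attached, so that subsequent ray segments know which volumes
+ * they are inside; the shadow variant maintains the same information for the shadow path. */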
+
+/* Clean stack after the last bounce.
+ *
+ * It is expected that all volumes are closed manifolds, so at the time when ray
+ * hits nothing (for example, it is a last bounce which goes to environment) the
+ * only expected volume in the stack is the world's one. All the rest volume
+ * entries should have been exited already.
+ *
+ * This isn't always true because of ray intersection precision issues, which
+ * could leave a stray non-world volume in the stack, causing render
+ * artifacts.
+ *
+ * Use this function after the last bounce to get rid of all volumes apart
+ * from the world's one, avoiding those artifacts.
+ */
+ccl_device_inline void volume_stack_clean(INTEGRATOR_STATE_ARGS)
+{
+ if (kernel_data.background.volume_shader != SHADER_NONE) {
+ /* Keep the world's volume in stack. */
+ INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 1, shader) = SHADER_NONE;
+ }
+ else {
+ INTEGRATOR_STATE_ARRAY_WRITE(volume_stack, 0, shader) = SHADER_NONE;
+ }
+}
+
+template<typename StackReadOp>
+ccl_device float volume_stack_step_size(INTEGRATOR_STATE_ARGS, StackReadOp stack_read)
+{
+ float step_size = FLT_MAX;
+
+ for (int i = 0;; i++) {
+ VolumeStack entry = stack_read(i);
+ if (entry.shader == SHADER_NONE) {
+ break;
+ }
+
+ int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags;
+
+ bool heterogeneous = false;
+
+ if (shader_flag & SD_HETEROGENEOUS_VOLUME) {
+ heterogeneous = true;
+ }
+ else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) {
+ /* We want to render world or objects without any volume grids
+ * as homogeneous, but can only verify this at run-time since other
+ * heterogeneous volume objects may be using the same shader. */
+ int object = entry.object;
+ if (object != OBJECT_NONE) {
+ int object_flag = kernel_tex_fetch(__object_flag, object);
+ if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
+ heterogeneous = true;
+ }
+ }
+ }
+
+ if (heterogeneous) {
+ float object_step_size = object_volume_step_size(kg, entry.object);
+ object_step_size *= kernel_data.integrator.volume_step_rate;
+ step_size = fminf(object_step_size, step_size);
+ }
+ }
+
+ return step_size;
+}
+
+typedef enum VolumeSampleMethod {
+ VOLUME_SAMPLE_NONE = 0,
+ VOLUME_SAMPLE_DISTANCE = (1 << 0),
+ VOLUME_SAMPLE_EQUIANGULAR = (1 << 1),
+ VOLUME_SAMPLE_MIS = (VOLUME_SAMPLE_DISTANCE | VOLUME_SAMPLE_EQUIANGULAR),
+} VolumeSampleMethod;
+
+ccl_device VolumeSampleMethod volume_stack_sample_method(INTEGRATOR_STATE_ARGS)
+{
+ VolumeSampleMethod method = VOLUME_SAMPLE_NONE;
+
+ for (int i = 0;; i++) {
+ VolumeStack entry = integrator_state_read_volume_stack(INTEGRATOR_STATE_PASS, i);
+ if (entry.shader == SHADER_NONE) {
+ break;
+ }
+
+ int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags;
+
+ if (shader_flag & SD_VOLUME_MIS) {
+ /* Multiple importance sampling. */
+ return VOLUME_SAMPLE_MIS;
+ }
+ else if (shader_flag & SD_VOLUME_EQUIANGULAR) {
+ /* Distance + equiangular sampling -> multiple importance sampling. */
+ if (method == VOLUME_SAMPLE_DISTANCE) {
+ return VOLUME_SAMPLE_MIS;
+ }
+
+ /* Only equiangular sampling. */
+ method = VOLUME_SAMPLE_EQUIANGULAR;
+ }
+ else {
+ /* Distance + equiangular sampling -> multiple importance sampling. */
+ if (method == VOLUME_SAMPLE_EQUIANGULAR) {
+ return VOLUME_SAMPLE_MIS;
+ }
+
+ /* Distance sampling only. */
+ method = VOLUME_SAMPLE_DISTANCE;
+ }
+ }
+
+ return method;
+}
+
+CCL_NAMESPACE_END