Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/integrator')
-rw-r--r-- intern/cycles/kernel/integrator/displacement_shader.h 38
-rw-r--r-- intern/cycles/kernel/integrator/init_from_bake.h 56
-rw-r--r-- intern/cycles/kernel/integrator/init_from_camera.h 40
-rw-r--r-- intern/cycles/kernel/integrator/intersect_closest.h 95
-rw-r--r-- intern/cycles/kernel/integrator/intersect_shadow.h 8
-rw-r--r-- intern/cycles/kernel/integrator/intersect_subsurface.h 2
-rw-r--r-- intern/cycles/kernel/integrator/intersect_volume_stack.h 24
-rw-r--r-- intern/cycles/kernel/integrator/mnee.h 88
-rw-r--r-- intern/cycles/kernel/integrator/path_state.h 64
-rw-r--r-- intern/cycles/kernel/integrator/shade_background.h 131
-rw-r--r-- intern/cycles/kernel/integrator/shade_light.h 31
-rw-r--r-- intern/cycles/kernel/integrator/shade_shadow.h 45
-rw-r--r-- intern/cycles/kernel/integrator/shade_surface.h 231
-rw-r--r-- intern/cycles/kernel/integrator/shade_volume.h 279
-rw-r--r-- intern/cycles/kernel/integrator/shader_eval.h 952
-rw-r--r-- intern/cycles/kernel/integrator/shadow_catcher.h 25
-rw-r--r-- intern/cycles/kernel/integrator/shadow_state_template.h 11
-rw-r--r-- intern/cycles/kernel/integrator/state.h 9
-rw-r--r-- intern/cycles/kernel/integrator/state_flow.h 273
-rw-r--r-- intern/cycles/kernel/integrator/state_template.h 22
-rw-r--r-- intern/cycles/kernel/integrator/state_util.h 14
-rw-r--r-- intern/cycles/kernel/integrator/subsurface.h 40
-rw-r--r-- intern/cycles/kernel/integrator/subsurface_disk.h 53
-rw-r--r-- intern/cycles/kernel/integrator/subsurface_random_walk.h 147
-rw-r--r-- intern/cycles/kernel/integrator/surface_shader.h 587
-rw-r--r-- intern/cycles/kernel/integrator/volume_shader.h 353
-rw-r--r-- intern/cycles/kernel/integrator/volume_stack.h 10
27 files changed, 1847 insertions, 1781 deletions
diff --git a/intern/cycles/kernel/integrator/displacement_shader.h b/intern/cycles/kernel/integrator/displacement_shader.h
new file mode 100644
index 00000000000..71a0f56fb3e
--- /dev/null
+++ b/intern/cycles/kernel/integrator/displacement_shader.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/* Functions to evaluate displacement shader. */
+
+#pragma once
+
+#include "kernel/svm/svm.h"
+
+#ifdef __OSL__
+# include "kernel/osl/shader.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+/* Evaluate the displacement shader for the shading point described by sd.
+ *
+ * sd->num_closure and sd->num_closure_left are set to zero first. When __SVM__
+ * is defined, evaluation is then dispatched to OSLShader::eval_displacement()
+ * if __OSL__ is also defined and kg->osl is set, and otherwise to
+ * svm_eval_nodes() instantiated with KERNEL_FEATURE_NODE_MASK_DISPLACEMENT and
+ * SHADER_TYPE_DISPLACEMENT. When __SVM__ is not defined, only the two counters
+ * are reset and no shader is evaluated.
+ *
+ * kg and state are forwarded unchanged to whichever evaluator is selected. */
+template<typename ConstIntegratorGenericState>
+ccl_device void displacement_shader_eval(KernelGlobals kg,
+ ConstIntegratorGenericState state,
+ ccl_private ShaderData *sd)
+{
+ sd->num_closure = 0; /* Reset both closure counters before evaluating. */
+ sd->num_closure_left = 0;
+
+ /* The evaluation below will modify sd->P.
+ *
+ * With __OSL__ enabled, the else placed just before the inner endif pairs with
+ * the braced SVM call that follows it, so exactly one of the two evaluators
+ * runs. With __OSL__ disabled the braced block is executed unconditionally.
+ *
+ * NOTE(review): the trailing NULL and 0 arguments of svm_eval_nodes() are
+ * presumably an absent render buffer and empty path flags; confirm against
+ * the svm_eval_nodes() declaration. */
+#ifdef __SVM__
+# ifdef __OSL__
+ if (kg->osl)
+ OSLShader::eval_displacement(kg, state, sd);
+ else
+# endif
+ {
+ svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>(
+ kg, state, sd, NULL, 0);
+ }
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h
index 0db4241b6e3..eca2c0b9ffb 100644
--- a/intern/cycles/kernel/integrator/init_from_bake.h
+++ b/intern/cycles/kernel/integrator/init_from_bake.h
@@ -5,8 +5,8 @@
#include "kernel/camera/camera.h"
-#include "kernel/film/accumulate.h"
#include "kernel/film/adaptive_sampling.h"
+#include "kernel/film/light_passes.h"
#include "kernel/integrator/path_state.h"
@@ -49,7 +49,8 @@ ccl_device const float2 bake_offset_towards_center(KernelGlobals kg,
const float3 to_center = center - P;
const float3 offset_P = P + normalize(to_center) *
- min(len(to_center), max(max3(fabs(P)), 1.0f) * position_offset);
+ min(len(to_center),
+ max(reduce_max(fabs(P)), 1.0f) * position_offset);
/* Compute barycentric coordinates at new position. */
const float3 v1 = tri_verts[1] - tri_verts[0];
@@ -91,12 +92,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
path_state_init(state, tile, x, y);
/* Check whether the pixel has converged and should not be sampled anymore. */
- if (!kernel_need_sample_pixel(kg, state, render_buffer)) {
+ if (!film_need_sample_pixel(kg, state, render_buffer)) {
return false;
}
/* Always count the sample, even if the camera sample will reject the ray. */
- const int sample = kernel_accum_sample(
+ const int sample = film_write_sample(
kg, state, render_buffer, scheduled_sample, tile->sample_offset);
/* Setup render buffers. */
@@ -111,8 +112,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
int prim = __float_as_uint(primitive[1]);
if (prim == -1) {
/* Accumulate transparency for empty pixels. */
- kernel_accum_transparent(kg, state, 0, 1.0f, buffer);
- return false;
+ film_write_transparent(kg, state, 0, 1.0f, buffer);
+ return true;
}
prim += kernel_data.bake.tri_offset;
@@ -120,13 +121,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Random number generator. */
const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed;
- float filter_x, filter_y;
- if (sample == 0) {
- filter_x = filter_y = 0.5f;
- }
- else {
- path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y);
- }
+ const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :
+ path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);
/* Initialize path state for path integration. */
path_state_init_integrator(kg, state, sample, rng_hash);
@@ -149,18 +145,24 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Sub-pixel offset. */
if (sample > 0) {
- u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
- v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
+ u = bake_clamp_mirror_repeat(u + dudx * (rand_filter.x - 0.5f) + dudy * (rand_filter.y - 0.5f),
+ 1.0f);
+ v = bake_clamp_mirror_repeat(v + dvdx * (rand_filter.x - 0.5f) + dvdy * (rand_filter.y - 0.5f),
1.0f - u);
}
+ /* Convert from Blender to Cycles/Embree/OptiX barycentric convention. */
+ const float tmp = u;
+ u = v;
+ v = 1.0f - tmp - v;
+
/* Position and normal on triangle. */
const int object = kernel_data.bake.object_index;
float3 P, Ng;
int shader;
triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
- const int object_flag = kernel_tex_fetch(__object_flag, object);
+ const int object_flag = kernel_data_fetch(object_flag, object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
P = transform_point_auto(&tfm, P);
@@ -173,14 +175,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
ray.P = zero_float3();
ray.D = normalize(P);
- ray.t = FLT_MAX;
+ ray.tmin = 0.0f;
+ ray.tmax = FLT_MAX;
ray.time = 0.5f;
ray.dP = differential_zero_compact();
ray.dD = differential_zero_compact();
integrator_state_write_ray(kg, state, &ray);
/* Setup next kernel to execute. */
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
else {
/* Surface baking. */
@@ -193,15 +196,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
}
const int shader_index = shader & SHADER_MASK;
- const int shader_flags = kernel_tex_fetch(__shaders, shader_index).flags;
+ const int shader_flags = kernel_data_fetch(shaders, shader_index).flags;
/* Fast path for position and normal passes not affected by shaders. */
if (kernel_data.film.pass_position != PASS_UNUSED) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_position, P);
+ film_write_pass_float3(buffer + kernel_data.film.pass_position, P);
return true;
}
else if (kernel_data.film.pass_normal != PASS_UNUSED && !(shader_flags & SD_HAS_BUMP)) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, N);
+ film_write_pass_float3(buffer + kernel_data.film.pass_normal, N);
return true;
}
@@ -209,7 +212,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
ray.P = P + N;
ray.D = -N;
- ray.t = FLT_MAX;
+ ray.tmin = 0.0f;
+ ray.tmax = FLT_MAX;
ray.time = 0.5f;
/* Setup differentials. */
@@ -246,13 +250,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
+ integrator_path_init_sorted(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
+ integrator_path_init_sorted(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
}
else {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
+ integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index);
}
}
diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h
index 9fe27cdda9a..8df3e1b9fb3 100644
--- a/intern/cycles/kernel/integrator/init_from_camera.h
+++ b/intern/cycles/kernel/integrator/init_from_camera.h
@@ -5,8 +5,8 @@
#include "kernel/camera/camera.h"
-#include "kernel/film/accumulate.h"
#include "kernel/film/adaptive_sampling.h"
+#include "kernel/film/light_passes.h"
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/shadow_catcher.h"
@@ -23,31 +23,21 @@ ccl_device_inline void integrate_camera_sample(KernelGlobals kg,
ccl_private Ray *ray)
{
/* Filter sampling. */
- float filter_u, filter_v;
-
- if (sample == 0) {
- filter_u = 0.5f;
- filter_v = 0.5f;
- }
- else {
- path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v);
- }
+ const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :
+ path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);
/* Depth of field sampling. */
- float lens_u = 0.0f, lens_v = 0.0f;
- if (kernel_data.cam.aperturesize > 0.0f) {
- path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v);
- }
+ const float2 rand_lens = (kernel_data.cam.aperturesize > 0.0f) ?
+ path_rng_2D(kg, rng_hash, sample, PRNG_LENS) :
+ zero_float2();
/* Motion blur time sampling. */
- float time = 0.0f;
-#ifdef __CAMERA_MOTION__
- if (kernel_data.cam.shuttertime != -1.0f)
- time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME);
-#endif
+ const float rand_time = (kernel_data.cam.shuttertime != -1.0f) ?
+ path_rng_1D(kg, rng_hash, sample, PRNG_TIME) :
+ 0.0f;
/* Generate camera ray. */
- camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+ camera_sample(kg, x, y, rand_filter.x, rand_filter.y, rand_lens.x, rand_lens.y, rand_time, ray);
}
/* Return false to indicate that this pixel is finished.
@@ -67,7 +57,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
path_state_init(state, tile, x, y);
/* Check whether the pixel has converged and should not be sampled anymore. */
- if (!kernel_need_sample_pixel(kg, state, render_buffer)) {
+ if (!film_need_sample_pixel(kg, state, render_buffer)) {
return false;
}
@@ -76,7 +66,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
* This logic allows to both count actual number of samples per pixel, and to add samples to this
* pixel after it was converged and samples were added somewhere else (in which case the
* `scheduled_sample` will be different from actual number of samples in this pixel). */
- const int sample = kernel_accum_sample(
+ const int sample = film_write_sample(
kg, state, render_buffer, scheduled_sample, tile->sample_offset);
/* Initialize random number seed for path. */
@@ -86,7 +76,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
/* Generate camera ray. */
Ray ray;
integrate_camera_sample(kg, sample, x, y, rng_hash, &ray);
- if (ray.t == 0.0f) {
+ if (ray.tmax == 0.0f) {
return true;
}
@@ -100,10 +90,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
/* Continue with intersect_closest kernel, optionally initializing volume
* stack before that if the camera may be inside a volume. */
if (kernel_data.cam.is_inside_volume) {
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
}
else {
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
return true;
diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h
index 2dfac44b414..4ecff56a3fd 100644
--- a/intern/cycles/kernel/integrator/intersect_closest.h
+++ b/intern/cycles/kernel/integrator/intersect_closest.h
@@ -5,6 +5,8 @@
#include "kernel/camera/projection.h"
+#include "kernel/film/light_passes.h"
+
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/shadow_catcher.h"
@@ -87,7 +89,7 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
return;
}
- kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer);
+ film_write_shadow_catcher_bounce_data(kg, state, render_buffer);
/* Mark state as having done a shadow catcher split so that it stops contributing to
* the shadow catcher matte pass, but keeps contributing to the combined pass. */
@@ -109,37 +111,38 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
/* If using background pass, schedule background shading kernel so that we have a background
* to alpha-over on. The background kernel will then continue the path afterwards. */
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
return;
}
if (!integrator_state_volume_stack_is_empty(kg, state)) {
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
* objects from it, and then continue shading volume and shadow catcher surface after. */
- INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+ integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
return;
}
/* Continue with shading shadow catcher surface. */
const int shader = intersection_get_shader(kg, isect);
- const int flags = kernel_tex_fetch(__shaders, shader).flags;
+ const int flags = kernel_data_fetch(shaders, shader).flags;
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ integrator_path_init_sorted(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
- INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
}
/* Schedule next kernel to be executed after updating volume stack for shadow catcher. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume(
KernelGlobals kg, IntegratorState state)
{
@@ -149,27 +152,28 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
integrator_state_read_isect(kg, state, &isect);
const int shader = intersection_get_shader(kg, &isect);
- const int flags = kernel_tex_fetch(__shaders, shader).flags;
+ const int flags = kernel_data_fetch(shaders, shader).flags;
const int object_flags = intersection_get_object_flags(kg, &isect);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
- INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
}
/* Schedule next kernel to be executed after executing background shader for shadow catcher. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background(
KernelGlobals kg, IntegratorState state)
{
@@ -177,7 +181,8 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
if (!integrator_state_volume_stack_is_empty(kg, state)) {
/* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher
* objects from it, and then continue shading volume and shadow catcher surface after. */
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
+ integrator_path_next(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK);
return;
}
@@ -190,7 +195,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
*
* Note that current_kernel is a template value since making this a variable
* leads to poor performance with CUDA atomics. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel(
KernelGlobals kg,
IntegratorState state,
@@ -203,13 +208,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
if (!integrator_state_volume_stack_is_empty(kg, state)) {
const bool hit_surface = hit && !(isect->type & PRIMITIVE_LAMP);
const int shader = (hit_surface) ? intersection_get_shader(kg, isect) : SHADER_NONE;
- const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0;
+ const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0;
if (!integrator_intersect_terminate(kg, state, flags)) {
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
}
else {
- INTEGRATOR_PATH_TERMINATE(current_kernel);
+ integrator_path_terminate(kg, state, current_kernel);
}
return;
}
@@ -218,12 +223,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
if (hit) {
/* Hit a surface, continue with light or surface kernel. */
if (isect->type & PRIMITIVE_LAMP) {
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
}
else {
/* Hit a surface, continue with surface kernel unless terminated. */
const int shader = intersection_get_shader(kg, isect);
- const int flags = kernel_tex_fetch(__shaders, shader).flags;
+ const int flags = kernel_data_fetch(shaders, shader).flags;
if (!integrator_intersect_terminate(kg, state, flags)) {
const int object_flags = intersection_get_object_flags(kg, isect);
@@ -231,16 +236,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
#ifdef __SHADOW_CATCHER__
@@ -249,13 +254,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
#endif
}
else {
- INTEGRATOR_PATH_TERMINATE(current_kernel);
+ integrator_path_terminate(kg, state, current_kernel);
}
}
}
else {
/* Nothing hit, continue with background kernel. */
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
}
@@ -263,7 +268,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
*
* The logic here matches integrator_intersect_next_kernel, except that
* volume shading and termination testing have already been done. */
-template<uint32_t current_kernel>
+template<DeviceKernel current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
KernelGlobals kg,
IntegratorState state,
@@ -273,29 +278,29 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
if (isect->prim != PRIM_NONE) {
/* Hit a surface, continue with light or surface kernel. */
if (isect->type & PRIMITIVE_LAMP) {
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
return;
}
else {
/* Hit a surface, continue with surface kernel unless terminated. */
const int shader = intersection_get_shader(kg, isect);
- const int flags = kernel_tex_fetch(__shaders, shader).flags;
+ const int flags = kernel_data_fetch(shaders, shader).flags;
const int object_flags = intersection_get_object_flags(kg, isect);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
}
else {
- INTEGRATOR_PATH_NEXT_SORTED(
- current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
+ integrator_path_next_sorted(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader);
}
#ifdef __SHADOW_CATCHER__
@@ -307,7 +312,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
}
else {
/* Nothing hit, continue with background kernel. */
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
return;
}
}
@@ -321,7 +326,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
/* Read ray from integrator state into local memory. */
Ray ray ccl_optional_struct_init;
integrator_state_read_ray(kg, state, &ray);
- kernel_assert(ray.t != 0.0f);
+ kernel_assert(ray.tmax != 0.0f);
const uint visibility = path_state_ray_visibility(state);
const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim);
@@ -329,12 +334,12 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
/* Trick to use short AO rays to approximate indirect light at the end of the path. */
if (path_state_ao_bounce(kg, state)) {
- ray.t = kernel_data.integrator.ao_bounces_distance;
+ ray.tmax = kernel_data.integrator.ao_bounces_distance;
if (last_isect_object != OBJECT_NONE) {
- const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance;
+ const float object_ao_distance = kernel_data_fetch(objects, last_isect_object).ao_distance;
if (object_ao_distance != 0.0f) {
- ray.t = object_ao_distance;
+ ray.tmax = object_ao_distance;
}
}
}
@@ -366,7 +371,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
bool from_caustic_caster = false;
bool from_caustic_receiver = false;
if (!(path_flag & PATH_RAY_CAMERA) && last_isect_object != OBJECT_NONE) {
- const int object_flags = kernel_tex_fetch(__object_flag, last_isect_object);
+ const int object_flags = kernel_data_fetch(object_flag, last_isect_object);
from_caustic_receiver = (object_flags & SD_OBJECT_CAUSTICS_RECEIVER);
from_caustic_caster = (object_flags & SD_OBJECT_CAUSTICS_CASTER);
}
diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h
index 3e746998225..25ff3d5b23f 100644
--- a/intern/cycles/kernel/integrator/intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/intersect_shadow.h
@@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k
}
#ifdef __TRANSPARENT_SHADOWS__
-# if defined(__KERNEL_CPU__)
+# ifndef __KERNEL_GPU__
ccl_device int shadow_intersections_compare(const void *a, const void *b)
{
const Intersection *isect_a = (const Intersection *)a;
@@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
if (opaque_hit) {
/* Hit an opaque surface, shadow path ends here. */
- INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
+ integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
return;
}
else {
@@ -171,7 +171,9 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt
*
* TODO: could also write to render buffer directly if no transparent shadows?
* Could save a kernel execution for the common case. */
- INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
+ integrator_shadow_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW,
DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
diff --git a/intern/cycles/kernel/integrator/intersect_subsurface.h b/intern/cycles/kernel/integrator/intersect_subsurface.h
index 0a2c4ad680d..f439d6905a0 100644
--- a/intern/cycles/kernel/integrator/intersect_subsurface.h
+++ b/intern/cycles/kernel/integrator/intersect_subsurface.h
@@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat
}
#endif
- INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+ integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h
index 49ef01dc870..c2490581e4d 100644
--- a/intern/cycles/kernel/integrator/intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h
@@ -5,7 +5,6 @@
#include "kernel/bvh/bvh.h"
#include "kernel/geom/geom.h"
-#include "kernel/integrator/shader_eval.h"
#include "kernel/integrator/volume_stack.h"
CCL_NAMESPACE_BEGIN
@@ -24,7 +23,8 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
Ray volume_ray ccl_optional_struct_init;
volume_ray.P = from_P;
- volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t);
+ volume_ray.D = normalize_len(to_P - from_P, &volume_ray.tmax);
+ volume_ray.tmin = 0.0f;
volume_ray.self.object = INTEGRATOR_STATE(state, isect, object);
volume_ray.self.prim = INTEGRATOR_STATE(state, isect, prim);
volume_ray.self.light_object = OBJECT_NONE;
@@ -37,8 +37,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
#ifdef __VOLUME_RECORD_ALL__
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
- uint num_hits = scene_intersect_volume_all(
- kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
+ uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
if (num_hits > 0) {
Intersection *isect = hits;
@@ -58,12 +57,9 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
volume_stack_enter_exit(kg, state, stack_sd);
/* Move ray forward. */
- volume_ray.P = stack_sd->P;
+ volume_ray.tmin = intersection_t_offset(isect.t);
volume_ray.self.object = isect.object;
volume_ray.self.prim = isect.prim;
- if (volume_ray.t != FLT_MAX) {
- volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t);
- }
++step;
}
#endif
@@ -82,7 +78,8 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
/* Trace ray in random direction. Any direction works, Z up is a guess to get the
* fewest hits. */
volume_ray.D = make_float3(0.0f, 0.0f, 1.0f);
- volume_ray.t = FLT_MAX;
+ volume_ray.tmin = 0.0f;
+ volume_ray.tmax = FLT_MAX;
volume_ray.self.object = OBJECT_NONE;
volume_ray.self.prim = PRIM_NONE;
volume_ray.self.light_object = OBJECT_NONE;
@@ -109,8 +106,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
#ifdef __VOLUME_RECORD_ALL__
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
- uint num_hits = scene_intersect_volume_all(
- kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
+ uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
if (num_hits > 0) {
int enclosed_volumes[MAX_VOLUME_STACK_SIZE];
Intersection *isect = hits;
@@ -199,7 +195,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
}
/* Move ray forward. */
- volume_ray.P = stack_sd->P;
+ volume_ray.tmin = intersection_t_offset(isect.t);
volume_ray.self.object = isect.object;
volume_ray.self.prim = isect.prim;
++step;
@@ -222,7 +218,9 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt
}
else {
/* Volume stack init for camera rays, continue with intersection of camera ray. */
- INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
+ integrator_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
}
diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h
index ad83f82d091..a0ad7afe591 100644
--- a/intern/cycles/kernel/integrator/mnee.h
+++ b/intern/cycles/kernel/integrator/mnee.h
@@ -115,7 +115,7 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg,
{
/* correct light sample position/direction and pdf
* NOTE: preserve pdf in area measure */
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp);
+ const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ls->lamp);
if (ls->type == LIGHT_POINT || ls->type == LIGHT_SPOT) {
ls->D = normalize_len(ls->P - P, &ls->t);
@@ -137,8 +137,14 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg,
}
}
else if (ls->type == LIGHT_AREA) {
+ float invarea = fabsf(klight->area.invarea);
ls->D = normalize_len(ls->P - P, &ls->t);
- ls->pdf = fabsf(klight->area.invarea);
+ ls->pdf = invarea;
+ if (klight->area.tan_spread > 0.f) {
+ ls->eval_fac = 0.25f * invarea;
+ ls->eval_fac *= light_spread_attenuation(
+ ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
+ }
}
ls->pdf *= kernel_data.integrator.pdf_lights;
@@ -154,12 +160,12 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
ccl_private const Intersection *isect,
ccl_private ShaderData *sd_vtx)
{
- sd_vtx->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) :
+ sd_vtx->object = (isect->object == OBJECT_NONE) ? kernel_data_fetch(prim_object, isect->prim) :
isect->object;
sd_vtx->type = isect->type;
sd_vtx->flag = 0;
- sd_vtx->object_flag = kernel_tex_fetch(__object_flag, sd_vtx->object);
+ sd_vtx->object_flag = kernel_data_fetch(object_flag, sd_vtx->object);
/* Matrices and time. */
shader_setup_object_transforms(kg, sd_vtx, ray->time);
@@ -171,7 +177,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
sd_vtx->u = isect->u;
sd_vtx->v = isect->v;
- sd_vtx->shader = kernel_tex_fetch(__tri_shader, sd_vtx->prim);
+ sd_vtx->shader = kernel_data_fetch(tri_shader, sd_vtx->prim);
float3 verts[3];
float3 normals[3];
@@ -180,7 +186,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
triangle_vertices_and_normals(kg, sd_vtx->prim, verts, normals);
/* Compute refined position (same code as in triangle_point_from_uv). */
- sd_vtx->P = isect->u * verts[0] + isect->v * verts[1] + (1.f - isect->u - isect->v) * verts[2];
+ sd_vtx->P = (1.f - isect->u - isect->v) * verts[0] + isect->u * verts[1] + isect->v * verts[2];
if (!(sd_vtx->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
const Transform tfm = object_get_transform(kg, sd_vtx);
sd_vtx->P = transform_point(&tfm, sd_vtx->P);
@@ -207,8 +213,8 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
}
/* Tangent space (position derivatives) WRT barycentric (u, v). */
- float3 dp_du = verts[0] - verts[2];
- float3 dp_dv = verts[1] - verts[2];
+ float3 dp_du = verts[1] - verts[0];
+ float3 dp_dv = verts[2] - verts[0];
/* Geometric normal. */
vtx->ng = normalize(cross(dp_du, dp_dv));
@@ -217,16 +223,16 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
/* Shading normals: Interpolate normals between vertices. */
float n_len;
- vtx->n = normalize_len(normals[0] * sd_vtx->u + normals[1] * sd_vtx->v +
- normals[2] * (1.0f - sd_vtx->u - sd_vtx->v),
+ vtx->n = normalize_len(normals[0] * (1.0f - sd_vtx->u - sd_vtx->v) + normals[1] * sd_vtx->u +
+ normals[2] * sd_vtx->v,
&n_len);
/* Shading normal derivatives WRT barycentric (u, v)
* we calculate the derivative of n = |(1-u-v)*n0 + u*n1 + v*n2| using:
* d/du [f(u)/|f(u)|] = [d/du f(u)]/|f(u)| - f(u)/|f(u)|^3 <f(u), d/du f(u)>. */
const float inv_n_len = 1.f / n_len;
- float3 dn_du = inv_n_len * (normals[0] - normals[2]);
- float3 dn_dv = inv_n_len * (normals[1] - normals[2]);
+ float3 dn_du = inv_n_len * (normals[1] - normals[0]);
+ float3 dn_dv = inv_n_len * (normals[2] - normals[0]);
dn_du -= vtx->n * dot(vtx->n, dn_du);
dn_dv -= vtx->n * dot(vtx->n, dn_dv);
@@ -386,7 +392,7 @@ ccl_device_forceinline bool mnee_compute_constraint_derivatives(
/* Invert (block) constraint derivative matrix and solve linear system so we can map dh back to dx:
* dh / dx = A
* dx = inverse(A) x dh
- * to use for specular specular manifold walk
+ * to use for specular manifold walk
* (See for example http://faculty.washington.edu/finlayso/ebook/algebraic/advanced/LUtri.htm
* for block tridiagonal matrix based linear system solve) */
ccl_device_forceinline bool mnee_solve_matrix_h_to_x(int vertex_count,
@@ -436,6 +442,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.light_prim = PRIM_NONE;
projection_ray.dP = differential_make_compact(sd->dP);
projection_ray.dD = differential_zero_compact();
+ projection_ray.tmin = 0.0f;
projection_ray.time = sd->time;
Intersection projection_isect;
@@ -499,8 +506,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.prim = pv.prim;
projection_ray.P = pv.p;
}
- projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t);
- projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
+ projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.tmax);
+ projection_ray.tmax *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
bool projection_success = false;
for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) {
@@ -509,7 +516,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
break;
int hit_object = (projection_isect.object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, projection_isect.prim) :
+ kernel_data_fetch(prim_object, projection_isect.prim) :
projection_isect.object;
if (hit_object == mv.object) {
@@ -519,8 +526,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
projection_ray.self.object = projection_isect.object;
projection_ray.self.prim = projection_isect.prim;
- projection_ray.P += projection_isect.t * projection_ray.D;
- projection_ray.t -= projection_isect.t;
+ projection_ray.tmin = intersection_t_offset(projection_isect.t);
}
if (!projection_success) {
reduce_stepsize = true;
@@ -628,9 +634,9 @@ mnee_sample_bsdf_dh(ClosureType type, float alpha_x, float alpha_y, float sample
* We assume here that the pdf (in half-vector measure) is the same as
* the one calculated when sampling the microfacet normals from the
* specular chain above: this allows us to simplify the bsdf weight */
-ccl_device_forceinline float3 mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure,
- float3 wi,
- float3 wo)
+ccl_device_forceinline Spectrum mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure,
+ float3 wi,
+ float3 wo)
{
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)closure;
@@ -801,7 +807,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
float3 wo = normalize_len(vertices[0].p - sd->P, &wo_len);
/* Initialize throughput and evaluate receiver bsdf * |n.wo|. */
- shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader);
+ surface_shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader);
/* Update light sample with new position / direction
* and keep pdf in vertex area measure */
@@ -829,8 +835,8 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
1;
INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + vertex_count;
- float3 light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time);
- bsdf_eval_mul3(throughput, light_eval / ls->pdf);
+ Spectrum light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time);
+ bsdf_eval_mul(throughput, light_eval / ls->pdf);
/* Generalized geometry term. */
@@ -852,6 +858,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
Ray probe_ray;
probe_ray.self.light_object = ls->object;
probe_ray.self.light_prim = ls->prim;
+ probe_ray.tmin = 0.0f;
probe_ray.dP = differential_make_compact(sd->dP);
probe_ray.dD = differential_zero_compact();
probe_ray.time = sd->time;
@@ -867,13 +874,13 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
ccl_private const ManifoldVertex &v = vertices[vi];
/* Check visibility. */
- probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t);
+ probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.tmax);
if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) {
int hit_object = (probe_isect.object == OBJECT_NONE) ?
- kernel_tex_fetch(__prim_object, probe_isect.prim) :
+ kernel_data_fetch(prim_object, probe_isect.prim) :
probe_isect.object;
/* Test whether the ray hit the appropriate object at its intended location. */
- if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE)
+ if (hit_object != v.object || fabsf(probe_ray.tmax - probe_isect.t) > MNEE_MIN_DISTANCE)
return false;
}
probe_ray.self.object = v.object;
@@ -906,7 +913,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + 1 + vi;
/* Evaluate shader nodes at solution vi. */
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true);
/* Set light looking dir. */
@@ -917,8 +924,8 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
/* Evaluate product term inside eq.6 at solution interface vi
* divided by corresponding sampled pdf:
* fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h| */
- float3 bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo);
- bsdf_eval_mul3(throughput, bsdf_contribution);
+ Spectrum bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo);
+ bsdf_eval_mul(throughput, bsdf_contribution);
}
/* Restore original state path bounce info. */
@@ -952,15 +959,16 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
probe_ray.self.light_object = ls->object;
probe_ray.self.light_prim = ls->prim;
probe_ray.P = sd->P;
+ probe_ray.tmin = 0.0f;
if (ls->t == FLT_MAX) {
/* Distant / env light. */
probe_ray.D = ls->D;
- probe_ray.t = ls->t;
+ probe_ray.tmax = ls->t;
}
else {
/* Other lights, avoid self-intersection. */
probe_ray.D = ls->P - probe_ray.P;
- probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t);
+ probe_ray.D = normalize_len(probe_ray.D, &probe_ray.tmax);
}
probe_ray.dP = differential_make_compact(sd->dP);
probe_ray.dD = differential_zero_compact();
@@ -998,7 +1006,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
return 0;
/* Last bool argument is the MNEE flag (for TINY_MAX_CLOSURE cap in kernel_shader.h). */
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true);
/* Get and sample refraction bsdf */
@@ -1025,10 +1033,12 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
float2 h = zero_float2();
if (microfacet_bsdf->alpha_x > 0.f && microfacet_bsdf->alpha_y > 0.f) {
/* Sample transmissive microfacet bsdf. */
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- h = mnee_sample_bsdf_dh(
- bsdf->type, microfacet_bsdf->alpha_x, microfacet_bsdf->alpha_y, bsdf_u, bsdf_v);
+ const float2 bsdf_uv = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
+ h = mnee_sample_bsdf_dh(bsdf->type,
+ microfacet_bsdf->alpha_x,
+ microfacet_bsdf->alpha_y,
+ bsdf_uv.x,
+ bsdf_uv.y);
}
/* Setup differential geometry on vertex. */
@@ -1042,9 +1052,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
probe_ray.self.object = probe_isect.object;
probe_ray.self.prim = probe_isect.prim;
- probe_ray.P += probe_isect.t * probe_ray.D;
- if (ls->t != FLT_MAX)
- probe_ray.t -= probe_isect.t;
+ probe_ray.tmin = intersection_t_offset(probe_isect.t);
};
/* Mark the manifold walk invalid to keep mollification on by default. */
diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h
index ec93ac6d46f..54560905397 100644
--- a/intern/cycles/kernel/integrator/path_state.h
+++ b/intern/cycles/kernel/integrator/path_state.h
@@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void path_state_init_queues(IntegratorState state)
{
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0;
INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0;
#endif
@@ -48,14 +48,13 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0;
INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0;
INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash;
- INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM;
+ INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BOUNCE_NUM;
INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP |
PATH_RAY_TRANSPARENT_BACKGROUND;
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f;
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f;
- INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);
+ INTEGRATOR_STATE_WRITE(state, path, throughput) = one_spectrum();
#ifdef __MNEE__
INTEGRATOR_STATE_WRITE(state, path, mnee) = 0;
@@ -75,7 +74,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
#ifdef __DENOISING_FEATURES__
if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) {
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES;
- INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3();
+ INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_spectrum();
}
#endif
}
@@ -250,7 +249,7 @@ ccl_device_inline float path_state_continuation_probability(KernelGlobals kg,
/* Probabilistic termination: use sqrt() to roughly match typical view
* transform and do path termination a bit later on average. */
- return min(sqrtf(max3(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f);
+ return min(sqrtf(reduce_max(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f);
}
ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state)
@@ -299,38 +298,25 @@ ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState sta
ccl_device_inline float path_state_rng_1D(KernelGlobals kg,
ccl_private const RNGState *rng_state,
- int dimension)
+ const int dimension)
{
return path_rng_1D(
kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
}
-ccl_device_inline void path_state_rng_2D(KernelGlobals kg,
- ccl_private const RNGState *rng_state,
- int dimension,
- ccl_private float *fx,
- ccl_private float *fy)
+ccl_device_inline float2 path_state_rng_2D(KernelGlobals kg,
+ ccl_private const RNGState *rng_state,
+ const int dimension)
{
- path_rng_2D(
- kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy);
-}
-
-ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg,
- ccl_private const RNGState *rng_state,
- uint hash)
-{
- /* Use a hash instead of dimension, this is not great but avoids adding
- * more dimensions to each bounce which reduces quality of dimensions we
- * are already using. */
- return path_rng_1D(
- kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset);
+ return path_rng_2D(
+ kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
}
ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
ccl_private const RNGState *rng_state,
- int branch,
- int num_branches,
- int dimension)
+ const int branch,
+ const int num_branches,
+ const int dimension)
{
return path_rng_1D(kg,
rng_state->rng_hash,
@@ -338,20 +324,16 @@ ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
rng_state->rng_offset + dimension);
}
-ccl_device_inline void path_branched_rng_2D(KernelGlobals kg,
- ccl_private const RNGState *rng_state,
- int branch,
- int num_branches,
- int dimension,
- ccl_private float *fx,
- ccl_private float *fy)
+ccl_device_inline float2 path_branched_rng_2D(KernelGlobals kg,
+ ccl_private const RNGState *rng_state,
+ const int branch,
+ const int num_branches,
+ const int dimension)
{
- path_rng_2D(kg,
- rng_state->rng_hash,
- rng_state->sample * num_branches + branch,
- rng_state->rng_offset + dimension,
- fx,
- fy);
+ return path_rng_2D(kg,
+ rng_state->rng_hash,
+ rng_state->sample * num_branches + branch,
+ rng_state->rng_offset + dimension);
}
/* Utility functions to get light termination value,
diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h
index 72ecf67e8a0..30ce0999258 100644
--- a/intern/cycles/kernel/integrator/shade_background.h
+++ b/intern/cycles/kernel/integrator/shade_background.h
@@ -3,18 +3,19 @@
#pragma once
-#include "kernel/film/accumulate.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/film/light_passes.h"
+
+#include "kernel/integrator/surface_shader.h"
+
#include "kernel/light/light.h"
#include "kernel/light/sample.h"
CCL_NAMESPACE_BEGIN
-ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
- IntegratorState state,
- ccl_global float *ccl_restrict render_buffer)
+ccl_device Spectrum integrator_eval_background_shader(KernelGlobals kg,
+ IntegratorState state,
+ ccl_global float *ccl_restrict render_buffer)
{
-#ifdef __BACKGROUND__
const int shader = kernel_data.background.surface_shader;
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
@@ -26,56 +27,35 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) ||
((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) ||
((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER)))
- return zero_float3();
+ return zero_spectrum();
}
/* Use fast constant background color if available. */
- float3 L = zero_float3();
- if (!shader_constant_emission_eval(kg, shader, &L)) {
- /* Evaluate background shader. */
-
- /* TODO: does aliasing like this break automatic SoA in CUDA?
- * Should we instead store closures separate from ShaderData? */
- ShaderDataTinyStorage emission_sd_storage;
- ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-
- PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
- shader_setup_from_background(kg,
- emission_sd,
- INTEGRATOR_STATE(state, ray, P),
- INTEGRATOR_STATE(state, ray, D),
- INTEGRATOR_STATE(state, ray, time));
-
- PROFILING_SHADER(emission_sd->object, emission_sd->shader);
- PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL);
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>(
- kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
-
- L = shader_background_eval(emission_sd);
+ Spectrum L = zero_spectrum();
+ if (surface_shader_constant_emission(kg, shader, &L)) {
+ return L;
}
- /* Background MIS weights. */
-# ifdef __BACKGROUND_MIS__
- /* Check if background light exists or if we should skip pdf. */
- if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) &&
- kernel_data.background.use_mis) {
- const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
- const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
- const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
- const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
-
- /* multiple importance sampling, get background light pdf for ray
- * direction, and compute weight with respect to BSDF pdf */
- const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D);
- const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
- L *= mis_weight;
- }
-# endif
+ /* Evaluate background shader. */
- return L;
-#else
- return make_float3(0.8f, 0.8f, 0.8f);
-#endif
+ /* TODO: does aliasing like this break automatic SoA in CUDA?
+ * Should we instead store closures separate from ShaderData? */
+ ShaderDataTinyStorage emission_sd_storage;
+ ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+
+ PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
+ shader_setup_from_background(kg,
+ emission_sd,
+ INTEGRATOR_STATE(state, ray, P),
+ INTEGRATOR_STATE(state, ray, D),
+ INTEGRATOR_STATE(state, ray, time));
+
+ PROFILING_SHADER(emission_sd->object, emission_sd->shader);
+ PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL);
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>(
+ kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
+
+ return surface_shader_background(emission_sd);
}
ccl_device_inline void integrate_background(KernelGlobals kg,
@@ -107,7 +87,7 @@ ccl_device_inline void integrate_background(KernelGlobals kg,
for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
/* This path should have been resolved with mnee, it will
* generate a firefly for small lights since it is improbable. */
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+ const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp);
if (klight->type == LIGHT_BACKGROUND && klight->use_caustics) {
eval_background = false;
break;
@@ -118,17 +98,37 @@ ccl_device_inline void integrate_background(KernelGlobals kg,
#endif /* __MNEE__ */
/* Evaluate background shader. */
- float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) :
- zero_float3();
+ Spectrum L = zero_spectrum();
+
+ if (eval_background) {
+ L = integrator_eval_background_shader(kg, state, render_buffer);
+
+ /* When using the ao bounces approximation, adjust background
+ * shader intensity with ao factor. */
+ if (path_state_ao_bounce(kg, state)) {
+ L *= kernel_data.integrator.ao_bounces_factor;
+ }
+
+ /* Background MIS weights. */
+ float mis_weight = 1.0f;
+ /* Check if background light exists or if we should skip pdf. */
+ if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) &&
+ kernel_data.background.use_mis) {
+ const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+
+ /* multiple importance sampling, get background light pdf for ray
+ * direction, and compute weight with respect to BSDF pdf */
+ const float pdf = background_light_pdf(kg, ray_P, ray_D);
+ mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
+ }
- /* When using the ao bounces approximation, adjust background
- * shader intensity with ao factor. */
- if (path_state_ao_bounce(kg, state)) {
- L *= kernel_data.integrator.ao_bounces_factor;
+ L *= mis_weight;
}
/* Write to render buffer. */
- kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer);
+ film_write_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer);
}
ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
@@ -160,7 +160,7 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
if (INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) {
/* This path should have been resolved with mnee, it will
* generate a firefly for small lights since it is improbable. */
- const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+ const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp);
if (klight->use_caustics)
return;
}
@@ -170,24 +170,23 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
+ Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) {
return;
}
/* MIS weighting. */
+ float mis_weight = 1.0f;
if (!(path_flag & PATH_RAY_MIS_SKIP)) {
/* multiple importance sampling, get regular light pdf,
* and compute weight with respect to BSDF pdf */
const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
- const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf);
- light_eval *= mis_weight;
+ mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf);
}
/* Write to render buffer. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_accum_emission(
- kg, state, throughput * light_eval, render_buffer, kernel_data.background.lightgroup);
+ film_write_surface_emission(
+ kg, state, light_eval, mis_weight, render_buffer, kernel_data.background.lightgroup);
}
}
}
@@ -213,7 +212,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg,
}
#endif
- INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
+ integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h
index be926c78439..a4246f99bbf 100644
--- a/intern/cycles/kernel/integrator/shade_light.h
+++ b/intern/cycles/kernel/integrator/shade_light.h
@@ -3,8 +3,8 @@
#pragma once
-#include "kernel/film/accumulate.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/film/light_passes.h"
+#include "kernel/integrator/surface_shader.h"
#include "kernel/light/light.h"
#include "kernel/light/sample.h"
@@ -22,19 +22,8 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
const float ray_time = INTEGRATOR_STATE(state, ray, time);
- /* Advance ray beyond light. */
- /* TODO: can we make this more numerically robust to avoid reintersecting the
- * same light in some cases? Ray should not intersect surface anymore as the
- * object and prim ids will prevent self intersection. */
- const float3 new_ray_P = ray_P + ray_D * isect.t;
- INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P;
- INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t;
-
- /* Set position to where the BSDF was sampled, for correct MIS PDF. */
- const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t);
- ray_P -= ray_D * mis_ray_t;
- isect.t += mis_ray_t;
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t;
+ /* Advance ray to new start distance. */
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(isect.t);
LightSample ls ccl_optional_struct_init;
const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls);
@@ -62,12 +51,13 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
+ Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) {
return;
}
/* MIS weighting. */
+ float mis_weight = 1.0f;
if (!(path_flag & PATH_RAY_MIS_SKIP)) {
/* multiple importance sampling, get regular light pdf,
* and compute weight with respect to BSDF pdf */
@@ -77,8 +67,7 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
}
/* Write to render buffer. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_accum_emission(kg, state, throughput * light_eval, render_buffer, ls.group);
+ film_write_surface_emission(kg, state, light_eval, mis_weight, render_buffer, ls.group);
}
ccl_device void integrator_shade_light(KernelGlobals kg,
@@ -99,11 +88,13 @@ ccl_device void integrator_shade_light(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce;
if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
- INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
+ integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT);
return;
}
else {
- INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
+ integrator_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
return;
}
diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h
index 2b929b7b62e..ba18aed6ff0 100644
--- a/intern/cycles/kernel/integrator/shade_shadow.h
+++ b/intern/cycles/kernel/integrator/shade_shadow.h
@@ -4,7 +4,7 @@
#pragma once
#include "kernel/integrator/shade_volume.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/integrator/surface_shader.h"
#include "kernel/integrator/volume_stack.h"
CCL_NAMESPACE_BEGIN
@@ -15,9 +15,9 @@ ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits)
}
#ifdef __TRANSPARENT_SHADOWS__
-ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg,
- IntegratorShadowState state,
- const int hit)
+ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg,
+ IntegratorShadowState state,
+ const int hit)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE);
@@ -40,7 +40,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg,
/* Evaluate shader. */
if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
kg, state, shadow_sd, NULL, PATH_RAY_SHADOW);
}
@@ -50,7 +50,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg,
# endif
/* Compute transparency from closures. */
- return shader_bsdf_transparency(kg, shadow_sd);
+ return surface_shader_transparency(kg, shadow_sd);
}
# ifdef __VOLUME__
@@ -58,7 +58,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
IntegratorShadowState state,
const int hit,
const int num_recorded_hits,
- ccl_private float3 *ccl_restrict
+ ccl_private Spectrum *ccl_restrict
throughput)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME);
@@ -75,13 +75,9 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
ray.self.light_object = OBJECT_NONE;
ray.self.light_prim = PRIM_NONE;
/* Modify ray position and length to match current segment. */
- const float start_t = (hit == 0) ? 0.0f :
- INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
- const float end_t = (hit < num_recorded_hits) ?
- INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
- ray.t;
- ray.P += start_t * ray.D;
- ray.t = end_t - start_t;
+ ray.tmin = (hit == 0) ? ray.tmin : INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t);
+ ray.tmax = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) :
+ ray.tmax;
shader_setup_from_volume(kg, shadow_sd, &ray);
@@ -104,7 +100,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) {
# ifdef __VOLUME__
if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) {
- float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
+ Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput);
if (is_zero(throughput)) {
return true;
@@ -117,8 +113,8 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
/* Surface shaders. */
if (hit < num_recorded_hits) {
- const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit);
- const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow;
+ const Spectrum shadow = integrate_transparent_surface_shadow(kg, state, hit);
+ const Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow;
if (is_zero(throughput)) {
return true;
}
@@ -137,10 +133,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
/* There are more hits that we could not record due to memory usage,
* adjust ray to intersect again from the last hit. */
const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t);
- const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P);
- const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D);
- INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D;
- INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = intersection_t_offset(last_hit_t);
}
return false;
@@ -158,20 +151,22 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg,
/* Evaluate transparent shadows. */
const bool opaque = integrate_transparent_shadow(kg, state, num_hits);
if (opaque) {
- INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+ integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
#endif
if (shadow_intersections_has_remaining(num_hits)) {
/* More intersections to find, continue shadow ray. */
- INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
+ integrator_shadow_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW);
return;
}
else {
- kernel_accum_light(kg, state, render_buffer);
- INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
+ film_write_direct_light(kg, state, render_buffer);
+ integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
}
diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h
index ce1398859b7..c19f56a9b70 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -3,14 +3,15 @@
#pragma once
-#include "kernel/film/accumulate.h"
-#include "kernel/film/passes.h"
+#include "kernel/film/data_passes.h"
+#include "kernel/film/denoising_passes.h"
+#include "kernel/film/light_passes.h"
#include "kernel/integrator/mnee.h"
#include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
#include "kernel/integrator/subsurface.h"
+#include "kernel/integrator/surface_shader.h"
#include "kernel/integrator/volume_stack.h"
#include "kernel/light/light.h"
@@ -31,7 +32,52 @@ ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg,
shader_setup_from_ray(kg, sd, &ray, &isect);
}
-#ifdef __HOLDOUT__
+ccl_device_forceinline float3 integrate_surface_ray_offset(KernelGlobals kg,
+ const ccl_private ShaderData *sd,
+ const float3 ray_P,
+ const float3 ray_D)
+{
+ /* No ray offset needed for other primitive types. */
+ if (!(sd->type & PRIMITIVE_TRIANGLE)) {
+ return ray_P;
+ }
+
+ /* Self intersection tests already account for the case where a ray hits the
+ * same primitive. However precision issues can still cause neighboring
+ * triangles to be hit. Here we test if the ray-triangle intersection with
+ * the same primitive would miss, implying that a neighboring triangle would
+ * be hit instead.
+ *
+ * This relies on triangle intersection to be watertight, and the object's inverse
+ * transform to match the one used by ray intersection exactly.
+ *
+ * Potential improvements:
+ * - It appears this happens when either barycentric coordinates are small,
+ * or dot(sd->Ng, ray_D) is small. Detect such cases and skip test?
+ * - Instead of ray offset, can we tweak P to lie within the triangle?
+ */
+ const uint tri_vindex = kernel_data_fetch(tri_vindex, sd->prim).w;
+ const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0),
+ tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
+ tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
+
+ float3 local_ray_P = ray_P;
+ float3 local_ray_D = ray_D;
+
+ if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ const Transform itfm = object_get_inverse_transform(kg, sd);
+ local_ray_P = transform_point(&itfm, local_ray_P);
+ local_ray_D = transform_direction(&itfm, local_ray_D);
+ }
+
+ if (ray_triangle_intersect_self(local_ray_P, local_ray_D, tri_a, tri_b, tri_c)) {
+ return ray_P;
+ }
+ else {
+ return ray_offset(ray_P, sd->Ng);
+ }
+}
+
ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg,
ConstIntegratorState state,
ccl_private ShaderData *sd,
@@ -42,22 +88,18 @@ ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg,
if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
- const float3 holdout_weight = shader_holdout_apply(kg, sd);
- if (kernel_data.background.transparent) {
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- const float transparent = average(holdout_weight * throughput);
- kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer);
- }
- if (isequal_float3(holdout_weight, one_float3())) {
+ const Spectrum holdout_weight = surface_shader_apply_holdout(kg, sd);
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ const float transparent = average(holdout_weight * throughput);
+ film_write_holdout(kg, state, path_flag, transparent, render_buffer);
+ if (isequal(holdout_weight, one_spectrum())) {
return false;
}
}
return true;
}
-#endif /* __HOLDOUT__ */
-#ifdef __EMISSION__
ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
ConstIntegratorState state,
ccl_private const ShaderData *sd,
@@ -67,32 +109,29 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
/* Evaluate emissive closure. */
- float3 L = shader_emissive_eval(sd);
+ Spectrum L = surface_shader_emission(sd);
+ float mis_weight = 1.0f;
-# ifdef __HAIR__
+#ifdef __HAIR__
if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) &&
(sd->type & PRIMITIVE_TRIANGLE))
-# else
+#else
if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
-# endif
+#endif
{
const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
- const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t);
+ const float t = sd->ray_length;
/* Multiple importance sampling, get triangle light pdf,
* and compute weight with respect to BSDF pdf. */
float pdf = triangle_light_pdf(kg, sd, t);
- float mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf);
- L *= mis_weight;
+ mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf);
}
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_accum_emission(
- kg, state, throughput * L, render_buffer, object_lightgroup(kg, sd->object));
+ film_write_surface_emission(
+ kg, state, L, mis_weight, render_buffer, object_lightgroup(kg, sd->object));
}
-#endif /* __EMISSION__ */
-#ifdef __EMISSION__
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
template<uint node_feature_mask>
@@ -111,11 +150,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const uint bounce = INTEGRATOR_STATE(state, path, bounce);
- float light_u, light_v;
- path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+ const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
if (!light_distribution_sample_from_position(
- kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) {
+ kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, &ls)) {
return;
}
}
@@ -133,15 +171,15 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
BsdfEval bsdf_eval ccl_optional_struct_init;
- const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D);
+ const bool is_transmission = surface_shader_is_transmission(sd, ls.D);
-# ifdef __MNEE__
+#ifdef __MNEE__
int mnee_vertex_count = 0;
IF_KERNEL_FEATURE(MNEE)
{
if (ls.lamp != LAMP_NONE) {
/* Is this a caustic light? */
- const bool use_caustics = kernel_tex_fetch(__lights, ls.lamp).use_caustics;
+ const bool use_caustics = kernel_data_fetch(lights, ls.lamp).use_caustics;
if (use_caustics) {
/* Are we on a caustic caster? */
if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER))
@@ -161,16 +199,17 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
light_sample_to_surface_shadow_ray(kg, emission_sd, &ls, &ray);
}
else
-# endif /* __MNEE__ */
+#endif /* __MNEE__ */
{
- const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
+ const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
if (is_zero(light_eval)) {
return;
}
/* Evaluate BSDF. */
- const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader);
- bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf);
+ const float bsdf_pdf = surface_shader_bsdf_eval(
+ kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader);
+ bsdf_eval_mul(&bsdf_eval, light_eval / ls.pdf);
if (ls.shader & SHADER_USE_MIS) {
const float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, bsdf_pdf);
@@ -190,16 +229,20 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
const bool is_light = light_sample_is_light(&ls);
/* Branch off shadow kernel. */
- INTEGRATOR_SHADOW_PATH_INIT(
- shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
+ IntegratorShadowState shadow_state = integrator_shadow_path_init(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
/* Copy volume stack and enter/exit volume. */
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
if (is_transmission) {
-# ifdef __VOLUME__
+#ifdef __VOLUME__
shadow_volume_stack_enter_exit(kg, shadow_state, sd);
-# endif
+#endif
+ }
+
+ if (ray.self.object != OBJECT_NONE) {
+ ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D);
}
/* Write shadow ray and associated state to global memory. */
@@ -213,11 +256,12 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
/* Copy state from main path to shadow path. */
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval);
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) *
+ bsdf_eval_sum(&bsdf_eval);
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- packed_float3 pass_diffuse_weight;
- packed_float3 pass_glossy_weight;
+ PackedSpectrum pass_diffuse_weight;
+ PackedSpectrum pass_glossy_weight;
if (shadow_flag & PATH_RAY_ANY_PASS) {
/* Indirect bounce, use weights from earlier surface or volume bounce. */
@@ -227,8 +271,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
else {
/* Direct light, use BSDFs at this bounce. */
shadow_flag |= PATH_RAY_SURFACE_PASS;
- pass_diffuse_weight = packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval));
- pass_glossy_weight = packed_float3(bsdf_eval_pass_glossy_weight(&bsdf_eval));
+ pass_diffuse_weight = PackedSpectrum(bsdf_eval_pass_diffuse_weight(&bsdf_eval));
+ pass_glossy_weight = PackedSpectrum(bsdf_eval_pass_glossy_weight(&bsdf_eval));
}
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
@@ -250,7 +294,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE(
state, path, glossy_bounce);
-# ifdef __MNEE__
+#ifdef __MNEE__
if (mnee_vertex_count > 0) {
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) =
INTEGRATOR_STATE(state, path, transmission_bounce) + mnee_vertex_count - 1;
@@ -262,7 +306,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
bounce) = INTEGRATOR_STATE(state, path, bounce) + mnee_vertex_count;
}
else
-# endif
+#endif
{
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE(
state, path, transmission_bounce);
@@ -284,7 +328,6 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
ls.group + 1 :
kernel_data.background.lightgroup + 1;
}
-#endif
/* Path tracing: bounce off or through surface with new direction. */
ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
@@ -298,9 +341,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
return LABEL_NONE;
}
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
+ float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
+ ccl_private const ShaderClosure *sc = surface_shader_bsdf_bssrdf_pick(sd, &rand_bsdf);
#ifdef __SUBSURFACE__
/* BSSRDF closure, we schedule subsurface intersection kernel. */
@@ -313,29 +355,33 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
float bsdf_pdf;
BsdfEval bsdf_eval ccl_optional_struct_init;
float3 bsdf_omega_in ccl_optional_struct_init;
- differential3 bsdf_domega_in ccl_optional_struct_init;
int label;
- label = shader_bsdf_sample_closure(
- kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+ label = surface_shader_bsdf_sample_closure(
+ kg, sd, sc, rand_bsdf, &bsdf_eval, &bsdf_omega_in, &bsdf_pdf);
if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) {
return LABEL_NONE;
}
- /* Setup ray. Note that clipping works through transparent bounces. */
- INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
- INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
- INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ?
- INTEGRATOR_STATE(state, ray, t) - sd->ray_length :
- FLT_MAX;
+ if (label & LABEL_TRANSPARENT) {
+ /* Only need to modify start distance for transparent. */
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
+ }
+ else {
+ /* Setup ray with changed origin and direction. */
+ const float3 D = normalize(bsdf_omega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, P) = integrate_surface_ray_offset(kg, sd, sd->P, D);
+ INTEGRATOR_STATE_WRITE(state, ray, D) = D;
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
- INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
- INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
#endif
+ }
/* Update throughput. */
- float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf;
INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
@@ -349,12 +395,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
}
/* Update path state */
- if (label & LABEL_TRANSPARENT) {
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
- }
- else {
+ if (!(label & LABEL_TRANSPARENT)) {
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
}
@@ -371,17 +413,8 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState
return LABEL_NONE;
}
- /* Setup ray position, direction stays unchanged. */
- INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
-
- /* Clipping works through transparent. */
- INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length;
-
-# ifdef __RAY_DIFFERENTIALS__
- INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
-# endif
-
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length;
+ /* Only modify start distance. */
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length);
return LABEL_TRANSMIT | LABEL_TRANSPARENT;
}
@@ -416,23 +449,26 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
return;
}
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+ const float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
float3 ao_N;
- const float3 ao_weight = shader_bsdf_ao(
+ const Spectrum ao_weight = surface_shader_ao(
kg, sd, kernel_data.integrator.ao_additive_factor, &ao_N);
float3 ao_D;
float ao_pdf;
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+ sample_cos_hemisphere(ao_N, rand_bsdf.x, rand_bsdf.y, &ao_D, &ao_pdf);
bool skip_self = true;
Ray ray ccl_optional_struct_init;
ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self);
ray.D = ao_D;
- ray.t = kernel_data.integrator.ao_bounces_distance;
+ if (skip_self) {
+ ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D);
+ }
+ ray.tmin = 0.0f;
+ ray.tmax = kernel_data.integrator.ao_bounces_distance;
ray.time = sd->time;
ray.self.object = (skip_self) ? sd->object : OBJECT_NONE;
ray.self.prim = (skip_self) ? sd->prim : PRIM_NONE;
@@ -442,7 +478,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
ray.dD = differential_zero_compact();
/* Branch off shadow kernel. */
- INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao);
+ IntegratorShadowState shadow_state = integrator_shadow_path_init(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true);
/* Copy volume stack and enter/exit volume. */
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
@@ -458,7 +495,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce);
const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag) | PATH_RAY_SHADOW_FOR_AO;
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * shader_bsdf_alpha(kg, sd);
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) *
+ surface_shader_alpha(kg, sd);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
state, path, render_pixel_index);
@@ -507,7 +545,7 @@ ccl_device bool integrate_surface(KernelGlobals kg,
{
/* Evaluate shader. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL);
- shader_eval_surface<node_feature_mask>(kg, state, &sd, render_buffer, path_flag);
+ surface_shader_eval<node_feature_mask>(kg, state, &sd, render_buffer, path_flag);
/* Initialize additional RNG for BSDFs. */
if (sd.flag & SD_BSDF_NEEDS_LCG) {
@@ -529,21 +567,17 @@ ccl_device bool integrate_surface(KernelGlobals kg,
#endif
{
/* Filter closures. */
- shader_prepare_surface_closures(kg, state, &sd, path_flag);
+ surface_shader_prepare_closures(kg, state, &sd, path_flag);
-#ifdef __HOLDOUT__
/* Evaluate holdout. */
if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) {
return false;
}
-#endif
-#ifdef __EMISSION__
/* Write emission. */
if (sd.flag & SD_EMISSION) {
integrate_surface_emission(kg, state, &sd, render_buffer);
}
-#endif
/* Perform path termination. Most paths have already been terminated in
* the intersect_closest kernel, this is just for emission and for dividing
@@ -557,11 +591,11 @@ ccl_device bool integrate_surface(KernelGlobals kg,
/* Write render passes. */
#ifdef __PASSES__
PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES);
- kernel_write_data_passes(kg, state, &sd, render_buffer);
+ film_write_data_passes(kg, state, &sd, render_buffer);
#endif
#ifdef __DENOISING_FEATURES__
- kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
+ film_write_denoising_features_surface(kg, state, &sd, render_buffer);
#endif
}
@@ -604,22 +638,23 @@ ccl_device bool integrate_surface(KernelGlobals kg,
}
template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE,
- int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
+ DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE>
ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
IntegratorState state,
ccl_global float *ccl_restrict render_buffer)
{
if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) {
if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) {
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
+ integrator_path_next(
+ kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
}
else {
- kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f);
- INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
+ kernel_assert(INTEGRATOR_STATE(state, ray, tmax) != 0.0f);
+ integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
}
}
else {
- INTEGRATOR_PATH_TERMINATE(current_kernel);
+ integrator_path_terminate(kg, state, current_kernel);
}
}
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index 4a5015946aa..aaef92729d6 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -3,12 +3,13 @@
#pragma once
-#include "kernel/film/accumulate.h"
-#include "kernel/film/passes.h"
+#include "kernel/film/data_passes.h"
+#include "kernel/film/denoising_passes.h"
+#include "kernel/film/light_passes.h"
#include "kernel/integrator/intersect_closest.h"
#include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/integrator/volume_shader.h"
#include "kernel/integrator/volume_stack.h"
#include "kernel/light/light.h"
@@ -29,13 +30,13 @@ typedef enum VolumeIntegrateEvent {
typedef struct VolumeIntegrateResult {
/* Throughput and offset for direct light scattering. */
bool direct_scatter;
- float3 direct_throughput;
+ Spectrum direct_throughput;
float direct_t;
ShaderVolumePhases direct_phases;
/* Throughput and offset for indirect light scattering. */
bool indirect_scatter;
- float3 indirect_throughput;
+ Spectrum indirect_throughput;
float indirect_t;
ShaderVolumePhases indirect_phases;
} VolumeIntegrateResult;
@@ -52,19 +53,19 @@ typedef struct VolumeIntegrateResult {
* sigma_t = sigma_a + sigma_s */
typedef struct VolumeShaderCoefficients {
- float3 sigma_t;
- float3 sigma_s;
- float3 emission;
+ Spectrum sigma_t;
+ Spectrum sigma_s;
+ Spectrum emission;
} VolumeShaderCoefficients;
/* Evaluate shader to get extinction coefficient at P. */
ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
IntegratorShadowState state,
ccl_private ShaderData *ccl_restrict sd,
- ccl_private float3 *ccl_restrict extinction)
+ ccl_private Spectrum *ccl_restrict extinction)
{
VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i))
- shader_eval_volume<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass);
+ volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass);
if (!(sd->flag & SD_EXTINCTION)) {
return false;
@@ -83,15 +84,16 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
- shader_eval_volume<false>(kg, state, sd, path_flag, volume_read_lambda_pass);
+ volume_shader_eval<false>(kg, state, sd, path_flag, volume_read_lambda_pass);
if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) {
return false;
}
- coeff->sigma_s = zero_float3();
- coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3();
- coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_float3();
+ coeff->sigma_s = zero_spectrum();
+ coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction :
+ zero_spectrum();
+ coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_spectrum();
if (sd->flag & SD_SCATTER) {
for (int i = 0; i < sd->num_closure; i++) {
@@ -114,7 +116,8 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
ccl_device_forceinline void volume_step_init(KernelGlobals kg,
ccl_private const RNGState *rng_state,
const float object_step_size,
- float t,
+ const float tmin,
+ const float tmax,
ccl_private float *step_size,
ccl_private float *step_shade_offset,
ccl_private float *steps_offset,
@@ -122,7 +125,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
{
if (object_step_size == FLT_MAX) {
/* Homogeneous volume. */
- *step_size = t;
+ *step_size = tmax - tmin;
*step_shade_offset = 0.0f;
*steps_offset = 1.0f;
*max_steps = 1;
@@ -130,6 +133,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
else {
/* Heterogeneous volume. */
*max_steps = kernel_data.integrator.volume_max_steps;
+ const float t = tmax - tmin;
float step = min(object_step_size, t);
/* compute exact steps in advance for malloc */
@@ -141,11 +145,11 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
/* Perform shading at this offset within a step, to integrate over
* the entire step segment. */
- *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4);
+ *step_shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET);
/* Shift starting point of all segment by this random amount to avoid
* banding artifacts from the volume bounding shape. */
- *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3);
+ *steps_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET);
}
}
@@ -160,12 +164,12 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state,
ccl_private Ray *ccl_restrict ray,
ccl_private ShaderData *ccl_restrict sd,
- ccl_global float3 *ccl_restrict throughput)
+ ccl_global Spectrum *ccl_restrict throughput)
{
- float3 sigma_t = zero_float3();
+ Spectrum sigma_t = zero_spectrum();
if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
- *throughput *= volume_color_transmittance(sigma_t, ray->t);
+ *throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin);
}
}
# endif
@@ -176,14 +180,14 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
IntegratorShadowState state,
ccl_private Ray *ccl_restrict ray,
ccl_private ShaderData *ccl_restrict sd,
- ccl_private float3 *ccl_restrict throughput,
+ ccl_private Spectrum *ccl_restrict throughput,
const float object_step_size)
{
/* Load random number state. */
RNGState rng_state;
shadow_path_state_rng_load(state, &rng_state);
- float3 tp = *throughput;
+ Spectrum tp = *throughput;
/* Prepare for stepping.
* For shadows we do not offset all segments, since the starting point is
@@ -194,7 +198,8 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
volume_step_init(kg,
&rng_state,
object_step_size,
- ray->t,
+ ray->tmin,
+ ray->tmax,
&step_size,
&step_shade_offset,
&unused,
@@ -202,17 +207,17 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
const float steps_offset = 1.0f;
/* compute extinction at the start */
- float t = 0.0f;
+ float t = ray->tmin;
- float3 sum = zero_float3();
+ Spectrum sum = zero_spectrum();
for (int i = 0; i < max_steps; i++) {
/* advance to new position */
- float new_t = min(ray->t, (i + steps_offset) * step_size);
+ float new_t = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size);
float dt = new_t - t;
float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset);
- float3 sigma_t = zero_float3();
+ Spectrum sigma_t = zero_spectrum();
/* compute attenuation over segment */
sd->P = new_P;
@@ -222,20 +227,19 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
* check then. */
sum += (-sigma_t * dt);
if ((i & 0x07) == 0) { /* TODO: Other interval? */
- tp = *throughput * exp3(sum);
+ tp = *throughput * exp(sum);
/* stop if nearly all light is blocked */
- if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON &&
- tp.z < VOLUME_THROUGHPUT_EPSILON)
+ if (reduce_max(tp) < VOLUME_THROUGHPUT_EPSILON)
break;
}
}
/* stop if at the end of the volume */
t = new_t;
- if (t == ray->t) {
+ if (t == ray->tmax) {
/* Update throughput in case we haven't done it above */
- tp = *throughput * exp3(sum);
+ tp = *throughput * exp(sum);
break;
}
}
@@ -257,15 +261,16 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
const float xi,
ccl_private float *pdf)
{
- const float t = ray->t;
+ const float tmin = ray->tmin;
+ const float tmax = ray->tmax;
const float delta = dot((light_P - ray->P), ray->D);
const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
if (UNLIKELY(D == 0.0f)) {
*pdf = 0.0f;
return 0.0f;
}
- const float theta_a = -atan2f(delta, D);
- const float theta_b = atan2f(t - delta, D);
+ const float theta_a = atan2f(tmin - delta, D);
+ const float theta_b = atan2f(tmax - delta, D);
const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
if (UNLIKELY(theta_b == theta_a)) {
*pdf = 0.0f;
@@ -273,7 +278,7 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r
}
*pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
- return min(t, delta + t_); /* min is only for float precision errors */
+ return clamp(delta + t_, tmin, tmax); /* clamp is only for float precision errors */
}
ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
@@ -286,11 +291,12 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray,
return 0.0f;
}
- const float t = ray->t;
+ const float tmin = ray->tmin;
+ const float tmax = ray->tmax;
const float t_ = sample_t - delta;
- const float theta_a = -atan2f(delta, D);
- const float theta_b = atan2f(t - delta, D);
+ const float theta_a = atan2f(tmin - delta, D);
+ const float theta_b = atan2f(tmax - delta, D);
if (UNLIKELY(theta_b == theta_a)) {
return 0.0f;
}
@@ -310,11 +316,12 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
return 0.0f;
}
- const float t = ray->t;
+ const float tmin = ray->tmin;
+ const float tmax = ray->tmax;
const float t_ = sample_t - delta;
- const float theta_a = -atan2f(delta, D);
- const float theta_b = atan2f(t - delta, D);
+ const float theta_a = atan2f(tmin - delta, D);
+ const float theta_b = atan2f(tmax - delta, D);
if (UNLIKELY(theta_b == theta_a)) {
return 0.0f;
}
@@ -328,22 +335,22 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
/* Distance sampling */
ccl_device float volume_distance_sample(float max_t,
- float3 sigma_t,
+ Spectrum sigma_t,
int channel,
float xi,
- ccl_private float3 *transmittance,
- ccl_private float3 *pdf)
+ ccl_private Spectrum *transmittance,
+ ccl_private Spectrum *pdf)
{
/* xi is [0, 1[ so log(0) should never happen, division by zero is
* avoided because sample_sigma_t > 0 when SD_SCATTER is set */
float sample_sigma_t = volume_channel_get(sigma_t, channel);
- float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t);
float sample_transmittance = volume_channel_get(full_transmittance, channel);
float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t);
*transmittance = volume_color_transmittance(sigma_t, sample_t);
- *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - full_transmittance);
+ *pdf = safe_divide_color(sigma_t * *transmittance, one_spectrum() - full_transmittance);
/* todo: optimization: when taken together with hit/miss decision,
* the full_transmittance cancels out and xi does not
@@ -352,33 +359,36 @@ ccl_device float volume_distance_sample(float max_t,
return sample_t;
}
-ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
+ccl_device Spectrum volume_distance_pdf(float max_t, Spectrum sigma_t, float sample_t)
{
- float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
- float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
+ Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ Spectrum transmittance = volume_color_transmittance(sigma_t, sample_t);
- return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance);
+ return safe_divide_color(sigma_t * transmittance, one_spectrum() - full_transmittance);
}
/* Emission */
-ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff,
- int closure_flag,
- float3 transmittance,
- float t)
+ccl_device Spectrum volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff,
+ int closure_flag,
+ Spectrum transmittance,
+ float t)
{
/* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
* this goes to E * t as sigma_t goes to zero
*
* todo: we should use an epsilon to avoid precision issues near zero sigma_t */
- float3 emission = coeff->emission;
+ Spectrum emission = coeff->emission;
if (closure_flag & SD_EXTINCTION) {
- float3 sigma_t = coeff->sigma_t;
+ Spectrum sigma_t = coeff->sigma_t;
- emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t;
- emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t;
- emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ GET_SPECTRUM_CHANNEL(emission, i) *= (GET_SPECTRUM_CHANNEL(sigma_t, i) > 0.0f) ?
+ (1.0f - GET_SPECTRUM_CHANNEL(transmittance, i)) /
+ GET_SPECTRUM_CHANNEL(sigma_t, i) :
+ t;
+ }
}
else
emission *= t;
@@ -390,8 +400,8 @@ ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients
typedef struct VolumeIntegrateState {
/* Volume segment extents. */
- float start_t;
- float end_t;
+ float tmin;
+ float tmax;
/* If volume is absorption-only up to this point, and no probabilistic
* scattering or termination has been used yet. */
@@ -413,27 +423,27 @@ ccl_device_forceinline void volume_integrate_step_scattering(
ccl_private const Ray *ray,
const float3 equiangular_light_P,
ccl_private const VolumeShaderCoefficients &ccl_restrict coeff,
- const float3 transmittance,
+ const Spectrum transmittance,
ccl_private VolumeIntegrateState &ccl_restrict vstate,
ccl_private VolumeIntegrateResult &ccl_restrict result)
{
/* Pick random color channel, we use the Veach one-sample
* model with balance heuristic for the channels. */
- const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
- float3 channel_pdf;
+ const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ Spectrum channel_pdf;
const int channel = volume_sample_channel(
albedo, result.indirect_throughput, vstate.rphase, &channel_pdf);
/* Equiangular sampling for direct lighting. */
if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) {
- if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t &&
+ if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax &&
vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
- const float new_dt = result.direct_t - vstate.start_t;
- const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+ const float new_dt = result.direct_t - vstate.tmin;
+ const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
result.direct_scatter = true;
result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf;
- shader_copy_volume_phases(&result.direct_phases, sd);
+ volume_shader_copy_phases(&result.direct_phases, sd);
/* Multiple importance sampling. */
if (vstate.use_mis) {
@@ -458,10 +468,10 @@ ccl_device_forceinline void volume_integrate_step_scattering(
/* compute sampling distance */
const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel);
const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t;
- const float new_t = vstate.start_t + new_dt;
+ const float new_t = vstate.tmin + new_dt;
/* transmittance and pdf */
- const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+ const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance);
if (vstate.distance_pdf * distance_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
@@ -469,7 +479,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
result.indirect_scatter = true;
result.indirect_t = new_t;
result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf;
- shader_copy_volume_phases(&result.indirect_phases, sd);
+ volume_shader_copy_phases(&result.indirect_phases, sd);
if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) {
/* If using distance sampling for direct light, just copy parameters
@@ -477,7 +487,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
result.direct_scatter = true;
result.direct_t = result.indirect_t;
result.direct_throughput = result.indirect_throughput;
- shader_copy_volume_phases(&result.direct_phases, sd);
+ volume_shader_copy_phases(&result.direct_phases, sd);
/* Multiple importance sampling. */
if (vstate.use_mis) {
@@ -528,7 +538,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
volume_step_init(kg,
rng_state,
object_step_size,
- ray->t,
+ ray->tmin,
+ ray->tmax,
&step_size,
&step_shade_offset,
&steps_offset,
@@ -536,11 +547,11 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Initialize volume integration state. */
VolumeIntegrateState vstate ccl_optional_struct_init;
- vstate.start_t = 0.0f;
- vstate.end_t = 0.0f;
+ vstate.tmin = ray->tmin;
+ vstate.tmax = ray->tmin;
vstate.absorption_only = true;
- vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
- vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
+ vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE);
+ vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_CHANNEL);
/* Multiple importance sampling: pick between equiangular and distance sampling strategy. */
vstate.direct_sample_method = direct_sample_method;
@@ -559,7 +570,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
vstate.distance_pdf = 1.0f;
/* Initialize volume integration result. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
result.direct_throughput = throughput;
result.indirect_throughput = throughput;
@@ -572,14 +583,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
# ifdef __DENOISING_FEATURES__
const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) &
PATH_RAY_DENOISING_FEATURES);
- float3 accum_albedo = zero_float3();
+ Spectrum accum_albedo = zero_spectrum();
# endif
- float3 accum_emission = zero_float3();
+ Spectrum accum_emission = zero_spectrum();
for (int i = 0; i < max_steps; i++) {
/* Advance to new position */
- vstate.end_t = min(ray->t, (i + steps_offset) * step_size);
- const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset;
+ vstate.tmax = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size);
+ const float shade_t = vstate.tmin + (vstate.tmax - vstate.tmin) * step_shade_offset;
sd->P = ray->P + ray->D * shade_t;
/* compute segment */
@@ -588,17 +599,17 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
const int closure_flag = sd->flag;
/* Evaluate transmittance over segment. */
- const float dt = (vstate.end_t - vstate.start_t);
- const float3 transmittance = (closure_flag & SD_EXTINCTION) ?
- volume_color_transmittance(coeff.sigma_t, dt) :
- one_float3();
+ const float dt = (vstate.tmax - vstate.tmin);
+ const Spectrum transmittance = (closure_flag & SD_EXTINCTION) ?
+ volume_color_transmittance(coeff.sigma_t, dt) :
+ one_spectrum();
/* Emission. */
if (closure_flag & SD_EMISSION) {
/* Only write emission before indirect light scatter position, since we terminate
* stepping at that point if we have already found a direct light scatter position. */
if (!result.indirect_scatter) {
- const float3 emission = volume_emission_integrate(
+ const Spectrum emission = volume_emission_integrate(
&coeff, closure_flag, transmittance, dt);
accum_emission += result.indirect_throughput * emission;
}
@@ -609,8 +620,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
# ifdef __DENOISING_FEATURES__
/* Accumulate albedo for denoising features. */
if (write_denoising_features && (closure_flag & SD_SCATTER)) {
- const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
- accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance);
+ const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ accum_albedo += result.indirect_throughput * albedo * (one_spectrum() - transmittance);
}
# endif
@@ -626,13 +637,13 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Stop if nearly all light blocked. */
if (!result.indirect_scatter) {
- if (max3(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) {
- result.indirect_throughput = zero_float3();
+ if (reduce_max(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) {
+ result.indirect_throughput = zero_spectrum();
break;
}
}
else if (!result.direct_scatter) {
- if (max3(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) {
+ if (reduce_max(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) {
break;
}
}
@@ -645,28 +656,27 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
}
/* Stop if at the end of the volume. */
- vstate.start_t = vstate.end_t;
- if (vstate.start_t == ray->t) {
+ vstate.tmin = vstate.tmax;
+ if (vstate.tmin == ray->tmax) {
break;
}
}
/* Write accumulated emission. */
if (!is_zero(accum_emission)) {
- kernel_accum_emission(
+ film_write_volume_emission(
kg, state, accum_emission, render_buffer, object_lightgroup(kg, sd->object));
}
# ifdef __DENOISING_FEATURES__
/* Write denoising features. */
if (write_denoising_features) {
- kernel_write_denoising_features_volume(
+ film_write_denoising_features_volume(
kg, state, accum_albedo, result.indirect_scatter, render_buffer);
}
# endif /* __DENOISING_FEATURES__ */
}
-# ifdef __EMISSION__
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
ccl_device_forceinline bool integrate_volume_sample_light(
@@ -684,11 +694,10 @@ ccl_device_forceinline bool integrate_volume_sample_light(
/* Sample position on a light. */
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const uint bounce = INTEGRATOR_STATE(state, path, bounce);
- float light_u, light_v;
- path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+ const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
if (!light_distribution_sample_from_volume_segment(
- kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls)) {
+ kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, ls)) {
return false;
}
@@ -708,7 +717,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
ccl_private const RNGState *ccl_restrict rng_state,
const float3 P,
ccl_private const ShaderVolumePhases *ccl_restrict phases,
- ccl_private const float3 throughput,
+ ccl_private const Spectrum throughput,
ccl_private LightSample *ccl_restrict ls)
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
@@ -725,11 +734,10 @@ ccl_device_forceinline void integrate_volume_direct_light(
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const uint bounce = INTEGRATOR_STATE(state, path, bounce);
- float light_u, light_v;
- path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+ const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
if (!light_distribution_sample_from_position(
- kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) {
+ kg, rand_light.x, rand_light.y, sd->time, P, bounce, path_flag, ls)) {
return;
}
}
@@ -746,21 +754,21 @@ ccl_device_forceinline void integrate_volume_direct_light(
* non-constant light sources. */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
+ const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
if (is_zero(light_eval)) {
return;
}
/* Evaluate BSDF. */
BsdfEval phase_eval ccl_optional_struct_init;
- const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval);
+ const float phase_pdf = volume_shader_phase_eval(kg, sd, phases, ls->D, &phase_eval);
if (ls->shader & SHADER_USE_MIS) {
float mis_weight = light_sample_mis_weight_nee(kg, ls->pdf, phase_pdf);
bsdf_eval_mul(&phase_eval, mis_weight);
}
- bsdf_eval_mul3(&phase_eval, light_eval / ls->pdf);
+ bsdf_eval_mul(&phase_eval, light_eval / ls->pdf);
/* Path termination. */
const float terminate = path_state_rng_light_termination(kg, rng_state);
@@ -774,8 +782,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
const bool is_light = light_sample_is_light(ls);
/* Branch off shadow kernel. */
- INTEGRATOR_SHADOW_PATH_INIT(
- shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow);
+ IntegratorShadowState shadow_state = integrator_shadow_path_init(
+ kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
/* Write shadow ray and associated state to global memory. */
integrator_state_write_shadow_ray(kg, shadow_state, &ray);
@@ -789,11 +797,11 @@ ccl_device_forceinline void integrate_volume_direct_light(
const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
- const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
+ const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- packed_float3 pass_diffuse_weight;
- packed_float3 pass_glossy_weight;
+ PackedSpectrum pass_diffuse_weight;
+ PackedSpectrum pass_glossy_weight;
if (shadow_flag & PATH_RAY_ANY_PASS) {
/* Indirect bounce, use weights from earlier surface or volume bounce. */
@@ -803,8 +811,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
else {
/* Direct light, no diffuse/glossy distinction needed for volumes. */
shadow_flag |= PATH_RAY_VOLUME_PASS;
- pass_diffuse_weight = packed_float3(one_float3());
- pass_glossy_weight = packed_float3(zero_float3());
+ pass_diffuse_weight = one_spectrum();
+ pass_glossy_weight = zero_spectrum();
}
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
@@ -842,7 +850,6 @@ ccl_device_forceinline void integrate_volume_direct_light(
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
}
-# endif
/* Path tracing: scatter in new direction using phase function */
ccl_device_forceinline bool integrate_volume_phase_scatter(
@@ -854,24 +861,15 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT);
- float phase_u, phase_v;
- path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v);
+ const float2 rand_phase = path_state_rng_2D(kg, rng_state, PRNG_VOLUME_PHASE);
/* Phase closure, sample direction. */
float phase_pdf;
BsdfEval phase_eval ccl_optional_struct_init;
float3 phase_omega_in ccl_optional_struct_init;
- differential3 phase_domega_in ccl_optional_struct_init;
-
- const int label = shader_volume_phase_sample(kg,
- sd,
- phases,
- phase_u,
- phase_v,
- &phase_eval,
- &phase_omega_in,
- &phase_domega_in,
- &phase_pdf);
+
+ const int label = volume_shader_phase_sample(
+ kg, sd, phases, rand_phase, &phase_eval, &phase_omega_in, &phase_pdf);
if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) {
return false;
@@ -880,28 +878,27 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
/* Setup ray. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in);
- INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
# ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
- INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
# endif
// Save memory by storing last hit prim and object in isect
INTEGRATOR_STATE_WRITE(state, isect, prim) = sd->prim;
INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object;
/* Update throughput. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf;
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf;
INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase;
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3();
- INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3();
+ INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum();
+ INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum();
}
/* Update path state */
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
- INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
@@ -1021,7 +1018,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
integrator_state_read_isect(kg, state, &isect);
/* Set ray length to current segment. */
- ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
+ ray.tmax = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX;
/* Clean volume stack for background rays. */
if (isect.prim == PRIM_NONE) {
@@ -1032,13 +1029,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
if (event == VOLUME_PATH_SCATTERED) {
/* Queue intersect_closest kernel. */
- INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
+ integrator_path_next(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME,
DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
return;
}
else if (event == VOLUME_PATH_MISSED) {
/* End path. */
- INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
+ integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
return;
}
else {
diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h
deleted file mode 100644
index 4da92929366..00000000000
--- a/intern/cycles/kernel/integrator/shader_eval.h
+++ /dev/null
@@ -1,952 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-/* Functions to evaluate shaders and use the resulting shader closures. */
-
-#pragma once
-
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/bsdf.h"
-#include "kernel/closure/bsdf_util.h"
-#include "kernel/closure/emissive.h"
-
-#include "kernel/film/accumulate.h"
-
-#include "kernel/svm/svm.h"
-
-#ifdef __OSL__
-# include "kernel/osl/shader.h"
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-/* Merging */
-
-#if defined(__VOLUME__)
-ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd)
-{
- /* Merge identical closures to save closure space with stacked volumes. */
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sci = &sd->closure[i];
-
- if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
- continue;
- }
-
- for (int j = i + 1; j < sd->num_closure; j++) {
- ccl_private ShaderClosure *scj = &sd->closure[j];
- if (sci->type != scj->type) {
- continue;
- }
-
- ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *)
- sci;
- ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *)
- scj;
- if (!(hgi->g == hgj->g)) {
- continue;
- }
-
- sci->weight += scj->weight;
- sci->sample_weight += scj->sample_weight;
-
- int size = sd->num_closure - (j + 1);
- if (size > 0) {
- for (int k = 0; k < size; k++) {
- scj[k] = scj[k + 1];
- }
- }
-
- sd->num_closure--;
- kernel_assert(sd->num_closure >= 0);
- j--;
- }
- }
-}
-
-ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict
- phases,
- ccl_private const ShaderData *ccl_restrict sd)
-{
- phases->num_closure = 0;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *from_sc = &sd->closure[i];
- ccl_private const HenyeyGreensteinVolume *from_hg =
- (ccl_private const HenyeyGreensteinVolume *)from_sc;
-
- if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
- ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
-
- to_sc->weight = from_sc->weight;
- to_sc->sample_weight = from_sc->sample_weight;
- to_sc->g = from_hg->g;
- phases->num_closure++;
- if (phases->num_closure >= MAX_VOLUME_CLOSURE) {
- break;
- }
- }
- }
-}
-#endif /* __VOLUME__ */
-
-ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg,
- ConstIntegratorState state,
- ccl_private ShaderData *sd,
- const uint32_t path_flag)
-{
- /* Filter out closures. */
- if (kernel_data.integrator.filter_closures) {
- if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) {
- sd->closure_emission_background = zero_float3();
- }
-
- if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) {
- sd->flag &= ~SD_BSDF_HAS_EVAL;
- }
-
- if (path_flag & PATH_RAY_CAMERA) {
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
-
- if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) &&
- (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) ||
- (CLOSURE_IS_BSDF_GLOSSY(sc->type) &&
- (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) ||
- (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) &&
- (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) {
- sc->type = CLOSURE_NONE_ID;
- sc->sample_weight = 0.0f;
- }
- else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) &&
- (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) {
- sc->type = CLOSURE_HOLDOUT_ID;
- sc->sample_weight = 0.0f;
- sd->flag |= SD_HOLDOUT;
- }
- }
- }
- }
-
- /* Defensive sampling.
- *
- * We can likely also do defensive sampling at deeper bounces, particularly
- * for cases like a perfect mirror but possibly also others. This will need
- * a good heuristic. */
- if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) ==
- 0 &&
- sd->num_closure > 1) {
- float sum = 0.0f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sum += sc->sample_weight;
- }
- }
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
- }
- }
- }
-
- /* Filter glossy.
- *
- * Blurring of bsdf after bounces, for rays that have a small likelihood
- * of following this particular path (diffuse, rough glossy) */
- if (kernel_data.integrator.filter_glossy != FLT_MAX
-#ifdef __MNEE__
- && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID)
-#endif
- ) {
- float blur_pdf = kernel_data.integrator.filter_glossy *
- INTEGRATOR_STATE(state, path, min_ray_pdf);
-
- if (blur_pdf < 1.0f) {
- float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF(sc->type)) {
- bsdf_blur(kg, sc, blur_roughness);
- }
- }
- }
- }
-}
-
-/* BSDF */
-
-ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd,
- const float3 omega_in)
-{
- return dot(sd->N, omega_in) < 0.0f;
-}
-
-ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_shader_flags)
-{
- if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) {
- return false;
- }
- if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) {
- if (CLOSURE_IS_BSDF_DIFFUSE(type)) {
- return true;
- }
- }
- if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) {
- if (CLOSURE_IS_BSDF_GLOSSY(type)) {
- return true;
- }
- }
- if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) {
- if (CLOSURE_IS_BSDF_TRANSMISSION(type)) {
- return true;
- }
- }
- return false;
-}
-
-ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg,
- ccl_private ShaderData *sd,
- const float3 omega_in,
- const bool is_transmission,
- ccl_private const ShaderClosure *skip_sc,
- ccl_private BsdfEval *result_eval,
- float sum_pdf,
- float sum_sample_weight,
- const uint light_shader_flags)
-{
- /* This is the veach one-sample model with balance heuristic,
- * some PDF factors drop out when using balance heuristic weighting. */
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (sc == skip_sc) {
- continue;
- }
-
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- if (CLOSURE_IS_BSDF(sc->type) && !_shader_bsdf_exclude(sc->type, light_shader_flags)) {
- float bsdf_pdf = 0.0f;
- float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf);
-
- if (bsdf_pdf != 0.0f) {
- bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
- sum_pdf += bsdf_pdf * sc->sample_weight;
- }
- }
-
- sum_sample_weight += sc->sample_weight;
- }
- }
-
- return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
-}
-
-#ifndef __KERNEL_CUDA__
-ccl_device
-#else
-ccl_device_inline
-#endif
- float
- shader_bsdf_eval(KernelGlobals kg,
- ccl_private ShaderData *sd,
- const float3 omega_in,
- const bool is_transmission,
- ccl_private BsdfEval *bsdf_eval,
- const uint light_shader_flags)
-{
- bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_float3());
-
- return _shader_bsdf_multi_eval(
- kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags);
-}
-
-/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */
-ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick(
- ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu)
-{
- int sampled = 0;
-
- if (sd->num_closure > 1) {
- /* Pick a BSDF or BSSRDF based on sample weights. */
- float sum = 0.0f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sum += sc->sample_weight;
- }
- }
-
- float r = (*randu) * sum;
- float partial_sum = 0.0f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- float next_sum = partial_sum + sc->sample_weight;
-
- if (r < next_sum) {
- sampled = i;
-
- /* Rescale to reuse for direction sample, to better preserve stratification. */
- *randu = (r - partial_sum) / sc->sample_weight;
- break;
- }
-
- partial_sum = next_sum;
- }
- }
- }
-
- return &sd->closure[sampled];
-}
-
-/* Return weight for picked BSSRDF. */
-ccl_device_inline float3
-shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd,
- ccl_private const ShaderClosure *ccl_restrict bssrdf_sc)
-{
- float3 weight = bssrdf_sc->weight;
-
- if (sd->num_closure > 1) {
- float sum = 0.0f;
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sum += sc->sample_weight;
- }
- }
- weight *= sum / bssrdf_sc->sample_weight;
- }
-
- return weight;
-}
-
-/* Sample direction for picked BSDF, and return evaluation and pdf for all
- * BSDFs combined using MIS. */
-ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
- ccl_private ShaderData *sd,
- ccl_private const ShaderClosure *sc,
- float randu,
- float randv,
- ccl_private BsdfEval *bsdf_eval,
- ccl_private float3 *omega_in,
- ccl_private differential3 *domega_in,
- ccl_private float *pdf)
-{
- /* BSSRDF should already have been handled elsewhere. */
- kernel_assert(CLOSURE_IS_BSDF(sc->type));
-
- int label;
- float3 eval = zero_float3();
-
- *pdf = 0.0f;
- label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
-
- if (*pdf != 0.0f) {
- bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
-
- if (sd->num_closure > 1) {
- const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in);
- float sweight = sc->sample_weight;
- *pdf = _shader_bsdf_multi_eval(
- kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0);
- }
- }
-
- return label;
-}
-
-ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd)
-{
- float roughness = 0.0f;
- float sum_weight = 0.0f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF(sc->type)) {
- /* sqrt once to undo the squaring from multiplying roughness on the
- * two axes, and once for the squared roughness convention. */
- float weight = fabsf(average(sc->weight));
- roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
- sum_weight += weight;
- }
- }
-
- return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
-}
-
-ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- if (sd->flag & SD_HAS_ONLY_VOLUME) {
- return one_float3();
- }
- else if (sd->flag & SD_TRANSPARENT) {
- return sd->closure_transparent_extinction;
- }
- else {
- return zero_float3();
- }
-}
-
-ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd)
-{
- if (sd->flag & SD_TRANSPARENT) {
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
-
- if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
- sc->sample_weight = 0.0f;
- sc->weight = zero_float3();
- }
- }
-
- sd->flag &= ~SD_TRANSPARENT;
- }
-}
-
-ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd);
-
- alpha = max(alpha, zero_float3());
- alpha = min(alpha, one_float3());
-
- return alpha;
-}
-
-ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 eval = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type))
- eval += sc->weight;
- }
-
- return eval;
-}
-
-ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 eval = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
- eval += sc->weight;
- }
-
- return eval;
-}
-
-ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 eval = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
- eval += sc->weight;
- }
-
- return eval;
-}
-
-ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 N = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
- N += sc->N * fabsf(average(sc->weight));
- }
-
- return (is_zero(N)) ? sd->N : normalize(N);
-}
-
-ccl_device float3 shader_bsdf_ao(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- const float ao_factor,
- ccl_private float3 *N_)
-{
- float3 eval = zero_float3();
- float3 N = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
- ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
- eval += sc->weight * ao_factor;
- N += bsdf->N * fabsf(average(sc->weight));
- }
- }
-
- *N_ = (is_zero(N)) ? sd->N : normalize(N);
- return eval;
-}
-
-#ifdef __SUBSURFACE__
-ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd)
-{
- float3 N = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSSRDF(sc->type)) {
- ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
- float avg_weight = fabsf(average(sc->weight));
-
- N += bssrdf->N * avg_weight;
- }
- }
-
- return (is_zero(N)) ? sd->N : normalize(N);
-}
-#endif /* __SUBSURFACE__ */
-
-/* Constant emission optimization */
-
-ccl_device bool shader_constant_emission_eval(KernelGlobals kg,
- int shader,
- ccl_private float3 *eval)
-{
- int shader_index = shader & SHADER_MASK;
- int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags;
-
- if (shader_flag & SD_HAS_CONSTANT_EMISSION) {
- *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0],
- kernel_tex_fetch(__shaders, shader_index).constant_emission[1],
- kernel_tex_fetch(__shaders, shader_index).constant_emission[2]);
-
- return true;
- }
-
- return false;
-}
-
-/* Background */
-
-ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd)
-{
- if (sd->flag & SD_EMISSION) {
- return sd->closure_emission_background;
- }
- else {
- return zero_float3();
- }
-}
-
-/* Emission */
-
-ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd)
-{
- if (sd->flag & SD_EMISSION) {
- return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
- }
- else {
- return zero_float3();
- }
-}
-
-/* Holdout */
-
-ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd)
-{
- float3 weight = zero_float3();
-
- /* For objects marked as holdout, preserve transparency and remove all other
- * closures, replacing them with a holdout weight. */
- if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) {
- weight = one_float3() - sd->closure_transparent_extinction;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
- if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
- sc->type = NBUILTIN_CLOSURES;
- }
- }
-
- sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF));
- }
- else {
- weight = one_float3();
- }
- }
- else {
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_HOLDOUT(sc->type)) {
- weight += sc->weight;
- }
- }
- }
-
- return weight;
-}
-
-/* Surface Evaluation */
-
-template<uint node_feature_mask, typename ConstIntegratorGenericState>
-ccl_device void shader_eval_surface(KernelGlobals kg,
- ConstIntegratorGenericState state,
- ccl_private ShaderData *ccl_restrict sd,
- ccl_global float *ccl_restrict buffer,
- uint32_t path_flag,
- bool use_caustics_storage = false)
-{
- /* If path is being terminated, we are tracing a shadow ray or evaluating
- * emission, then we don't need to store closures. The emission and shadow
- * shader data also do not have a closure array to save GPU memory. */
- int max_closures;
- if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
- max_closures = 0;
- }
- else {
- max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures;
- }
-
- sd->num_closure = 0;
- sd->num_closure_left = max_closures;
-
-#ifdef __OSL__
- if (kg->osl) {
- if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
- OSLShader::eval_background(kg, state, sd, path_flag);
- }
- else {
- OSLShader::eval_surface(kg, state, sd, path_flag);
- }
- }
- else
-#endif
- {
-#ifdef __SVM__
- svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag);
-#else
- if (sd->object == OBJECT_NONE) {
- sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f);
- sd->flag |= SD_EMISSION;
- }
- else {
- ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
- sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f));
- if (bsdf != NULL) {
- bsdf->N = sd->N;
- sd->flag |= bsdf_diffuse_setup(bsdf);
- }
- }
-#endif
- }
-}
-
-/* Volume */
-
-#ifdef __VOLUME__
-
-ccl_device_inline float _shader_volume_phase_multi_eval(
- ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumePhases *phases,
- const float3 omega_in,
- int skip_phase,
- ccl_private BsdfEval *result_eval,
- float sum_pdf,
- float sum_sample_weight)
-{
- for (int i = 0; i < phases->num_closure; i++) {
- if (i == skip_phase)
- continue;
-
- ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
- float phase_pdf = 0.0f;
- float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
-
- if (phase_pdf != 0.0f) {
- bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
- sum_pdf += phase_pdf * svc->sample_weight;
- }
-
- sum_sample_weight += svc->sample_weight;
- }
-
- return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
-}
-
-ccl_device float shader_volume_phase_eval(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumePhases *phases,
- const float3 omega_in,
- ccl_private BsdfEval *phase_eval)
-{
- bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_float3());
-
- return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
-}
-
-ccl_device int shader_volume_phase_sample(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumePhases *phases,
- float randu,
- float randv,
- ccl_private BsdfEval *phase_eval,
- ccl_private float3 *omega_in,
- ccl_private differential3 *domega_in,
- ccl_private float *pdf)
-{
- int sampled = 0;
-
- if (phases->num_closure > 1) {
- /* pick a phase closure based on sample weights */
- float sum = 0.0f;
-
- for (sampled = 0; sampled < phases->num_closure; sampled++) {
- ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
- sum += svc->sample_weight;
- }
-
- float r = randu * sum;
- float partial_sum = 0.0f;
-
- for (sampled = 0; sampled < phases->num_closure; sampled++) {
- ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
- float next_sum = partial_sum + svc->sample_weight;
-
- if (r <= next_sum) {
- /* Rescale to reuse for BSDF direction sample. */
- randu = (r - partial_sum) / svc->sample_weight;
- break;
- }
-
- partial_sum = next_sum;
- }
-
- if (sampled == phases->num_closure) {
- *pdf = 0.0f;
- return LABEL_NONE;
- }
- }
-
- /* todo: this isn't quite correct, we don't weight anisotropy properly
- * depending on color channels, even if this is perhaps not a common case */
- ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
- int label;
- float3 eval = zero_float3();
-
- *pdf = 0.0f;
- label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf);
-
- if (*pdf != 0.0f) {
- bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
- }
-
- return label;
-}
-
-ccl_device int shader_phase_sample_closure(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumeClosure *sc,
- float randu,
- float randv,
- ccl_private BsdfEval *phase_eval,
- ccl_private float3 *omega_in,
- ccl_private differential3 *domega_in,
- ccl_private float *pdf)
-{
- int label;
- float3 eval = zero_float3();
-
- *pdf = 0.0f;
- label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
-
- if (*pdf != 0.0f)
- bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
-
- return label;
-}
-
-/* Volume Evaluation */
-
-template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState>
-ccl_device_inline void shader_eval_volume(KernelGlobals kg,
- ConstIntegratorGenericState state,
- ccl_private ShaderData *ccl_restrict sd,
- const uint32_t path_flag,
- StackReadOp stack_read)
-{
- /* If path is being terminated, we are tracing a shadow ray or evaluating
- * emission, then we don't need to store closures. The emission and shadow
- * shader data also do not have a closure array to save GPU memory. */
- int max_closures;
- if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
- max_closures = 0;
- }
- else {
- max_closures = kernel_data.max_closures;
- }
-
- /* reset closures once at the start, we will be accumulating the closures
- * for all volumes in the stack into a single array of closures */
- sd->num_closure = 0;
- sd->num_closure_left = max_closures;
- sd->flag = 0;
- sd->object_flag = 0;
-
- for (int i = 0;; i++) {
- const VolumeStack entry = stack_read(i);
- if (entry.shader == SHADER_NONE) {
- break;
- }
-
- /* Setup shader-data from stack. it's mostly setup already in
- * shader_setup_from_volume, this switching should be quick. */
- sd->object = entry.object;
- sd->lamp = LAMP_NONE;
- sd->shader = entry.shader;
-
- sd->flag &= ~SD_SHADER_FLAGS;
- sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
- sd->object_flag &= ~SD_OBJECT_FLAGS;
-
- if (sd->object != OBJECT_NONE) {
- sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object);
-
-# ifdef __OBJECT_MOTION__
- /* todo: this is inefficient for motion blur, we should be
- * caching matrices instead of recomputing them each step */
- shader_setup_object_transforms(kg, sd, sd->time);
-
- if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) != 0) {
- AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY);
- kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND);
-
- const float3 P = sd->P;
- const float velocity_scale = kernel_tex_fetch(__objects, sd->object).velocity_scale;
- const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ?
- 0.5f :
- 0.0f;
- const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ?
- (1.0f - kernel_data.cam.shuttertime) + sd->time :
- sd->time;
-
- /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity
- * existed, or will exist, at the given time:
- *
- * `phi(x, T) = phi(x - (T - t) * u(x, T), t)`
- *
- * where
- *
- * x : position
- * T : super-sampled time (or ray time)
- * t : current time of the simulation (in rendering we assume this is center frame with
- * relative time = 0)
- * phi : the volume quantity
- * u : the velocity field
- *
- * But first we need to determine the velocity field `u(x, T)`, which we can estimate also
- * using semi-lagrangian advection.
- *
- * `u(x, T) = u(x - (T - t) * u(x, T), t)`
- *
- * This is the typical way to model self-advection in fluid dynamics, however, we do not
- * account for other forces affecting the velocity during simulation (pressure, buoyancy,
- * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better
- * results, a higher order interpolation scheme can be used (at the cost of more lookups),
- * or an interpolation of the velocity fields for the previous and next frames could also
- * be used to estimate `u(x, T)` (which will cost more memory and lookups).
- *
- * References:
- * "Eulerian Motion Blur", Kim and Ko, 2007
- * "Production Volume Rendering", Wreninge et al., 2012
- */
-
- /* Find velocity. */
- float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
- object_dir_transform(kg, sd, &velocity);
-
- /* Find advected P. */
- sd->P = P - (time - time_offset) * velocity_scale * velocity;
-
- /* Find advected velocity. */
- velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
- object_dir_transform(kg, sd, &velocity);
-
- /* Find advected P. */
- sd->P = P - (time - time_offset) * velocity_scale * velocity;
- }
-# endif
- }
-
- /* evaluate shader */
-# ifdef __SVM__
-# ifdef __OSL__
- if (kg->osl) {
- OSLShader::eval_volume(kg, state, sd, path_flag);
- }
- else
-# endif
- {
- svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>(
- kg, state, sd, NULL, path_flag);
- }
-# endif
-
- /* Merge closures to avoid exceeding number of closures limit. */
- if (!shadow) {
- if (i > 0) {
- shader_merge_volume_closures(sd);
- }
- }
- }
-}
-
-#endif /* __VOLUME__ */
-
-/* Displacement Evaluation */
-
-template<typename ConstIntegratorGenericState>
-ccl_device void shader_eval_displacement(KernelGlobals kg,
- ConstIntegratorGenericState state,
- ccl_private ShaderData *sd)
-{
- sd->num_closure = 0;
- sd->num_closure_left = 0;
-
- /* this will modify sd->P */
-#ifdef __SVM__
-# ifdef __OSL__
- if (kg->osl)
- OSLShader::eval_displacement(kg, state, sd);
- else
-# endif
- {
- svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>(
- kg, state, sd, NULL, 0);
- }
-#endif
-}
-
-/* Cryptomatte */
-
-ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader)
-{
- return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id;
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h
index 42d44580f80..a620853faea 100644
--- a/intern/cycles/kernel/integrator/shadow_catcher.h
+++ b/intern/cycles/kernel/integrator/shadow_catcher.h
@@ -3,7 +3,6 @@
#pragma once
-#include "kernel/film/write_passes.h"
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/state_util.h"
@@ -50,7 +49,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg,
ConstIntegratorState state)
{
- if (INTEGRATOR_PATH_IS_TERMINATED) {
+ if (integrator_path_is_terminated(state)) {
return false;
}
@@ -76,28 +75,6 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t
return path_flag & PATH_RAY_SHADOW_CATCHER_PASS;
}
-/* Write shadow catcher passes on a bounce from the shadow catcher object. */
-ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
- KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
-{
- kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
- kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-
- const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
- const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
- kernel_data.film.pass_stride;
- ccl_global float *buffer = render_buffer + render_buffer_offset;
-
- /* Count sample for the shadow catcher object. */
- kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
-
- /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
- * transparency to the matte. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
- average(throughput));
-}
-
#endif /* __SHADOW_CATCHER__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h
index eaee65ada40..3b490ecffdd 100644
--- a/intern/cycles/kernel/integrator/shadow_state_template.h
+++ b/intern/cycles/kernel/integrator/shadow_state_template.h
@@ -27,15 +27,15 @@ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_T
/* enum PathRayFlag */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* Throughput. */
-KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING)
/* Throughput for shadow pass. */
KERNEL_STRUCT_MEMBER(shadow_path,
- packed_float3,
+ PackedSpectrum,
unshadowed_throughput,
KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE)
/* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
-KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
-KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
/* Number of intersections found by ray-tracing. */
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING)
/* Light group. */
@@ -47,7 +47,8 @@ KERNEL_STRUCT_END(shadow_path)
KERNEL_STRUCT_BEGIN(shadow_ray)
KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING)
-KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_ray, float, tmin, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(shadow_ray, int, object, KERNEL_FEATURE_PATH_TRACING)
diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h
index d6fef27f344..d1907bd6e16 100644
--- a/intern/cycles/kernel/integrator/state.h
+++ b/intern/cycles/kernel/integrator/state.h
@@ -127,6 +127,9 @@ typedef struct IntegratorStateGPU {
/* Index of main path which will be used by a next shadow catcher split. */
ccl_global int *next_main_path_index;
+
+ /* Divisor used to partition active indices by locality when sorting by material. */
+ uint sort_partition_divisor;
} IntegratorStateGPU;
/* Abstraction
@@ -137,7 +140,7 @@ typedef struct IntegratorStateGPU {
* happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors
* from a kernel which operates on a shadow catcher state will cause bad memory access. */
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
/* Scalar access on CPU. */
@@ -156,7 +159,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState;
# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
((state)->nested_struct[array_index].member)
-#else /* __KERNEL_CPU__ */
+#else /* !__KERNEL_GPU__ */
/* Array access on GPU with Structure-of-Arrays. */
@@ -177,6 +180,6 @@ typedef int ConstIntegratorShadowState;
# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)
-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h
index fed74d49434..4b03c665e17 100644
--- a/intern/cycles/kernel/integrator/state_flow.h
+++ b/intern/cycles/kernel/integrator/state_flow.h
@@ -10,125 +10,196 @@ CCL_NAMESPACE_BEGIN
/* Control Flow
*
- * Utilities for control flow between kernels. The implementation may differ per device
- * or even be handled on the host side. To abstract such differences, experiment with
- * different implementations and for debugging, this is abstracted using macros.
+ * Utilities for control flow between kernels. The implementation is different between CPU and
+ * GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
*
* There is a main path for regular path tracing from the camera. Shadows for next
* event estimation branch off from this into their own path, that may be computed in
- * parallel while the main path continues.
+ * parallel while the main path continues. Additionally, shading kernels are sorted using
+ * a key for coherence.
*
* Each kernel on the main path must call one of these functions. These may not be called
* multiple times from the same kernel.
*
- * INTEGRATOR_PATH_INIT(next_kernel)
- * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel)
- * INTEGRATOR_PATH_TERMINATE(current_kernel)
+ * integrator_path_init(kg, state, next_kernel)
+ * integrator_path_next(kg, state, current_kernel, next_kernel)
+ * integrator_path_terminate(kg, state, current_kernel)
*
* For the shadow path similar functions are used, and again each shadow kernel must call
* one of them, and only once.
*/
-#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0)
-#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \
- (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0)
+/* Return true when the main path has no kernel queued, i.e. its `queued_kernel` is 0. */
+ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
+{
+ return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
+}
+
+/* Return true when the shadow path has no kernel queued, i.e. its `queued_kernel` is 0. */
+ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
+{
+ return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
+}
#ifdef __KERNEL_GPU__
-# define INTEGRATOR_PATH_INIT(next_kernel) \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
- 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
- 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
-
-# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
- IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \
- &kernel_integrator_state.next_shadow_path_index[0], 1); \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
- 1); \
- INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \
- 1); \
- INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
-
-# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
- { \
- const int key_ = key; \
- atomic_fetch_and_add_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
- 1); \
- }
-# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
- { \
- const int key_ = key; \
- atomic_fetch_and_sub_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \
- atomic_fetch_and_add_uint32( \
- &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \
- atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \
- 1); \
- }
+/* GPU variant: queue `next_kernel` as the first kernel of a main path.
+ * Adds 1 to the `num_queued` counter of `next_kernel` and stores `next_kernel` as the path's
+ * `queued_kernel`. */
+ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel next_kernel)
+{
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
+
+/* GPU variant: move a main path from `current_kernel` to `next_kernel`.
+ * Subtracts 1 from the `num_queued` counter of `current_kernel`, adds 1 to the counter of
+ * `next_kernel`, and stores `next_kernel` as the path's `queued_kernel`. */
+ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel)
+{
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
+
+/* GPU variant: terminate a main path that was queued for `current_kernel`.
+ * Subtracts 1 from the `num_queued` counter of `current_kernel` and sets the path's
+ * `queued_kernel` to 0, the value integrator_path_is_terminated() tests for. */
+ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel)
+{
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+}
+
+/* GPU variant: create a shadow path and queue `next_kernel` for it.
+ * The new shadow state is the value returned by a fetch-and-add of 1 on
+ * `next_shadow_path_index[0]`. The `num_queued` counter of `next_kernel` is then incremented and
+ * `next_kernel` is stored as the shadow path's `queued_kernel`.
+ * `state` and `is_ao` are not used by this variant; the non-GPU variant uses them to select the
+ * shadow state. Returns the new shadow state. */
+ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
+ KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
+{
+ IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
+ &kernel_integrator_state.next_shadow_path_index[0], 1);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
+ return shadow_state;
+}
+
+/* GPU variant: move a shadow path from `current_kernel` to `next_kernel`.
+ * Subtracts 1 from the `num_queued` counter of `current_kernel`, adds 1 to the counter of
+ * `next_kernel`, and stores `next_kernel` as the shadow path's `queued_kernel`. */
+ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
+ IntegratorShadowState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel)
+{
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
+}
+
+/* GPU variant: terminate a shadow path that was queued for `current_kernel`.
+ * Subtracts 1 from the `num_queued` counter of `current_kernel` and sets the shadow path's
+ * `queued_kernel` to 0, the value integrator_shadow_path_is_terminated() tests for. */
+ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
+ IntegratorShadowState state,
+ const DeviceKernel current_kernel)
+{
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
+}
+
+/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
+# define INTEGRATOR_SORT_KEY(key, state) \
+ ((key) + kernel_data.max_shaders * ((state) / kernel_integrator_state.sort_partition_divisor))
+
+/* GPU variant: queue `next_kernel` as the first kernel of a main path, with a sort key.
+ * Besides the bookkeeping done by integrator_path_init(), this stores the combined key from
+ * INTEGRATOR_SORT_KEY(key, state) in the path's `shader_sort_key` and adds 1 to
+ * `sort_key_counter[next_kernel]` at that combined key. */
+ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel next_kernel,
+ const uint32_t key)
+{
+ const int key_ = INTEGRATOR_SORT_KEY(key, state);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
+}
+
+/* GPU variant: move a main path from `current_kernel` to `next_kernel`, with a sort key.
+ * Besides the bookkeeping done by integrator_path_next(), this stores the combined key from
+ * INTEGRATOR_SORT_KEY(key, state) in the path's `shader_sort_key` and adds 1 to
+ * `sort_key_counter[next_kernel]` at that combined key. */
+ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel,
+ const uint32_t key)
+{
+ const int key_ = INTEGRATOR_SORT_KEY(key, state);
+ atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
+ 1);
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
+ atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
+}
#else
-# define INTEGRATOR_PATH_INIT(next_kernel) \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
-# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \
- { \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- (void)key; \
- }
-# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \
- { \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- (void)current_kernel; \
- }
-# define INTEGRATOR_PATH_TERMINATE(current_kernel) \
- { \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \
- (void)current_kernel; \
- }
-# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \
- { \
- INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \
- (void)key; \
- (void)current_kernel; \
- }
-
-# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \
- IntegratorShadowState shadow_state = &state->shadow_type; \
- INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
-# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \
- { \
- INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \
- (void)current_kernel; \
- }
-# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \
- { \
- INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \
- (void)current_kernel; \
- }
+/* Non-GPU variant: queue `next_kernel` as the first kernel of a main path.
+ * Only the path's `queued_kernel` is written; no queue counters are updated here. */
+ccl_device_forceinline void integrator_path_init(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel next_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+}
+
+/* Non-GPU variant of the sorted init: behaves like integrator_path_init().
+ * `key` is accepted for interface parity with the GPU variant and is ignored. */
+ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel next_kernel,
+ const uint32_t key)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ (void)key;
+}
+
+/* Non-GPU variant: set `next_kernel` as the main path's `queued_kernel`.
+ * `current_kernel` is accepted for interface parity with the GPU variant and is ignored. */
+ccl_device_forceinline void integrator_path_next(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ (void)current_kernel;
+}
+
+/* Non-GPU variant: terminate a main path by setting its `queued_kernel` to 0, the value
+ * integrator_path_is_terminated() tests for. `current_kernel` is ignored. */
+ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
+ (void)current_kernel;
+}
+
+/* Non-GPU variant of the sorted transition: behaves like integrator_path_next().
+ * `key` and `current_kernel` are accepted for interface parity with the GPU variant and are
+ * ignored. */
+ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg,
+ IntegratorState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel,
+ const uint32_t key)
+{
+ INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
+ (void)key;
+ (void)current_kernel;
+}
+
+/* Non-GPU variant: set up a shadow path and queue `next_kernel` for it.
+ * The shadow state is a member of the main path state: `state->ao` when `is_ao` is true,
+ * `state->shadow` otherwise. Returns a pointer to the selected shadow state. */
+ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
+ KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
+{
+ IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
+ return shadow_state;
+}
+
+/* Non-GPU variant: set `next_kernel` as the shadow path's `queued_kernel`.
+ * `current_kernel` is accepted for interface parity with the GPU variant and is ignored. */
+ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg,
+ IntegratorShadowState state,
+ const DeviceKernel current_kernel,
+ const DeviceKernel next_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
+ (void)current_kernel;
+}
+
+/* Non-GPU variant: terminate a shadow path by setting its `queued_kernel` to 0, the value
+ * integrator_shadow_path_is_terminated() tests for. `current_kernel` is ignored. */
+ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg,
+ IntegratorShadowState state,
+ const DeviceKernel current_kernel)
+{
+ INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
+ (void)current_kernel;
+}
#endif
diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h
index e7e6db037b0..f4e280e4cb2 100644
--- a/intern/cycles/kernel/integrator/state_template.h
+++ b/intern/cycles/kernel/integrator/state_template.h
@@ -37,22 +37,21 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayMNEE */
KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING)
/* Multiple importance sampling
- * The PDF of BSDF sampling at the last scatter point, and distance to the
- * last scatter point minus the last ray segment. This distance lets us
- * compute the complete distance through transparent surfaces and volumes. */
+ * The PDF of BSDF sampling at the last scatter point, which is at ray distance
+ * zero. Note that transparency and volume attenuation increase
+ * the ray tmin but keep P unmodified so that this works. */
KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
-KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING)
/* Filter glossy. */
KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
/* Continuation probability for path termination. */
KERNEL_STRUCT_MEMBER(path, float, continuation_probability, KERNEL_FEATURE_PATH_TRACING)
/* Throughput. */
-KERNEL_STRUCT_MEMBER(path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING)
/* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
-KERNEL_STRUCT_MEMBER(path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
-KERNEL_STRUCT_MEMBER(path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
/* Denoising. */
-KERNEL_STRUCT_MEMBER(path, packed_float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING)
+KERNEL_STRUCT_MEMBER(path, PackedSpectrum, denoising_feature_throughput, KERNEL_FEATURE_DENOISING)
/* Shader sorting. */
/* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */
KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING)
@@ -63,7 +62,8 @@ KERNEL_STRUCT_END(path)
KERNEL_STRUCT_BEGIN(ray)
KERNEL_STRUCT_MEMBER(ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING)
-KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float, tmin, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING)
@@ -84,8 +84,8 @@ KERNEL_STRUCT_END(isect)
/*************** Subsurface closure state for subsurface kernel ***************/
KERNEL_STRUCT_BEGIN(subsurface)
-KERNEL_STRUCT_MEMBER(subsurface, packed_float3, albedo, KERNEL_FEATURE_SUBSURFACE)
-KERNEL_STRUCT_MEMBER(subsurface, packed_float3, radius, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, albedo, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, radius, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, packed_float3, Ng, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_END(subsurface)
diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h
index 280db2d1aac..168122d3a78 100644
--- a/intern/cycles/kernel/integrator/state_util.h
+++ b/intern/cycles/kernel/integrator/state_util.h
@@ -17,7 +17,8 @@ ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg,
{
INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D;
- INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t;
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = ray->tmin;
+ INTEGRATOR_STATE_WRITE(state, ray, tmax) = ray->tmax;
INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time;
INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP;
INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD;
@@ -29,7 +30,8 @@ ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg,
{
ray->P = INTEGRATOR_STATE(state, ray, P);
ray->D = INTEGRATOR_STATE(state, ray, D);
- ray->t = INTEGRATOR_STATE(state, ray, t);
+ ray->tmin = INTEGRATOR_STATE(state, ray, tmin);
+ ray->tmax = INTEGRATOR_STATE(state, ray, tmax);
ray->time = INTEGRATOR_STATE(state, ray, time);
ray->dP = INTEGRATOR_STATE(state, ray, dP);
ray->dD = INTEGRATOR_STATE(state, ray, dD);
@@ -42,7 +44,8 @@ ccl_device_forceinline void integrator_state_write_shadow_ray(
{
INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P;
INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D;
- INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = ray->tmin;
+ INTEGRATOR_STATE_WRITE(state, shadow_ray, tmax) = ray->tmax;
INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time;
INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP;
}
@@ -53,7 +56,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg,
{
ray->P = INTEGRATOR_STATE(state, shadow_ray, P);
ray->D = INTEGRATOR_STATE(state, shadow_ray, D);
- ray->t = INTEGRATOR_STATE(state, shadow_ray, t);
+ ray->tmin = INTEGRATOR_STATE(state, shadow_ray, tmin);
+ ray->tmax = INTEGRATOR_STATE(state, shadow_ray, tmax);
ray->time = INTEGRATOR_STATE(state, shadow_ray, time);
ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP);
ray->dD = differential_zero_compact();
@@ -334,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl
return to_state;
}
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int)
{
return INTEGRATOR_STATE(state, path, bounce);
diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h
index b449f807290..15c2cb1c708 100644
--- a/intern/cycles/kernel/integrator/subsurface.h
+++ b/intern/cycles/kernel/integrator/subsurface.h
@@ -15,9 +15,9 @@
#include "kernel/integrator/intersect_volume_stack.h"
#include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
#include "kernel/integrator/subsurface_disk.h"
#include "kernel/integrator/subsurface_random_walk.h"
+#include "kernel/integrator/surface_shader.h"
CCL_NAMESPACE_BEGIN
@@ -38,7 +38,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
/* Setup ray into surface. */
INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N;
- INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX;
+ INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
@@ -50,12 +51,10 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
PATH_RAY_SUBSURFACE_RANDOM_WALK);
/* Compute weight, optionally including Fresnel from entry point. */
- float3 weight = shader_bssrdf_sample_weight(sd, sc);
-# ifdef __PRINCIPLED__
+ Spectrum weight = surface_shader_bssrdf_sample_weight(sd, sc);
if (bssrdf->roughness != FLT_MAX) {
path_flag |= PATH_RAY_SUBSURFACE_USE_FRESNEL;
}
-# endif
if (sd->flag & SD_BACKFACING) {
path_flag |= PATH_RAY_SUBSURFACE_BACKFACING;
@@ -69,8 +68,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
if (INTEGRATOR_STATE(state, path, bounce) == 0) {
- INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3();
- INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3();
+ INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum();
+ INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum();
}
}
@@ -90,7 +89,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg,
/* Get bump mapped normal from shader evaluation at exit point. */
float3 N = sd->N;
if (sd->flag & SD_HAS_BSSRDF_BUMP) {
- N = shader_bssrdf_normal(sd);
+ N = surface_shader_bssrdf_normal(sd);
}
/* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. */
@@ -98,9 +97,8 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg,
sd->num_closure = 0;
sd->num_closure_left = kernel_data.max_closures;
- const float3 weight = one_float3();
+ const Spectrum weight = one_spectrum();
-# ifdef __PRINCIPLED__
if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) {
ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
sd, sizeof(PrincipledDiffuseBsdf), weight);
@@ -111,9 +109,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg,
sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT_EXIT);
}
}
- else
-# endif /* __PRINCIPLED__ */
- {
+ else {
ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
sd, sizeof(DiffuseBsdf), weight);
@@ -147,7 +143,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
/* Update volume stack if needed. */
if (kernel_data.integrator.use_volumes) {
const int object = ss_isect.hits[0].object;
- const int object_flag = kernel_tex_fetch(__object_flag, object);
+ const int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
float3 P = INTEGRATOR_STATE(state, ray, P);
@@ -160,7 +156,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
/* Pretend ray is coming from the outside towards the exit point. This ensures
* correct front/back facing normals.
* TODO: find a more elegant solution? */
- ray.P += ray.D * ray.t * 2.0f;
+ ray.P += ray.D * ray.tmax * 2.0f;
ray.D = -ray.D;
integrator_state_write_isect(kg, state, &ss_isect.hits[0]);
@@ -170,24 +166,30 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM;
const int shader = intersection_get_shader(kg, &ss_isect.hits[0]);
- const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
+ const int shader_flags = kernel_data_fetch(shaders, shader).flags;
const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]);
const bool use_caustics = kernel_data.integrator.use_caustics &&
(object_flags & SD_OBJECT_CAUSTICS);
const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE);
if (use_caustics) {
- INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ integrator_path_next_sorted(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE,
shader);
}
else if (use_raytrace_kernel) {
- INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ integrator_path_next_sorted(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
shader);
}
else {
- INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
+ integrator_path_next_sorted(kg,
+ state,
+ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
shader);
}
diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h
index 34330671748..a44b6a74d7b 100644
--- a/intern/cycles/kernel/integrator/subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/subsurface_disk.h
@@ -9,11 +9,11 @@ CCL_NAMESPACE_BEGIN
* http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
*/
-ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, float r)
+ccl_device_inline Spectrum subsurface_disk_eval(const Spectrum radius, float disk_r, float r)
{
- const float3 eval = bssrdf_eval(radius, r);
+ const Spectrum eval = bssrdf_eval(radius, r);
const float pdf = bssrdf_pdf(radius, disk_r);
- return (pdf > 0.0f) ? eval / pdf : zero_float3();
+ return (pdf > 0.0f) ? eval / pdf : zero_spectrum();
}
/* Subsurface scattering step, from a point on the surface to other
@@ -25,8 +25,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
ccl_private LocalIntersection &ss_isect)
{
- float disk_u, disk_v;
- path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v);
+ float2 rand_disk = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_DISK);
/* Read shading point info from integrator state. */
const float3 P = INTEGRATOR_STATE(state, ray, P);
@@ -37,7 +36,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
/* Read subsurface scattering parameters. */
- const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
+ const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius);
/* Pick random axis in local frame and point on disk. */
float3 disk_N, disk_T, disk_B;
@@ -46,20 +45,20 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
disk_N = Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
- if (disk_v < 0.5f) {
+ if (rand_disk.y < 0.5f) {
pick_pdf_N = 0.5f;
pick_pdf_T = 0.25f;
pick_pdf_B = 0.25f;
- disk_v *= 2.0f;
+ rand_disk.y *= 2.0f;
}
- else if (disk_v < 0.75f) {
+ else if (rand_disk.y < 0.75f) {
float3 tmp = disk_N;
disk_N = disk_T;
disk_T = tmp;
pick_pdf_N = 0.25f;
pick_pdf_T = 0.5f;
pick_pdf_B = 0.25f;
- disk_v = (disk_v - 0.5f) * 4.0f;
+ rand_disk.y = (rand_disk.y - 0.5f) * 4.0f;
}
else {
float3 tmp = disk_N;
@@ -68,21 +67,22 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
pick_pdf_N = 0.25f;
pick_pdf_T = 0.25f;
pick_pdf_B = 0.5f;
- disk_v = (disk_v - 0.75f) * 4.0f;
+ rand_disk.y = (rand_disk.y - 0.75f) * 4.0f;
}
/* Sample point on disk. */
- float phi = M_2PI_F * disk_v;
+ float phi = M_2PI_F * rand_disk.y;
float disk_height, disk_r;
- bssrdf_sample(radius, disk_u, &disk_r, &disk_height);
+ bssrdf_sample(radius, rand_disk.x, &disk_r, &disk_height);
float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
/* Create ray. */
ray.P = P + disk_N * disk_height + disk_P;
ray.D = -disk_N;
- ray.t = 2.0f * disk_height;
+ ray.tmin = 0.0f;
+ ray.tmax = 2.0f * disk_height;
ray.dP = ray_dP;
ray.dD = differential_zero_compact();
ray.time = time;
@@ -107,13 +107,13 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
* traversal algorithm. */
sort_intersections_and_normals(ss_isect.hits, ss_isect.Ng, num_eval_hits);
- float3 weights[BSSRDF_MAX_HITS]; /* TODO: zero? */
+ Spectrum weights[BSSRDF_MAX_HITS]; /* TODO: zero? */
float sum_weights = 0.0f;
for (int hit = 0; hit < num_eval_hits; hit++) {
/* Get geometric normal. */
const int object = ss_isect.hits[hit].object;
- const int object_flag = kernel_tex_fetch(__object_flag, object);
+ const int object_flag = kernel_data_fetch(object_flag, object);
float3 hit_Ng = ss_isect.Ng[hit];
if (path_flag & PATH_RAY_SUBSURFACE_BACKFACING) {
hit_Ng = -hit_Ng;
@@ -125,17 +125,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
/* Transform normal to world space. */
Transform itfm;
- Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
+ object_fetch_transform_motion_test(kg, object, time, &itfm);
hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng));
-
- /* Transform t to world space, except for OptiX and MetalRT where it already is. */
-#ifdef __KERNEL_GPU_RAYTRACING__
- (void)tfm;
-#else
- float3 D = transform_direction(&itfm, ray.D);
- D = normalize(D) * ss_isect.hits[hit].t;
- ss_isect.hits[hit].t = len(transform_direction(&tfm, D));
-#endif
}
/* Quickly retrieve P and Ng without setting up ShaderData. */
@@ -158,7 +149,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
const float r = len(hit_P - P);
/* Evaluate profiles. */
- const float3 weight = subsurface_disk_eval(radius, disk_r, r) * w;
+ const Spectrum weight = subsurface_disk_eval(radius, disk_r, r) * w;
/* Store result. */
ss_isect.Ng[hit] = hit_Ng;
@@ -171,11 +162,12 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
}
/* Use importance resampling, sampling one of the hits proportional to weight. */
- const float r = lcg_step_float(&lcg_state) * sum_weights;
+ const float rand_resample = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_DISK_RESAMPLE);
+ const float r = rand_resample * sum_weights;
float partial_sum = 0.0f;
for (int hit = 0; hit < num_eval_hits; hit++) {
- const float3 weight = weights[hit];
+ const Spectrum weight = weights[hit];
const float sample_weight = average(fabs(weight));
float next_sum = partial_sum + sample_weight;
@@ -188,7 +180,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
ray.P = ray.P + ray.D * ss_isect.hits[hit].t;
ray.D = ss_isect.Ng[hit];
- ray.t = 1.0f;
+ ray.tmin = 0.0f;
+ ray.tmax = 1.0f;
return true;
}
diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h
index b6cd4aae195..a6a59e286c9 100644
--- a/intern/cycles/kernel/integrator/subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h
@@ -65,19 +65,20 @@ ccl_device void subsurface_random_walk_remap(const float albedo,
*sigma_t = sigma_t_prime / (1.0f - g);
}
-ccl_device void subsurface_random_walk_coefficients(const float3 albedo,
- const float3 radius,
+ccl_device void subsurface_random_walk_coefficients(const Spectrum albedo,
+ const Spectrum radius,
const float anisotropy,
- ccl_private float3 *sigma_t,
- ccl_private float3 *alpha,
- ccl_private float3 *throughput)
+ ccl_private Spectrum *sigma_t,
+ ccl_private Spectrum *alpha,
+ ccl_private Spectrum *throughput)
{
- float sigma_t_x, sigma_t_y, sigma_t_z;
- float alpha_x, alpha_y, alpha_z;
-
- subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x);
- subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y);
- subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z);
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ subsurface_random_walk_remap(GET_SPECTRUM_CHANNEL(albedo, i),
+ GET_SPECTRUM_CHANNEL(radius, i),
+ anisotropy,
+ &GET_SPECTRUM_CHANNEL(*sigma_t, i),
+ &GET_SPECTRUM_CHANNEL(*alpha, i));
+ }
/* Throughput already contains closure weight at this point, which includes the
* albedo, as well as closure mixing and Fresnel weights. Divide out the albedo
@@ -88,21 +89,12 @@ ccl_device void subsurface_random_walk_coefficients(const float3 albedo,
* infinite phase functions. To avoid a sharp discontinuity as we go from
* such values to 0.0, increase alpha and reduce the throughput to compensate. */
const float min_alpha = 0.2f;
- if (alpha_x < min_alpha) {
- (*throughput).x *= alpha_x / min_alpha;
- alpha_x = min_alpha;
- }
- if (alpha_y < min_alpha) {
- (*throughput).y *= alpha_y / min_alpha;
- alpha_y = min_alpha;
- }
- if (alpha_z < min_alpha) {
- (*throughput).z *= alpha_z / min_alpha;
- alpha_z = min_alpha;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ if (GET_SPECTRUM_CHANNEL(*alpha, i) < min_alpha) {
+ GET_SPECTRUM_CHANNEL(*throughput, i) *= GET_SPECTRUM_CHANNEL(*alpha, i) / min_alpha;
+ GET_SPECTRUM_CHANNEL(*alpha, i) = min_alpha;
+ }
}
-
- *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
- *alpha = make_float3(alpha_x, alpha_y, alpha_z);
}
/* References for Dwivedi sampling:
@@ -151,12 +143,12 @@ ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, f
return dir.x * T + dir.y * B + dir.z * D;
}
-ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
- float t,
- bool hit,
- ccl_private float3 *transmittance)
+ccl_device_forceinline Spectrum subsurface_random_walk_pdf(Spectrum sigma_t,
+ float t,
+ bool hit,
+ ccl_private Spectrum *transmittance)
{
- float3 T = volume_color_transmittance(sigma_t, t);
+ Spectrum T = volume_color_transmittance(sigma_t, t);
if (transmittance) {
*transmittance = T;
}
@@ -173,8 +165,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
ccl_private Ray &ray,
ccl_private LocalIntersection &ss_isect)
{
- float bssrdf_u, bssrdf_v;
- path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ const float2 rand_bsdf = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF);
const float3 P = INTEGRATOR_STATE(state, ray, P);
const float3 N = INTEGRATOR_STATE(state, ray, D);
@@ -187,7 +178,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* Sample diffuse surface scatter into the object. */
float3 D;
float pdf;
- sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf);
+ sample_cos_hemisphere(-N, rand_bsdf.x, rand_bsdf.y, &D, &pdf);
if (dot(-Ng, D) <= 0.0f) {
return false;
}
@@ -195,7 +186,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* Setup ray. */
ray.P = P;
ray.D = D;
- ray.t = FLT_MAX;
+ ray.tmin = 0.0f;
+ ray.tmax = FLT_MAX;
ray.time = time;
ray.dP = ray_dP;
ray.dD = differential_zero_compact();
@@ -204,22 +196,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
ray.self.light_object = OBJECT_NONE;
ray.self.light_prim = PRIM_NONE;
-#ifndef __KERNEL_GPU_RAYTRACING__
- /* Compute or fetch object transforms. */
- Transform ob_itfm ccl_optional_struct_init;
- Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm);
-#endif
-
/* Convert subsurface to volume coefficients.
* The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */
- const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo);
- const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
+ const Spectrum albedo = INTEGRATOR_STATE(state, subsurface, albedo);
+ const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius);
const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy);
- float3 sigma_t, alpha;
- float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput);
+ Spectrum sigma_t, alpha;
+ Spectrum throughput = INTEGRATOR_STATE_WRITE(state, path, throughput);
subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput);
- float3 sigma_s = sigma_t * alpha;
+ Spectrum sigma_s = sigma_t * alpha;
/* Theoretically it should be better to use the exact alpha for the channel we're sampling at
* each bounce, but in practice there doesn't seem to be a noticeable difference in exchange
@@ -229,7 +215,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
* Since the strength of the guided sampling increases as alpha gets lower, using a value that
* is too low results in fireflies while one that's too high just gives a bit more noise.
* Therefore, the code here uses the highest of the three albedos to be safe. */
- const float diffusion_length = diffusion_length_dwivedi(max3(alpha));
+ const float diffusion_length = diffusion_length_dwivedi(reduce_max(alpha));
if (diffusion_length == 1.0f) {
/* With specific values of alpha the length might become 1, which in asymptotic makes phase to
@@ -242,7 +228,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f));
/* Modify state for RNGs, decorrelated from other paths. */
- rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);
+ rng_state.rng_hash = hash_hp_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);
/* Random walk until we hit the surface again. */
bool hit = false;
@@ -254,10 +240,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f));
#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL
- float3 sigma_s_star = sigma_s * (1.0f - anisotropy);
- float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star;
- float3 sigma_t_org = sigma_t;
- float3 sigma_s_org = sigma_s;
+ Spectrum sigma_s_star = sigma_s * (1.0f - anisotropy);
+ Spectrum sigma_t_star = sigma_t - sigma_s + sigma_s_star;
+ Spectrum sigma_t_org = sigma_t;
+ Spectrum sigma_s_org = sigma_s;
const float anisotropy_org = anisotropy;
const float guided_fraction_org = guided_fraction;
#endif
@@ -269,7 +255,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL
// shadow with local variables according to depth
float anisotropy, guided_fraction;
- float3 sigma_s, sigma_t;
+ Spectrum sigma_s, sigma_t;
if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) {
anisotropy = anisotropy_org;
guided_fraction = guided_fraction_org;
@@ -285,11 +271,11 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
#endif
/* Sample color channel, use MIS with balance heuristic. */
- float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL);
- float3 channel_pdf;
+ float rphase = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_PHASE_CHANNEL);
+ Spectrum channel_pdf;
int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf);
float sample_sigma_t = volume_channel_get(sigma_t, channel);
- float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE);
+ float randt = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_SCATTER_DISTANCE);
/* We need the result of the ray-cast to compute the full guided PDF, so just remember the
* relevant terms to avoid recomputing them later. */
@@ -302,7 +288,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* For the initial ray, we already know the direction, so just do classic distance sampling. */
if (bounce > 0) {
/* Decide whether we should use guided or classic sampling. */
- bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction);
+ bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_STRATEGY) <
+ guided_fraction);
/* Determine if we want to sample away from the incoming interface.
* This only happens if we found a nearby opposite interface, and the probability for it
@@ -316,27 +303,28 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance);
backward_fraction = 1.0f /
(1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length));
- guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction;
+ guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_DIRECTION) <
+ backward_fraction;
}
/* Sample scattering direction. */
- float scatter_u, scatter_v;
- path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v);
+ const float2 rand_scatter = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF);
float cos_theta;
float hg_pdf;
if (guided) {
- cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u);
+ cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, rand_scatter.x);
/* The backwards guiding distribution is just mirrored along `sd->N`, so swapping the
* sign here is enough to sample from that instead. */
if (guide_backward) {
cos_theta = -cos_theta;
}
- float3 newD = direction_from_cosine(N, cos_theta, scatter_v);
+ float3 newD = direction_from_cosine(N, cos_theta, rand_scatter.y);
hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy);
ray.D = newD;
}
else {
- float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf);
+ float3 newD = henyey_greenstrein_sample(
+ ray.D, anisotropy, rand_scatter.x, rand_scatter.y, &hg_pdf);
cos_theta = dot(newD, N);
ray.D = newD;
}
@@ -370,10 +358,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
* chance of connecting to it.
* TODO: Maybe use less than 10 times the mean free path? */
if (bounce == 0) {
- ray.t = max(t, 10.0f / (min3(sigma_t)));
+ ray.tmax = max(t, 10.0f / (reduce_min(sigma_t)));
}
else {
- ray.t = t;
+ ray.tmax = t;
/* After the first bounce the object can intersect the same surface again */
ray.self.object = OBJECT_NONE;
ray.self.prim = PRIM_NONE;
@@ -382,46 +370,39 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
hit = (ss_isect.num_hits > 0);
if (hit) {
-#ifdef __KERNEL_GPU_RAYTRACING__
- /* t is always in world space with OptiX and MetalRT. */
- ray.t = ss_isect.hits[0].t;
-#else
- /* Compute world space distance to surface hit. */
- float3 D = transform_direction(&ob_itfm, ray.D);
- D = normalize(D) * ss_isect.hits[0].t;
- ray.t = len(transform_direction(&ob_tfm, D));
-#endif
+ ray.tmax = ss_isect.hits[0].t;
}
if (bounce == 0) {
/* Check if we hit the opposite side. */
if (hit) {
have_opposite_interface = true;
- opposite_distance = dot(ray.P + ray.t * ray.D - P, -N);
+ opposite_distance = dot(ray.P + ray.tmax * ray.D - P, -N);
}
/* Apart from the opposite side check, we were supposed to only trace up to distance t,
* so check if there would have been a hit in that case. */
- hit = ray.t < t;
+ hit = ray.tmax < t;
}
/* Use the distance to the exit point for the throughput update if we found one. */
if (hit) {
- t = ray.t;
+ t = ray.tmax;
}
/* Advance to new scatter location. */
ray.P += t * ray.D;
- float3 transmittance;
- float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance);
+ Spectrum transmittance;
+ Spectrum pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance);
if (bounce > 0) {
/* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */
- float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL);
+ Spectrum guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL);
if (have_opposite_interface) {
/* First step of MIS: Depending on geometry we might have two methods for guided
* sampling, so perform MIS between them. */
- float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL);
+ Spectrum back_pdf = subsurface_random_walk_pdf(
+ backward_stretching * sigma_t, t, hit, NULL);
guided_pdf = mix(
guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction);
}
@@ -443,16 +424,14 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* If we hit the surface, we are done. */
break;
}
- else if (throughput.x < VOLUME_THROUGHPUT_EPSILON &&
- throughput.y < VOLUME_THROUGHPUT_EPSILON &&
- throughput.z < VOLUME_THROUGHPUT_EPSILON) {
+ else if (reduce_max(throughput) < VOLUME_THROUGHPUT_EPSILON) {
/* Avoid unnecessary work and precision issue when throughput gets really small. */
break;
}
}
if (hit) {
- kernel_assert(isfinite3_safe(throughput));
+ kernel_assert(isfinite_safe(throughput));
INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
}
diff --git a/intern/cycles/kernel/integrator/surface_shader.h b/intern/cycles/kernel/integrator/surface_shader.h
new file mode 100644
index 00000000000..f40ff3c33ee
--- /dev/null
+++ b/intern/cycles/kernel/integrator/surface_shader.h
@@ -0,0 +1,587 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/* Functions to evaluate shaders. */
+
+#pragma once
+
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf.h"
+#include "kernel/closure/bsdf_util.h"
+#include "kernel/closure/emissive.h"
+
+#include "kernel/svm/svm.h"
+
+#ifdef __OSL__
+# include "kernel/osl/shader.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+/* Post-process the closures stored in `sd` in three passes: closure filtering,
+ * defensive sampling and glossy filtering.
+ *
+ * Writes to `sd->flag`, `sd->closure_emission_background` and to the `type` and
+ * `sample_weight` of individual closures; glossy filtering hands each BSDF
+ * closure to bsdf_blur(). `state` is only read. */
+ccl_device_inline void surface_shader_prepare_closures(KernelGlobals kg,
+ ConstIntegratorState state,
+ ccl_private ShaderData *sd,
+ const uint32_t path_flag)
+{
+ /* Filter out closures, according to the FILTER_CLOSURE_* bits that are set in
+ * kernel_data.integrator.filter_closures. */
+ if (kernel_data.integrator.filter_closures) {
+ if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) {
+ sd->closure_emission_background = zero_spectrum();
+ }
+
+ if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) {
+ sd->flag &= ~SD_BSDF_HAS_EVAL;
+ }
+
+ if (path_flag & PATH_RAY_CAMERA) { /* Per-closure filters only apply to camera rays. */
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+
+ if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) &&
+ (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) ||
+ (CLOSURE_IS_BSDF_GLOSSY(sc->type) &&
+ (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) ||
+ (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) &&
+ (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) {
+ sc->type = CLOSURE_NONE_ID;
+ sc->sample_weight = 0.0f;
+ }
+ else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) &&
+ (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) {
+ sc->type = CLOSURE_HOLDOUT_ID; /* Filtered transparent closures become holdouts. */
+ sc->sample_weight = 0.0f;
+ sd->flag |= SD_HOLDOUT;
+ }
+ }
+ }
+ }
+
+ /* Defensive sampling.
+ *
+ * Only applied when bounce + transparent_bounce is zero and there is more than
+ * one closure: every BSDF/BSSRDF sample weight is raised to at least 1/8 of the
+ * summed BSDF/BSSRDF sample weight.
+ *
+ * We can likely also do defensive sampling at deeper bounces, particularly
+ * for cases like a perfect mirror but possibly also others. This will need
+ * a good heuristic. */
+ if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) ==
+ 0 &&
+ sd->num_closure > 1) {
+ float sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sum += sc->sample_weight;
+ }
+ }
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
+ }
+ }
+ }
+
+ /* Filter glossy.
+ *
+ * Blurring of bsdf after bounces, for rays that have a small likelihood
+ * of following this particular path (diffuse, rough glossy).
+ *
+ * Skipped when filter_glossy is FLT_MAX and, with __MNEE__, when the path has
+ * PATH_MNEE_VALID set. */
+ if (kernel_data.integrator.filter_glossy != FLT_MAX
+#ifdef __MNEE__
+ && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID)
+#endif
+ ) {
+ float blur_pdf = kernel_data.integrator.filter_glossy *
+ INTEGRATOR_STATE(state, path, min_ray_pdf);
+
+ if (blur_pdf < 1.0f) {
+ float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF(sc->type)) {
+ bsdf_blur(kg, sc, blur_roughness);
+ }
+ }
+ }
+ }
+}
+
+/* BSDF */
+/* Return true when `omega_in` points to the opposite side of the normal `sd->N`,
+ * i.e. when their dot product is negative. */
+ccl_device_inline bool surface_shader_is_transmission(ccl_private const ShaderData *sd,
+ const float3 omega_in)
+{
+ return dot(sd->N, omega_in) < 0.0f;
+}
+/* Return true when a closure of `type` has to be skipped according to the
+ * SHADER_EXCLUDE_DIFFUSE / GLOSSY / TRANSMIT bits set in `light_shader_flags`.
+ * SHADER_EXCLUDE_ANY is tested first as an early out when nothing is excluded. */
+ccl_device_forceinline bool _surface_shader_exclude(ClosureType type, uint light_shader_flags)
+{
+ if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) {
+ return false;
+ }
+ if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) {
+ if (CLOSURE_IS_BSDF_DIFFUSE(type)) {
+ return true;
+ }
+ }
+ if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) {
+ if (CLOSURE_IS_BSDF_GLOSSY(type)) {
+ return true;
+ }
+ }
+ if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) {
+ if (CLOSURE_IS_BSDF_TRANSMISSION(type)) {
+ return true;
+ }
+ }
+ return false;
+}
+/* Accumulate the evaluation of every BSDF closure except `skip_sc` for the
+ * direction `omega_in` into `result_eval`, and return the combined pdf.
+ *
+ * `sum_pdf` and `sum_sample_weight` are running totals seeded by the caller.
+ * BSSRDF closures and BSDFs excluded through `light_shader_flags` still add
+ * their sample weight to the total, but contribute no evaluation and no pdf.
+ * Returns sum_pdf / sum_sample_weight, or 0 when the weight total is not
+ * positive. */
+ccl_device_inline float _surface_shader_bsdf_eval_mis(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ const float3 omega_in,
+ const bool is_transmission,
+ ccl_private const ShaderClosure *skip_sc,
+ ccl_private BsdfEval *result_eval,
+ float sum_pdf,
+ float sum_sample_weight,
+ const uint light_shader_flags)
+{
+ /* This is the veach one-sample model with balance heuristic,
+ * some PDF factors drop out when using balance heuristic weighting. */
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (sc == skip_sc) {
+ continue;
+ }
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) {
+ float bsdf_pdf = 0.0f;
+ Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf);
+
+ if (bsdf_pdf != 0.0f) { /* Closures reporting a zero pdf are ignored entirely. */
+ bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
+ sum_pdf += bsdf_pdf * sc->sample_weight;
+ }
+ }
+
+ sum_sample_weight += sc->sample_weight;
+ }
+ }
+
+ return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
+}
+/* Evaluate all BSDF closures of `sd` for the direction `omega_in`.
+ *
+ * Resets `bsdf_eval` to zero, then accumulates into it with no skipped closure
+ * and zero initial sums. Returns the combined pdf computed by
+ * _surface_shader_bsdf_eval_mis(). The function is declared ccl_device_inline
+ * only when __KERNEL_CUDA__ is defined. */
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else
+ccl_device_inline
+#endif
+ float
+ surface_shader_bsdf_eval(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ const float3 omega_in,
+ const bool is_transmission,
+ ccl_private BsdfEval *bsdf_eval,
+ const uint light_shader_flags)
+{
+ bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_spectrum());
+
+ return _surface_shader_bsdf_eval_mis(
+ kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags);
+}
+
+/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight.
+ *
+ * Returns a pointer to the picked closure. `rand_bsdf->x` drives the choice and
+ * is rescaled in place when a closure is picked by the loop. With at most one
+ * closure, or when the loop picks nothing, closure 0 is returned and
+ * `rand_bsdf` is left untouched. */
+ccl_device_inline ccl_private const ShaderClosure *surface_shader_bsdf_bssrdf_pick(
+ ccl_private const ShaderData *ccl_restrict sd, ccl_private float2 *rand_bsdf)
+{
+ int sampled = 0;
+
+ if (sd->num_closure > 1) {
+ /* Pick a BSDF or BSSRDF based on sample weights. */
+ float sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sum += sc->sample_weight;
+ }
+ }
+
+ float r = (*rand_bsdf).x * sum;
+ float partial_sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ float next_sum = partial_sum + sc->sample_weight;
+
+ if (r < next_sum) {
+ sampled = i;
+
+ /* Rescale to reuse for direction sample, to better preserve stratification. */
+ (*rand_bsdf).x = (r - partial_sum) / sc->sample_weight;
+ break;
+ }
+
+ partial_sum = next_sum;
+ }
+ }
+ }
+
+ return &sd->closure[sampled];
+}
+
+/* Return weight for picked BSSRDF.
+ *
+ * This is `bssrdf_sc->weight`; when `sd` holds more than one closure it is
+ * additionally multiplied by the summed BSDF/BSSRDF sample weight divided by
+ * `bssrdf_sc->sample_weight`. */
+ccl_device_inline Spectrum
+surface_shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd,
+ ccl_private const ShaderClosure *ccl_restrict bssrdf_sc)
+{
+ Spectrum weight = bssrdf_sc->weight;
+
+ if (sd->num_closure > 1) {
+ float sum = 0.0f;
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sum += sc->sample_weight;
+ }
+ }
+ weight *= sum / bssrdf_sc->sample_weight;
+ }
+
+ return weight;
+}
+
+/* Sample direction for picked BSDF, and return evaluation and pdf for all
+ * BSDFs combined using MIS.
+ *
+ * Returns the label produced by bsdf_sample(). `*pdf` is reset to zero before
+ * sampling; `bsdf_eval` is only initialized when the sampled pdf is non-zero.
+ * With more than one closure, `*pdf` is replaced by the combined pdf over all
+ * closures, seeded with the picked closure's own pdf and sample weight. */
+ccl_device int surface_shader_bsdf_sample_closure(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ ccl_private const ShaderClosure *sc,
+ const float2 rand_bsdf,
+ ccl_private BsdfEval *bsdf_eval,
+ ccl_private float3 *omega_in,
+ ccl_private float *pdf)
+{
+ /* BSSRDF should already have been handled elsewhere. */
+ kernel_assert(CLOSURE_IS_BSDF(sc->type));
+
+ int label;
+ Spectrum eval = zero_spectrum();
+
+ *pdf = 0.0f;
+ label = bsdf_sample(kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, omega_in, pdf);
+
+ if (*pdf != 0.0f) {
+ bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
+
+ if (sd->num_closure > 1) {
+ const bool is_transmission = surface_shader_is_transmission(sd, *omega_in);
+ float sweight = sc->sample_weight;
+ *pdf = _surface_shader_bsdf_eval_mis(
+ kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0);
+ }
+ }
+
+ return label;
+}
+/* Return the average roughness over all BSDF closures of `sd`, each closure
+ * weighted by the absolute value of its average weight. Returns 0 when the
+ * summed weight is not positive. */
+ccl_device float surface_shader_average_roughness(ccl_private const ShaderData *sd)
+{
+ float roughness = 0.0f;
+ float sum_weight = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF(sc->type)) {
+ /* sqrt once to undo the squaring from multiplying roughness on the
+ * two axes, and once for the squared roughness convention. */
+ float weight = fabsf(average(sc->weight));
+ roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
+ sum_weight += weight;
+ }
+ }
+
+ return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
+}
+/* Return the transparency stored in `sd`: one for shader data flagged
+ * SD_HAS_ONLY_VOLUME, `closure_transparent_extinction` when SD_TRANSPARENT is
+ * set, and zero otherwise. `kg` is not used by this function. */
+ccl_device Spectrum surface_shader_transparency(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ return one_spectrum();
+ }
+ else if (sd->flag & SD_TRANSPARENT) {
+ return sd->closure_transparent_extinction;
+ }
+ else {
+ return zero_spectrum();
+ }
+}
+/* When SD_TRANSPARENT is set, zero the weight and sample weight of every
+ * CLOSURE_BSDF_TRANSPARENT_ID closure and clear the flag; otherwise do nothing.
+ * `kg` is not used by this function. */
+ccl_device void surface_shader_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd)
+{
+ if (sd->flag & SD_TRANSPARENT) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+
+ if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
+ sc->sample_weight = 0.0f;
+ sc->weight = zero_spectrum();
+ }
+ }
+
+ sd->flag &= ~SD_TRANSPARENT;
+ }
+}
+/* Return alpha as one minus surface_shader_transparency(), passed through
+ * saturate(). */
+ccl_device Spectrum surface_shader_alpha(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ Spectrum alpha = one_spectrum() - surface_shader_transparency(kg, sd);
+
+ alpha = saturate(alpha);
+
+ return alpha;
+}
+/* Return the summed weight of all diffuse BSDF and BSSRDF closures in `sd`.
+ * `kg` is not used by this function. */
+ccl_device Spectrum surface_shader_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ Spectrum eval = zero_spectrum();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type))
+ eval += sc->weight;
+ }
+
+ return eval;
+}
+/* Return the summed weight of all glossy BSDF closures in `sd`.
+ * `kg` is not used by this function. */
+ccl_device Spectrum surface_shader_glossy(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ Spectrum eval = zero_spectrum();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ eval += sc->weight;
+ }
+
+ return eval;
+}
+/* Return the summed weight of all transmission BSDF closures in `sd`.
+ * `kg` is not used by this function. */
+ccl_device Spectrum surface_shader_transmission(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ Spectrum eval = zero_spectrum();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
+ eval += sc->weight;
+ }
+
+ return eval;
+}
+/* Return the normalized sum of the normals of all BSDF/BSSRDF closures, each
+ * scaled by the absolute value of its average weight. Falls back to `sd->N`
+ * when that sum is zero. `kg` is not used by this function. */
+ccl_device float3 surface_shader_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ float3 N = zero_float3();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
+ N += sc->N * fabsf(average(sc->weight));
+ }
+
+ return (is_zero(N)) ? sd->N : normalize(N);
+}
+/* Return the summed weight of all diffuse BSDF closures scaled by `ao_factor`.
+ *
+ * Also writes to `*N_` the normalized sum of those closures' normals, each
+ * scaled by the absolute value of its average weight, or `sd->N` when that sum
+ * is zero. `kg` is not used by this function. */
+ccl_device Spectrum surface_shader_ao(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ const float ao_factor,
+ ccl_private float3 *N_)
+{
+ Spectrum eval = zero_spectrum();
+ float3 N = zero_float3();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+ ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
+ eval += sc->weight * ao_factor;
+ N += bsdf->N * fabsf(average(sc->weight));
+ }
+ }
+
+ *N_ = (is_zero(N)) ? sd->N : normalize(N);
+ return eval;
+}
+/* Return the normalized sum of the normals of all BSSRDF closures, each scaled
+ * by the absolute value of its average weight, or `sd->N` when that sum is
+ * zero. Only compiled when __SUBSURFACE__ is defined. */
+#ifdef __SUBSURFACE__
+ccl_device float3 surface_shader_bssrdf_normal(ccl_private const ShaderData *sd)
+{
+ float3 N = zero_float3();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSSRDF(sc->type)) {
+ ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
+ float avg_weight = fabsf(average(sc->weight));
+
+ N += bssrdf->N * avg_weight;
+ }
+ }
+
+ return (is_zero(N)) ? sd->N : normalize(N);
+}
+#endif /* __SUBSURFACE__ */
+
+/* Constant emission optimization */
+/* Look up the shader at index `shader & SHADER_MASK`. When its flags contain
+ * SD_HAS_CONSTANT_EMISSION, convert its three `constant_emission` components
+ * with rgb_to_spectrum(), store the result in `*eval` and return true.
+ * Otherwise return false and leave `*eval` untouched. */
+ccl_device bool surface_shader_constant_emission(KernelGlobals kg,
+ int shader,
+ ccl_private Spectrum *eval)
+{
+ int shader_index = shader & SHADER_MASK;
+ int shader_flag = kernel_data_fetch(shaders, shader_index).flags;
+
+ if (shader_flag & SD_HAS_CONSTANT_EMISSION) {
+ const float3 emission_rgb = make_float3(
+ kernel_data_fetch(shaders, shader_index).constant_emission[0],
+ kernel_data_fetch(shaders, shader_index).constant_emission[1],
+ kernel_data_fetch(shaders, shader_index).constant_emission[2]);
+ *eval = rgb_to_spectrum(emission_rgb);
+
+ return true;
+ }
+
+ return false;
+}
+
+/* Background */
+/* Return `sd->closure_emission_background` when SD_EMISSION is set in
+ * `sd->flag`, zero otherwise. */
+ccl_device Spectrum surface_shader_background(ccl_private const ShaderData *sd)
+{
+ if (sd->flag & SD_EMISSION) {
+ return sd->closure_emission_background;
+ }
+ else {
+ return zero_spectrum();
+ }
+}
+
+/* Emission */
+/* Return `sd->closure_emission_background` scaled by
+ * emissive_simple_eval(sd->Ng, sd->I) when SD_EMISSION is set in `sd->flag`,
+ * zero otherwise. */
+ccl_device Spectrum surface_shader_emission(ccl_private const ShaderData *sd)
+{
+ if (sd->flag & SD_EMISSION) {
+ return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
+ }
+ else {
+ return zero_spectrum();
+ }
+}
+
+/* Holdout */
+/* Compute the holdout weight for `sd` and return it.
+ *
+ * Objects with SD_OBJECT_HOLDOUT_MASK set: the weight is one, or one minus
+ * `closure_transparent_extinction` for transparent, non volume-only shader
+ * data. In the latter case every non-transparent closure gets its type set to
+ * NBUILTIN_CLOSURES and closure flags are masked out of `sd->flag`.
+ * NOTE(review): the mask is built by subtracting (SD_TRANSPARENT | SD_BSDF)
+ * from SD_CLOSURE_FLAGS, which presumably keeps exactly those two bits set;
+ * confirm that SD_CLOSURE_FLAGS contains both.
+ *
+ * Other objects: the weight is the summed weight of all holdout closures and
+ * `sd` is not modified. `kg` is not used by this function. */
+ccl_device Spectrum surface_shader_apply_holdout(KernelGlobals kg, ccl_private ShaderData *sd)
+{
+ Spectrum weight = zero_spectrum();
+
+ /* For objects marked as holdout, preserve transparency and remove all other
+ * closures, replacing them with a holdout weight. */
+ if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+ if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) {
+ weight = one_spectrum() - sd->closure_transparent_extinction;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+ if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+ sc->type = NBUILTIN_CLOSURES;
+ }
+ }
+
+ sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF));
+ }
+ else {
+ weight = one_spectrum();
+ }
+ }
+ else {
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_HOLDOUT(sc->type)) {
+ weight += sc->weight;
+ }
+ }
+ }
+
+ return weight;
+}
+
+/* Surface Evaluation */
+/* Evaluate the surface or background shader for `sd`.
+ *
+ * Resets `sd->num_closure` and sets the closure budget `sd->num_closure_left`,
+ * then dispatches to OSL (when compiled with __OSL__ and `kg->osl` is set) or
+ * to the SVM node evaluation. Without __SVM__ a fixed fallback is used: 0.8
+ * emission when there is no object, otherwise a 0.8 diffuse BSDF.
+ * `buffer` is only forwarded to svm_eval_nodes(). */
+template<uint node_feature_mask, typename ConstIntegratorGenericState>
+ccl_device void surface_shader_eval(KernelGlobals kg,
+ ConstIntegratorGenericState state,
+ ccl_private ShaderData *ccl_restrict sd,
+ ccl_global float *ccl_restrict buffer,
+ uint32_t path_flag,
+ bool use_caustics_storage = false)
+{
+ /* If path is being terminated, we are tracing a shadow ray or evaluating
+ * emission, then we don't need to store closures. The emission and shadow
+ * shader data also do not have a closure array to save GPU memory. */
+ int max_closures;
+ if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+ max_closures = 0;
+ }
+ else {
+ max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures;
+ }
+
+ sd->num_closure = 0;
+ sd->num_closure_left = max_closures;
+
+#ifdef __OSL__
+ if (kg->osl) {
+ if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { /* Neither object nor lamp. */
+ OSLShader::eval_background(kg, state, sd, path_flag);
+ }
+ else {
+ OSLShader::eval_surface(kg, state, sd, path_flag);
+ }
+ }
+ else
+#endif
+ {
+#ifdef __SVM__
+ svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag);
+#else
+ if (sd->object == OBJECT_NONE) {
+ sd->closure_emission_background = make_spectrum(0.8f);
+ sd->flag |= SD_EMISSION;
+ }
+ else {
+ ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(DiffuseBsdf), make_spectrum(0.8f));
+ if (bsdf != NULL) { /* Silently skip the closure when allocation fails. */
+ bsdf->N = sd->N;
+ sd->flag |= bsdf_diffuse_setup(bsdf);
+ }
+ }
+#endif
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/volume_shader.h b/intern/cycles/kernel/integrator/volume_shader.h
new file mode 100644
index 00000000000..a1d191e2d32
--- /dev/null
+++ b/intern/cycles/kernel/integrator/volume_shader.h
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/* Volume shader evaluation and sampling. */
+
+#pragma once
+
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf.h"
+#include "kernel/closure/bsdf_util.h"
+#include "kernel/closure/emissive.h"
+
+#include "kernel/svm/svm.h"
+
+#ifdef __OSL__
+# include "kernel/osl/shader.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __VOLUME__
+
+/* Merging
+ *
+ * Fold Henyey-Greenstein closures that share the same `g` into the first such
+ * closure, summing weight and sample weight, and compact the closure array of
+ * `sd` in place (`sd->num_closure` shrinks by one per merged closure). */
+ccl_device_inline void volume_shader_merge_closures(ccl_private ShaderData *sd)
+{
+ /* Merge identical closures to save closure space with stacked volumes. */
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sci = &sd->closure[i];
+
+ if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+ continue;
+ }
+
+ for (int j = i + 1; j < sd->num_closure; j++) {
+ ccl_private ShaderClosure *scj = &sd->closure[j];
+ if (sci->type != scj->type) {
+ continue;
+ }
+
+ ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *)
+ sci;
+ ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *)
+ scj;
+ if (!(hgi->g == hgj->g)) {
+ continue;
+ }
+
+ sci->weight += scj->weight;
+ sci->sample_weight += scj->sample_weight;
+
+ int size = sd->num_closure - (j + 1); /* Number of closures stored after index j. */
+ if (size > 0) {
+ for (int k = 0; k < size; k++) {
+ scj[k] = scj[k + 1];
+ }
+ }
+
+ sd->num_closure--;
+ kernel_assert(sd->num_closure >= 0);
+ j--; /* Revisit index j, which now holds the closure that followed the merged one. */
+ }
+ }
+}
+/* Copy the Henyey-Greenstein closures of `sd` (weight, sample weight and `g`)
+ * into `phases`, in order, stopping once MAX_VOLUME_CLOSURE entries have been
+ * written. Closures of any other type are skipped. */
+ccl_device_inline void volume_shader_copy_phases(ccl_private ShaderVolumePhases *ccl_restrict
+ phases,
+ ccl_private const ShaderData *ccl_restrict sd)
+{
+ phases->num_closure = 0;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *from_sc = &sd->closure[i];
+ ccl_private const HenyeyGreensteinVolume *from_hg =
+ (ccl_private const HenyeyGreensteinVolume *)from_sc;
+
+ if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { /* from_hg is only read here. */
+ ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
+
+ to_sc->weight = from_sc->weight;
+ to_sc->sample_weight = from_sc->sample_weight;
+ to_sc->g = from_hg->g;
+ phases->num_closure++;
+ if (phases->num_closure >= MAX_VOLUME_CLOSURE) {
+ break;
+ }
+ }
+ }
+}
+/* Accumulate the evaluation of every phase closure except index `skip_phase`
+ * for the direction `omega_in` into `result_eval`, and return the combined pdf.
+ *
+ * `sum_pdf` and `sum_sample_weight` are running totals seeded by the caller.
+ * A closure with zero pdf adds no evaluation but still adds its sample weight.
+ * Returns sum_pdf / sum_sample_weight, or 0 when the weight total is not
+ * positive. */
+ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumePhases *phases,
+ const float3 omega_in,
+ int skip_phase,
+ ccl_private BsdfEval *result_eval,
+ float sum_pdf,
+ float sum_sample_weight)
+{
+ for (int i = 0; i < phases->num_closure; i++) {
+ if (i == skip_phase)
+ continue;
+
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
+ float phase_pdf = 0.0f;
+ Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
+
+ if (phase_pdf != 0.0f) {
+ bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+ sum_pdf += phase_pdf * svc->sample_weight;
+ }
+
+ sum_sample_weight += svc->sample_weight;
+ }
+
+ return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
+}
+/* Evaluate all phase closures in `phases` for the direction `omega_in`.
+ *
+ * Resets `phase_eval` to zero, then accumulates into it with no skipped
+ * closure (index -1) and zero initial sums. Returns the combined pdf.
+ * `kg` is not used by this function. */
+ccl_device float volume_shader_phase_eval(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumePhases *phases,
+ const float3 omega_in,
+ ccl_private BsdfEval *phase_eval)
+{
+ bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum());
+
+ return _volume_shader_phase_eval_mis(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
+}
+/* Pick one phase closure proportionally to its sample weight (when there is
+ * more than one) and sample a direction from it.
+ *
+ * Returns the label from volume_phase_sample(), or LABEL_NONE with `*pdf` set
+ * to zero when the selection loop picks no closure. `phase_eval` is only
+ * initialized when the sampled pdf is non-zero, and only the picked closure
+ * contributes to it and to `*pdf`. `rand_phase` is passed by value, so the
+ * rescaling of its x component stays local. `kg` is not used here. */
+ccl_device int volume_shader_phase_sample(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumePhases *phases,
+ float2 rand_phase,
+ ccl_private BsdfEval *phase_eval,
+ ccl_private float3 *omega_in,
+ ccl_private float *pdf)
+{
+ int sampled = 0;
+
+ if (phases->num_closure > 1) {
+ /* pick a phase closure based on sample weights */
+ float sum = 0.0f;
+
+ for (sampled = 0; sampled < phases->num_closure; sampled++) {
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
+ sum += svc->sample_weight;
+ }
+
+ float r = rand_phase.x * sum;
+ float partial_sum = 0.0f;
+
+ for (sampled = 0; sampled < phases->num_closure; sampled++) {
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
+ float next_sum = partial_sum + svc->sample_weight;
+
+ if (r <= next_sum) {
+ /* Rescale to reuse for BSDF direction sample. */
+ rand_phase.x = (r - partial_sum) / svc->sample_weight;
+ break;
+ }
+
+ partial_sum = next_sum;
+ }
+
+ if (sampled == phases->num_closure) { /* The loop ended without picking a closure. */
+ *pdf = 0.0f;
+ return LABEL_NONE;
+ }
+ }
+
+ /* todo: this isn't quite correct, we don't weight anisotropy properly
+ * depending on color channels, even if this is perhaps not a common case */
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
+ int label;
+ Spectrum eval = zero_spectrum();
+
+ *pdf = 0.0f;
+ label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf);
+
+ if (*pdf != 0.0f) {
+ bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+ }
+
+ return label;
+}
+/* Sample a direction from the given phase closure `sc`.
+ *
+ * Returns the label from volume_phase_sample(). `*pdf` is reset to zero before
+ * sampling and `phase_eval` is only initialized when the sampled pdf is
+ * non-zero. `kg` is not used by this function. */
+ccl_device int volume_shader_phase_sample_closure(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumeClosure *sc,
+ const float2 rand_phase,
+ ccl_private BsdfEval *phase_eval,
+ ccl_private float3 *omega_in,
+ ccl_private float *pdf)
+{
+ int label;
+ Spectrum eval = zero_spectrum();
+
+ *pdf = 0.0f;
+ label = volume_phase_sample(sd, sc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf);
+
+ if (*pdf != 0.0f)
+ bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+
+ return label;
+}
+
+/* Motion Blur */
+/* Displace `sd->P` along the volume velocity attribute of the object.
+ *
+ * Does nothing for objects without SD_OBJECT_HAS_VOLUME_MOTION. Otherwise the
+ * velocity is looked up twice: once at the original position and once at the
+ * position advected by that first velocity; the second lookup determines the
+ * final `sd->P`. Only compiled when __OBJECT_MOTION__ is defined. */
+# ifdef __OBJECT_MOTION__
+ccl_device_inline void volume_shader_motion_blur(KernelGlobals kg,
+ ccl_private ShaderData *ccl_restrict sd)
+{
+ if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) == 0) {
+ return;
+ }
+
+ AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY);
+ kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND);
+
+ const float3 P = sd->P;
+ const float velocity_scale = kernel_data_fetch(objects, sd->object).velocity_scale;
+ const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? 0.5f :
+ 0.0f;
+ const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ?
+ (1.0f - kernel_data.cam.shuttertime) + sd->time :
+ sd->time;
+
+ /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity
+ * existed, or will exist, at the given time:
+ *
+ * `phi(x, T) = phi(x - (T - t) * u(x, T), t)`
+ *
+ * where
+ *
+ * x : position
+ * T : super-sampled time (or ray time)
+ * t : current time of the simulation (in rendering we assume this is center frame with
+ * relative time = 0)
+ * phi : the volume quantity
+ * u : the velocity field
+ *
+ * But first we need to determine the velocity field `u(x, T)`, which we can estimate also
+ * using semi-lagrangian advection.
+ *
+ * `u(x, T) = u(x - (T - t) * u(x, T), t)`
+ *
+ * This is the typical way to model self-advection in fluid dynamics, however, we do not
+ * account for other forces affecting the velocity during simulation (pressure, buoyancy,
+ * etc.): this gives a linear interpolation when fluids are mostly "curvy". For better
+ * results, a higher order interpolation scheme can be used (at the cost of more lookups),
+ * or an interpolation of the velocity fields for the previous and next frames could also
+ * be used to estimate `u(x, T)` (which will cost more memory and lookups).
+ *
+ * References:
+ * "Eulerian Motion Blur", Kim and Ko, 2007
+ * "Production Volume Rendering", Wrenninge et al., 2012
+ */
+
+ /* Find velocity. */
+ float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
+ object_dir_transform(kg, sd, &velocity);
+
+ /* Find advected P. */
+ sd->P = P - (time - time_offset) * velocity_scale * velocity;
+
+ /* Find advected velocity. */
+ velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
+ object_dir_transform(kg, sd, &velocity);
+
+ /* Find the final advected P, again starting from the original P but using the advected
+ * velocity. */
+ sd->P = P - (time - time_offset) * velocity_scale * velocity;
+}
+# endif
+
+/* Volume Evaluation */
+/* Evaluate the shaders of all volumes in the stack into `sd`.
+ *
+ * `stack_read(i)` is called with increasing `i` until it returns an entry whose
+ * shader is SHADER_NONE. For each entry, object, lamp, shader and flags of `sd`
+ * are switched to that entry before its shader is evaluated; closures from all
+ * entries accumulate in the single closure array of `sd`. When `shadow` is
+ * false, closures are merged after every entry but the first. */
+template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState>
+ccl_device_inline void volume_shader_eval(KernelGlobals kg,
+ ConstIntegratorGenericState state,
+ ccl_private ShaderData *ccl_restrict sd,
+ const uint32_t path_flag,
+ StackReadOp stack_read)
+{
+ /* If path is being terminated, we are tracing a shadow ray or evaluating
+ * emission, then we don't need to store closures. The emission and shadow
+ * shader data also do not have a closure array to save GPU memory. */
+ int max_closures;
+ if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+ max_closures = 0;
+ }
+ else {
+ max_closures = kernel_data.max_closures;
+ }
+
+ /* reset closures once at the start, we will be accumulating the closures
+ * for all volumes in the stack into a single array of closures */
+ sd->num_closure = 0;
+ sd->num_closure_left = max_closures;
+ sd->flag = 0;
+ sd->object_flag = 0;
+
+ for (int i = 0;; i++) {
+ const VolumeStack entry = stack_read(i);
+ if (entry.shader == SHADER_NONE) {
+ break;
+ }
+
+ /* Setup shader-data from stack. it's mostly setup already in
+ * shader_setup_from_volume, this switching should be quick. */
+ sd->object = entry.object;
+ sd->lamp = LAMP_NONE;
+ sd->shader = entry.shader;
+
+ sd->flag &= ~SD_SHADER_FLAGS;
+ sd->flag |= kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags;
+ sd->object_flag &= ~SD_OBJECT_FLAGS;
+
+ if (sd->object != OBJECT_NONE) {
+ sd->object_flag |= kernel_data_fetch(object_flag, sd->object);
+
+# ifdef __OBJECT_MOTION__
+ /* todo: this is inefficient for motion blur, we should be
+ * caching matrices instead of recomputing them each step */
+ shader_setup_object_transforms(kg, sd, sd->time);
+
+ volume_shader_motion_blur(kg, sd);
+# endif
+ }
+
+ /* evaluate shader (nothing is evaluated when __SVM__ is not defined) */
+# ifdef __SVM__
+# ifdef __OSL__
+ if (kg->osl) {
+ OSLShader::eval_volume(kg, state, sd, path_flag);
+ }
+ else
+# endif
+ {
+ svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>(
+ kg, state, sd, NULL, path_flag);
+ }
+# endif
+
+ /* Merge closures to avoid exceeding number of closures limit. */
+ if (!shadow) {
+ if (i > 0) {
+ volume_shader_merge_closures(sd);
+ }
+ }
+ }
+}
+
+#endif /* __VOLUME__ */
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/volume_stack.h b/intern/cycles/kernel/integrator/volume_stack.h
index 5256349a0cc..675e1927fc0 100644
--- a/intern/cycles/kernel/integrator/volume_stack.h
+++ b/intern/cycles/kernel/integrator/volume_stack.h
@@ -39,7 +39,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
break;
}
- if (entry.object == sd->object) {
+ if (entry.object == sd->object && entry.shader == sd->shader) {
/* Shift back next stack entries. */
do {
entry = stack_read(i + 1);
@@ -61,7 +61,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
}
/* Already in the stack? then we have nothing to do. */
- if (entry.object == sd->object) {
+ if (entry.object == sd->object && entry.shader == sd->shader) {
return;
}
}
@@ -133,7 +133,7 @@ ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read
break;
}
- int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags;
+ int shader_flag = kernel_data_fetch(shaders, (entry.shader & SHADER_MASK)).flags;
bool heterogeneous = false;
@@ -146,7 +146,7 @@ ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read
* heterogeneous volume objects may be using the same shader. */
int object = entry.object;
if (object != OBJECT_NONE) {
- int object_flag = kernel_tex_fetch(__object_flag, object);
+ int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
heterogeneous = true;
}
@@ -180,7 +180,7 @@ ccl_device VolumeSampleMethod volume_stack_sample_method(KernelGlobals kg, Integ
break;
}
- int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags;
+ int shader_flag = kernel_data_fetch(shaders, (entry.shader & SHADER_MASK)).flags;
if (shader_flag & SD_VOLUME_MIS) {
/* Multiple importance sampling. */