diff options
Diffstat (limited to 'intern/cycles/kernel/integrator')
27 files changed, 1847 insertions, 1781 deletions
diff --git a/intern/cycles/kernel/integrator/displacement_shader.h b/intern/cycles/kernel/integrator/displacement_shader.h new file mode 100644 index 00000000000..71a0f56fb3e --- /dev/null +++ b/intern/cycles/kernel/integrator/displacement_shader.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Functions to evaluate displacement shader. */ + +#pragma once + +#include "kernel/svm/svm.h" + +#ifdef __OSL__ +# include "kernel/osl/shader.h" +#endif + +CCL_NAMESPACE_BEGIN + +template<typename ConstIntegratorGenericState> +ccl_device void displacement_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd) +{ + sd->num_closure = 0; + sd->num_closure_left = 0; + + /* this will modify sd->P */ +#ifdef __SVM__ +# ifdef __OSL__ + if (kg->osl) + OSLShader::eval_displacement(kg, state, sd); + else +# endif + { + svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>( + kg, state, sd, NULL, 0); + } +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index 0db4241b6e3..eca2c0b9ffb 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -5,8 +5,8 @@ #include "kernel/camera/camera.h" -#include "kernel/film/accumulate.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/path_state.h" @@ -49,7 +49,8 @@ ccl_device const float2 bake_offset_towards_center(KernelGlobals kg, const float3 to_center = center - P; const float3 offset_P = P + normalize(to_center) * - min(len(to_center), max(max3(fabs(P)), 1.0f) * position_offset); + min(len(to_center), + max(reduce_max(fabs(P)), 1.0f) * position_offset); /* Compute barycentric coordinates at new position. */ const float3 v1 = tri_verts[1] - tri_verts[0]; @@ -91,12 +92,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + if (!film_need_sample_pixel(kg, state, render_buffer)) { return false; } /* Always count the sample, even if the camera sample will reject the ray. */ - const int sample = kernel_accum_sample( + const int sample = film_write_sample( kg, state, render_buffer, scheduled_sample, tile->sample_offset); /* Setup render buffers. */ @@ -111,8 +112,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, int prim = __float_as_uint(primitive[1]); if (prim == -1) { /* Accumulate transparency for empty pixels. */ - kernel_accum_transparent(kg, state, 0, 1.0f, buffer); - return false; + film_write_transparent(kg, state, 0, 1.0f, buffer); + return true; } prim += kernel_data.bake.tri_offset; @@ -120,13 +121,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Random number generator. */ const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed; - float filter_x, filter_y; - if (sample == 0) { - filter_x = filter_y = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y); - } + const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) : + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER); /* Initialize path state for path integration. */ path_state_init_integrator(kg, state, sample, rng_hash); @@ -149,18 +145,24 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Sub-pixel offset. */ if (sample > 0) { - u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); - v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), + u = bake_clamp_mirror_repeat(u + dudx * (rand_filter.x - 0.5f) + dudy * (rand_filter.y - 0.5f), + 1.0f); + v = bake_clamp_mirror_repeat(v + dvdx * (rand_filter.x - 0.5f) + dvdy * (rand_filter.y - 0.5f), 1.0f - u); } + /* Convert from Blender to Cycles/Embree/OptiX barycentric convention. */ + const float tmp = u; + u = v; + v = 1.0f - tmp - v; + /* Position and normal on triangle. */ const int object = kernel_data.bake.object_index; float3 P, Ng; int shader; triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); - const int object_flag = kernel_tex_fetch(__object_flag, object); + const int object_flag = kernel_data_fetch(object_flag, object); if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); P = transform_point_auto(&tfm, P); @@ -173,14 +175,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, Ray ray ccl_optional_struct_init; ray.P = zero_float3(); ray.D = normalize(P); - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = 0.5f; ray.dP = differential_zero_compact(); ray.dD = differential_zero_compact(); integrator_state_write_ray(kg, state, &ray); /* Setup next kernel to execute. */ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } else { /* Surface baking. */ @@ -193,15 +196,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, } const int shader_index = shader & SHADER_MASK; - const int shader_flags = kernel_tex_fetch(__shaders, shader_index).flags; + const int shader_flags = kernel_data_fetch(shaders, shader_index).flags; /* Fast path for position and normal passes not affected by shaders. */ if (kernel_data.film.pass_position != PASS_UNUSED) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_position, P); + film_write_pass_float3(buffer + kernel_data.film.pass_position, P); return true; } else if (kernel_data.film.pass_normal != PASS_UNUSED && !(shader_flags & SD_HAS_BUMP)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, N); + film_write_pass_float3(buffer + kernel_data.film.pass_normal, N); return true; } @@ -209,7 +212,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, Ray ray ccl_optional_struct_init; ray.P = P + N; ray.D = -N; - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = 0.5f; /* Setup differentials. */ @@ -246,13 +250,15 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); } } diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h index 9fe27cdda9a..8df3e1b9fb3 100644 --- a/intern/cycles/kernel/integrator/init_from_camera.h +++ b/intern/cycles/kernel/integrator/init_from_camera.h @@ -5,8 +5,8 @@ #include "kernel/camera/camera.h" -#include "kernel/film/accumulate.h" #include "kernel/film/adaptive_sampling.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/shadow_catcher.h" @@ -23,31 +23,21 @@ ccl_device_inline void integrate_camera_sample(KernelGlobals kg, ccl_private Ray *ray) { /* Filter sampling. */ - float filter_u, filter_v; - - if (sample == 0) { - filter_u = 0.5f; - filter_v = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v); - } + const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) : + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER); /* Depth of field sampling. */ - float lens_u = 0.0f, lens_v = 0.0f; - if (kernel_data.cam.aperturesize > 0.0f) { - path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v); - } + const float2 rand_lens = (kernel_data.cam.aperturesize > 0.0f) ? + path_rng_2D(kg, rng_hash, sample, PRNG_LENS) : + zero_float2(); /* Motion blur time sampling. */ - float time = 0.0f; -#ifdef __CAMERA_MOTION__ - if (kernel_data.cam.shuttertime != -1.0f) - time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME); -#endif + const float rand_time = (kernel_data.cam.shuttertime != -1.0f) ? + path_rng_1D(kg, rng_hash, sample, PRNG_TIME) : + 0.0f; /* Generate camera ray. */ - camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); + camera_sample(kg, x, y, rand_filter.x, rand_filter.y, rand_lens.x, rand_lens.y, rand_time, ray); } /* Return false to indicate that this pixel is finished. @@ -67,7 +57,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, path_state_init(state, tile, x, y); /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + if (!film_need_sample_pixel(kg, state, render_buffer)) { return false; } @@ -76,7 +66,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, * This logic allows to both count actual number of samples per pixel, and to add samples to this * pixel after it was converged and samples were added somewhere else (in which case the * `scheduled_sample` will be different from actual number of samples in this pixel). */ - const int sample = kernel_accum_sample( + const int sample = film_write_sample( kg, state, render_buffer, scheduled_sample, tile->sample_offset); /* Initialize random number seed for path. */ @@ -86,7 +76,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, /* Generate camera ray. */ Ray ray; integrate_camera_sample(kg, sample, x, y, rng_hash, &ray); - if (ray.t == 0.0f) { + if (ray.tmax == 0.0f) { return true; } @@ -100,10 +90,10 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg, /* Continue with intersect_closest kernel, optionally initializing volume * stack before that if the camera may be inside a volume. */ if (kernel_data.cam.is_inside_volume) { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); } else { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } return true; diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 2dfac44b414..4ecff56a3fd 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -5,6 +5,8 @@ #include "kernel/camera/projection.h" +#include "kernel/film/light_passes.h" + #include "kernel/integrator/path_state.h" #include "kernel/integrator/shadow_catcher.h" @@ -87,7 +89,7 @@ ccl_device_forceinline void integrator_split_shadow_catcher( return; } - kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer); + film_write_shadow_catcher_bounce_data(kg, state, render_buffer); /* Mark state as having done a shadow catcher split so that it stops contributing to * the shadow catcher matte pass, but keeps contributing to the combined pass. */ @@ -109,37 +111,38 @@ ccl_device_forceinline void integrator_split_shadow_catcher( /* If using background pass, schedule background shading kernel so that we have a background * to alpha-over on. The background kernel will then continue the path afterwards. */ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); return; } if (!integrator_state_volume_stack_is_empty(kg, state)) { /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher * objects from it, and then continue shading volume and shadow catcher surface after. */ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_init(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); return; } /* Continue with shading shadow catcher surface. */ const int shader = intersection_get_shader(kg, isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; + const int flags = kernel_data_fetch(shaders, shader).flags; const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_init_sorted( + kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_init_sorted(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } } /* Schedule next kernel to be executed after updating volume stack for shadow catcher. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_volume( KernelGlobals kg, IntegratorState state) { @@ -149,27 +152,28 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche integrator_state_read_isect(kg, state, &isect); const int shader = intersection_get_shader(kg, &isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; + const int flags = kernel_data_fetch(shaders, shader).flags; const int object_flags = intersection_get_object_flags(kg, &isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } } /* Schedule next kernel to be executed after executing background shader for shadow catcher. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catcher_background( KernelGlobals kg, IntegratorState state) { @@ -177,7 +181,8 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche if (!integrator_state_volume_stack_is_empty(kg, state)) { /* Volume stack is not empty. Re-init the volume stack to exclude any non-shadow catcher * objects from it, and then continue shading volume and shadow catcher surface after. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + integrator_path_next( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); return; } @@ -190,7 +195,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche * * Note that current_kernel is a template value since making this a variable * leads to poor performance with CUDA atomics. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel( KernelGlobals kg, IntegratorState state, @@ -203,13 +208,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel( if (!integrator_state_volume_stack_is_empty(kg, state)) { const bool hit_surface = hit && !(isect->type & PRIMITIVE_LAMP); const int shader = (hit_surface) ? intersection_get_shader(kg, isect) : SHADER_NONE; - const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0; + const int flags = (hit_surface) ? kernel_data_fetch(shaders, shader).flags : 0; if (!integrator_intersect_terminate(kg, state, flags)) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } return; } @@ -218,12 +223,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel( if (hit) { /* Hit a surface, continue with light or surface kernel. */ if (isect->type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); } else { /* Hit a surface, continue with surface kernel unless terminated. */ const int shader = intersection_get_shader(kg, isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; + const int flags = kernel_data_fetch(shaders, shader).flags; if (!integrator_intersect_terminate(kg, state, flags)) { const int object_flags = intersection_get_object_flags(kg, isect); @@ -231,16 +236,16 @@ ccl_device_forceinline void integrator_intersect_next_kernel( (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } #ifdef __SHADOW_CATCHER__ @@ -249,13 +254,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel( #endif } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } } } else { /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } } @@ -263,7 +268,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( * * The logic here matches integrator_intersect_next_kernel, except that * volume shading and termination testing have already been done. */ -template<uint32_t current_kernel> +template<DeviceKernel current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( KernelGlobals kg, IntegratorState state, @@ -273,29 +278,29 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( if (isect->prim != PRIM_NONE) { /* Hit a surface, continue with light or surface kernel. */ if (isect->type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } else { /* Hit a surface, continue with surface kernel unless terminated. */ const int shader = intersection_get_shader(kg, isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; + const int flags = kernel_data_fetch(shaders, shader).flags; const int object_flags = intersection_get_object_flags(kg, isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + integrator_path_next_sorted( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } #ifdef __SHADOW_CATCHER__ @@ -307,7 +312,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( } else { /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); return; } } @@ -321,7 +326,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, /* Read ray from integrator state into local memory. */ Ray ray ccl_optional_struct_init; integrator_state_read_ray(kg, state, &ray); - kernel_assert(ray.t != 0.0f); + kernel_assert(ray.tmax != 0.0f); const uint visibility = path_state_ray_visibility(state); const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim); @@ -329,12 +334,12 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, /* Trick to use short AO rays to approximate indirect light at the end of the path. */ if (path_state_ao_bounce(kg, state)) { - ray.t = kernel_data.integrator.ao_bounces_distance; + ray.tmax = kernel_data.integrator.ao_bounces_distance; if (last_isect_object != OBJECT_NONE) { - const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance; + const float object_ao_distance = kernel_data_fetch(objects, last_isect_object).ao_distance; if (object_ao_distance != 0.0f) { - ray.t = object_ao_distance; + ray.tmax = object_ao_distance; } } } @@ -366,7 +371,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, bool from_caustic_caster = false; bool from_caustic_receiver = false; if (!(path_flag & PATH_RAY_CAMERA) && last_isect_object != OBJECT_NONE) { - const int object_flags = kernel_tex_fetch(__object_flag, last_isect_object); + const int object_flags = kernel_data_fetch(object_flag, last_isect_object); from_caustic_receiver = (object_flags & SD_OBJECT_CAUSTICS_RECEIVER); from_caustic_caster = (object_flags & SD_OBJECT_CAUSTICS_CASTER); } diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h index 3e746998225..25ff3d5b23f 100644 --- a/intern/cycles/kernel/integrator/intersect_shadow.h +++ b/intern/cycles/kernel/integrator/intersect_shadow.h @@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k } #ifdef __TRANSPARENT_SHADOWS__ -# if defined(__KERNEL_CPU__) +# ifndef __KERNEL_GPU__ ccl_device int shadow_intersections_compare(const void *a, const void *b) { const Intersection *isect_a = (const Intersection *)a; @@ -162,7 +162,7 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt if (opaque_hit) { /* Hit an opaque surface, shadow path ends here. */ - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); return; } else { @@ -171,7 +171,9 @@ ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowSt * * TODO: could also write to render buffer directly if no transparent shadows? * Could save a kernel execution for the common case. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, + integrator_shadow_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } diff --git a/intern/cycles/kernel/integrator/intersect_subsurface.h b/intern/cycles/kernel/integrator/intersect_subsurface.h index 0a2c4ad680d..f439d6905a0 100644 --- a/intern/cycles/kernel/integrator/intersect_subsurface.h +++ b/intern/cycles/kernel/integrator/intersect_subsurface.h @@ -17,7 +17,7 @@ ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorStat } #endif - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h index 49ef01dc870..c2490581e4d 100644 --- a/intern/cycles/kernel/integrator/intersect_volume_stack.h +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -5,7 +5,6 @@ #include "kernel/bvh/bvh.h" #include "kernel/geom/geom.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/volume_stack.h" CCL_NAMESPACE_BEGIN @@ -24,7 +23,8 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, Ray volume_ray ccl_optional_struct_init; volume_ray.P = from_P; - volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t); + volume_ray.D = normalize_len(to_P - from_P, &volume_ray.tmax); + volume_ray.tmin = 0.0f; volume_ray.self.object = INTEGRATOR_STATE(state, isect, object); volume_ray.self.prim = INTEGRATOR_STATE(state, isect, prim); volume_ray.self.light_object = OBJECT_NONE; @@ -37,8 +37,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility); if (num_hits > 0) { Intersection *isect = hits; @@ -58,12 +57,9 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, volume_stack_enter_exit(kg, state, stack_sd); /* Move ray forward. */ - volume_ray.P = stack_sd->P; + volume_ray.tmin = intersection_t_offset(isect.t); volume_ray.self.object = isect.object; volume_ray.self.prim = isect.prim; - if (volume_ray.t != FLT_MAX) { - volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t); - } ++step; } #endif @@ -82,7 +78,8 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s /* Trace ray in random direction. Any direction works, Z up is a guess to get the * fewest hits. */ volume_ray.D = make_float3(0.0f, 0.0f, 1.0f); - volume_ray.t = FLT_MAX; + volume_ray.tmin = 0.0f; + volume_ray.tmax = FLT_MAX; volume_ray.self.object = OBJECT_NONE; volume_ray.self.prim = PRIM_NONE; volume_ray.self.light_object = OBJECT_NONE; @@ -109,8 +106,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s #ifdef __VOLUME_RECORD_ALL__ Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility); if (num_hits > 0) { int enclosed_volumes[MAX_VOLUME_STACK_SIZE]; Intersection *isect = hits; @@ -199,7 +195,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s } /* Move ray forward. */ - volume_ray.P = stack_sd->P; + volume_ray.tmin = intersection_t_offset(isect.t); volume_ray.self.object = isect.object; volume_ray.self.prim = isect.prim; ++step; @@ -222,7 +218,9 @@ ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorSt } else { /* Volume stack init for camera rays, continue with intersection of camera ray. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h index ad83f82d091..a0ad7afe591 100644 --- a/intern/cycles/kernel/integrator/mnee.h +++ b/intern/cycles/kernel/integrator/mnee.h @@ -115,7 +115,7 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg, { /* correct light sample position/direction and pdf * NOTE: preserve pdf in area measure */ - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, ls->lamp); if (ls->type == LIGHT_POINT || ls->type == LIGHT_SPOT) { ls->D = normalize_len(ls->P - P, &ls->t); @@ -137,8 +137,14 @@ ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg, } } else if (ls->type == LIGHT_AREA) { + float invarea = fabsf(klight->area.invarea); ls->D = normalize_len(ls->P - P, &ls->t); - ls->pdf = fabsf(klight->area.invarea); + ls->pdf = invarea; + if (klight->area.tan_spread > 0.f) { + ls->eval_fac = 0.25f * invarea; + ls->eval_fac *= light_spread_attenuation( + ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread); + } } ls->pdf *= kernel_data.integrator.pdf_lights; @@ -154,12 +160,12 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, ccl_private const Intersection *isect, ccl_private ShaderData *sd_vtx) { - sd_vtx->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) : + sd_vtx->object = (isect->object == OBJECT_NONE) ? kernel_data_fetch(prim_object, isect->prim) : isect->object; sd_vtx->type = isect->type; sd_vtx->flag = 0; - sd_vtx->object_flag = kernel_tex_fetch(__object_flag, sd_vtx->object); + sd_vtx->object_flag = kernel_data_fetch(object_flag, sd_vtx->object); /* Matrices and time. */ shader_setup_object_transforms(kg, sd_vtx, ray->time); @@ -171,7 +177,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, sd_vtx->u = isect->u; sd_vtx->v = isect->v; - sd_vtx->shader = kernel_tex_fetch(__tri_shader, sd_vtx->prim); + sd_vtx->shader = kernel_data_fetch(tri_shader, sd_vtx->prim); float3 verts[3]; float3 normals[3]; @@ -180,7 +186,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, triangle_vertices_and_normals(kg, sd_vtx->prim, verts, normals); /* Compute refined position (same code as in triangle_point_from_uv). */ - sd_vtx->P = isect->u * verts[0] + isect->v * verts[1] + (1.f - isect->u - isect->v) * verts[2]; + sd_vtx->P = (1.f - isect->u - isect->v) * verts[0] + isect->u * verts[1] + isect->v * verts[2]; if (!(sd_vtx->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd_vtx); sd_vtx->P = transform_point(&tfm, sd_vtx->P); @@ -207,8 +213,8 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, } /* Tangent space (position derivatives) WRT barycentric (u, v). */ - float3 dp_du = verts[0] - verts[2]; - float3 dp_dv = verts[1] - verts[2]; + float3 dp_du = verts[1] - verts[0]; + float3 dp_dv = verts[2] - verts[0]; /* Geometric normal. */ vtx->ng = normalize(cross(dp_du, dp_dv)); @@ -217,16 +223,16 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, /* Shading normals: Interpolate normals between vertices. */ float n_len; - vtx->n = normalize_len(normals[0] * sd_vtx->u + normals[1] * sd_vtx->v + - normals[2] * (1.0f - sd_vtx->u - sd_vtx->v), + vtx->n = normalize_len(normals[0] * (1.0f - sd_vtx->u - sd_vtx->v) + normals[1] * sd_vtx->u + + normals[2] * sd_vtx->v, &n_len); /* Shading normal derivatives WRT barycentric (u, v) * we calculate the derivative of n = |u*n0 + v*n1 + (1-u-v)*n2| using: * d/du [f(u)/|f(u)|] = [d/du f(u)]/|f(u)| - f(u)/|f(u)|^3 <f(u), d/du f(u)>. */ const float inv_n_len = 1.f / n_len; - float3 dn_du = inv_n_len * (normals[0] - normals[2]); - float3 dn_dv = inv_n_len * (normals[1] - normals[2]); + float3 dn_du = inv_n_len * (normals[1] - normals[0]); + float3 dn_dv = inv_n_len * (normals[2] - normals[0]); dn_du -= vtx->n * dot(vtx->n, dn_du); dn_dv -= vtx->n * dot(vtx->n, dn_dv); @@ -386,7 +392,7 @@ ccl_device_forceinline bool mnee_compute_constraint_derivatives( /* Invert (block) constraint derivative matrix and solve linear system so we can map dh back to dx: * dh / dx = A * dx = inverse(A) x dh - * to use for specular specular manifold walk + * to use for specular manifold walk * (See for example http://faculty.washington.edu/finlayso/ebook/algebraic/advanced/LUtri.htm * for block tridiagonal matrix based linear system solve) */ ccl_device_forceinline bool mnee_solve_matrix_h_to_x(int vertex_count, @@ -436,6 +442,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.light_prim = PRIM_NONE; projection_ray.dP = differential_make_compact(sd->dP); projection_ray.dD = differential_zero_compact(); + projection_ray.tmin = 0.0f; projection_ray.time = sd->time; Intersection projection_isect; @@ -499,8 +506,8 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.prim = pv.prim; projection_ray.P = pv.p; } - projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t); - projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER; + projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.tmax); + projection_ray.tmax *= MNEE_PROJECTION_DISTANCE_MULTIPLIER; bool projection_success = false; for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) { @@ -509,7 +516,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, break; int hit_object = (projection_isect.object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, projection_isect.prim) : + kernel_data_fetch(prim_object, projection_isect.prim) : projection_isect.object; if (hit_object == mv.object) { @@ -519,8 +526,7 @@ ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, projection_ray.self.object = projection_isect.object; projection_ray.self.prim = projection_isect.prim; - projection_ray.P += projection_isect.t * projection_ray.D; - projection_ray.t -= projection_isect.t; + projection_ray.tmin = intersection_t_offset(projection_isect.t); } if (!projection_success) { reduce_stepsize = true; @@ -628,9 +634,9 @@ mnee_sample_bsdf_dh(ClosureType type, float alpha_x, float alpha_y, float sample * We assume here that the pdf (in half-vector measure) is the same as * the one calculation when sampling the microfacet normals from the * specular chain above: this allows us to simplify the bsdf weight */ -ccl_device_forceinline float3 mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure, - float3 wi, - float3 wo) +ccl_device_forceinline Spectrum mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure, + float3 wi, + float3 wo) { ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)closure; @@ -801,7 +807,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, float3 wo = normalize_len(vertices[0].p - sd->P, &wo_len); /* Initialize throughput and evaluate receiver bsdf * |n.wo|. */ - shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader); + surface_shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader); /* Update light sample with new position / direct.ion * and keep pdf in vertex area measure */ @@ -829,8 +835,8 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, 1; INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + vertex_count; - float3 light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time); - bsdf_eval_mul3(throughput, light_eval / ls->pdf); + Spectrum light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time); + bsdf_eval_mul(throughput, light_eval / ls->pdf); /* Generalized geometry term. */ @@ -852,6 +858,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, Ray probe_ray; probe_ray.self.light_object = ls->object; probe_ray.self.light_prim = ls->prim; + probe_ray.tmin = 0.0f; probe_ray.dP = differential_make_compact(sd->dP); probe_ray.dD = differential_zero_compact(); probe_ray.time = sd->time; @@ -867,13 +874,13 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, ccl_private const ManifoldVertex &v = vertices[vi]; /* Check visibility. */ - probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t); + probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.tmax); if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) { int hit_object = (probe_isect.object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, probe_isect.prim) : + kernel_data_fetch(prim_object, probe_isect.prim) : probe_isect.object; /* Test whether the ray hit the appropriate object at its intended location. */ - if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE) + if (hit_object != v.object || fabsf(probe_ray.tmax - probe_isect.t) > MNEE_MIN_DISTANCE) return false; } probe_ray.self.object = v.object; @@ -906,7 +913,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + 1 + vi; /* Evaluate shader nodes at solution vi. */ - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true); /* Set light looking dir. */ @@ -917,8 +924,8 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, /* Evaluate product term inside eq.6 at solution interface. vi * divided by corresponding sampled pdf: * fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h| */ - float3 bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo); - bsdf_eval_mul3(throughput, bsdf_contribution); + Spectrum bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo); + bsdf_eval_mul(throughput, bsdf_contribution); } /* Restore original state path bounce info. */ @@ -952,15 +959,16 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, probe_ray.self.light_object = ls->object; probe_ray.self.light_prim = ls->prim; probe_ray.P = sd->P; + probe_ray.tmin = 0.0f; if (ls->t == FLT_MAX) { /* Distant / env light. */ probe_ray.D = ls->D; - probe_ray.t = ls->t; + probe_ray.tmax = ls->t; } else { /* Other lights, avoid self-intersection. */ probe_ray.D = ls->P - probe_ray.P; - probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t); + probe_ray.D = normalize_len(probe_ray.D, &probe_ray.tmax); } probe_ray.dP = differential_make_compact(sd->dP); probe_ray.dD = differential_zero_compact(); @@ -998,7 +1006,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, return 0; /* Last bool argument is the MNEE flag (for TINY_MAX_CLOSURE cap in kernel_shader.h). */ - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true); /* Get and sample refraction bsdf */ @@ -1025,10 +1033,12 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, float2 h = zero_float2(); if (microfacet_bsdf->alpha_x > 0.f && microfacet_bsdf->alpha_y > 0.f) { /* Sample transmissive microfacet bsdf. */ - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - h = mnee_sample_bsdf_dh( - bsdf->type, microfacet_bsdf->alpha_x, microfacet_bsdf->alpha_y, bsdf_u, bsdf_v); + const float2 bsdf_uv = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); + h = mnee_sample_bsdf_dh(bsdf->type, + microfacet_bsdf->alpha_x, + microfacet_bsdf->alpha_y, + bsdf_uv.x, + bsdf_uv.y); } /* Setup differential geometry on vertex. */ @@ -1042,9 +1052,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg, probe_ray.self.object = probe_isect.object; probe_ray.self.prim = probe_isect.prim; - probe_ray.P += probe_isect.t * probe_ray.D; - if (ls->t != FLT_MAX) - probe_ray.t -= probe_isect.t; + probe_ray.tmin = intersection_t_offset(probe_isect.t); }; /* Mark the manifold walk invalid to keep mollification on by default. */ diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h index ec93ac6d46f..54560905397 100644 --- a/intern/cycles/kernel/integrator/path_state.h +++ b/intern/cycles/kernel/integrator/path_state.h @@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void path_state_init_queues(IntegratorState state) { INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0; INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0; #endif @@ -48,14 +48,13 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0; INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0; INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash; - INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BOUNCE_NUM; INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | PATH_RAY_TRANSPARENT_BACKGROUND; INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f; - INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); + INTEGRATOR_STATE_WRITE(state, path, throughput) = one_spectrum(); #ifdef __MNEE__ INTEGRATOR_STATE_WRITE(state, path, mnee) = 0; @@ -75,7 +74,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, #ifdef __DENOISING_FEATURES__ if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) { INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES; - INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3(); + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_spectrum(); } #endif } @@ -250,7 +249,7 @@ ccl_device_inline float path_state_continuation_probability(KernelGlobals kg, /* Probabilistic termination: use sqrt() to roughly match typical view * transform and do path termination a bit later on average. */ - return min(sqrtf(max3(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); + return min(sqrtf(reduce_max(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); } ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state) @@ -299,38 +298,25 @@ ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState sta ccl_device_inline float path_state_rng_1D(KernelGlobals kg, ccl_private const RNGState *rng_state, - int dimension) + const int dimension) { return path_rng_1D( kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); } -ccl_device_inline void path_state_rng_2D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int dimension, - ccl_private float *fx, - ccl_private float *fy) +ccl_device_inline float2 path_state_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + const int dimension) { - path_rng_2D( - kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy); -} - -ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg, - ccl_private const RNGState *rng_state, - uint hash) -{ - /* Use a hash instead of dimension, this is not great but avoids adding - * more dimensions to each bounce which reduces quality of dimensions we - * are already using. */ - return path_rng_1D( - kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset); + return path_rng_2D( + kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); } ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, ccl_private const RNGState *rng_state, - int branch, - int num_branches, - int dimension) + const int branch, + const int num_branches, + const int dimension) { return path_rng_1D(kg, rng_state->rng_hash, @@ -338,20 +324,16 @@ ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, rng_state->rng_offset + dimension); } -ccl_device_inline void path_branched_rng_2D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int branch, - int num_branches, - int dimension, - ccl_private float *fx, - ccl_private float *fy) +ccl_device_inline float2 path_branched_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + const int branch, + const int num_branches, + const int dimension) { - path_rng_2D(kg, - rng_state->rng_hash, - rng_state->sample * num_branches + branch, - rng_state->rng_offset + dimension, - fx, - fy); + return path_rng_2D(kg, + rng_state->rng_hash, + rng_state->sample * num_branches + branch, + rng_state->rng_offset + dimension); } /* Utility functions to get light termination value, diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index 72ecf67e8a0..30ce0999258 100644 --- a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -3,18 +3,19 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/film/light_passes.h" + +#include "kernel/integrator/surface_shader.h" + #include "kernel/light/light.h" #include "kernel/light/sample.h" CCL_NAMESPACE_BEGIN -ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) +ccl_device Spectrum integrator_eval_background_shader(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) { -#ifdef __BACKGROUND__ const int shader = kernel_data.background.surface_shader; const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); @@ -26,56 +27,35 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || ((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) - return zero_float3(); + return zero_spectrum(); } /* Use fast constant background color if available. */ - float3 L = zero_float3(); - if (!shader_constant_emission_eval(kg, shader, &L)) { - /* Evaluate background shader. */ - - /* TODO: does aliasing like this break automatic SoA in CUDA? - * Should we instead store closures separate from ShaderData? */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - - PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); - shader_setup_from_background(kg, - emission_sd, - INTEGRATOR_STATE(state, ray, P), - INTEGRATOR_STATE(state, ray, D), - INTEGRATOR_STATE(state, ray, time)); - - PROFILING_SHADER(emission_sd->object, emission_sd->shader); - PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>( - kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); - - L = shader_background_eval(emission_sd); + Spectrum L = zero_spectrum(); + if (surface_shader_constant_emission(kg, shader, &L)) { + return L; } - /* Background MIS weights. */ -# ifdef __BACKGROUND_MIS__ - /* Check if background light exists or if we should skip pdf. */ - if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && - kernel_data.background.use_mis) { - const float3 ray_P = INTEGRATOR_STATE(state, ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, ray, D); - const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - - /* multiple importance sampling, get background light pdf for ray - * direction, and compute weight with respect to BSDF pdf */ - const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); - L *= mis_weight; - } -# endif + /* Evaluate background shader. */ - return L; -#else - return make_float3(0.8f, 0.8f, 0.8f); -#endif + /* TODO: does aliasing like this break automatic SoA in CUDA? + * Should we instead store closures separate from ShaderData? */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); + shader_setup_from_background(kg, + emission_sd, + INTEGRATOR_STATE(state, ray, P), + INTEGRATOR_STATE(state, ray, D), + INTEGRATOR_STATE(state, ray, time)); + + PROFILING_SHADER(emission_sd->object, emission_sd->shader); + PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>( + kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); + + return surface_shader_background(emission_sd); } ccl_device_inline void integrate_background(KernelGlobals kg, @@ -107,7 +87,7 @@ ccl_device_inline void integrate_background(KernelGlobals kg, for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { /* This path should have been resolved with mnee, it will * generate a firefly for small lights since it is improbable. */ - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp); if (klight->type == LIGHT_BACKGROUND && klight->use_caustics) { eval_background = false; break; @@ -118,17 +98,37 @@ ccl_device_inline void integrate_background(KernelGlobals kg, #endif /* __MNEE__ */ /* Evaluate background shader. */ - float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) : - zero_float3(); + Spectrum L = zero_spectrum(); + + if (eval_background) { + L = integrator_eval_background_shader(kg, state, render_buffer); + + /* When using the ao bounces approximation, adjust background + * shader intensity with ao factor. */ + if (path_state_ao_bounce(kg, state)) { + L *= kernel_data.integrator.ao_bounces_factor; + } + + /* Background MIS weights. */ + float mis_weight = 1.0f; + /* Check if background light exists or if we should skip pdf. */ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && + kernel_data.background.use_mis) { + const float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + + /* multiple importance sampling, get background light pdf for ray + * direction, and compute weight with respect to BSDF pdf */ + const float pdf = background_light_pdf(kg, ray_P, ray_D); + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf); + } - /* When using the ao bounces approximation, adjust background - * shader intensity with ao factor. */ - if (path_state_ao_bounce(kg, state)) { - L *= kernel_data.integrator.ao_bounces_factor; + L *= mis_weight; } /* Write to render buffer. */ - kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); + film_write_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); } ccl_device_inline void integrate_distant_lights(KernelGlobals kg, @@ -160,7 +160,7 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg, if (INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) { /* This path should have been resolved with mnee, it will * generate a firefly for small lights since it is improbable. */ - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp); if (klight->use_caustics) return; } @@ -170,24 +170,23 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg, /* TODO: does aliasing like this break automatic SoA in CUDA? */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); + Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); if (is_zero(light_eval)) { return; } /* MIS weighting. */ + float mis_weight = 1.0f; if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); - light_eval *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf); } /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission( - kg, state, throughput * light_eval, render_buffer, kernel_data.background.lightgroup); + film_write_surface_emission( + kg, state, light_eval, mis_weight, render_buffer, kernel_data.background.lightgroup); } } } @@ -213,7 +212,7 @@ ccl_device void integrator_shade_background(KernelGlobals kg, } #endif - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h index be926c78439..a4246f99bbf 100644 --- a/intern/cycles/kernel/integrator/shade_light.h +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -3,8 +3,8 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/film/light_passes.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/light/light.h" #include "kernel/light/sample.h" @@ -22,19 +22,8 @@ ccl_device_inline void integrate_light(KernelGlobals kg, const float3 ray_D = INTEGRATOR_STATE(state, ray, D); const float ray_time = INTEGRATOR_STATE(state, ray, time); - /* Advance ray beyond light. */ - /* TODO: can we make this more numerically robust to avoid reintersecting the - * same light in some cases? Ray should not intersect surface anymore as the - * object and prim ids will prevent self intersection. */ - const float3 new_ray_P = ray_P + ray_D * isect.t; - INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P; - INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t; - - /* Set position to where the BSDF was sampled, for correct MIS PDF. */ - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - ray_P -= ray_D * mis_ray_t; - isect.t += mis_ray_t; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = isect.t; + /* Advance ray to new start distance. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(isect.t); LightSample ls ccl_optional_struct_init; const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls); @@ -62,12 +51,13 @@ ccl_device_inline void integrate_light(KernelGlobals kg, /* TODO: does aliasing like this break automatic SoA in CUDA? */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); + Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); if (is_zero(light_eval)) { return; } /* MIS weighting. */ + float mis_weight = 1.0f; if (!(path_flag & PATH_RAY_MIS_SKIP)) { /* multiple importance sampling, get regular light pdf, * and compute weight with respect to BSDF pdf */ @@ -77,8 +67,7 @@ ccl_device_inline void integrate_light(KernelGlobals kg, } /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission(kg, state, throughput * light_eval, render_buffer, ls.group); + film_write_surface_emission(kg, state, light_eval, mis_weight, render_buffer, ls.group); } ccl_device void integrator_shade_light(KernelGlobals kg, @@ -99,11 +88,13 @@ ccl_device void integrator_shade_light(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); return; } else { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); return; } diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h index 2b929b7b62e..ba18aed6ff0 100644 --- a/intern/cycles/kernel/integrator/shade_shadow.h +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -4,7 +4,7 @@ #pragma once #include "kernel/integrator/shade_volume.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/integrator/volume_stack.h" CCL_NAMESPACE_BEGIN @@ -15,9 +15,9 @@ ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits) } #ifdef __TRANSPARENT_SHADOWS__ -ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, - IntegratorShadowState state, - const int hit) +ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg, + IntegratorShadowState state, + const int hit) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE); @@ -40,7 +40,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, /* Evaluate shader. */ if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { - shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( kg, state, shadow_sd, NULL, PATH_RAY_SHADOW); } @@ -50,7 +50,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, # endif /* Compute transparency from closures. */ - return shader_bsdf_transparency(kg, shadow_sd); + return surface_shader_transparency(kg, shadow_sd); } # ifdef __VOLUME__ @@ -58,7 +58,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, IntegratorShadowState state, const int hit, const int num_recorded_hits, - ccl_private float3 *ccl_restrict + ccl_private Spectrum *ccl_restrict throughput) { PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME); @@ -75,13 +75,9 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, ray.self.light_object = OBJECT_NONE; ray.self.light_prim = PRIM_NONE; /* Modify ray position and length to match current segment. */ - const float start_t = (hit == 0) ? 0.0f : - INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); - const float end_t = (hit < num_recorded_hits) ? - INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : - ray.t; - ray.P += start_t * ray.D; - ray.t = end_t - start_t; + ray.tmin = (hit == 0) ? ray.tmin : INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); + ray.tmax = (hit < num_recorded_hits) ? INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : + ray.tmax; shader_setup_from_volume(kg, shadow_sd, &ray); @@ -104,7 +100,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) { # ifdef __VOLUME__ if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) { - float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput); + Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput); integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput); if (is_zero(throughput)) { return true; @@ -117,8 +113,8 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, /* Surface shaders. */ if (hit < num_recorded_hits) { - const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit); - const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; + const Spectrum shadow = integrate_transparent_surface_shadow(kg, state, hit); + const Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; if (is_zero(throughput)) { return true; } @@ -137,10 +133,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, /* There are more hits that we could not recorded due to memory usage, * adjust ray to intersect again from the last hit. */ const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t); - const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D); - INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_P + last_hit_t * ray_D; - INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = intersection_t_offset(last_hit_t); } return false; @@ -158,20 +151,22 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg, /* Evaluate transparent shadows. */ const bool opaque = integrate_transparent_shadow(kg, state, num_hits); if (opaque) { - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } #endif if (shadow_intersections_has_remaining(num_hits)) { /* More intersections to find, continue shadow ray. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, + integrator_shadow_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); return; } else { - kernel_accum_light(kg, state, render_buffer); - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + film_write_direct_light(kg, state, render_buffer); + integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); return; } } diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index ce1398859b7..c19f56a9b70 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -3,14 +3,15 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/film/passes.h" +#include "kernel/film/data_passes.h" +#include "kernel/film/denoising_passes.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/mnee.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/subsurface.h" +#include "kernel/integrator/surface_shader.h" #include "kernel/integrator/volume_stack.h" #include "kernel/light/light.h" @@ -31,7 +32,52 @@ ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg, shader_setup_from_ray(kg, sd, &ray, &isect); } -#ifdef __HOLDOUT__ +ccl_device_forceinline float3 integrate_surface_ray_offset(KernelGlobals kg, + const ccl_private ShaderData *sd, + const float3 ray_P, + const float3 ray_D) +{ + /* No ray offset needed for other primitive types. */ + if (!(sd->type & PRIMITIVE_TRIANGLE)) { + return ray_P; + } + + /* Self intersection tests already account for the case where a ray hits the + * same primitive. However precision issues can still cause neighboring + * triangles to be hit. Here we test if the ray-triangle intersection with + * the same primitive would miss, implying that a neighboring triangle would + * be hit instead. + * + * This relies on triangle intersection to be watertight, and the object inverse + * object transform to match the one used by ray intersection exactly. + * + * Potential improvements: + * - It appears this happens when either barycentric coordinates are small, + * or dot(sd->Ng, ray_D) is small. Detect such cases and skip test? + * - Instead of ray offset, can we tweak P to lie within the triangle? + */ + const uint tri_vindex = kernel_data_fetch(tri_vindex, sd->prim).w; + const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0), + tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1), + tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2); + + float3 local_ray_P = ray_P; + float3 local_ray_D = ray_D; + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform itfm = object_get_inverse_transform(kg, sd); + local_ray_P = transform_point(&itfm, local_ray_P); + local_ray_D = transform_direction(&itfm, local_ray_D); + } + + if (ray_triangle_intersect_self(local_ray_P, local_ray_D, tri_a, tri_b, tri_c)) { + return ray_P; + } + else { + return ray_offset(ray_P, sd->Ng); + } +} + ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, ConstIntegratorState state, ccl_private ShaderData *sd, @@ -42,22 +88,18 @@ ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { - const float3 holdout_weight = shader_holdout_apply(kg, sd); - if (kernel_data.background.transparent) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float transparent = average(holdout_weight * throughput); - kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer); - } - if (isequal_float3(holdout_weight, one_float3())) { + const Spectrum holdout_weight = surface_shader_apply_holdout(kg, sd); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const float transparent = average(holdout_weight * throughput); + film_write_holdout(kg, state, path_flag, transparent, render_buffer); + if (isequal(holdout_weight, one_spectrum())) { return false; } } return true; } -#endif /* __HOLDOUT__ */ -#ifdef __EMISSION__ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, ConstIntegratorState state, ccl_private const ShaderData *sd, @@ -67,32 +109,29 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); /* Evaluate emissive closure. */ - float3 L = shader_emissive_eval(sd); + Spectrum L = surface_shader_emission(sd); + float mis_weight = 1.0f; -# ifdef __HAIR__ +#ifdef __HAIR__ if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_TRIANGLE)) -# else +#else if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) -# endif +#endif { const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t); + const float t = sd->ray_length; /* Multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf. */ float pdf = triangle_light_pdf(kg, sd, t); - float mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf); - L *= mis_weight; + mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf); } - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission( - kg, state, throughput * L, render_buffer, object_lightgroup(kg, sd->object)); + film_write_surface_emission( + kg, state, L, mis_weight, render_buffer, object_lightgroup(kg, sd->object)); } -#endif /* __EMISSION__ */ -#ifdef __EMISSION__ /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ template<uint node_feature_mask> @@ -111,11 +150,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_position( - kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) { + kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, &ls)) { return; } } @@ -133,15 +171,15 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, Ray ray ccl_optional_struct_init; BsdfEval bsdf_eval ccl_optional_struct_init; - const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D); + const bool is_transmission = surface_shader_is_transmission(sd, ls.D); -# ifdef __MNEE__ +#ifdef __MNEE__ int mnee_vertex_count = 0; IF_KERNEL_FEATURE(MNEE) { if (ls.lamp != LAMP_NONE) { /* Is this a caustic light? */ - const bool use_caustics = kernel_tex_fetch(__lights, ls.lamp).use_caustics; + const bool use_caustics = kernel_data_fetch(lights, ls.lamp).use_caustics; if (use_caustics) { /* Are we on a caustic caster? */ if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER)) @@ -161,16 +199,17 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, light_sample_to_surface_shadow_ray(kg, emission_sd, &ls, &ray); } else -# endif /* __MNEE__ */ +#endif /* __MNEE__ */ { - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); + const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); if (is_zero(light_eval)) { return; } /* Evaluate BSDF. */ - const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); - bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf); + const float bsdf_pdf = surface_shader_bsdf_eval( + kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); + bsdf_eval_mul(&bsdf_eval, light_eval / ls.pdf); if (ls.shader & SHADER_USE_MIS) { const float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, bsdf_pdf); @@ -190,16 +229,20 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, const bool is_light = light_sample_is_light(&ls); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false); /* Copy volume stack and enter/exit volume. */ integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); if (is_transmission) { -# ifdef __VOLUME__ +#ifdef __VOLUME__ shadow_volume_stack_enter_exit(kg, shadow_state, sd); -# endif +#endif + } + + if (ray.self.object != OBJECT_NONE) { + ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D); } /* Write shadow ray and associated state to global memory. */ @@ -213,11 +256,12 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, /* Copy state from main path to shadow path. */ uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) * + bsdf_eval_sum(&bsdf_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - packed_float3 pass_diffuse_weight; - packed_float3 pass_glossy_weight; + PackedSpectrum pass_diffuse_weight; + PackedSpectrum pass_glossy_weight; if (shadow_flag & PATH_RAY_ANY_PASS) { /* Indirect bounce, use weights from earlier surface or volume bounce. */ @@ -227,8 +271,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, else { /* Direct light, use BSDFs at this bounce. */ shadow_flag |= PATH_RAY_SURFACE_PASS; - pass_diffuse_weight = packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval)); - pass_glossy_weight = packed_float3(bsdf_eval_pass_glossy_weight(&bsdf_eval)); + pass_diffuse_weight = PackedSpectrum(bsdf_eval_pass_diffuse_weight(&bsdf_eval)); + pass_glossy_weight = PackedSpectrum(bsdf_eval_pass_glossy_weight(&bsdf_eval)); } INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; @@ -250,7 +294,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( state, path, glossy_bounce); -# ifdef __MNEE__ +#ifdef __MNEE__ if (mnee_vertex_count > 0) { INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE(state, path, transmission_bounce) + mnee_vertex_count - 1; @@ -262,7 +306,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, bounce) = INTEGRATOR_STATE(state, path, bounce) + mnee_vertex_count; } else -# endif +#endif { INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( state, path, transmission_bounce); @@ -284,7 +328,6 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, ls.group + 1 : kernel_data.background.lightgroup + 1; } -#endif /* Path tracing: bounce off or through surface with new direction. */ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( @@ -298,9 +341,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( return LABEL_NONE; } - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u); + float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); + ccl_private const ShaderClosure *sc = surface_shader_bsdf_bssrdf_pick(sd, &rand_bsdf); #ifdef __SUBSURFACE__ /* BSSRDF closure, we schedule subsurface intersection kernel. */ @@ -313,29 +355,33 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( float bsdf_pdf; BsdfEval bsdf_eval ccl_optional_struct_init; float3 bsdf_omega_in ccl_optional_struct_init; - differential3 bsdf_domega_in ccl_optional_struct_init; int label; - label = shader_bsdf_sample_closure( - kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); + label = surface_shader_bsdf_sample_closure( + kg, sd, sc, rand_bsdf, &bsdf_eval, &bsdf_omega_in, &bsdf_pdf); if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) { return LABEL_NONE; } - /* Setup ray. Note that clipping works through transparent bounces. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); - INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ? - INTEGRATOR_STATE(state, ray, t) - sd->ray_length : - FLT_MAX; + if (label & LABEL_TRANSPARENT) { + /* Only need to modify start distance for transparent. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length); + } + else { + /* Setup ray with changed origin and direction. */ + const float3 D = normalize(bsdf_omega_in); + INTEGRATOR_STATE_WRITE(state, ray, P) = integrate_surface_ray_offset(kg, sd, sd->P, D); + INTEGRATOR_STATE_WRITE(state, ray, D) = D; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); #endif + } /* Update throughput. */ - float3 throughput = INTEGRATOR_STATE(state, path, throughput); + Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf; INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; @@ -349,12 +395,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( } /* Update path state */ - if (label & LABEL_TRANSPARENT) { - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; - } - else { + if (!(label & LABEL_TRANSPARENT)) { INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); } @@ -371,17 +413,8 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState return LABEL_NONE; } - /* Setup ray position, direction stays unchanged. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - - /* Clipping works through transparent. */ - INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length; - -# ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); -# endif - - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; + /* Only modify start distance. */ + INTEGRATOR_STATE_WRITE(state, ray, tmin) = intersection_t_offset(sd->ray_length); return LABEL_TRANSMIT | LABEL_TRANSPARENT; } @@ -416,23 +449,26 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, return; } - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + const float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF); float3 ao_N; - const float3 ao_weight = shader_bsdf_ao( + const Spectrum ao_weight = surface_shader_ao( kg, sd, kernel_data.integrator.ao_additive_factor, &ao_N); float3 ao_D; float ao_pdf; - sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + sample_cos_hemisphere(ao_N, rand_bsdf.x, rand_bsdf.y, &ao_D, &ao_pdf); bool skip_self = true; Ray ray ccl_optional_struct_init; ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self); ray.D = ao_D; - ray.t = kernel_data.integrator.ao_bounces_distance; + if (skip_self) { + ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D); + } + ray.tmin = 0.0f; + ray.tmax = kernel_data.integrator.ao_bounces_distance; ray.time = sd->time; ray.self.object = (skip_self) ? sd->object : OBJECT_NONE; ray.self.prim = (skip_self) ? sd->prim : PRIM_NONE; @@ -442,7 +478,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, ray.dD = differential_zero_compact(); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, true); /* Copy volume stack and enter/exit volume. */ integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); @@ -458,7 +495,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag) | PATH_RAY_SHADOW_FOR_AO; - const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * shader_bsdf_alpha(kg, sd); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) * + surface_shader_alpha(kg, sd); INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( state, path, render_pixel_index); @@ -507,7 +545,7 @@ ccl_device bool integrate_surface(KernelGlobals kg, { /* Evaluate shader. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL); - shader_eval_surface<node_feature_mask>(kg, state, &sd, render_buffer, path_flag); + surface_shader_eval<node_feature_mask>(kg, state, &sd, render_buffer, path_flag); /* Initialize additional RNG for BSDFs. */ if (sd.flag & SD_BSDF_NEEDS_LCG) { @@ -529,21 +567,17 @@ ccl_device bool integrate_surface(KernelGlobals kg, #endif { /* Filter closures. */ - shader_prepare_surface_closures(kg, state, &sd, path_flag); + surface_shader_prepare_closures(kg, state, &sd, path_flag); -#ifdef __HOLDOUT__ /* Evaluate holdout. */ if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) { return false; } -#endif -#ifdef __EMISSION__ /* Write emission. */ if (sd.flag & SD_EMISSION) { integrate_surface_emission(kg, state, &sd, render_buffer); } -#endif /* Perform path termination. Most paths have already been terminated in * the intersect_closest kernel, this is just for emission and for dividing @@ -557,11 +591,11 @@ ccl_device bool integrate_surface(KernelGlobals kg, /* Write render passes. */ #ifdef __PASSES__ PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES); - kernel_write_data_passes(kg, state, &sd, render_buffer); + film_write_data_passes(kg, state, &sd, render_buffer); #endif #ifdef __DENOISING_FEATURES__ - kernel_write_denoising_features_surface(kg, state, &sd, render_buffer); + film_write_denoising_features_surface(kg, state, &sd, render_buffer); #endif } @@ -604,22 +638,23 @@ ccl_device bool integrate_surface(KernelGlobals kg, } template<uint node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE, - int current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> + DeviceKernel current_kernel = DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE> ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) { if (integrate_surface<node_feature_mask>(kg, state, render_buffer)) { if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + integrator_path_next( + kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); } else { - kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f); - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + kernel_assert(INTEGRATOR_STATE(state, ray, tmax) != 0.0f); + integrator_path_next(kg, state, current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); } } else { - INTEGRATOR_PATH_TERMINATE(current_kernel); + integrator_path_terminate(kg, state, current_kernel); } } diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 4a5015946aa..aaef92729d6 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -3,12 +3,13 @@ #pragma once -#include "kernel/film/accumulate.h" -#include "kernel/film/passes.h" +#include "kernel/film/data_passes.h" +#include "kernel/film/denoising_passes.h" +#include "kernel/film/light_passes.h" #include "kernel/integrator/intersect_closest.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/volume_shader.h" #include "kernel/integrator/volume_stack.h" #include "kernel/light/light.h" @@ -29,13 +30,13 @@ typedef enum VolumeIntegrateEvent { typedef struct VolumeIntegrateResult { /* Throughput and offset for direct light scattering. */ bool direct_scatter; - float3 direct_throughput; + Spectrum direct_throughput; float direct_t; ShaderVolumePhases direct_phases; /* Throughput and offset for indirect light scattering. */ bool indirect_scatter; - float3 indirect_throughput; + Spectrum indirect_throughput; float indirect_t; ShaderVolumePhases indirect_phases; } VolumeIntegrateResult; @@ -52,19 +53,19 @@ typedef struct VolumeIntegrateResult { * sigma_t = sigma_a + sigma_s */ typedef struct VolumeShaderCoefficients { - float3 sigma_t; - float3 sigma_s; - float3 emission; + Spectrum sigma_t; + Spectrum sigma_s; + Spectrum emission; } VolumeShaderCoefficients; /* Evaluate shader to get extinction coefficient at P. */ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, IntegratorShadowState state, ccl_private ShaderData *ccl_restrict sd, - ccl_private float3 *ccl_restrict extinction) + ccl_private Spectrum *ccl_restrict extinction) { VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i)) - shader_eval_volume<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass); + volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass); if (!(sd->flag & SD_EXTINCTION)) { return false; @@ -83,15 +84,16 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg, { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i)) - shader_eval_volume<false>(kg, state, sd, path_flag, volume_read_lambda_pass); + volume_shader_eval<false>(kg, state, sd, path_flag, volume_read_lambda_pass); if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) { return false; } - coeff->sigma_s = zero_float3(); - coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3(); - coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_float3(); + coeff->sigma_s = zero_spectrum(); + coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : + zero_spectrum(); + coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_spectrum(); if (sd->flag & SD_SCATTER) { for (int i = 0; i < sd->num_closure; i++) { @@ -114,7 +116,8 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg, ccl_device_forceinline void volume_step_init(KernelGlobals kg, ccl_private const RNGState *rng_state, const float object_step_size, - float t, + const float tmin, + const float tmax, ccl_private float *step_size, ccl_private float *step_shade_offset, ccl_private float *steps_offset, @@ -122,7 +125,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, { if (object_step_size == FLT_MAX) { /* Homogeneous volume. */ - *step_size = t; + *step_size = tmax - tmin; *step_shade_offset = 0.0f; *steps_offset = 1.0f; *max_steps = 1; @@ -130,6 +133,7 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, else { /* Heterogeneous volume. */ *max_steps = kernel_data.integrator.volume_max_steps; + const float t = tmax - tmin; float step = min(object_step_size, t); /* compute exact steps in advance for malloc */ @@ -141,11 +145,11 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, /* Perform shading at this offset within a step, to integrate over * over the entire step segment. */ - *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4); + *step_shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET); /* Shift starting point of all segment by this random amount to avoid * banding artifacts from the volume bounding shape. */ - *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3); + *steps_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET); } } @@ -160,12 +164,12 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg, ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, - ccl_global float3 *ccl_restrict throughput) + ccl_global Spectrum *ccl_restrict throughput) { - float3 sigma_t = zero_float3(); + Spectrum sigma_t = zero_spectrum(); if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { - *throughput *= volume_color_transmittance(sigma_t, ray->t); + *throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin); } } # endif @@ -176,14 +180,14 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, IntegratorShadowState state, ccl_private Ray *ccl_restrict ray, ccl_private ShaderData *ccl_restrict sd, - ccl_private float3 *ccl_restrict throughput, + ccl_private Spectrum *ccl_restrict throughput, const float object_step_size) { /* Load random number state. */ RNGState rng_state; shadow_path_state_rng_load(state, &rng_state); - float3 tp = *throughput; + Spectrum tp = *throughput; /* Prepare for stepping. * For shadows we do not offset all segments, since the starting point is @@ -194,7 +198,8 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, volume_step_init(kg, &rng_state, object_step_size, - ray->t, + ray->tmin, + ray->tmax, &step_size, &step_shade_offset, &unused, @@ -202,17 +207,17 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, const float steps_offset = 1.0f; /* compute extinction at the start */ - float t = 0.0f; + float t = ray->tmin; - float3 sum = zero_float3(); + Spectrum sum = zero_spectrum(); for (int i = 0; i < max_steps; i++) { /* advance to new position */ - float new_t = min(ray->t, (i + steps_offset) * step_size); + float new_t = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size); float dt = new_t - t; float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset); - float3 sigma_t = zero_float3(); + Spectrum sigma_t = zero_spectrum(); /* compute attenuation over segment */ sd->P = new_P; @@ -222,20 +227,19 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, * check then. */ sum += (-sigma_t * dt); if ((i & 0x07) == 0) { /* TODO: Other interval? */ - tp = *throughput * exp3(sum); + tp = *throughput * exp(sum); /* stop if nearly all light is blocked */ - if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON && - tp.z < VOLUME_THROUGHPUT_EPSILON) + if (reduce_max(tp) < VOLUME_THROUGHPUT_EPSILON) break; } } /* stop if at the end of the volume */ t = new_t; - if (t == ray->t) { + if (t == ray->tmax) { /* Update throughput in case we haven't done it above */ - tp = *throughput * exp3(sum); + tp = *throughput * exp(sum); break; } } @@ -257,15 +261,16 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r const float xi, ccl_private float *pdf) { - const float t = ray->t; + const float tmin = ray->tmin; + const float tmax = ray->tmax; const float delta = dot((light_P - ray->P), ray->D); const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); if (UNLIKELY(D == 0.0f)) { *pdf = 0.0f; return 0.0f; } - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); if (UNLIKELY(theta_b == theta_a)) { *pdf = 0.0f; @@ -273,7 +278,7 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r } *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); - return min(t, delta + t_); /* min is only for float precision errors */ + return clamp(delta + t_, tmin, tmax); /* clamp is only for float precision errors */ } ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, @@ -286,11 +291,12 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, return 0.0f; } - const float t = ray->t; + const float tmin = ray->tmin; + const float tmax = ray->tmax; const float t_ = sample_t - delta; - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); if (UNLIKELY(theta_b == theta_a)) { return 0.0f; } @@ -310,11 +316,12 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray, return 0.0f; } - const float t = ray->t; + const float tmin = ray->tmin; + const float tmax = ray->tmax; const float t_ = sample_t - delta; - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); + const float theta_a = atan2f(tmin - delta, D); + const float theta_b = atan2f(tmax - delta, D); if (UNLIKELY(theta_b == theta_a)) { return 0.0f; } @@ -328,22 +335,22 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray, /* Distance sampling */ ccl_device float volume_distance_sample(float max_t, - float3 sigma_t, + Spectrum sigma_t, int channel, float xi, - ccl_private float3 *transmittance, - ccl_private float3 *pdf) + ccl_private Spectrum *transmittance, + ccl_private Spectrum *pdf) { /* xi is [0, 1[ so log(0) should never happen, division by zero is * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ float sample_sigma_t = volume_channel_get(sigma_t, channel); - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t); float sample_transmittance = volume_channel_get(full_transmittance, channel); float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t); *transmittance = volume_color_transmittance(sigma_t, sample_t); - *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - full_transmittance); + *pdf = safe_divide_color(sigma_t * *transmittance, one_spectrum() - full_transmittance); /* todo: optimization: when taken together with hit/miss decision, * the full_transmittance cancels out drops out and xi does not @@ -352,33 +359,36 @@ ccl_device float volume_distance_sample(float max_t, return sample_t; } -ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t) +ccl_device Spectrum volume_distance_pdf(float max_t, Spectrum sigma_t, float sample_t) { - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); - float3 transmittance = volume_color_transmittance(sigma_t, sample_t); + Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t); + Spectrum transmittance = volume_color_transmittance(sigma_t, sample_t); - return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance); + return safe_divide_color(sigma_t * transmittance, one_spectrum() - full_transmittance); } /* Emission */ -ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff, - int closure_flag, - float3 transmittance, - float t) +ccl_device Spectrum volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff, + int closure_flag, + Spectrum transmittance, + float t) { /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t * this goes to E * t as sigma_t goes to zero * * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ - float3 emission = coeff->emission; + Spectrum emission = coeff->emission; if (closure_flag & SD_EXTINCTION) { - float3 sigma_t = coeff->sigma_t; + Spectrum sigma_t = coeff->sigma_t; - emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t; - emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t; - emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t; + FOREACH_SPECTRUM_CHANNEL (i) { + GET_SPECTRUM_CHANNEL(emission, i) *= (GET_SPECTRUM_CHANNEL(sigma_t, i) > 0.0f) ? + (1.0f - GET_SPECTRUM_CHANNEL(transmittance, i)) / + GET_SPECTRUM_CHANNEL(sigma_t, i) : + t; + } } else emission *= t; @@ -390,8 +400,8 @@ ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients typedef struct VolumeIntegrateState { /* Volume segment extents. */ - float start_t; - float end_t; + float tmin; + float tmax; /* If volume is absorption-only up to this point, and no probabilistic * scattering or termination has been used yet. */ @@ -413,27 +423,27 @@ ccl_device_forceinline void volume_integrate_step_scattering( ccl_private const Ray *ray, const float3 equiangular_light_P, ccl_private const VolumeShaderCoefficients &ccl_restrict coeff, - const float3 transmittance, + const Spectrum transmittance, ccl_private VolumeIntegrateState &ccl_restrict vstate, ccl_private VolumeIntegrateResult &ccl_restrict result) { /* Pick random color channel, we use the Veach one-sample * model with balance heuristic for the channels. */ - const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - float3 channel_pdf; + const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + Spectrum channel_pdf; const int channel = volume_sample_channel( albedo, result.indirect_throughput, vstate.rphase, &channel_pdf); /* Equiangular sampling for direct lighting. */ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) { - if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t && + if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax && vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) { - const float new_dt = result.direct_t - vstate.start_t; - const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + const float new_dt = result.direct_t - vstate.tmin; + const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); result.direct_scatter = true; result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf; - shader_copy_volume_phases(&result.direct_phases, sd); + volume_shader_copy_phases(&result.direct_phases, sd); /* Multiple importance sampling. */ if (vstate.use_mis) { @@ -458,10 +468,10 @@ ccl_device_forceinline void volume_integrate_step_scattering( /* compute sampling distance */ const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel); const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t; - const float new_t = vstate.start_t + new_dt; + const float new_t = vstate.tmin + new_dt; /* transmittance and pdf */ - const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance); if (vstate.distance_pdf * distance_pdf > VOLUME_SAMPLE_PDF_CUTOFF) { @@ -469,7 +479,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( result.indirect_scatter = true; result.indirect_t = new_t; result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf; - shader_copy_volume_phases(&result.indirect_phases, sd); + volume_shader_copy_phases(&result.indirect_phases, sd); if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { /* If using distance sampling for direct light, just copy parameters @@ -477,7 +487,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( result.direct_scatter = true; result.direct_t = result.indirect_t; result.direct_throughput = result.indirect_throughput; - shader_copy_volume_phases(&result.direct_phases, sd); + volume_shader_copy_phases(&result.direct_phases, sd); /* Multiple importance sampling. */ if (vstate.use_mis) { @@ -528,7 +538,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( volume_step_init(kg, rng_state, object_step_size, - ray->t, + ray->tmin, + ray->tmax, &step_size, &step_shade_offset, &steps_offset, @@ -536,11 +547,11 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Initialize volume integration state. */ VolumeIntegrateState vstate ccl_optional_struct_init; - vstate.start_t = 0.0f; - vstate.end_t = 0.0f; + vstate.tmin = ray->tmin; + vstate.tmax = ray->tmin; vstate.absorption_only = true; - vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE); - vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL); + vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE); + vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_CHANNEL); /* Multiple importance sampling: pick between equiangular and distance sampling strategy. */ vstate.direct_sample_method = direct_sample_method; @@ -559,7 +570,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( vstate.distance_pdf = 1.0f; /* Initialize volume integration result. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); result.direct_throughput = throughput; result.indirect_throughput = throughput; @@ -572,14 +583,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous( # ifdef __DENOISING_FEATURES__ const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES); - float3 accum_albedo = zero_float3(); + Spectrum accum_albedo = zero_spectrum(); # endif - float3 accum_emission = zero_float3(); + Spectrum accum_emission = zero_spectrum(); for (int i = 0; i < max_steps; i++) { /* Advance to new position */ - vstate.end_t = min(ray->t, (i + steps_offset) * step_size); - const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset; + vstate.tmax = min(ray->tmax, ray->tmin + (i + steps_offset) * step_size); + const float shade_t = vstate.tmin + (vstate.tmax - vstate.tmin) * step_shade_offset; sd->P = ray->P + ray->D * shade_t; /* compute segment */ @@ -588,17 +599,17 @@ ccl_device_forceinline void volume_integrate_heterogeneous( const int closure_flag = sd->flag; /* Evaluate transmittance over segment. */ - const float dt = (vstate.end_t - vstate.start_t); - const float3 transmittance = (closure_flag & SD_EXTINCTION) ? - volume_color_transmittance(coeff.sigma_t, dt) : - one_float3(); + const float dt = (vstate.tmax - vstate.tmin); + const Spectrum transmittance = (closure_flag & SD_EXTINCTION) ? + volume_color_transmittance(coeff.sigma_t, dt) : + one_spectrum(); /* Emission. */ if (closure_flag & SD_EMISSION) { /* Only write emission before indirect light scatter position, since we terminate * stepping at that point if we have already found a direct light scatter position. */ if (!result.indirect_scatter) { - const float3 emission = volume_emission_integrate( + const Spectrum emission = volume_emission_integrate( &coeff, closure_flag, transmittance, dt); accum_emission += result.indirect_throughput * emission; } @@ -609,8 +620,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous( # ifdef __DENOISING_FEATURES__ /* Accumulate albedo for denoising features. */ if (write_denoising_features && (closure_flag & SD_SCATTER)) { - const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance); + const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + accum_albedo += result.indirect_throughput * albedo * (one_spectrum() - transmittance); } # endif @@ -626,13 +637,13 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Stop if nearly all light blocked. */ if (!result.indirect_scatter) { - if (max3(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) { - result.indirect_throughput = zero_float3(); + if (reduce_max(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) { + result.indirect_throughput = zero_spectrum(); break; } } else if (!result.direct_scatter) { - if (max3(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) { + if (reduce_max(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) { break; } } @@ -645,28 +656,27 @@ ccl_device_forceinline void volume_integrate_heterogeneous( } /* Stop if at the end of the volume. */ - vstate.start_t = vstate.end_t; - if (vstate.start_t == ray->t) { + vstate.tmin = vstate.tmax; + if (vstate.tmin == ray->tmax) { break; } } /* Write accumulated emission. */ if (!is_zero(accum_emission)) { - kernel_accum_emission( + film_write_volume_emission( kg, state, accum_emission, render_buffer, object_lightgroup(kg, sd->object)); } # ifdef __DENOISING_FEATURES__ /* Write denoising features. */ if (write_denoising_features) { - kernel_write_denoising_features_volume( + film_write_denoising_features_volume( kg, state, accum_albedo, result.indirect_scatter, render_buffer); } # endif /* __DENOISING_FEATURES__ */ } -# ifdef __EMISSION__ /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ ccl_device_forceinline bool integrate_volume_sample_light( @@ -684,11 +694,10 @@ ccl_device_forceinline bool integrate_volume_sample_light( /* Sample position on a light. */ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_volume_segment( - kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls)) { + kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, ls)) { return false; } @@ -708,7 +717,7 @@ ccl_device_forceinline void integrate_volume_direct_light( ccl_private const RNGState *ccl_restrict rng_state, const float3 P, ccl_private const ShaderVolumePhases *ccl_restrict phases, - ccl_private const float3 throughput, + ccl_private const Spectrum throughput, ccl_private LightSample *ccl_restrict ls) { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT); @@ -725,11 +734,10 @@ ccl_device_forceinline void integrate_volume_direct_light( { const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT); if (!light_distribution_sample_from_position( - kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) { + kg, rand_light.x, rand_light.y, sd->time, P, bounce, path_flag, ls)) { return; } } @@ -746,21 +754,21 @@ ccl_device_forceinline void integrate_volume_direct_light( * non-constant light sources. */ ShaderDataTinyStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); + const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); if (is_zero(light_eval)) { return; } /* Evaluate BSDF. */ BsdfEval phase_eval ccl_optional_struct_init; - const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval); + const float phase_pdf = volume_shader_phase_eval(kg, sd, phases, ls->D, &phase_eval); if (ls->shader & SHADER_USE_MIS) { float mis_weight = light_sample_mis_weight_nee(kg, ls->pdf, phase_pdf); bsdf_eval_mul(&phase_eval, mis_weight); } - bsdf_eval_mul3(&phase_eval, light_eval / ls->pdf); + bsdf_eval_mul(&phase_eval, light_eval / ls->pdf); /* Path termination. */ const float terminate = path_state_rng_light_termination(kg, rng_state); @@ -774,8 +782,8 @@ ccl_device_forceinline void integrate_volume_direct_light( const bool is_light = light_sample_is_light(ls); /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + IntegratorShadowState shadow_state = integrator_shadow_path_init( + kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false); /* Write shadow ray and associated state to global memory. */ integrator_state_write_shadow_ray(kg, shadow_state, &ray); @@ -789,11 +797,11 @@ ccl_device_forceinline void integrate_volume_direct_light( const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); + const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval); if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - packed_float3 pass_diffuse_weight; - packed_float3 pass_glossy_weight; + PackedSpectrum pass_diffuse_weight; + PackedSpectrum pass_glossy_weight; if (shadow_flag & PATH_RAY_ANY_PASS) { /* Indirect bounce, use weights from earlier surface or volume bounce. */ @@ -803,8 +811,8 @@ ccl_device_forceinline void integrate_volume_direct_light( else { /* Direct light, no diffuse/glossy distinction needed for volumes. */ shadow_flag |= PATH_RAY_VOLUME_PASS; - pass_diffuse_weight = packed_float3(one_float3()); - pass_glossy_weight = packed_float3(zero_float3()); + pass_diffuse_weight = one_spectrum(); + pass_glossy_weight = zero_spectrum(); } INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight; @@ -842,7 +850,6 @@ ccl_device_forceinline void integrate_volume_direct_light( integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); } -# endif /* Path tracing: scatter in new direction using phase function */ ccl_device_forceinline bool integrate_volume_phase_scatter( @@ -854,24 +861,15 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT); - float phase_u, phase_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v); + const float2 rand_phase = path_state_rng_2D(kg, rng_state, PRNG_VOLUME_PHASE); /* Phase closure, sample direction. */ float phase_pdf; BsdfEval phase_eval ccl_optional_struct_init; float3 phase_omega_in ccl_optional_struct_init; - differential3 phase_domega_in ccl_optional_struct_init; - - const int label = shader_volume_phase_sample(kg, - sd, - phases, - phase_u, - phase_v, - &phase_eval, - &phase_omega_in, - &phase_domega_in, - &phase_pdf); + + const int label = volume_shader_phase_sample( + kg, sd, phases, rand_phase, &phase_eval, &phase_omega_in, &phase_pdf); if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) { return false; @@ -880,28 +878,27 @@ ccl_device_forceinline bool integrate_volume_phase_scatter( /* Setup ray. */ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in); - INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; # ifdef __RAY_DIFFERENTIALS__ INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in); # endif // Save memory by storing last hit prim and object in isect INTEGRATOR_STATE_WRITE(state, isect, prim) = sd->prim; INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; /* Update throughput. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; + const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput); + const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase; if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3(); - INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3(); + INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum(); + INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum(); } /* Update path state */ INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); @@ -1021,7 +1018,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, integrator_state_read_isect(kg, state, &isect); /* Set ray length to current segment. */ - ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; + ray.tmax = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; /* Clean volume stack for background rays. */ if (isect.prim == PRIM_NONE) { @@ -1032,13 +1029,15 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, if (event == VOLUME_PATH_SCATTERED) { /* Queue intersect_closest kernel. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + integrator_path_next(kg, + state, + DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); return; } else if (event == VOLUME_PATH_MISSED) { /* End path. */ - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); return; } else { diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h deleted file mode 100644 index 4da92929366..00000000000 --- a/intern/cycles/kernel/integrator/shader_eval.h +++ /dev/null @@ -1,952 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2022 Blender Foundation */ - -/* Functions to evaluate shaders and use the resulting shader closures. */ - -#pragma once - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf.h" -#include "kernel/closure/bsdf_util.h" -#include "kernel/closure/emissive.h" - -#include "kernel/film/accumulate.h" - -#include "kernel/svm/svm.h" - -#ifdef __OSL__ -# include "kernel/osl/shader.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Merging */ - -#if defined(__VOLUME__) -ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd) -{ - /* Merge identical closures to save closure space with stacked volumes. */ - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sci = &sd->closure[i]; - - if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - continue; - } - - for (int j = i + 1; j < sd->num_closure; j++) { - ccl_private ShaderClosure *scj = &sd->closure[j]; - if (sci->type != scj->type) { - continue; - } - - ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) - sci; - ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) - scj; - if (!(hgi->g == hgj->g)) { - continue; - } - - sci->weight += scj->weight; - sci->sample_weight += scj->sample_weight; - - int size = sd->num_closure - (j + 1); - if (size > 0) { - for (int k = 0; k < size; k++) { - scj[k] = scj[k + 1]; - } - } - - sd->num_closure--; - kernel_assert(sd->num_closure >= 0); - j--; - } - } -} - -ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict - phases, - ccl_private const ShaderData *ccl_restrict sd) -{ - phases->num_closure = 0; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *from_sc = &sd->closure[i]; - ccl_private const HenyeyGreensteinVolume *from_hg = - (ccl_private const HenyeyGreensteinVolume *)from_sc; - - if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; - - to_sc->weight = from_sc->weight; - to_sc->sample_weight = from_sc->sample_weight; - to_sc->g = from_hg->g; - phases->num_closure++; - if (phases->num_closure >= MAX_VOLUME_CLOSURE) { - break; - } - } - } -} -#endif /* __VOLUME__ */ - -ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg, - ConstIntegratorState state, - ccl_private ShaderData *sd, - const uint32_t path_flag) -{ - /* Filter out closures. */ - if (kernel_data.integrator.filter_closures) { - if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) { - sd->closure_emission_background = zero_float3(); - } - - if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) { - sd->flag &= ~SD_BSDF_HAS_EVAL; - } - - if (path_flag & PATH_RAY_CAMERA) { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - - if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) || - (CLOSURE_IS_BSDF_GLOSSY(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) || - (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) { - sc->type = CLOSURE_NONE_ID; - sc->sample_weight = 0.0f; - } - else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) && - (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) { - sc->type = CLOSURE_HOLDOUT_ID; - sc->sample_weight = 0.0f; - sd->flag |= SD_HOLDOUT; - } - } - } - } - - /* Defensive sampling. - * - * We can likely also do defensive sampling at deeper bounces, particularly - * for cases like a perfect mirror but possibly also others. This will need - * a good heuristic. */ - if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == - 0 && - sd->num_closure > 1) { - float sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sc->sample_weight = max(sc->sample_weight, 0.125f * sum); - } - } - } - - /* Filter glossy. - * - * Blurring of bsdf after bounces, for rays that have a small likelihood - * of following this particular path (diffuse, rough glossy) */ - if (kernel_data.integrator.filter_glossy != FLT_MAX -#ifdef __MNEE__ - && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID) -#endif - ) { - float blur_pdf = kernel_data.integrator.filter_glossy * - INTEGRATOR_STATE(state, path, min_ray_pdf); - - if (blur_pdf < 1.0f) { - float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF(sc->type)) { - bsdf_blur(kg, sc, blur_roughness); - } - } - } - } -} - -/* BSDF */ - -ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd, - const float3 omega_in) -{ - return dot(sd->N, omega_in) < 0.0f; -} - -ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_shader_flags) -{ - if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { - return false; - } - if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { - if (CLOSURE_IS_BSDF_DIFFUSE(type)) { - return true; - } - } - if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { - if (CLOSURE_IS_BSDF_GLOSSY(type)) { - return true; - } - } - if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { - if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { - return true; - } - } - return false; -} - -ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 omega_in, - const bool is_transmission, - ccl_private const ShaderClosure *skip_sc, - ccl_private BsdfEval *result_eval, - float sum_pdf, - float sum_sample_weight, - const uint light_shader_flags) -{ - /* This is the veach one-sample model with balance heuristic, - * some PDF factors drop out when using balance heuristic weighting. */ - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (sc == skip_sc) { - continue; - } - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - if (CLOSURE_IS_BSDF(sc->type) && !_shader_bsdf_exclude(sc->type, light_shader_flags)) { - float bsdf_pdf = 0.0f; - float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf); - - if (bsdf_pdf != 0.0f) { - bsdf_eval_accum(result_eval, sc->type, eval * sc->weight); - sum_pdf += bsdf_pdf * sc->sample_weight; - } - } - - sum_sample_weight += sc->sample_weight; - } - } - - return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; -} - -#ifndef __KERNEL_CUDA__ -ccl_device -#else -ccl_device_inline -#endif - float - shader_bsdf_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 omega_in, - const bool is_transmission, - ccl_private BsdfEval *bsdf_eval, - const uint light_shader_flags) -{ - bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_float3()); - - return _shader_bsdf_multi_eval( - kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); -} - -/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ -ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick( - ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu) -{ - int sampled = 0; - - if (sd->num_closure > 1) { - /* Pick a BSDF or based on sample weights. */ - float sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - float r = (*randu) * sum; - float partial_sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - float next_sum = partial_sum + sc->sample_weight; - - if (r < next_sum) { - sampled = i; - - /* Rescale to reuse for direction sample, to better preserve stratification. */ - *randu = (r - partial_sum) / sc->sample_weight; - break; - } - - partial_sum = next_sum; - } - } - } - - return &sd->closure[sampled]; -} - -/* Return weight for picked BSSRDF. */ -ccl_device_inline float3 -shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, - ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) -{ - float3 weight = bssrdf_sc->weight; - - if (sd->num_closure > 1) { - float sum = 0.0f; - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - weight *= sum / bssrdf_sc->sample_weight; - } - - return weight; -} - -/* Sample direction for picked BSDF, and return evaluation and pdf for all - * BSDFs combined using MIS. */ -ccl_device int shader_bsdf_sample_closure(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private const ShaderClosure *sc, - float randu, - float randv, - ccl_private BsdfEval *bsdf_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - /* BSSRDF should already have been handled elsewhere. */ - kernel_assert(CLOSURE_IS_BSDF(sc->type)); - - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) { - bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight); - - if (sd->num_closure > 1) { - const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in); - float sweight = sc->sample_weight; - *pdf = _shader_bsdf_multi_eval( - kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0); - } - } - - return label; -} - -ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd) -{ - float roughness = 0.0f; - float sum_weight = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF(sc->type)) { - /* sqrt once to undo the squaring from multiplying roughness on the - * two axes, and once for the squared roughness convention. */ - float weight = fabsf(average(sc->weight)); - roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); - sum_weight += weight; - } - } - - return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f; -} - -ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_HAS_ONLY_VOLUME) { - return one_float3(); - } - else if (sd->flag & SD_TRANSPARENT) { - return sd->closure_transparent_extinction; - } - else { - return zero_float3(); - } -} - -ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) -{ - if (sd->flag & SD_TRANSPARENT) { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - - if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { - sc->sample_weight = 0.0f; - sc->weight = zero_float3(); - } - } - - sd->flag &= ~SD_TRANSPARENT; - } -} - -ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd); - - alpha = max(alpha, zero_float3()); - alpha = min(alpha, one_float3()); - - return alpha; -} - -ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) - N += sc->N * fabsf(average(sc->weight)); - } - - return (is_zero(N)) ? sd->N : normalize(N); -} - -ccl_device float3 shader_bsdf_ao(KernelGlobals kg, - ccl_private const ShaderData *sd, - const float ao_factor, - ccl_private float3 *N_) -{ - float3 eval = zero_float3(); - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { - ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; - eval += sc->weight * ao_factor; - N += bsdf->N * fabsf(average(sc->weight)); - } - } - - *N_ = (is_zero(N)) ? sd->N : normalize(N); - return eval; -} - -#ifdef __SUBSURFACE__ -ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd) -{ - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSSRDF(sc->type)) { - ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; - float avg_weight = fabsf(average(sc->weight)); - - N += bssrdf->N * avg_weight; - } - } - - return (is_zero(N)) ? sd->N : normalize(N); -} -#endif /* __SUBSURFACE__ */ - -/* Constant emission optimization */ - -ccl_device bool shader_constant_emission_eval(KernelGlobals kg, - int shader, - ccl_private float3 *eval) -{ - int shader_index = shader & SHADER_MASK; - int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags; - - if (shader_flag & SD_HAS_CONSTANT_EMISSION) { - *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0], - kernel_tex_fetch(__shaders, shader_index).constant_emission[1], - kernel_tex_fetch(__shaders, shader_index).constant_emission[2]); - - return true; - } - - return false; -} - -/* Background */ - -ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_EMISSION) { - return sd->closure_emission_background; - } - else { - return zero_float3(); - } -} - -/* Emission */ - -ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_EMISSION) { - return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; - } - else { - return zero_float3(); - } -} - -/* Holdout */ - -ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd) -{ - float3 weight = zero_float3(); - - /* For objects marked as holdout, preserve transparency and remove all other - * closures, replacing them with a holdout weight. */ - if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { - if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { - weight = one_float3() - sd->closure_transparent_extinction; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { - sc->type = NBUILTIN_CLOSURES; - } - } - - sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); - } - else { - weight = one_float3(); - } - } - else { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_HOLDOUT(sc->type)) { - weight += sc->weight; - } - } - } - - return weight; -} - -/* Surface Evaluation */ - -template<uint node_feature_mask, typename ConstIntegratorGenericState> -ccl_device void shader_eval_surface(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *ccl_restrict sd, - ccl_global float *ccl_restrict buffer, - uint32_t path_flag, - bool use_caustics_storage = false) -{ - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. */ - int max_closures; - if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures; - } - - sd->num_closure = 0; - sd->num_closure_left = max_closures; - -#ifdef __OSL__ - if (kg->osl) { - if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { - OSLShader::eval_background(kg, state, sd, path_flag); - } - else { - OSLShader::eval_surface(kg, state, sd, path_flag); - } - } - else -#endif - { -#ifdef __SVM__ - svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag); -#else - if (sd->object == OBJECT_NONE) { - sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); - sd->flag |= SD_EMISSION; - } - else { - ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( - sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); - if (bsdf != NULL) { - bsdf->N = sd->N; - sd->flag |= bsdf_diffuse_setup(bsdf); - } - } -#endif - } -} - -/* Volume */ - -#ifdef __VOLUME__ - -ccl_device_inline float _shader_volume_phase_multi_eval( - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - const float3 omega_in, - int skip_phase, - ccl_private BsdfEval *result_eval, - float sum_pdf, - float sum_sample_weight) -{ - for (int i = 0; i < phases->num_closure; i++) { - if (i == skip_phase) - continue; - - ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; - float phase_pdf = 0.0f; - float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); - - if (phase_pdf != 0.0f) { - bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - sum_pdf += phase_pdf * svc->sample_weight; - } - - sum_sample_weight += svc->sample_weight; - } - - return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; -} - -ccl_device float shader_volume_phase_eval(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - const float3 omega_in, - ccl_private BsdfEval *phase_eval) -{ - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_float3()); - - return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); -} - -ccl_device int shader_volume_phase_sample(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - float randu, - float randv, - ccl_private BsdfEval *phase_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - int sampled = 0; - - if (phases->num_closure > 1) { - /* pick a phase closure based on sample weights */ - float sum = 0.0f; - - for (sampled = 0; sampled < phases->num_closure; sampled++) { - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - sum += svc->sample_weight; - } - - float r = randu * sum; - float partial_sum = 0.0f; - - for (sampled = 0; sampled < phases->num_closure; sampled++) { - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - float next_sum = partial_sum + svc->sample_weight; - - if (r <= next_sum) { - /* Rescale to reuse for BSDF direction sample. */ - randu = (r - partial_sum) / svc->sample_weight; - break; - } - - partial_sum = next_sum; - } - - if (sampled == phases->num_closure) { - *pdf = 0.0f; - return LABEL_NONE; - } - } - - /* todo: this isn't quite correct, we don't weight anisotropy properly - * depending on color channels, even if this is perhaps not a common case */ - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) { - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - } - - return label; -} - -ccl_device int shader_phase_sample_closure(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumeClosure *sc, - float randu, - float randv, - ccl_private BsdfEval *phase_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) - bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); - - return label; -} - -/* Volume Evaluation */ - -template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState> -ccl_device_inline void shader_eval_volume(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *ccl_restrict sd, - const uint32_t path_flag, - StackReadOp stack_read) -{ - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. */ - int max_closures; - if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = kernel_data.max_closures; - } - - /* reset closures once at the start, we will be accumulating the closures - * for all volumes in the stack into a single array of closures */ - sd->num_closure = 0; - sd->num_closure_left = max_closures; - sd->flag = 0; - sd->object_flag = 0; - - for (int i = 0;; i++) { - const VolumeStack entry = stack_read(i); - if (entry.shader == SHADER_NONE) { - break; - } - - /* Setup shader-data from stack. it's mostly setup already in - * shader_setup_from_volume, this switching should be quick. */ - sd->object = entry.object; - sd->lamp = LAMP_NONE; - sd->shader = entry.shader; - - sd->flag &= ~SD_SHADER_FLAGS; - sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag &= ~SD_OBJECT_FLAGS; - - if (sd->object != OBJECT_NONE) { - sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); - -# ifdef __OBJECT_MOTION__ - /* todo: this is inefficient for motion blur, we should be - * caching matrices instead of recomputing them each step */ - shader_setup_object_transforms(kg, sd, sd->time); - - if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) != 0) { - AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY); - kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND); - - const float3 P = sd->P; - const float velocity_scale = kernel_tex_fetch(__objects, sd->object).velocity_scale; - const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? - 0.5f : - 0.0f; - const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ? - (1.0f - kernel_data.cam.shuttertime) + sd->time : - sd->time; - - /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity - * existed, or will exist, at the given time: - * - * `phi(x, T) = phi(x - (T - t) * u(x, T), t)` - * - * where - * - * x : position - * T : super-sampled time (or ray time) - * t : current time of the simulation (in rendering we assume this is center frame with - * relative time = 0) - * phi : the volume quantity - * u : the velocity field - * - * But first we need to determine the velocity field `u(x, T)`, which we can estimate also - * using semi-lagrangian advection. - * - * `u(x, T) = u(x - (T - t) * u(x, T), t)` - * - * This is the typical way to model self-advection in fluid dynamics, however, we do not - * account for other forces affecting the velocity during simulation (pressure, buoyancy, - * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better - * results, a higher order interpolation scheme can be used (at the cost of more lookups), - * or an interpolation of the velocity fields for the previous and next frames could also - * be used to estimate `u(x, T)` (which will cost more memory and lookups). - * - * References: - * "Eulerian Motion Blur", Kim and Ko, 2007 - * "Production Volume Rendering", Wreninge et al., 2012 - */ - - /* Find velocity. */ - float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc); - object_dir_transform(kg, sd, &velocity); - - /* Find advected P. */ - sd->P = P - (time - time_offset) * velocity_scale * velocity; - - /* Find advected velocity. */ - velocity = primitive_volume_attribute_float3(kg, sd, v_desc); - object_dir_transform(kg, sd, &velocity); - - /* Find advected P. */ - sd->P = P - (time - time_offset) * velocity_scale * velocity; - } -# endif - } - - /* evaluate shader */ -# ifdef __SVM__ -# ifdef __OSL__ - if (kg->osl) { - OSLShader::eval_volume(kg, state, sd, path_flag); - } - else -# endif - { - svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>( - kg, state, sd, NULL, path_flag); - } -# endif - - /* Merge closures to avoid exceeding number of closures limit. */ - if (!shadow) { - if (i > 0) { - shader_merge_volume_closures(sd); - } - } - } -} - -#endif /* __VOLUME__ */ - -/* Displacement Evaluation */ - -template<typename ConstIntegratorGenericState> -ccl_device void shader_eval_displacement(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *sd) -{ - sd->num_closure = 0; - sd->num_closure_left = 0; - - /* this will modify sd->P */ -#ifdef __SVM__ -# ifdef __OSL__ - if (kg->osl) - OSLShader::eval_displacement(kg, state, sd); - else -# endif - { - svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>( - kg, state, sd, NULL, 0); - } -#endif -} - -/* Cryptomatte */ - -ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader) -{ - return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h index 42d44580f80..a620853faea 100644 --- a/intern/cycles/kernel/integrator/shadow_catcher.h +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -3,7 +3,6 @@ #pragma once -#include "kernel/film/write_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/state_util.h" @@ -50,7 +49,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, ConstIntegratorState state) { - if (INTEGRATOR_PATH_IS_TERMINATED) { + if (integrator_path_is_terminated(state)) { return false; } @@ -76,28 +75,6 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t return path_flag & PATH_RAY_SHADOW_CATCHER_PASS; } -/* Write shadow catcher passes on a bounce from the shadow catcher object. */ -ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( - KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset; - - /* Count sample for the shadow catcher object. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); - - /* Since the split is done, the sample does not contribute to the matte, so accumulate it as - * transparency to the matte. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, - average(throughput)); -} - #endif /* __SHADOW_CATCHER__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h index eaee65ada40..3b490ecffdd 100644 --- a/intern/cycles/kernel/integrator/shadow_state_template.h +++ b/intern/cycles/kernel/integrator/shadow_state_template.h @@ -27,15 +27,15 @@ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_T /* enum PathRayFlag */ KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING) /* Throughput for shadow pass. */ KERNEL_STRUCT_MEMBER(shadow_path, - packed_float3, + PackedSpectrum, unshadowed_throughput, KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE) /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */ -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) -KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) /* Number of intersections found by ray-tracing. */ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) /* Light group. */ @@ -47,7 +47,8 @@ KERNEL_STRUCT_END(shadow_path) KERNEL_STRUCT_BEGIN(shadow_ray) KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, tmin, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, tmax, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(shadow_ray, int, object, KERNEL_FEATURE_PATH_TRACING) diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h index d6fef27f344..d1907bd6e16 100644 --- a/intern/cycles/kernel/integrator/state.h +++ b/intern/cycles/kernel/integrator/state.h @@ -127,6 +127,9 @@ typedef struct IntegratorStateGPU { /* Index of main path which will be used by a next shadow catcher split. */ ccl_global int *next_main_path_index; + + /* Divisor used to partition active indices by locality when sorting by material. */ + uint sort_partition_divisor; } IntegratorStateGPU; /* Abstraction @@ -137,7 +140,7 @@ typedef struct IntegratorStateGPU { * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors * from a kernel which operates on a shadow catcher state will cause bad memory access. */ -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ /* Scalar access on CPU. */ @@ -156,7 +159,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState; # define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ ((state)->nested_struct[array_index].member) -#else /* __KERNEL_CPU__ */ +#else /* !__KERNEL_GPU__ */ /* Array access on GPU with Structure-of-Arrays. */ @@ -177,6 +180,6 @@ typedef int ConstIntegratorShadowState; # define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) -#endif /* __KERNEL_CPU__ */ +#endif /* !__KERNEL_GPU__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h index fed74d49434..4b03c665e17 100644 --- a/intern/cycles/kernel/integrator/state_flow.h +++ b/intern/cycles/kernel/integrator/state_flow.h @@ -10,125 +10,196 @@ CCL_NAMESPACE_BEGIN /* Control Flow * - * Utilities for control flow between kernels. The implementation may differ per device - * or even be handled on the host side. To abstract such differences, experiment with - * different implementations and for debugging, this is abstracted using macros. + * Utilities for control flow between kernels. The implementation is different between CPU and + * GPU devices. For the latter part of the logic is handled on the host side with wavefronts. * * There is a main path for regular path tracing camera for path tracing. Shadows for next * event estimation branch off from this into their own path, that may be computed in - * parallel while the main path continues. + * parallel while the main path continues. Additionally, shading kernels are sorted using + * a key for coherence. * * Each kernel on the main path must call one of these functions. These may not be called * multiple times from the same kernel. * - * INTEGRATOR_PATH_INIT(next_kernel) - * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) - * INTEGRATOR_PATH_TERMINATE(current_kernel) + * integrator_path_init(kg, state, next_kernel) + * integrator_path_next(kg, state, current_kernel, next_kernel) + * integrator_path_terminate(kg, state, current_kernel) * * For the shadow path similar functions are used, and again each shadow kernel must call * one of them, and only once. */ -#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0) -#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \ - (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0) +ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state) +{ + return INTEGRATOR_STATE(state, path, queued_kernel) == 0; +} + +ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state) +{ + return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0; +} #ifdef __KERNEL_GPU__ -# define INTEGRATOR_PATH_INIT(next_kernel) \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.next_shadow_path_index[0], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; - -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - const int key_ = key; \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - const int key_ = key; \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } +ccl_device_forceinline void integrator_path_init(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_next(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; +} + +ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( + KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao) +{ + IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( + &kernel_integrator_state.next_shadow_path_index[0], 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; + return shadow_state; +} + +ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel) +{ + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; +} + +/* Sort first by truncated state index (for good locality), then by key (for good coherence). */ +# define INTEGRATOR_SORT_KEY(key, state) \ + (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor)) + +ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel, + const uint32_t key) +{ + const int key_ = INTEGRATOR_SORT_KEY(key, state); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1); +} + +ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel, + const uint32_t key) +{ + const int key_ = INTEGRATOR_SORT_KEY(key, state); + atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel], + 1); + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1); +} #else -# define INTEGRATOR_PATH_INIT(next_kernel) \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - } -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - (void)current_kernel; \ - } - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = &state->shadow_type; \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \ - (void)current_kernel; \ - } +ccl_device_forceinline void integrator_path_init(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +} + +ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel next_kernel, + const uint32_t key) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)key; +} + +ccl_device_forceinline void integrator_path_next(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, + IntegratorState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel, + const uint32_t key) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; + (void)key; + (void)current_kernel; +} + +ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init( + KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao) +{ + IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; + return shadow_state; +} + +ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel, + const DeviceKernel next_kernel) +{ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; + (void)current_kernel; +} + +ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, + IntegratorShadowState state, + const DeviceKernel current_kernel) +{ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; + (void)current_kernel; +} #endif diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h index e7e6db037b0..f4e280e4cb2 100644 --- a/intern/cycles/kernel/integrator/state_template.h +++ b/intern/cycles/kernel/integrator/state_template.h @@ -37,22 +37,21 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) /* enum PathRayMNEE */ KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING) /* Multiple importance sampling - * The PDF of BSDF sampling at the last scatter point, and distance to the - * last scatter point minus the last ray segment. This distance lets us - * compute the complete distance through transparent surfaces and volumes. */ + * The PDF of BSDF sampling at the last scatter point, which is at ray distance + * zero and distance. Note that transparency and volume attenuation increase + * the ray tmin but keep P unmodified so that this works. */ KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING) /* Filter glossy. */ KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING) /* Continuation probability for path termination. */ KERNEL_STRUCT_MEMBER(path, float, continuation_probability, KERNEL_FEATURE_PATH_TRACING) /* Throughput. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING) /* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) -KERNEL_STRUCT_MEMBER(path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES) /* Denoising. */ -KERNEL_STRUCT_MEMBER(path, packed_float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) +KERNEL_STRUCT_MEMBER(path, PackedSpectrum, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) /* Shader sorting. */ /* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */ KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING) @@ -63,7 +62,8 @@ KERNEL_STRUCT_END(path) KERNEL_STRUCT_BEGIN(ray) KERNEL_STRUCT_MEMBER(ray, packed_float3, P, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, packed_float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, tmin, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, tmax, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING) @@ -84,8 +84,8 @@ KERNEL_STRUCT_END(isect) /*************** Subsurface closure state for subsurface kernel ***************/ KERNEL_STRUCT_BEGIN(subsurface) -KERNEL_STRUCT_MEMBER(subsurface, packed_float3, albedo, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_MEMBER(subsurface, packed_float3, radius, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, albedo, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, radius, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_MEMBER(subsurface, packed_float3, Ng, KERNEL_FEATURE_SUBSURFACE) KERNEL_STRUCT_END(subsurface) diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h index 280db2d1aac..168122d3a78 100644 --- a/intern/cycles/kernel/integrator/state_util.h +++ b/intern/cycles/kernel/integrator/state_util.h @@ -17,7 +17,8 @@ ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg, { INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P; INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = ray->tmin; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = ray->tmax; INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time; INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP; INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD; @@ -29,7 +30,8 @@ ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg, { ray->P = INTEGRATOR_STATE(state, ray, P); ray->D = INTEGRATOR_STATE(state, ray, D); - ray->t = INTEGRATOR_STATE(state, ray, t); + ray->tmin = INTEGRATOR_STATE(state, ray, tmin); + ray->tmax = INTEGRATOR_STATE(state, ray, tmax); ray->time = INTEGRATOR_STATE(state, ray, time); ray->dP = INTEGRATOR_STATE(state, ray, dP); ray->dD = INTEGRATOR_STATE(state, ray, dD); @@ -42,7 +44,8 @@ ccl_device_forceinline void integrator_state_write_shadow_ray( { INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P; INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmin) = ray->tmin; + INTEGRATOR_STATE_WRITE(state, shadow_ray, tmax) = ray->tmax; INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time; INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP; } @@ -53,7 +56,8 @@ ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg, { ray->P = INTEGRATOR_STATE(state, shadow_ray, P); ray->D = INTEGRATOR_STATE(state, shadow_ray, D); - ray->t = INTEGRATOR_STATE(state, shadow_ray, t); + ray->tmin = INTEGRATOR_STATE(state, shadow_ray, tmin); + ray->tmax = INTEGRATOR_STATE(state, shadow_ray, tmax); ray->time = INTEGRATOR_STATE(state, shadow_ray, time); ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP); ray->dD = differential_zero_compact(); @@ -334,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl return to_state; } -#ifdef __KERNEL_CPU__ +#ifndef __KERNEL_GPU__ ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int) { return INTEGRATOR_STATE(state, path, bounce); diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index b449f807290..15c2cb1c708 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -15,9 +15,9 @@ #include "kernel/integrator/intersect_volume_stack.h" #include "kernel/integrator/path_state.h" -#include "kernel/integrator/shader_eval.h" #include "kernel/integrator/subsurface_disk.h" #include "kernel/integrator/subsurface_random_walk.h" +#include "kernel/integrator/surface_shader.h" CCL_NAMESPACE_BEGIN @@ -38,7 +38,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg, /* Setup ray into surface. */ INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N; - INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f; + INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX; INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact(); @@ -50,12 +51,10 @@ ccl_device int subsurface_bounce(KernelGlobals kg, PATH_RAY_SUBSURFACE_RANDOM_WALK); /* Compute weight, optionally including Fresnel from entry point. */ - float3 weight = shader_bssrdf_sample_weight(sd, sc); -# ifdef __PRINCIPLED__ + Spectrum weight = surface_shader_bssrdf_sample_weight(sd, sc); if (bssrdf->roughness != FLT_MAX) { path_flag |= PATH_RAY_SUBSURFACE_USE_FRESNEL; } -# endif if (sd->flag & SD_BACKFACING) { path_flag |= PATH_RAY_SUBSURFACE_BACKFACING; @@ -69,8 +68,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg, if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { if (INTEGRATOR_STATE(state, path, bounce) == 0) { - INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3(); - INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3(); + INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum(); + INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum(); } } @@ -90,7 +89,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, /* Get bump mapped normal from shader evaluation at exit point. */ float3 N = sd->N; if (sd->flag & SD_HAS_BSSRDF_BUMP) { - N = shader_bssrdf_normal(sd); + N = surface_shader_bssrdf_normal(sd); } /* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. */ @@ -98,9 +97,8 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, sd->num_closure = 0; sd->num_closure_left = kernel_data.max_closures; - const float3 weight = one_float3(); + const Spectrum weight = one_spectrum(); -# ifdef __PRINCIPLED__ if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) { ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( sd, sizeof(PrincipledDiffuseBsdf), weight); @@ -111,9 +109,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg, sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT_EXIT); } } - else -# endif /* __PRINCIPLED__ */ - { + else { ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( sd, sizeof(DiffuseBsdf), weight); @@ -147,7 +143,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat /* Update volume stack if needed. */ if (kernel_data.integrator.use_volumes) { const int object = ss_isect.hits[0].object; - const int object_flag = kernel_tex_fetch(__object_flag, object); + const int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { float3 P = INTEGRATOR_STATE(state, ray, P); @@ -160,7 +156,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat /* Pretend ray is coming from the outside towards the exit point. This ensures * correct front/back facing normals. * TODO: find a more elegant solution? */ - ray.P += ray.D * ray.t * 2.0f; + ray.P += ray.D * ray.tmax * 2.0f; ray.D = -ray.D; integrator_state_write_isect(kg, state, &ss_isect.hits[0]); @@ -170,24 +166,30 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; const int shader = intersection_get_shader(kg, &ss_isect.hits[0]); - const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; + const int shader_flags = kernel_data_fetch(shaders, shader).flags; const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); if (use_caustics) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); } else if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + integrator_path_next_sorted(kg, + state, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); } diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h index 34330671748..a44b6a74d7b 100644 --- a/intern/cycles/kernel/integrator/subsurface_disk.h +++ b/intern/cycles/kernel/integrator/subsurface_disk.h @@ -9,11 +9,11 @@ CCL_NAMESPACE_BEGIN * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf */ -ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, float r) +ccl_device_inline Spectrum subsurface_disk_eval(const Spectrum radius, float disk_r, float r) { - const float3 eval = bssrdf_eval(radius, r); + const Spectrum eval = bssrdf_eval(radius, r); const float pdf = bssrdf_pdf(radius, disk_r); - return (pdf > 0.0f) ? eval / pdf : zero_float3(); + return (pdf > 0.0f) ? eval / pdf : zero_spectrum(); } /* Subsurface scattering step, from a point on the surface to other @@ -25,8 +25,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, ccl_private LocalIntersection &ss_isect) { - float disk_u, disk_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v); + float2 rand_disk = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_DISK); /* Read shading point info from integrator state. */ const float3 P = INTEGRATOR_STATE(state, ray, P); @@ -37,7 +36,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); /* Read subsurface scattering parameters. */ - const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius); /* Pick random axis in local frame and point on disk. */ float3 disk_N, disk_T, disk_B; @@ -46,20 +45,20 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, disk_N = Ng; make_orthonormals(disk_N, &disk_T, &disk_B); - if (disk_v < 0.5f) { + if (rand_disk.y < 0.5f) { pick_pdf_N = 0.5f; pick_pdf_T = 0.25f; pick_pdf_B = 0.25f; - disk_v *= 2.0f; + rand_disk.y *= 2.0f; } - else if (disk_v < 0.75f) { + else if (rand_disk.y < 0.75f) { float3 tmp = disk_N; disk_N = disk_T; disk_T = tmp; pick_pdf_N = 0.25f; pick_pdf_T = 0.5f; pick_pdf_B = 0.25f; - disk_v = (disk_v - 0.5f) * 4.0f; + rand_disk.y = (rand_disk.y - 0.5f) * 4.0f; } else { float3 tmp = disk_N; @@ -68,21 +67,22 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, pick_pdf_N = 0.25f; pick_pdf_T = 0.25f; pick_pdf_B = 0.5f; - disk_v = (disk_v - 0.75f) * 4.0f; + rand_disk.y = (rand_disk.y - 0.75f) * 4.0f; } /* Sample point on disk. */ - float phi = M_2PI_F * disk_v; + float phi = M_2PI_F * rand_disk.y; float disk_height, disk_r; - bssrdf_sample(radius, disk_u, &disk_r, &disk_height); + bssrdf_sample(radius, rand_disk.x, &disk_r, &disk_height); float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; /* Create ray. */ ray.P = P + disk_N * disk_height + disk_P; ray.D = -disk_N; - ray.t = 2.0f * disk_height; + ray.tmin = 0.0f; + ray.tmax = 2.0f * disk_height; ray.dP = ray_dP; ray.dD = differential_zero_compact(); ray.time = time; @@ -107,13 +107,13 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, * traversal algorithm. */ sort_intersections_and_normals(ss_isect.hits, ss_isect.Ng, num_eval_hits); - float3 weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ + Spectrum weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ float sum_weights = 0.0f; for (int hit = 0; hit < num_eval_hits; hit++) { /* Get geometric normal. */ const int object = ss_isect.hits[hit].object; - const int object_flag = kernel_tex_fetch(__object_flag, object); + const int object_flag = kernel_data_fetch(object_flag, object); float3 hit_Ng = ss_isect.Ng[hit]; if (path_flag & PATH_RAY_SUBSURFACE_BACKFACING) { hit_Ng = -hit_Ng; @@ -125,17 +125,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { /* Transform normal to world space. */ Transform itfm; - Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm); + object_fetch_transform_motion_test(kg, object, time, &itfm); hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng)); - - /* Transform t to world space, except for OptiX and MetalRT where it already is. */ -#ifdef __KERNEL_GPU_RAYTRACING__ - (void)tfm; -#else - float3 D = transform_direction(&itfm, ray.D); - D = normalize(D) * ss_isect.hits[hit].t; - ss_isect.hits[hit].t = len(transform_direction(&tfm, D)); -#endif } /* Quickly retrieve P and Ng without setting up ShaderData. */ @@ -158,7 +149,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, const float r = len(hit_P - P); /* Evaluate profiles. */ - const float3 weight = subsurface_disk_eval(radius, disk_r, r) * w; + const Spectrum weight = subsurface_disk_eval(radius, disk_r, r) * w; /* Store result. */ ss_isect.Ng[hit] = hit_Ng; @@ -171,11 +162,12 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, } /* Use importance resampling, sampling one of the hits proportional to weight. */ - const float r = lcg_step_float(&lcg_state) * sum_weights; + const float rand_resample = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_DISK_RESAMPLE); + const float r = rand_resample * sum_weights; float partial_sum = 0.0f; for (int hit = 0; hit < num_eval_hits; hit++) { - const float3 weight = weights[hit]; + const Spectrum weight = weights[hit]; const float sample_weight = average(fabs(weight)); float next_sum = partial_sum + sample_weight; @@ -188,7 +180,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg, ray.P = ray.P + ray.D * ss_isect.hits[hit].t; ray.D = ss_isect.Ng[hit]; - ray.t = 1.0f; + ray.tmin = 0.0f; + ray.tmax = 1.0f; return true; } diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index b6cd4aae195..a6a59e286c9 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -65,19 +65,20 @@ ccl_device void subsurface_random_walk_remap(const float albedo, *sigma_t = sigma_t_prime / (1.0f - g); } -ccl_device void subsurface_random_walk_coefficients(const float3 albedo, - const float3 radius, +ccl_device void subsurface_random_walk_coefficients(const Spectrum albedo, + const Spectrum radius, const float anisotropy, - ccl_private float3 *sigma_t, - ccl_private float3 *alpha, - ccl_private float3 *throughput) + ccl_private Spectrum *sigma_t, + ccl_private Spectrum *alpha, + ccl_private Spectrum *throughput) { - float sigma_t_x, sigma_t_y, sigma_t_z; - float alpha_x, alpha_y, alpha_z; - - subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x); - subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y); - subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z); + FOREACH_SPECTRUM_CHANNEL (i) { + subsurface_random_walk_remap(GET_SPECTRUM_CHANNEL(albedo, i), + GET_SPECTRUM_CHANNEL(radius, i), + anisotropy, + &GET_SPECTRUM_CHANNEL(*sigma_t, i), + &GET_SPECTRUM_CHANNEL(*alpha, i)); + } /* Throughput already contains closure weight at this point, which includes the * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo @@ -88,21 +89,12 @@ ccl_device void subsurface_random_walk_coefficients(const float3 albedo, * infinite phase functions. To avoid a sharp discontinuity as we go from * such values to 0.0, increase alpha and reduce the throughput to compensate. */ const float min_alpha = 0.2f; - if (alpha_x < min_alpha) { - (*throughput).x *= alpha_x / min_alpha; - alpha_x = min_alpha; - } - if (alpha_y < min_alpha) { - (*throughput).y *= alpha_y / min_alpha; - alpha_y = min_alpha; - } - if (alpha_z < min_alpha) { - (*throughput).z *= alpha_z / min_alpha; - alpha_z = min_alpha; + FOREACH_SPECTRUM_CHANNEL (i) { + if (GET_SPECTRUM_CHANNEL(*alpha, i) < min_alpha) { + GET_SPECTRUM_CHANNEL(*throughput, i) *= GET_SPECTRUM_CHANNEL(*alpha, i) / min_alpha; + GET_SPECTRUM_CHANNEL(*alpha, i) = min_alpha; + } } - - *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); - *alpha = make_float3(alpha_x, alpha_y, alpha_z); } /* References for Dwivedi sampling: @@ -151,12 +143,12 @@ ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, f return dir.x * T + dir.y * B + dir.z * D; } -ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, - float t, - bool hit, - ccl_private float3 *transmittance) +ccl_device_forceinline Spectrum subsurface_random_walk_pdf(Spectrum sigma_t, + float t, + bool hit, + ccl_private Spectrum *transmittance) { - float3 T = volume_color_transmittance(sigma_t, t); + Spectrum T = volume_color_transmittance(sigma_t, t); if (transmittance) { *transmittance = T; } @@ -173,8 +165,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, ccl_private Ray &ray, ccl_private LocalIntersection &ss_isect) { - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + const float2 rand_bsdf = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF); const float3 P = INTEGRATOR_STATE(state, ray, P); const float3 N = INTEGRATOR_STATE(state, ray, D); @@ -187,7 +178,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* Sample diffuse surface scatter into the object. */ float3 D; float pdf; - sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf); + sample_cos_hemisphere(-N, rand_bsdf.x, rand_bsdf.y, &D, &pdf); if (dot(-Ng, D) <= 0.0f) { return false; } @@ -195,7 +186,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* Setup ray. */ ray.P = P; ray.D = D; - ray.t = FLT_MAX; + ray.tmin = 0.0f; + ray.tmax = FLT_MAX; ray.time = time; ray.dP = ray_dP; ray.dD = differential_zero_compact(); @@ -204,22 +196,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, ray.self.light_object = OBJECT_NONE; ray.self.light_prim = PRIM_NONE; -#ifndef __KERNEL_GPU_RAYTRACING__ - /* Compute or fetch object transforms. */ - Transform ob_itfm ccl_optional_struct_init; - Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); -#endif - /* Convert subsurface to volume coefficients. * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ - const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo); - const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + const Spectrum albedo = INTEGRATOR_STATE(state, subsurface, albedo); + const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius); const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy); - float3 sigma_t, alpha; - float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput); + Spectrum sigma_t, alpha; + Spectrum throughput = INTEGRATOR_STATE_WRITE(state, path, throughput); subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); - float3 sigma_s = sigma_t * alpha; + Spectrum sigma_s = sigma_t * alpha; /* Theoretically it should be better to use the exact alpha for the channel we're sampling at * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange @@ -229,7 +215,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, * Since the strength of the guided sampling increases as alpha gets lower, using a value that * is too low results in fireflies while one that's too high just gives a bit more noise. * Therefore, the code here uses the highest of the three albedos to be safe. */ - const float diffusion_length = diffusion_length_dwivedi(max3(alpha)); + const float diffusion_length = diffusion_length_dwivedi(reduce_max(alpha)); if (diffusion_length == 1.0f) { /* With specific values of alpha the length might become 1, which in asymptotic makes phase to @@ -242,7 +228,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); /* Modify state for RNGs, decorrelated from other paths. */ - rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); + rng_state.rng_hash = hash_hp_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); /* Random walk until we hit the surface again. */ bool hit = false; @@ -254,10 +240,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f)); #ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL - float3 sigma_s_star = sigma_s * (1.0f - anisotropy); - float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star; - float3 sigma_t_org = sigma_t; - float3 sigma_s_org = sigma_s; + Spectrum sigma_s_star = sigma_s * (1.0f - anisotropy); + Spectrum sigma_t_star = sigma_t - sigma_s + sigma_s_star; + Spectrum sigma_t_org = sigma_t; + Spectrum sigma_s_org = sigma_s; const float anisotropy_org = anisotropy; const float guided_fraction_org = guided_fraction; #endif @@ -269,7 +255,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, #ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL // shadow with local variables according to depth float anisotropy, guided_fraction; - float3 sigma_s, sigma_t; + Spectrum sigma_s, sigma_t; if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) { anisotropy = anisotropy_org; guided_fraction = guided_fraction_org; @@ -285,11 +271,11 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, #endif /* Sample color channel, use MIS with balance heuristic. */ - float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL); - float3 channel_pdf; + float rphase = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_PHASE_CHANNEL); + Spectrum channel_pdf; int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf); float sample_sigma_t = volume_channel_get(sigma_t, channel); - float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE); + float randt = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_SCATTER_DISTANCE); /* We need the result of the ray-cast to compute the full guided PDF, so just remember the * relevant terms to avoid recomputing them later. */ @@ -302,7 +288,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* For the initial ray, we already know the direction, so just do classic distance sampling. */ if (bounce > 0) { /* Decide whether we should use guided or classic sampling. */ - bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction); + bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_STRATEGY) < + guided_fraction); /* Determine if we want to sample away from the incoming interface. * This only happens if we found a nearby opposite interface, and the probability for it @@ -316,27 +303,28 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance); backward_fraction = 1.0f / (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length)); - guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction; + guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_DIRECTION) < + backward_fraction; } /* Sample scattering direction. */ - float scatter_u, scatter_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v); + const float2 rand_scatter = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF); float cos_theta; float hg_pdf; if (guided) { - cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); + cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, rand_scatter.x); /* The backwards guiding distribution is just mirrored along `sd->N`, so swapping the * sign here is enough to sample from that instead. */ if (guide_backward) { cos_theta = -cos_theta; } - float3 newD = direction_from_cosine(N, cos_theta, scatter_v); + float3 newD = direction_from_cosine(N, cos_theta, rand_scatter.y); hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy); ray.D = newD; } else { - float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf); + float3 newD = henyey_greenstrein_sample( + ray.D, anisotropy, rand_scatter.x, rand_scatter.y, &hg_pdf); cos_theta = dot(newD, N); ray.D = newD; } @@ -370,10 +358,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, * chance of connecting to it. * TODO: Maybe use less than 10 times the mean free path? */ if (bounce == 0) { - ray.t = max(t, 10.0f / (min3(sigma_t))); + ray.tmax = max(t, 10.0f / (reduce_min(sigma_t))); } else { - ray.t = t; + ray.tmax = t; /* After the first bounce the object can intersect the same surface again */ ray.self.object = OBJECT_NONE; ray.self.prim = PRIM_NONE; @@ -382,46 +370,39 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, hit = (ss_isect.num_hits > 0); if (hit) { -#ifdef __KERNEL_GPU_RAYTRACING__ - /* t is always in world space with OptiX and MetalRT. */ - ray.t = ss_isect.hits[0].t; -#else - /* Compute world space distance to surface hit. */ - float3 D = transform_direction(&ob_itfm, ray.D); - D = normalize(D) * ss_isect.hits[0].t; - ray.t = len(transform_direction(&ob_tfm, D)); -#endif + ray.tmax = ss_isect.hits[0].t; } if (bounce == 0) { /* Check if we hit the opposite side. */ if (hit) { have_opposite_interface = true; - opposite_distance = dot(ray.P + ray.t * ray.D - P, -N); + opposite_distance = dot(ray.P + ray.tmax * ray.D - P, -N); } /* Apart from the opposite side check, we were supposed to only trace up to distance t, * so check if there would have been a hit in that case. */ - hit = ray.t < t; + hit = ray.tmax < t; } /* Use the distance to the exit point for the throughput update if we found one. */ if (hit) { - t = ray.t; + t = ray.tmax; } /* Advance to new scatter location. */ ray.P += t * ray.D; - float3 transmittance; - float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); + Spectrum transmittance; + Spectrum pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); if (bounce > 0) { /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */ - float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); + Spectrum guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); if (have_opposite_interface) { /* First step of MIS: Depending on geometry we might have two methods for guided * sampling, so perform MIS between them. */ - float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL); + Spectrum back_pdf = subsurface_random_walk_pdf( + backward_stretching * sigma_t, t, hit, NULL); guided_pdf = mix( guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction); } @@ -443,16 +424,14 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, /* If we hit the surface, we are done. */ break; } - else if (throughput.x < VOLUME_THROUGHPUT_EPSILON && - throughput.y < VOLUME_THROUGHPUT_EPSILON && - throughput.z < VOLUME_THROUGHPUT_EPSILON) { + else if (reduce_max(throughput) < VOLUME_THROUGHPUT_EPSILON) { /* Avoid unnecessary work and precision issue when throughput gets really small. */ break; } } if (hit) { - kernel_assert(isfinite3_safe(throughput)); + kernel_assert(isfinite_safe(throughput)); INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; } diff --git a/intern/cycles/kernel/integrator/surface_shader.h b/intern/cycles/kernel/integrator/surface_shader.h new file mode 100644 index 00000000000..f40ff3c33ee --- /dev/null +++ b/intern/cycles/kernel/integrator/surface_shader.h @@ -0,0 +1,587 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Functions to evaluate shaders. */ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/emissive.h" + +#include "kernel/svm/svm.h" + +#ifdef __OSL__ +# include "kernel/osl/shader.h" +#endif + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void surface_shader_prepare_closures(KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + const uint32_t path_flag) +{ + /* Filter out closures. */ + if (kernel_data.integrator.filter_closures) { + if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) { + sd->closure_emission_background = zero_spectrum(); + } + + if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) { + sd->flag &= ~SD_BSDF_HAS_EVAL; + } + + if (path_flag & PATH_RAY_CAMERA) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + + if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) || + (CLOSURE_IS_BSDF_GLOSSY(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) || + (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) { + sc->type = CLOSURE_NONE_ID; + sc->sample_weight = 0.0f; + } + else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) && + (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) { + sc->type = CLOSURE_HOLDOUT_ID; + sc->sample_weight = 0.0f; + sd->flag |= SD_HOLDOUT; + } + } + } + } + + /* Defensive sampling. + * + * We can likely also do defensive sampling at deeper bounces, particularly + * for cases like a perfect mirror but possibly also others. This will need + * a good heuristic. */ + if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == + 0 && + sd->num_closure > 1) { + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sc->sample_weight = max(sc->sample_weight, 0.125f * sum); + } + } + } + + /* Filter glossy. + * + * Blurring of bsdf after bounces, for rays that have a small likelihood + * of following this particular path (diffuse, rough glossy) */ + if (kernel_data.integrator.filter_glossy != FLT_MAX +#ifdef __MNEE__ + && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID) +#endif + ) { + float blur_pdf = kernel_data.integrator.filter_glossy * + INTEGRATOR_STATE(state, path, min_ray_pdf); + + if (blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF(sc->type)) { + bsdf_blur(kg, sc, blur_roughness); + } + } + } + } +} + +/* BSDF */ + +ccl_device_inline bool surface_shader_is_transmission(ccl_private const ShaderData *sd, + const float3 omega_in) +{ + return dot(sd->N, omega_in) < 0.0f; +} + +ccl_device_forceinline bool _surface_shader_exclude(ClosureType type, uint light_shader_flags) +{ + if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { + return false; + } + if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { + if (CLOSURE_IS_BSDF_DIFFUSE(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { + if (CLOSURE_IS_BSDF_GLOSSY(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { + if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { + return true; + } + } + return false; +} + +ccl_device_inline float _surface_shader_bsdf_eval_mis(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + const bool is_transmission, + ccl_private const ShaderClosure *skip_sc, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight, + const uint light_shader_flags) +{ + /* This is the veach one-sample model with balance heuristic, + * some PDF factors drop out when using balance heuristic weighting. */ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (sc == skip_sc) { + continue; + } + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) { + float bsdf_pdf = 0.0f; + Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf); + + if (bsdf_pdf != 0.0f) { + bsdf_eval_accum(result_eval, sc->type, eval * sc->weight); + sum_pdf += bsdf_pdf * sc->sample_weight; + } + } + + sum_sample_weight += sc->sample_weight; + } + } + + return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; +} + +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif + float + surface_shader_bsdf_eval(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + const bool is_transmission, + ccl_private BsdfEval *bsdf_eval, + const uint light_shader_flags) +{ + bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_spectrum()); + + return _surface_shader_bsdf_eval_mis( + kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); +} + +/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ +ccl_device_inline ccl_private const ShaderClosure *surface_shader_bsdf_bssrdf_pick( + ccl_private const ShaderData *ccl_restrict sd, ccl_private float2 *rand_bsdf) +{ + int sampled = 0; + + if (sd->num_closure > 1) { + /* Pick a BSDF or based on sample weights. */ + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + float r = (*rand_bsdf).x * sum; + float partial_sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + float next_sum = partial_sum + sc->sample_weight; + + if (r < next_sum) { + sampled = i; + + /* Rescale to reuse for direction sample, to better preserve stratification. */ + (*rand_bsdf).x = (r - partial_sum) / sc->sample_weight; + break; + } + + partial_sum = next_sum; + } + } + } + + return &sd->closure[sampled]; +} + +/* Return weight for picked BSSRDF. */ +ccl_device_inline Spectrum +surface_shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, + ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) +{ + Spectrum weight = bssrdf_sc->weight; + + if (sd->num_closure > 1) { + float sum = 0.0f; + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + weight *= sum / bssrdf_sc->sample_weight; + } + + return weight; +} + +/* Sample direction for picked BSDF, and return evaluation and pdf for all + * BSDFs combined using MIS. */ +ccl_device int surface_shader_bsdf_sample_closure(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private const ShaderClosure *sc, + const float2 rand_bsdf, + ccl_private BsdfEval *bsdf_eval, + ccl_private float3 *omega_in, + ccl_private float *pdf) +{ + /* BSSRDF should already have been handled elsewhere. */ + kernel_assert(CLOSURE_IS_BSDF(sc->type)); + + int label; + Spectrum eval = zero_spectrum(); + + *pdf = 0.0f; + label = bsdf_sample(kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, omega_in, pdf); + + if (*pdf != 0.0f) { + bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight); + + if (sd->num_closure > 1) { + const bool is_transmission = surface_shader_is_transmission(sd, *omega_in); + float sweight = sc->sample_weight; + *pdf = _surface_shader_bsdf_eval_mis( + kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0); + } + } + + return label; +} + +ccl_device float surface_shader_average_roughness(ccl_private const ShaderData *sd) +{ + float roughness = 0.0f; + float sum_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF(sc->type)) { + /* sqrt once to undo the squaring from multiplying roughness on the + * two axes, and once for the squared roughness convention. */ + float weight = fabsf(average(sc->weight)); + roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); + sum_weight += weight; + } + } + + return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f; +} + +ccl_device Spectrum surface_shader_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return one_spectrum(); + } + else if (sd->flag & SD_TRANSPARENT) { + return sd->closure_transparent_extinction; + } + else { + return zero_spectrum(); + } +} + +ccl_device void surface_shader_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) +{ + if (sd->flag & SD_TRANSPARENT) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + + if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { + sc->sample_weight = 0.0f; + sc->weight = zero_spectrum(); + } + } + + sd->flag &= ~SD_TRANSPARENT; + } +} + +ccl_device Spectrum surface_shader_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum alpha = one_spectrum() - surface_shader_transparency(kg, sd); + + alpha = saturate(alpha); + + return alpha; +} + +ccl_device Spectrum surface_shader_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device Spectrum surface_shader_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device Spectrum surface_shader_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + Spectrum eval = zero_spectrum(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device float3 surface_shader_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) + N += sc->N * fabsf(average(sc->weight)); + } + + return (is_zero(N)) ? sd->N : normalize(N); +} + +ccl_device Spectrum surface_shader_ao(KernelGlobals kg, + ccl_private const ShaderData *sd, + const float ao_factor, + ccl_private float3 *N_) +{ + Spectrum eval = zero_spectrum(); + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { + ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; + eval += sc->weight * ao_factor; + N += bsdf->N * fabsf(average(sc->weight)); + } + } + + *N_ = (is_zero(N)) ? sd->N : normalize(N); + return eval; +} + +#ifdef __SUBSURFACE__ +ccl_device float3 surface_shader_bssrdf_normal(ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSSRDF(sc->type)) { + ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; + float avg_weight = fabsf(average(sc->weight)); + + N += bssrdf->N * avg_weight; + } + } + + return (is_zero(N)) ? sd->N : normalize(N); +} +#endif /* __SUBSURFACE__ */ + +/* Constant emission optimization */ + +ccl_device bool surface_shader_constant_emission(KernelGlobals kg, + int shader, + ccl_private Spectrum *eval) +{ + int shader_index = shader & SHADER_MASK; + int shader_flag = kernel_data_fetch(shaders, shader_index).flags; + + if (shader_flag & SD_HAS_CONSTANT_EMISSION) { + const float3 emission_rgb = make_float3( + kernel_data_fetch(shaders, shader_index).constant_emission[0], + kernel_data_fetch(shaders, shader_index).constant_emission[1], + kernel_data_fetch(shaders, shader_index).constant_emission[2]); + *eval = rgb_to_spectrum(emission_rgb); + + return true; + } + + return false; +} + +/* Background */ + +ccl_device Spectrum surface_shader_background(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return sd->closure_emission_background; + } + else { + return zero_spectrum(); + } +} + +/* Emission */ + +ccl_device Spectrum surface_shader_emission(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; + } + else { + return zero_spectrum(); + } +} + +/* Holdout */ + +ccl_device Spectrum surface_shader_apply_holdout(KernelGlobals kg, ccl_private ShaderData *sd) +{ + Spectrum weight = zero_spectrum(); + + /* For objects marked as holdout, preserve transparency and remove all other + * closures, replacing them with a holdout weight. */ + if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { + if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { + weight = one_spectrum() - sd->closure_transparent_extinction; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { + sc->type = NBUILTIN_CLOSURES; + } + } + + sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); + } + else { + weight = one_spectrum(); + } + } + else { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_HOLDOUT(sc->type)) { + weight += sc->weight; + } + } + } + + return weight; +} + +/* Surface Evaluation */ + +template<uint node_feature_mask, typename ConstIntegratorGenericState> +ccl_device void surface_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + ccl_global float *ccl_restrict buffer, + uint32_t path_flag, + bool use_caustics_storage = false) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures; + } + + sd->num_closure = 0; + sd->num_closure_left = max_closures; + +#ifdef __OSL__ + if (kg->osl) { + if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { + OSLShader::eval_background(kg, state, sd, path_flag); + } + else { + OSLShader::eval_surface(kg, state, sd, path_flag); + } + } + else +#endif + { +#ifdef __SVM__ + svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag); +#else + if (sd->object == OBJECT_NONE) { + sd->closure_emission_background = make_spectrum(0.8f); + sd->flag |= SD_EMISSION; + } + else { + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), make_spectrum(0.8f)); + if (bsdf != NULL) { + bsdf->N = sd->N; + sd->flag |= bsdf_diffuse_setup(bsdf); + } + } +#endif + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/volume_shader.h b/intern/cycles/kernel/integrator/volume_shader.h new file mode 100644 index 00000000000..a1d191e2d32 --- /dev/null +++ b/intern/cycles/kernel/integrator/volume_shader.h @@ -0,0 +1,353 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +/* Volume shader evaluation and sampling. */ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/emissive.h" + +#include "kernel/svm/svm.h" + +#ifdef __OSL__ +# include "kernel/osl/shader.h" +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef __VOLUME__ + +/* Merging */ +ccl_device_inline void volume_shader_merge_closures(ccl_private ShaderData *sd) +{ + /* Merge identical closures to save closure space with stacked volumes. */ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sci = &sd->closure[i]; + + if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + continue; + } + + for (int j = i + 1; j < sd->num_closure; j++) { + ccl_private ShaderClosure *scj = &sd->closure[j]; + if (sci->type != scj->type) { + continue; + } + + ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) + sci; + ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) + scj; + if (!(hgi->g == hgj->g)) { + continue; + } + + sci->weight += scj->weight; + sci->sample_weight += scj->sample_weight; + + int size = sd->num_closure - (j + 1); + if (size > 0) { + for (int k = 0; k < size; k++) { + scj[k] = scj[k + 1]; + } + } + + sd->num_closure--; + kernel_assert(sd->num_closure >= 0); + j--; + } + } +} + +ccl_device_inline void volume_shader_copy_phases(ccl_private ShaderVolumePhases *ccl_restrict + phases, + ccl_private const ShaderData *ccl_restrict sd) +{ + phases->num_closure = 0; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *from_sc = &sd->closure[i]; + ccl_private const HenyeyGreensteinVolume *from_hg = + (ccl_private const HenyeyGreensteinVolume *)from_sc; + + if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; + + to_sc->weight = from_sc->weight; + to_sc->sample_weight = from_sc->sample_weight; + to_sc->g = from_hg->g; + phases->num_closure++; + if (phases->num_closure >= MAX_VOLUME_CLOSURE) { + break; + } + } + } +} + +ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + int skip_phase, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight) +{ + for (int i = 0; i < phases->num_closure; i++) { + if (i == skip_phase) + continue; + + ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; + float phase_pdf = 0.0f; + Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); + + if (phase_pdf != 0.0f) { + bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + sum_pdf += phase_pdf * svc->sample_weight; + } + + sum_sample_weight += svc->sample_weight; + } + + return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; +} + +ccl_device float volume_shader_phase_eval(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + ccl_private BsdfEval *phase_eval) +{ + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum()); + + return _volume_shader_phase_eval_mis(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); +} + +ccl_device int volume_shader_phase_sample(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + float2 rand_phase, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private float *pdf) +{ + int sampled = 0; + + if (phases->num_closure > 1) { + /* pick a phase closure based on sample weights */ + float sum = 0.0f; + + for (sampled = 0; sampled < phases->num_closure; sampled++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + sum += svc->sample_weight; + } + + float r = rand_phase.x * sum; + float partial_sum = 0.0f; + + for (sampled = 0; sampled < phases->num_closure; sampled++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + float next_sum = partial_sum + svc->sample_weight; + + if (r <= next_sum) { + /* Rescale to reuse for BSDF direction sample. */ + rand_phase.x = (r - partial_sum) / svc->sample_weight; + break; + } + + partial_sum = next_sum; + } + + if (sampled == phases->num_closure) { + *pdf = 0.0f; + return LABEL_NONE; + } + } + + /* todo: this isn't quite correct, we don't weight anisotropy properly + * depending on color channels, even if this is perhaps not a common case */ + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + int label; + Spectrum eval = zero_spectrum(); + + *pdf = 0.0f; + label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf); + + if (*pdf != 0.0f) { + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + } + + return label; +} + +ccl_device int volume_shader_phase_sample_closure(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumeClosure *sc, + const float2 rand_phase, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private float *pdf) +{ + int label; + Spectrum eval = zero_spectrum(); + + *pdf = 0.0f; + label = volume_phase_sample(sd, sc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf); + + if (*pdf != 0.0f) + bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval); + + return label; +} + +/* Motion Blur */ + +# ifdef __OBJECT_MOTION__ +ccl_device_inline void volume_shader_motion_blur(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd) +{ + if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) == 0) { + return; + } + + AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY); + kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND); + + const float3 P = sd->P; + const float velocity_scale = kernel_data_fetch(objects, sd->object).velocity_scale; + const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? 0.5f : + 0.0f; + const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ? + (1.0f - kernel_data.cam.shuttertime) + sd->time : + sd->time; + + /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity + * existed, or will exist, at the given time: + * + * `phi(x, T) = phi(x - (T - t) * u(x, T), t)` + * + * where + * + * x : position + * T : super-sampled time (or ray time) + * t : current time of the simulation (in rendering we assume this is center frame with + * relative time = 0) + * phi : the volume quantity + * u : the velocity field + * + * But first we need to determine the velocity field `u(x, T)`, which we can estimate also + * using semi-lagrangian advection. + * + * `u(x, T) = u(x - (T - t) * u(x, T), t)` + * + * This is the typical way to model self-advection in fluid dynamics, however, we do not + * account for other forces affecting the velocity during simulation (pressure, buoyancy, + * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better + * results, a higher order interpolation scheme can be used (at the cost of more lookups), + * or an interpolation of the velocity fields for the previous and next frames could also + * be used to estimate `u(x, T)` (which will cost more memory and lookups). + * + * References: + * "Eulerian Motion Blur", Kim and Ko, 2007 + * "Production Volume Rendering", Wreninge et al., 2012 + */ + + /* Find velocity. */ + float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc); + object_dir_transform(kg, sd, &velocity); + + /* Find advected P. */ + sd->P = P - (time - time_offset) * velocity_scale * velocity; + + /* Find advected velocity. */ + velocity = primitive_volume_attribute_float3(kg, sd, v_desc); + object_dir_transform(kg, sd, &velocity); + + /* Find advected P. */ + sd->P = P - (time - time_offset) * velocity_scale * velocity; +} +# endif + +/* Volume Evaluation */ + +template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState> +ccl_device_inline void volume_shader_eval(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + const uint32_t path_flag, + StackReadOp stack_read) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.max_closures; + } + + /* reset closures once at the start, we will be accumulating the closures + * for all volumes in the stack into a single array of closures */ + sd->num_closure = 0; + sd->num_closure_left = max_closures; + sd->flag = 0; + sd->object_flag = 0; + + for (int i = 0;; i++) { + const VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + /* Setup shader-data from stack. it's mostly setup already in + * shader_setup_from_volume, this switching should be quick. */ + sd->object = entry.object; + sd->lamp = LAMP_NONE; + sd->shader = entry.shader; + + sd->flag &= ~SD_SHADER_FLAGS; + sd->flag |= kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag &= ~SD_OBJECT_FLAGS; + + if (sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_data_fetch(object_flag, sd->object); + +# ifdef __OBJECT_MOTION__ + /* todo: this is inefficient for motion blur, we should be + * caching matrices instead of recomputing them each step */ + shader_setup_object_transforms(kg, sd, sd->time); + + volume_shader_motion_blur(kg, sd); +# endif + } + + /* evaluate shader */ +# ifdef __SVM__ +# ifdef __OSL__ + if (kg->osl) { + OSLShader::eval_volume(kg, state, sd, path_flag); + } + else +# endif + { + svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>( + kg, state, sd, NULL, path_flag); + } +# endif + + /* Merge closures to avoid exceeding number of closures limit. */ + if (!shadow) { + if (i > 0) { + volume_shader_merge_closures(sd); + } + } + } +} + +#endif /* __VOLUME__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/volume_stack.h b/intern/cycles/kernel/integrator/volume_stack.h index 5256349a0cc..675e1927fc0 100644 --- a/intern/cycles/kernel/integrator/volume_stack.h +++ b/intern/cycles/kernel/integrator/volume_stack.h @@ -39,7 +39,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, break; } - if (entry.object == sd->object) { + if (entry.object == sd->object && entry.shader == sd->shader) { /* Shift back next stack entries. */ do { entry = stack_read(i + 1); @@ -61,7 +61,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg, } /* Already in the stack? then we have nothing to do. */ - if (entry.object == sd->object) { + if (entry.object == sd->object && entry.shader == sd->shader) { return; } } @@ -133,7 +133,7 @@ ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read break; } - int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; + int shader_flag = kernel_data_fetch(shaders, (entry.shader & SHADER_MASK)).flags; bool heterogeneous = false; @@ -146,7 +146,7 @@ ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read * heterogeneous volume objects may be using the same shader. */ int object = entry.object; if (object != OBJECT_NONE) { - int object_flag = kernel_tex_fetch(__object_flag, object); + int object_flag = kernel_data_fetch(object_flag, object); if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) { heterogeneous = true; } @@ -180,7 +180,7 @@ ccl_device VolumeSampleMethod volume_stack_sample_method(KernelGlobals kg, Integ break; } - int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; + int shader_flag = kernel_data_fetch(shaders, (entry.shader & SHADER_MASK)).flags; if (shader_flag & SD_VOLUME_MIS) { /* Multiple importance sampling. */ |