diff options
24 files changed, 1483 insertions, 43 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 24cc5735c96..51b3b3d2bcb 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1012,6 +1012,12 @@ class CyclesLightSettings(bpy.types.PropertyGroup): "note that this will make the light invisible", default=False, ) + is_caustics_light: BoolProperty( + name="Shadow Caustics", + description="Generate approximate caustics in shadows of refractive surfaces. " + "Lights, caster and receiver objects must have shadow caustics options set to enable this", + default=False, + ) @classmethod def register(cls): @@ -1028,6 +1034,12 @@ class CyclesLightSettings(bpy.types.PropertyGroup): class CyclesWorldSettings(bpy.types.PropertyGroup): + is_caustics_light: BoolProperty( + name="Shadow Caustics", + description="Generate approximate caustics in shadows of refractive surfaces. " + "Lights, caster and receiver objects must have shadow caustics options set to enable this", + default=False, + ) sampling_method: EnumProperty( name="Sampling Method", description="How to sample the background light", @@ -1226,6 +1238,21 @@ class CyclesObjectSettings(bpy.types.PropertyGroup): subtype='DISTANCE', ) + is_caustics_caster: BoolProperty( + name="Cast Shadow Caustics", + description="With refractive materials, generate approximate caustics in shadows of this object. " + "Up to 10 bounces inside this object are taken into account. Lights, caster and receiver objects " + "must have shadow caustics options set to enable this", + default=False, + ) + + is_caustics_receiver: BoolProperty( + name="Receive Shadow Caustics", + description="Receive approximate caustics from refractive materials in shadows on this object. " + "Lights, caster and receiver objects must have shadow caustics options set to enable this", + default=False, + ) + @classmethod def register(cls): bpy.types.Object.cycles = PointerProperty( diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 1f50f3da7ae..c86452e0a18 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1082,7 +1082,7 @@ class CYCLES_OBJECT_PT_shading(CyclesButtonsPanel, Panel): return False ob = context.object - return ob and has_geometry_visibility(ob) + return ob and has_geometry_visibility(ob) and ob.type != 'LIGHT' def draw(self, context): pass @@ -1125,6 +1125,28 @@ class CYCLES_OBJECT_PT_shading_gi_approximation(CyclesButtonsPanel, Panel): col.prop(cob, "ao_distance") +class CYCLES_OBJECT_PT_shading_caustics(CyclesButtonsPanel, Panel): + bl_label = "Caustics" + bl_parent_id = "CYCLES_OBJECT_PT_shading" + bl_context = "object" + + @classmethod + def poll(cls, context): + return CyclesButtonsPanel.poll(context) and not use_metal(context) + + def draw(self, context): + layout = self.layout + layout.use_property_split = True + layout.use_property_decorate = False + + col = layout.column() + + ob = context.object + cob = ob.cycles + col.prop(cob, "is_caustics_caster") + col.prop(cob, "is_caustics_receiver") + + class CYCLES_OBJECT_PT_visibility(CyclesButtonsPanel, Panel): bl_label = "Visibility" bl_context = "object" @@ -1300,6 +1322,8 @@ class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel): sub.active = not (light.type == 'AREA' and clamp.is_portal) sub.prop(clamp, "cast_shadow") sub.prop(clamp, "use_multiple_importance_sampling", text="Multiple Importance") + if not use_metal(context): + sub.prop(clamp, "is_caustics_light", text="Shadow Caustics") if light.type == 'AREA': col.prop(clamp, "is_portal", text="Portal") @@ -1496,6 +1520,8 @@ class CYCLES_WORLD_PT_settings_surface(CyclesButtonsPanel, Panel): subsub.active = cworld.sampling_method == 'MANUAL' subsub.prop(cworld, "sample_map_resolution") sub.prop(cworld, "max_bounces") + sub.prop(cworld, "is_caustics_light", text="Shadow Caustics") + class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel): @@ -2193,6 +2219,7 @@ classes = ( CYCLES_OBJECT_PT_shading, CYCLES_OBJECT_PT_shading_shadow_terminator, CYCLES_OBJECT_PT_shading_gi_approximation, + CYCLES_OBJECT_PT_shading_caustics, CYCLES_OBJECT_PT_visibility, CYCLES_OBJECT_PT_visibility_ray_visibility, CYCLES_OBJECT_PT_visibility_culling, diff --git a/intern/cycles/blender/light.cpp b/intern/cycles/blender/light.cpp index bdfbdfd8618..04799443ceb 100644 --- a/intern/cycles/blender/light.cpp +++ b/intern/cycles/blender/light.cpp @@ -114,6 +114,9 @@ void BlenderSync::sync_light(BL::Object &b_parent, light->set_cast_shadow(get_boolean(clight, "cast_shadow")); light->set_use_mis(get_boolean(clight, "use_multiple_importance_sampling")); + /* caustics light */ + light->set_use_caustics(get_boolean(clight, "is_caustics_light")); + light->set_max_bounces(get_int(clight, "max_bounces")); if (b_ob_info.real_object != b_ob_info.iter_object) { @@ -176,6 +179,9 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal) /* force enable light again when world is resynced */ light->set_is_enabled(true); + /* caustic light */ + light->set_use_caustics(get_boolean(cworld, "is_caustics_light")); + light->tag_update(scene); light_map.set_recalc(b_world); } diff --git a/intern/cycles/blender/object.cpp b/intern/cycles/blender/object.cpp index 58ea8961845..16a6b3454ed 100644 --- a/intern/cycles/blender/object.cpp +++ b/intern/cycles/blender/object.cpp @@ -298,6 +298,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, } object->set_ao_distance(ao_distance); + bool is_caustics_caster = get_boolean(cobject, "is_caustics_caster"); + object->set_is_caustics_caster(is_caustics_caster); + + bool is_caustics_receiver = get_boolean(cobject, "is_caustics_receiver"); + object->set_is_caustics_receiver(is_caustics_receiver); + /* sync the asset name for Cryptomatte */ BL::Object parent = b_ob.parent(); ustring parent_name; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 6e3ac1bd32f..cfd503a621d 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -223,6 +223,7 @@ set(SRC_KERNEL_INTEGRATOR_HEADERS integrator/intersect_subsurface.h integrator/intersect_volume_stack.h integrator/megakernel.h + integrator/mnee.h integrator/path_state.h integrator/shade_background.h integrator/shade_light.h diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index b84059d6676..d6047bd2288 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -230,7 +230,11 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Setup next kernel to execute. */ const int shader_index = shader & SHADER_MASK; const int shader_flags = kernel_tex_fetch(__shaders, shader_index).flags; - if (shader_flags & SD_HAS_RAYTRACE) { + const bool use_caustics = kernel_data.integrator.use_caustics && + (object_flag & SD_OBJECT_CAUSTICS); + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics; + + if (use_raytrace_kernel) { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); } else { diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 9fcbf89c579..b8ce625c11b 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -123,7 +123,9 @@ ccl_device_forceinline void integrator_split_shadow_catcher( /* Continue with shading shadow catcher surface. */ const int shader = intersection_get_shader(kg, isect); const int flags = kernel_tex_fetch(__shaders, shader).flags; - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + const bool use_caustics = kernel_data.integrator.use_caustics && + (object_flags & SD_OBJECT_CAUSTICS); + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; if (use_raytrace_kernel) { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); @@ -145,7 +147,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche const int shader = intersection_get_shader(kg, &isect); const int flags = kernel_tex_fetch(__shaders, shader).flags; - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + const int object_flags = intersection_get_object_flags(kg, &isect); + const bool use_caustics = kernel_data.integrator.use_caustics && + (object_flags & SD_OBJECT_CAUSTICS); + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( @@ -214,7 +219,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel( const int flags = kernel_tex_fetch(__shaders, shader).flags; if (!integrator_intersect_terminate(kg, state, flags)) { - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + const int object_flags = intersection_get_object_flags(kg, isect); + const bool use_caustics = kernel_data.integrator.use_caustics && + (object_flags & SD_OBJECT_CAUSTICS); + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); @@ -261,7 +269,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( /* Hit a surface, continue with surface kernel unless terminated. */ const int shader = intersection_get_shader(kg, isect); const int flags = kernel_tex_fetch(__shaders, shader).flags; - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + const int object_flags = intersection_get_object_flags(kg, isect); + const bool use_caustics = kernel_data.integrator.use_caustics && + (object_flags & SD_OBJECT_CAUSTICS); + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( @@ -328,12 +339,37 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, isect.prim = PRIM_NONE; } + /* Setup mnee flag to signal last intersection with a caster */ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + +#ifdef __MNEE__ + /* Path culling logic for MNEE (removes fireflies at the cost of bias) */ + if (kernel_data.integrator.use_caustics) { + /* The following firefly removal mechanism works by culling light connections when + * a ray comes from a caustic caster directly after bouncing off a different caustic + * receiver */ + bool from_caustic_caster = false; + bool from_caustic_receiver = false; + if (!(path_flag & PATH_RAY_CAMERA) && last_isect_object != OBJECT_NONE) { + const int object_flags = kernel_tex_fetch(__object_flag, last_isect_object); + from_caustic_receiver = (object_flags & SD_OBJECT_CAUSTICS_RECEIVER); + from_caustic_caster = (object_flags & SD_OBJECT_CAUSTICS_CASTER); + } + + bool has_receiver_ancestor = INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_RECEIVER_ANCESTOR; + INTEGRATOR_STATE_WRITE(state, path, mnee) &= ~PATH_MNEE_CULL_LIGHT_CONNECTION; + if (from_caustic_caster && has_receiver_ancestor) + INTEGRATOR_STATE_WRITE(state, path, mnee) |= PATH_MNEE_CULL_LIGHT_CONNECTION; + if (from_caustic_receiver) + INTEGRATOR_STATE_WRITE(state, path, mnee) |= PATH_MNEE_RECEIVER_ANCESTOR; + } +#endif /* __MNEE__ */ + /* Light intersection for MIS. */ if (kernel_data.integrator.use_lamp_mis) { /* NOTE: if we make lights visible to camera rays, we'll need to initialize * these in the path_state_init. */ const int last_type = INTEGRATOR_STATE(state, isect, type); - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); hit = lights_intersect( kg, state, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) || hit; diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h new file mode 100644 index 00000000000..af2032c9b99 --- /dev/null +++ b/intern/cycles/kernel/integrator/mnee.h @@ -0,0 +1,1150 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifdef __MNEE__ + +# include "kernel/light/sample.h" + +/* + * Manifold Next Event Estimation + * + * This code adds manifold next event estimation through refractive surface(s) as a new sampling + * technique for direct lighting, i.e. finding the point on the refractive surface(s) along the + * path to a light sample, which satisfies fermat's principle for a given microfacet normal and + * the path's end points. This technique involves walking on the "specular manifold" using a pseudo + * newton solver. Such a manifold is defined by the specular constraint matrix from the manifold + * exploration framework [2]. For each refractive interface, this constraint is defined by + * enforcing that the generalized half-vector projection onto the interface local tangent plane is + * null. The newton solver guides the walk by linearizing the manifold locally before reprojecting + * the linear solution onto the refractive surface. See paper [1] for more details about + * the technique itself and [3] for the half-vector light transport formulation, from which it is + * derived. + * + * [1] Manifold Next Event Estimation + * Johannes Hanika, Marc Droske, and Luca Fascione. 2015. + * Comput. Graph. Forum 34, 4 (July 2015), 87–97. + * https://jo.dreggn.org/home/2015_mnee.pdf + * + * [2] Manifold exploration: a Markov Chain Monte Carlo technique for rendering scenes with + * difficult specular transport Wenzel Jakob and Steve Marschner. 2012. ACM Trans. Graph. 31, 4, + * Article 58 (July 2012), 13 pages. + * https://www.cs.cornell.edu/projects/manifolds-sg12/ + * + * [3] The Natural-Constraint Representation of the Path Space for Efficient Light Transport + * Simulation Anton S. Kaplanyan, Johannes Hanika, and Carsten Dachsbacher. 2014. ACM Trans. Graph. + * 33, 4, Article 102 (July 2014), 13 pages. + * https://cg.ivd.kit.edu/english/HSLT.php + */ + +# define MNEE_MAX_ITERATIONS 50 +# define MNEE_MAX_INTERSECTION_COUNT 10 +# define MNEE_SOLVER_THRESHOLD 0.001f +# define MNEE_MAX_CAUSTIC_CASTERS 6 +# define MNEE_MIN_DISTANCE 0.001f +# define MNEE_MIN_DETERMINANT 0.0001f +# define MNEE_PROJECTION_DISTANCE_MULTIPLIER 2.f + +CCL_NAMESPACE_BEGIN + +/* Manifold struct containing the local differential geometry quantity */ +typedef ccl_private struct ManifoldVertex { + /* Position and partials */ + float3 p; + float3 dp_du; + float3 dp_dv; + + /* Normal and partials */ + float3 n; + float3 ng; + float3 dn_du; + float3 dn_dv; + + /* geometric info */ + float2 uv; + int object; + int prim; + int shader; + + /* closure info */ + float eta; + ccl_private ShaderClosure *bsdf; + float2 n_offset; + + /* constraint and its derivative matrices */ + float2 constraint; + float4 a; + float4 b; + float4 c; +} ManifoldVertex; + +/* Multiplication of a 2x2 matrix encoded in a row-major order float4 by a vector */ +ccl_device_inline float2 mat22_mult(const float4 a, const float2 b) +{ + return make_float2(a.x * b.x + a.y * b.y, a.z * b.x + a.w * b.y); +} + +/* Multiplication of 2x2 matrices encoded in a row-major order float4 */ +ccl_device_inline float4 mat22_mult(const float4 a, const float4 b) +{ + return make_float4( + a.x * b.x + a.y * b.z, a.x * b.y + a.y * b.w, a.z * b.x + a.w * b.z, a.z * b.y + a.w * b.w); +} + +/* Determinant of a 2x2 matrix encoded in a row-major order float4 */ +ccl_device_inline float mat22_determinant(const float4 m) +{ + return m.x * m.w - m.y * m.z; +} + +/* Inverse of a 2x2 matrix encoded in a row-major order float4 */ +ccl_device_inline float mat22_inverse(const float4 m, ccl_private float4& m_inverse) +{ + float det = mat22_determinant(m); + if (fabsf(det) < MNEE_MIN_DETERMINANT) + return 0.f; + m_inverse = make_float4(m.w, -m.y, -m.z, m.x) / det; + return det; +} + +/* Update light sample */ +ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg, + const float3 P, + ccl_private LightSample *ls) +{ + /* correct light sample position/direction and pdf + * NOTE: preserve pdf in area measure */ + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp); + + if (ls->type == LIGHT_POINT || ls->type == LIGHT_SPOT) { + ls->D = normalize_len(ls->P - P, &ls->t); + ls->Ng = -ls->D; + + float2 uv = map_to_sphere(ls->Ng); + ls->u = uv.x; + ls->v = uv.y; + + float invarea = klight->spot.invarea; + ls->eval_fac = (0.25f * M_1_PI_F) * invarea; + ls->pdf = invarea; + + if (ls->type == LIGHT_SPOT) { + /* spot light attenuation */ + float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]); + ls->eval_fac *= spot_light_attenuation( + dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls->Ng); + } + } + else if (ls->type == LIGHT_AREA) { + ls->D = normalize_len(ls->P - P, &ls->t); + ls->pdf = fabsf(klight->area.invarea); + } +} + +/* Compute orthonormal basis + * https://graphics.pixar.com/library/OrthonormalB/paper.pdf */ +ccl_device_forceinline void mnee_make_orthonormals(const float3 n, + ccl_private float3 *dp_du, + ccl_private float3 *dp_dv) +{ + if (n.z < 0.0f) { + const float a = 1.0f / (1.0f - n.z); + const float b = n.x * n.y * a; + *dp_du = make_float3(1.0f - n.x * n.x * a, -b, n.x); + *dp_dv = make_float3(b, n.y * n.y * a - 1.0f, -n.y); + } + else { + const float a = 1.0f / (1.0f + n.z); + const float b = -n.x * n.y * a; + *dp_du = make_float3(1.0f - n.x * n.x * a, b, -n.x); + *dp_dv = make_float3(b, 1.0f - n.y * n.y * a, -n.y); + } +} + +/* Manifold vertex setup from ray and intersection data */ +ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg, + ccl_private ManifoldVertex *vtx, + ccl_private ShaderClosure *bsdf, + const float eta, + const float2 n_offset, + ccl_private const Ray *ray, + ccl_private const Intersection *isect, + ccl_private ShaderData *sd_vtx) +{ + sd_vtx->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) : + isect->object; + + sd_vtx->type = isect->type; + sd_vtx->flag = 0; + sd_vtx->object_flag = kernel_tex_fetch(__object_flag, sd_vtx->object); + + /* matrices and time */ + shader_setup_object_transforms(kg, sd_vtx, ray->time); + sd_vtx->time = ray->time; + + sd_vtx->prim = isect->prim; + sd_vtx->ray_length = isect->t; + + sd_vtx->u = isect->u; + sd_vtx->v = isect->v; + + sd_vtx->shader = kernel_tex_fetch(__tri_shader, sd_vtx->prim); + + float3 verts[3]; + float3 normals[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd_vtx->prim); + + if (sd_vtx->type & PRIMITIVE_TRIANGLE) { + /* Static triangle. */ + + /* Load triangle vertices. */ + verts[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0); + verts[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1); + verts[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2); + + /* Vectors. */ + sd_vtx->P = triangle_point_from_uv(kg, sd_vtx, isect->object, isect->prim, isect->u, isect->v); + + /* Smooth normal. */ + if (sd_vtx->shader & SHADER_SMOOTH_NORMAL) { + /* Load triangle vertices. */ + normals[0] = kernel_tex_fetch(__tri_vnormal, tri_vindex.x); + normals[1] = kernel_tex_fetch(__tri_vnormal, tri_vindex.y); + normals[2] = kernel_tex_fetch(__tri_vnormal, tri_vindex.z); + } + } + else { /* if (sd_vtx->type & PRIMITIVE_MOTION_TRIANGLE) */ + /* Motion triangle. */ + + /* Get motion info. */ + int numsteps, numverts; + object_motion_info(kg, sd_vtx->object, &numsteps, &numverts, NULL); + + /* Figure out which steps we need to fetch and their interpolation factor. */ + int maxstep = numsteps * 2; + int step = min((int)(sd_vtx->time * maxstep), maxstep - 1); + float t = sd_vtx->time * maxstep - step; + + /* Find attribute. */ + int offset = intersection_find_attribute(kg, sd_vtx->object, ATTR_STD_MOTION_VERTEX_POSITION); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* Fetch vertex coordinates. */ + float3 next_verts[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd_vtx->prim); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step( + kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); + + /* Interpolate between steps. */ + verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; + verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; + verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; + + /* Compute refined position. */ + sd_vtx->P = motion_triangle_point_from_uv( + kg, sd_vtx, isect->object, isect->prim, isect->u, isect->v, verts); + + /* Compute smooth normal. */ + if (sd_vtx->shader & SHADER_SMOOTH_NORMAL) { + /* Find attribute. */ + int offset = intersection_find_attribute(kg, sd_vtx->object, ATTR_STD_MOTION_VERTEX_NORMAL); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* Fetch vertex coordinates. */ + float3 next_normals[3]; + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); + motion_triangle_normals_for_step( + kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); + + /* Interpolate between steps. */ + normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; + normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; + normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; + } + } + + /* manifold vertex position */ + vtx->p = sd_vtx->P; + + if (!(sd_vtx->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + /* Instance transform. */ + object_position_transform_auto(kg, sd_vtx, &verts[0]); + object_position_transform_auto(kg, sd_vtx, &verts[1]); + object_position_transform_auto(kg, sd_vtx, &verts[2]); + object_normal_transform_auto(kg, sd_vtx, &normals[0]); + object_normal_transform_auto(kg, sd_vtx, &normals[1]); + object_normal_transform_auto(kg, sd_vtx, &normals[2]); + } + + /* Tangent space (position derivatives) wrt barycentric (u, v). */ + vtx->dp_du = verts[0] - verts[2]; + vtx->dp_dv = verts[1] - verts[2]; + + /* Geometric normal. */ + vtx->ng = normalize(cross(vtx->dp_du, vtx->dp_dv)); + if (sd_vtx->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) + vtx->ng = -vtx->ng; + + /* Shading normal. */ + if (!(sd_vtx->shader & SHADER_SMOOTH_NORMAL)) { + vtx->n = vtx->ng; + vtx->dn_du = vtx->dn_dv = zero_float3(); + } + else { + /* Interpolate normals between vertices. */ + float n_len; + vtx->n = normalize_len(normals[0] * sd_vtx->u + normals[1] * sd_vtx->v + + normals[2] * (1.0f - sd_vtx->u - sd_vtx->v), + &n_len); + + /* Shading normal derivatives wrt barycentric (u, v) + * we calculate the derivative of n = |u*n0 + v*n1 + (1-u-v)*n2| using: + * d/du [f(u)/|f(u)|] = [d/du f(u)]/|f(u)| - f(u)/|f(u)|^3 <f(u), d/du f(u)>. */ + const float inv_n_len = 1.f / n_len; + vtx->dn_du = inv_n_len * (normals[0] - normals[2]); + vtx->dn_dv = inv_n_len * (normals[1] - normals[2]); + vtx->dn_du -= vtx->n * dot(vtx->n, vtx->dn_du); + vtx->dn_dv -= vtx->n * dot(vtx->n, vtx->dn_dv); + } + + /* dp_du and dp_dv need to be continuous across triangles for the h normal + * offset to yield a consistent halfvector while walking on the manifold. + * It's usually best to rely on the mesh uv layout, which is assumed to be + * continuous across the mesh. */ + float2 duv0, duv1; + bool found_uv = false; + AttributeDescriptor uv_desc = find_attribute(kg, sd_vtx, ATTR_STD_GENERATED); + if (uv_desc.offset != ATTR_STD_NOT_FOUND) { + float3 uvs[3]; + uvs[0] = kernel_tex_fetch(__attributes_float3, uv_desc.offset + tri_vindex.x); + uvs[1] = kernel_tex_fetch(__attributes_float3, uv_desc.offset + tri_vindex.y); + uvs[2] = kernel_tex_fetch(__attributes_float3, uv_desc.offset + tri_vindex.z); + duv0 = make_float2(uvs[0].x - uvs[2].x, uvs[0].y - uvs[2].y); + duv1 = make_float2(uvs[1].x - uvs[2].x, uvs[1].y - uvs[2].y); + found_uv = true; + } + else { + uv_desc = find_attribute(kg, sd_vtx, ATTR_STD_UV); + if (uv_desc.offset != ATTR_STD_NOT_FOUND) { + float2 uvs[3]; + uvs[0] = kernel_tex_fetch(__attributes_float2, uv_desc.offset + tri_vindex.x); + uvs[1] = kernel_tex_fetch(__attributes_float2, uv_desc.offset + tri_vindex.y); + uvs[2] = kernel_tex_fetch(__attributes_float2, uv_desc.offset + tri_vindex.z); + duv0 = make_float2(uvs[0].x - uvs[2].x, uvs[0].y - uvs[2].y); + duv1 = make_float2(uvs[1].x - uvs[2].x, uvs[1].y - uvs[2].y); + found_uv = true; + } + } + if (found_uv) { + const float det = duv0.x * duv1.y - duv0.y * duv1.x; + if (det != 0.f) { + const float inv_det = 1.f / det; + + /* Tangent space (position derivatives) wrt texture (u, v). */ + const float3 dp_du = vtx->dp_du; + const float3 dp_dv = vtx->dp_dv; + vtx->dp_du = (duv1.y * dp_du - duv0.y * dp_dv) * inv_det; + vtx->dp_dv = (-duv1.x * dp_du + duv0.x * dp_dv) * inv_det; + + /* Shading normal derivatives wrt texture (u, v). */ + const float3 dn_du = vtx->dn_du; + const float3 dn_dv = vtx->dn_dv; + vtx->dn_du = (duv1.y * dn_du - duv0.y * dn_dv) * inv_det; + vtx->dn_dv = (-duv1.x * dn_du + duv0.x * dn_dv) * inv_det; + } + } + + /* Orthonormalize (dp_du,dp_dv) using a linear transformation, which + * we use on (dn_du,dn_dv) as well so the new (u,v) are consistent. */ + const float inv_len_dp_du = 1.f / len(vtx->dp_du); + vtx->dp_du *= inv_len_dp_du; + vtx->dn_du *= inv_len_dp_du; + const float dpdu_dot_dpdv = dot(vtx->dp_du, vtx->dp_dv); + const float3 dp_dv = vtx->dp_dv - dpdu_dot_dpdv * vtx->dp_du; + const float3 dn_dv = vtx->dn_dv - dpdu_dot_dpdv * vtx->dn_du; + const float inv_len_dp_dv = 1.f / len(dp_dv); + vtx->dp_dv = dp_dv * inv_len_dp_dv; + vtx->dn_dv = dn_dv * inv_len_dp_dv; + + /* Initialize constraint and its derivates. */ + vtx->a = vtx->c = zero_float4(); + vtx->b = make_float4(1.f, 0.f, 0.f, 1.f); + vtx->constraint = zero_float2(); + vtx->n_offset = n_offset; + + /* Closure information. */ + vtx->bsdf = bsdf; + vtx->eta = eta; + + /* Geometric information. */ + vtx->uv = make_float2(isect->u, isect->v); + vtx->object = sd_vtx->object; + vtx->prim = sd_vtx->prim; + vtx->shader = sd_vtx->shader; +} + +/* Compute constraint derivatives. */ +ccl_device_forceinline bool mnee_compute_constraint_derivatives( + int vertex_count, + ccl_private ManifoldVertex *vertices, + ccl_private const float3 &surface_sample_pos, + const bool light_fixed_direction, + const float3 light_sample) +{ + for (int vi = 0; vi < vertex_count; vi++) { + ccl_private ManifoldVertex &v = vertices[vi]; + + /* Direction toward surface sample. */ + float3 wi = (vi == 0) ? surface_sample_pos - v.p : vertices[vi - 1].p - v.p; + float ili = len(wi); + if (ili < MNEE_MIN_DISTANCE) + return false; + ili = 1.f / ili; + wi *= ili; + + /* Direction toward light sample. */ + float3 wo = (vi == vertex_count - 1) ? + (light_fixed_direction ? light_sample : light_sample - v.p) : + vertices[vi + 1].p - v.p; + float ilo = len(wo); + if (ilo < MNEE_MIN_DISTANCE) + return false; + ilo = 1.f / ilo; + wo *= ilo; + + /* Invert ior if coming from inside. */ + float eta = v.eta; + if (dot(wi, v.ng) < .0f) + eta = 1.f / eta; + + /* Half vector. */ + float3 H = -(wi + eta * wo); + float ilh = 1.f / len(H); + H *= ilh; + + ilo *= eta * ilh; + ili *= ilh; + + /* Local shading frame. */ + float dp_du_dot_n = dot(v.dp_du, v.n); + float3 s = v.dp_du - dp_du_dot_n * v.n; + float inv_len_s = 1.f / len(s); + s *= inv_len_s; + float3 t = cross(v.n, s); + + float3 dH_du, dH_dv; + + /* Constraint derivatives wrt previous vertex. */ + if (vi > 0) { + ccl_private ManifoldVertex &v_prev = vertices[vi - 1]; + dH_du = (v_prev.dp_du - wi * dot(wi, v_prev.dp_du)) * ili; + dH_dv = (v_prev.dp_dv - wi * dot(wi, v_prev.dp_dv)) * ili; + dH_du -= H * dot(dH_du, H); + dH_dv -= H * dot(dH_dv, H); + dH_du = -dH_du; + dH_dv = -dH_dv; + + v.a = make_float4(dot(dH_du, s), dot(dH_dv, s), dot(dH_du, t), dot(dH_dv, t)); + } + + /* Constraint derivatives wrt current vertex. */ + if (vi == vertex_count - 1 && light_fixed_direction) { + dH_du = ili * (-v.dp_du + wi * dot(wi, v.dp_du)); + dH_dv = ili * (-v.dp_dv + wi * dot(wi, v.dp_dv)); + } + else { + dH_du = -v.dp_du * (ili + ilo) + wi * (dot(wi, v.dp_du) * ili) + + wo * (dot(wo, v.dp_du) * ilo); + dH_dv = -v.dp_dv * (ili + ilo) + wi * (dot(wi, v.dp_dv) * ili) + + wo * (dot(wo, v.dp_dv) * ilo); + } + dH_du -= H * dot(dH_du, H); + dH_dv -= H * dot(dH_dv, H); + dH_du = -dH_du; + dH_dv = -dH_dv; + + float3 ds_du = -inv_len_s * (dot(v.dp_du, v.dn_du) * v.n + dp_du_dot_n * v.dn_du); + float3 ds_dv = -inv_len_s * (dot(v.dp_du, v.dn_dv) * v.n + dp_du_dot_n * v.dn_dv); + ds_du -= s * dot(s, ds_du); + ds_dv -= s * dot(s, ds_dv); + float3 dt_du = cross(v.dn_du, s) + cross(v.n, ds_du); + float3 dt_dv = cross(v.dn_dv, s) + cross(v.n, ds_dv); + + v.b = make_float4(dot(dH_du, s) + dot(H, ds_du), + dot(dH_dv, s) + dot(H, ds_dv), + dot(dH_du, t) + dot(H, dt_du), + dot(dH_dv, t) + dot(H, dt_dv)); + + /* Constraint derivatives wrt next vertex. */ + if (vi < vertex_count - 1) { + ccl_private ManifoldVertex &v_next = vertices[vi + 1]; + dH_du = (v_next.dp_du - wo * dot(wo, v_next.dp_du)) * ilo; + dH_dv = (v_next.dp_dv - wo * dot(wo, v_next.dp_dv)) * ilo; + dH_du -= H * dot(dH_du, H); + dH_dv -= H * dot(dH_dv, H); + dH_du = -dH_du; + dH_dv = -dH_dv; + + v.c = make_float4(dot(dH_du, s), dot(dH_dv, s), dot(dH_du, t), dot(dH_dv, t)); + } + + /* Constraint vector wrt. the local shading frame. */ + v.constraint = make_float2(dot(s, H), dot(t, H)) - v.n_offset; + } + return true; +} + +/* Invert (block) constraint derivative matrix and solve linear system so we can map dh back to dx: + * dh / dx = A + * dx = inverse(A) x dh + * to use for specular specular manifold walk + * (See for example http://faculty.washington.edu/finlayso/ebook/algebraic/advanced/LUtri.htm + * for block tridiagonal matrix based linear system solve) */ +ccl_device_forceinline bool mnee_solve_matrix_h_to_x(int vertex_count, + ccl_private ManifoldVertex *vertices, + ccl_private float2 *dx) +{ + float4 Li[MNEE_MAX_CAUSTIC_CASTERS]; + float2 C[MNEE_MAX_CAUSTIC_CASTERS]; + + /* Block tridiagonal LU factorization. */ + float4 Lk = vertices[0].b; + if (mat22_inverse(Lk, Li[0]) == 0.f) + return false; + + C[0] = vertices[0].constraint; + + for (int k = 1; k < vertex_count; k++) { + float4 A = mat22_mult(vertices[k].a, Li[k - 1]); + + Lk = vertices[k].b - mat22_mult(A, vertices[k - 1].c); + if (mat22_inverse(Lk, Li[k]) == 0.f) + return false; + + C[k] = vertices[k].constraint - mat22_mult(A, C[k - 1]); + } + + dx[vertex_count - 1] = mat22_mult(Li[vertex_count - 1], C[vertex_count - 1]); + for (int k = vertex_count - 2; k > -1; k--) + dx[k] = mat22_mult(Li[k], C[k] - mat22_mult(vertices[k].c, dx[k + 1])); + + return true; +} + +/* Newton solver to walk on specular manifold. */ +ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private ShaderData *sd_vtx, + ccl_private const LightSample *ls, + int vertex_count, + ccl_private ManifoldVertex *vertices) +{ + float2 dx[MNEE_MAX_CAUSTIC_CASTERS]; + ManifoldVertex tentative[MNEE_MAX_CAUSTIC_CASTERS]; + + Ray projection_ray; + projection_ray.self.light_object = OBJECT_NONE; + projection_ray.self.light_prim = PRIM_NONE; + projection_ray.dP = differential_make_compact(sd->dP); + projection_ray.dD = differential_zero_compact(); + projection_ray.time = sd->time; + Intersection projection_isect; + + const bool light_fixed_direction = (ls->t == FLT_MAX); + const float3 light_sample = light_fixed_direction ? ls->D : ls->P; + + /* We start gently, potentially ramping up to beta = 1, since target configurations + * far from the seed path can send the proposed solution further than the linearized + * local differential geometric quantities are meant for (especially dn/du and dn/dv). */ + float beta = .1f; + bool reduce_stepsize = false; + bool resolve_constraint = true; + for (int iteration = 0; iteration < MNEE_MAX_ITERATIONS; iteration++) { + if (resolve_constraint) { + /* Calculate constraintand its derivatives for vertices. */ + if (!mnee_compute_constraint_derivatives( + vertex_count, vertices, sd->P, light_fixed_direction, light_sample)) + return false; + + /* Calculate constraint norm. */ + float constraint_norm = 0.f; + for (int vi = 0; vi < vertex_count; vi++) + constraint_norm = fmaxf(constraint_norm, len(vertices[vi].constraint)); + + /* Return if solve successful. */ + if (constraint_norm < MNEE_SOLVER_THRESHOLD) + return true; + + /* Invert derivative matrix. */ + if (!mnee_solve_matrix_h_to_x(vertex_count, vertices, dx)) + return false; + } + + /* Construct tentative new vertices and project back onto surface. */ + for (int vi = 0; vi < vertex_count; vi++) { + ccl_private ManifoldVertex &mv = vertices[vi]; + + /* Tentative new position on linearized manifold (tangent plane). */ + float3 tentative_p = mv.p - beta * (dx[vi].x * mv.dp_du + dx[vi].y * mv.dp_dv); + + /* For certain configs, the first solve ends up below the receiver. */ + if (vi == 0) { + const float3 wo = tentative_p - sd->P; + if (dot(sd->Ng, wo) <= 0.f) { + /* Change direction for the 1st interface. */ + tentative_p = mv.p + beta * (dx[vi].x * mv.dp_du + dx[vi].y * mv.dp_dv); + } + } + + /* Project tentative point from tangent plane back to surface + * we ignore all other intersections since this tentative path could lead + * valid to a valid path even if occluded. */ + if (vi == 0) { + projection_ray.self.object = sd->object; + projection_ray.self.prim = sd->prim; + projection_ray.P = sd->P; + } + else { + ccl_private const ManifoldVertex &pv = vertices[vi - 1]; + projection_ray.self.object = pv.object; + projection_ray.self.prim = pv.prim; + projection_ray.P = pv.p; + } + projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t); + projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER; + + bool projection_success = false; + for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) { + bool hit = scene_intersect(kg, &projection_ray, PATH_RAY_TRANSMIT, &projection_isect); + if (!hit) + break; + + int hit_object = (projection_isect.object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, projection_isect.prim) : + projection_isect.object; + + if (hit_object == mv.object) { + projection_success = true; + break; + } + + projection_ray.self.object = projection_isect.object; + projection_ray.self.prim = projection_isect.prim; + projection_ray.P += projection_isect.t * projection_ray.D; + projection_ray.t -= projection_isect.t; + } + if (!projection_success) { + reduce_stepsize = true; + break; + } + + /* Setup corrected manifold vertex. */ + mnee_setup_manifold_vertex(kg, + &tentative[vi], + mv.bsdf, + mv.eta, + mv.n_offset, + &projection_ray, + &projection_isect, + sd_vtx); + } + + /* Check that tentative path is still transmissive. */ + if (!reduce_stepsize) { + for (int vi = 0; vi < vertex_count; vi++) { + ccl_private ManifoldVertex &tv = tentative[vi]; + + /* Direction toward surface sample. */ + const float3 wi = (vi == 0 ? sd->P : tentative[vi - 1].p) - tv.p; + /* Direction toward light sample. */ + const float3 wo = (vi == vertex_count - 1) ? light_fixed_direction ? ls->D : ls->P - tv.p : + tentative[vi + 1].p - tv.p; + + if (dot(tv.n, wi) * dot(tv.n, wo) >= 0.f) { + reduce_stepsize = true; + break; + } + } + } + + if (reduce_stepsize) { + /* Adjust step if can't land on right surface. */ + reduce_stepsize = false; + resolve_constraint = false; + beta *= .5f; + continue; + } + + /* Copy tentative vertices to main vertex list. */ + for (int vi = 0; vi < vertex_count; vi++) + vertices[vi] = tentative[vi]; + + /* Increase the step to get back to 1. */ + resolve_constraint = true; + beta = min(1.f, 2.f * beta); + } + + return false; +} + +/* Sample bsdf in half-vector measure. */ +ccl_device_forceinline float2 +mnee_sample_bsdf_dh(ClosureType type, float alpha_x, float alpha_y, float sample_u, float sample_v) +{ + float alpha2; + float cos_phi, sin_phi; + + if (alpha_x == alpha_y) { + float phi = sample_v * M_2PI_F; + fast_sincosf(phi, &sin_phi, &cos_phi); + alpha2 = alpha_x * alpha_x; + } + else { + float phi = atanf(alpha_y / alpha_x * tanf(M_2PI_F * sample_v + M_PI_2_F)); + if (sample_v > .5f) + phi += M_PI_F; + fast_sincosf(phi, &sin_phi, &cos_phi); + float alpha_x2 = alpha_x * alpha_x; + float alpha_y2 = alpha_y * alpha_y; + alpha2 = 1.f / (cos_phi * cos_phi / alpha_x2 + sin_phi * sin_phi / alpha_y2); + } + + /* Map sampled angles to micro-normal direction h. */ + float tan2_theta = alpha2; + if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) { + tan2_theta *= -logf(1.0f - sample_u); + } + else { /* if (type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID || type == + CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_FRESNEL_ID) */ + tan2_theta *= sample_u / (1.0f - sample_u); + } + float cos2_theta = 1.0f / (1.0f + tan2_theta); + float sin_theta = safe_sqrtf(1.0f - cos2_theta); + return make_float2(cos_phi * sin_theta, sin_phi * sin_theta); +} + +/* Evaluate product term inside eq.6 at solution interface vi + * divided by corresponding sampled pdf: + * fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h| + * We assume here that the pdf (in half-vector measure) is the same as + * the one calculation when sampling the microfacet normals from the + * specular chain above: this allows us to simplify the bsdf weight */ +ccl_device_forceinline float3 mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure, + float3 wi, + float3 wo) +{ + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)closure; + + float cosNO = dot(bsdf->N, wi); + float cosNI = dot(bsdf->N, wo); + + float3 Ht = normalize(-(bsdf->ior * wo + wi)); + float cosHO = dot(Ht, wi); + + float alpha2 = bsdf->alpha_x * bsdf->alpha_y; + float cosThetaM = dot(bsdf->N, Ht); + + float G; + if (bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) { + /* Eq. 26, 27: now calculate G1(i,m) and G1(o,m). */ + G = bsdf_beckmann_G1(bsdf->alpha_x, cosNO) * bsdf_beckmann_G1(bsdf->alpha_x, cosNI); + } + else { /* if (type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID || type == + CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_FRESNEL_ID) */ + /* Eq. 34: now calculate G1(i,m) and G1(o,m). */ + G = (2.f / (1.f + safe_sqrtf(1.f + alpha2 * (1.f - cosNO * cosNO) / (cosNO * cosNO)))) * + (2.f / (1.f + safe_sqrtf(1.f + alpha2 * (1.f - cosNI * cosNI) / (cosNI * cosNI)))); + } + + /* + * bsdf_do = (1 - F) * D_do * G * |h.wi| / (n.wi * n.wo) + * pdf_dh = D_dh * cosThetaM + * D_do = D_dh * |dh/do| + * + * contribution = bsdf_do * |do/dh| * |n.wo / n.h| / pdf_dh + * = (1 - F) * G * |h.wi / (n.wi * n.h^2)| + */ + return bsdf->weight * G * fabsf(cosHO / (cosNO * sqr(cosThetaM))); +} + +/* Compute transfer matrix determinant |T1| = |dx1/dxn| (and |dh/dx| in the process) */ +ccl_device_forceinline bool mnee_compute_transfer_matrix(ccl_private const ShaderData *sd, + ccl_private const LightSample *ls, + int vertex_count, + ccl_private ManifoldVertex *vertices, + ccl_private float *dx1_dxlight, + ccl_private float *dh_dx) +{ + /* Simplified block tridiagonal LU factorization. */ + float4 Li; + float4 U[MNEE_MAX_CAUSTIC_CASTERS - 1]; + + float4 Lk = vertices[0].b; + float Lk_det = mat22_inverse(Lk, Li); + if (Lk_det == 0.f) + return false; + + float det_dh_dx = Lk_det; + + for (int k = 1; k < vertex_count; k++) { + U[k - 1] = mat22_mult(Li, vertices[k - 1].c); + + Lk = vertices[k].b - mat22_mult(vertices[k].a, U[k - 1]); + Lk_det = mat22_inverse(Lk, Li); + if (Lk_det == 0.f) + return false; + + det_dh_dx *= Lk_det; + } + + /* Fill out constraint derivatives wrt light vertex param. */ + + /* Local shading frame at last free vertex. */ + int mi = vertex_count - 1; + ccl_private const ManifoldVertex &m = vertices[mi]; + + float3 s = normalize(m.dp_du - dot(m.dp_du, m.n) * m.n); + float3 t = cross(m.n, s); + + /* Local differential geometry. */ + float3 dp_du, dp_dv; + mnee_make_orthonormals(ls->Ng, &dp_du, &dp_dv); + + /* Direction toward surface sample. */ + float3 wi = vertex_count == 1 ? sd->P - m.p : vertices[mi - 1].p - m.p; + float ili = 1.f / len(wi); + wi *= ili; + + /* Invert ior if coming from inside. */ + float eta = m.eta; + if (dot(wi, m.ng) < .0f) + eta = 1.f / eta; + + float dxn_dwn; + float4 dc_dlight; + + if (ls->t == FLT_MAX) { + /* Constant direction toward light sample. */ + float3 wo = ls->D; + + /* Half vector. */ + float3 H = -(wi + eta * wo); + float ilh = 1.f / len(H); + H *= ilh; + + float ilo = -eta * ilh; + + float cos_theta = dot(wo, m.n); + float sin_theta = safe_sqrtf(1.f - sqr(cos_theta)); + float cos_phi = dot(wo, s); + float sin_phi = safe_sqrtf(1.f - sqr(cos_phi)); + + /* Wo = (cos_phi * sin_theta) * s + (sin_phi * sin_theta) * t + cos_theta * n. */ + float3 dH_dtheta = ilo * (cos_theta * (cos_phi * s + sin_phi * t) - sin_theta * m.n); + float3 dH_dphi = ilo * sin_theta * (-sin_phi * s + cos_phi * t); + dH_dtheta -= H * dot(dH_dtheta, H); + dH_dphi -= H * dot(dH_dphi, H); + + /* constraint derivatives wrt light direction expressed + * in spherical coordinates (theta, phi). */ + dc_dlight = make_float4( + dot(dH_dtheta, s), dot(dH_dphi, s), dot(dH_dtheta, t), dot(dH_dphi, t)); + + /* Jacobian to convert dtheta x dphi to dw measure. */ + dxn_dwn = 1.f / fmaxf(MNEE_MIN_DISTANCE, fabsf(sin_theta)); + } + else { + /* Direction toward light sample. */ + float3 wo = ls->P - m.p; + float ilo = 1.f / len(wo); + wo *= ilo; + + /* Half vector. */ + float3 H = -(wi + eta * wo); + float ilh = 1.f / len(H); + H *= ilh; + + ilo *= eta * ilh; + + float3 dH_du = (dp_du - wo * dot(wo, dp_du)) * ilo; + float3 dH_dv = (dp_dv - wo * dot(wo, dp_dv)) * ilo; + dH_du -= H * dot(dH_du, H); + dH_dv -= H * dot(dH_dv, H); + dH_du = -dH_du; + dH_dv = -dH_dv; + + dc_dlight = make_float4(dot(dH_du, s), dot(dH_dv, s), dot(dH_du, t), dot(dH_dv, t)); + + /* Neutral value since dc_dlight is already in the desired vertex area measure. */ + dxn_dwn = 1.f; + } + + /* Compute transfer matrix. */ + float4 Tp = -mat22_mult(Li, dc_dlight); + for (int k = vertex_count - 2; k > -1; k--) + Tp = -mat22_mult(U[k], Tp); + + *dx1_dxlight = fabsf(mat22_determinant(Tp)) * dxn_dwn; + *dh_dx = fabsf(det_dh_dx); + return true; +} + +/* Calculate the path contribution. */ +ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private ShaderData *sd_mnee, + ccl_private LightSample *ls, + int vertex_count, + ccl_private ManifoldVertex *vertices, + ccl_private BsdfEval *throughput) +{ + float wo_len; + float3 wo = normalize_len(vertices[0].p - sd->P, &wo_len); + + /* Initialize throughput and evaluate receiver bsdf * |n.wo|. */ + shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader); + + /* Update light sample with new position / direct.ion + * and keep pdf in vertex area measure */ + mnee_update_light_sample(kg, vertices[vertex_count - 1].p, ls); + + /* Evaluate light sam.ple + * in case the light has a node-based shader: + * 1. sd_mnee will be used to store light data, which is why we need to do + * this evaluation here. sd_mnee needs to contain the solution's last + * interface data at the end of the call for the shadow ray setup to work. + * 2. ls needs to contain the last interface data for the light shader to + * evaluate properly */ + float3 light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time); + bsdf_eval_mul3(throughput, light_eval / ls->pdf); + + /* Generalized geometry term. */ + + float dh_dx; + float dx1_dxlight; + if (!mnee_compute_transfer_matrix(sd, ls, vertex_count, vertices, &dx1_dxlight, &dh_dx)) + return false; + + /* Receiver bsdf eval above already contains |n.wo|. */ + const float dw0_dx1 = fabsf(dot(wo, vertices[0].n)) / sqr(wo_len); + + const float G = dw0_dx1 * dx1_dxlight; + bsdf_eval_mul(throughput, G); + + /* Specular reflectance. */ + + /* Probe ray / isect. */ + Ray probe_ray; + probe_ray.self.light_object = ls->object; + probe_ray.self.light_prim = ls->prim; + probe_ray.dP = differential_make_compact(sd->dP); + probe_ray.dD = differential_zero_compact(); + probe_ray.time = sd->time; + Intersection probe_isect; + + probe_ray.self.object = sd->object; + probe_ray.self.prim = sd->prim; + probe_ray.P = sd->P; + + float3 wi; + float wi_len; + for (int vi = 0; vi < vertex_count; vi++) { + ccl_private const ManifoldVertex &v = vertices[vi]; + + /* Check visiblity. */ + probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t); + if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) { + int hit_object = (probe_isect.object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, probe_isect.prim) : + probe_isect.object; + /* Test whether the ray hit the appropriate object at its intended location. */ + if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE) + return false; + } + probe_ray.self.object = v.object; + probe_ray.self.prim = v.prim; + probe_ray.P = v.p; + + /* Set view looking dir. */ + wi = -wo; + wi_len = wo_len; + + /* Setup shader data for vertex vi. */ + shader_setup_from_sample(kg, + sd_mnee, + v.p, + v.n, + wi, + v.shader, + v.object, + v.prim, + v.uv.x, + v.uv.y, + wi_len, + sd->time, + false, + LAMP_NONE); + + /* Evaluate shader nodes at solution vi. */ + shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true); + + /* Set light looking dir. */ + wo = (vi == vertex_count - 1) ? (ls->t == FLT_MAX ? ls->D : ls->P - v.p) : + vertices[vi + 1].p - v.p; + wo = normalize_len(wo, &wo_len); + + /* Evaluate product term inside eq.6 at solution interface. vi + * divided by corresponding sampled pdf: + * fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h| */ + float3 bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo); + bsdf_eval_mul3(throughput, bsdf_contribution); + } + + return true; +} + +/* Manifold next event estimation path sampling. */ +ccl_device_forceinline bool kernel_path_mnee_sample(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private ShaderData *sd_mnee, + ccl_private const RNGState *rng_state, + ccl_private LightSample *ls, + ccl_private BsdfEval *throughput) +{ + /* + * 1. send seed ray from shading point to light sample position (or along sampled light + * direction), making sure it intersects a caustic caster at least once, ignoring all other + * intersections (the final path could be valid even though objects could occlude the light + * this seed point), building an array of manifold vertices. + */ + + /* Setup probe ray. */ + Ray probe_ray; + probe_ray.self.object = sd->object; + probe_ray.self.prim = sd->prim; + probe_ray.self.light_object = ls->object; + probe_ray.self.light_prim = ls->prim; + probe_ray.P = sd->P; + if (ls->t == FLT_MAX) { + /* Distant / env light. */ + probe_ray.D = ls->D; + probe_ray.t = ls->t; + } + else { + /* Other lights, avoid self-intersection. */ + probe_ray.D = ls->P - probe_ray.P; + probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t); + } + probe_ray.dP = differential_make_compact(sd->dP); + probe_ray.dD = differential_zero_compact(); + probe_ray.time = sd->time; + Intersection probe_isect; + + ManifoldVertex vertices[MNEE_MAX_CAUSTIC_CASTERS]; + + int vertex_count = 0; + for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) { + bool hit = scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect); + if (!hit) + break; + + const int object_flags = intersection_get_object_flags(kg, &probe_isect); + if (object_flags & SD_OBJECT_CAUSTICS_CASTER) { + + /* Do we have enough slots. */ + if (vertex_count >= MNEE_MAX_CAUSTIC_CASTERS) + return false; + + ccl_private ManifoldVertex &mv = vertices[vertex_count++]; + + /* Setup shader data on caustic caster and evaluate context. */ + shader_setup_from_ray(kg, sd_mnee, &probe_ray, &probe_isect); + + /* Last bool argument is the MNEE flag (for TINY_MAX_CLOSURE cap in kernel_shader.h). */ + shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>( + kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true); + + /* get and sample refraction bsdf. */ + bool found_transimissive_microfacet_bsdf = false; + for (int ci = 0; ci < sd_mnee->num_closure; ci++) { + ccl_private ShaderClosure *bsdf = &sd_mnee->closure[ci]; + if (bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID || + bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID) { + + found_transimissive_microfacet_bsdf = true; + ccl_private MicrofacetBsdf *microfacet_bsdf = (ccl_private MicrofacetBsdf *)bsdf; + + /* Figure out appropriate index of refraction ratio. */ + const float eta = (sd_mnee->flag & SD_BACKFACING) ? 1.0f / microfacet_bsdf->ior : + microfacet_bsdf->ior; + + /* Sample transmissive microfacet bsdf. */ + float bsdf_u, bsdf_v; + path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + float2 h = mnee_sample_bsdf_dh( + bsdf->type, microfacet_bsdf->alpha_x, microfacet_bsdf->alpha_y, bsdf_u, bsdf_v); + + /* Setup differential geometry on vertex. */ + mnee_setup_manifold_vertex(kg, &mv, bsdf, eta, h, &probe_ray, &probe_isect, sd_mnee); + break; + } + } + if (!found_transimissive_microfacet_bsdf) + return false; + } + + probe_ray.self.object = probe_isect.object; + probe_ray.self.prim = probe_isect.prim; + probe_ray.P += probe_isect.t * probe_ray.D; + if (ls->t != FLT_MAX) + probe_ray.t -= probe_isect.t; + }; + + /* Mark the manifold walk invalid to keep mollification on by default. */ + INTEGRATOR_STATE_WRITE(state, path, mnee) &= ~PATH_MNEE_VALID; + + if (vertex_count == 0) + return false; + + /* Check whether the transmission depth limit is reached before continuing. */ + const int transmission_bounce = INTEGRATOR_STATE(state, path, transmission_bounce) + + vertex_count; + if (transmission_bounce >= kernel_data.integrator.max_transmission_bounce) + return false; + + /* Check whether the diffuse depth limit is reached before continuing. */ + const int diffuse_bounce = INTEGRATOR_STATE(state, path, diffuse_bounce) + 1; + if (diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) + return false; + + /* Check whether the overall depth limit is reached before continuing. */ + const int bounce = INTEGRATOR_STATE(state, path, bounce) + diffuse_bounce + transmission_bounce; + if (bounce >= kernel_data.integrator.max_bounce) + return false; + + /* Mark the manifold walk valid to turn off mollification regardless of how successful the walk + * is: this is noticeable when another mnee is performed deeper in the path, for an internally + * reflected ray for example. If mollification was active for the reflection, a clear + * discontinuity is visible between direct and indirect contributions */ + INTEGRATOR_STATE_WRITE(state, path, mnee) |= PATH_MNEE_VALID; + + /* 2. Walk on the specular manifold to find vertices on the + * casters that satisfy snell's law for each interface + */ + if (mnee_newton_solver(kg, sd, sd_mnee, ls, vertex_count, vertices)) { + /* 3. If a solution exists, calculate contribution of the corresponding path */ + if (!mnee_path_contribution(kg, state, sd, sd_mnee, ls, vertex_count, vertices, throughput)) + return false; + + return true; + } + + return false; +} + +CCL_NAMESPACE_END + +#endif /* __MNEE__ */ diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h index e6bdf21499c..ec93ac6d46f 100644 --- a/intern/cycles/kernel/integrator/path_state.h +++ b/intern/cycles/kernel/integrator/path_state.h @@ -57,6 +57,10 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg, INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f; INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); +#ifdef __MNEE__ + INTEGRATOR_STATE_WRITE(state, path, mnee) = 0; +#endif + INTEGRATOR_STATE_WRITE(state, isect, object) = OBJECT_NONE; INTEGRATOR_STATE_WRITE(state, isect, prim) = PRIM_NONE; diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h index 69dc3dc772d..4fd41121466 100644 --- a/intern/cycles/kernel/integrator/shade_background.h +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -101,6 +101,22 @@ ccl_device_inline void integrate_background(KernelGlobals kg, #endif } +#ifdef __MNEE__ + if (INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) { + if (kernel_data.background.use_mis) { + for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { + /* This path should have been resolved with mnee, it will + * generate a firefly for small lights since it is improbable. */ + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + if (klight->type == LIGHT_BACKGROUND && klight->use_caustics) { + eval_background = false; + break; + } + } + } + } +#endif /* __MNEE__ */ + /* Evaluate background shader. */ float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) : zero_float3(); @@ -140,6 +156,16 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg, } #endif +#ifdef __MNEE__ + if (INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) { + /* This path should have been resolved with mnee, it will + * generate a firefly for small lights since it is improbable. */ + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + if (klight->use_caustics) + return; + } +#endif /* __MNEE__ */ + /* Evaluate light shader. */ /* TODO: does aliasing like this break automatic SoA in CUDA? */ ShaderDataTinyStorage emission_sd_storage; diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index df9af6ca107..c5dd9fe27f4 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -6,6 +6,8 @@ #include "kernel/film/accumulate.h" #include "kernel/film/passes.h" +#include "kernel/integrator/mnee.h" + #include "kernel/integrator/path_state.h" #include "kernel/integrator/shader_eval.h" #include "kernel/integrator/subsurface.h" @@ -92,6 +94,7 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, #ifdef __EMISSION__ /* Path tracing: sample point on light and evaluate light shader, then * queue shadow ray to be traced. */ +template<uint node_feature_mask> ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, IntegratorState state, ccl_private ShaderData *sd, @@ -124,34 +127,65 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, * integrate_surface_bounce, evaluate the BSDF, and only then evaluate * the light shader. This could also move to its own kernel, for * non-constant light sources. */ - ShaderDataTinyStorage emission_sd_storage; + ShaderDataCausticsStorage emission_sd_storage; ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); - if (is_zero(light_eval)) { - return; - } - - /* Evaluate BSDF. */ - const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D); + Ray ray ccl_optional_struct_init; BsdfEval bsdf_eval ccl_optional_struct_init; - const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); - bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf); + const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D); - if (ls.shader & SHADER_USE_MIS) { - const float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, bsdf_pdf); - bsdf_eval_mul(&bsdf_eval, mis_weight); +# ifdef __MNEE__ + bool skip_nee = false; + IF_KERNEL_NODES_FEATURE(RAYTRACE) + { + if (ls.lamp != LAMP_NONE) { + /* Is this a caustic light? */ + const bool use_caustics = kernel_tex_fetch(__lights, ls.lamp).use_caustics; + if (use_caustics) { + /* Are we on a caustic caster? */ + if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER)) + return; + + /* Are we on a caustic receiver? */ + if (!is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_RECEIVER)) + skip_nee = kernel_path_mnee_sample( + kg, state, sd, emission_sd, rng_state, &ls, &bsdf_eval); + } + } + } + if (skip_nee) { + /* Create shadow ray after successful manifold walk: + * emission_sd contains the last interface intersection and + * the light sample ls has been updated */ + light_sample_to_surface_shadow_ray(kg, emission_sd, &ls, &ray); } + else +# endif /* __MNEE__ */ + { + const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); + if (is_zero(light_eval)) { + return; + } - /* Path termination. */ - const float terminate = path_state_rng_light_termination(kg, rng_state); - if (light_sample_terminate(kg, &ls, &bsdf_eval, terminate)) { - return; + /* Evaluate BSDF. */ + const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); + bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf); + + if (ls.shader & SHADER_USE_MIS) { + const float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, bsdf_pdf); + bsdf_eval_mul(&bsdf_eval, mis_weight); + } + + /* Path termination. */ + const float terminate = path_state_rng_light_termination(kg, rng_state); + if (light_sample_terminate(kg, &ls, &bsdf_eval, terminate)) { + return; + } + + /* Create shadow ray. */ + light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray); } - /* Create shadow ray. */ - Ray ray ccl_optional_struct_init; - light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray); const bool is_light = light_sample_is_light(&ls); /* Branch off shadow kernel. */ @@ -501,7 +535,7 @@ ccl_device bool integrate_surface(KernelGlobals kg, /* Direct light. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT); - integrate_surface_direct_light(kg, state, &sd, &rng_state); + integrate_surface_direct_light<node_feature_mask>(kg, state, &sd, &rng_state); #if defined(__AO__) /* Ambient occlusion pass. */ diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h index ce76f7a3a90..3066fb661a1 100644 --- a/intern/cycles/kernel/integrator/shader_eval.h +++ b/intern/cycles/kernel/integrator/shader_eval.h @@ -157,7 +157,11 @@ ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg, * * Blurring of bsdf after bounces, for rays that have a small likelihood * of following this particular path (diffuse, rough glossy) */ - if (kernel_data.integrator.filter_glossy != FLT_MAX) { + if (kernel_data.integrator.filter_glossy != FLT_MAX +#ifdef __MNEE__ + && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID) +#endif + ) { float blur_pdf = kernel_data.integrator.filter_glossy * INTEGRATOR_STATE(state, path, min_ray_pdf); @@ -605,7 +609,8 @@ ccl_device void shader_eval_surface(KernelGlobals kg, ConstIntegratorGenericState state, ccl_private ShaderData *ccl_restrict sd, ccl_global float *ccl_restrict buffer, - uint32_t path_flag) + uint32_t path_flag, + bool use_caustics_storage = false) { /* If path is being terminated, we are tracing a shadow ray or evaluating * emission, then we don't need to store closures. The emission and shadow @@ -615,7 +620,7 @@ ccl_device void shader_eval_surface(KernelGlobals kg, max_closures = 0; } else { - max_closures = kernel_data.max_closures; + max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures; } sd->num_closure = 0; diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h index 6eac38b1af9..e7e6db037b0 100644 --- a/intern/cycles/kernel/integrator/state_template.h +++ b/intern/cycles/kernel/integrator/state_template.h @@ -34,6 +34,8 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING) KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) /* enum PathRayFlag */ KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) +/* enum PathRayMNEE */ +KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING) /* Multiple importance sampling * The PDF of BSDF sampling at the last scatter point, and distance to the * last scatter point minus the last ray segment. This distance lets us diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index fa468e337c7..2391cc2356d 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -171,7 +171,12 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat const int shader = intersection_get_shader(kg, &ss_isect.hits[0]); const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; - if (shader_flags & SD_HAS_RAYTRACE) { + const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]); + const bool use_caustics = kernel_data.integrator.use_caustics && + (object_flags & SD_OBJECT_CAUSTICS); + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics; + + if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h index 359ffd1c607..fb637008ca4 100644 --- a/intern/cycles/kernel/light/light.h +++ b/intern/cycles/kernel/light/light.h @@ -246,6 +246,15 @@ ccl_device bool lights_intersect(KernelGlobals kg, if (!(klight->shader_id & SHADER_USE_MIS)) { continue; } + +#ifdef __MNEE__ + /* This path should have been resolved with mnee, it will + * generate a firefly for small lights since it is improbable. */ + if ((INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) && + klight->use_caustics) { + continue; + } +#endif } if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) { diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h index 68ddc2fa306..88b44cdbacf 100644 --- a/intern/cycles/kernel/svm/closure.h +++ b/intern/cycles/kernel/svm/closure.h @@ -395,8 +395,10 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) # endif { + /* This is to prevent mnee from receiving a null bsdf. */ + float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel); ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel)); + sd, sizeof(MicrofacetBsdf), base_color * glass_weight * refraction_fresnel); if (bsdf) { bsdf->N = N; bsdf->T = make_float3(0.0f, 0.0f, 0.0f); @@ -674,8 +676,10 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) #endif { + /* This is to prevent mnee from receiving a null bsdf. */ + float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel); ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel)); + sd, sizeof(MicrofacetBsdf), weight * refraction_fresnel); if (bsdf) { bsdf->N = N; diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 07d4a95780b..00b5b007e11 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -102,6 +102,12 @@ CCL_NAMESPACE_BEGIN # undef __BAKING__ #endif /* __KERNEL_GPU_RAYTRACING__ */ +/* MNEE currently causes "Compute function exceeds available temporary registers" + * on Metal, disabled for now. */ +#ifndef __KERNEL_METAL__ +# define __MNEE__ +#endif + /* Scene-based selective features compilation. */ #ifdef __KERNEL_FEATURES__ # if !(__KERNEL_FEATURES & KERNEL_FEATURE_CAMERA_MOTION) @@ -293,6 +299,13 @@ enum PathRayFlag : uint32_t { PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1U << 31U), }; +// 8bit enum, just in case we need to move more variables in it +enum PathRayMNEE { + PATH_MNEE_VALID = (1U << 0U), + PATH_MNEE_RECEIVER_ANCESTOR = (1U << 1U), + PATH_MNEE_CULL_LIGHT_CONNECTION = (1U << 2U), +}; + /* Configure ray visibility bits for rays and objects respectively, * to make shadow catchers work. * @@ -649,6 +662,17 @@ typedef struct AttributeDescriptor { # define MAX_CLOSURE __MAX_CLOSURE__ #endif +/* For manifold next event estimation, we need space to store and evaluate + * 2 closures (with extra data) on the refractive interfaces, in addition + * to keeping the full sd at the current shading point. We need 4 because a + * refractive bsdf is instanced with a companion reflection bsdf, even though + * we only need the refractive one, and each of them requires 2 slots. */ +#ifndef __CAUSTICS_MAX_CLOSURE__ +# define CAUSTICS_MAX_CLOSURE 4 +#else +# define CAUSTICS_MAX_CLOSURE __CAUSTICS_MAX_CLOSURE__ +#endif + #ifndef __MAX_VOLUME_STACK_SIZE__ # define MAX_VOLUME_STACK_SIZE 32 #else @@ -779,11 +803,18 @@ enum ShaderDataObjectFlag { SD_OBJECT_SHADOW_CATCHER = (1 << 7), /* object has volume attributes */ SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8), + /* object is caustics caster */ + SD_OBJECT_CAUSTICS_CASTER = (1 << 9), + /* object is caustics receiver */ + SD_OBJECT_CAUSTICS_RECEIVER = (1 << 10), + + /* object is using caustics */ + SD_OBJECT_CAUSTICS = (SD_OBJECT_CAUSTICS_CASTER | SD_OBJECT_CAUSTICS_RECEIVER), SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK | SD_OBJECT_MOTION | SD_OBJECT_TRANSFORM_APPLIED | SD_OBJECT_NEGATIVE_SCALE_APPLIED | SD_OBJECT_HAS_VOLUME | SD_OBJECT_INTERSECTS_VOLUME | SD_OBJECT_SHADOW_CATCHER | - SD_OBJECT_HAS_VOLUME_ATTRIBUTES) + SD_OBJECT_HAS_VOLUME_ATTRIBUTES | SD_OBJECT_CAUSTICS) }; typedef struct ccl_align(16) ShaderData @@ -882,6 +913,15 @@ typedef struct ccl_align(16) ShaderDataTinyStorage char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE]; } ShaderDataTinyStorage; + +/* ShaderDataCausticsStorage needs the same alignment as ShaderData, or else + * the pointer cast in AS_SHADER_DATA invokes undefined behavior. */ +typedef struct ccl_align(16) ShaderDataCausticsStorage +{ + char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * (MAX_CLOSURE - CAUSTICS_MAX_CLOSURE)]; +} +ShaderDataCausticsStorage; + #define AS_SHADER_DATA(shader_data_tiny_storage) \ ((ccl_private ShaderData *)shader_data_tiny_storage) @@ -1201,6 +1241,9 @@ typedef struct KernelIntegrator { /* mis */ int use_lamp_mis; + /* caustics */ + int use_caustics; + /* sampler */ int sampling_pattern; @@ -1219,7 +1262,7 @@ typedef struct KernelIntegrator { int direct_light_sampling_type; /* padding */ - int pad1, pad2; + int pad1; } KernelIntegrator; static_assert_align(KernelIntegrator, 16); @@ -1383,7 +1426,8 @@ typedef struct KernelLight { float max_bounces; float random; float strength[3]; - float pad1, pad2; + int use_caustics; + float pad1; Transform tfm; Transform itfm; union { diff --git a/intern/cycles/scene/light.cpp b/intern/cycles/scene/light.cpp index 68779a3ce02..85a7f37994c 100644 --- a/intern/cycles/scene/light.cpp +++ b/intern/cycles/scene/light.cpp @@ -123,6 +123,7 @@ NODE_DEFINE(Light) SOCKET_BOOLEAN(use_glossy, "Use Glossy", true); SOCKET_BOOLEAN(use_transmission, "Use Transmission", true); SOCKET_BOOLEAN(use_scatter, "Use Scatter", true); + SOCKET_BOOLEAN(use_caustics, "Shadow Caustics", false); SOCKET_INT(max_bounces, "Max Bounces", 1024); SOCKET_UINT(random_id, "Random ID", 0); @@ -896,6 +897,7 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc klights[light_index].max_bounces = max_bounces; klights[light_index].random = random; + klights[light_index].use_caustics = light->use_caustics; klights[light_index].tfm = light->tfm; klights[light_index].itfm = transform_inverse(light->tfm); diff --git a/intern/cycles/scene/light.h b/intern/cycles/scene/light.h index 3519b76cf51..25190f6fb63 100644 --- a/intern/cycles/scene/light.h +++ b/intern/cycles/scene/light.h @@ -61,6 +61,7 @@ class Light : public Node { NODE_SOCKET_API(bool, use_glossy) NODE_SOCKET_API(bool, use_transmission) NODE_SOCKET_API(bool, use_scatter) + NODE_SOCKET_API(bool, use_caustics) NODE_SOCKET_API(bool, is_shadow_catcher) NODE_SOCKET_API(bool, is_portal) diff --git a/intern/cycles/scene/object.cpp b/intern/cycles/scene/object.cpp index fda9a211e60..9b19169880a 100644 --- a/intern/cycles/scene/object.cpp +++ b/intern/cycles/scene/object.cpp @@ -90,6 +90,9 @@ NODE_DEFINE(Object) SOCKET_BOOLEAN(is_shadow_catcher, "Shadow Catcher", false); + SOCKET_BOOLEAN(is_caustics_caster, "Cast Shadow Caustics", false); + SOCKET_BOOLEAN(is_caustics_receiver, "Receive Shadow Caustics", false); + SOCKET_NODE(particle_system, "Particle System", ParticleSystem::get_node_type()); SOCKET_INT(particle_index, "Particle Index", 0); @@ -510,6 +513,14 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s kobject.visibility = ob->visibility_for_tracing(); kobject.primitive_type = geom->primitive_type(); + /* Object shadow caustics flag */ + if (ob->is_caustics_caster) { + flag |= SD_OBJECT_CAUSTICS_CASTER; + } + if (ob->is_caustics_receiver) { + flag |= SD_OBJECT_CAUSTICS_RECEIVER; + } + /* Object flag. */ if (ob->use_holdout) { flag |= SD_OBJECT_HOLDOUT_MASK; diff --git a/intern/cycles/scene/object.h b/intern/cycles/scene/object.h index 55689ccfa58..561ecd4e7e9 100644 --- a/intern/cycles/scene/object.h +++ b/intern/cycles/scene/object.h @@ -55,6 +55,9 @@ class Object : public Node { NODE_SOCKET_API(float, shadow_terminator_shading_offset) NODE_SOCKET_API(float, shadow_terminator_geometry_offset) + NODE_SOCKET_API(bool, is_caustics_caster) + NODE_SOCKET_API(bool, is_caustics_receiver) + NODE_SOCKET_API(float3, dupli_generated) NODE_SOCKET_API(float2, dupli_uv) diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index 2199b2351d0..359e2d8e2c3 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -489,7 +489,21 @@ void Scene::update_kernel_features() if (use_motion && camera->use_motion()) { kernel_features |= KERNEL_FEATURE_CAMERA_MOTION; } + + /* Figure out whether the scene will use shader raytrace we need at least + * one caustic light, one caustic caster and one caustic receiver to use + * and enable the mnee code path. */ + bool has_caustics_receiver = false; + bool has_caustics_caster = false; + bool has_caustics_light = false; + foreach (Object *object, objects) { + if (object->get_is_caustics_caster()) { + has_caustics_caster = true; + } + else if (object->get_is_caustics_receiver()) { + has_caustics_receiver = true; + } Geometry *geom = object->get_geometry(); if (use_motion) { if (object->use_motion() || geom->get_use_motion_blur()) { @@ -518,6 +532,18 @@ void Scene::update_kernel_features() } } + foreach (Light *light, lights) { + if (light->get_use_caustics()) { + has_caustics_light = true; + } + } + + dscene.data.integrator.use_caustics = false; + if (has_caustics_caster && has_caustics_receiver && has_caustics_light) { + dscene.data.integrator.use_caustics = true; + kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE; + } + if (bake_manager->get_baking()) { kernel_features |= KERNEL_FEATURE_BAKING; } diff --git a/intern/cycles/util/math_float2.h b/intern/cycles/util/math_float2.h index d6b47052720..542dad93467 100644 --- a/intern/cycles/util/math_float2.h +++ b/intern/cycles/util/math_float2.h @@ -40,7 +40,7 @@ ccl_device_inline float average(const float2 &a); ccl_device_inline float distance(const float2 &a, const float2 &b); ccl_device_inline float dot(const float2 &a, const float2 &b); ccl_device_inline float cross(const float2 &a, const float2 &b); -ccl_device_inline float len(const float2 &a); +ccl_device_inline float len(const float2 a); ccl_device_inline float2 normalize(const float2 &a); ccl_device_inline float2 normalize_len(const float2 &a, float *t); ccl_device_inline float2 safe_normalize(const float2 &a); @@ -187,11 +187,6 @@ ccl_device_inline float cross(const float2 &a, const float2 &b) return (a.x * b.y - a.y * b.x); } -ccl_device_inline float len(const float2 &a) -{ - return sqrtf(dot(a, a)); -} - ccl_device_inline float2 normalize(const float2 &a) { return a / len(a); @@ -251,6 +246,11 @@ ccl_device_inline float2 floor(const float2 &a) #endif /* !__KERNEL_METAL__ */ +ccl_device_inline float len(const float2 a) +{ + return sqrtf(dot(a, a)); +} + ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b) { return (b != 0.0f) ? a / b : zero_float2(); diff --git a/tests/python/cycles_render_tests.py b/tests/python/cycles_render_tests.py index 598e4b6828c..10ba2ce552a 100644 --- a/tests/python/cycles_render_tests.py +++ b/tests/python/cycles_render_tests.py @@ -32,6 +32,11 @@ BLACKLIST_OPTIX = [ 'T43865.blend', ] +BLACKLIST_METAL = [ + # No MNEE for Metal currently + "underwater_caustics.blend", +] + BLACKLIST_GPU = [ # Uninvestigated differences with GPU. 'image_log.blend', @@ -116,6 +121,8 @@ def main(): blacklist += BLACKLIST_OSL if device == 'OPTIX': blacklist += BLACKLIST_OPTIX + if device == 'METAL': + blacklist += BLACKLIST_METAL from modules import render_report report = render_report.Report('Cycles', output_dir, idiff, device, blacklist) |