Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/blender/addon/properties.py27
-rw-r--r--intern/cycles/blender/addon/ui.py29
-rw-r--r--intern/cycles/blender/light.cpp6
-rw-r--r--intern/cycles/blender/object.cpp6
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/integrator/init_from_bake.h6
-rw-r--r--intern/cycles/kernel/integrator/intersect_closest.h46
-rw-r--r--intern/cycles/kernel/integrator/mnee.h1150
-rw-r--r--intern/cycles/kernel/integrator/path_state.h4
-rw-r--r--intern/cycles/kernel/integrator/shade_background.h26
-rw-r--r--intern/cycles/kernel/integrator/shade_surface.h76
-rw-r--r--intern/cycles/kernel/integrator/shader_eval.h11
-rw-r--r--intern/cycles/kernel/integrator/state_template.h2
-rw-r--r--intern/cycles/kernel/integrator/subsurface.h7
-rw-r--r--intern/cycles/kernel/light/light.h9
-rw-r--r--intern/cycles/kernel/svm/closure.h8
-rw-r--r--intern/cycles/kernel/types.h50
-rw-r--r--intern/cycles/scene/light.cpp2
-rw-r--r--intern/cycles/scene/light.h1
-rw-r--r--intern/cycles/scene/object.cpp11
-rw-r--r--intern/cycles/scene/object.h3
-rw-r--r--intern/cycles/scene/scene.cpp26
-rw-r--r--intern/cycles/util/math_float2.h12
-rw-r--r--tests/python/cycles_render_tests.py7
24 files changed, 1483 insertions, 43 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 24cc5735c96..51b3b3d2bcb 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1012,6 +1012,12 @@ class CyclesLightSettings(bpy.types.PropertyGroup):
"note that this will make the light invisible",
default=False,
)
+ is_caustics_light: BoolProperty(
+ name="Shadow Caustics",
+ description="Generate approximate caustics in shadows of refractive surfaces. "
+ "Lights, caster and receiver objects must have shadow caustics options set to enable this",
+ default=False,
+ )
@classmethod
def register(cls):
@@ -1028,6 +1034,12 @@ class CyclesLightSettings(bpy.types.PropertyGroup):
class CyclesWorldSettings(bpy.types.PropertyGroup):
+ is_caustics_light: BoolProperty(
+ name="Shadow Caustics",
+ description="Generate approximate caustics in shadows of refractive surfaces. "
+ "Lights, caster and receiver objects must have shadow caustics options set to enable this",
+ default=False,
+ )
sampling_method: EnumProperty(
name="Sampling Method",
description="How to sample the background light",
@@ -1226,6 +1238,21 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
subtype='DISTANCE',
)
+ is_caustics_caster: BoolProperty(
+ name="Cast Shadow Caustics",
+ description="With refractive materials, generate approximate caustics in shadows of this object. "
+ "Up to 10 bounces inside this object are taken into account. Lights, caster and receiver objects "
+ "must have shadow caustics options set to enable this",
+ default=False,
+ )
+
+ is_caustics_receiver: BoolProperty(
+ name="Receive Shadow Caustics",
+ description="Receive approximate caustics from refractive materials in shadows on this object. "
+ "Lights, caster and receiver objects must have shadow caustics options set to enable this",
+ default=False,
+ )
+
@classmethod
def register(cls):
bpy.types.Object.cycles = PointerProperty(
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 1f50f3da7ae..c86452e0a18 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1082,7 +1082,7 @@ class CYCLES_OBJECT_PT_shading(CyclesButtonsPanel, Panel):
return False
ob = context.object
- return ob and has_geometry_visibility(ob)
+ return ob and has_geometry_visibility(ob) and ob.type != 'LIGHT'
def draw(self, context):
pass
@@ -1125,6 +1125,28 @@ class CYCLES_OBJECT_PT_shading_gi_approximation(CyclesButtonsPanel, Panel):
col.prop(cob, "ao_distance")
+class CYCLES_OBJECT_PT_shading_caustics(CyclesButtonsPanel, Panel):
+ bl_label = "Caustics"
+ bl_parent_id = "CYCLES_OBJECT_PT_shading"
+ bl_context = "object"
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and not use_metal(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ col = layout.column()
+
+ ob = context.object
+ cob = ob.cycles
+ col.prop(cob, "is_caustics_caster")
+ col.prop(cob, "is_caustics_receiver")
+
+
class CYCLES_OBJECT_PT_visibility(CyclesButtonsPanel, Panel):
bl_label = "Visibility"
bl_context = "object"
@@ -1300,6 +1322,8 @@ class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel):
sub.active = not (light.type == 'AREA' and clamp.is_portal)
sub.prop(clamp, "cast_shadow")
sub.prop(clamp, "use_multiple_importance_sampling", text="Multiple Importance")
+ if not use_metal(context):
+ sub.prop(clamp, "is_caustics_light", text="Shadow Caustics")
if light.type == 'AREA':
col.prop(clamp, "is_portal", text="Portal")
@@ -1496,6 +1520,8 @@ class CYCLES_WORLD_PT_settings_surface(CyclesButtonsPanel, Panel):
subsub.active = cworld.sampling_method == 'MANUAL'
subsub.prop(cworld, "sample_map_resolution")
sub.prop(cworld, "max_bounces")
+ sub.prop(cworld, "is_caustics_light", text="Shadow Caustics")
+
class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
@@ -2193,6 +2219,7 @@ classes = (
CYCLES_OBJECT_PT_shading,
CYCLES_OBJECT_PT_shading_shadow_terminator,
CYCLES_OBJECT_PT_shading_gi_approximation,
+ CYCLES_OBJECT_PT_shading_caustics,
CYCLES_OBJECT_PT_visibility,
CYCLES_OBJECT_PT_visibility_ray_visibility,
CYCLES_OBJECT_PT_visibility_culling,
diff --git a/intern/cycles/blender/light.cpp b/intern/cycles/blender/light.cpp
index bdfbdfd8618..04799443ceb 100644
--- a/intern/cycles/blender/light.cpp
+++ b/intern/cycles/blender/light.cpp
@@ -114,6 +114,9 @@ void BlenderSync::sync_light(BL::Object &b_parent,
light->set_cast_shadow(get_boolean(clight, "cast_shadow"));
light->set_use_mis(get_boolean(clight, "use_multiple_importance_sampling"));
+ /* caustics light */
+ light->set_use_caustics(get_boolean(clight, "is_caustics_light"));
+
light->set_max_bounces(get_int(clight, "max_bounces"));
if (b_ob_info.real_object != b_ob_info.iter_object) {
@@ -176,6 +179,9 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
/* force enable light again when world is resynced */
light->set_is_enabled(true);
+ /* caustic light */
+ light->set_use_caustics(get_boolean(cworld, "is_caustics_light"));
+
light->tag_update(scene);
light_map.set_recalc(b_world);
}
diff --git a/intern/cycles/blender/object.cpp b/intern/cycles/blender/object.cpp
index 58ea8961845..16a6b3454ed 100644
--- a/intern/cycles/blender/object.cpp
+++ b/intern/cycles/blender/object.cpp
@@ -298,6 +298,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
}
object->set_ao_distance(ao_distance);
+ bool is_caustics_caster = get_boolean(cobject, "is_caustics_caster");
+ object->set_is_caustics_caster(is_caustics_caster);
+
+ bool is_caustics_receiver = get_boolean(cobject, "is_caustics_receiver");
+ object->set_is_caustics_receiver(is_caustics_receiver);
+
/* sync the asset name for Cryptomatte */
BL::Object parent = b_ob.parent();
ustring parent_name;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 6e3ac1bd32f..cfd503a621d 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -223,6 +223,7 @@ set(SRC_KERNEL_INTEGRATOR_HEADERS
integrator/intersect_subsurface.h
integrator/intersect_volume_stack.h
integrator/megakernel.h
+ integrator/mnee.h
integrator/path_state.h
integrator/shade_background.h
integrator/shade_light.h
diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h
index b84059d6676..d6047bd2288 100644
--- a/intern/cycles/kernel/integrator/init_from_bake.h
+++ b/intern/cycles/kernel/integrator/init_from_bake.h
@@ -230,7 +230,11 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Setup next kernel to execute. */
const int shader_index = shader & SHADER_MASK;
const int shader_flags = kernel_tex_fetch(__shaders, shader_index).flags;
- if (shader_flags & SD_HAS_RAYTRACE) {
+ const bool use_caustics = kernel_data.integrator.use_caustics &&
+ (object_flag & SD_OBJECT_CAUSTICS);
+ const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics;
+
+ if (use_raytrace_kernel) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index);
}
else {
diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h
index 9fcbf89c579..b8ce625c11b 100644
--- a/intern/cycles/kernel/integrator/intersect_closest.h
+++ b/intern/cycles/kernel/integrator/intersect_closest.h
@@ -123,7 +123,9 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
/* Continue with shading shadow catcher surface. */
const int shader = intersection_get_shader(kg, isect);
const int flags = kernel_tex_fetch(__shaders, shader).flags;
- const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
+ const bool use_caustics = kernel_data.integrator.use_caustics &&
+ (object_flags & SD_OBJECT_CAUSTICS);
+ const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
if (use_raytrace_kernel) {
INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
@@ -145,7 +147,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche
const int shader = intersection_get_shader(kg, &isect);
const int flags = kernel_tex_fetch(__shaders, shader).flags;
- const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
+ const int object_flags = intersection_get_object_flags(kg, &isect);
+ const bool use_caustics = kernel_data.integrator.use_caustics &&
+ (object_flags & SD_OBJECT_CAUSTICS);
+ const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
@@ -214,7 +219,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
const int flags = kernel_tex_fetch(__shaders, shader).flags;
if (!integrator_intersect_terminate(kg, state, flags)) {
- const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
+ const int object_flags = intersection_get_object_flags(kg, isect);
+ const bool use_caustics = kernel_data.integrator.use_caustics &&
+ (object_flags & SD_OBJECT_CAUSTICS);
+ const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader);
@@ -261,7 +269,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
/* Hit a surface, continue with surface kernel unless terminated. */
const int shader = intersection_get_shader(kg, isect);
const int flags = kernel_tex_fetch(__shaders, shader).flags;
- const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE);
+ const int object_flags = intersection_get_object_flags(kg, isect);
+ const bool use_caustics = kernel_data.integrator.use_caustics &&
+ (object_flags & SD_OBJECT_CAUSTICS);
+ const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics;
if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(
@@ -328,12 +339,37 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg,
isect.prim = PRIM_NONE;
}
+ /* Setup mnee flag to signal last intersection with a caster */
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
+
+#ifdef __MNEE__
+ /* Path culling logic for MNEE (removes fireflies at the cost of bias) */
+ if (kernel_data.integrator.use_caustics) {
+ /* The following firefly removal mechanism works by culling light connections when
+ * a ray comes from a caustic caster directly after bouncing off a different caustic
+ * receiver */
+ bool from_caustic_caster = false;
+ bool from_caustic_receiver = false;
+ if (!(path_flag & PATH_RAY_CAMERA) && last_isect_object != OBJECT_NONE) {
+ const int object_flags = kernel_tex_fetch(__object_flag, last_isect_object);
+ from_caustic_receiver = (object_flags & SD_OBJECT_CAUSTICS_RECEIVER);
+ from_caustic_caster = (object_flags & SD_OBJECT_CAUSTICS_CASTER);
+ }
+
+ bool has_receiver_ancestor = INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_RECEIVER_ANCESTOR;
+ INTEGRATOR_STATE_WRITE(state, path, mnee) &= ~PATH_MNEE_CULL_LIGHT_CONNECTION;
+ if (from_caustic_caster && has_receiver_ancestor)
+ INTEGRATOR_STATE_WRITE(state, path, mnee) |= PATH_MNEE_CULL_LIGHT_CONNECTION;
+ if (from_caustic_receiver)
+ INTEGRATOR_STATE_WRITE(state, path, mnee) |= PATH_MNEE_RECEIVER_ANCESTOR;
+ }
+#endif /* __MNEE__ */
+
/* Light intersection for MIS. */
if (kernel_data.integrator.use_lamp_mis) {
/* NOTE: if we make lights visible to camera rays, we'll need to initialize
* these in the path_state_init. */
const int last_type = INTEGRATOR_STATE(state, isect, type);
- const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
hit = lights_intersect(
kg, state, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) ||
hit;
diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h
new file mode 100644
index 00000000000..af2032c9b99
--- /dev/null
+++ b/intern/cycles/kernel/integrator/mnee.h
@@ -0,0 +1,1150 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#ifdef __MNEE__
+
+# include "kernel/light/sample.h"
+
+/*
+ * Manifold Next Event Estimation
+ *
+ * This code adds manifold next event estimation through refractive surface(s) as a new sampling
+ * technique for direct lighting, i.e. finding the point on the refractive surface(s) along the
+ * path to a light sample, which satisfies fermat's principle for a given microfacet normal and
+ * the path's end points. This technique involves walking on the "specular manifold" using a pseudo
+ * newton solver. Such a manifold is defined by the specular constraint matrix from the manifold
+ * exploration framework [2]. For each refractive interface, this constraint is defined by
+ * enforcing that the generalized half-vector projection onto the interface local tangent plane is
+ * null. The newton solver guides the walk by linearizing the manifold locally before reprojecting
+ * the linear solution onto the refractive surface. See paper [1] for more details about
+ * the technique itself and [3] for the half-vector light transport formulation, from which it is
+ * derived.
+ *
+ * [1] Manifold Next Event Estimation
+ * Johannes Hanika, Marc Droske, and Luca Fascione. 2015.
+ * Comput. Graph. Forum 34, 4 (July 2015), 87–97.
+ * https://jo.dreggn.org/home/2015_mnee.pdf
+ *
+ * [2] Manifold exploration: a Markov Chain Monte Carlo technique for rendering scenes with
+ * difficult specular transport Wenzel Jakob and Steve Marschner. 2012. ACM Trans. Graph. 31, 4,
+ * Article 58 (July 2012), 13 pages.
+ * https://www.cs.cornell.edu/projects/manifolds-sg12/
+ *
+ * [3] The Natural-Constraint Representation of the Path Space for Efficient Light Transport
+ * Simulation Anton S. Kaplanyan, Johannes Hanika, and Carsten Dachsbacher. 2014. ACM Trans. Graph.
+ * 33, 4, Article 102 (July 2014), 13 pages.
+ * https://cg.ivd.kit.edu/english/HSLT.php
+ */
+
+# define MNEE_MAX_ITERATIONS 50
+# define MNEE_MAX_INTERSECTION_COUNT 10
+# define MNEE_SOLVER_THRESHOLD 0.001f
+# define MNEE_MAX_CAUSTIC_CASTERS 6
+# define MNEE_MIN_DISTANCE 0.001f
+# define MNEE_MIN_DETERMINANT 0.0001f
+# define MNEE_PROJECTION_DISTANCE_MULTIPLIER 2.f
+
+CCL_NAMESPACE_BEGIN
+
+/* Manifold struct containing the local differential geometry quantity */
+typedef ccl_private struct ManifoldVertex {
+ /* Position and partials */
+ float3 p;
+ float3 dp_du;
+ float3 dp_dv;
+
+ /* Normal and partials */
+ float3 n;
+ float3 ng;
+ float3 dn_du;
+ float3 dn_dv;
+
+ /* geometric info */
+ float2 uv;
+ int object;
+ int prim;
+ int shader;
+
+ /* closure info */
+ float eta;
+ ccl_private ShaderClosure *bsdf;
+ float2 n_offset;
+
+ /* constraint and its derivative matrices */
+ float2 constraint;
+ float4 a;
+ float4 b;
+ float4 c;
+} ManifoldVertex;
+
+/* Multiplication of a 2x2 matrix encoded in a row-major order float4 by a vector */
+ccl_device_inline float2 mat22_mult(const float4 a, const float2 b)
+{
+ return make_float2(a.x * b.x + a.y * b.y, a.z * b.x + a.w * b.y);
+}
+
+/* Multiplication of 2x2 matrices encoded in a row-major order float4 */
+ccl_device_inline float4 mat22_mult(const float4 a, const float4 b)
+{
+ return make_float4(
+ a.x * b.x + a.y * b.z, a.x * b.y + a.y * b.w, a.z * b.x + a.w * b.z, a.z * b.y + a.w * b.w);
+}
+
+/* Determinant of a 2x2 matrix encoded in a row-major order float4 */
+ccl_device_inline float mat22_determinant(const float4 m)
+{
+ return m.x * m.w - m.y * m.z;
+}
+
+/* Inverse of a 2x2 matrix encoded in a row-major order float4 */
+ccl_device_inline float mat22_inverse(const float4 m, ccl_private float4& m_inverse)
+{
+ float det = mat22_determinant(m);
+ if (fabsf(det) < MNEE_MIN_DETERMINANT)
+ return 0.f;
+ m_inverse = make_float4(m.w, -m.y, -m.z, m.x) / det;
+ return det;
+}
+
+/* Update light sample */
+ccl_device_forceinline void mnee_update_light_sample(KernelGlobals kg,
+ const float3 P,
+ ccl_private LightSample *ls)
+{
+ /* correct light sample position/direction and pdf
+ * NOTE: preserve pdf in area measure */
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp);
+
+ if (ls->type == LIGHT_POINT || ls->type == LIGHT_SPOT) {
+ ls->D = normalize_len(ls->P - P, &ls->t);
+ ls->Ng = -ls->D;
+
+ float2 uv = map_to_sphere(ls->Ng);
+ ls->u = uv.x;
+ ls->v = uv.y;
+
+ float invarea = klight->spot.invarea;
+ ls->eval_fac = (0.25f * M_1_PI_F) * invarea;
+ ls->pdf = invarea;
+
+ if (ls->type == LIGHT_SPOT) {
+ /* spot light attenuation */
+ float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]);
+ ls->eval_fac *= spot_light_attenuation(
+ dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls->Ng);
+ }
+ }
+ else if (ls->type == LIGHT_AREA) {
+ ls->D = normalize_len(ls->P - P, &ls->t);
+ ls->pdf = fabsf(klight->area.invarea);
+ }
+}
+
+/* Compute orthonormal basis
+ * https://graphics.pixar.com/library/OrthonormalB/paper.pdf */
+ccl_device_forceinline void mnee_make_orthonormals(const float3 n,
+ ccl_private float3 *dp_du,
+ ccl_private float3 *dp_dv)
+{
+ if (n.z < 0.0f) {
+ const float a = 1.0f / (1.0f - n.z);
+ const float b = n.x * n.y * a;
+ *dp_du = make_float3(1.0f - n.x * n.x * a, -b, n.x);
+ *dp_dv = make_float3(b, n.y * n.y * a - 1.0f, -n.y);
+ }
+ else {
+ const float a = 1.0f / (1.0f + n.z);
+ const float b = -n.x * n.y * a;
+ *dp_du = make_float3(1.0f - n.x * n.x * a, b, -n.x);
+ *dp_dv = make_float3(b, 1.0f - n.y * n.y * a, -n.y);
+ }
+}
+
+/* Manifold vertex setup from ray and intersection data */
+ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
+ ccl_private ManifoldVertex *vtx,
+ ccl_private ShaderClosure *bsdf,
+ const float eta,
+ const float2 n_offset,
+ ccl_private const Ray *ray,
+ ccl_private const Intersection *isect,
+ ccl_private ShaderData *sd_vtx)
+{
+ sd_vtx->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) :
+ isect->object;
+
+ sd_vtx->type = isect->type;
+ sd_vtx->flag = 0;
+ sd_vtx->object_flag = kernel_tex_fetch(__object_flag, sd_vtx->object);
+
+ /* matrices and time */
+ shader_setup_object_transforms(kg, sd_vtx, ray->time);
+ sd_vtx->time = ray->time;
+
+ sd_vtx->prim = isect->prim;
+ sd_vtx->ray_length = isect->t;
+
+ sd_vtx->u = isect->u;
+ sd_vtx->v = isect->v;
+
+ sd_vtx->shader = kernel_tex_fetch(__tri_shader, sd_vtx->prim);
+
+ float3 verts[3];
+ float3 normals[3];
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd_vtx->prim);
+
+ if (sd_vtx->type & PRIMITIVE_TRIANGLE) {
+ /* Static triangle. */
+
+ /* Load triangle vertices. */
+ verts[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0);
+ verts[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1);
+ verts[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2);
+
+ /* Vectors. */
+ sd_vtx->P = triangle_point_from_uv(kg, sd_vtx, isect->object, isect->prim, isect->u, isect->v);
+
+ /* Smooth normal. */
+ if (sd_vtx->shader & SHADER_SMOOTH_NORMAL) {
+ /* Load triangle vertices. */
+ normals[0] = kernel_tex_fetch(__tri_vnormal, tri_vindex.x);
+ normals[1] = kernel_tex_fetch(__tri_vnormal, tri_vindex.y);
+ normals[2] = kernel_tex_fetch(__tri_vnormal, tri_vindex.z);
+ }
+ }
+ else { /* if (sd_vtx->type & PRIMITIVE_MOTION_TRIANGLE) */
+ /* Motion triangle. */
+
+ /* Get motion info. */
+ int numsteps, numverts;
+ object_motion_info(kg, sd_vtx->object, &numsteps, &numverts, NULL);
+
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ int maxstep = numsteps * 2;
+ int step = min((int)(sd_vtx->time * maxstep), maxstep - 1);
+ float t = sd_vtx->time * maxstep - step;
+
+ /* Find attribute. */
+ int offset = intersection_find_attribute(kg, sd_vtx->object, ATTR_STD_MOTION_VERTEX_POSITION);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* Fetch vertex coordinates. */
+ float3 next_verts[3];
+ uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd_vtx->prim);
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+ motion_triangle_verts_for_step(
+ kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
+
+ /* Interpolate between steps. */
+ verts[0] = (1.0f - t) * verts[0] + t * next_verts[0];
+ verts[1] = (1.0f - t) * verts[1] + t * next_verts[1];
+ verts[2] = (1.0f - t) * verts[2] + t * next_verts[2];
+
+ /* Compute refined position. */
+ sd_vtx->P = motion_triangle_point_from_uv(
+ kg, sd_vtx, isect->object, isect->prim, isect->u, isect->v, verts);
+
+ /* Compute smooth normal. */
+ if (sd_vtx->shader & SHADER_SMOOTH_NORMAL) {
+ /* Find attribute. */
+ int offset = intersection_find_attribute(kg, sd_vtx->object, ATTR_STD_MOTION_VERTEX_NORMAL);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* Fetch vertex coordinates. */
+ float3 next_normals[3];
+ motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
+ motion_triangle_normals_for_step(
+ kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals);
+
+ /* Interpolate between steps. */
+ normals[0] = (1.0f - t) * normals[0] + t * next_normals[0];
+ normals[1] = (1.0f - t) * normals[1] + t * next_normals[1];
+ normals[2] = (1.0f - t) * normals[2] + t * next_normals[2];
+ }
+ }
+
+ /* manifold vertex position */
+ vtx->p = sd_vtx->P;
+
+ if (!(sd_vtx->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ /* Instance transform. */
+ object_position_transform_auto(kg, sd_vtx, &verts[0]);
+ object_position_transform_auto(kg, sd_vtx, &verts[1]);
+ object_position_transform_auto(kg, sd_vtx, &verts[2]);
+ object_normal_transform_auto(kg, sd_vtx, &normals[0]);
+ object_normal_transform_auto(kg, sd_vtx, &normals[1]);
+ object_normal_transform_auto(kg, sd_vtx, &normals[2]);
+ }
+
+ /* Tangent space (position derivatives) wrt barycentric (u, v). */
+ vtx->dp_du = verts[0] - verts[2];
+ vtx->dp_dv = verts[1] - verts[2];
+
+ /* Geometric normal. */
+ vtx->ng = normalize(cross(vtx->dp_du, vtx->dp_dv));
+ if (sd_vtx->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED)
+ vtx->ng = -vtx->ng;
+
+ /* Shading normal. */
+ if (!(sd_vtx->shader & SHADER_SMOOTH_NORMAL)) {
+ vtx->n = vtx->ng;
+ vtx->dn_du = vtx->dn_dv = zero_float3();
+ }
+ else {
+ /* Interpolate normals between vertices. */
+ float n_len;
+ vtx->n = normalize_len(normals[0] * sd_vtx->u + normals[1] * sd_vtx->v +
+ normals[2] * (1.0f - sd_vtx->u - sd_vtx->v),
+ &n_len);
+
+ /* Shading normal derivatives wrt barycentric (u, v)
+ * we calculate the derivative of n = |u*n0 + v*n1 + (1-u-v)*n2| using:
+ * d/du [f(u)/|f(u)|] = [d/du f(u)]/|f(u)| - f(u)/|f(u)|^3 <f(u), d/du f(u)>. */
+ const float inv_n_len = 1.f / n_len;
+ vtx->dn_du = inv_n_len * (normals[0] - normals[2]);
+ vtx->dn_dv = inv_n_len * (normals[1] - normals[2]);
+ vtx->dn_du -= vtx->n * dot(vtx->n, vtx->dn_du);
+ vtx->dn_dv -= vtx->n * dot(vtx->n, vtx->dn_dv);
+ }
+
+ /* dp_du and dp_dv need to be continuous across triangles for the h normal
+ * offset to yield a consistent halfvector while walking on the manifold.
+ * It's usually best to rely on the mesh uv layout, which is assumed to be
+ * continuous across the mesh. */
+ float2 duv0, duv1;
+ bool found_uv = false;
+ AttributeDescriptor uv_desc = find_attribute(kg, sd_vtx, ATTR_STD_GENERATED);
+ if (uv_desc.offset != ATTR_STD_NOT_FOUND) {
+ float3 uvs[3];
+ uvs[0] = kernel_tex_fetch(__attributes_float3, uv_desc.offset + tri_vindex.x);
+ uvs[1] = kernel_tex_fetch(__attributes_float3, uv_desc.offset + tri_vindex.y);
+ uvs[2] = kernel_tex_fetch(__attributes_float3, uv_desc.offset + tri_vindex.z);
+ duv0 = make_float2(uvs[0].x - uvs[2].x, uvs[0].y - uvs[2].y);
+ duv1 = make_float2(uvs[1].x - uvs[2].x, uvs[1].y - uvs[2].y);
+ found_uv = true;
+ }
+ else {
+ uv_desc = find_attribute(kg, sd_vtx, ATTR_STD_UV);
+ if (uv_desc.offset != ATTR_STD_NOT_FOUND) {
+ float2 uvs[3];
+ uvs[0] = kernel_tex_fetch(__attributes_float2, uv_desc.offset + tri_vindex.x);
+ uvs[1] = kernel_tex_fetch(__attributes_float2, uv_desc.offset + tri_vindex.y);
+ uvs[2] = kernel_tex_fetch(__attributes_float2, uv_desc.offset + tri_vindex.z);
+ duv0 = make_float2(uvs[0].x - uvs[2].x, uvs[0].y - uvs[2].y);
+ duv1 = make_float2(uvs[1].x - uvs[2].x, uvs[1].y - uvs[2].y);
+ found_uv = true;
+ }
+ }
+ if (found_uv) {
+ const float det = duv0.x * duv1.y - duv0.y * duv1.x;
+ if (det != 0.f) {
+ const float inv_det = 1.f / det;
+
+ /* Tangent space (position derivatives) wrt texture (u, v). */
+ const float3 dp_du = vtx->dp_du;
+ const float3 dp_dv = vtx->dp_dv;
+ vtx->dp_du = (duv1.y * dp_du - duv0.y * dp_dv) * inv_det;
+ vtx->dp_dv = (-duv1.x * dp_du + duv0.x * dp_dv) * inv_det;
+
+ /* Shading normal derivatives wrt texture (u, v). */
+ const float3 dn_du = vtx->dn_du;
+ const float3 dn_dv = vtx->dn_dv;
+ vtx->dn_du = (duv1.y * dn_du - duv0.y * dn_dv) * inv_det;
+ vtx->dn_dv = (-duv1.x * dn_du + duv0.x * dn_dv) * inv_det;
+ }
+ }
+
+ /* Orthonormalize (dp_du,dp_dv) using a linear transformation, which
+ * we use on (dn_du,dn_dv) as well so the new (u,v) are consistent. */
+ const float inv_len_dp_du = 1.f / len(vtx->dp_du);
+ vtx->dp_du *= inv_len_dp_du;
+ vtx->dn_du *= inv_len_dp_du;
+ const float dpdu_dot_dpdv = dot(vtx->dp_du, vtx->dp_dv);
+ const float3 dp_dv = vtx->dp_dv - dpdu_dot_dpdv * vtx->dp_du;
+ const float3 dn_dv = vtx->dn_dv - dpdu_dot_dpdv * vtx->dn_du;
+ const float inv_len_dp_dv = 1.f / len(dp_dv);
+ vtx->dp_dv = dp_dv * inv_len_dp_dv;
+ vtx->dn_dv = dn_dv * inv_len_dp_dv;
+
+ /* Initialize constraint and its derivates. */
+ vtx->a = vtx->c = zero_float4();
+ vtx->b = make_float4(1.f, 0.f, 0.f, 1.f);
+ vtx->constraint = zero_float2();
+ vtx->n_offset = n_offset;
+
+ /* Closure information. */
+ vtx->bsdf = bsdf;
+ vtx->eta = eta;
+
+ /* Geometric information. */
+ vtx->uv = make_float2(isect->u, isect->v);
+ vtx->object = sd_vtx->object;
+ vtx->prim = sd_vtx->prim;
+ vtx->shader = sd_vtx->shader;
+}
+
+/* Compute constraint derivatives. */
+ccl_device_forceinline bool mnee_compute_constraint_derivatives(
+ int vertex_count,
+ ccl_private ManifoldVertex *vertices,
+ ccl_private const float3 &surface_sample_pos,
+ const bool light_fixed_direction,
+ const float3 light_sample)
+{
+ for (int vi = 0; vi < vertex_count; vi++) {
+ ccl_private ManifoldVertex &v = vertices[vi];
+
+ /* Direction toward surface sample. */
+ float3 wi = (vi == 0) ? surface_sample_pos - v.p : vertices[vi - 1].p - v.p;
+ float ili = len(wi);
+ if (ili < MNEE_MIN_DISTANCE)
+ return false;
+ ili = 1.f / ili;
+ wi *= ili;
+
+ /* Direction toward light sample. */
+ float3 wo = (vi == vertex_count - 1) ?
+ (light_fixed_direction ? light_sample : light_sample - v.p) :
+ vertices[vi + 1].p - v.p;
+ float ilo = len(wo);
+ if (ilo < MNEE_MIN_DISTANCE)
+ return false;
+ ilo = 1.f / ilo;
+ wo *= ilo;
+
+ /* Invert ior if coming from inside. */
+ float eta = v.eta;
+ if (dot(wi, v.ng) < .0f)
+ eta = 1.f / eta;
+
+ /* Half vector. */
+ float3 H = -(wi + eta * wo);
+ float ilh = 1.f / len(H);
+ H *= ilh;
+
+ ilo *= eta * ilh;
+ ili *= ilh;
+
+ /* Local shading frame. */
+ float dp_du_dot_n = dot(v.dp_du, v.n);
+ float3 s = v.dp_du - dp_du_dot_n * v.n;
+ float inv_len_s = 1.f / len(s);
+ s *= inv_len_s;
+ float3 t = cross(v.n, s);
+
+ float3 dH_du, dH_dv;
+
+ /* Constraint derivatives wrt previous vertex. */
+ if (vi > 0) {
+ ccl_private ManifoldVertex &v_prev = vertices[vi - 1];
+ dH_du = (v_prev.dp_du - wi * dot(wi, v_prev.dp_du)) * ili;
+ dH_dv = (v_prev.dp_dv - wi * dot(wi, v_prev.dp_dv)) * ili;
+ dH_du -= H * dot(dH_du, H);
+ dH_dv -= H * dot(dH_dv, H);
+ dH_du = -dH_du;
+ dH_dv = -dH_dv;
+
+ v.a = make_float4(dot(dH_du, s), dot(dH_dv, s), dot(dH_du, t), dot(dH_dv, t));
+ }
+
+ /* Constraint derivatives wrt current vertex. */
+ if (vi == vertex_count - 1 && light_fixed_direction) {
+ dH_du = ili * (-v.dp_du + wi * dot(wi, v.dp_du));
+ dH_dv = ili * (-v.dp_dv + wi * dot(wi, v.dp_dv));
+ }
+ else {
+ dH_du = -v.dp_du * (ili + ilo) + wi * (dot(wi, v.dp_du) * ili) +
+ wo * (dot(wo, v.dp_du) * ilo);
+ dH_dv = -v.dp_dv * (ili + ilo) + wi * (dot(wi, v.dp_dv) * ili) +
+ wo * (dot(wo, v.dp_dv) * ilo);
+ }
+ dH_du -= H * dot(dH_du, H);
+ dH_dv -= H * dot(dH_dv, H);
+ dH_du = -dH_du;
+ dH_dv = -dH_dv;
+
+ float3 ds_du = -inv_len_s * (dot(v.dp_du, v.dn_du) * v.n + dp_du_dot_n * v.dn_du);
+ float3 ds_dv = -inv_len_s * (dot(v.dp_du, v.dn_dv) * v.n + dp_du_dot_n * v.dn_dv);
+ ds_du -= s * dot(s, ds_du);
+ ds_dv -= s * dot(s, ds_dv);
+ float3 dt_du = cross(v.dn_du, s) + cross(v.n, ds_du);
+ float3 dt_dv = cross(v.dn_dv, s) + cross(v.n, ds_dv);
+
+ v.b = make_float4(dot(dH_du, s) + dot(H, ds_du),
+ dot(dH_dv, s) + dot(H, ds_dv),
+ dot(dH_du, t) + dot(H, dt_du),
+ dot(dH_dv, t) + dot(H, dt_dv));
+
+ /* Constraint derivatives wrt next vertex. */
+ if (vi < vertex_count - 1) {
+ ccl_private ManifoldVertex &v_next = vertices[vi + 1];
+ dH_du = (v_next.dp_du - wo * dot(wo, v_next.dp_du)) * ilo;
+ dH_dv = (v_next.dp_dv - wo * dot(wo, v_next.dp_dv)) * ilo;
+ dH_du -= H * dot(dH_du, H);
+ dH_dv -= H * dot(dH_dv, H);
+ dH_du = -dH_du;
+ dH_dv = -dH_dv;
+
+ v.c = make_float4(dot(dH_du, s), dot(dH_dv, s), dot(dH_du, t), dot(dH_dv, t));
+ }
+
+ /* Constraint vector wrt. the local shading frame. */
+ v.constraint = make_float2(dot(s, H), dot(t, H)) - v.n_offset;
+ }
+ return true;
+}
+
+/* Invert (block) constraint derivative matrix and solve linear system so we can map dh back to dx:
+ * dh / dx = A
+ * dx = inverse(A) x dh
+ * to use for specular specular manifold walk
+ * (See for example http://faculty.washington.edu/finlayso/ebook/algebraic/advanced/LUtri.htm
+ * for block tridiagonal matrix based linear system solve) */
+ccl_device_forceinline bool mnee_solve_matrix_h_to_x(int vertex_count,
+ ccl_private ManifoldVertex *vertices,
+ ccl_private float2 *dx)
+{
+ float4 Li[MNEE_MAX_CAUSTIC_CASTERS];
+ float2 C[MNEE_MAX_CAUSTIC_CASTERS];
+
+ /* Block tridiagonal LU factorization. */
+ float4 Lk = vertices[0].b;
+ if (mat22_inverse(Lk, Li[0]) == 0.f)
+ return false;
+
+ C[0] = vertices[0].constraint;
+
+ for (int k = 1; k < vertex_count; k++) {
+ float4 A = mat22_mult(vertices[k].a, Li[k - 1]);
+
+ Lk = vertices[k].b - mat22_mult(A, vertices[k - 1].c);
+ if (mat22_inverse(Lk, Li[k]) == 0.f)
+ return false;
+
+ C[k] = vertices[k].constraint - mat22_mult(A, C[k - 1]);
+ }
+
+ dx[vertex_count - 1] = mat22_mult(Li[vertex_count - 1], C[vertex_count - 1]);
+ for (int k = vertex_count - 2; k > -1; k--)
+ dx[k] = mat22_mult(Li[k], C[k] - mat22_mult(vertices[k].c, dx[k + 1]));
+
+ return true;
+}
+
+/* Newton solver to walk on specular manifold. */
+ccl_device_forceinline bool mnee_newton_solver(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ ccl_private ShaderData *sd_vtx,
+ ccl_private const LightSample *ls,
+ int vertex_count,
+ ccl_private ManifoldVertex *vertices)
+{
+ float2 dx[MNEE_MAX_CAUSTIC_CASTERS];
+ ManifoldVertex tentative[MNEE_MAX_CAUSTIC_CASTERS];
+
+ Ray projection_ray;
+ projection_ray.self.light_object = OBJECT_NONE;
+ projection_ray.self.light_prim = PRIM_NONE;
+ projection_ray.dP = differential_make_compact(sd->dP);
+ projection_ray.dD = differential_zero_compact();
+ projection_ray.time = sd->time;
+ Intersection projection_isect;
+
+ const bool light_fixed_direction = (ls->t == FLT_MAX);
+ const float3 light_sample = light_fixed_direction ? ls->D : ls->P;
+
+ /* We start gently, potentially ramping up to beta = 1, since target configurations
+ * far from the seed path can send the proposed solution further than the linearized
+ * local differential geometric quantities are meant for (especially dn/du and dn/dv). */
+ float beta = .1f;
+ bool reduce_stepsize = false;
+ bool resolve_constraint = true;
+ for (int iteration = 0; iteration < MNEE_MAX_ITERATIONS; iteration++) {
+ if (resolve_constraint) {
+ /* Calculate constraintand its derivatives for vertices. */
+ if (!mnee_compute_constraint_derivatives(
+ vertex_count, vertices, sd->P, light_fixed_direction, light_sample))
+ return false;
+
+ /* Calculate constraint norm. */
+ float constraint_norm = 0.f;
+ for (int vi = 0; vi < vertex_count; vi++)
+ constraint_norm = fmaxf(constraint_norm, len(vertices[vi].constraint));
+
+ /* Return if solve successful. */
+ if (constraint_norm < MNEE_SOLVER_THRESHOLD)
+ return true;
+
+ /* Invert derivative matrix. */
+ if (!mnee_solve_matrix_h_to_x(vertex_count, vertices, dx))
+ return false;
+ }
+
+ /* Construct tentative new vertices and project back onto surface. */
+ for (int vi = 0; vi < vertex_count; vi++) {
+ ccl_private ManifoldVertex &mv = vertices[vi];
+
+ /* Tentative new position on linearized manifold (tangent plane). */
+ float3 tentative_p = mv.p - beta * (dx[vi].x * mv.dp_du + dx[vi].y * mv.dp_dv);
+
+ /* For certain configs, the first solve ends up below the receiver. */
+ if (vi == 0) {
+ const float3 wo = tentative_p - sd->P;
+ if (dot(sd->Ng, wo) <= 0.f) {
+ /* Change direction for the 1st interface. */
+ tentative_p = mv.p + beta * (dx[vi].x * mv.dp_du + dx[vi].y * mv.dp_dv);
+ }
+ }
+
+ /* Project tentative point from tangent plane back to surface
+ * we ignore all other intersections since this tentative path could lead
+ * valid to a valid path even if occluded. */
+ if (vi == 0) {
+ projection_ray.self.object = sd->object;
+ projection_ray.self.prim = sd->prim;
+ projection_ray.P = sd->P;
+ }
+ else {
+ ccl_private const ManifoldVertex &pv = vertices[vi - 1];
+ projection_ray.self.object = pv.object;
+ projection_ray.self.prim = pv.prim;
+ projection_ray.P = pv.p;
+ }
+ projection_ray.D = normalize_len(tentative_p - projection_ray.P, &projection_ray.t);
+ projection_ray.t *= MNEE_PROJECTION_DISTANCE_MULTIPLIER;
+
+ bool projection_success = false;
+ for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) {
+ bool hit = scene_intersect(kg, &projection_ray, PATH_RAY_TRANSMIT, &projection_isect);
+ if (!hit)
+ break;
+
+ int hit_object = (projection_isect.object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, projection_isect.prim) :
+ projection_isect.object;
+
+ if (hit_object == mv.object) {
+ projection_success = true;
+ break;
+ }
+
+ projection_ray.self.object = projection_isect.object;
+ projection_ray.self.prim = projection_isect.prim;
+ projection_ray.P += projection_isect.t * projection_ray.D;
+ projection_ray.t -= projection_isect.t;
+ }
+ if (!projection_success) {
+ reduce_stepsize = true;
+ break;
+ }
+
+ /* Setup corrected manifold vertex. */
+ mnee_setup_manifold_vertex(kg,
+ &tentative[vi],
+ mv.bsdf,
+ mv.eta,
+ mv.n_offset,
+ &projection_ray,
+ &projection_isect,
+ sd_vtx);
+ }
+
+ /* Check that tentative path is still transmissive. */
+ if (!reduce_stepsize) {
+ for (int vi = 0; vi < vertex_count; vi++) {
+ ccl_private ManifoldVertex &tv = tentative[vi];
+
+ /* Direction toward surface sample. */
+ const float3 wi = (vi == 0 ? sd->P : tentative[vi - 1].p) - tv.p;
+ /* Direction toward light sample. */
+ const float3 wo = (vi == vertex_count - 1) ? light_fixed_direction ? ls->D : ls->P - tv.p :
+ tentative[vi + 1].p - tv.p;
+
+ if (dot(tv.n, wi) * dot(tv.n, wo) >= 0.f) {
+ reduce_stepsize = true;
+ break;
+ }
+ }
+ }
+
+ if (reduce_stepsize) {
+ /* Adjust step if can't land on right surface. */
+ reduce_stepsize = false;
+ resolve_constraint = false;
+ beta *= .5f;
+ continue;
+ }
+
+ /* Copy tentative vertices to main vertex list. */
+ for (int vi = 0; vi < vertex_count; vi++)
+ vertices[vi] = tentative[vi];
+
+ /* Increase the step to get back to 1. */
+ resolve_constraint = true;
+ beta = min(1.f, 2.f * beta);
+ }
+
+ return false;
+}
+
+/* Sample bsdf in half-vector measure. */
+ccl_device_forceinline float2
+mnee_sample_bsdf_dh(ClosureType type, float alpha_x, float alpha_y, float sample_u, float sample_v)
+{
+ float alpha2;
+ float cos_phi, sin_phi;
+
+ if (alpha_x == alpha_y) {
+ float phi = sample_v * M_2PI_F;
+ fast_sincosf(phi, &sin_phi, &cos_phi);
+ alpha2 = alpha_x * alpha_x;
+ }
+ else {
+ float phi = atanf(alpha_y / alpha_x * tanf(M_2PI_F * sample_v + M_PI_2_F));
+ if (sample_v > .5f)
+ phi += M_PI_F;
+ fast_sincosf(phi, &sin_phi, &cos_phi);
+ float alpha_x2 = alpha_x * alpha_x;
+ float alpha_y2 = alpha_y * alpha_y;
+ alpha2 = 1.f / (cos_phi * cos_phi / alpha_x2 + sin_phi * sin_phi / alpha_y2);
+ }
+
+ /* Map sampled angles to micro-normal direction h. */
+ float tan2_theta = alpha2;
+ if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) {
+ tan2_theta *= -logf(1.0f - sample_u);
+ }
+ else { /* if (type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID || type ==
+ CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_FRESNEL_ID) */
+ tan2_theta *= sample_u / (1.0f - sample_u);
+ }
+ float cos2_theta = 1.0f / (1.0f + tan2_theta);
+ float sin_theta = safe_sqrtf(1.0f - cos2_theta);
+ return make_float2(cos_phi * sin_theta, sin_phi * sin_theta);
+}
+
+/* Evaluate product term inside eq.6 at solution interface vi
+ * divided by corresponding sampled pdf:
+ * fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h|
+ * We assume here that the pdf (in half-vector measure) is the same as
+ * the one calculation when sampling the microfacet normals from the
+ * specular chain above: this allows us to simplify the bsdf weight */
+ccl_device_forceinline float3 mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure,
+ float3 wi,
+ float3 wo)
+{
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)closure;
+
+ float cosNO = dot(bsdf->N, wi);
+ float cosNI = dot(bsdf->N, wo);
+
+ float3 Ht = normalize(-(bsdf->ior * wo + wi));
+ float cosHO = dot(Ht, wi);
+
+ float alpha2 = bsdf->alpha_x * bsdf->alpha_y;
+ float cosThetaM = dot(bsdf->N, Ht);
+
+ float G;
+ if (bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) {
+ /* Eq. 26, 27: now calculate G1(i,m) and G1(o,m). */
+ G = bsdf_beckmann_G1(bsdf->alpha_x, cosNO) * bsdf_beckmann_G1(bsdf->alpha_x, cosNI);
+ }
+ else { /* if (type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID || type ==
+ CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_FRESNEL_ID) */
+ /* Eq. 34: now calculate G1(i,m) and G1(o,m). */
+ G = (2.f / (1.f + safe_sqrtf(1.f + alpha2 * (1.f - cosNO * cosNO) / (cosNO * cosNO)))) *
+ (2.f / (1.f + safe_sqrtf(1.f + alpha2 * (1.f - cosNI * cosNI) / (cosNI * cosNI))));
+ }
+
+ /*
+ * bsdf_do = (1 - F) * D_do * G * |h.wi| / (n.wi * n.wo)
+ * pdf_dh = D_dh * cosThetaM
+ * D_do = D_dh * |dh/do|
+ *
+ * contribution = bsdf_do * |do/dh| * |n.wo / n.h| / pdf_dh
+ * = (1 - F) * G * |h.wi / (n.wi * n.h^2)|
+ */
+ return bsdf->weight * G * fabsf(cosHO / (cosNO * sqr(cosThetaM)));
+}
+
+/* Compute transfer matrix determinant |T1| = |dx1/dxn| (and |dh/dx| in the process) */
+ccl_device_forceinline bool mnee_compute_transfer_matrix(ccl_private const ShaderData *sd,
+ ccl_private const LightSample *ls,
+ int vertex_count,
+ ccl_private ManifoldVertex *vertices,
+ ccl_private float *dx1_dxlight,
+ ccl_private float *dh_dx)
+{
+ /* Simplified block tridiagonal LU factorization. */
+ float4 Li;
+ float4 U[MNEE_MAX_CAUSTIC_CASTERS - 1];
+
+ float4 Lk = vertices[0].b;
+ float Lk_det = mat22_inverse(Lk, Li);
+ if (Lk_det == 0.f)
+ return false;
+
+ float det_dh_dx = Lk_det;
+
+ for (int k = 1; k < vertex_count; k++) {
+ U[k - 1] = mat22_mult(Li, vertices[k - 1].c);
+
+ Lk = vertices[k].b - mat22_mult(vertices[k].a, U[k - 1]);
+ Lk_det = mat22_inverse(Lk, Li);
+ if (Lk_det == 0.f)
+ return false;
+
+ det_dh_dx *= Lk_det;
+ }
+
+ /* Fill out constraint derivatives wrt light vertex param. */
+
+ /* Local shading frame at last free vertex. */
+ int mi = vertex_count - 1;
+ ccl_private const ManifoldVertex &m = vertices[mi];
+
+ float3 s = normalize(m.dp_du - dot(m.dp_du, m.n) * m.n);
+ float3 t = cross(m.n, s);
+
+ /* Local differential geometry. */
+ float3 dp_du, dp_dv;
+ mnee_make_orthonormals(ls->Ng, &dp_du, &dp_dv);
+
+ /* Direction toward surface sample. */
+ float3 wi = vertex_count == 1 ? sd->P - m.p : vertices[mi - 1].p - m.p;
+ float ili = 1.f / len(wi);
+ wi *= ili;
+
+ /* Invert ior if coming from inside. */
+ float eta = m.eta;
+ if (dot(wi, m.ng) < .0f)
+ eta = 1.f / eta;
+
+ float dxn_dwn;
+ float4 dc_dlight;
+
+ if (ls->t == FLT_MAX) {
+ /* Constant direction toward light sample. */
+ float3 wo = ls->D;
+
+ /* Half vector. */
+ float3 H = -(wi + eta * wo);
+ float ilh = 1.f / len(H);
+ H *= ilh;
+
+ float ilo = -eta * ilh;
+
+ float cos_theta = dot(wo, m.n);
+ float sin_theta = safe_sqrtf(1.f - sqr(cos_theta));
+ float cos_phi = dot(wo, s);
+ float sin_phi = safe_sqrtf(1.f - sqr(cos_phi));
+
+ /* Wo = (cos_phi * sin_theta) * s + (sin_phi * sin_theta) * t + cos_theta * n. */
+ float3 dH_dtheta = ilo * (cos_theta * (cos_phi * s + sin_phi * t) - sin_theta * m.n);
+ float3 dH_dphi = ilo * sin_theta * (-sin_phi * s + cos_phi * t);
+ dH_dtheta -= H * dot(dH_dtheta, H);
+ dH_dphi -= H * dot(dH_dphi, H);
+
+ /* constraint derivatives wrt light direction expressed
+ * in spherical coordinates (theta, phi). */
+ dc_dlight = make_float4(
+ dot(dH_dtheta, s), dot(dH_dphi, s), dot(dH_dtheta, t), dot(dH_dphi, t));
+
+ /* Jacobian to convert dtheta x dphi to dw measure. */
+ dxn_dwn = 1.f / fmaxf(MNEE_MIN_DISTANCE, fabsf(sin_theta));
+ }
+ else {
+ /* Direction toward light sample. */
+ float3 wo = ls->P - m.p;
+ float ilo = 1.f / len(wo);
+ wo *= ilo;
+
+ /* Half vector. */
+ float3 H = -(wi + eta * wo);
+ float ilh = 1.f / len(H);
+ H *= ilh;
+
+ ilo *= eta * ilh;
+
+ float3 dH_du = (dp_du - wo * dot(wo, dp_du)) * ilo;
+ float3 dH_dv = (dp_dv - wo * dot(wo, dp_dv)) * ilo;
+ dH_du -= H * dot(dH_du, H);
+ dH_dv -= H * dot(dH_dv, H);
+ dH_du = -dH_du;
+ dH_dv = -dH_dv;
+
+ dc_dlight = make_float4(dot(dH_du, s), dot(dH_dv, s), dot(dH_du, t), dot(dH_dv, t));
+
+ /* Neutral value since dc_dlight is already in the desired vertex area measure. */
+ dxn_dwn = 1.f;
+ }
+
+ /* Compute transfer matrix. */
+ float4 Tp = -mat22_mult(Li, dc_dlight);
+ for (int k = vertex_count - 2; k > -1; k--)
+ Tp = -mat22_mult(U[k], Tp);
+
+ *dx1_dxlight = fabsf(mat22_determinant(Tp)) * dxn_dwn;
+ *dh_dx = fabsf(det_dh_dx);
+ return true;
+}
+
+/* Calculate the path contribution. */
+ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private ShaderData *sd,
+ ccl_private ShaderData *sd_mnee,
+ ccl_private LightSample *ls,
+ int vertex_count,
+ ccl_private ManifoldVertex *vertices,
+ ccl_private BsdfEval *throughput)
+{
+ float wo_len;
+ float3 wo = normalize_len(vertices[0].p - sd->P, &wo_len);
+
+ /* Initialize throughput and evaluate receiver bsdf * |n.wo|. */
+ shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader);
+
+ /* Update light sample with new position / direct.ion
+ * and keep pdf in vertex area measure */
+ mnee_update_light_sample(kg, vertices[vertex_count - 1].p, ls);
+
+ /* Evaluate light sam.ple
+ * in case the light has a node-based shader:
+ * 1. sd_mnee will be used to store light data, which is why we need to do
+ * this evaluation here. sd_mnee needs to contain the solution's last
+ * interface data at the end of the call for the shadow ray setup to work.
+ * 2. ls needs to contain the last interface data for the light shader to
+ * evaluate properly */
+ float3 light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time);
+ bsdf_eval_mul3(throughput, light_eval / ls->pdf);
+
+ /* Generalized geometry term. */
+
+ float dh_dx;
+ float dx1_dxlight;
+ if (!mnee_compute_transfer_matrix(sd, ls, vertex_count, vertices, &dx1_dxlight, &dh_dx))
+ return false;
+
+ /* Receiver bsdf eval above already contains |n.wo|. */
+ const float dw0_dx1 = fabsf(dot(wo, vertices[0].n)) / sqr(wo_len);
+
+ const float G = dw0_dx1 * dx1_dxlight;
+ bsdf_eval_mul(throughput, G);
+
+ /* Specular reflectance. */
+
+ /* Probe ray / isect. */
+ Ray probe_ray;
+ probe_ray.self.light_object = ls->object;
+ probe_ray.self.light_prim = ls->prim;
+ probe_ray.dP = differential_make_compact(sd->dP);
+ probe_ray.dD = differential_zero_compact();
+ probe_ray.time = sd->time;
+ Intersection probe_isect;
+
+ probe_ray.self.object = sd->object;
+ probe_ray.self.prim = sd->prim;
+ probe_ray.P = sd->P;
+
+ float3 wi;
+ float wi_len;
+ for (int vi = 0; vi < vertex_count; vi++) {
+ ccl_private const ManifoldVertex &v = vertices[vi];
+
+ /* Check visiblity. */
+ probe_ray.D = normalize_len(v.p - probe_ray.P, &probe_ray.t);
+ if (scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect)) {
+ int hit_object = (probe_isect.object == OBJECT_NONE) ?
+ kernel_tex_fetch(__prim_object, probe_isect.prim) :
+ probe_isect.object;
+ /* Test whether the ray hit the appropriate object at its intended location. */
+ if (hit_object != v.object || fabsf(probe_ray.t - probe_isect.t) > MNEE_MIN_DISTANCE)
+ return false;
+ }
+ probe_ray.self.object = v.object;
+ probe_ray.self.prim = v.prim;
+ probe_ray.P = v.p;
+
+ /* Set view looking dir. */
+ wi = -wo;
+ wi_len = wo_len;
+
+ /* Setup shader data for vertex vi. */
+ shader_setup_from_sample(kg,
+ sd_mnee,
+ v.p,
+ v.n,
+ wi,
+ v.shader,
+ v.object,
+ v.prim,
+ v.uv.x,
+ v.uv.y,
+ wi_len,
+ sd->time,
+ false,
+ LAMP_NONE);
+
+ /* Evaluate shader nodes at solution vi. */
+ shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true);
+
+ /* Set light looking dir. */
+ wo = (vi == vertex_count - 1) ? (ls->t == FLT_MAX ? ls->D : ls->P - v.p) :
+ vertices[vi + 1].p - v.p;
+ wo = normalize_len(wo, &wo_len);
+
+ /* Evaluate product term inside eq.6 at solution interface. vi
+ * divided by corresponding sampled pdf:
+ * fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h| */
+ float3 bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo);
+ bsdf_eval_mul3(throughput, bsdf_contribution);
+ }
+
+ return true;
+}
+
+/* Manifold next event estimation path sampling. */
+ccl_device_forceinline bool kernel_path_mnee_sample(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private ShaderData *sd,
+ ccl_private ShaderData *sd_mnee,
+ ccl_private const RNGState *rng_state,
+ ccl_private LightSample *ls,
+ ccl_private BsdfEval *throughput)
+{
+ /*
+ * 1. send seed ray from shading point to light sample position (or along sampled light
+ * direction), making sure it intersects a caustic caster at least once, ignoring all other
+ * intersections (the final path could be valid even though objects could occlude the light
+ * this seed point), building an array of manifold vertices.
+ */
+
+ /* Setup probe ray. */
+ Ray probe_ray;
+ probe_ray.self.object = sd->object;
+ probe_ray.self.prim = sd->prim;
+ probe_ray.self.light_object = ls->object;
+ probe_ray.self.light_prim = ls->prim;
+ probe_ray.P = sd->P;
+ if (ls->t == FLT_MAX) {
+ /* Distant / env light. */
+ probe_ray.D = ls->D;
+ probe_ray.t = ls->t;
+ }
+ else {
+ /* Other lights, avoid self-intersection. */
+ probe_ray.D = ls->P - probe_ray.P;
+ probe_ray.D = normalize_len(probe_ray.D, &probe_ray.t);
+ }
+ probe_ray.dP = differential_make_compact(sd->dP);
+ probe_ray.dD = differential_zero_compact();
+ probe_ray.time = sd->time;
+ Intersection probe_isect;
+
+ ManifoldVertex vertices[MNEE_MAX_CAUSTIC_CASTERS];
+
+ int vertex_count = 0;
+ for (int isect_count = 0; isect_count < MNEE_MAX_INTERSECTION_COUNT; isect_count++) {
+ bool hit = scene_intersect(kg, &probe_ray, PATH_RAY_TRANSMIT, &probe_isect);
+ if (!hit)
+ break;
+
+ const int object_flags = intersection_get_object_flags(kg, &probe_isect);
+ if (object_flags & SD_OBJECT_CAUSTICS_CASTER) {
+
+ /* Do we have enough slots. */
+ if (vertex_count >= MNEE_MAX_CAUSTIC_CASTERS)
+ return false;
+
+ ccl_private ManifoldVertex &mv = vertices[vertex_count++];
+
+ /* Setup shader data on caustic caster and evaluate context. */
+ shader_setup_from_ray(kg, sd_mnee, &probe_ray, &probe_isect);
+
+ /* Last bool argument is the MNEE flag (for TINY_MAX_CLOSURE cap in kernel_shader.h). */
+ shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true);
+
+ /* get and sample refraction bsdf. */
+ bool found_transimissive_microfacet_bsdf = false;
+ for (int ci = 0; ci < sd_mnee->num_closure; ci++) {
+ ccl_private ShaderClosure *bsdf = &sd_mnee->closure[ci];
+ if (bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID ||
+ bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID) {
+
+ found_transimissive_microfacet_bsdf = true;
+ ccl_private MicrofacetBsdf *microfacet_bsdf = (ccl_private MicrofacetBsdf *)bsdf;
+
+ /* Figure out appropriate index of refraction ratio. */
+ const float eta = (sd_mnee->flag & SD_BACKFACING) ? 1.0f / microfacet_bsdf->ior :
+ microfacet_bsdf->ior;
+
+ /* Sample transmissive microfacet bsdf. */
+ float bsdf_u, bsdf_v;
+ path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+ float2 h = mnee_sample_bsdf_dh(
+ bsdf->type, microfacet_bsdf->alpha_x, microfacet_bsdf->alpha_y, bsdf_u, bsdf_v);
+
+ /* Setup differential geometry on vertex. */
+ mnee_setup_manifold_vertex(kg, &mv, bsdf, eta, h, &probe_ray, &probe_isect, sd_mnee);
+ break;
+ }
+ }
+ if (!found_transimissive_microfacet_bsdf)
+ return false;
+ }
+
+ probe_ray.self.object = probe_isect.object;
+ probe_ray.self.prim = probe_isect.prim;
+ probe_ray.P += probe_isect.t * probe_ray.D;
+ if (ls->t != FLT_MAX)
+ probe_ray.t -= probe_isect.t;
+ };
+
+ /* Mark the manifold walk invalid to keep mollification on by default. */
+ INTEGRATOR_STATE_WRITE(state, path, mnee) &= ~PATH_MNEE_VALID;
+
+ if (vertex_count == 0)
+ return false;
+
+ /* Check whether the transmission depth limit is reached before continuing. */
+ const int transmission_bounce = INTEGRATOR_STATE(state, path, transmission_bounce) +
+ vertex_count;
+ if (transmission_bounce >= kernel_data.integrator.max_transmission_bounce)
+ return false;
+
+ /* Check whether the diffuse depth limit is reached before continuing. */
+ const int diffuse_bounce = INTEGRATOR_STATE(state, path, diffuse_bounce) + 1;
+ if (diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce)
+ return false;
+
+ /* Check whether the overall depth limit is reached before continuing. */
+ const int bounce = INTEGRATOR_STATE(state, path, bounce) + diffuse_bounce + transmission_bounce;
+ if (bounce >= kernel_data.integrator.max_bounce)
+ return false;
+
+ /* Mark the manifold walk valid to turn off mollification regardless of how successful the walk
+ * is: this is noticeable when another mnee is performed deeper in the path, for an internally
+ * reflected ray for example. If mollification was active for the reflection, a clear
+ * discontinuity is visible between direct and indirect contributions */
+ INTEGRATOR_STATE_WRITE(state, path, mnee) |= PATH_MNEE_VALID;
+
+ /* 2. Walk on the specular manifold to find vertices on the
+ * casters that satisfy snell's law for each interface
+ */
+ if (mnee_newton_solver(kg, sd, sd_mnee, ls, vertex_count, vertices)) {
+ /* 3. If a solution exists, calculate contribution of the corresponding path */
+ if (!mnee_path_contribution(kg, state, sd, sd_mnee, ls, vertex_count, vertices, throughput))
+ return false;
+
+ return true;
+ }
+
+ return false;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __MNEE__ */
diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h
index e6bdf21499c..ec93ac6d46f 100644
--- a/intern/cycles/kernel/integrator/path_state.h
+++ b/intern/cycles/kernel/integrator/path_state.h
@@ -57,6 +57,10 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f;
INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);
+#ifdef __MNEE__
+ INTEGRATOR_STATE_WRITE(state, path, mnee) = 0;
+#endif
+
INTEGRATOR_STATE_WRITE(state, isect, object) = OBJECT_NONE;
INTEGRATOR_STATE_WRITE(state, isect, prim) = PRIM_NONE;
diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h
index 69dc3dc772d..4fd41121466 100644
--- a/intern/cycles/kernel/integrator/shade_background.h
+++ b/intern/cycles/kernel/integrator/shade_background.h
@@ -101,6 +101,22 @@ ccl_device_inline void integrate_background(KernelGlobals kg,
#endif
}
+#ifdef __MNEE__
+ if (INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) {
+ if (kernel_data.background.use_mis) {
+ for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
+ /* This path should have been resolved with mnee, it will
+ * generate a firefly for small lights since it is improbable. */
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+ if (klight->type == LIGHT_BACKGROUND && klight->use_caustics) {
+ eval_background = false;
+ break;
+ }
+ }
+ }
+ }
+#endif /* __MNEE__ */
+
/* Evaluate background shader. */
float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) :
zero_float3();
@@ -140,6 +156,16 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
}
#endif
+#ifdef __MNEE__
+ if (INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) {
+ /* This path should have been resolved with mnee, it will
+ * generate a firefly for small lights since it is improbable. */
+ const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+ if (klight->use_caustics)
+ return;
+ }
+#endif /* __MNEE__ */
+
/* Evaluate light shader. */
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h
index df9af6ca107..c5dd9fe27f4 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -6,6 +6,8 @@
#include "kernel/film/accumulate.h"
#include "kernel/film/passes.h"
+#include "kernel/integrator/mnee.h"
+
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/shader_eval.h"
#include "kernel/integrator/subsurface.h"
@@ -92,6 +94,7 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
#ifdef __EMISSION__
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
+template<uint node_feature_mask>
ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
IntegratorState state,
ccl_private ShaderData *sd,
@@ -124,34 +127,65 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
* integrate_surface_bounce, evaluate the BSDF, and only then evaluate
* the light shader. This could also move to its own kernel, for
* non-constant light sources. */
- ShaderDataTinyStorage emission_sd_storage;
+ ShaderDataCausticsStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
- if (is_zero(light_eval)) {
- return;
- }
-
- /* Evaluate BSDF. */
- const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D);
+ Ray ray ccl_optional_struct_init;
BsdfEval bsdf_eval ccl_optional_struct_init;
- const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader);
- bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf);
+ const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D);
- if (ls.shader & SHADER_USE_MIS) {
- const float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, bsdf_pdf);
- bsdf_eval_mul(&bsdf_eval, mis_weight);
+# ifdef __MNEE__
+ bool skip_nee = false;
+ IF_KERNEL_NODES_FEATURE(RAYTRACE)
+ {
+ if (ls.lamp != LAMP_NONE) {
+ /* Is this a caustic light? */
+ const bool use_caustics = kernel_tex_fetch(__lights, ls.lamp).use_caustics;
+ if (use_caustics) {
+ /* Are we on a caustic caster? */
+ if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER))
+ return;
+
+ /* Are we on a caustic receiver? */
+ if (!is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_RECEIVER))
+ skip_nee = kernel_path_mnee_sample(
+ kg, state, sd, emission_sd, rng_state, &ls, &bsdf_eval);
+ }
+ }
+ }
+ if (skip_nee) {
+ /* Create shadow ray after successful manifold walk:
+ * emission_sd contains the last interface intersection and
+ * the light sample ls has been updated */
+ light_sample_to_surface_shadow_ray(kg, emission_sd, &ls, &ray);
}
+ else
+# endif /* __MNEE__ */
+ {
+ const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
+ if (is_zero(light_eval)) {
+ return;
+ }
- /* Path termination. */
- const float terminate = path_state_rng_light_termination(kg, rng_state);
- if (light_sample_terminate(kg, &ls, &bsdf_eval, terminate)) {
- return;
+ /* Evaluate BSDF. */
+ const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader);
+ bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf);
+
+ if (ls.shader & SHADER_USE_MIS) {
+ const float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, bsdf_pdf);
+ bsdf_eval_mul(&bsdf_eval, mis_weight);
+ }
+
+ /* Path termination. */
+ const float terminate = path_state_rng_light_termination(kg, rng_state);
+ if (light_sample_terminate(kg, &ls, &bsdf_eval, terminate)) {
+ return;
+ }
+
+ /* Create shadow ray. */
+ light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray);
}
- /* Create shadow ray. */
- Ray ray ccl_optional_struct_init;
- light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray);
const bool is_light = light_sample_is_light(&ls);
/* Branch off shadow kernel. */
@@ -501,7 +535,7 @@ ccl_device bool integrate_surface(KernelGlobals kg,
/* Direct light. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
- integrate_surface_direct_light(kg, state, &sd, &rng_state);
+ integrate_surface_direct_light<node_feature_mask>(kg, state, &sd, &rng_state);
#if defined(__AO__)
/* Ambient occlusion pass. */
diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h
index ce76f7a3a90..3066fb661a1 100644
--- a/intern/cycles/kernel/integrator/shader_eval.h
+++ b/intern/cycles/kernel/integrator/shader_eval.h
@@ -157,7 +157,11 @@ ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg,
*
* Blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy) */
- if (kernel_data.integrator.filter_glossy != FLT_MAX) {
+ if (kernel_data.integrator.filter_glossy != FLT_MAX
+#ifdef __MNEE__
+ && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID)
+#endif
+ ) {
float blur_pdf = kernel_data.integrator.filter_glossy *
INTEGRATOR_STATE(state, path, min_ray_pdf);
@@ -605,7 +609,8 @@ ccl_device void shader_eval_surface(KernelGlobals kg,
ConstIntegratorGenericState state,
ccl_private ShaderData *ccl_restrict sd,
ccl_global float *ccl_restrict buffer,
- uint32_t path_flag)
+ uint32_t path_flag,
+ bool use_caustics_storage = false)
{
/* If path is being terminated, we are tracing a shadow ray or evaluating
* emission, then we don't need to store closures. The emission and shadow
@@ -615,7 +620,7 @@ ccl_device void shader_eval_surface(KernelGlobals kg,
max_closures = 0;
}
else {
- max_closures = kernel_data.max_closures;
+ max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures;
}
sd->num_closure = 0;
diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h
index 6eac38b1af9..e7e6db037b0 100644
--- a/intern/cycles/kernel/integrator/state_template.h
+++ b/intern/cycles/kernel/integrator/state_template.h
@@ -34,6 +34,8 @@ KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
/* enum PathRayFlag */
KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
+/* enum PathRayMNEE */
+KERNEL_STRUCT_MEMBER(path, uint8_t, mnee, KERNEL_FEATURE_PATH_TRACING)
/* Multiple importance sampling
* The PDF of BSDF sampling at the last scatter point, and distance to the
* last scatter point minus the last ray segment. This distance lets us
diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h
index fa468e337c7..2391cc2356d 100644
--- a/intern/cycles/kernel/integrator/subsurface.h
+++ b/intern/cycles/kernel/integrator/subsurface.h
@@ -171,7 +171,12 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
const int shader = intersection_get_shader(kg, &ss_isect.hits[0]);
const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
- if (shader_flags & SD_HAS_RAYTRACE) {
+ const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]);
+ const bool use_caustics = kernel_data.integrator.use_caustics &&
+ (object_flags & SD_OBJECT_CAUSTICS);
+ const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics;
+
+ if (use_raytrace_kernel) {
INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE,
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE,
shader);
diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h
index 359ffd1c607..fb637008ca4 100644
--- a/intern/cycles/kernel/light/light.h
+++ b/intern/cycles/kernel/light/light.h
@@ -246,6 +246,15 @@ ccl_device bool lights_intersect(KernelGlobals kg,
if (!(klight->shader_id & SHADER_USE_MIS)) {
continue;
}
+
+#ifdef __MNEE__
+ /* This path should have been resolved with mnee, it will
+ * generate a firefly for small lights since it is improbable. */
+ if ((INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_CULL_LIGHT_CONNECTION) &&
+ klight->use_caustics) {
+ continue;
+ }
+#endif
}
if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h
index 68ddc2fa306..88b44cdbacf 100644
--- a/intern/cycles/kernel/svm/closure.h
+++ b/intern/cycles/kernel/svm/closure.h
@@ -395,8 +395,10 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
# endif
{
+ /* This is to prevent mnee from receiving a null bsdf. */
+ float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel);
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
- sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel));
+ sd, sizeof(MicrofacetBsdf), base_color * glass_weight * refraction_fresnel);
if (bsdf) {
bsdf->N = N;
bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
@@ -674,8 +676,10 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0)
#endif
{
+ /* This is to prevent mnee from receiving a null bsdf. */
+ float refraction_fresnel = fmaxf(0.0001f, 1.0f - fresnel);
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
- sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel));
+ sd, sizeof(MicrofacetBsdf), weight * refraction_fresnel);
if (bsdf) {
bsdf->N = N;
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 07d4a95780b..00b5b007e11 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -102,6 +102,12 @@ CCL_NAMESPACE_BEGIN
# undef __BAKING__
#endif /* __KERNEL_GPU_RAYTRACING__ */
+/* MNEE currently causes "Compute function exceeds available temporary registers"
+ * on Metal, disabled for now. */
+#ifndef __KERNEL_METAL__
+# define __MNEE__
+#endif
+
/* Scene-based selective features compilation. */
#ifdef __KERNEL_FEATURES__
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_CAMERA_MOTION)
@@ -293,6 +299,13 @@ enum PathRayFlag : uint32_t {
PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1U << 31U),
};
+// 8bit enum, just in case we need to move more variables in it
+enum PathRayMNEE {
+ PATH_MNEE_VALID = (1U << 0U),
+ PATH_MNEE_RECEIVER_ANCESTOR = (1U << 1U),
+ PATH_MNEE_CULL_LIGHT_CONNECTION = (1U << 2U),
+};
+
/* Configure ray visibility bits for rays and objects respectively,
* to make shadow catchers work.
*
@@ -649,6 +662,17 @@ typedef struct AttributeDescriptor {
# define MAX_CLOSURE __MAX_CLOSURE__
#endif
+/* For manifold next event estimation, we need space to store and evaluate
+ * 2 closures (with extra data) on the refractive interfaces, in addition
+ * to keeping the full sd at the current shading point. We need 4 because a
+ * refractive bsdf is instanced with a companion reflection bsdf, even though
+ * we only need the refractive one, and each of them requires 2 slots. */
+#ifndef __CAUSTICS_MAX_CLOSURE__
+# define CAUSTICS_MAX_CLOSURE 4
+#else
+# define CAUSTICS_MAX_CLOSURE __CAUSTICS_MAX_CLOSURE__
+#endif
+
#ifndef __MAX_VOLUME_STACK_SIZE__
# define MAX_VOLUME_STACK_SIZE 32
#else
@@ -779,11 +803,18 @@ enum ShaderDataObjectFlag {
SD_OBJECT_SHADOW_CATCHER = (1 << 7),
/* object has volume attributes */
SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8),
+ /* object is caustics caster */
+ SD_OBJECT_CAUSTICS_CASTER = (1 << 9),
+ /* object is caustics receiver */
+ SD_OBJECT_CAUSTICS_RECEIVER = (1 << 10),
+
+ /* object is using caustics */
+ SD_OBJECT_CAUSTICS = (SD_OBJECT_CAUSTICS_CASTER | SD_OBJECT_CAUSTICS_RECEIVER),
SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK | SD_OBJECT_MOTION | SD_OBJECT_TRANSFORM_APPLIED |
SD_OBJECT_NEGATIVE_SCALE_APPLIED | SD_OBJECT_HAS_VOLUME |
SD_OBJECT_INTERSECTS_VOLUME | SD_OBJECT_SHADOW_CATCHER |
- SD_OBJECT_HAS_VOLUME_ATTRIBUTES)
+ SD_OBJECT_HAS_VOLUME_ATTRIBUTES | SD_OBJECT_CAUSTICS)
};
typedef struct ccl_align(16) ShaderData
@@ -882,6 +913,15 @@ typedef struct ccl_align(16) ShaderDataTinyStorage
char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
}
ShaderDataTinyStorage;
+
+/* ShaderDataCausticsStorage needs the same alignment as ShaderData, or else
+ * the pointer cast in AS_SHADER_DATA invokes undefined behavior. */
+typedef struct ccl_align(16) ShaderDataCausticsStorage
+{
+ char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * (MAX_CLOSURE - CAUSTICS_MAX_CLOSURE)];
+}
+ShaderDataCausticsStorage;
+
#define AS_SHADER_DATA(shader_data_tiny_storage) \
((ccl_private ShaderData *)shader_data_tiny_storage)
@@ -1201,6 +1241,9 @@ typedef struct KernelIntegrator {
/* mis */
int use_lamp_mis;
+ /* caustics */
+ int use_caustics;
+
/* sampler */
int sampling_pattern;
@@ -1219,7 +1262,7 @@ typedef struct KernelIntegrator {
int direct_light_sampling_type;
/* padding */
- int pad1, pad2;
+ int pad1;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
@@ -1383,7 +1426,8 @@ typedef struct KernelLight {
float max_bounces;
float random;
float strength[3];
- float pad1, pad2;
+ int use_caustics;
+ float pad1;
Transform tfm;
Transform itfm;
union {
diff --git a/intern/cycles/scene/light.cpp b/intern/cycles/scene/light.cpp
index 68779a3ce02..85a7f37994c 100644
--- a/intern/cycles/scene/light.cpp
+++ b/intern/cycles/scene/light.cpp
@@ -123,6 +123,7 @@ NODE_DEFINE(Light)
SOCKET_BOOLEAN(use_glossy, "Use Glossy", true);
SOCKET_BOOLEAN(use_transmission, "Use Transmission", true);
SOCKET_BOOLEAN(use_scatter, "Use Scatter", true);
+ SOCKET_BOOLEAN(use_caustics, "Shadow Caustics", false);
SOCKET_INT(max_bounces, "Max Bounces", 1024);
SOCKET_UINT(random_id, "Random ID", 0);
@@ -896,6 +897,7 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc
klights[light_index].max_bounces = max_bounces;
klights[light_index].random = random;
+ klights[light_index].use_caustics = light->use_caustics;
klights[light_index].tfm = light->tfm;
klights[light_index].itfm = transform_inverse(light->tfm);
diff --git a/intern/cycles/scene/light.h b/intern/cycles/scene/light.h
index 3519b76cf51..25190f6fb63 100644
--- a/intern/cycles/scene/light.h
+++ b/intern/cycles/scene/light.h
@@ -61,6 +61,7 @@ class Light : public Node {
NODE_SOCKET_API(bool, use_glossy)
NODE_SOCKET_API(bool, use_transmission)
NODE_SOCKET_API(bool, use_scatter)
+ NODE_SOCKET_API(bool, use_caustics)
NODE_SOCKET_API(bool, is_shadow_catcher)
NODE_SOCKET_API(bool, is_portal)
diff --git a/intern/cycles/scene/object.cpp b/intern/cycles/scene/object.cpp
index fda9a211e60..9b19169880a 100644
--- a/intern/cycles/scene/object.cpp
+++ b/intern/cycles/scene/object.cpp
@@ -90,6 +90,9 @@ NODE_DEFINE(Object)
SOCKET_BOOLEAN(is_shadow_catcher, "Shadow Catcher", false);
+ SOCKET_BOOLEAN(is_caustics_caster, "Cast Shadow Caustics", false);
+ SOCKET_BOOLEAN(is_caustics_receiver, "Receive Shadow Caustics", false);
+
SOCKET_NODE(particle_system, "Particle System", ParticleSystem::get_node_type());
SOCKET_INT(particle_index, "Particle Index", 0);
@@ -510,6 +513,14 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
kobject.visibility = ob->visibility_for_tracing();
kobject.primitive_type = geom->primitive_type();
+ /* Object shadow caustics flag */
+ if (ob->is_caustics_caster) {
+ flag |= SD_OBJECT_CAUSTICS_CASTER;
+ }
+ if (ob->is_caustics_receiver) {
+ flag |= SD_OBJECT_CAUSTICS_RECEIVER;
+ }
+
/* Object flag. */
if (ob->use_holdout) {
flag |= SD_OBJECT_HOLDOUT_MASK;
diff --git a/intern/cycles/scene/object.h b/intern/cycles/scene/object.h
index 55689ccfa58..561ecd4e7e9 100644
--- a/intern/cycles/scene/object.h
+++ b/intern/cycles/scene/object.h
@@ -55,6 +55,9 @@ class Object : public Node {
NODE_SOCKET_API(float, shadow_terminator_shading_offset)
NODE_SOCKET_API(float, shadow_terminator_geometry_offset)
+ NODE_SOCKET_API(bool, is_caustics_caster)
+ NODE_SOCKET_API(bool, is_caustics_receiver)
+
NODE_SOCKET_API(float3, dupli_generated)
NODE_SOCKET_API(float2, dupli_uv)
diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp
index 2199b2351d0..359e2d8e2c3 100644
--- a/intern/cycles/scene/scene.cpp
+++ b/intern/cycles/scene/scene.cpp
@@ -489,7 +489,21 @@ void Scene::update_kernel_features()
if (use_motion && camera->use_motion()) {
kernel_features |= KERNEL_FEATURE_CAMERA_MOTION;
}
+
+ /* Figure out whether the scene will use shader raytrace we need at least
+ * one caustic light, one caustic caster and one caustic receiver to use
+ * and enable the mnee code path. */
+ bool has_caustics_receiver = false;
+ bool has_caustics_caster = false;
+ bool has_caustics_light = false;
+
foreach (Object *object, objects) {
+ if (object->get_is_caustics_caster()) {
+ has_caustics_caster = true;
+ }
+ else if (object->get_is_caustics_receiver()) {
+ has_caustics_receiver = true;
+ }
Geometry *geom = object->get_geometry();
if (use_motion) {
if (object->use_motion() || geom->get_use_motion_blur()) {
@@ -518,6 +532,18 @@ void Scene::update_kernel_features()
}
}
+ foreach (Light *light, lights) {
+ if (light->get_use_caustics()) {
+ has_caustics_light = true;
+ }
+ }
+
+ dscene.data.integrator.use_caustics = false;
+ if (has_caustics_caster && has_caustics_receiver && has_caustics_light) {
+ dscene.data.integrator.use_caustics = true;
+ kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE;
+ }
+
if (bake_manager->get_baking()) {
kernel_features |= KERNEL_FEATURE_BAKING;
}
diff --git a/intern/cycles/util/math_float2.h b/intern/cycles/util/math_float2.h
index d6b47052720..542dad93467 100644
--- a/intern/cycles/util/math_float2.h
+++ b/intern/cycles/util/math_float2.h
@@ -40,7 +40,7 @@ ccl_device_inline float average(const float2 &a);
ccl_device_inline float distance(const float2 &a, const float2 &b);
ccl_device_inline float dot(const float2 &a, const float2 &b);
ccl_device_inline float cross(const float2 &a, const float2 &b);
-ccl_device_inline float len(const float2 &a);
+ccl_device_inline float len(const float2 a);
ccl_device_inline float2 normalize(const float2 &a);
ccl_device_inline float2 normalize_len(const float2 &a, float *t);
ccl_device_inline float2 safe_normalize(const float2 &a);
@@ -187,11 +187,6 @@ ccl_device_inline float cross(const float2 &a, const float2 &b)
return (a.x * b.y - a.y * b.x);
}
-ccl_device_inline float len(const float2 &a)
-{
- return sqrtf(dot(a, a));
-}
-
ccl_device_inline float2 normalize(const float2 &a)
{
return a / len(a);
@@ -251,6 +246,11 @@ ccl_device_inline float2 floor(const float2 &a)
#endif /* !__KERNEL_METAL__ */
+ccl_device_inline float len(const float2 a)
+{
+ return sqrtf(dot(a, a));
+}
+
ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b)
{
return (b != 0.0f) ? a / b : zero_float2();
diff --git a/tests/python/cycles_render_tests.py b/tests/python/cycles_render_tests.py
index 598e4b6828c..10ba2ce552a 100644
--- a/tests/python/cycles_render_tests.py
+++ b/tests/python/cycles_render_tests.py
@@ -32,6 +32,11 @@ BLACKLIST_OPTIX = [
'T43865.blend',
]
+BLACKLIST_METAL = [
+ # No MNEE for Metal currently
+ "underwater_caustics.blend",
+]
+
BLACKLIST_GPU = [
# Uninvestigated differences with GPU.
'image_log.blend',
@@ -116,6 +121,8 @@ def main():
blacklist += BLACKLIST_OSL
if device == 'OPTIX':
blacklist += BLACKLIST_OPTIX
+ if device == 'METAL':
+ blacklist += BLACKLIST_METAL
from modules import render_report
report = render_report.Report('Cycles', output_dir, idiff, device, blacklist)