Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h5
-rw-r--r--intern/cycles/kernel/closure/bssrdf.h217
-rw-r--r--intern/cycles/kernel/kernel_bvh.h119
-rw-r--r--intern/cycles/kernel/kernel_bvh_subsurface.h308
-rw-r--r--intern/cycles/kernel/kernel_bvh_traversal.h128
-rw-r--r--intern/cycles/kernel/kernel_montecarlo.h15
-rw-r--r--intern/cycles/kernel/kernel_path.h55
-rw-r--r--intern/cycles/kernel/kernel_random.h11
-rw-r--r--intern/cycles/kernel/kernel_shader.h117
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h456
-rw-r--r--intern/cycles/kernel/kernel_types.h17
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.cpp59
-rw-r--r--intern/cycles/kernel/osl/osl_bssrdf.h7
-rw-r--r--intern/cycles/kernel/osl/osl_closures.cpp4
-rw-r--r--intern/cycles/kernel/osl/osl_closures.h9
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp37
-rw-r--r--intern/cycles/kernel/shaders/node_subsurface_scattering.osl10
-rw-r--r--intern/cycles/kernel/shaders/stdosl.h3
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h26
-rw-r--r--intern/cycles/kernel/svm/svm_types.h8
20 files changed, 1224 insertions, 387 deletions
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 915b9eafbc1..b159f585831 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -37,11 +37,6 @@ CCL_NAMESPACE_BEGIN
/* GGX */
-__device_inline float safe_sqrtf(float f)
-{
- return sqrtf(max(f, 0.0f));
-}
-
__device int bsdf_microfacet_ggx_setup(ShaderClosure *sc)
{
sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* m_ag */
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index 486de4ca65f..23b932a91c6 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -21,130 +21,187 @@
CCL_NAMESPACE_BEGIN
-__device int bssrdf_setup(ShaderClosure *sc)
+__device int bssrdf_setup(ShaderClosure *sc, ClosureType type)
{
if(sc->data0 < BSSRDF_MIN_RADIUS) {
/* revert to diffuse BSDF if radius too small */
sc->data0 = 0.0f;
sc->data1 = 0.0f;
- return bsdf_diffuse_setup(sc);
+ int flag = bsdf_diffuse_setup(sc);
+ sc->type = CLOSURE_BSDF_BSSRDF_ID;
+ return flag;
}
else {
- /* IOR param */
- sc->data1 = max(sc->data1, 1.0f);
- sc->type = CLOSURE_BSSRDF_ID;
+ sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* texture blur */
+ sc->type = type;
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF;
}
}
-/* Simple Cubic BSSRDF falloff */
+/* Planar Truncated Gaussian
+ *
+ * Note how this is different from the typical gaussian, this one integrates
+ * to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky
+ * that integrating x*exp(-x) gives a nice closed form solution. */
+
+/* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */
+#define GAUSS_TRUNCATE 12.46f
-__device float bssrdf_cubic(float ld, float r)
+__device float bssrdf_gaussian_eval(ShaderClosure *sc, float r)
{
- if(ld == 0.0f)
- return (r == 0.0f)? 1.0f: 0.0f;
+ /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v) from 0 to Rm
+ * = 1 - exp(-Rm*Rm/(2*v)) */
+ const float v = sc->data0;
+ const float Rm = sqrtf(v*GAUSS_TRUNCATE);
+
+ if(r >= Rm)
+ return 0.0f;
- return powf(ld - min(r, ld), 3.0f) * 4.0f/powf(ld, 4.0f);
+ return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v);
}
-/* Original BSSRDF fallof function */
-
-typedef struct BSSRDFParams {
- float eta; /* index of refraction */
- float sigma_t_; /* reduced extinction coefficient */
- float sigma_tr; /* effective extinction coefficient */
- float Fdr; /* diffuse fresnel reflectance */
- float D; /* diffusion constant */
- float A;
- float alpha_; /* reduced albedo */
- float zr; /* distance of virtual lightsource above surface */
- float zv; /* distance of virtual lightsource below surface */
- float ld; /* mean free path */
- float ro; /* diffuse reflectance */
-} BSSRDFParams;
-
-__device float bssrdf_reduced_albedo_Rd(float alpha_, float A, float ro)
+__device float bssrdf_gaussian_pdf(ShaderClosure *sc, float r)
{
- float sq;
+ /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
+ const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
+
+ return bssrdf_gaussian_eval(sc, r) * (1.0f/(area_truncated));
+}
+
+__device void bssrdf_gaussian_sample(ShaderClosure *sc, float xi, float *r, float *h)
+{
+ /* xi*area_truncated = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v) from 0 to r = 1 - exp(-r^2/(2*v))
+ * r = sqrt(-2*v*logf(1 - xi*area_truncated)) */
+
+ const float v = sc->data0;
+ const float Rm = sqrtf(v*GAUSS_TRUNCATE);
+
+ /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
+ const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE);
+
+ /* r(xi) */
+ const float r_squared = -2.0f*v*logf(1.0f - xi*area_truncated);
+ *r = sqrtf(r_squared);
+
+ /* h^2 + r^2 = Rm^2 */
+ *h = sqrtf(Rm*Rm - r_squared);
+}
+
+/* Planar Cubic BSSRDF falloff
+ *
+ * This is basically (Rm - x)^3, with some factors to normalize it. For sampling
+ * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
+ * far as I can tell has no closed-form solution. So instead we solve it iteratively
+ * with Newton-Raphson. */
+
+__device float bssrdf_cubic_eval(ShaderClosure *sc, float r)
+{
+ const float Rm = sc->data0;
+
+ if(r >= Rm)
+ return 0.0f;
+
+ /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
+ const float Rm5 = (Rm*Rm) * (Rm*Rm) * Rm;
+ const float f = Rm - min(r, Rm);
+ const float f3 = f*f*f;
- sq = sqrtf(3.0f*(1.0f - alpha_));
- return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro;
+ return (f3 * 10.0f) / (Rm5 * M_PI_F);
}
-__device float bssrdf_compute_reduced_albedo(float A, float ro)
+__device float bssrdf_cubic_pdf(ShaderClosure *sc, float r)
{
- const float tolerance = 1e-8f;
- const int max_iteration_count = 20;
- float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1;
+ return bssrdf_cubic_eval(sc, r);
+}
+
+/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
+__device float bssrdf_cubic_quintic_root_find(float xi)
+{
+ /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
+ * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
+ * should not be too bad */
+ const float tolerance = 1e-6f;
+ const int max_iteration_count = 10;
+ float x = 0.25f;
int i;
- /* use secant method to compute reduced albedo using Rd function inverse
- * with a given reflectance */
- fxn = bssrdf_reduced_albedo_Rd(xn, A, ro);
- fxn_1 = bssrdf_reduced_albedo_Rd(xn_1, A, ro);
+ for (i = 0; i < max_iteration_count; i++) {
+ float x2 = x*x;
+ float x3 = x2*x;
+ float nx = (1.0f - x);
- for (i= 0; i < max_iteration_count; i++) {
- fsub = (fxn - fxn_1);
- if (fabsf(fsub) < tolerance)
- break;
- d = ((xn - xn_1)/fsub)*fxn;
- if (fabsf(d) < tolerance)
- break;
+ float f = 10.0f*x2 - 20.0f*x3 + 15.0f*x2*x2 - 4.0f*x2*x3 - xi;
+ float f_ = 20.0f*(x*nx)*(nx*nx);
- xn_1 = xn;
- fxn_1 = fxn;
- xn = xn - d;
+ if(fabsf(f) < tolerance || f_ == 0.0f)
+ break;
- if (xn > 1.0f) xn = 1.0f;
- if (xn_1 > 1.0f) xn_1 = 1.0f;
-
- fxn = bssrdf_reduced_albedo_Rd(xn, A, ro);
+ x = clamp(x - f/f_, 0.0f, 1.0f);
}
- /* avoid division by zero later */
- if (xn <= 0.0f)
- xn = 0.00001f;
-
- return xn;
+ return x;
}
-__device void bssrdf_setup_params(BSSRDFParams *ss, float refl, float radius, float ior)
+__device void bssrdf_cubic_sample(ShaderClosure *sc, float xi, float *r, float *h)
{
- ss->eta = ior;
- ss->Fdr = -1.440f/ior*ior + 0.710f/ior + 0.668f + 0.0636f*ior;
- ss->A = (1.0f + ss->Fdr)/(1.0f - ss->Fdr);
- ss->ld = radius;
- ss->ro = min(refl, 0.999f);
+ const float Rm = sc->data0;
+ const float r_ = bssrdf_cubic_quintic_root_find(xi) * Rm;
- ss->alpha_ = bssrdf_compute_reduced_albedo(ss->A, ss->ro);
+ *r = r_;
- ss->sigma_tr = 1.0f/ss->ld;
- ss->sigma_t_ = ss->sigma_tr/sqrtf(3.0f*(1.0f - ss->alpha_));
+ /* h^2 + r^2 = Rm^2 */
+ *h = sqrtf(Rm*Rm - r_*r_);
+}
- ss->D = 1.0f/(3.0f*ss->sigma_t_);
+/* None BSSRDF falloff
+ *
+ * Samples distributed over disk with no falloff, for reference. */
- ss->zr = 1.0f/ss->sigma_t_;
- ss->zv = ss->zr + 4.0f*ss->A*ss->D;
+__device float bssrdf_none_eval(ShaderClosure *sc, float r)
+{
+ const float Rm = sc->data0;
+ return (r < Rm)? 1.0f: 0.0f;
}
-/* exponential falloff function */
+__device float bssrdf_none_pdf(ShaderClosure *sc, float r)
+{
+ /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
+ const float Rm = sc->data0;
+ const float area = (M_PI_F*Rm*Rm);
+
+ return bssrdf_none_eval(sc, r) / area;
+}
-__device float bssrdf_original(const BSSRDFParams *ss, float r)
+__device void bssrdf_none_sample(ShaderClosure *sc, float xi, float *r, float *h)
{
- if(ss->ld == 0.0f)
- return (r == 0.0f)? 1.0f: 0.0f;
+ /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
+ * r = sqrt(xi)*Rm */
+ const float Rm = sc->data0;
+ const float r_ = sqrtf(xi)*Rm;
+
+ *r = r_;
- float rr = r*r;
- float sr, sv, Rdr, Rdv;
+ /* h^2 + r^2 = Rm^2 */
+ *h = sqrtf(Rm*Rm - r_*r_);
+}
- sr = sqrtf(rr + ss->zr*ss->zr);
- sv = sqrtf(rr + ss->zv*ss->zv);
+/* Generic */
- Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr);
- Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv);
+__device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h)
+{
+ if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
+ bssrdf_cubic_sample(sc, xi, r, h);
+ else
+ bssrdf_gaussian_sample(sc, xi, r, h);
+}
- return ss->alpha_*(1.0f/M_4PI_F)*(Rdr + Rdv);
+__device float bssrdf_pdf(ShaderClosure *sc, float r)
+{
+ if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
+ return bssrdf_cubic_pdf(sc, r);
+ else
+ return bssrdf_gaussian_pdf(sc, r);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h
index f0f1fcd4c0a..4cc92254b01 100644
--- a/intern/cycles/kernel/kernel_bvh.h
+++ b/intern/cycles/kernel/kernel_bvh.h
@@ -488,7 +488,7 @@ __device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersectio
/*stochastic fade from minimum width*/
if(lcg_state && coverage != 1.0f) {
- if(lcg_step(lcg_state) > coverage)
+ if(lcg_step_float(lcg_state) > coverage)
return hit;
}
@@ -640,7 +640,7 @@ __device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect,
float adjradius = or1 + z * (or2 - or1) / l;
adjradius = adjradius / (r1 + z * gd);
if(lcg_state && adjradius != 1.0f) {
- if(lcg_step(lcg_state) > adjradius)
+ if(lcg_step_float(lcg_state) > adjradius)
return false;
}
/* --- */
@@ -690,8 +690,8 @@ __device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect,
* only want to intersect with primitives in the same object, and in case of
* multiple hits we pick a single random primitive as the intersection point. */
-__device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect,
- float3 P, float3 idir, int object, int triAddr, float tmax, int *num_hits, float subsurface_random)
+__device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
+ float3 P, float3 idir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
{
/* compute and check intersection t-value */
float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
@@ -718,20 +718,30 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters
if(v >= 0.0f && u + v <= 1.0f) {
(*num_hits)++;
- if(subsurface_random * (*num_hits) <= 1.0f) {
- /* record intersection */
- isect->prim = triAddr;
- isect->object = object;
- isect->u = u;
- isect->v = v;
- isect->t = t;
- return true;
+ int hit;
+
+ if(*num_hits <= max_hits) {
+ hit = *num_hits - 1;
}
+ else {
+ /* reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it */
+ hit = lcg_step_uint(lcg_state) % *num_hits;
+
+ if(hit >= max_hits)
+ return;
+ }
+
+ /* record intersection */
+ Intersection *isect = &isect_array[hit];
+ isect->prim = triAddr;
+ isect->object = object;
+ isect->u = u;
+ isect->v = v;
+ isect->t = t;
}
}
}
-
- return false;
}
#endif
@@ -741,7 +751,6 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters
#define BVH_MOTION 2
#define BVH_HAIR 4
#define BVH_HAIR_MINIMUM_WIDTH 8
-#define BVH_SUBSURFACE 16
#define BVH_FUNCTION_NAME bvh_intersect
#define BVH_FUNCTION_FEATURES 0
@@ -773,32 +782,31 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters
#if defined(__SUBSURFACE__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface
-#define BVH_FUNCTION_FEATURES BVH_SUBSURFACE
-#include "kernel_bvh_traversal.h"
+#include "kernel_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__INSTANCING__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE
-#include "kernel_bvh_traversal.h"
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "kernel_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__HAIR__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-#include "kernel_bvh_traversal.h"
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
+#include "kernel_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_MOTION
-#include "kernel_bvh_traversal.h"
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "kernel_bvh_subsurface.h"
#endif
#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-#include "kernel_bvh_traversal.h"
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
+#include "kernel_bvh_subsurface.h"
#endif
@@ -844,38 +852,38 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui
}
#ifdef __SUBSURFACE__
-__device_inline int scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, float subsurface_random)
+__device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
if(kernel_data.bvh.have_curves)
- return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, subsurface_random);
+ return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
#endif /* __HAIR__ */
- return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, subsurface_random);
+ return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
}
#endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
if(kernel_data.bvh.have_curves)
- return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, subsurface_random);
+ return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
#ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
- return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, subsurface_random);
+ return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
#endif /* __INSTANCING__ */
- return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, subsurface_random);
+ return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
#else /* __KERNEL_CPU__ */
#ifdef __INSTANCING__
- return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, subsurface_random);
+ return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
#else
- return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, subsurface_random);
+ return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
#endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */
@@ -980,6 +988,51 @@ __device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, co
#endif
}
+/* same as above, except that isect->t is assumed to be in object space for instancing */
+__device_inline float3 bvh_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
+{
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
+
+#ifdef __INTERSECTION_REFINE__
+ if(isect->object != ~0) {
+#ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_itfm;
+#else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#endif
+
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D);
+ D = normalize(D);
+ }
+
+ P = P + D*t;
+
+ float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
+ float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
+ float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
+ float rt = Oz * invDz;
+
+ P = P + D*rt;
+
+ if(isect->object != ~0) {
+#ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_tfm;
+#else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#endif
+
+ P = transform_point(&tfm, P);
+ }
+
+ return P;
+#else
+ return P + D*t;
+#endif
+}
+
#ifdef __HAIR__
__device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
diff --git a/intern/cycles/kernel/kernel_bvh_subsurface.h b/intern/cycles/kernel/kernel_bvh_subsurface.h
new file mode 100644
index 00000000000..ac30bea6a9d
--- /dev/null
+++ b/intern/cycles/kernel/kernel_bvh_subsurface.h
@@ -0,0 +1,308 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2013, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function for subsurface scattering, where
+ * various features can be enabled/disabled. This way we can compile optimized
+ * versions for each case without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
+
+__device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersection *isect_array,
+ int subsurface_object, uint *lcg_state, int max_hits)
+{
+ /* todo:
+ * - test if pushing distance on the stack helps (for non shadow rays)
+ * - separate version for shadow rays
+ * - likely and unlikely for if() statements
+ * - SSE for hair
+ * - test restrict attribute for pointers
+ */
+
+ /* traversal stack in CUDA thread-local memory */
+ int traversalStack[BVH_STACK_SIZE];
+ traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+ /* traversal variables in registers */
+ int stackPtr = 0;
+ int nodeAddr = kernel_data.bvh.root;
+
+ /* ray parameters in registers */
+ const float tmax = ray->t;
+ float3 P = ray->P;
+ float3 idir = bvh_inverse_direction(ray->D);
+ int object = ~0;
+
+ const uint visibility = ~0;
+ uint num_hits = 0;
+
+#if FEATURE(BVH_MOTION)
+ Transform ob_tfm;
+#endif
+
+#if defined(__KERNEL_SSE2__)
+ const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+ const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+
+ const __m128i pn = _mm_set_epi32(0x80000000, 0x80000000, 0x00000000, 0x00000000);
+ __m128 Psplat[3], idirsplat[3];
+
+ Psplat[0] = _mm_set_ps1(P.x);
+ Psplat[1] = _mm_set_ps1(P.y);
+ Psplat[2] = _mm_set_ps1(P.z);
+
+ idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
+ idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
+ idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
+
+ __m128 tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f);
+
+ shuffle_swap_t shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
+ shuffle_swap_t shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
+ shuffle_swap_t shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
+#endif
+
+ /* traversal loop */
+ do {
+ do
+ {
+ /* traverse internal nodes */
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL)
+ {
+ bool traverseChild0, traverseChild1;
+ int nodeAddrChild1;
+
+#if !defined(__KERNEL_SSE2__)
+ /* Intersect two child bounding boxes, non-SSE version */
+ float t = tmax;
+
+ /* fetch node data */
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
+
+ /* intersect ray against child nodes */
+ NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+ NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+ NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
+ NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
+ NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
+ NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+ NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+ /* decide which nodes to traverse next */
+#ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
+ traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
+#else
+ traverseChild0 = (c0max >= c0min);
+ traverseChild1 = (c1max >= c1min);
+#endif
+
+#else // __KERNEL_SSE2__
+ /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+
+ /* fetch node data */
+ __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ float4 cnodes = ((float4*)bvh_nodes)[3];
+
+ /* intersect ray against child nodes */
+ const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflex), Psplat[0]), idirsplat[0]);
+ const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shuffley), Psplat[1]), idirsplat[1]);
+ const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflez), Psplat[2]), idirsplat[2]);
+
+ const __m128 tminmax = _mm_xor_ps(_mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat)), _mm_castsi128_ps(pn));
+ const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle_swap(tminmax, shuf_swap));
+
+ /* decide which nodes to traverse next */
+#ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
+ traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
+#else
+ traverseChild0 = (_mm_movemask_ps(lrhit) & 1);
+ traverseChild1 = (_mm_movemask_ps(lrhit) & 2);
+#endif
+#endif // __KERNEL_SSE2__
+
+ nodeAddr = __float_as_int(cnodes.x);
+ nodeAddrChild1 = __float_as_int(cnodes.y);
+
+ if(traverseChild0 && traverseChild1) {
+ /* both children were intersected, push the farther one */
+#if !defined(__KERNEL_SSE2__)
+ bool closestChild1 = (c1min < c0min);
+#else
+ union { __m128 m128; float v[4]; } uminmax;
+ uminmax.m128 = tminmax;
+ bool closestChild1 = uminmax.v[1] < uminmax.v[0];
+#endif
+
+ if(closestChild1) {
+ int tmp = nodeAddr;
+ nodeAddr = nodeAddrChild1;
+ nodeAddrChild1 = tmp;
+ }
+
+ ++stackPtr;
+ traversalStack[stackPtr] = nodeAddrChild1;
+ }
+ else {
+ /* one child was intersected */
+ if(traverseChild1) {
+ nodeAddr = nodeAddrChild1;
+ }
+ else if(!traverseChild0) {
+ /* neither child was intersected */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+
+ /* if node is leaf, fetch triangle list */
+ if(nodeAddr < 0) {
+ float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1));
+ int primAddr = __float_as_int(leaf.x);
+
+#if FEATURE(BVH_INSTANCING)
+ if(primAddr >= 0) {
+#endif
+ int primAddr2 = __float_as_int(leaf.y);
+
+ /* pop */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+
+ /* primitive intersection */
+ while(primAddr < primAddr2) {
+ /* only primitives from the same object */
+ uint tri_object = (object == ~0)? kernel_tex_fetch(__prim_object, primAddr): object;
+
+ if(tri_object == subsurface_object) {
+
+ /* intersect ray against primitive */
+ bvh_triangle_intersect_subsurface(kg, isect_array, P, idir, object, primAddr, tmax, &num_hits, lcg_state, max_hits);
+ }
+
+ primAddr++;
+ }
+ }
+#if FEATURE(BVH_INSTANCING)
+ else {
+ /* instance push */
+ if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) {
+ object = subsurface_object;
+
+ float t_ignore = FLT_MAX;
+#if FEATURE(BVH_MOTION)
+ bvh_instance_motion_push(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax);
+#else
+ bvh_instance_push(kg, object, ray, &P, &idir, &t_ignore, tmax);
+#endif
+
+#if defined(__KERNEL_SSE2__)
+ Psplat[0] = _mm_set_ps1(P.x);
+ Psplat[1] = _mm_set_ps1(P.y);
+ Psplat[2] = _mm_set_ps1(P.z);
+
+ idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
+ idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
+ idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
+
+ tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f);
+
+ shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
+ shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
+ shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
+#endif
+
+ ++stackPtr;
+ traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+
+ nodeAddr = kernel_tex_fetch(__object_node, object);
+ }
+ else {
+ /* pop */
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+ }
+ }
+#endif
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if FEATURE(BVH_INSTANCING)
+ if(stackPtr >= 0) {
+ kernel_assert(object != ~0);
+
+ /* instance pop */
+ float t_ignore = FLT_MAX;
+#if FEATURE(BVH_MOTION)
+ bvh_instance_motion_pop(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax);
+#else
+ bvh_instance_pop(kg, object, ray, &P, &idir, &t_ignore, tmax);
+#endif
+
+#if defined(__KERNEL_SSE2__)
+ Psplat[0] = _mm_set_ps1(P.x);
+ Psplat[1] = _mm_set_ps1(P.y);
+ Psplat[2] = _mm_set_ps1(P.z);
+
+ idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
+ idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
+ idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
+
+ tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f);
+
+ shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
+ shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
+ shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
+#endif
+
+ object = ~0;
+ nodeAddr = traversalStack[stackPtr];
+ --stackPtr;
+ }
+#endif
+ } while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+ return num_hits;
+}
+
+#undef FEATURE
+#undef BVH_FUNCTION_NAME
+#undef BVH_FUNCTION_FEATURES
+
diff --git a/intern/cycles/kernel/kernel_bvh_traversal.h b/intern/cycles/kernel/kernel_bvh_traversal.h
index cfca405e7a5..a9264f318eb 100644
--- a/intern/cycles/kernel/kernel_bvh_traversal.h
+++ b/intern/cycles/kernel/kernel_bvh_traversal.h
@@ -24,7 +24,6 @@
* BVH_INSTANCING: object instancing
* BVH_HAIR: hair curve rendering
* BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
- * BVH_SUBSURFACE: subsurface same object, random triangle intersection
* BVH_MOTION: motion blur rendering
*
*/
@@ -32,13 +31,8 @@
#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
__device bool BVH_FUNCTION_NAME
-(KernelGlobals *kg, const Ray *ray, Intersection *isect
-#if FEATURE(BVH_SUBSURFACE)
-, int subsurface_object, float subsurface_random
-#else
-, const uint visibility
-#endif
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) && !FEATURE(BVH_SUBSURFACE)
+(KernelGlobals *kg, const Ray *ray, Intersection *isect, const uint visibility
+#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, uint *lcg_state, float difl, float extmax
#endif
)
@@ -65,11 +59,6 @@ __device bool BVH_FUNCTION_NAME
float3 idir = bvh_inverse_direction(ray->D);
int object = ~0;
-#if FEATURE(BVH_SUBSURFACE)
- const uint visibility = ~0;
- int num_hits = 0;
-#endif
-
#if FEATURE(BVH_MOTION)
Transform ob_tfm;
#endif
@@ -141,7 +130,7 @@ __device bool BVH_FUNCTION_NAME
NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) && !FEATURE(BVH_SUBSURFACE)
+#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
float hdiff = 1.0f + difl;
float ldiff = 1.0f - difl;
@@ -245,59 +234,37 @@ __device bool BVH_FUNCTION_NAME
while(primAddr < primAddr2) {
bool hit;
-#if FEATURE(BVH_SUBSURFACE)
- /* only primitives from the same object */
- uint tri_object = (object == ~0)? kernel_tex_fetch(__prim_object, primAddr): object;
-
- if(tri_object == subsurface_object) {
-#endif
-
- /* intersect ray against primitive */
+ /* intersect ray against primitive */
#if FEATURE(BVH_HAIR)
- uint segment = kernel_tex_fetch(__prim_segment, primAddr);
-#if !FEATURE(BVH_SUBSURFACE)
- if(segment != ~0) {
+ uint segment = kernel_tex_fetch(__prim_segment, primAddr);
+ if(segment != ~0) {
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
- else
- hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
+ hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
+ else
+ hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
#else
- hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
- else
- hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
-#endif
- }
+ hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
else
+ hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
#endif
-#endif
-#if FEATURE(BVH_SUBSURFACE)
-#if FEATURE(BVH_HAIR)
- if(segment == ~0)
-#endif
- {
- hit = bvh_triangle_intersect_subsurface(kg, isect, P, idir, object, primAddr, tmax, &num_hits, subsurface_random);
- (void)hit;
- }
-
}
-#else
- hit = bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
+ else
+#endif
+ hit = bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
- /* shadow ray early termination */
+ /* shadow ray early termination */
#if defined(__KERNEL_SSE2__) && !FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- if(hit) {
- if(visibility == PATH_RAY_SHADOW_OPAQUE)
- return true;
-
- tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
- }
-#else
- if(hit && visibility == PATH_RAY_SHADOW_OPAQUE)
+ if(hit) {
+ if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
-#endif
+ tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+ }
+#else
+ if(hit && visibility == PATH_RAY_SHADOW_OPAQUE)
+ return true;
#endif
primAddr++;
@@ -306,47 +273,34 @@ __device bool BVH_FUNCTION_NAME
#if FEATURE(BVH_INSTANCING)
else {
/* instance push */
-#if FEATURE(BVH_SUBSURFACE)
- if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) {
- object = subsurface_object;
-#else
- object = kernel_tex_fetch(__prim_object, -primAddr-1);
-#endif
+ object = kernel_tex_fetch(__prim_object, -primAddr-1);
#if FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax);
+ bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax);
#else
- bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax);
+ bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax);
#endif
#if defined(__KERNEL_SSE2__) && !FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- Psplat[0] = _mm_set_ps1(P.x);
- Psplat[1] = _mm_set_ps1(P.y);
- Psplat[2] = _mm_set_ps1(P.z);
+ Psplat[0] = _mm_set_ps1(P.x);
+ Psplat[1] = _mm_set_ps1(P.y);
+ Psplat[2] = _mm_set_ps1(P.z);
- idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
- idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
- idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
+ idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn));
+ idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn));
+ idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn));
- tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
+ tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f);
- shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
- shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
- shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
+ shufflex = (idir.x >= 0)? shuf_identity: shuf_swap;
+ shuffley = (idir.y >= 0)? shuf_identity: shuf_swap;
+ shufflez = (idir.z >= 0)? shuf_identity: shuf_swap;
#endif
- ++stackPtr;
- traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+ ++stackPtr;
+ traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
- nodeAddr = kernel_tex_fetch(__object_node, object);
-#if FEATURE(BVH_SUBSURFACE)
- }
- else {
- /* pop */
- nodeAddr = traversalStack[stackPtr];
- --stackPtr;
- }
-#endif
+ nodeAddr = kernel_tex_fetch(__object_node, object);
}
}
#endif
@@ -386,11 +340,7 @@ __device bool BVH_FUNCTION_NAME
#endif
} while(nodeAddr != ENTRYPOINT_SENTINEL);
-#if FEATURE(BVH_SUBSURFACE)
- return (num_hits != 0);
-#else
return (isect->prim != ~0);
-#endif
}
#undef FEATURE
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index 7d5e4cd9df5..592c45867ac 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -108,11 +108,26 @@ __device float3 sample_uniform_sphere(float u1, float u2)
return make_float3(x, y, z);
}
+/* balance heuristic weight for density a against one other density b:
+ * a divided by the sum of both */
+__device float balance_heuristic(float a, float b)
+{
+	const float pdf_total = a + b;
+
+	return a/pdf_total;
+}
+
+/* balance heuristic weight for density a against two other densities b and c:
+ * a divided by the sum of all three */
+__device float balance_heuristic_3(float a, float b, float c)
+{
+	const float pdf_total = a + b + c;
+
+	return a/pdf_total;
+}
+
+/* power heuristic weight with exponent 2: a^2 / (a^2 + b^2) */
__device float power_heuristic(float a, float b)
{
 return (a*a)/(a*a + b*b);
}
+/* power heuristic weight with exponent 2 for three densities:
+ * a^2 / (a^2 + b^2 + c^2) */
+__device float power_heuristic_3(float a, float b, float c)
+{
+	const float numerator = a*a;
+	const float denominator = a*a + b*b + c*c;
+
+	return numerator/denominator;
+}
+
__device float2 concentric_sample_disk(float u1, float u2)
{
float r, theta;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index bf06f8dd5f6..d613943e85d 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -100,11 +100,11 @@ __device_inline void path_state_next(KernelGlobals *kg, PathState *state, int la
/* diffuse/glossy/singular */
if(label & LABEL_DIFFUSE) {
- state->flag |= PATH_RAY_DIFFUSE;
+ state->flag |= PATH_RAY_DIFFUSE|PATH_RAY_DIFFUSE_ANCESTOR;
state->flag &= ~(PATH_RAY_GLOSSY|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP);
}
else if(label & LABEL_GLOSSY) {
- state->flag |= PATH_RAY_GLOSSY;
+ state->flag |= PATH_RAY_GLOSSY|PATH_RAY_GLOSSY_ANCESTOR;
state->flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP);
}
else {
@@ -117,7 +117,7 @@ __device_inline void path_state_next(KernelGlobals *kg, PathState *state, int la
__device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *state)
{
- uint flag = state->flag;
+ uint flag = state->flag & PATH_RAY_ALL_VISIBILITY;
/* for visibility, diffuse/glossy are for reflection only */
if(flag & PATH_RAY_TRANSMIT)
@@ -404,7 +404,15 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
- subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
+
+ if(old_subsurface_scatter_use(&sd)) {
+ old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
+ }
+ else {
+ float bssrdf_u, bssrdf_v;
+ path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
+ }
}
}
#endif
@@ -646,7 +654,15 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
/* do bssrdf scatter step if we picked a bssrdf closure */
if(sc) {
uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
- subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
+
+ if(old_subsurface_scatter_use(&sd)) {
+ old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
+ }
+ else {
+ float bssrdf_u, bssrdf_v;
+ path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
+ }
}
}
#endif
@@ -1090,17 +1106,32 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
int num_samples = kernel_data.integrator.subsurface_samples;
float num_samples_inv = 1.0f/num_samples;
+ RNG bssrdf_rng = cmj_hash(*rng, i);
/* do subsurface scatter step with copy of shader data, this will
* replace the BSSRDF with a diffuse BSDF closure */
for(int j = 0; j < num_samples; j++) {
- ShaderData bssrdf_sd = sd;
- subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true);
-
- /* compute lighting with the BSDF closure */
- kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j,
- &bssrdf_sd, throughput, num_samples_inv,
- ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
+ if(old_subsurface_scatter_use(&sd)) {
+ ShaderData bssrdf_sd = sd;
+ old_subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true);
+
+ /* compute lighting with the BSDF closure */
+ kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j,
+ &bssrdf_sd, throughput, num_samples_inv,
+ ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
+ }
+ else {
+ ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+ float bssrdf_u, bssrdf_v;
+ path_rng_2D(kg, &bssrdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+
+ /* compute lighting with the BSDF closure */
+ for(int hit = 0; hit < num_hits; hit++)
+ kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j,
+ &bssrdf_sd[hit], throughput, num_samples_inv,
+ ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
+ }
}
}
}
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index c86ac34a057..be848d9bb16 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -235,7 +235,14 @@ __device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng)
#endif
-__device float lcg_step(uint *rng)
+/* advance the linear congruential generator state in place and return the
+ * new state as an integer */
+__device uint lcg_step_uint(uint *rng)
+{
+	/* implicit mod 2^32 */
+	const uint next = (1103515245*(*rng) + 12345);
+
+	*rng = next;
+	return next;
+}
+
+__device float lcg_step_float(uint *rng)
{
/* implicit mod 2^32 */
*rng = (1103515245*(*rng) + 12345);
@@ -245,7 +252,7 @@ __device float lcg_step(uint *rng)
+/* create a generator state from a seed: the seed is used as initial state
+ * and advanced by one step, the value of that step itself is discarded */
__device uint lcg_init(uint seed)
{
 uint rng = seed;
- lcg_step(&rng);
+ lcg_step_uint(&rng);
 return rng;
}
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 5dd12f98b9c..2c86cc5e227 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -184,52 +184,32 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
sd->flag = kernel_tex_fetch(__object_flag, sd->object);
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
-#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_segment, isect->prim) != ~0) {
- /* Strand Shader setting*/
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
-
- sd->shader = __float_as_int(curvedata.z);
- sd->segment = isect->segment;
-
- float tcorr = isect->t;
- if(kernel_data.curve.curveflags & CURVE_KN_POSTINTERSECTCORRECTION)
- tcorr = (isect->u < 0)? tcorr + sqrtf(isect->v) : tcorr - sqrtf(isect->v);
-
- sd->P = bvh_curve_refine(kg, sd, isect, ray, tcorr);
- }
- else {
-#endif
- /* fetch triangle data */
- float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
- float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
- sd->shader = __float_as_int(Ns.w);
+ /* fetch triangle data */
+ float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
+ float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
+ sd->shader = __float_as_int(Ns.w);
#ifdef __HAIR__
- sd->segment = ~0;
+ sd->segment = ~0;
#endif
#ifdef __UV__
- sd->u = isect->u;
- sd->v = isect->v;
+ sd->u = isect->u;
+ sd->v = isect->v;
#endif
- /* vectors */
- sd->P = bvh_triangle_refine(kg, sd, isect, ray);
- sd->Ng = Ng;
- sd->N = Ng;
-
- /* smooth normal */
- if(sd->shader & SHADER_SMOOTH_NORMAL)
- sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
+ /* vectors */
+ sd->P = bvh_triangle_refine_subsurface(kg, sd, isect, ray);
+ sd->Ng = Ng;
+ sd->N = Ng;
+
+ /* smooth normal */
+ if(sd->shader & SHADER_SMOOTH_NORMAL)
+ sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
#ifdef __DPDU__
- /* dPdu/dPdv */
- triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim);
-#endif
-
-#ifdef __HAIR__
- }
+ /* dPdu/dPdv */
+ triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim);
#endif
sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
@@ -468,6 +448,8 @@ __device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData
__device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, float *pdf,
int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
{
+ /* this is the veach one-sample model with balance heuristic, some pdf
+ * factors drop out when using balance heuristic weighting */
for(int i = 0; i< sd->num_closure; i++) {
if(i == skip_bsdf)
continue;
@@ -706,34 +688,34 @@ __device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
#endif
}
-__device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N)
+__device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
{
#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
-
- *N = make_float3(0.0f, 0.0f, 0.0f);
+ float3 N = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i< sd->num_closure; i++) {
ShaderClosure *sc = &sd->closure[i];
if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
eval += sc->weight*ao_factor;
- *N += sc->N*average(sc->weight);
+ N += sc->N*average(sc->weight);
}
else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) {
eval += sc->weight;
- *N += sd->N*average(sc->weight);
+ N += sd->N*average(sc->weight);
}
}
- if(is_zero(*N))
- *N = sd->N;
+ if(is_zero(N))
+ N = sd->N;
else
- *N = normalize(*N);
+ N = normalize(N);
+ *N_ = N;
return eval;
#else
- *N = sd->N;
+ *N_ = sd->N;
if(CLOSURE_IS_BSDF_DIFFUSE(sd->closure.type))
return sd->closure.weight*ao_factor;
@@ -744,6 +726,49 @@ __device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_facto
#endif
}
+/* Sum up the BSSRDF closures of a shading point.
+ *
+ * Returns the sum of the weights of all BSSRDF closures in sd, zero if there
+ * are none. The optional outputs may be NULL:
+ * - N_ receives the closure normals averaged by the absolute average closure
+ *   weight and normalized, or sd->N when that sum is zero
+ * - texture_blur_ receives the closures' data1 averaged with the same
+ *   weights, or 0 when the total weight is zero */
+__device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
+{
+#ifdef __MULTI_CLOSURE__
+	float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+	float3 N = make_float3(0.0f, 0.0f, 0.0f);
+	float texture_blur = 0.0f, weight_sum = 0.0f;
+
+	for(int i = 0; i< sd->num_closure; i++) {
+		ShaderClosure *sc = &sd->closure[i];
+
+		if(CLOSURE_IS_BSSRDF(sc->type)) {
+			float avg_weight = fabsf(average(sc->weight));
+
+			N += sc->N*avg_weight;
+			eval += sc->weight;
+			texture_blur += sc->data1*avg_weight;
+			weight_sum += avg_weight;
+		}
+	}
+
+	if(N_)
+		*N_ = (is_zero(N))? sd->N: normalize(N);
+
+	/* without BSSRDF closures, or with all weights zero, weight_sum is 0 and
+	 * the division would be 0/0 = NaN, report no blur instead */
+	if(texture_blur_)
+		*texture_blur_ = (weight_sum > 0.0f)? texture_blur/weight_sum: 0.0f;
+
+	return eval;
+#else
+	if(CLOSURE_IS_BSSRDF(sd->closure.type)) {
+		if(N_) *N_ = sd->closure.N;
+		if(texture_blur_) *texture_blur_ = sd->closure.data1;
+
+		return sd->closure.weight;
+	}
+	else {
+		if(N_) *N_ = sd->N;
+		if(texture_blur_) *texture_blur_ = 0.0f;
+
+		return make_float3(0.0f, 0.0f, 0.0f);
+	}
+#endif
+}
+
/* Emission */
__device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc)
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 4fae961512e..8f5bcdf06e2 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -18,35 +18,18 @@
CCL_NAMESPACE_BEGIN
-#define BSSRDF_MULTI_EVAL
-#define BSSRDF_SKIP_NO_HIT
-
-__device float bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u)
-{
- int table_offset = kernel_data.bssrdf.table_offset;
- float r = lookup_table_read_2D(kg, u, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
-
- return r*radius;
-}
+#include "closure/bssrdf.h"
-#ifdef BSSRDF_MULTI_EVAL
-__device float bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r)
-{
- if(r >= radius)
- return 0.0f;
+/* NEW BSSRDF: See "BSSRDF Importance Sampling", SIGGRAPH 2013 */
- /* todo: when we use the real BSSRDF this will need to be divided by the maximum
- * radius instead of the average radius */
- float t = r/radius;
-
- int table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET;
- float pdf = lookup_table_read_2D(kg, t, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
-
- pdf /= radius;
+/* TODO:
+ * - test using power heuristic for combining bssrdfs
+ * - try to reduce one sample model variance
+ * - possibly shade all hits for progressive integrator
+ * - cubic and gaussian scale difference tweak
+ */
- return pdf;
-}
-#endif
+#define BSSRDF_MULTI_EVAL
__device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, ShaderData *sd, float *probability)
{
@@ -75,7 +58,6 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade
/* use bssrdf */
r -= bsdf_sum;
- sd->randb_closure = 0.0f; /* not needed anymore */
float sum = 0.0f;
@@ -86,6 +68,8 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade
sum += sc->sample_weight;
if(r <= sum) {
+ sd->randb_closure = (r - (sum - sc->sample_weight))/sc->sample_weight;
+
#ifdef BSSRDF_MULTI_EVAL
*probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bssrdf_sum: 1.0f;
#else
@@ -97,12 +81,362 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade
}
/* should never happen */
+ sd->randb_closure = 0.0f;
*probability = 1.0f;
return NULL;
}
+/* Evaluate the BSSRDF weight for one scatter sample.
+ *
+ * disk_r is the radius that was sampled on the disk, r the actual distance
+ * between the two surface points. The result is closure weight times the
+ * bssrdf pdf at r, divided by the pdf at disk_r, or zero when that divisor
+ * is not positive.
+ *
+ * With BSSRDF_MULTI_EVAL all BSSRDF closures of sd are combined and sc is not
+ * used, without it only sc is evaluated. When all is true every closure
+ * counts with sample weight 1, otherwise with its normalized sample_weight. */
+__device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all)
+{
#ifdef BSSRDF_MULTI_EVAL
-__device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all)
+	/* this is the veach one-sample model with balance heuristic, some pdf
+	 * factors drop out when using balance heuristic weighting */
+	float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
+	float pdf_sum = 0.0f;
+	float sample_weight_sum = 0.0f;
+
+	for(int i = 0; i < sd->num_closure; i++) {
+		ShaderClosure *bssrdf_sc = &sd->closure[i];
+
+		if(CLOSURE_IS_BSSRDF(bssrdf_sc->type)) {
+			float sample_weight = (all)? 1.0f: bssrdf_sc->sample_weight;
+			sample_weight_sum += sample_weight;
+		}
+	}
+
+	/* a zero sum leaves pdf_sum at zero below, which returns black, without
+	 * going through inf/NaN arithmetic to get there */
+	float sample_weight_inv = (sample_weight_sum > 0.0f)? 1.0f/sample_weight_sum: 0.0f;
+
+	for(int i = 0; i < sd->num_closure; i++) {
+		ShaderClosure *bssrdf_sc = &sd->closure[i];
+
+		if(CLOSURE_IS_BSSRDF(bssrdf_sc->type)) {
+			/* in case of non-progressive integrate we sample all bssrdf's once,
+			 * for progressive we pick one, so adjust pdf for that */
+			float sample_weight = (all)? 1.0f: bssrdf_sc->sample_weight * sample_weight_inv;
+
+			/* compute pdf */
+			float pdf = bssrdf_pdf(bssrdf_sc, r);
+			float disk_pdf = bssrdf_pdf(bssrdf_sc, disk_r);
+
+			/* TODO power heuristic is not working correctly here */
+			eval_sum += bssrdf_sc->weight*pdf;
+			pdf_sum += sample_weight*disk_pdf;
+		}
+	}
+
+	return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
+#else
+	/* single closure evaluation, sc is the closure that was picked */
+	float pdf = bssrdf_pdf(sc, r);
+	float disk_pdf = bssrdf_pdf(sc, disk_r);
+
+	return (disk_pdf > 0.0f)? sc->weight * pdf / disk_pdf : make_float3(0.0f, 0.0f, 0.0f);
+#endif
+}
+
+/* replace closures with a single diffuse bsdf closure after scatter step,
+ * or with no closure at all when hit is false */
+__device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight, bool hit, float3 N)
+{
+	/* closure flags and randb_closure are reset in both cases */
+	sd->flag &= ~SD_CLOSURE_FLAGS;
+	sd->randb_closure = 0.0f;
+
+	if(!hit) {
+		sd->num_closure = 0;
+		return;
+	}
+
+	ShaderClosure *diffuse_sc = &sd->closure[0];
+	sd->num_closure = 1;
+
+	diffuse_sc->weight = weight;
+	diffuse_sc->sample_weight = 1.0f;
+	diffuse_sc->data0 = 0.0f;
+	diffuse_sc->data1 = 0.0f;
+	diffuse_sc->N = N;
+	sd->flag |= bsdf_diffuse_setup(diffuse_sc);
+
+	/* bsdf_diffuse_setup is followed by this type override so render passes
+	 * can tell the closure apart from a regular diffuse closure */
+	diffuse_sc->type = CLOSURE_BSDF_BSSRDF_ID;
+}
+
+/* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
+
+/* clamp color to be non-negative and raise each channel to exponent */
+__device float3 subsurface_color_pow(float3 color, float exponent)
+{
+	color = max(color, make_float3(0.0f, 0.0f, 0.0f));
+
+	/* exponent 1 leaves the clamped color as it is */
+	if(exponent == 1.0f)
+		return color;
+
+	if(exponent == 0.5f) {
+		/* square root instead of the general power function */
+		color.x = sqrtf(color.x);
+		color.y = sqrtf(color.y);
+		color.z = sqrtf(color.z);
+		return color;
+	}
+
+	color.x = powf(color.x, exponent);
+	color.y = powf(color.y, exponent);
+	color.z = powf(color.z, exponent);
+	return color;
+}
+
+/* when the outgoing point has bssrdf bump mapping or a texture blur above
+ * zero, evaluate the shader at the incoming point. with bump, N receives the
+ * averaged bssrdf normal from there, with blur, eval is rescaled by the ratio
+ * of incoming to outgoing color. otherwise eval and N are left untouched */
+__device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd, ShaderData *in_sd, int state_flag, float3 *eval, float3 *N)
+{
+	/* summed color and average texture blur at outgoing point, read before
+	 * the shader evaluation below as out_sd and in_sd can be the same */
+	float texture_blur;
+	float3 out_color = shader_bssrdf_sum(out_sd, NULL, &texture_blur);
+
+	/* do we have bump mapping and/or blurring? */
+	const bool has_bump = (out_sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
+	const bool has_blur = (texture_blur > 0.0f);
+
+	if(!has_bump && !has_blur)
+		return;
+
+	/* summed color and average normal at incoming point */
+	shader_eval_surface(kg, in_sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
+	float3 in_color = shader_bssrdf_sum(in_sd, (has_bump)? N: NULL, NULL);
+
+	if(!has_blur)
+		return;
+
+	/* divide out the color of one point and multiply with the color of the
+	 * other. doing this per closure is tricky as number and order of closures
+	 * may change between shader evaluations, so work on the sums instead */
+	out_color = subsurface_color_pow(out_color, texture_blur);
+	in_color = subsurface_color_pow(in_color, texture_blur);
+
+	*eval *= safe_divide_color(in_color, out_color);
+}
+
+/* subsurface scattering step, from a point on the surface to other nearby points on the same object
+ *
+ * disk_u picks one of the three local frame axes as ray direction and is then
+ * rescaled and reused for the angle on the disk, disk_v is handed to
+ * bssrdf_sample() which yields disk radius and height. up to BSSRDF_MAX_HITS
+ * intersections are turned into shading points in bssrdf_sd, each one ending
+ * up with a single diffuse closure. returns the number of entries written to
+ * bssrdf_sd, sd itself is only read */
+__device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd, ShaderData bssrdf_sd[BSSRDF_MAX_HITS],
+ int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
+{
+ /* pick random axis in local frame and point on disk */
+ float3 disk_N, disk_T, disk_B;
+ float pick_pdf_N, pick_pdf_T, pick_pdf_B;
+
+ disk_N = sd->Ng;
+ make_orthonormals(disk_N, &disk_T, &disk_B);
+
+ /* the geometric normal is picked half of the time, the two other axes a
+ * quarter of the time each. pick_pdf_* keep belonging to the vectors of
+ * the same name after the swaps below */
+ if(disk_u < 0.5f) {
+ pick_pdf_N = 0.5f;
+ pick_pdf_T = 0.25f;
+ pick_pdf_B = 0.25f;
+ disk_u *= 2.0f;
+ }
+ else if(disk_u < 0.75f) {
+ /* swap disk_N and disk_T, the ray goes along the former tangent */
+ float3 tmp = disk_N;
+ disk_N = disk_T;
+ disk_T = tmp;
+ pick_pdf_N = 0.25f;
+ pick_pdf_T = 0.5f;
+ pick_pdf_B = 0.25f;
+ disk_u = (disk_u - 0.5f)*4.0f;
+ }
+ else {
+ /* swap disk_N and disk_B, the ray goes along the former bitangent */
+ float3 tmp = disk_N;
+ disk_N = disk_B;
+ disk_B = tmp;
+ pick_pdf_N = 0.25f;
+ pick_pdf_T = 0.25f;
+ pick_pdf_B = 0.5f;
+ disk_u = (disk_u - 0.75f)*4.0f;
+ }
+
+ /* sample point on disk */
+ float phi = M_2PI_F * disk_u;
+ float disk_r = disk_v;
+ float disk_height;
+
+ /* disk_r goes in as random number and comes back as radius */
+ bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
+
+ float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
+
+ /* create ray, starting disk_height along disk_N from the disk point and
+ * going back along -disk_N over twice that distance */
+ Ray ray;
+ ray.P = sd->P + disk_N*disk_height + disk_P;
+ ray.D = -disk_N;
+ ray.t = 2.0f*disk_height;
+ ray.dP = sd->dP;
+ ray.dD = differential3_zero();
+ ray.time = sd->time;
+
+ /* intersect with the same object. if multiple intersections are found it
+ * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
+ Intersection isect[BSSRDF_MAX_HITS];
+ uint num_hits = scene_intersect_subsurface(kg, &ray, isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
+
+ /* evaluate bssrdf */
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+ int num_eval_hits = min(num_hits, BSSRDF_MAX_HITS);
+
+ for(int hit = 0; hit < num_eval_hits; hit++) {
+ ShaderData *bsd = &bssrdf_sd[hit];
+
+ /* setup new shading point */
+ *bsd = *sd;
+ shader_setup_from_subsurface(kg, bsd, &isect[hit], &ray);
+
+ /* probability densities for local frame axes */
+ float pdf_N = pick_pdf_N * fabsf(dot(disk_N, bsd->Ng));
+ float pdf_T = pick_pdf_T * fabsf(dot(disk_T, bsd->Ng));
+ float pdf_B = pick_pdf_B * fabsf(dot(disk_B, bsd->Ng));
+
+ /* multiple importance sample between 3 axes, power heuristic
+ * found to be slightly better than balance heuristic */
+ float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
+
+ /* real distance to sampled point */
+ float r = len(bsd->P - sd->P);
+
+ /* evaluate */
+ float w = mis_weight / pdf_N;
+ /* more hits were found than are shaded, scale the shaded ones up by
+ * the ratio of found hits to BSSRDF_MAX_HITS */
+ if(num_hits > BSSRDF_MAX_HITS)
+ w *= num_hits/(float)BSSRDF_MAX_HITS;
+ eval = subsurface_scatter_eval(bsd, sc, disk_r, r, all) * w;
+
+ /* optionally blur colors and bump mapping */
+ float3 N = bsd->N;
+ subsurface_color_bump_blur(kg, sd, bsd, state_flag, &eval, &N);
+
+ /* setup diffuse bsdf */
+ subsurface_scatter_setup_diffuse_bsdf(bsd, eval, true, N);
+ }
+
+ return num_eval_hits;
+}
+
+/* subsurface scattering step, from a point on the surface to another nearby point on the same object
+ *
+ * same sampling as subsurface_scatter_multi_step(), but a single hit is
+ * requested and sd is modified in place: on a hit it is moved to the new
+ * point and left with a single diffuse closure, without a hit it is left
+ * with no closures */
+__device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
+ int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
+{
+ float3 eval = make_float3(0.0f, 0.0f, 0.0f);
+ uint num_hits = 0;
+
+ /* pick random axis in local frame and point on disk */
+ float3 disk_N, disk_T, disk_B;
+ float pick_pdf_N, pick_pdf_T, pick_pdf_B;
+
+ disk_N = sd->Ng;
+ make_orthonormals(disk_N, &disk_T, &disk_B);
+
+ /* the geometric normal is picked half of the time, the two other axes a
+ * quarter of the time each. pick_pdf_* keep belonging to the vectors of
+ * the same name after the swaps below */
+ if(disk_u < 0.5f) {
+ pick_pdf_N = 0.5f;
+ pick_pdf_T = 0.25f;
+ pick_pdf_B = 0.25f;
+ disk_u *= 2.0f;
+ }
+ else if(disk_u < 0.75f) {
+ /* swap disk_N and disk_T, the ray goes along the former tangent */
+ float3 tmp = disk_N;
+ disk_N = disk_T;
+ disk_T = tmp;
+ pick_pdf_N = 0.25f;
+ pick_pdf_T = 0.5f;
+ pick_pdf_B = 0.25f;
+ disk_u = (disk_u - 0.5f)*4.0f;
+ }
+ else {
+ /* swap disk_N and disk_B, the ray goes along the former bitangent */
+ float3 tmp = disk_N;
+ disk_N = disk_B;
+ disk_B = tmp;
+ pick_pdf_N = 0.25f;
+ pick_pdf_T = 0.25f;
+ pick_pdf_B = 0.5f;
+ disk_u = (disk_u - 0.75f)*4.0f;
+ }
+
+ /* sample point on disk */
+ float phi = M_2PI_F * disk_u;
+ float disk_r = disk_v;
+ float disk_height;
+
+ /* disk_r goes in as random number and comes back as radius */
+ bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
+
+ float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
+
+ /* create ray, starting disk_height along disk_N from the disk point and
+ * going back along -disk_N over twice that distance */
+ Ray ray;
+ ray.P = sd->P + disk_N*disk_height + disk_P;
+ ray.D = -disk_N;
+ ray.t = 2.0f*disk_height;
+ ray.dP = sd->dP;
+ ray.dD = differential3_zero();
+ ray.time = sd->time;
+
+ /* intersect with the same object. if multiple intersections are
+ * found it will randomly pick one of them */
+ Intersection isect;
+ num_hits = scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1);
+
+ /* evaluate bssrdf */
+ if(num_hits > 0) {
+ /* remember the outgoing position, sd->P is overwritten below */
+ float3 origP = sd->P;
+
+ /* setup new shading point */
+ shader_setup_from_subsurface(kg, sd, &isect, &ray);
+
+ /* probability densities for local frame axes */
+ float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
+ float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
+ float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
+
+ /* multiple importance sample between 3 axes, power heuristic
+ * found to be slightly better than balance heuristic */
+ float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
+
+ /* real distance to sampled point */
+ float r = len(sd->P - origP);
+
+ /* evaluate, a single hit stands in for all num_hits hits found */
+ float w = (mis_weight * num_hits) / pdf_N;
+ eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
+ }
+
+ /* optionally blur colors and bump mapping, sd serves as both the
+ * outgoing and the incoming point here. note this also runs when
+ * nothing was hit */
+ float3 N = sd->N;
+ subsurface_color_bump_blur(kg, sd, sd, state_flag, &eval, &N);
+
+ /* setup diffuse bsdf */
+ subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N);
+}
+
+
+/* OLD BSSRDF */
+
+/* sample a scatter distance: read the 2D lookup table at (u, refl) and scale
+ * the result by radius */
+__device float old_bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u)
+{
+	const int offset = kernel_data.bssrdf.table_offset;
+	const float table_r = lookup_table_read_2D(kg, u, refl, offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
+
+	return table_r*radius;
+}
+
+#ifdef BSSRDF_MULTI_EVAL
+/* pdf of distance r for the table based bssrdf: zero at or beyond radius,
+ * otherwise the pdf table value at (r/radius, refl) divided by radius */
+__device float old_bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r)
+{
+	if(r >= radius)
+		return 0.0f;
+
+	/* the pdf table sits BSSRDF_PDF_TABLE_OFFSET after the radius table */
+	const int pdf_table = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET;
+
+	/* todo: when we use the real BSSRDF this will need to be divided by the maximum
+	 * radius instead of the average radius */
+	const float normalized_r = r/radius;
+	const float table_pdf = lookup_table_read_2D(kg, normalized_r, refl, pdf_table, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
+
+	return table_pdf/radius;
+}
+#endif
+
+#ifdef BSSRDF_MULTI_EVAL
+__device float3 old_subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all)
{
/* compute pdf */
float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
@@ -119,7 +453,7 @@ __device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd,
/* compute pdf */
float pdf = 1.0f;
for(int i = 0; i < num_r; i++)
- pdf *= bssrdf_pdf(kg, sc->data0, refl, r[i]);
+ pdf *= old_bssrdf_pdf(kg, sc->data0, refl, r[i]);
eval_sum += sc->weight*pdf;
pdf_sum += sample_weight*pdf;
@@ -148,31 +482,8 @@ __device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd,
}
#endif
-/* replace closures with a single diffuse bsdf closure after scatter step */
-__device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight)
-{
- ShaderClosure *sc = &sd->closure[0];
- sd->num_closure = 1;
-
- sc->weight = weight;
- sc->sample_weight = 1.0f;
- sc->data0 = 0.0f;
- sc->data1 = 0.0f;
- sc->N = sd->N;
- sd->flag &= ~SD_CLOSURE_FLAGS;
- sd->flag |= bsdf_diffuse_setup(sc);
- sd->randb_closure = 0.0f;
-
- /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
- * can recognize it as not being a regular diffuse closure */
- sc->type = CLOSURE_BSDF_BSSRDF_ID;
-
- /* todo: evaluate shading to get blurred textures and bump mapping */
- /* shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS); */
-}
-
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-__device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all)
+__device void old_subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all)
{
float radius = sc->data0;
float refl = max(average(sc->weight)*3.0f, 0.0f);
@@ -187,14 +498,13 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta
/* attempt to find a hit a given number of times before giving up */
for(num_attempts = 0; num_attempts < kernel_data.bssrdf.num_attempts; num_attempts++) {
/* random numbers for sampling */
- float u1 = lcg_step(lcg_state);
- float u2 = lcg_step(lcg_state);
- float u3 = lcg_step(lcg_state);
- float u4 = lcg_step(lcg_state);
- float u5 = lcg_step(lcg_state);
- float u6 = lcg_step(lcg_state);
-
- r = bssrdf_sample_distance(kg, radius, refl, u5);
+ float u1 = lcg_step_float(lcg_state);
+ float u2 = lcg_step_float(lcg_state);
+ float u3 = lcg_step_float(lcg_state);
+ float u4 = lcg_step_float(lcg_state);
+ float u5 = lcg_step_float(lcg_state);
+
+ r = old_bssrdf_sample_distance(kg, radius, refl, u5);
#ifdef BSSRDF_MULTI_EVAL
r_attempts[num_attempts] = r;
#endif
@@ -213,7 +523,7 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta
/* intersect with the same object. if multiple intersections are
* found it will randomly pick one of them */
Intersection isect;
- if(!scene_intersect_subsurface(kg, &ray, &isect, sd->object, u6))
+ if(scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1) == 0)
continue;
/* setup new shading point */
@@ -226,18 +536,32 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta
/* evaluate subsurface scattering closures */
#ifdef BSSRDF_MULTI_EVAL
- weight *= subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all);
+ weight *= old_subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all);
#else
weight *= sc->weight;
#endif
-#ifdef BSSRDF_SKIP_NO_HIT
if(!hit)
weight = make_float3(0.0f, 0.0f, 0.0f);
-#endif
+
+ /* optionally blur colors and bump mapping */
+ float3 N = sd->N;
+ subsurface_color_bump_blur(kg, sd, sd, state_flag, &weight, &N);
/* replace closures with a single diffuse BSDF */
- subsurface_scatter_setup_diffuse_bsdf(sd, weight);
+ subsurface_scatter_setup_diffuse_bsdf(sd, weight, hit, N);
+}
+
+/* true when any closure of sd has type CLOSURE_BSSRDF_COMPATIBLE_ID, callers
+ * use this to choose the old scatter step over the new one */
+__device bool old_subsurface_scatter_use(ShaderData *sd)
+{
+	bool found = false;
+
+	/* stop at the first compatible closure */
+	for(int i = 0; i < sd->num_closure && !found; i++)
+		found = (sd->closure[i].type == CLOSURE_BSSRDF_COMPATIBLE_ID);
+
+	return found;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 3008698313e..3421ba44007 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -43,6 +43,7 @@ CCL_NAMESPACE_BEGIN
#define BSSRDF_LOOKUP_TABLE_SIZE (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE*2)
#define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_ATTEMPTS 8
+#define BSSRDF_MAX_HITS 4
#define BB_DRAPPER 800.0f
#define BB_MAX_TABLE_RANGE 12000.0f
@@ -214,12 +215,13 @@ enum PathRayFlag {
PATH_RAY_SHADOW_TRANSPARENT = 256,
PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
- PATH_RAY_MIS_SKIP = 512,
+ PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/
- PATH_RAY_ALL = (1|2|4|8|16|32|64|128|256|512),
+ PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512),
- /* visibility flag to define curve segments*/
- PATH_RAY_CURVE = 1024,
+ PATH_RAY_MIS_SKIP = 1024,
+ PATH_RAY_DIFFUSE_ANCESTOR = 2048,
+ PATH_RAY_GLOSSY_ANCESTOR = 4096,
/* this gives collisions with localview bits
* see: blender_util.h, grr - Campbell */
@@ -507,11 +509,12 @@ enum ShaderDataFlag {
SD_HAS_TRANSPARENT_SHADOW = 1024, /* has transparent shadow */
SD_HAS_VOLUME = 2048, /* has volume shader */
SD_HOMOGENEOUS_VOLUME = 4096, /* has homogeneous volume */
+ SD_HAS_BSSRDF_BUMP = 8192, /* bssrdf normal uses bump */
/* object flags */
- SD_HOLDOUT_MASK = 8192, /* holdout for camera rays */
- SD_OBJECT_MOTION = 16384, /* has object motion blur */
- SD_TRANSFORM_APPLIED = 32768 /* vertices have transform applied */
+ SD_HOLDOUT_MASK = 16384, /* holdout for camera rays */
+ SD_OBJECT_MOTION = 32768, /* has object motion blur */
+ SD_TRANSFORM_APPLIED = 65536 /* vertices have transform applied */
};
struct KernelGlobals;
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp
index ba9b13126ac..7405b0be567 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.cpp
+++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp
@@ -47,18 +47,59 @@ CCL_NAMESPACE_BEGIN
using namespace OSL;
-class BSSRDFClosure : public CBSSRDFClosure {
+/* Cubic */
+
+class CubicBSSRDFClosure : public CBSSRDFClosure {
public:
size_t memsize() const { return sizeof(*this); }
const char *name() const { return "bssrdf_cubic"; }
void setup()
{
+ sc.type = CLOSURE_BSSRDF_COMPATIBLE_ID;
sc.prim = NULL;
sc.data0 = fabsf(average(radius));
- sc.data1 = 1.3f;
+ sc.data1 = 0.0f; // XXX texture blur
+ }
+
+ bool mergeable(const ClosurePrimitive *other) const
+ {
+ return false;
+ }
- m_shaderdata_flag = bssrdf_setup(&sc);
+ void print_on(std::ostream &out) const
+ {
+ out << name() << " ((" << sc.N[0] << ", " << sc.N[1] << ", " << sc.N[2] << "))";
+ }
+};
+
+ClosureParam *closure_bssrdf_cubic_params()
+{
+ static ClosureParam params[] = {
+ CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N),
+ CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius),
+ //CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1),
+ CLOSURE_STRING_KEYPARAM("label"),
+ CLOSURE_FINISH_PARAM(CubicBSSRDFClosure)
+ };
+ return params;
+}
+
+CLOSURE_PREPARE(closure_bssrdf_cubic_prepare, CubicBSSRDFClosure)
+
+/* Gaussian */
+
+class GaussianBSSRDFClosure : public CBSSRDFClosure {
+public:
+ size_t memsize() const { return sizeof(*this); }
+ const char *name() const { return "bssrdf_gaussian"; }
+
+ void setup()
+ {
+ sc.type = CLOSURE_BSSRDF_GAUSSIAN_ID;
+ sc.prim = NULL;
+ sc.data0 = fabsf(average(radius));
+ sc.data1 = 0.0f; // XXX texture blurring!
}
bool mergeable(const ClosurePrimitive *other) const
@@ -72,19 +113,19 @@ public:
}
};
-ClosureParam *closure_bssrdf_params()
+ClosureParam *closure_bssrdf_gaussian_params()
{
static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(BSSRDFClosure, sc.N),
- CLOSURE_FLOAT3_PARAM(BSSRDFClosure, radius),
- //CLOSURE_FLOAT_PARAM(BSSRDFClosure, sc.data1),
+ CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N),
+ CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius),
+ //CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1),
CLOSURE_STRING_KEYPARAM("label"),
- CLOSURE_FINISH_PARAM(BSSRDFClosure)
+ CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure)
};
return params;
}
-CLOSURE_PREPARE(closure_bssrdf_prepare, BSSRDFClosure)
+CLOSURE_PREPARE(closure_bssrdf_gaussian_prepare, GaussianBSSRDFClosure)
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_bssrdf.h b/intern/cycles/kernel/osl/osl_bssrdf.h
index 54df055405e..ee9fc7c4ac5 100644
--- a/intern/cycles/kernel/osl/osl_bssrdf.h
+++ b/intern/cycles/kernel/osl/osl_bssrdf.h
@@ -48,15 +48,10 @@ public:
ShaderClosure sc;
float3 radius;
- CBSSRDFClosure() : OSL::ClosurePrimitive(BSSRDF),
- m_shaderdata_flag(0) { }
+ CBSSRDFClosure() : OSL::ClosurePrimitive(BSSRDF) { }
~CBSSRDFClosure() { }
int scattering() const { return LABEL_DIFFUSE; }
- int shaderdata_flag() const { return m_shaderdata_flag; }
-
-protected:
- int m_shaderdata_flag;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp
index b1549e95920..c03e50d4313 100644
--- a/intern/cycles/kernel/osl/osl_closures.cpp
+++ b/intern/cycles/kernel/osl/osl_closures.cpp
@@ -218,7 +218,9 @@ void OSLShader::register_closures(OSLShadingSystem *ss_)
register_closure(ss, "phong_ramp", id++,
closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare);
register_closure(ss, "bssrdf_cubic", id++,
- closure_bssrdf_params(), closure_bssrdf_prepare);
+ closure_bssrdf_cubic_params(), closure_bssrdf_cubic_prepare);
+ register_closure(ss, "bssrdf_gaussian", id++,
+ closure_bssrdf_gaussian_params(), closure_bssrdf_gaussian_prepare);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h
index ca5f441aa2d..e3a7e890597 100644
--- a/intern/cycles/kernel/osl/osl_closures.h
+++ b/intern/cycles/kernel/osl/osl_closures.h
@@ -50,7 +50,8 @@ OSL::ClosureParam *closure_bsdf_diffuse_ramp_params();
OSL::ClosureParam *closure_bsdf_phong_ramp_params();
OSL::ClosureParam *closure_westin_backscatter_params();
OSL::ClosureParam *closure_westin_sheen_params();
-OSL::ClosureParam *closure_bssrdf_params();
+OSL::ClosureParam *closure_bssrdf_cubic_params();
+OSL::ClosureParam *closure_bssrdf_gaussian_params();
void closure_emission_prepare(OSL::RendererServices *, int id, void *data);
void closure_background_prepare(OSL::RendererServices *, int id, void *data);
@@ -60,7 +61,8 @@ void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *da
void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data);
void closure_westin_backscatter_prepare(OSL::RendererServices *, int id, void *data);
void closure_westin_sheen_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data);
+void closure_bssrdf_cubic_prepare(OSL::RendererServices *, int id, void *data);
+void closure_bssrdf_gaussian_prepare(OSL::RendererServices *, int id, void *data);
enum {
AmbientOcclusion = 100
@@ -89,7 +91,8 @@ public:
ShaderClosure sc;
CBSDFClosure(int scattering) : OSL::ClosurePrimitive(BSDF),
- m_scattering_label(scattering), m_shaderdata_flag(0) { }
+ m_scattering_label(scattering), m_shaderdata_flag(0)
+ { memset(&sc, 0, sizeof(sc)); }
~CBSDFClosure() { }
int scattering() const { return m_scattering_label; }
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index dedda1dc10e..23be0acb4d3 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -17,10 +17,14 @@
*/
#include "kernel_compat_cpu.h"
+#include "kernel_montecarlo.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "kernel_object.h"
+#include "closure/bsdf_diffuse.h"
+#include "closure/bssrdf.h"
+
#include "osl_bssrdf.h"
#include "osl_closures.h"
#include "osl_globals.h"
@@ -136,7 +140,7 @@ static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd,
/* Surface */
-static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
+static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
const OSL::ClosureColor *closure, float3 weight = make_float3(1.0f, 1.0f, 1.0f))
{
/* OSL gives us a closure tree, we flatten it into arrays per
@@ -156,8 +160,11 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
int scattering = bsdf->scattering();
/* no caustics option */
- if (no_glossy && scattering == LABEL_GLOSSY)
- return;
+ if(scattering == LABEL_GLOSSY && (path_flag & PATH_RAY_DIFFUSE)) {
+ KernelGlobals *kg = sd->osl_globals;
+ if(kernel_data.integrator.no_caustics)
+ return;
+ }
/* sample weight */
float sample_weight = fabsf(average(weight));
@@ -230,26 +237,32 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
sc.data1 = bssrdf->sc.data1;
sc.prim = NULL;
+ /* disable in case of a diffuse ancestor: the effect is barely visible
+ * then, and it adds considerable noise because the probability of
+ * continuing the path keeps getting lower and lower */
+ if(sc.type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR))
+ bssrdf->radius = make_float3(0.0f, 0.0f, 0.0f);
+
/* create one closure for each color channel */
if(fabsf(weight.x) > 0.0f) {
sc.weight = make_float3(weight.x, 0.0f, 0.0f);
sc.data0 = bssrdf->radius.x;
+ sd->flag |= bssrdf_setup(&sc, sc.type);
sd->closure[sd->num_closure++] = sc;
- sd->flag |= bssrdf->shaderdata_flag();
}
if(fabsf(weight.y) > 0.0f) {
sc.weight = make_float3(0.0f, weight.y, 0.0f);
sc.data0 = bssrdf->radius.y;
+ sd->flag |= bssrdf_setup(&sc, sc.type);
sd->closure[sd->num_closure++] = sc;
- sd->flag |= bssrdf->shaderdata_flag();
}
if(fabsf(weight.z) > 0.0f) {
sc.weight = make_float3(0.0f, 0.0f, weight.z);
sc.data0 = bssrdf->radius.z;
+ sd->flag |= bssrdf_setup(&sc, sc.type);
sd->closure[sd->num_closure++] = sc;
- sd->flag |= bssrdf->shaderdata_flag();
}
}
break;
@@ -264,12 +277,12 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy,
}
else if (closure->type == OSL::ClosureColor::MUL) {
OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
- flatten_surface_closure_tree(sd, no_glossy, mul->closure, TO_FLOAT3(mul->weight) * weight);
+ flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
}
else if (closure->type == OSL::ClosureColor::ADD) {
OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
- flatten_surface_closure_tree(sd, no_glossy, add->closureA, weight);
- flatten_surface_closure_tree(sd, no_glossy, add->closureB, weight);
+ flatten_surface_closure_tree(sd, path_flag, add->closureA, weight);
+ flatten_surface_closure_tree(sd, path_flag, add->closureB, weight);
}
}
@@ -292,10 +305,8 @@ void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, float randb, int
sd->num_closure = 0;
sd->randb_closure = randb;
- if (globals->Ci) {
- bool no_glossy = (path_flag & PATH_RAY_DIFFUSE) && kernel_data.integrator.no_caustics;
- flatten_surface_closure_tree(sd, no_glossy, globals->Ci);
- }
+ if (globals->Ci)
+ flatten_surface_closure_tree(sd, path_flag, globals->Ci);
}
/* Background */
diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
index 5c25c44ec8f..eb21a5f69bd 100644
--- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
+++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
@@ -22,12 +22,14 @@ shader node_subsurface_scattering(
color Color = 0.8,
float Scale = 1.0,
vector Radius = vector(0.1, 0.1, 0.1),
- float IOR = 1.3,
+ float TextureBlur = 0.0, // XXX use
+ string Falloff = "Cubic",
normal Normal = N,
output closure color BSSRDF = 0)
{
- float eta = max(IOR, 1.0 + 1e-5);
-
- BSSRDF = Color * bssrdf_cubic(N, Scale * Radius);
+ if(Falloff == "Cubic")
+ BSSRDF = Color * bssrdf_cubic(N, Scale * Radius);
+ else if(Falloff == "Gaussian")
+ BSSRDF = Color * bssrdf_gaussian(N, Scale * Radius);
}
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 24c3e187783..7d1c2443ee7 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -463,7 +463,10 @@ closure color emission() BUILTIN;
closure color background() BUILTIN;
closure color holdout() BUILTIN;
closure color ambient_occlusion() BUILTIN;
+
+// BSSRDF
closure color bssrdf_cubic(normal N, vector radius) BUILTIN;
+closure color bssrdf_gaussian(normal N, vector radius) BUILTIN;
// Backwards compatibility
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 847195134e8..bd4a2d781eb 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -340,28 +340,36 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
break;
}
#ifdef __SUBSURFACE__
- case CLOSURE_BSSRDF_ID: {
+ case CLOSURE_BSSRDF_COMPATIBLE_ID:
+ case CLOSURE_BSSRDF_CUBIC_ID:
+ case CLOSURE_BSSRDF_GAUSSIAN_ID: {
ShaderClosure *sc = &sd->closure[sd->num_closure];
float3 weight = sc->weight * mix_weight;
float sample_weight = fabsf(average(weight));
+
+ /* disable in case of a diffuse ancestor: the effect is barely visible
+ * then, and it adds considerable noise because the probability of
+ * continuing the path keeps getting lower and lower */
+ if(type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR))
+ param1 = 0.0f;
if(sample_weight > 1e-5f && sd->num_closure+2 < MAX_CLOSURE) {
/* radius * scale */
float3 radius = stack_load_float3(stack, data_node.w)*param1;
- /* index of refraction */
- float eta = fmaxf(param2, 1.0f + 1e-5f);
+ /* texture color blur */
+ float texture_blur = param2;
/* create one closure per color channel */
if(fabsf(weight.x) > 0.0f) {
sc->weight = make_float3(weight.x, 0.0f, 0.0f);
sc->sample_weight = sample_weight;
sc->data0 = radius.x;
- sc->data1 = eta;
+ sc->data1 = texture_blur;
#ifdef __OSL__
sc->prim = NULL;
#endif
sc->N = N;
- sd->flag |= bssrdf_setup(sc);
+ sd->flag |= bssrdf_setup(sc, (ClosureType)type);
sd->num_closure++;
sc++;
@@ -371,12 +379,12 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
sc->weight = make_float3(0.0f, weight.y, 0.0f);
sc->sample_weight = sample_weight;
sc->data0 = radius.y;
- sc->data1 = eta;
+ sc->data1 = texture_blur;
#ifdef __OSL__
sc->prim = NULL;
#endif
sc->N = N;
- sd->flag |= bssrdf_setup(sc);
+ sd->flag |= bssrdf_setup(sc, (ClosureType)type);
sd->num_closure++;
sc++;
@@ -386,12 +394,12 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
sc->weight = make_float3(0.0f, 0.0f, weight.z);
sc->sample_weight = sample_weight;
sc->data0 = radius.z;
- sc->data1 = eta;
+ sc->data1 = texture_blur;
#ifdef __OSL__
sc->prim = NULL;
#endif
sc->N = N;
- sd->flag |= bssrdf_setup(sc);
+ sd->flag |= bssrdf_setup(sc, (ClosureType)type);
sd->num_closure++;
sc++;
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 939decf80a9..37ed5ead49f 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -369,8 +369,12 @@ typedef enum ClosureType {
CLOSURE_BSDF_BSSRDF_ID,
CLOSURE_BSDF_TRANSPARENT_ID,
+ /* BSSRDF */
+ CLOSURE_BSSRDF_COMPATIBLE_ID,
+ CLOSURE_BSSRDF_CUBIC_ID,
+ CLOSURE_BSSRDF_GAUSSIAN_ID,
+
/* Other */
- CLOSURE_BSSRDF_ID,
CLOSURE_EMISSION_ID,
CLOSURE_DEBUG_ID,
CLOSURE_BACKGROUND_ID,
@@ -391,7 +395,7 @@ typedef enum ClosureType {
#define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_GLOSSY_TOON_ID)
#define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID)
#define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID)
-#define CLOSURE_IS_BSSRDF(type) (type == CLOSURE_BSSRDF_ID)
+#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_COMPATIBLE_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID)
#define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_ISOTROPIC_ID)
#define CLOSURE_IS_EMISSION(type) (type == CLOSURE_EMISSION_ID)
#define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID)