diff options
Diffstat (limited to 'intern/cycles')
31 files changed, 1343 insertions, 420 deletions
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 3410c73fbde..08ad2bab22e 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -298,7 +298,7 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P object->random_id = hash_int_2d(object->random_id, 0); /* visibility flags for both parent */ - object->visibility = object_ray_visibility(b_ob) & PATH_RAY_ALL; + object->visibility = object_ray_visibility(b_ob) & PATH_RAY_ALL_VISIBILITY; if(b_parent.ptr.data != b_ob.ptr.data) { object->visibility &= object_ray_visibility(b_parent); object->random_id ^= hash_int(hash_string(b_parent.name().c_str())); @@ -306,7 +306,7 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P /* make holdout objects on excluded layer invisible for non-camera rays */ if(use_holdout && (layer_flag & render_layer.exclude_layer)) - object->visibility &= ~(PATH_RAY_ALL - PATH_RAY_CAMERA); + object->visibility &= ~(PATH_RAY_ALL_VISIBILITY - PATH_RAY_CAMERA); /* camera flag is not actually used, instead is tested * against render layer flags */ diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 469ba15d291..b5ea46e096a 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -317,7 +317,23 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen node = new DiffuseBsdfNode(); } else if (b_node.is_a(&RNA_ShaderNodeSubsurfaceScattering)) { - node = new SubsurfaceScatteringNode(); + BL::ShaderNodeSubsurfaceScattering b_subsurface_node(b_node); + + SubsurfaceScatteringNode *subsurface = new SubsurfaceScatteringNode(); + + switch(b_subsurface_node.falloff()) { + case BL::ShaderNodeSubsurfaceScattering::falloff_COMPATIBLE: + subsurface->closure = CLOSURE_BSSRDF_COMPATIBLE_ID; + break; + case BL::ShaderNodeSubsurfaceScattering::falloff_CUBIC: + subsurface->closure = CLOSURE_BSSRDF_CUBIC_ID; + break; + case BL::ShaderNodeSubsurfaceScattering::falloff_GAUSSIAN: + subsurface->closure = CLOSURE_BSSRDF_GAUSSIAN_ID; + break; + } + + node = subsurface; } else if (b_node.is_a(&RNA_ShaderNodeBsdfGlossy)) { BL::ShaderNodeBsdfGlossy b_glossy_node(b_node); diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 915b9eafbc1..b159f585831 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -37,11 +37,6 @@ CCL_NAMESPACE_BEGIN /* GGX */ -__device_inline float safe_sqrtf(float f) -{ - return sqrtf(max(f, 0.0f)); -} - __device int bsdf_microfacet_ggx_setup(ShaderClosure *sc) { sc->data0 = clamp(sc->data0, 0.0f, 1.0f); /* m_ag */ diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index 486de4ca65f..23b932a91c6 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -21,130 +21,187 @@ CCL_NAMESPACE_BEGIN -__device int bssrdf_setup(ShaderClosure *sc) +__device int bssrdf_setup(ShaderClosure *sc, ClosureType type) { if(sc->data0 < BSSRDF_MIN_RADIUS) { /* revert to diffuse BSDF if radius too small */ sc->data0 = 0.0f; sc->data1 = 0.0f; - return bsdf_diffuse_setup(sc); + int flag = bsdf_diffuse_setup(sc); + sc->type = CLOSURE_BSDF_BSSRDF_ID; + return flag; } else { - /* IOR param */ - sc->data1 = max(sc->data1, 1.0f); - sc->type = CLOSURE_BSSRDF_ID; + sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* texture blur */ + sc->type = type; return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF; } } -/* Simple Cubic BSSRDF falloff */ +/* Planar Truncated Gaussian + * + * Note how this is different from the typical gaussian, this one integrates + * to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky + * that integrating x*exp(-x) gives a nice closed form solution. */ + +/* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */ +#define GAUSS_TRUNCATE 12.46f -__device float bssrdf_cubic(float ld, float r) +__device float bssrdf_gaussian_eval(ShaderClosure *sc, float r) { - if(ld == 0.0f) - return (r == 0.0f)? 1.0f: 0.0f; + /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm + * = 1 - exp(-Rm*Rm/(2*v)) */ + const float v = sc->data0; + const float Rm = sqrtf(v*GAUSS_TRUNCATE); + + if(r >= Rm) + return 0.0f; - return powf(ld - min(r, ld), 3.0f) * 4.0f/powf(ld, 4.0f); + return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v); } -/* Original BSSRDF fallof function */ - -typedef struct BSSRDFParams { - float eta; /* index of refraction */ - float sigma_t_; /* reduced extinction coefficient */ - float sigma_tr; /* effective extinction coefficient */ - float Fdr; /* diffuse fresnel reflectance */ - float D; /* diffusion constant */ - float A; - float alpha_; /* reduced albedo */ - float zr; /* distance of virtual lightsource above surface */ - float zv; /* distance of virtual lightsource below surface */ - float ld; /* mean free path */ - float ro; /* diffuse reflectance */ -} BSSRDFParams; - -__device float bssrdf_reduced_albedo_Rd(float alpha_, float A, float ro) +__device float bssrdf_gaussian_pdf(ShaderClosure *sc, float r) { - float sq; + /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ + const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE); + + return bssrdf_gaussian_eval(sc, r) * (1.0f/(area_truncated)); +} + +__device void bssrdf_gaussian_sample(ShaderClosure *sc, float xi, float *r, float *h) +{ + /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v)) + * r = sqrt(-2*v*logf(xi)) */ + + const float v = sc->data0; + const float Rm = sqrtf(v*GAUSS_TRUNCATE); + + /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ + const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE); + + /* r(xi) */ + const float r_squared = -2.0f*v*logf(1.0f - xi*area_truncated); + *r = sqrtf(r_squared); + + /* h^2 + r^2 = Rm^2 */ + *h = sqrtf(Rm*Rm - r_squared); +} + +/* Planar Cubic BSSRDF falloff + * + * This is basically (Rm - x)^3, with some factors to normalize it. For sampling + * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as + * far as I can tell has no closed form solution. So we get an iterative solution + * instead with newton-raphson. */ + +__device float bssrdf_cubic_eval(ShaderClosure *sc, float r) +{ + const float Rm = sc->data0; + + if(r >= Rm) + return 0.0f; + + /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */ + const float Rm5 = (Rm*Rm) * (Rm*Rm) * Rm; + const float f = Rm - min(r, Rm); + const float f3 = f*f*f; - sq = sqrtf(3.0f*(1.0f - alpha_)); - return (alpha_/2.0f)*(1.0f + expf((-4.0f/3.0f)*A*sq))*expf(-sq) - ro; + return (f3 * 10.0f) / (Rm5 * M_PI_F); } -__device float bssrdf_compute_reduced_albedo(float A, float ro) +__device float bssrdf_cubic_pdf(ShaderClosure *sc, float r) { - const float tolerance = 1e-8f; - const int max_iteration_count = 20; - float d, fsub, xn_1 = 0.0f, xn = 1.0f, fxn, fxn_1; + return bssrdf_cubic_eval(sc, r); +} + +/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ +__device float bssrdf_cubic_quintic_root_find(float xi) +{ + /* newton-raphson iteration, usually succeeds in 2-4 iterations, except + * outside 0.02 ... 0.98 where it can go up to 10, so overall performance + * should not be too bad */ + const float tolerance = 1e-6f; + const int max_iteration_count = 10; + float x = 0.25f; int i; - /* use secant method to compute reduced albedo using Rd function inverse - * with a given reflectance */ - fxn = bssrdf_reduced_albedo_Rd(xn, A, ro); - fxn_1 = bssrdf_reduced_albedo_Rd(xn_1, A, ro); + for (i = 0; i < max_iteration_count; i++) { + float x2 = x*x; + float x3 = x2*x; + float nx = (1.0f - x); - for (i= 0; i < max_iteration_count; i++) { - fsub = (fxn - fxn_1); - if (fabsf(fsub) < tolerance) - break; - d = ((xn - xn_1)/fsub)*fxn; - if (fabsf(d) < tolerance) - break; + float f = 10.0f*x2 - 20.0f*x3 + 15.0f*x2*x2 - 4.0f*x2*x3 - xi; + float f_ = 20.0f*(x*nx)*(nx*nx); - xn_1 = xn; - fxn_1 = fxn; - xn = xn - d; + if(fabsf(f) < tolerance || f_ == 0.0f) + break; - if (xn > 1.0f) xn = 1.0f; - if (xn_1 > 1.0f) xn_1 = 1.0f; - - fxn = bssrdf_reduced_albedo_Rd(xn, A, ro); + x = clamp(x - f/f_, 0.0f, 1.0f); } - /* avoid division by zero later */ - if (xn <= 0.0f) - xn = 0.00001f; - - return xn; + return x; } -__device void bssrdf_setup_params(BSSRDFParams *ss, float refl, float radius, float ior) +__device void bssrdf_cubic_sample(ShaderClosure *sc, float xi, float *r, float *h) { - ss->eta = ior; - ss->Fdr = -1.440f/ior*ior + 0.710f/ior + 0.668f + 0.0636f*ior; - ss->A = (1.0f + ss->Fdr)/(1.0f - ss->Fdr); - ss->ld = radius; - ss->ro = min(refl, 0.999f); + const float Rm = sc->data0; + const float r_ = bssrdf_cubic_quintic_root_find(xi) * Rm; - ss->alpha_ = bssrdf_compute_reduced_albedo(ss->A, ss->ro); + *r = r_; - ss->sigma_tr = 1.0f/ss->ld; - ss->sigma_t_ = ss->sigma_tr/sqrtf(3.0f*(1.0f - ss->alpha_)); + /* h^2 + r^2 = Rm^2 */ + *h = sqrtf(Rm*Rm - r_*r_); +} - ss->D = 1.0f/(3.0f*ss->sigma_t_); +/* None BSSRDF falloff + * + * Samples distributed over disk with no falloff, for reference. */ - ss->zr = 1.0f/ss->sigma_t_; - ss->zv = ss->zr + 4.0f*ss->A*ss->D; +__device float bssrdf_none_eval(ShaderClosure *sc, float r) +{ + const float Rm = sc->data0; + return (r < Rm)? 1.0f: 0.0f; } -/* exponential falloff function */ +__device float bssrdf_none_pdf(ShaderClosure *sc, float r) +{ + /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */ + const float Rm = sc->data0; + const float area = (M_PI_F*Rm*Rm); + + return bssrdf_none_eval(sc, r) / area; +} -__device float bssrdf_original(const BSSRDFParams *ss, float r) +__device void bssrdf_none_sample(ShaderClosure *sc, float xi, float *r, float *h) { - if(ss->ld == 0.0f) - return (r == 0.0f)? 1.0f: 0.0f; + /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2 + * r = sqrt(xi)*Rm */ + const float Rm = sc->data0; + const float r_ = sqrtf(xi)*Rm; + + *r = r_; - float rr = r*r; - float sr, sv, Rdr, Rdv; + /* h^2 + r^2 = Rm^2 */ + *h = sqrtf(Rm*Rm - r_*r_); +} - sr = sqrtf(rr + ss->zr*ss->zr); - sv = sqrtf(rr + ss->zv*ss->zv); +/* Generic */ - Rdr = ss->zr*(1.0f + ss->sigma_tr*sr)*expf(-ss->sigma_tr*sr)/(sr*sr*sr); - Rdv = ss->zv*(1.0f + ss->sigma_tr*sv)*expf(-ss->sigma_tr*sv)/(sv*sv*sv); +__device void bssrdf_sample(ShaderClosure *sc, float xi, float *r, float *h) +{ + if(sc->type == CLOSURE_BSSRDF_CUBIC_ID) + bssrdf_cubic_sample(sc, xi, r, h); + else + bssrdf_gaussian_sample(sc, xi, r, h); +} - return ss->alpha_*(1.0f/M_4PI_F)*(Rdr + Rdv); +__device float bssrdf_pdf(ShaderClosure *sc, float r) +{ + if(sc->type == CLOSURE_BSSRDF_CUBIC_ID) + return bssrdf_cubic_pdf(sc, r); + else + return bssrdf_gaussian_pdf(sc, r); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h index f0f1fcd4c0a..4cc92254b01 100644 --- a/intern/cycles/kernel/kernel_bvh.h +++ b/intern/cycles/kernel/kernel_bvh.h @@ -488,7 +488,7 @@ __device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersectio /*stochastic fade from minimum width*/ if(lcg_state && coverage != 1.0f) { - if(lcg_step(lcg_state) > coverage) + if(lcg_step_float(lcg_state) > coverage) return hit; } @@ -640,7 +640,7 @@ __device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, float adjradius = or1 + z * (or2 - or1) / l; adjradius = adjradius / (r1 + z * gd); if(lcg_state && adjradius != 1.0f) { - if(lcg_step(lcg_state) > adjradius) + if(lcg_step_float(lcg_state) > adjradius) return false; } /* --- */ @@ -690,8 +690,8 @@ __device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, * only want to intersect with primitives in the same object, and if case of * multiple hits we pick a single random primitive as the intersection point. */ -__device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect, - float3 P, float3 idir, int object, int triAddr, float tmax, int *num_hits, float subsurface_random) +__device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, + float3 P, float3 idir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) { /* compute and check intersection t-value */ float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); @@ -718,20 +718,30 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters if(v >= 0.0f && u + v <= 1.0f) { (*num_hits)++; - if(subsurface_random * (*num_hits) <= 1.0f) { - /* record intersection */ - isect->prim = triAddr; - isect->object = object; - isect->u = u; - isect->v = v; - isect->t = t; - return true; + int hit; + + if(*num_hits <= max_hits) { + hit = *num_hits - 1; } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % *num_hits; + + if(hit >= max_hits) + return; + } + + /* record intersection */ + Intersection *isect = &isect_array[hit]; + isect->prim = triAddr; + isect->object = object; + isect->u = u; + isect->v = v; + isect->t = t; } } } - - return false; } #endif @@ -741,7 +751,6 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters #define BVH_MOTION 2 #define BVH_HAIR 4 #define BVH_HAIR_MINIMUM_WIDTH 8 -#define BVH_SUBSURFACE 16 #define BVH_FUNCTION_NAME bvh_intersect #define BVH_FUNCTION_FEATURES 0 @@ -773,32 +782,31 @@ __device_inline bool bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters #if defined(__SUBSURFACE__) #define BVH_FUNCTION_NAME bvh_intersect_subsurface -#define BVH_FUNCTION_FEATURES BVH_SUBSURFACE -#include "kernel_bvh_traversal.h" +#include "kernel_bvh_subsurface.h" #endif #if defined(__SUBSURFACE__) && defined(__INSTANCING__) #define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE -#include "kernel_bvh_traversal.h" +#define BVH_FUNCTION_FEATURES BVH_INSTANCING +#include "kernel_bvh_subsurface.h" #endif #if defined(__SUBSURFACE__) && defined(__HAIR__) #define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH -#include "kernel_bvh_traversal.h" +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH +#include "kernel_bvh_subsurface.h" #endif #if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) #define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_MOTION -#include "kernel_bvh_traversal.h" +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +#include "kernel_bvh_subsurface.h" #endif #if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) #define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_SUBSURFACE|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION -#include "kernel_bvh_traversal.h" +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION +#include "kernel_bvh_subsurface.h" #endif @@ -844,38 +852,38 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui } #ifdef __SUBSURFACE__ -__device_inline int scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, float subsurface_random) +__device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) { #ifdef __OBJECT_MOTION__ if(kernel_data.bvh.have_motion) { #ifdef __HAIR__ if(kernel_data.bvh.have_curves) - return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, subsurface_random); + return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits); #endif /* __HAIR__ */ - return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, subsurface_random); + return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits); } #endif /* __OBJECT_MOTION__ */ #ifdef __HAIR__ if(kernel_data.bvh.have_curves) - return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, subsurface_random); + return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits); #endif /* __HAIR__ */ #ifdef __KERNEL_CPU__ #ifdef __INSTANCING__ if(kernel_data.bvh.have_instancing) - return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, subsurface_random); + return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits); #endif /* __INSTANCING__ */ - return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, subsurface_random); + return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits); #else /* __KERNEL_CPU__ */ #ifdef __INSTANCING__ - return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, subsurface_random); + return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits); #else - return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, subsurface_random); + return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits); #endif /* __INSTANCING__ */ #endif /* __KERNEL_CPU__ */ @@ -980,6 +988,51 @@ __device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, co #endif } +/* same as above, except that isect->t is assumed to be in object space for instancing */ +__device_inline float3 bvh_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != ~0) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } + + P = P + D*t; + + float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); + float rt = Oz * invDz; + + P = P + D*rt; + + if(isect->object != ~0) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} + #ifdef __HAIR__ __device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3) diff --git a/intern/cycles/kernel/kernel_bvh_subsurface.h b/intern/cycles/kernel/kernel_bvh_subsurface.h new file mode 100644 index 00000000000..ac30bea6a9d --- /dev/null +++ b/intern/cycles/kernel/kernel_bvh_subsurface.h @@ -0,0 +1,308 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2013, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function for subsurface scattering, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_INSTANCING: object instancing + * BVH_MOTION: motion blur rendering + * + */ + +#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) + +__device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersection *isect_array, + int subsurface_object, uint *lcg_state, int max_hits) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - SSE for hair + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 idir = bvh_inverse_direction(ray->D); + int object = ~0; + + const uint visibility = ~0; + uint num_hits = 0; + +#if FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const __m128i pn = _mm_set_epi32(0x80000000, 0x80000000, 0x00000000, 0x00000000); + __m128 Psplat[3], idirsplat[3]; + + Psplat[0] = _mm_set_ps1(P.x); + Psplat[1] = _mm_set_ps1(P.y); + Psplat[2] = _mm_set_ps1(P.z); + + idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn)); + idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn)); + idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn)); + + __m128 tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f); + + shuffle_swap_t shufflex = (idir.x >= 0)? shuf_identity: shuf_swap; + shuffle_swap_t shuffley = (idir.y >= 0)? shuf_identity: shuf_swap; + shuffle_swap_t shufflez = (idir.z >= 0)? shuf_identity: shuf_swap; +#endif + + /* traversal loop */ + do { + do + { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) + { + bool traverseChild0, traverseChild1; + int nodeAddrChild1; + +#if !defined(__KERNEL_SSE2__) + /* Intersect two child bounding boxes, non-SSE version */ + float t = tmax; + + /* fetch node data */ + float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); + float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); + float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); + + /* intersect ray against child nodes */ + NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); + NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); + + NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); + NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); + + /* decide which nodes to traverse next */ +#ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility); + traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility); +#else + traverseChild0 = (c0max >= c0min); + traverseChild1 = (c1max >= c1min); +#endif + +#else // __KERNEL_SSE2__ + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + + /* fetch node data */ + __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; + float4 cnodes = ((float4*)bvh_nodes)[3]; + + /* intersect ray against child nodes */ + const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflex), Psplat[0]), idirsplat[0]); + const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shuffley), Psplat[1]), idirsplat[1]); + const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflez), Psplat[2]), idirsplat[2]); + + const __m128 tminmax = _mm_xor_ps(_mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat)), _mm_castsi128_ps(pn)); + const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle_swap(tminmax, shuf_swap)); + + /* decide which nodes to traverse next */ +#ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility); + traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility); +#else + traverseChild0 = (_mm_movemask_ps(lrhit) & 1); + traverseChild1 = (_mm_movemask_ps(lrhit) & 2); +#endif +#endif // __KERNEL_SSE2__ + + nodeAddr = __float_as_int(cnodes.x); + nodeAddrChild1 = __float_as_int(cnodes.y); + + if(traverseChild0 && traverseChild1) { + /* both children were intersected, push the farther one */ +#if !defined(__KERNEL_SSE2__) + bool closestChild1 = (c1min < c0min); +#else + union { __m128 m128; float v[4]; } uminmax; + uminmax.m128 = tminmax; + bool closestChild1 = uminmax.v[1] < uminmax.v[0]; +#endif + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* one child was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + else if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1)); + int primAddr = __float_as_int(leaf.x); + +#if FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + while(primAddr < primAddr2) { + /* only primitives from the same object */ + uint tri_object = (object == ~0)? kernel_tex_fetch(__prim_object, primAddr): object; + + if(tri_object == subsurface_object) { + + /* intersect ray against primitive */ + bvh_triangle_intersect_subsurface(kg, isect_array, P, idir, object, primAddr, tmax, &num_hits, lcg_state, max_hits); + } + + primAddr++; + } + } +#if FEATURE(BVH_INSTANCING) + else { + /* instance push */ + if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { + object = subsurface_object; + + float t_ignore = FLT_MAX; +#if FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax); +#else + bvh_instance_push(kg, object, ray, &P, &idir, &t_ignore, tmax); +#endif + +#if defined(__KERNEL_SSE2__) + Psplat[0] = _mm_set_ps1(P.x); + Psplat[1] = _mm_set_ps1(P.y); + Psplat[2] = _mm_set_ps1(P.z); + + idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn)); + idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn)); + idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn)); + + tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f); + + shufflex = (idir.x >= 0)? shuf_identity: shuf_swap; + shuffley = (idir.y >= 0)? shuf_identity: shuf_swap; + shufflez = (idir.z >= 0)? shuf_identity: shuf_swap; +#endif + + ++stackPtr; + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } +#endif + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != ~0); + + /* instance pop */ + float t_ignore = FLT_MAX; +#if FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax); +#else + bvh_instance_pop(kg, object, ray, &P, &idir, &t_ignore, tmax); +#endif + +#if defined(__KERNEL_SSE2__) + Psplat[0] = _mm_set_ps1(P.x); + Psplat[1] = _mm_set_ps1(P.y); + Psplat[2] = _mm_set_ps1(P.z); + + idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn)); + idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn)); + idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn)); + + tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f); + + shufflex = (idir.x >= 0)? shuf_identity: shuf_swap; + shuffley = (idir.y >= 0)? shuf_identity: shuf_swap; + shufflez = (idir.z >= 0)? shuf_identity: shuf_swap; +#endif + + object = ~0; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return num_hits; +} + +#undef FEATURE +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES + diff --git a/intern/cycles/kernel/kernel_bvh_traversal.h b/intern/cycles/kernel/kernel_bvh_traversal.h index cfca405e7a5..a9264f318eb 100644 --- a/intern/cycles/kernel/kernel_bvh_traversal.h +++ b/intern/cycles/kernel/kernel_bvh_traversal.h @@ -24,7 +24,6 @@ * BVH_INSTANCING: object instancing * BVH_HAIR: hair curve rendering * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width - * BVH_SUBSURFACE: subsurface same object, random triangle intersection * BVH_MOTION: motion blur rendering * */ @@ -32,13 +31,8 @@ #define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) __device bool BVH_FUNCTION_NAME -(KernelGlobals *kg, const Ray *ray, Intersection *isect -#if FEATURE(BVH_SUBSURFACE) -, int subsurface_object, float subsurface_random -#else -, const uint visibility -#endif -#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) && !FEATURE(BVH_SUBSURFACE) +(KernelGlobals *kg, const Ray *ray, Intersection *isect, const uint visibility +#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) , uint *lcg_state, float difl, float extmax #endif ) @@ -65,11 +59,6 @@ __device bool BVH_FUNCTION_NAME float3 idir = bvh_inverse_direction(ray->D); int object = ~0; -#if FEATURE(BVH_SUBSURFACE) - const uint visibility = ~0; - int num_hits = 0; -#endif - #if FEATURE(BVH_MOTION) Transform ob_tfm; #endif @@ -141,7 +130,7 @@ __device bool BVH_FUNCTION_NAME NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); -#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) && !FEATURE(BVH_SUBSURFACE) +#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) if(difl != 0.0f) { float hdiff = 1.0f + difl; float ldiff = 1.0f - difl; @@ -245,59 +234,37 @@ __device bool BVH_FUNCTION_NAME while(primAddr < primAddr2) { bool hit; -#if FEATURE(BVH_SUBSURFACE) - /* only primitives from the same object */ - uint tri_object = (object == ~0)? kernel_tex_fetch(__prim_object, primAddr): object; - - if(tri_object == subsurface_object) { -#endif - - /* intersect ray against primitive */ + /* intersect ray against primitive */ #if FEATURE(BVH_HAIR) - uint segment = kernel_tex_fetch(__prim_segment, primAddr); -#if !FEATURE(BVH_SUBSURFACE) - if(segment != ~0) { + uint segment = kernel_tex_fetch(__prim_segment, primAddr); + if(segment != ~0) { - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) #if FEATURE(BVH_HAIR_MINIMUM_WIDTH) - hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax); - else - hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax); + hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax); + else + hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax); #else - hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment); - else - hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment); -#endif - } + hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment); else + hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment); #endif -#endif -#if FEATURE(BVH_SUBSURFACE) -#if FEATURE(BVH_HAIR) - if(segment == ~0) -#endif - { - hit = bvh_triangle_intersect_subsurface(kg, isect, P, idir, object, primAddr, tmax, &num_hits, subsurface_random); - (void)hit; - } - } -#else - hit = bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr); + else +#endif + hit = bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr); - /* shadow ray early termination */ + /* shadow ray early termination */ #if defined(__KERNEL_SSE2__) && !FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(hit) { - if(visibility == PATH_RAY_SHADOW_OPAQUE) - return true; - - tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f); - } -#else - if(hit && visibility == PATH_RAY_SHADOW_OPAQUE) + if(hit) { + if(visibility == PATH_RAY_SHADOW_OPAQUE) return true; -#endif + tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f); + } +#else + if(hit && visibility == PATH_RAY_SHADOW_OPAQUE) + return true; #endif primAddr++; @@ -306,47 +273,34 @@ __device bool BVH_FUNCTION_NAME #if FEATURE(BVH_INSTANCING) else { /* instance push */ -#if FEATURE(BVH_SUBSURFACE) - if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { - object = subsurface_object; -#else - object = kernel_tex_fetch(__prim_object, -primAddr-1); -#endif + object = kernel_tex_fetch(__prim_object, -primAddr-1); #if FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax); + bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax); #else - bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax); + bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax); #endif #if defined(__KERNEL_SSE2__) && !FEATURE(BVH_HAIR_MINIMUM_WIDTH) - Psplat[0] = _mm_set_ps1(P.x); - Psplat[1] = _mm_set_ps1(P.y); - Psplat[2] = _mm_set_ps1(P.z); + Psplat[0] = _mm_set_ps1(P.x); + Psplat[1] = _mm_set_ps1(P.y); + Psplat[2] = _mm_set_ps1(P.z); - idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn)); - idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn)); - idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn)); + idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), _mm_castsi128_ps(pn)); + idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), _mm_castsi128_ps(pn)); + idirsplat[2] = _mm_xor_ps(_mm_set_ps1(idir.z), _mm_castsi128_ps(pn)); - tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f); + tsplat = _mm_set_ps(-isect->t, -isect->t, 0.0f, 0.0f); - shufflex = (idir.x >= 0)? shuf_identity: shuf_swap; - shuffley = (idir.y >= 0)? shuf_identity: shuf_swap; - shufflez = (idir.z >= 0)? shuf_identity: shuf_swap; + shufflex = (idir.x >= 0)? shuf_identity: shuf_swap; + shuffley = (idir.y >= 0)? shuf_identity: shuf_swap; + shufflez = (idir.z >= 0)? shuf_identity: shuf_swap; #endif - ++stackPtr; - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + ++stackPtr; + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; - nodeAddr = kernel_tex_fetch(__object_node, object); -#if FEATURE(BVH_SUBSURFACE) - } - else { - /* pop */ - nodeAddr = traversalStack[stackPtr]; - --stackPtr; - } -#endif + nodeAddr = kernel_tex_fetch(__object_node, object); } } #endif @@ -386,11 +340,7 @@ __device bool BVH_FUNCTION_NAME #endif } while(nodeAddr != ENTRYPOINT_SENTINEL); -#if FEATURE(BVH_SUBSURFACE) - return (num_hits != 0); -#else return (isect->prim != ~0); -#endif } #undef FEATURE diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h index 7d5e4cd9df5..592c45867ac 100644 --- a/intern/cycles/kernel/kernel_montecarlo.h +++ b/intern/cycles/kernel/kernel_montecarlo.h @@ -108,11 +108,26 @@ __device float3 sample_uniform_sphere(float u1, float u2) return make_float3(x, y, z); } +__device float balance_heuristic(float a, float b) +{ + return (a)/(a + b); +} + +__device float balance_heuristic_3(float a, float b, float c) +{ + return (a)/(a + b + c); +} + __device float power_heuristic(float a, float b) { return (a*a)/(a*a + b*b); } +__device float power_heuristic_3(float a, float b, float c) +{ + return (a*a)/(a*a + b*b + c*c); +} + __device float2 concentric_sample_disk(float u1, float u2) { float r, theta; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index bf06f8dd5f6..d613943e85d 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -100,11 +100,11 @@ __device_inline void path_state_next(KernelGlobals *kg, PathState *state, int la /* diffuse/glossy/singular */ if(label & LABEL_DIFFUSE) { - state->flag |= PATH_RAY_DIFFUSE; + state->flag |= PATH_RAY_DIFFUSE|PATH_RAY_DIFFUSE_ANCESTOR; state->flag &= ~(PATH_RAY_GLOSSY|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP); } else if(label & LABEL_GLOSSY) { - state->flag |= PATH_RAY_GLOSSY; + state->flag |= PATH_RAY_GLOSSY|PATH_RAY_GLOSSY_ANCESTOR; state->flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP); } else { @@ -117,7 +117,7 @@ __device_inline void path_state_next(KernelGlobals *kg, PathState *state, int la __device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *state) { - uint flag = state->flag; + uint flag = state->flag & PATH_RAY_ALL_VISIBILITY; /* for visibility, diffuse/glossy are for reflection only */ if(flag & PATH_RAY_TRANSMIT) @@ -404,7 +404,15 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, /* do bssrdf scatter step if we picked a bssrdf closure */ if(sc) { uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb); - subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); + + if(old_subsurface_scatter_use(&sd)) { + old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); + } + else { + float bssrdf_u, bssrdf_v; + path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); + } } } #endif @@ -646,7 +654,15 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray /* do bssrdf scatter step if we picked a bssrdf closure */ if(sc) { uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb); - subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); + + if(old_subsurface_scatter_use(&sd)) { + old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); + } + else { + float bssrdf_u, bssrdf_v; + path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); + } } } #endif @@ -1090,17 +1106,32 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb); int num_samples = kernel_data.integrator.subsurface_samples; float num_samples_inv = 1.0f/num_samples; + RNG bssrdf_rng = cmj_hash(*rng, i); /* do subsurface scatter step with copy of shader data, this will * replace the BSSRDF with a diffuse BSDF closure */ for(int j = 0; j < num_samples; j++) { - ShaderData bssrdf_sd = sd; - subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true); - - /* compute lighting with the BSDF closure */ - kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j, - &bssrdf_sd, throughput, num_samples_inv, - ray_pdf, ray_pdf, state, rng_offset, &L, buffer); + if(old_subsurface_scatter_use(&sd)) { + ShaderData bssrdf_sd = sd; + old_subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true); + + /* compute lighting with the BSDF closure */ + kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j, + &bssrdf_sd, throughput, num_samples_inv, + ray_pdf, ray_pdf, state, rng_offset, &L, buffer); + } + else { + ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; + float bssrdf_u, bssrdf_v; + path_rng_2D(kg, &bssrdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true); + + /* compute lighting with the BSDF closure */ + for(int hit = 0; hit < num_hits; hit++) + kernel_path_non_progressive_lighting(kg, rng, sample*num_samples + j, + &bssrdf_sd[hit], throughput, num_samples_inv, + ray_pdf, ray_pdf, state, rng_offset, &L, buffer); + } } } } diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index c86ac34a057..be848d9bb16 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -235,7 +235,14 @@ __device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng) #endif -__device float lcg_step(uint *rng) +__device uint lcg_step_uint(uint *rng) +{ + /* implicit mod 2^32 */ + *rng = (1103515245*(*rng) + 12345); + return *rng; +} + +__device float lcg_step_float(uint *rng) { /* implicit mod 2^32 */ *rng = (1103515245*(*rng) + 12345); @@ -245,7 +252,7 @@ __device float lcg_step(uint *rng) __device uint lcg_init(uint seed) { uint rng = seed; - lcg_step(&rng); + lcg_step_uint(&rng); return rng; } diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 5dd12f98b9c..2c86cc5e227 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -184,52 +184,32 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData sd->flag = kernel_tex_fetch(__object_flag, sd->object); sd->prim = kernel_tex_fetch(__prim_index, isect->prim); -#ifdef __HAIR__ - if(kernel_tex_fetch(__prim_segment, isect->prim) != ~0) { - /* Strand Shader setting*/ - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - - sd->shader = __float_as_int(curvedata.z); - sd->segment = isect->segment; - - float tcorr = isect->t; - if(kernel_data.curve.curveflags & CURVE_KN_POSTINTERSECTCORRECTION) - tcorr = (isect->u < 0)? tcorr + sqrtf(isect->v) : tcorr - sqrtf(isect->v); - - sd->P = bvh_curve_refine(kg, sd, isect, ray, tcorr); - } - else { -#endif - /* fetch triangle data */ - float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim); - float3 Ng = make_float3(Ns.x, Ns.y, Ns.z); - sd->shader = __float_as_int(Ns.w); + /* fetch triangle data */ + float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim); + float3 Ng = make_float3(Ns.x, Ns.y, Ns.z); + sd->shader = __float_as_int(Ns.w); #ifdef __HAIR__ - sd->segment = ~0; + sd->segment = ~0; #endif #ifdef __UV__ - sd->u = isect->u; - sd->v = isect->v; + sd->u = isect->u; + sd->v = isect->v; #endif - /* vectors */ - sd->P = bvh_triangle_refine(kg, sd, isect, ray); - sd->Ng = Ng; - sd->N = Ng; - - /* smooth normal */ - if(sd->shader & SHADER_SMOOTH_NORMAL) - sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); + /* vectors */ + sd->P = bvh_triangle_refine_subsurface(kg, sd, isect, ray); + sd->Ng = Ng; + sd->N = Ng; + + /* smooth normal */ + if(sd->shader & SHADER_SMOOTH_NORMAL) + sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); #ifdef __DPDU__ - /* dPdu/dPdv */ - triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); -#endif - -#ifdef __HAIR__ - } + /* dPdu/dPdv */ + triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); #endif sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); @@ -468,6 +448,8 @@ __device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData __device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, float *pdf, int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight) { + /* this is the veach one-sample model with balance heuristic, some pdf + * factors drop out when using balance heuristic weighting */ for(int i = 0; i< sd->num_closure; i++) { if(i == skip_bsdf) continue; @@ -706,34 +688,34 @@ __device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) #endif } -__device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N) +__device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_) { #ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); - - *N = make_float3(0.0f, 0.0f, 0.0f); + float3 N = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i< sd->num_closure; i++) { ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { eval += sc->weight*ao_factor; - *N += sc->N*average(sc->weight); + N += sc->N*average(sc->weight); } else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) { eval += sc->weight; - *N += sd->N*average(sc->weight); + N += sd->N*average(sc->weight); } } - if(is_zero(*N)) - *N = sd->N; + if(is_zero(N)) + N = sd->N; else - *N = normalize(*N); + N = normalize(N); + *N_ = N; return eval; #else - *N = sd->N; + *N_ = sd->N; if(CLOSURE_IS_BSDF_DIFFUSE(sd->closure.type)) return sd->closure.weight*ao_factor; @@ -744,6 +726,49 @@ __device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_facto #endif } +__device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_) +{ +#ifdef __MULTI_CLOSURE__ + float3 eval = make_float3(0.0f, 0.0f, 0.0f); + float3 N = make_float3(0.0f, 0.0f, 0.0f); + float texture_blur = 0.0f, weight_sum = 0.0f; + + for(int i = 0; i< sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if(CLOSURE_IS_BSSRDF(sc->type)) { + float avg_weight = fabsf(average(sc->weight)); + + N += sc->N*avg_weight; + eval += sc->weight; + texture_blur += sc->data1*avg_weight; + weight_sum += avg_weight; + } + } + + if(N_) + *N_ = (is_zero(N))? sd->N: normalize(N); + + if(texture_blur_) + *texture_blur_ = texture_blur/weight_sum; + + return eval; +#else + if(CLOSURE_IS_BSSRDF(sd->closure.type)) { + if(N_) *N_ = sd->closure.N; + if(texture_blur_) *texture_blur_ = sd->closure.data1; + + return sd->closure.weight; + } + else { + if(N_) *N_ = sd->N; + if(texture_blur_) *texture_blur_ = 0.0f; + + return make_float3(0.0f, 0.0f, 0.0f); + } +#endif +} + /* Emission */ __device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc) diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 4fae961512e..8f5bcdf06e2 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -18,35 +18,18 @@ CCL_NAMESPACE_BEGIN -#define BSSRDF_MULTI_EVAL -#define BSSRDF_SKIP_NO_HIT - -__device float bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u) -{ - int table_offset = kernel_data.bssrdf.table_offset; - float r = lookup_table_read_2D(kg, u, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE); - - return r*radius; -} +#include "closure/bssrdf.h" -#ifdef BSSRDF_MULTI_EVAL -__device float bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r) -{ - if(r >= radius) - return 0.0f; +/* NEW BSSRDF: See "BSSRDF Importance Sampling", SIGGRAPH 2013 */ - /* todo: when we use the real BSSRDF this will need to be divided by the maximum - * radius instead of the average radius */ - float t = r/radius; - - int table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET; - float pdf = lookup_table_read_2D(kg, t, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE); - - pdf /= radius; +/* TODO: + * - test using power heuristic for combing bssrdfs + * - try to reduce one sample model variance + * - possible shade all hits for progressive integrator + * - cubic and gaussian scale difference tweak + */ - return pdf; -} -#endif +#define BSSRDF_MULTI_EVAL __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, ShaderData *sd, float *probability) { @@ -75,7 +58,6 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade /* use bssrdf */ r -= bsdf_sum; - sd->randb_closure = 0.0f; /* not needed anymore */ float sum = 0.0f; @@ -86,6 +68,8 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade sum += sc->sample_weight; if(r <= sum) { + sd->randb_closure = (r - (sum - sc->sample_weight))/sc->sample_weight; + #ifdef BSSRDF_MULTI_EVAL *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bssrdf_sum: 1.0f; #else @@ -97,12 +81,362 @@ __device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Shade } /* should never happen */ + sd->randb_closure = 0.0f; *probability = 1.0f; return NULL; } +__device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all) +{ #ifdef BSSRDF_MULTI_EVAL -__device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all) + /* this is the veach one-sample model with balance heuristic, some pdf + * factors drop out when using balance heuristic weighting */ + float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f); + float pdf_sum = 0.0f; + float sample_weight_sum = 0.0f; + int num_bssrdf = 0; + + for(int i = 0; i < sd->num_closure; i++) { + sc = &sd->closure[i]; + + if(CLOSURE_IS_BSSRDF(sc->type)) { + float sample_weight = (all)? 1.0f: sc->sample_weight; + sample_weight_sum += sample_weight; + } + } + + float sample_weight_inv = 1.0f/sample_weight_sum; + + //printf("num closures %d\n", sd->num_closure); + + for(int i = 0; i < sd->num_closure; i++) { + sc = &sd->closure[i]; + + if(CLOSURE_IS_BSSRDF(sc->type)) { + /* in case of non-progressive integrate we sample all bssrdf's once, + * for progressive we pick one, so adjust pdf for that */ + float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv; + + /* compute pdf */ + float pdf = bssrdf_pdf(sc, r); + float disk_pdf = bssrdf_pdf(sc, disk_r); + + /* TODO power heuristic is not working correct here */ + eval_sum += sc->weight*pdf; //*sample_weight*disk_pdf; + pdf_sum += sample_weight*disk_pdf; //*sample_weight*disk_pdf; + + num_bssrdf++; + } + } + + return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f); +#else + float pdf = bssrdf_pdf(pick_sc, r); + float disk_pdf = bssrdf_pdf(pick_sc, disk_r); + + return pick_sc->weight * pdf / disk_pdf; +#endif +} + +/* replace closures with a single diffuse bsdf closure after scatter step */ +__device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight, bool hit, float3 N) +{ + sd->flag &= ~SD_CLOSURE_FLAGS; + sd->randb_closure = 0.0f; + + if(hit) { + ShaderClosure *sc = &sd->closure[0]; + sd->num_closure = 1; + + sc->weight = weight; + sc->sample_weight = 1.0f; + sc->data0 = 0.0f; + sc->data1 = 0.0f; + sc->N = N; + sd->flag |= bsdf_diffuse_setup(sc); + + /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes + * can recognize it as not being a regular diffuse closure */ + sc->type = CLOSURE_BSDF_BSSRDF_ID; + } + else + sd->num_closure = 0; +} + +/* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */ +__device float3 subsurface_color_pow(float3 color, float exponent) +{ + color = max(color, make_float3(0.0f, 0.0f, 0.0f)); + + if(exponent == 1.0f) { + /* nothing to do */ + } + else if(exponent == 0.5f) { + color.x = sqrtf(color.x); + color.y = sqrtf(color.y); + color.z = sqrtf(color.z); + } + else { + color.x = powf(color.x, exponent); + color.y = powf(color.y, exponent); + color.z = powf(color.z, exponent); + } + + return color; +} + +__device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *out_sd, ShaderData *in_sd, int state_flag, float3 *eval, float3 *N) +{ + /* average color and texture blur at outgoing point */ + float texture_blur; + float3 out_color = shader_bssrdf_sum(out_sd, NULL, &texture_blur); + + /* do we have bump mapping? */ + bool bump = (out_sd->flag & SD_HAS_BSSRDF_BUMP) != 0; + + if(bump || texture_blur > 0.0f) { + /* average color and normal at incoming point */ + shader_eval_surface(kg, in_sd, 0.0f, state_flag, SHADER_CONTEXT_SSS); + float3 in_color = shader_bssrdf_sum(in_sd, (bump)? N: NULL, NULL); + + /* we simply divide out the average color and multiply with the average + * of the other one. we could try to do this per closure but it's quite + * tricky to match closures between shader evaluations, their number and + * order may change, this is simpler */ + if(texture_blur > 0.0f) { + out_color = subsurface_color_pow(out_color, texture_blur); + in_color = subsurface_color_pow(in_color, texture_blur); + + *eval *= safe_divide_color(in_color, out_color); + } + } +} + +/* subsurface scattering step, from a point on the surface to other nearby points on the same object */ +__device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd, ShaderData bssrdf_sd[BSSRDF_MAX_HITS], + int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all) +{ + /* pick random axis in local frame and point on disk */ + float3 disk_N, disk_T, disk_B; + float pick_pdf_N, pick_pdf_T, pick_pdf_B; + + disk_N = sd->Ng; + make_orthonormals(disk_N, &disk_T, &disk_B); + + if(disk_u < 0.5f) { + pick_pdf_N = 0.5f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.25f; + disk_u *= 2.0f; + } + else if(disk_u < 0.75f) { + float3 tmp = disk_N; + disk_N = disk_T; + disk_T = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.5f; + pick_pdf_B = 0.25f; + disk_u = (disk_u - 0.5f)*4.0f; + } + else { + float3 tmp = disk_N; + disk_N = disk_B; + disk_B = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.5f; + disk_u = (disk_u - 0.75f)*4.0f; + } + + /* sample point on disk */ + float phi = M_2PI_F * disk_u; + float disk_r = disk_v; + float disk_height; + + bssrdf_sample(sc, disk_r, &disk_r, &disk_height); + + float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B; + + /* create ray */ + Ray ray; + ray.P = sd->P + disk_N*disk_height + disk_P; + ray.D = -disk_N; + ray.t = 2.0f*disk_height; + ray.dP = sd->dP; + ray.dD = differential3_zero(); + ray.time = sd->time; + + /* intersect with the same object. if multiple intersections are found it + * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */ + Intersection isect[BSSRDF_MAX_HITS]; + uint num_hits = scene_intersect_subsurface(kg, &ray, isect, sd->object, lcg_state, BSSRDF_MAX_HITS); + + /* evaluate bssrdf */ + float3 eval = make_float3(0.0f, 0.0f, 0.0f); + int num_eval_hits = min(num_hits, BSSRDF_MAX_HITS); + + for(int hit = 0; hit < num_eval_hits; hit++) { + ShaderData *bsd = &bssrdf_sd[hit]; + + /* setup new shading point */ + *bsd = *sd; + shader_setup_from_subsurface(kg, bsd, &isect[hit], &ray); + + /* probability densities for local frame axes */ + float pdf_N = pick_pdf_N * fabsf(dot(disk_N, bsd->Ng)); + float pdf_T = pick_pdf_T * fabsf(dot(disk_T, bsd->Ng)); + float pdf_B = pick_pdf_B * fabsf(dot(disk_B, bsd->Ng)); + + /* multiple importance sample between 3 axes, power heuristic + * found to be slightly better than balance heuristic */ + float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B); + + /* real distance to sampled point */ + float r = len(bsd->P - sd->P); + + /* evaluate */ + float w = mis_weight / pdf_N; + if(num_hits > BSSRDF_MAX_HITS) + w *= num_hits/(float)BSSRDF_MAX_HITS; + eval = subsurface_scatter_eval(bsd, sc, disk_r, r, all) * w; + + /* optionally blur colors and bump mapping */ + float3 N = bsd->N; + subsurface_color_bump_blur(kg, sd, bsd, state_flag, &eval, &N); + + /* setup diffuse bsdf */ + subsurface_scatter_setup_diffuse_bsdf(bsd, eval, true, N); + } + + return num_eval_hits; +} + +/* subsurface scattering step, from a point on the surface to another nearby point on the same object */ +__device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, + int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all) +{ + float3 eval = make_float3(0.0f, 0.0f, 0.0f); + uint num_hits = 0; + + /* pick random axis in local frame and point on disk */ + float3 disk_N, disk_T, disk_B; + float pick_pdf_N, pick_pdf_T, pick_pdf_B; + + disk_N = sd->Ng; + make_orthonormals(disk_N, &disk_T, &disk_B); + + if(disk_u < 0.5f) { + pick_pdf_N = 0.5f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.25f; + disk_u *= 2.0f; + } + else if(disk_u < 0.75f) { + float3 tmp = disk_N; + disk_N = disk_T; + disk_T = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.5f; + pick_pdf_B = 0.25f; + disk_u = (disk_u - 0.5f)*4.0f; + } + else { + float3 tmp = disk_N; + disk_N = disk_B; + disk_B = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.5f; + disk_u = (disk_u - 0.75f)*4.0f; + } + + /* sample point on disk */ + float phi = M_2PI_F * disk_u; + float disk_r = disk_v; + float disk_height; + + bssrdf_sample(sc, disk_r, &disk_r, &disk_height); + + float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B; + + /* create ray */ + Ray ray; + ray.P = sd->P + disk_N*disk_height + disk_P; + ray.D = -disk_N; + ray.t = 2.0f*disk_height; + ray.dP = sd->dP; + ray.dD = differential3_zero(); + ray.time = sd->time; + + /* intersect with the same object. if multiple intersections are + * found it will randomly pick one of them */ + Intersection isect; + num_hits = scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1); + + /* evaluate bssrdf */ + if(num_hits > 0) { + float3 origP = sd->P; + + /* setup new shading point */ + shader_setup_from_subsurface(kg, sd, &isect, &ray); + + /* probability densities for local frame axes */ + float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng)); + float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng)); + float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng)); + + /* multiple importance sample between 3 axes, power heuristic + * found to be slightly better than balance heuristic */ + float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B); + + /* real distance to sampled point */ + float r = len(sd->P - origP); + + /* evaluate */ + float w = (mis_weight * num_hits) / pdf_N; + eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w; + } + + /* optionally blur colors and bump mapping */ + float3 N = sd->N; + subsurface_color_bump_blur(kg, sd, sd, state_flag, &eval, &N); + + /* setup diffuse bsdf */ + subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N); +} + + +/* OLD BSSRDF */ + +__device float old_bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u) +{ + int table_offset = kernel_data.bssrdf.table_offset; + float r = lookup_table_read_2D(kg, u, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE); + + return r*radius; +} + +#ifdef BSSRDF_MULTI_EVAL +__device float old_bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r) +{ + if(r >= radius) + return 0.0f; + + /* todo: when we use the real BSSRDF this will need to be divided by the maximum + * radius instead of the average radius */ + float t = r/radius; + + int table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET; + float pdf = lookup_table_read_2D(kg, t, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE); + + pdf /= radius; + + return pdf; +} +#endif + +#ifdef BSSRDF_MULTI_EVAL +__device float3 old_subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all) { /* compute pdf */ float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f); @@ -119,7 +453,7 @@ __device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, /* compute pdf */ float pdf = 1.0f; for(int i = 0; i < num_r; i++) - pdf *= bssrdf_pdf(kg, sc->data0, refl, r[i]); + pdf *= old_bssrdf_pdf(kg, sc->data0, refl, r[i]); eval_sum += sc->weight*pdf; pdf_sum += sample_weight*pdf; @@ -148,31 +482,8 @@ __device float3 subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, } #endif -/* replace closures with a single diffuse bsdf closure after scatter step */ -__device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight) -{ - ShaderClosure *sc = &sd->closure[0]; - sd->num_closure = 1; - - sc->weight = weight; - sc->sample_weight = 1.0f; - sc->data0 = 0.0f; - sc->data1 = 0.0f; - sc->N = sd->N; - sd->flag &= ~SD_CLOSURE_FLAGS; - sd->flag |= bsdf_diffuse_setup(sc); - sd->randb_closure = 0.0f; - - /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes - * can recognize it as not being a regular diffuse closure */ - sc->type = CLOSURE_BSDF_BSSRDF_ID; - - /* todo: evaluate shading to get blurred textures and bump mapping */ - /* shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS); */ -} - /* subsurface scattering step, from a point on the surface to another nearby point on the same object */ -__device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all) +__device void old_subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all) { float radius = sc->data0; float refl = max(average(sc->weight)*3.0f, 0.0f); @@ -187,14 +498,13 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta /* attempt to find a hit a given number of times before giving up */ for(num_attempts = 0; num_attempts < kernel_data.bssrdf.num_attempts; num_attempts++) { /* random numbers for sampling */ - float u1 = lcg_step(lcg_state); - float u2 = lcg_step(lcg_state); - float u3 = lcg_step(lcg_state); - float u4 = lcg_step(lcg_state); - float u5 = lcg_step(lcg_state); - float u6 = lcg_step(lcg_state); - - r = bssrdf_sample_distance(kg, radius, refl, u5); + float u1 = lcg_step_float(lcg_state); + float u2 = lcg_step_float(lcg_state); + float u3 = lcg_step_float(lcg_state); + float u4 = lcg_step_float(lcg_state); + float u5 = lcg_step_float(lcg_state); + + r = old_bssrdf_sample_distance(kg, radius, refl, u5); #ifdef BSSRDF_MULTI_EVAL r_attempts[num_attempts] = r; #endif @@ -213,7 +523,7 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta /* intersect with the same object. if multiple intersections are * found it will randomly pick one of them */ Intersection isect; - if(!scene_intersect_subsurface(kg, &ray, &isect, sd->object, u6)) + if(scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1) == 0) continue; /* setup new shading point */ @@ -226,18 +536,32 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int sta /* evaluate subsurface scattering closures */ #ifdef BSSRDF_MULTI_EVAL - weight *= subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all); + weight *= old_subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all); #else weight *= sc->weight; #endif -#ifdef BSSRDF_SKIP_NO_HIT if(!hit) weight = make_float3(0.0f, 0.0f, 0.0f); -#endif + + /* optionally blur colors and bump mapping */ + float3 N = sd->N; + subsurface_color_bump_blur(kg, sd, sd, state_flag, &weight, &N); /* replace closures with a single diffuse BSDF */ - subsurface_scatter_setup_diffuse_bsdf(sd, weight); + subsurface_scatter_setup_diffuse_bsdf(sd, weight, hit, N); +} + +__device bool old_subsurface_scatter_use(ShaderData *sd) +{ + for(int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if(sc->type == CLOSURE_BSSRDF_COMPATIBLE_ID) + return true; + } + + return false; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 3008698313e..3421ba44007 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -43,6 +43,7 @@ CCL_NAMESPACE_BEGIN #define BSSRDF_LOOKUP_TABLE_SIZE (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE*2) #define BSSRDF_MIN_RADIUS 1e-8f #define BSSRDF_MAX_ATTEMPTS 8 +#define BSSRDF_MAX_HITS 4 #define BB_DRAPPER 800.0f #define BB_MAX_TABLE_RANGE 12000.0f @@ -214,12 +215,13 @@ enum PathRayFlag { PATH_RAY_SHADOW_TRANSPARENT = 256, PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT), - PATH_RAY_MIS_SKIP = 512, + PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/ - PATH_RAY_ALL = (1|2|4|8|16|32|64|128|256|512), + PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512), - /* visibility flag to define curve segments*/ - PATH_RAY_CURVE = 1024, + PATH_RAY_MIS_SKIP = 1024, + PATH_RAY_DIFFUSE_ANCESTOR = 2048, + PATH_RAY_GLOSSY_ANCESTOR = 4096, /* this gives collisions with localview bits * see: blender_util.h, grr - Campbell */ @@ -507,11 +509,12 @@ enum ShaderDataFlag { SD_HAS_TRANSPARENT_SHADOW = 1024, /* has transparent shadow */ SD_HAS_VOLUME = 2048, /* has volume shader */ SD_HOMOGENEOUS_VOLUME = 4096, /* has homogeneous volume */ + SD_HAS_BSSRDF_BUMP = 8192, /* bssrdf normal uses bump */ /* object flags */ - SD_HOLDOUT_MASK = 8192, /* holdout for camera rays */ - SD_OBJECT_MOTION = 16384, /* has object motion blur */ - SD_TRANSFORM_APPLIED = 32768 /* vertices have transform applied */ + SD_HOLDOUT_MASK = 16384, /* holdout for camera rays */ + SD_OBJECT_MOTION = 32768, /* has object motion blur */ + SD_TRANSFORM_APPLIED = 65536 /* vertices have transform applied */ }; struct KernelGlobals; diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index ba9b13126ac..7405b0be567 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -47,18 +47,59 @@ CCL_NAMESPACE_BEGIN using namespace OSL; -class BSSRDFClosure : public CBSSRDFClosure { +/* Cubic */ + +class CubicBSSRDFClosure : public CBSSRDFClosure { public: size_t memsize() const { return sizeof(*this); } const char *name() const { return "bssrdf_cubic"; } void setup() { + sc.type = CLOSURE_BSSRDF_COMPATIBLE_ID; sc.prim = NULL; sc.data0 = fabsf(average(radius)); - sc.data1 = 1.3f; + sc.data1 = 0.0f; // XXX texture blur + } + + bool mergeable(const ClosurePrimitive *other) const + { + return false; + } - m_shaderdata_flag = bssrdf_setup(&sc); + void print_on(std::ostream &out) const + { + out << name() << " ((" << sc.N[0] << ", " << sc.N[1] << ", " << sc.N[2] << "))"; + } +}; + +ClosureParam *closure_bssrdf_cubic_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, sc.N), + CLOSURE_FLOAT3_PARAM(CubicBSSRDFClosure, radius), + //CLOSURE_FLOAT_PARAM(CubicBSSRDFClosure, sc.data1), + CLOSURE_STRING_KEYPARAM("label"), + CLOSURE_FINISH_PARAM(CubicBSSRDFClosure) + }; + return params; +} + +CLOSURE_PREPARE(closure_bssrdf_cubic_prepare, CubicBSSRDFClosure) + +/* Gaussian */ + +class GaussianBSSRDFClosure : public CBSSRDFClosure { +public: + size_t memsize() const { return sizeof(*this); } + const char *name() const { return "bssrdf_gaussian"; } + + void setup() + { + sc.type = CLOSURE_BSSRDF_GAUSSIAN_ID; + sc.prim = NULL; + sc.data0 = fabsf(average(radius)); + sc.data1 = 0.0f; // XXX texture blurring! } bool mergeable(const ClosurePrimitive *other) const @@ -72,19 +113,19 @@ public: } }; -ClosureParam *closure_bssrdf_params() +ClosureParam *closure_bssrdf_gaussian_params() { static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(BSSRDFClosure, sc.N), - CLOSURE_FLOAT3_PARAM(BSSRDFClosure, radius), - //CLOSURE_FLOAT_PARAM(BSSRDFClosure, sc.data1), + CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, sc.N), + CLOSURE_FLOAT3_PARAM(GaussianBSSRDFClosure, radius), + //CLOSURE_FLOAT_PARAM(GaussianBSSRDFClosure, sc.data1), CLOSURE_STRING_KEYPARAM("label"), - CLOSURE_FINISH_PARAM(BSSRDFClosure) + CLOSURE_FINISH_PARAM(GaussianBSSRDFClosure) }; return params; } -CLOSURE_PREPARE(closure_bssrdf_prepare, BSSRDFClosure) +CLOSURE_PREPARE(closure_bssrdf_gaussian_prepare, GaussianBSSRDFClosure) CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_bssrdf.h b/intern/cycles/kernel/osl/osl_bssrdf.h index 54df055405e..ee9fc7c4ac5 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.h +++ b/intern/cycles/kernel/osl/osl_bssrdf.h @@ -48,15 +48,10 @@ public: ShaderClosure sc; float3 radius; - CBSSRDFClosure() : OSL::ClosurePrimitive(BSSRDF), - m_shaderdata_flag(0) { } + CBSSRDFClosure() : OSL::ClosurePrimitive(BSSRDF) { } ~CBSSRDFClosure() { } int scattering() const { return LABEL_DIFFUSE; } - int shaderdata_flag() const { return m_shaderdata_flag; } - -protected: - int m_shaderdata_flag; }; CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index b1549e95920..c03e50d4313 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -218,7 +218,9 @@ void OSLShader::register_closures(OSLShadingSystem *ss_) register_closure(ss, "phong_ramp", id++, closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare); register_closure(ss, "bssrdf_cubic", id++, - closure_bssrdf_params(), closure_bssrdf_prepare); + closure_bssrdf_cubic_params(), closure_bssrdf_cubic_prepare); + register_closure(ss, "bssrdf_gaussian", id++, + closure_bssrdf_gaussian_params(), closure_bssrdf_gaussian_prepare); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h index ca5f441aa2d..e3a7e890597 100644 --- a/intern/cycles/kernel/osl/osl_closures.h +++ b/intern/cycles/kernel/osl/osl_closures.h @@ -50,7 +50,8 @@ OSL::ClosureParam *closure_bsdf_diffuse_ramp_params(); OSL::ClosureParam *closure_bsdf_phong_ramp_params(); OSL::ClosureParam *closure_westin_backscatter_params(); OSL::ClosureParam *closure_westin_sheen_params(); -OSL::ClosureParam *closure_bssrdf_params(); +OSL::ClosureParam *closure_bssrdf_cubic_params(); +OSL::ClosureParam *closure_bssrdf_gaussian_params(); void closure_emission_prepare(OSL::RendererServices *, int id, void *data); void closure_background_prepare(OSL::RendererServices *, int id, void *data); @@ -60,7 +61,8 @@ void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *da void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data); void closure_westin_backscatter_prepare(OSL::RendererServices *, int id, void *data); void closure_westin_sheen_prepare(OSL::RendererServices *, int id, void *data); -void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data); +void closure_bssrdf_cubic_prepare(OSL::RendererServices *, int id, void *data); +void closure_bssrdf_gaussian_prepare(OSL::RendererServices *, int id, void *data); enum { AmbientOcclusion = 100 @@ -89,7 +91,8 @@ public: ShaderClosure sc; CBSDFClosure(int scattering) : OSL::ClosurePrimitive(BSDF), - m_scattering_label(scattering), m_shaderdata_flag(0) { } + m_scattering_label(scattering), m_shaderdata_flag(0) + { memset(&sc, 0, sizeof(sc)); } ~CBSDFClosure() { } int scattering() const { return m_scattering_label; } diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index dedda1dc10e..23be0acb4d3 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -17,10 +17,14 @@ */ #include "kernel_compat_cpu.h" +#include "kernel_montecarlo.h" #include "kernel_types.h" #include "kernel_globals.h" #include "kernel_object.h" +#include "closure/bsdf_diffuse.h" +#include "closure/bssrdf.h" + #include "osl_bssrdf.h" #include "osl_closures.h" #include "osl_globals.h" @@ -136,7 +140,7 @@ static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd, /* Surface */ -static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy, +static void flatten_surface_closure_tree(ShaderData *sd, int path_flag, const OSL::ClosureColor *closure, float3 weight = make_float3(1.0f, 1.0f, 1.0f)) { /* OSL gives us a closure tree, we flatten it into arrays per @@ -156,8 +160,11 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy, int scattering = bsdf->scattering(); /* no caustics option */ - if (no_glossy && scattering == LABEL_GLOSSY) - return; + if(scattering == LABEL_GLOSSY && (path_flag & PATH_RAY_DIFFUSE)) { + KernelGlobals *kg = sd->osl_globals; + if(kernel_data.integrator.no_caustics) + return; + } /* sample weight */ float sample_weight = fabsf(average(weight)); @@ -230,26 +237,32 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy, sc.data1 = bssrdf->sc.data1; sc.prim = NULL; + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if(sc.type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR)) + bssrdf->radius = make_float3(0.0f, 0.0f, 0.0f); + /* create one closure for each color channel */ if(fabsf(weight.x) > 0.0f) { sc.weight = make_float3(weight.x, 0.0f, 0.0f); sc.data0 = bssrdf->radius.x; + sd->flag |= bssrdf_setup(&sc, sc.type); sd->closure[sd->num_closure++] = sc; - sd->flag |= bssrdf->shaderdata_flag(); } if(fabsf(weight.y) > 0.0f) { sc.weight = make_float3(0.0f, weight.y, 0.0f); sc.data0 = bssrdf->radius.y; + sd->flag |= bssrdf_setup(&sc, sc.type); sd->closure[sd->num_closure++] = sc; - sd->flag |= bssrdf->shaderdata_flag(); } if(fabsf(weight.z) > 0.0f) { sc.weight = make_float3(0.0f, 0.0f, weight.z); sc.data0 = bssrdf->radius.z; + sd->flag |= bssrdf_setup(&sc, sc.type); sd->closure[sd->num_closure++] = sc; - sd->flag |= bssrdf->shaderdata_flag(); } } break; @@ -264,12 +277,12 @@ static void flatten_surface_closure_tree(ShaderData *sd, bool no_glossy, } else if (closure->type == OSL::ClosureColor::MUL) { OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_surface_closure_tree(sd, no_glossy, mul->closure, TO_FLOAT3(mul->weight) * weight); + flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); } else if (closure->type == OSL::ClosureColor::ADD) { OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_surface_closure_tree(sd, no_glossy, add->closureA, weight); - flatten_surface_closure_tree(sd, no_glossy, add->closureB, weight); + flatten_surface_closure_tree(sd, path_flag, add->closureA, weight); + flatten_surface_closure_tree(sd, path_flag, add->closureB, weight); } } @@ -292,10 +305,8 @@ void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, float randb, int sd->num_closure = 0; sd->randb_closure = randb; - if (globals->Ci) { - bool no_glossy = (path_flag & PATH_RAY_DIFFUSE) && kernel_data.integrator.no_caustics; - flatten_surface_closure_tree(sd, no_glossy, globals->Ci); - } + if (globals->Ci) + flatten_surface_closure_tree(sd, path_flag, globals->Ci); } /* Background */ diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl index 5c25c44ec8f..eb21a5f69bd 100644 --- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl +++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl @@ -22,12 +22,14 @@ shader node_subsurface_scattering( color Color = 0.8, float Scale = 1.0, vector Radius = vector(0.1, 0.1, 0.1), - float IOR = 1.3, + float TextureBlur = 0.0, // XXX use + string Falloff = "Cubic", normal Normal = N, output closure color BSSRDF = 0) { - float eta = max(IOR, 1.0 + 1e-5); - - BSSRDF = Color * bssrdf_cubic(N, Scale * Radius); + if(Falloff == "Cubic") + BSSRDF = Color * bssrdf_cubic(N, Scale * Radius); + else if(Falloff == "Gaussian") + BSSRDF = Color * bssrdf_gaussian(N, Scale * Radius); } diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 24c3e187783..7d1c2443ee7 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -463,7 +463,10 @@ closure color emission() BUILTIN; closure color background() BUILTIN; closure color holdout() BUILTIN; closure color ambient_occlusion() BUILTIN; + +// BSSRDF closure color bssrdf_cubic(normal N, vector radius) BUILTIN; +closure color bssrdf_gaussian(normal N, vector radius) BUILTIN; // Backwards compatibility diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 847195134e8..bd4a2d781eb 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -340,28 +340,36 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st break; } #ifdef __SUBSURFACE__ - case CLOSURE_BSSRDF_ID: { + case CLOSURE_BSSRDF_COMPATIBLE_ID: + case CLOSURE_BSSRDF_CUBIC_ID: + case CLOSURE_BSSRDF_GAUSSIAN_ID: { ShaderClosure *sc = &sd->closure[sd->num_closure]; float3 weight = sc->weight * mix_weight; float sample_weight = fabsf(average(weight)); + + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if(type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR)) + param1 = 0.0f; if(sample_weight > 1e-5f && sd->num_closure+2 < MAX_CLOSURE) { /* radius * scale */ float3 radius = stack_load_float3(stack, data_node.w)*param1; - /* index of refraction */ - float eta = fmaxf(param2, 1.0f + 1e-5f); + /* texture color blur */ + float texture_blur = param2; /* create one closure per color channel */ if(fabsf(weight.x) > 0.0f) { sc->weight = make_float3(weight.x, 0.0f, 0.0f); sc->sample_weight = sample_weight; sc->data0 = radius.x; - sc->data1 = eta; + sc->data1 = texture_blur; #ifdef __OSL__ sc->prim = NULL; #endif sc->N = N; - sd->flag |= bssrdf_setup(sc); + sd->flag |= bssrdf_setup(sc, (ClosureType)type); sd->num_closure++; sc++; @@ -371,12 +379,12 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st sc->weight = make_float3(0.0f, weight.y, 0.0f); sc->sample_weight = sample_weight; sc->data0 = radius.y; - sc->data1 = eta; + sc->data1 = texture_blur; #ifdef __OSL__ sc->prim = NULL; #endif sc->N = N; - sd->flag |= bssrdf_setup(sc); + sd->flag |= bssrdf_setup(sc, (ClosureType)type); sd->num_closure++; sc++; @@ -386,12 +394,12 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st sc->weight = make_float3(0.0f, 0.0f, weight.z); sc->sample_weight = sample_weight; sc->data0 = radius.z; - sc->data1 = eta; + sc->data1 = texture_blur; #ifdef __OSL__ sc->prim = NULL; #endif sc->N = N; - sd->flag |= bssrdf_setup(sc); + sd->flag |= bssrdf_setup(sc, (ClosureType)type); sd->num_closure++; sc++; diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 939decf80a9..37ed5ead49f 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -369,8 +369,12 @@ typedef enum ClosureType { CLOSURE_BSDF_BSSRDF_ID, CLOSURE_BSDF_TRANSPARENT_ID, + /* BSSRDF */ + CLOSURE_BSSRDF_COMPATIBLE_ID, + CLOSURE_BSSRDF_CUBIC_ID, + CLOSURE_BSSRDF_GAUSSIAN_ID, + /* Other */ - CLOSURE_BSSRDF_ID, CLOSURE_EMISSION_ID, CLOSURE_DEBUG_ID, CLOSURE_BACKGROUND_ID, @@ -391,7 +395,7 @@ typedef enum ClosureType { #define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_GLOSSY_TOON_ID) #define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID) #define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID) -#define CLOSURE_IS_BSSRDF(type) (type == CLOSURE_BSSRDF_ID) +#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_COMPATIBLE_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID) #define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_ISOTROPIC_ID) #define CLOSURE_IS_EMISSION(type) (type == CLOSURE_EMISSION_ID) #define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID) diff --git a/intern/cycles/render/bssrdf.cpp b/intern/cycles/render/bssrdf.cpp index 8ec3c6a1384..bba4e6e9df3 100644 --- a/intern/cycles/render/bssrdf.cpp +++ b/intern/cycles/render/bssrdf.cpp @@ -25,11 +25,16 @@ #include "kernel_types.h" #include "kernel_montecarlo.h" -#include "closure/bsdf_diffuse.h" -#include "closure/bssrdf.h" - CCL_NAMESPACE_BEGIN +static float bssrdf_cubic(float ld, float r) +{ + if(ld == 0.0f) + return (r == 0.0f)? 1.0f: 0.0f; + + return powf(ld - min(r, ld), 3.0f) * 4.0f/powf(ld, 4.0f); +} + /* Cumulative density function utilities */ static float cdf_lookup_inverse(const vector<float>& table, float2 range, float x) @@ -61,25 +66,19 @@ static void cdf_invert(vector<float>& to, float2 to_range, const vector<float>& /* BSSRDF */ -static float bssrdf_lookup_table_max_radius(const BSSRDFParams *ss) -{ - /* todo: adjust when we use the real BSSRDF */ - return ss->ld; -} - -static void bssrdf_lookup_table_create(const BSSRDFParams *ss, vector<float>& sample_table, vector<float>& pdf_table) +static void bssrdf_lookup_table_create(float ld, vector<float>& sample_table, vector<float>& pdf_table) { const int size = BSSRDF_RADIUS_TABLE_SIZE; vector<float> cdf(size); vector<float> pdf(size); float step = 1.0f/(float)(size - 1); - float max_radius = bssrdf_lookup_table_max_radius(ss); + float max_radius = ld; float pdf_sum = 0.0f; /* compute the probability density function */ for(int i = 0; i < pdf.size(); i++) { float x = (i*step)*max_radius; - pdf[i] = bssrdf_cubic(ss->ld, x); + pdf[i] = bssrdf_cubic(ld, x); pdf_sum += pdf[i]; } @@ -124,13 +123,9 @@ void bssrdf_table_build(vector<float>& table) /* create a 2D lookup table, for reflection x sample radius */ for(int i = 0; i < BSSRDF_REFL_TABLE_SIZE; i++) { - float refl = (float)i/(float)(BSSRDF_REFL_TABLE_SIZE-1); - float ior = 1.3f; float radius = 1.0f; - BSSRDFParams ss; - bssrdf_setup_params(&ss, refl, radius, ior); - bssrdf_lookup_table_create(&ss, sample_table, pdf_table); + bssrdf_lookup_table_create(radius, sample_table, pdf_table); memcpy(&table[i*BSSRDF_RADIUS_TABLE_SIZE], &sample_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float)); memcpy(&table[BSSRDF_PDF_TABLE_OFFSET + i*BSSRDF_RADIUS_TABLE_SIZE], &pdf_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float)); diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h index da8ed987346..df361cde2b4 100644 --- a/intern/cycles/render/graph.h +++ b/intern/cycles/render/graph.h @@ -76,7 +76,8 @@ enum ShaderNodeSpecialType { SHADER_SPECIAL_TYPE_NONE, SHADER_SPECIAL_TYPE_PROXY, SHADER_SPECIAL_TYPE_MIX_CLOSURE, - SHADER_SPECIAL_TYPE_AUTOCONVERT + SHADER_SPECIAL_TYPE_AUTOCONVERT, + SHADER_SPECIAL_TYPE_GEOMETRY }; /* Enum @@ -190,6 +191,7 @@ public: virtual bool has_surface_transparent() { return false; } virtual bool has_surface_bssrdf() { return false; } virtual bool has_converter_blackbody() { return false; } + virtual bool has_bssrdf_bump() { return false; } vector<ShaderInput*> inputs; vector<ShaderOutput*> outputs; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index db402c5fc9f..70fa30fe03b 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -1276,16 +1276,18 @@ void ProxyNode::compile(OSLCompiler& compiler) BsdfNode::BsdfNode(bool scattering_) : ShaderNode("bsdf"), scattering(scattering_) { - closure = ccl::CLOSURE_BSSRDF_ID; - add_input("Color", SHADER_SOCKET_COLOR, make_float3(0.8f, 0.8f, 0.8f)); add_input("Normal", SHADER_SOCKET_NORMAL, ShaderInput::NORMAL); add_input("SurfaceMixWeight", SHADER_SOCKET_FLOAT, 0.0f, ShaderInput::USE_SVM); - if(scattering) + if(scattering) { + closure = CLOSURE_BSSRDF_CUBIC_ID; add_output("BSSRDF", SHADER_SOCKET_CLOSURE); - else + } + else { + closure = CLOSURE_BSDF_DIFFUSE_ID; add_output("BSDF", SHADER_SOCKET_CLOSURE); + } } void BsdfNode::compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3) @@ -1600,27 +1602,47 @@ void TransparentBsdfNode::compile(OSLCompiler& compiler) /* Subsurface Scattering Closure */ +static ShaderEnum subsurface_falloff_init() +{ + ShaderEnum enm; + + enm.insert("Cubic", CLOSURE_BSSRDF_CUBIC_ID); + enm.insert("Gaussian", CLOSURE_BSSRDF_GAUSSIAN_ID); + + return enm; +} + +ShaderEnum SubsurfaceScatteringNode::falloff_enum = subsurface_falloff_init(); + SubsurfaceScatteringNode::SubsurfaceScatteringNode() : BsdfNode(true) { name = "subsurface_scattering"; - closure = CLOSURE_BSSRDF_ID; + closure = CLOSURE_BSSRDF_CUBIC_ID; add_input("Scale", SHADER_SOCKET_FLOAT, 0.01f); add_input("Radius", SHADER_SOCKET_VECTOR, make_float3(0.1f, 0.1f, 0.1f)); - add_input("IOR", SHADER_SOCKET_FLOAT, 1.3f); + add_input("Texture Blur", SHADER_SOCKET_FLOAT, 1.0f); } void SubsurfaceScatteringNode::compile(SVMCompiler& compiler) { - BsdfNode::compile(compiler, input("Scale"), input("IOR"), input("Radius")); + BsdfNode::compile(compiler, input("Scale"), input("Texture Blur"), input("Radius")); } void SubsurfaceScatteringNode::compile(OSLCompiler& compiler) { + compiler.parameter("Falloff", falloff_enum[closure]); compiler.add(this, "node_subsurface_scattering"); } +bool SubsurfaceScatteringNode::has_bssrdf_bump() +{ + /* detect if anything is plugged into the normal input besides the default */ + ShaderInput *normal_in = input("Normal"); + return (normal_in->link && normal_in->link->parent->special_type != SHADER_SPECIAL_TYPE_GEOMETRY); +} + /* Emissive Closure */ EmissionNode::EmissionNode() @@ -1835,6 +1857,8 @@ void IsotropicVolumeNode::compile(OSLCompiler& compiler) GeometryNode::GeometryNode() : ShaderNode("geometry") { + special_type = SHADER_SPECIAL_TYPE_GEOMETRY; + add_input("NormalIn", SHADER_SOCKET_NORMAL, ShaderInput::NORMAL, ShaderInput::USE_OSL); add_output("Position", SHADER_SOCKET_POINT); add_output("Normal", SHADER_SOCKET_NORMAL); diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index 46b426ea20b..ce7942eaae5 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -271,6 +271,9 @@ class SubsurfaceScatteringNode : public BsdfNode { public: SHADER_NODE_CLASS(SubsurfaceScatteringNode) bool has_surface_bssrdf() { return true; } + bool has_bssrdf_bump(); + + static ShaderEnum falloff_enum; }; class EmissionNode : public ShaderNode { diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 3f269f44abe..291827f6f41 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -201,11 +201,16 @@ void OSLShaderManager::shading_system_init() "reflection", /* PATH_RAY_REFLECT */ "refraction", /* PATH_RAY_TRANSMIT */ "diffuse", /* PATH_RAY_DIFFUSE */ - "gloss_sharedy", /* PATH_RAY_GLOSSY */ + "glossy", /* PATH_RAY_GLOSSY */ "singular", /* PATH_RAY_SINGULAR */ "transparent", /* PATH_RAY_TRANSPARENT */ "shadow", /* PATH_RAY_SHADOW_OPAQUE */ "shadow", /* PATH_RAY_SHADOW_TRANSPARENT */ + + "__unused__", + "__unused__", + "diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */ + "glossy_ancestor", /* PATH_RAY_GLOSSY_ANCESTOR */ }; const int nraytypes = sizeof(raytypes)/sizeof(raytypes[0]); @@ -543,8 +548,10 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) current_shader->has_surface_emission = true; if(info->has_surface_transparent) current_shader->has_surface_transparent = true; - if(info->has_surface_bssrdf) + if(info->has_surface_bssrdf) { current_shader->has_surface_bssrdf = true; + current_shader->has_bssrdf_bump = true; /* can't detect yet */ + } } } @@ -705,8 +712,11 @@ void OSLCompiler::generate_nodes(const set<ShaderNode*>& nodes) current_shader->has_surface_emission = true; if(node->has_surface_transparent()) current_shader->has_surface_transparent = true; - if(node->has_surface_bssrdf()) + if(node->has_surface_bssrdf()) { current_shader->has_surface_bssrdf = true; + if(node->has_bssrdf_bump()) + current_shader->has_bssrdf_bump = true; + } } else nodes_done = false; @@ -773,6 +783,7 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader) shader->has_surface_emission = false; shader->has_surface_transparent = false; shader->has_surface_bssrdf = false; + shader->has_bssrdf_bump = false; shader->has_volume = false; shader->has_displacement = false; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 5b326e0a017..75b3b193e76 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -55,6 +55,7 @@ Shader::Shader() has_converter_blackbody = false; has_volume = false; has_displacement = false; + has_bssrdf_bump = false; used = false; @@ -236,11 +237,19 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc flag |= SD_HOMOGENEOUS_VOLUME; if(shader->has_surface_bssrdf) has_surface_bssrdf = true; + if(shader->has_bssrdf_bump) + flag |= SD_HAS_BSSRDF_BUMP; if(shader->has_converter_blackbody) has_converter_blackbody = true; + /* regular shader */ shader_flag[i++] = flag; shader_flag[i++] = shader->pass_id; + + /* shader with bump mapping */ + if(shader->graph_bump) + flag |= SD_HAS_BSSRDF_BUMP; + shader_flag[i++] = flag; shader_flag[i++] = shader->pass_id; } diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index d7eac603fa6..146b94c9ef5 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -78,6 +78,7 @@ public: bool has_displacement; bool has_surface_bssrdf; bool has_converter_blackbody; + bool has_bssrdf_bump; /* requested mesh attributes */ AttributeRequestSet attributes; diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index 4e617155465..9580823d141 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -495,8 +495,11 @@ void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done) current_shader->has_surface_emission = true; if(node->has_surface_transparent()) current_shader->has_surface_transparent = true; - if(node->has_surface_bssrdf()) + if(node->has_surface_bssrdf()) { current_shader->has_surface_bssrdf = true; + if(node->has_bssrdf_bump()) + current_shader->has_bssrdf_bump = true; + } /* end node is added outside of this */ } @@ -557,8 +560,11 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don current_shader->has_surface_emission = true; if(node->has_surface_transparent()) current_shader->has_surface_transparent = true; - if(node->has_surface_bssrdf()) + if(node->has_surface_bssrdf()) { current_shader->has_surface_bssrdf = true; + if(node->has_bssrdf_bump()) + current_shader->has_bssrdf_bump = true; + } } done.insert(node); @@ -676,6 +682,7 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in shader->has_surface_emission = false; shader->has_surface_transparent = false; shader->has_surface_bssrdf = false; + shader->has_bssrdf_bump = false; shader->has_converter_blackbody = false; shader->has_volume = false; shader->has_displacement = false; diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index cde547cd77c..32ce821624d 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -1078,6 +1078,19 @@ __device_inline float triangle_area(const float3 v1, const float3 v2, const floa __device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b) { +#if 0 + if(fabsf(N.y) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 0, 1); + return; + } + if(fabsf(N.z) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 1, 0); + return; + } +#endif + if(N.x != N.y || N.x != N.z) *a = make_float3(N.z-N.y, N.x-N.z, N.y-N.x); //(1,1,1)x N else @@ -1161,6 +1174,11 @@ __device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) /* NaN-safe math ops */ +__device_inline float safe_sqrtf(float f) +{ + return sqrtf(max(f, 0.0f)); +} + __device float safe_asinf(float a) { if(a <= -1.0f) |