diff options
Diffstat (limited to 'intern/cycles/kernel/closure/bssrdf.h')
-rw-r--r-- | intern/cycles/kernel/closure/bssrdf.h | 406 |
1 files changed, 43 insertions, 363 deletions
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index 562daf1286d..0f9278bba89 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -14,8 +14,7 @@ * limitations under the License. */ -#ifndef __KERNEL_BSSRDF_H__ -#define __KERNEL_BSSRDF_H__ +#pragma once CCL_NAMESPACE_BEGIN @@ -24,310 +23,71 @@ typedef ccl_addr_space struct Bssrdf { float3 radius; float3 albedo; - float sharpness; - float texture_blur; float roughness; - float channels; + float anisotropy; } Bssrdf; static_assert(sizeof(ShaderClosure) >= sizeof(Bssrdf), "Bssrdf is too large!"); -/* Planar Truncated Gaussian - * - * Note how this is different from the typical gaussian, this one integrates - * to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky - * that integrating x*exp(-x) gives a nice closed form solution. */ - -/* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */ -#define GAUSS_TRUNCATE 12.46f - -ccl_device float bssrdf_gaussian_eval(const float radius, float r) -{ - /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm - * = 1 - exp(-Rm*Rm/(2*v)) */ - const float v = radius * radius * (0.25f * 0.25f); - const float Rm = sqrtf(v * GAUSS_TRUNCATE); - - if (r >= Rm) - return 0.0f; - - return expf(-r * r / (2.0f * v)) / (2.0f * M_PI_F * v); -} - -ccl_device float bssrdf_gaussian_pdf(const float radius, float r) +ccl_device float bssrdf_dipole_compute_Rd(float alpha_prime, float fourthirdA) { - /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ - const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE); - - return bssrdf_gaussian_eval(radius, r) * (1.0f / (area_truncated)); + float s = sqrtf(3.0f * (1.0f - alpha_prime)); + return 0.5f * alpha_prime * (1.0f + expf(-fourthirdA * s)) * expf(-s); } -ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h) +ccl_device float bssrdf_dipole_compute_alpha_prime(float rd, float fourthirdA) { - /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v)) - * r = sqrt(-2*v*logf(xi)) */ - const float v = radius * radius * (0.25f * 0.25f); - const float Rm = sqrtf(v * GAUSS_TRUNCATE); - - /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ - const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE); - - /* r(xi) */ - const float r_squared = -2.0f * v * logf(1.0f - xi * area_truncated); - *r = sqrtf(r_squared); - - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm * Rm - r_squared); -} - -/* Planar Cubic BSSRDF falloff - * - * This is basically (Rm - x)^3, with some factors to normalize it. For sampling - * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as - * far as I can tell has no closed form solution. So we get an iterative solution - * instead with newton-raphson. */ - -ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r) -{ - if (sharpness == 0.0f) { - const float Rm = radius; - - if (r >= Rm) - return 0.0f; - - /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */ - const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm; - const float f = Rm - r; - const float num = f * f * f; - - return (10.0f * num) / (Rm5 * M_PI_F); + /* Little Newton solver. */ + if (rd < 1e-4f) { + return 0.0f; + } + if (rd >= 0.995f) { + return 0.999999f; } - else { - float Rm = radius * (1.0f + sharpness); - - if (r >= Rm) - return 0.0f; - /* custom variation with extra sharpness, to match the previous code */ - const float y = 1.0f / (1.0f + sharpness); - float Rmy, ry, ryinv; + float x0 = 0.0f; + float x1 = 1.0f; + float xmid, fmid; - if (sharpness == 1.0f) { - Rmy = sqrtf(Rm); - ry = sqrtf(r); - ryinv = (ry > 0.0f) ? 1.0f / ry : 0.0f; + constexpr const int max_num_iterations = 12; + for (int i = 0; i < max_num_iterations; ++i) { + xmid = 0.5f * (x0 + x1); + fmid = bssrdf_dipole_compute_Rd(xmid, fourthirdA); + if (fmid < rd) { + x0 = xmid; } else { - Rmy = powf(Rm, y); - ry = powf(r, y); - ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f; + x1 = xmid; } - - const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy; - const float f = Rmy - ry; - const float num = f * (f * f) * (y * ryinv); - - return (10.0f * num) / (Rmy5 * M_PI_F); - } -} - -ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r) -{ - return bssrdf_cubic_eval(radius, sharpness, r); -} - -/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ -ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi) -{ - /* newton-raphson iteration, usually succeeds in 2-4 iterations, except - * outside 0.02 ... 0.98 where it can go up to 10, so overall performance - * should not be too bad */ - const float tolerance = 1e-6f; - const int max_iteration_count = 10; - float x = 0.25f; - int i; - - for (i = 0; i < max_iteration_count; i++) { - float x2 = x * x; - float x3 = x2 * x; - float nx = (1.0f - x); - - float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi; - float f_ = 20.0f * (x * nx) * (nx * nx); - - if (fabsf(f) < tolerance || f_ == 0.0f) - break; - - x = saturate(x - f / f_); } - return x; + return xmid; } -ccl_device void bssrdf_cubic_sample( - const float radius, const float sharpness, float xi, float *r, float *h) +ccl_device void bssrdf_setup_radius(Bssrdf *bssrdf, const ClosureType type, const float eta) { - float Rm = radius; - float r_ = bssrdf_cubic_quintic_root_find(xi); - - if (sharpness != 0.0f) { - r_ = powf(r_, 1.0f + sharpness); - Rm *= (1.0f + sharpness); - } - - r_ *= Rm; - *r = r_; - - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm * Rm - r_ * r_); -} - -/* Approximate Reflectance Profiles - * http://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf - */ - -/* This is a bit arbitrary, just need big enough radius so it matches - * the mean free length, but still not too big so sampling is still - * effective. Might need some further tweaks. - */ -#define BURLEY_TRUNCATE 16.0f -#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE) - -ccl_device_inline float bssrdf_burley_fitting(float A) -{ - /* Diffuse surface transmission, equation (6). */ - return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f); -} - -/* Scale mean free path length so it gives similar looking result - * to Cubic and Gaussian models. - */ -ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r) -{ - return 0.25f * M_1_PI_F * r; -} - -ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf) -{ - /* Mean free path length. */ - const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius); - /* Surface albedo. */ - const float3 A = bssrdf->albedo; - const float3 s = make_float3( - bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z)); - - bssrdf->radius = l / s; -} - -ccl_device float bssrdf_burley_eval(const float d, float r) -{ - const float Rm = BURLEY_TRUNCATE * d; - - if (r >= Rm) - return 0.0f; - - /* Burley reflectance profile, equation (3). - * - * NOTES: - * - Surface albedo is already included into sc->weight, no need to - * multiply by this term here. - * - This is normalized diffuse model, so the equation is multiplied - * by 2*pi, which also matches cdf(). - */ - float exp_r_3_d = expf(-r / (3.0f * d)); - float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d; - return (exp_r_d + exp_r_3_d) / (4.0f * d); -} - -ccl_device float bssrdf_burley_pdf(const float d, float r) -{ - return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF); -} - -/* Find the radius for desired CDF value. - * Returns scaled radius, meaning the result is to be scaled up by d. - * Since there's no closed form solution we do Newton-Raphson method to find it. - */ -ccl_device_forceinline float bssrdf_burley_root_find(float xi) -{ - const float tolerance = 1e-6f; - const int max_iteration_count = 10; - /* Do initial guess based on manual curve fitting, this allows us to reduce - * number of iterations to maximum 4 across the [0..1] range. We keep maximum - * number of iteration higher just to be sure we didn't miss root in some - * corner case. - */ - float r; - if (xi <= 0.9f) { - r = expf(xi * xi * 2.4f) - 1.0f; + if (type == CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) { + /* Scale mean free path length so it gives similar looking result to older + * Cubic, Gaussian and Burley models. */ + bssrdf->radius *= 0.25f * M_1_PI_F; } else { - /* TODO(sergey): Some nicer curve fit is possible here. */ - r = 15.0f; - } - /* Solve against scaled radius. */ - for (int i = 0; i < max_iteration_count; i++) { - float exp_r_3 = expf(-r / 3.0f); - float exp_r = exp_r_3 * exp_r_3 * exp_r_3; - float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi; - float f_ = 0.25f * exp_r + 0.25f * exp_r_3; + /* Adjust radius based on IOR and albedo. */ + const float inv_eta = 1.0f / eta; + const float F_dr = inv_eta * (-1.440f * inv_eta + 0.710f) + 0.668f + 0.0636f * eta; + const float fourthirdA = (4.0f / 3.0f) * (1.0f + F_dr) / + (1.0f - F_dr); /* From Jensen's Fdr ratio formula. */ - if (fabsf(f) < tolerance || f_ == 0.0f) { - break; - } + const float3 alpha_prime = make_float3( + bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.x, fourthirdA), + bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.y, fourthirdA), + bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.z, fourthirdA)); - r = r - f / f_; - if (r < 0.0f) { - r = 0.0f; - } + bssrdf->radius *= sqrt(3.0f * (one_float3() - alpha_prime)); } - return r; } -ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h) -{ - const float Rm = BURLEY_TRUNCATE * d; - const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d; - - *r = r_; - - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm * Rm - r_ * r_); -} - -/* None BSSRDF falloff - * - * Samples distributed over disk with no falloff, for reference. */ - -ccl_device float bssrdf_none_eval(const float radius, float r) -{ - const float Rm = radius; - return (r < Rm) ? 1.0f : 0.0f; -} - -ccl_device float bssrdf_none_pdf(const float radius, float r) -{ - /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */ - const float Rm = radius; - const float area = (M_PI_F * Rm * Rm); - - return bssrdf_none_eval(radius, r) / area; -} - -ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h) -{ - /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2 - * r = sqrt(xi)*Rm */ - const float Rm = radius; - const float r_ = sqrtf(xi) * Rm; - - *r = r_; - - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm * Rm - r_ * r_); -} - -/* Generic */ +/* Setup */ ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight) { @@ -342,7 +102,7 @@ ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight) return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL; } -ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type) +ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type, const float ior) { int flag = 0; int bssrdf_channels = 3; @@ -371,7 +131,7 @@ ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type) if (bssrdf_channels < 3) { /* Add diffuse BSDF if any radius too small. */ #ifdef __PRINCIPLED__ - if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) { + if (bssrdf->roughness != FLT_MAX) { float roughness = bssrdf->roughness; float3 N = bssrdf->N; @@ -401,16 +161,9 @@ ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type) /* Setup BSSRDF if radius is large enough. */ if (bssrdf_channels > 0) { bssrdf->type = type; - bssrdf->channels = bssrdf_channels; - bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels; - bssrdf->texture_blur = saturate(bssrdf->texture_blur); - bssrdf->sharpness = saturate(bssrdf->sharpness); + bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf_channels; - if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID || - type == CLOSURE_BSSRDF_RANDOM_WALK_ID || - type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) { - bssrdf_burley_setup(bssrdf); - } + bssrdf_setup_radius(bssrdf, type, ior); flag |= SD_BSSRDF; } @@ -422,77 +175,4 @@ ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type) return flag; } -ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h) -{ - const Bssrdf *bssrdf = (const Bssrdf *)sc; - float radius; - - /* Sample color channel and reuse random number. Only a subset of channels - * may be used if their radius was too small to handle as BSSRDF. */ - xi *= bssrdf->channels; - - if (xi < 1.0f) { - radius = (bssrdf->radius.x > 0.0f) ? bssrdf->radius.x : - (bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : - bssrdf->radius.z; - } - else if (xi < 2.0f) { - xi -= 1.0f; - radius = (bssrdf->radius.x > 0.0f && bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : - bssrdf->radius.z; - } - else { - xi -= 2.0f; - radius = bssrdf->radius.z; - } - - /* Sample BSSRDF. */ - if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) { - bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h); - } - else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) { - bssrdf_gaussian_sample(radius, xi, r, h); - } - else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || - * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) */ - bssrdf_burley_sample(radius, xi, r, h); - } -} - -ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r) -{ - if (radius == 0.0f) { - return 0.0f; - } - else if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) { - return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r); - } - else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) { - return bssrdf_gaussian_pdf(radius, r); - } - else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || - * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/ - return bssrdf_burley_pdf(radius, r); - } -} - -ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r) -{ - const Bssrdf *bssrdf = (const Bssrdf *)sc; - - return make_float3(bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r), - bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r), - bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r)); -} - -ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r) -{ - const Bssrdf *bssrdf = (const Bssrdf *)sc; - float3 pdf = bssrdf_eval(sc, r); - - return (pdf.x + pdf.y + pdf.z) / bssrdf->channels; -} - CCL_NAMESPACE_END - -#endif /* __KERNEL_BSSRDF_H__ */ |