diff options
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/blender/blender_shader.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/bvh_util.h | 24 | ||||
-rw-r--r-- | intern/cycles/kernel/closure/bssrdf.h | 174 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/integrator_subsurface.h | 460 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/integrator_subsurface_disk.h | 195 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h | 465 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 20 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/osl_bssrdf.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_closure.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_types.h | 3 | ||||
-rw-r--r-- | intern/cycles/render/nodes.cpp | 2 |
12 files changed, 901 insertions, 457 deletions
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 0b8aea15d6c..db5eadeed56 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -489,6 +489,9 @@ static ShaderNode *add_node(Scene *scene, SubsurfaceScatteringNode *subsurface = graph->create_node<SubsurfaceScatteringNode>(); switch (b_subsurface_node.falloff()) { + case BL::ShaderNodeSubsurfaceScattering::falloff_BURLEY: + subsurface->set_method(CLOSURE_BSSRDF_BURLEY_ID); + break; case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK_FIXED_RADIUS: subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); break; @@ -605,6 +608,9 @@ static ShaderNode *add_node(Scene *scene, break; } switch (b_principled_node.subsurface_method()) { + case BL::ShaderNodeBsdfPrincipled::subsurface_method_BURLEY: + principled->set_subsurface_method(CLOSURE_BSSRDF_BURLEY_ID); + break; case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK_FIXED_RADIUS: principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); break; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index c53d3d4b962..e0d48361650 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -241,6 +241,8 @@ set(SRC_INTEGRATOR_HEADERS integrator/integrator_state_template.h integrator/integrator_state_util.h integrator/integrator_subsurface.h + integrator/integrator_subsurface_disk.h + integrator/integrator_subsurface_random_walk.h integrator/integrator_volume_stack.h ) diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h index 9f188a93e2c..e16da9755f2 100644 --- a/intern/cycles/kernel/bvh/bvh_util.h +++ b/intern/cycles/kernel/bvh/bvh_util.h @@ -113,6 +113,30 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits) } #endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */ +/* For subsurface scattering, only sorting a small amount of intersections + * so bubble sort is fine for CPU and GPU. */ +ccl_device_inline void sort_intersections_and_normals(Intersection *hits, + float3 *Ng, + uint num_hits) +{ + bool swapped; + do { + swapped = false; + for (int j = 0; j < num_hits - 1; ++j) { + if (hits[j].t > hits[j + 1].t) { + struct Intersection tmp_hit = hits[j]; + struct float3 tmp_Ng = Ng[j]; + hits[j] = hits[j + 1]; + Ng[j] = Ng[j + 1]; + hits[j + 1] = tmp_hit; + Ng[j + 1] = tmp_Ng; + swapped = true; + } + } + --num_hits; + } while (swapped); +} + /* Utility to quickly get flags from an intersection. */ ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *ccl_restrict kg, diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index e095314678a..db183887018 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -29,6 +29,8 @@ typedef ccl_addr_space struct Bssrdf { static_assert(sizeof(ShaderClosure) >= sizeof(Bssrdf), "Bssrdf is too large!"); +/* Random Walk BSSRDF */ + ccl_device float bssrdf_dipole_compute_Rd(float alpha_prime, float fourthirdA) { float s = sqrtf(3.0f * (1.0f - alpha_prime)); @@ -66,7 +68,7 @@ ccl_device float bssrdf_dipole_compute_alpha_prime(float rd, float fourthirdA) ccl_device void bssrdf_setup_radius(Bssrdf *bssrdf, const ClosureType type, const float eta) { - if (type == CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) { + if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) { /* Scale mean free path length so it gives similar looking result to older * Cubic, Gaussian and Burley models. */ bssrdf->radius *= 0.25f * M_1_PI_F; @@ -87,6 +89,176 @@ ccl_device void bssrdf_setup_radius(Bssrdf *bssrdf, const ClosureType type, cons } } +/* Christensen-Burley BSSRDF. + * + * Approximate Reflectance Profiles from + * http://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf + */ + +/* This is a bit arbitrary, just need big enough radius so it matches + * the mean free length, but still not too big so sampling is still + * effective. */ +#define BURLEY_TRUNCATE 16.0f +#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE) + +ccl_device_inline float bssrdf_burley_fitting(float A) +{ + /* Diffuse surface transmission, equation (6). */ + return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f); +} + +/* Scale mean free path length so it gives similar looking result + * to Cubic and Gaussian models. */ +ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r) +{ + return 0.25f * M_1_PI_F * r; +} + +ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf) +{ + /* Mean free path length. */ + const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius); + /* Surface albedo. */ + const float3 A = bssrdf->albedo; + const float3 s = make_float3( + bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z)); + + bssrdf->radius = l / s; +} + +ccl_device float bssrdf_burley_eval(const float d, float r) +{ + const float Rm = BURLEY_TRUNCATE * d; + + if (r >= Rm) + return 0.0f; + + /* Burley reflectance profile, equation (3). + * + * NOTES: + * - Surface albedo is already included into sc->weight, no need to + * multiply by this term here. + * - This is normalized diffuse model, so the equation is multiplied + * by 2*pi, which also matches cdf(). + */ + float exp_r_3_d = expf(-r / (3.0f * d)); + float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d; + return (exp_r_d + exp_r_3_d) / (4.0f * d); +} + +ccl_device float bssrdf_burley_pdf(const float d, float r) +{ + if (r == 0.0f) { + return 0.0f; + } + + return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF); +} + +/* Find the radius for desired CDF value. + * Returns scaled radius, meaning the result is to be scaled up by d. + * Since there's no closed form solution we do Newton-Raphson method to find it. + */ +ccl_device_forceinline float bssrdf_burley_root_find(float xi) +{ + const float tolerance = 1e-6f; + const int max_iteration_count = 10; + /* Do initial guess based on manual curve fitting, this allows us to reduce + * number of iterations to maximum 4 across the [0..1] range. We keep maximum + * number of iteration higher just to be sure we didn't miss root in some + * corner case. + */ + float r; + if (xi <= 0.9f) { + r = expf(xi * xi * 2.4f) - 1.0f; + } + else { + /* TODO(sergey): Some nicer curve fit is possible here. */ + r = 15.0f; + } + /* Solve against scaled radius. */ + for (int i = 0; i < max_iteration_count; i++) { + float exp_r_3 = expf(-r / 3.0f); + float exp_r = exp_r_3 * exp_r_3 * exp_r_3; + float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi; + float f_ = 0.25f * exp_r + 0.25f * exp_r_3; + + if (fabsf(f) < tolerance || f_ == 0.0f) { + break; + } + + r = r - f / f_; + if (r < 0.0f) { + r = 0.0f; + } + } + return r; +} + +ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h) +{ + const float Rm = BURLEY_TRUNCATE * d; + const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d; + + *r = r_; + + /* h^2 + r^2 = Rm^2 */ + *h = safe_sqrtf(Rm * Rm - r_ * r_); +} + +ccl_device float bssrdf_num_channels(const float3 radius) +{ + float channels = 0; + if (radius.x > 0.0f) { + channels += 1.0f; + } + if (radius.y > 0.0f) { + channels += 1.0f; + } + if (radius.z > 0.0f) { + channels += 1.0f; + } + return channels; +} + +ccl_device void bssrdf_sample(const float3 radius, float xi, float *r, float *h) +{ + const float num_channels = bssrdf_num_channels(radius); + float sampled_radius; + + /* Sample color channel and reuse random number. Only a subset of channels + * may be used if their radius was too small to handle as BSSRDF. */ + xi *= num_channels; + + if (xi < 1.0f) { + sampled_radius = (radius.x > 0.0f) ? radius.x : (radius.y > 0.0f) ? radius.y : radius.z; + } + else if (xi < 2.0f) { + xi -= 1.0f; + sampled_radius = (radius.x > 0.0f && radius.y > 0.0f) ? radius.y : radius.z; + } + else { + xi -= 2.0f; + sampled_radius = radius.z; + } + + /* Sample BSSRDF. */ + bssrdf_burley_sample(sampled_radius, xi, r, h); +} + +ccl_device_forceinline float3 bssrdf_eval(const float3 radius, float r) +{ + return make_float3(bssrdf_burley_pdf(radius.x, r), + bssrdf_burley_pdf(radius.y, r), + bssrdf_burley_pdf(radius.z, r)); +} + +ccl_device_forceinline float bssrdf_pdf(const float3 radius, float r) +{ + float3 pdf = bssrdf_eval(radius, r); + return (pdf.x + pdf.y + pdf.z) / bssrdf_num_channels(radius); +} + /* Setup */ ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight) diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h index 9026de1c064..7ca676351db 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface.h @@ -29,6 +29,8 @@ #include "kernel/closure/volume.h" #include "kernel/integrator/integrator_intersect_volume_stack.h" +#include "kernel/integrator/integrator_subsurface_disk.h" +#include "kernel/integrator/integrator_subsurface_random_walk.h" CCL_NAMESPACE_BEGIN @@ -56,7 +58,10 @@ ccl_device int subsurface_bounce(INTEGRATOR_STATE_ARGS, ShaderData *sd, const Sh /* Pass BSSRDF parameters. */ const uint32_t path_flag = INTEGRATOR_STATE_WRITE(path, flag); - INTEGRATOR_STATE_WRITE(path, flag) = (path_flag & ~PATH_RAY_CAMERA) | PATH_RAY_SUBSURFACE; + INTEGRATOR_STATE_WRITE(path, flag) = (path_flag & ~PATH_RAY_CAMERA) | + ((sc->type == CLOSURE_BSSRDF_BURLEY_ID) ? + PATH_RAY_SUBSURFACE_DISK : + PATH_RAY_SUBSURFACE_RANDOM_WALK); INTEGRATOR_STATE_WRITE(path, throughput) *= shader_bssrdf_sample_weight(sd, sc); /* Advance random number offset for bounce. */ @@ -123,448 +128,6 @@ ccl_device void subsurface_shader_data_setup(INTEGRATOR_STATE_ARGS, ShaderData * } } -/* Random walk subsurface scattering. - * - * "Practical and Controllable Subsurface Scattering for Production Path - * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ - -/* Support for anisotropy from: - * "Path Traced Subsurface Scattering using Anisotropic Phase Functions - * and Non-Exponential Free Flights". - * Magnus Wrenninge, Ryusuke Villemin, Christophe Hery. - * https://graphics.pixar.com/library/PathTracedSubsurface/ */ - -ccl_device void subsurface_random_walk_remap( - const float albedo, const float d, float g, float *sigma_t, float *alpha) -{ - /* Compute attenuation and scattering coefficients from albedo. */ - const float g2 = g * g; - const float g3 = g2 * g; - const float g4 = g3 * g; - const float g5 = g4 * g; - const float g6 = g5 * g; - const float g7 = g6 * g; - - const float A = 1.8260523782f + -1.28451056436f * g + -1.79904629312f * g2 + - 9.19393289202f * g3 + -22.8215585862f * g4 + 32.0234874259f * g5 + - -23.6264803333f * g6 + 7.21067002658f * g7; - const float B = 4.98511194385f + - 0.127355959438f * - expf(31.1491581433f * g + -201.847017512f * g2 + 841.576016723f * g3 + - -2018.09288505f * g4 + 2731.71560286f * g5 + -1935.41424244f * g6 + - 559.009054474f * g7); - const float C = 1.09686102424f + -0.394704063468f * g + 1.05258115941f * g2 + - -8.83963712726f * g3 + 28.8643230661f * g4 + -46.8802913581f * g5 + - 38.5402837518f * g6 + -12.7181042538f * g7; - const float D = 0.496310210422f + 0.360146581622f * g + -2.15139309747f * g2 + - 17.8896899217f * g3 + -55.2984010333f * g4 + 82.065982243f * g5 + - -58.5106008578f * g6 + 15.8478295021f * g7; - const float E = 4.23190299701f + - 0.00310603949088f * - expf(76.7316253952f * g + -594.356773233f * g2 + 2448.8834203f * g3 + - -5576.68528998f * g4 + 7116.60171912f * g5 + -4763.54467887f * g6 + - 1303.5318055f * g7); - const float F = 2.40602999408f + -2.51814844609f * g + 9.18494908356f * g2 + - -79.2191708682f * g3 + 259.082868209f * g4 + -403.613804597f * g5 + - 302.85712436f * g6 + -87.4370473567f * g7; - - const float blend = powf(albedo, 0.25f); - - *alpha = (1.0f - blend) * A * powf(atanf(B * albedo), C) + - blend * D * powf(atanf(E * albedo), F); - *alpha = clamp(*alpha, 0.0f, 0.999999f); // because of numerical precision - - float sigma_t_prime = 1.0f / fmaxf(d, 1e-16f); - *sigma_t = sigma_t_prime / (1.0f - g); -} - -ccl_device void subsurface_random_walk_coefficients(const float3 albedo, - const float3 radius, - const float anisotropy, - float3 *sigma_t, - float3 *alpha, - float3 *throughput) -{ - float sigma_t_x, sigma_t_y, sigma_t_z; - float alpha_x, alpha_y, alpha_z; - - subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x); - subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y); - subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z); - - /* Throughput already contains closure weight at this point, which includes the - * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo - * which will be added through scattering. */ - *throughput = safe_divide_color(*throughput, albedo); - - /* With low albedo values (like 0.025) we get diffusion_length 1.0 and - * infinite phase functions. To avoid a sharp discontinuity as we go from - * such values to 0.0, increase alpha and reduce the throughput to compensate. */ - const float min_alpha = 0.2f; - if (alpha_x < min_alpha) { - (*throughput).x *= alpha_x / min_alpha; - alpha_x = min_alpha; - } - if (alpha_y < min_alpha) { - (*throughput).y *= alpha_y / min_alpha; - alpha_y = min_alpha; - } - if (alpha_z < min_alpha) { - (*throughput).z *= alpha_z / min_alpha; - alpha_z = min_alpha; - } - - *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); - *alpha = make_float3(alpha_x, alpha_y, alpha_z); -} - -/* References for Dwivedi sampling: - * - * [1] "A Zero-variance-based Sampling Scheme for Monte Carlo Subsurface Scattering" - * by Jaroslav Křivánek and Eugene d'Eon (SIGGRAPH 2014) - * https://cgg.mff.cuni.cz/~jaroslav/papers/2014-zerovar/ - * - * [2] "Improving the Dwivedi Sampling Scheme" - * by Johannes Meng, Johannes Hanika, and Carsten Dachsbacher (EGSR 2016) - * https://cg.ivd.kit.edu/1951.php - * - * [3] "Zero-Variance Theory for Efficient Subsurface Scattering" - * by Eugene d'Eon and Jaroslav Křivánek (SIGGRAPH 2020) - * https://iliyan.com/publications/RenderingCourse2020 - */ - -ccl_device_forceinline float eval_phase_dwivedi(float v, float phase_log, float cos_theta) -{ - /* Eq. 9 from [2] using precomputed log((v + 1) / (v - 1)) */ - return 1.0f / ((v - cos_theta) * phase_log); -} - -ccl_device_forceinline float sample_phase_dwivedi(float v, float phase_log, float rand) -{ - /* Based on Eq. 10 from [2]: `v - (v + 1) * pow((v - 1) / (v + 1), rand)` - * Since we're already pre-computing `phase_log = log((v + 1) / (v - 1))` for the evaluation, - * we can implement the power function like this. */ - return v - (v + 1.0f) * expf(-rand * phase_log); -} - -ccl_device_forceinline float diffusion_length_dwivedi(float alpha) -{ - /* Eq. 67 from [3] */ - return 1.0f / sqrtf(1.0f - powf(alpha, 2.44294f - 0.0215813f * alpha + 0.578637f / alpha)); -} - -ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, float randv) -{ - float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); - float phi = M_2PI_F * randv; - float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta); - - float3 T, B; - make_orthonormals(D, &T, &B); - return dir.x * T + dir.y * B + dir.z * D; -} - -ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, - float t, - bool hit, - float3 *transmittance) -{ - float3 T = volume_color_transmittance(sigma_t, t); - if (transmittance) { - *transmittance = T; - } - return hit ? T : sigma_t * T; -} - -/* Define the below variable to get the similarity code active, - * and the value represents the cutoff level */ -# define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9 - -ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS, - RNGState rng_state, - Ray &ray, - LocalIntersection &ss_isect) -{ - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - - const float3 P = INTEGRATOR_STATE(ray, P); - const float3 N = INTEGRATOR_STATE(ray, D); - const float ray_dP = INTEGRATOR_STATE(ray, dP); - const float time = INTEGRATOR_STATE(ray, time); - const float3 Ng = INTEGRATOR_STATE(isect, Ng); - const int object = INTEGRATOR_STATE(isect, object); - - /* Sample diffuse surface scatter into the object. */ - float3 D; - float pdf; - sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf); - if (dot(-Ng, D) <= 0.0f) { - return false; - } - - /* Setup ray. */ - ray.P = ray_offset(P, -Ng); - ray.D = D; - ray.t = FLT_MAX; - ray.time = time; - ray.dP = ray_dP; - ray.dD = differential_zero_compact(); - -# ifndef __KERNEL_OPTIX__ - /* Compute or fetch object transforms. */ - Transform ob_itfm ccl_optional_struct_init; - Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); -# endif - - /* Convert subsurface to volume coefficients. - * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ - const float3 albedo = INTEGRATOR_STATE(subsurface, albedo); - const float3 radius = INTEGRATOR_STATE(subsurface, radius); - const float anisotropy = INTEGRATOR_STATE(subsurface, anisotropy); - - float3 sigma_t, alpha; - float3 throughput = INTEGRATOR_STATE_WRITE(path, throughput); - subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); - float3 sigma_s = sigma_t * alpha; - - /* Theoretically it should be better to use the exact alpha for the channel we're sampling at - * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange - * for making the code significantly more complex and slower (if direction sampling depends on - * the sampled channel, we need to compute its PDF per-channel and consider it for MIS later on). - * - * Since the strength of the guided sampling increases as alpha gets lower, using a value that - * is too low results in fireflies while one that's too high just gives a bit more noise. - * Therefore, the code here uses the highest of the three albedos to be safe. */ - const float diffusion_length = diffusion_length_dwivedi(max3(alpha)); - - if (diffusion_length == 1.0f) { - /* With specific values of alpha the length might become 1, which in asymptotic makes phase to - * be infinite. After first bounce it will cause throughput to be 0. Do early output, avoiding - * numerical issues and extra unneeded work. */ - return false; - } - - /* Precompute term for phase sampling. */ - const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); - - /* Modify state for RNGs, decorrelated from other paths. */ - rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); - - /* Random walk until we hit the surface again. */ - bool hit = false; - bool have_opposite_interface = false; - float opposite_distance = 0.0f; - - /* Todo: Disable for alpha>0.999 or so? */ - /* Our heuristic, a compromise between guiding and classic. */ - const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f)); - -# ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL - float3 sigma_s_star = sigma_s * (1.0f - anisotropy); - float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star; - float3 sigma_t_org = sigma_t; - float3 sigma_s_org = sigma_s; - const float anisotropy_org = anisotropy; - const float guided_fraction_org = guided_fraction; -# endif - - for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) { - /* Advance random number offset. */ - rng_state.rng_offset += PRNG_BOUNCE_NUM; - -# ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL - // shadow with local variables according to depth - float anisotropy, guided_fraction; - float3 sigma_s, sigma_t; - if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) { - anisotropy = anisotropy_org; - guided_fraction = guided_fraction_org; - sigma_t = sigma_t_org; - sigma_s = sigma_s_org; - } - else { - anisotropy = 0.0f; - guided_fraction = 0.75f; // back to isotropic heuristic from Blender - sigma_t = sigma_t_star; - sigma_s = sigma_s_star; - } -# endif - - /* Sample color channel, use MIS with balance heuristic. */ - float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL); - float3 channel_pdf; - int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf); - float sample_sigma_t = volume_channel_get(sigma_t, channel); - float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE); - - /* We need the result of the raycast to compute the full guided PDF, so just remember the - * relevant terms to avoid recomputing them later. */ - float backward_fraction = 0.0f; - float forward_pdf_factor = 0.0f; - float forward_stretching = 1.0f; - float backward_pdf_factor = 0.0f; - float backward_stretching = 1.0f; - - /* For the initial ray, we already know the direction, so just do classic distance sampling. */ - if (bounce > 0) { - /* Decide whether we should use guided or classic sampling. */ - bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction); - - /* Determine if we want to sample away from the incoming interface. - * This only happens if we found a nearby opposite interface, and the probability for it - * depends on how close we are to it already. - * This probability term comes from the recorded presentation of [3]. */ - bool guide_backward = false; - if (have_opposite_interface) { - /* Compute distance of the random walk between the tangent plane at the starting point - * and the assumed opposite interface (the parallel plane that contains the point we - * found in our ray query for the opposite side). */ - float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance); - backward_fraction = 1.0f / - (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length)); - guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction; - } - - /* Sample scattering direction. */ - float scatter_u, scatter_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v); - float cos_theta; - float hg_pdf; - if (guided) { - cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); - /* The backwards guiding distribution is just mirrored along sd->N, so swapping the - * sign here is enough to sample from that instead. */ - if (guide_backward) { - cos_theta = -cos_theta; - } - float3 newD = direction_from_cosine(N, cos_theta, scatter_v); - hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy); - ray.D = newD; - } - else { - float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf); - cos_theta = dot(newD, N); - ray.D = newD; - } - - /* Compute PDF factor caused by phase sampling (as the ratio of guided / classic). - * Since phase sampling is channel-independent, we can get away with applying a factor - * to the guided PDF, which implicitly means pulling out the classic PDF term and letting - * it cancel with an equivalent term in the numerator of the full estimator. - * For the backward PDF, we again reuse the same probability distribution with a sign swap. - */ - forward_pdf_factor = M_1_2PI_F * eval_phase_dwivedi(diffusion_length, phase_log, cos_theta) / - hg_pdf; - backward_pdf_factor = M_1_2PI_F * - eval_phase_dwivedi(diffusion_length, phase_log, -cos_theta) / hg_pdf; - - /* Prepare distance sampling. - * For the backwards case, this also needs the sign swapped since now directions against - * sd->N (and therefore with negative cos_theta) are preferred. */ - forward_stretching = (1.0f - cos_theta / diffusion_length); - backward_stretching = (1.0f + cos_theta / diffusion_length); - if (guided) { - sample_sigma_t *= guide_backward ? backward_stretching : forward_stretching; - } - } - - /* Sample direction along ray. */ - float t = -logf(1.0f - randt) / sample_sigma_t; - - /* On the first bounce, we use the raycast to check if the opposite side is nearby. - * If yes, we will later use backwards guided sampling in order to have a decent - * chance of connecting to it. - * Todo: Maybe use less than 10 times the mean free path? */ - ray.t = (bounce == 0) ? max(t, 10.0f / (min3(sigma_t))) : t; - scene_intersect_local(kg, &ray, &ss_isect, object, NULL, 1); - hit = (ss_isect.num_hits > 0); - - if (hit) { -# ifdef __KERNEL_OPTIX__ - /* t is always in world space with OptiX. */ - ray.t = ss_isect.hits[0].t; -# else - /* Compute world space distance to surface hit. */ - float3 D = transform_direction(&ob_itfm, ray.D); - D = normalize(D) * ss_isect.hits[0].t; - ray.t = len(transform_direction(&ob_tfm, D)); -# endif - } - - if (bounce == 0) { - /* Check if we hit the opposite side. */ - if (hit) { - have_opposite_interface = true; - opposite_distance = dot(ray.P + ray.t * ray.D - P, -N); - } - /* Apart from the opposite side check, we were supposed to only trace up to distance t, - * so check if there would have been a hit in that case. */ - hit = ray.t < t; - } - - /* Use the distance to the exit point for the throughput update if we found one. */ - if (hit) { - t = ray.t; - } - else if (bounce == 0) { - /* Restore original position if nothing was hit after the first bounce, - * without the ray_offset() that was added to avoid self-intersection. - * Otherwise if that offset is relatively large compared to the scattering - * radius, we never go back up high enough to exit the surface. */ - ray.P = P; - } - - /* Advance to new scatter location. */ - ray.P += t * ray.D; - - float3 transmittance; - float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); - if (bounce > 0) { - /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */ - float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); - - if (have_opposite_interface) { - /* First step of MIS: Depending on geometry we might have two methods for guided - * sampling, so perform MIS between them. */ - float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL); - guided_pdf = mix( - guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction); - } - else { - /* Just include phase sampling factor otherwise. */ - guided_pdf *= forward_pdf_factor; - } - - /* Now we apply the MIS balance heuristic between the classic and guided sampling. */ - pdf = mix(pdf, guided_pdf, guided_fraction); - } - - /* Finally, we're applying MIS again to combine the three color channels. - * Altogether, the MIS computation combines up to nine different estimators: - * {classic, guided, backward_guided} x {r, g, b} */ - throughput *= (hit ? transmittance : sigma_s * transmittance) / dot(channel_pdf, pdf); - - if (hit) { - /* If we hit the surface, we are done. */ - break; - } - else if (throughput.x < VOLUME_THROUGHPUT_EPSILON && - throughput.y < VOLUME_THROUGHPUT_EPSILON && - throughput.z < VOLUME_THROUGHPUT_EPSILON) { - /* Avoid unnecessary work and precision issue when throughput gets really small. */ - break; - } - } - - if (hit) { - kernel_assert(isfinite3_safe(throughput)); - INTEGRATOR_STATE_WRITE(path, throughput) = throughput; - } - - return hit; -} - ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS) { RNGState rng_state; @@ -573,8 +136,15 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS) Ray ray ccl_optional_struct_init; LocalIntersection ss_isect ccl_optional_struct_init; - if (!subsurface_random_walk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) { - return false; + if (INTEGRATOR_STATE(path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) { + if (!subsurface_random_walk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) { + return false; + } + } + else { + if (!subsurface_disk(INTEGRATOR_STATE_PASS, rng_state, ray, ss_isect)) { + return false; + } } # ifdef __VOLUME__ diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h new file mode 100644 index 00000000000..3f685e3a2e9 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h @@ -0,0 +1,195 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +/* BSSRDF using disk based importance sampling. + * + * BSSRDF Importance Sampling, SIGGRAPH 2013 + * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf + */ + +ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, float r) +{ + const float3 eval = bssrdf_eval(radius, r); + const float pdf = bssrdf_pdf(radius, disk_r); + return (pdf > 0.0f) ? eval / pdf : zero_float3(); +} + +/* Subsurface scattering step, from a point on the surface to other + * nearby points on the same object. */ +ccl_device_inline bool subsurface_disk(INTEGRATOR_STATE_ARGS, + RNGState rng_state, + Ray &ray, + LocalIntersection &ss_isect) + +{ + float disk_u, disk_v; + path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v); + + /* Read shading point info from integrator state. */ + const float3 P = INTEGRATOR_STATE(ray, P); + const float ray_dP = INTEGRATOR_STATE(ray, dP); + const float time = INTEGRATOR_STATE(ray, time); + const float3 Ng = INTEGRATOR_STATE(isect, Ng); + const int object = INTEGRATOR_STATE(isect, object); + + /* Read subsurface scattering parameters. */ + const float3 radius = INTEGRATOR_STATE(subsurface, radius); + + /* Pick random axis in local frame and point on disk. */ + float3 disk_N, disk_T, disk_B; + float pick_pdf_N, pick_pdf_T, pick_pdf_B; + + disk_N = Ng; + make_orthonormals(disk_N, &disk_T, &disk_B); + + if (disk_v < 0.5f) { + pick_pdf_N = 0.5f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.25f; + disk_v *= 2.0f; + } + else if (disk_v < 0.75f) { + float3 tmp = disk_N; + disk_N = disk_T; + disk_T = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.5f; + pick_pdf_B = 0.25f; + disk_v = (disk_v - 0.5f) * 4.0f; + } + else { + float3 tmp = disk_N; + disk_N = disk_B; + disk_B = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.5f; + disk_v = (disk_v - 0.75f) * 4.0f; + } + + /* Sample point on disk. */ + float phi = M_2PI_F * disk_v; + float disk_height, disk_r; + + bssrdf_sample(radius, disk_u, &disk_r, &disk_height); + + float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; + + /* Create ray. */ + ray.P = P + disk_N * disk_height + disk_P; + ray.D = -disk_N; + ray.t = 2.0f * disk_height; + ray.dP = ray_dP; + ray.dD = differential_zero_compact(); + ray.time = time; + + /* Intersect with the same object. if multiple intersections are found it + * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits. */ + uint lcg_state = lcg_state_init( + rng_state.rng_hash, rng_state.rng_offset, rng_state.sample, 0x68bc21eb); + const int max_hits = BSSRDF_MAX_HITS; + + scene_intersect_local(kg, &ray, &ss_isect, object, &lcg_state, max_hits); + const int num_eval_hits = min(ss_isect.num_hits, max_hits); + if (num_eval_hits == 0) { + return false; + } + + /* Sort for consistent renders between CPU and GPU, independent of the BVH + * traversal algorithm. */ + sort_intersections_and_normals(ss_isect.hits, ss_isect.Ng, num_eval_hits); + + float3 weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ + float sum_weights = 0.0f; + + for (int hit = 0; hit < num_eval_hits; hit++) { + /* Quickly retrieve P and Ng without setting up ShaderData. */ + const float3 hit_P = ray.P + ray.D * ss_isect.hits[hit].t; + + /* Get geometric normal. */ + const int object = ss_isect.hits[hit].object; + const int object_flag = kernel_tex_fetch(__object_flag, object); + float3 hit_Ng = ss_isect.Ng[hit]; + if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + hit_Ng = -hit_Ng; + } + + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + Transform itfm; + object_fetch_transform_motion_test(kg, object, time, &itfm); + hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng)); + } + + /* Probability densities for local frame axes. */ + const float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); + const float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); + const float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); + + /* Multiple importance sample between 3 axes, power heuristic + * found to be slightly better than balance heuristic. pdf_N + * in the MIS weight and denominator cancelled out. */ + float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); + if (ss_isect.num_hits > max_hits) { + w *= ss_isect.num_hits / (float)max_hits; + } + + /* Real distance to sampled point. */ + const float r = len(hit_P - P); + + /* Evaluate profiles. */ + const float3 weight = subsurface_disk_eval(radius, disk_r, r) * w; + + /* Store result. */ + ss_isect.Ng[hit] = hit_Ng; + weights[hit] = weight; + sum_weights += average(fabs(weight)); + } + + if (sum_weights == 0.0f) { + return false; + } + + /* Use importance resampling, sampling one of the hits proportional to weight. */ + const float r = lcg_step_float(&lcg_state) * sum_weights; + float partial_sum = 0.0f; + + for (int hit = 0; hit < num_eval_hits; hit++) { + const float3 weight = weights[hit]; + const float sample_weight = average(fabs(weight)); + float next_sum = partial_sum + sample_weight; + + if (r < next_sum) { + /* Return exit point. */ + INTEGRATOR_STATE_WRITE(path, throughput) *= weight * sum_weights / sample_weight; + + ss_isect.hits[0] = ss_isect.hits[hit]; + ss_isect.Ng[0] = ss_isect.Ng[hit]; + + ray.P = ray.P + ray.D * ss_isect.hits[hit].t; + ray.D = ss_isect.Ng[hit]; + ray.t = 1.0f; + return true; + } + + partial_sum = next_sum; + } + + return false; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h new file mode 100644 index 00000000000..cffc53bf270 --- /dev/null +++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h @@ -0,0 +1,465 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/kernel_projection.h" + +#include "kernel/bvh/bvh.h" + +CCL_NAMESPACE_BEGIN + +/* Random walk subsurface scattering. + * + * "Practical and Controllable Subsurface Scattering for Production Path + * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ + +/* Support for anisotropy from: + * "Path Traced Subsurface Scattering using Anisotropic Phase Functions + * and Non-Exponential Free Flights". + * Magnus Wrenninge, Ryusuke Villemin, Christophe Hery. + * https://graphics.pixar.com/library/PathTracedSubsurface/ */ + +ccl_device void subsurface_random_walk_remap( + const float albedo, const float d, float g, float *sigma_t, float *alpha) +{ + /* Compute attenuation and scattering coefficients from albedo. */ + const float g2 = g * g; + const float g3 = g2 * g; + const float g4 = g3 * g; + const float g5 = g4 * g; + const float g6 = g5 * g; + const float g7 = g6 * g; + + const float A = 1.8260523782f + -1.28451056436f * g + -1.79904629312f * g2 + + 9.19393289202f * g3 + -22.8215585862f * g4 + 32.0234874259f * g5 + + -23.6264803333f * g6 + 7.21067002658f * g7; + const float B = 4.98511194385f + + 0.127355959438f * + expf(31.1491581433f * g + -201.847017512f * g2 + 841.576016723f * g3 + + -2018.09288505f * g4 + 2731.71560286f * g5 + -1935.41424244f * g6 + + 559.009054474f * g7); + const float C = 1.09686102424f + -0.394704063468f * g + 1.05258115941f * g2 + + -8.83963712726f * g3 + 28.8643230661f * g4 + -46.8802913581f * g5 + + 38.5402837518f * g6 + -12.7181042538f * g7; + const float D = 0.496310210422f + 0.360146581622f * g + -2.15139309747f * g2 + + 17.8896899217f * g3 + -55.2984010333f * g4 + 82.065982243f * g5 + + -58.5106008578f * g6 + 15.8478295021f * g7; + const float E = 4.23190299701f + + 0.00310603949088f * + expf(76.7316253952f * g + -594.356773233f * g2 + 2448.8834203f * g3 + + -5576.68528998f * g4 + 7116.60171912f * g5 + -4763.54467887f * g6 + + 1303.5318055f * g7); + const float F = 2.40602999408f + -2.51814844609f * g + 9.18494908356f * g2 + + -79.2191708682f * g3 + 259.082868209f * g4 + -403.613804597f * g5 + + 302.85712436f * g6 + -87.4370473567f * g7; + + const float blend = powf(albedo, 0.25f); + + *alpha = (1.0f - blend) * A * powf(atanf(B * albedo), C) + + blend * D * powf(atanf(E * albedo), F); + *alpha = clamp(*alpha, 0.0f, 0.999999f); // because of numerical precision + + float sigma_t_prime = 1.0f / fmaxf(d, 1e-16f); + *sigma_t = sigma_t_prime / (1.0f - g); +} + +ccl_device void subsurface_random_walk_coefficients(const float3 albedo, + const float3 radius, + const float anisotropy, + float3 *sigma_t, + float3 *alpha, + float3 *throughput) +{ + float sigma_t_x, sigma_t_y, sigma_t_z; + float alpha_x, alpha_y, alpha_z; + + subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x); + subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y); + subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z); + + /* Throughput already contains closure weight at this point, which includes the + * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo + * which will be added through scattering. */ + *throughput = safe_divide_color(*throughput, albedo); + + /* With low albedo values (like 0.025) we get diffusion_length 1.0 and + * infinite phase functions. To avoid a sharp discontinuity as we go from + * such values to 0.0, increase alpha and reduce the throughput to compensate. */ + const float min_alpha = 0.2f; + if (alpha_x < min_alpha) { + (*throughput).x *= alpha_x / min_alpha; + alpha_x = min_alpha; + } + if (alpha_y < min_alpha) { + (*throughput).y *= alpha_y / min_alpha; + alpha_y = min_alpha; + } + if (alpha_z < min_alpha) { + (*throughput).z *= alpha_z / min_alpha; + alpha_z = min_alpha; + } + + *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); + *alpha = make_float3(alpha_x, alpha_y, alpha_z); +} + +/* References for Dwivedi sampling: + * + * [1] "A Zero-variance-based Sampling Scheme for Monte Carlo Subsurface Scattering" + * by Jaroslav Křivánek and Eugene d'Eon (SIGGRAPH 2014) + * https://cgg.mff.cuni.cz/~jaroslav/papers/2014-zerovar/ + * + * [2] "Improving the Dwivedi Sampling Scheme" + * by Johannes Meng, Johannes Hanika, and Carsten Dachsbacher (EGSR 2016) + * https://cg.ivd.kit.edu/1951.php + * + * [3] "Zero-Variance Theory for Efficient Subsurface Scattering" + * by Eugene d'Eon and Jaroslav Křivánek (SIGGRAPH 2020) + * https://iliyan.com/publications/RenderingCourse2020 + */ + +ccl_device_forceinline float eval_phase_dwivedi(float v, float phase_log, float cos_theta) +{ + /* Eq. 9 from [2] using precomputed log((v + 1) / (v - 1)) */ + return 1.0f / ((v - cos_theta) * phase_log); +} + +ccl_device_forceinline float sample_phase_dwivedi(float v, float phase_log, float rand) +{ + /* Based on Eq. 10 from [2]: `v - (v + 1) * pow((v - 1) / (v + 1), rand)` + * Since we're already pre-computing `phase_log = log((v + 1) / (v - 1))` for the evaluation, + * we can implement the power function like this. */ + return v - (v + 1.0f) * expf(-rand * phase_log); +} + +ccl_device_forceinline float diffusion_length_dwivedi(float alpha) +{ + /* Eq. 67 from [3] */ + return 1.0f / sqrtf(1.0f - powf(alpha, 2.44294f - 0.0215813f * alpha + 0.578637f / alpha)); +} + +ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, float randv) +{ + float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); + float phi = M_2PI_F * randv; + float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta); + + float3 T, B; + make_orthonormals(D, &T, &B); + return dir.x * T + dir.y * B + dir.z * D; +} + +ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, + float t, + bool hit, + float3 *transmittance) +{ + float3 T = volume_color_transmittance(sigma_t, t); + if (transmittance) { + *transmittance = T; + } + return hit ? T : sigma_t * T; +} + +/* Define the below variable to get the similarity code active, + * and the value represents the cutoff level */ +#define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9 + +ccl_device_inline bool subsurface_random_walk(INTEGRATOR_STATE_ARGS, + RNGState rng_state, + Ray &ray, + LocalIntersection &ss_isect) +{ + float bssrdf_u, bssrdf_v; + path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + + const float3 P = INTEGRATOR_STATE(ray, P); + const float3 N = INTEGRATOR_STATE(ray, D); + const float ray_dP = INTEGRATOR_STATE(ray, dP); + const float time = INTEGRATOR_STATE(ray, time); + const float3 Ng = INTEGRATOR_STATE(isect, Ng); + const int object = INTEGRATOR_STATE(isect, object); + + /* Sample diffuse surface scatter into the object. */ + float3 D; + float pdf; + sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf); + if (dot(-Ng, D) <= 0.0f) { + return false; + } + + /* Setup ray. */ + ray.P = ray_offset(P, -Ng); + ray.D = D; + ray.t = FLT_MAX; + ray.time = time; + ray.dP = ray_dP; + ray.dD = differential_zero_compact(); + +#ifndef __KERNEL_OPTIX__ + /* Compute or fetch object transforms. */ + Transform ob_itfm ccl_optional_struct_init; + Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); +#endif + + /* Convert subsurface to volume coefficients. + * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ + const float3 albedo = INTEGRATOR_STATE(subsurface, albedo); + const float3 radius = INTEGRATOR_STATE(subsurface, radius); + const float anisotropy = INTEGRATOR_STATE(subsurface, anisotropy); + + float3 sigma_t, alpha; + float3 throughput = INTEGRATOR_STATE_WRITE(path, throughput); + subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); + float3 sigma_s = sigma_t * alpha; + + /* Theoretically it should be better to use the exact alpha for the channel we're sampling at + * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange + * for making the code significantly more complex and slower (if direction sampling depends on + * the sampled channel, we need to compute its PDF per-channel and consider it for MIS later on). + * + * Since the strength of the guided sampling increases as alpha gets lower, using a value that + * is too low results in fireflies while one that's too high just gives a bit more noise. + * Therefore, the code here uses the highest of the three albedos to be safe. */ + const float diffusion_length = diffusion_length_dwivedi(max3(alpha)); + + if (diffusion_length == 1.0f) { + /* With specific values of alpha the length might become 1, which in asymptotic makes phase to + * be infinite. After first bounce it will cause throughput to be 0. Do early output, avoiding + * numerical issues and extra unneeded work. */ + return false; + } + + /* Precompute term for phase sampling. */ + const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); + + /* Modify state for RNGs, decorrelated from other paths. */ + rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); + + /* Random walk until we hit the surface again. */ + bool hit = false; + bool have_opposite_interface = false; + float opposite_distance = 0.0f; + + /* Todo: Disable for alpha>0.999 or so? */ + /* Our heuristic, a compromise between guiding and classic. */ + const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f)); + +#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL + float3 sigma_s_star = sigma_s * (1.0f - anisotropy); + float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star; + float3 sigma_t_org = sigma_t; + float3 sigma_s_org = sigma_s; + const float anisotropy_org = anisotropy; + const float guided_fraction_org = guided_fraction; +#endif + + for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) { + /* Advance random number offset. */ + rng_state.rng_offset += PRNG_BOUNCE_NUM; + +#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL + // shadow with local variables according to depth + float anisotropy, guided_fraction; + float3 sigma_s, sigma_t; + if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) { + anisotropy = anisotropy_org; + guided_fraction = guided_fraction_org; + sigma_t = sigma_t_org; + sigma_s = sigma_s_org; + } + else { + anisotropy = 0.0f; + guided_fraction = 0.75f; // back to isotropic heuristic from Blender + sigma_t = sigma_t_star; + sigma_s = sigma_s_star; + } +#endif + + /* Sample color channel, use MIS with balance heuristic. */ + float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL); + float3 channel_pdf; + int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf); + float sample_sigma_t = volume_channel_get(sigma_t, channel); + float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE); + + /* We need the result of the raycast to compute the full guided PDF, so just remember the + * relevant terms to avoid recomputing them later. */ + float backward_fraction = 0.0f; + float forward_pdf_factor = 0.0f; + float forward_stretching = 1.0f; + float backward_pdf_factor = 0.0f; + float backward_stretching = 1.0f; + + /* For the initial ray, we already know the direction, so just do classic distance sampling. */ + if (bounce > 0) { + /* Decide whether we should use guided or classic sampling. */ + bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction); + + /* Determine if we want to sample away from the incoming interface. + * This only happens if we found a nearby opposite interface, and the probability for it + * depends on how close we are to it already. + * This probability term comes from the recorded presentation of [3]. */ + bool guide_backward = false; + if (have_opposite_interface) { + /* Compute distance of the random walk between the tangent plane at the starting point + * and the assumed opposite interface (the parallel plane that contains the point we + * found in our ray query for the opposite side). */ + float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance); + backward_fraction = 1.0f / + (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length)); + guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction; + } + + /* Sample scattering direction. */ + float scatter_u, scatter_v; + path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v); + float cos_theta; + float hg_pdf; + if (guided) { + cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); + /* The backwards guiding distribution is just mirrored along sd->N, so swapping the + * sign here is enough to sample from that instead. */ + if (guide_backward) { + cos_theta = -cos_theta; + } + float3 newD = direction_from_cosine(N, cos_theta, scatter_v); + hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy); + ray.D = newD; + } + else { + float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf); + cos_theta = dot(newD, N); + ray.D = newD; + } + + /* Compute PDF factor caused by phase sampling (as the ratio of guided / classic). + * Since phase sampling is channel-independent, we can get away with applying a factor + * to the guided PDF, which implicitly means pulling out the classic PDF term and letting + * it cancel with an equivalent term in the numerator of the full estimator. + * For the backward PDF, we again reuse the same probability distribution with a sign swap. + */ + forward_pdf_factor = M_1_2PI_F * eval_phase_dwivedi(diffusion_length, phase_log, cos_theta) / + hg_pdf; + backward_pdf_factor = M_1_2PI_F * + eval_phase_dwivedi(diffusion_length, phase_log, -cos_theta) / hg_pdf; + + /* Prepare distance sampling. + * For the backwards case, this also needs the sign swapped since now directions against + * sd->N (and therefore with negative cos_theta) are preferred. */ + forward_stretching = (1.0f - cos_theta / diffusion_length); + backward_stretching = (1.0f + cos_theta / diffusion_length); + if (guided) { + sample_sigma_t *= guide_backward ? backward_stretching : forward_stretching; + } + } + + /* Sample direction along ray. */ + float t = -logf(1.0f - randt) / sample_sigma_t; + + /* On the first bounce, we use the raycast to check if the opposite side is nearby. + * If yes, we will later use backwards guided sampling in order to have a decent + * chance of connecting to it. + * Todo: Maybe use less than 10 times the mean free path? */ + ray.t = (bounce == 0) ? max(t, 10.0f / (min3(sigma_t))) : t; + scene_intersect_local(kg, &ray, &ss_isect, object, NULL, 1); + hit = (ss_isect.num_hits > 0); + + if (hit) { +#ifdef __KERNEL_OPTIX__ + /* t is always in world space with OptiX. */ + ray.t = ss_isect.hits[0].t; +#else + /* Compute world space distance to surface hit. */ + float3 D = transform_direction(&ob_itfm, ray.D); + D = normalize(D) * ss_isect.hits[0].t; + ray.t = len(transform_direction(&ob_tfm, D)); +#endif + } + + if (bounce == 0) { + /* Check if we hit the opposite side. */ + if (hit) { + have_opposite_interface = true; + opposite_distance = dot(ray.P + ray.t * ray.D - P, -N); + } + /* Apart from the opposite side check, we were supposed to only trace up to distance t, + * so check if there would have been a hit in that case. */ + hit = ray.t < t; + } + + /* Use the distance to the exit point for the throughput update if we found one. */ + if (hit) { + t = ray.t; + } + else if (bounce == 0) { + /* Restore original position if nothing was hit after the first bounce, + * without the ray_offset() that was added to avoid self-intersection. + * Otherwise if that offset is relatively large compared to the scattering + * radius, we never go back up high enough to exit the surface. */ + ray.P = P; + } + + /* Advance to new scatter location. */ + ray.P += t * ray.D; + + float3 transmittance; + float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); + if (bounce > 0) { + /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */ + float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); + + if (have_opposite_interface) { + /* First step of MIS: Depending on geometry we might have two methods for guided + * sampling, so perform MIS between them. */ + float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL); + guided_pdf = mix( + guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction); + } + else { + /* Just include phase sampling factor otherwise. */ + guided_pdf *= forward_pdf_factor; + } + + /* Now we apply the MIS balance heuristic between the classic and guided sampling. */ + pdf = mix(pdf, guided_pdf, guided_fraction); + } + + /* Finally, we're applying MIS again to combine the three color channels. + * Altogether, the MIS computation combines up to nine different estimators: + * {classic, guided, backward_guided} x {r, g, b} */ + throughput *= (hit ? transmittance : sigma_s * transmittance) / dot(channel_pdf, pdf); + + if (hit) { + /* If we hit the surface, we are done. */ + break; + } + else if (throughput.x < VOLUME_THROUGHPUT_EPSILON && + throughput.y < VOLUME_THROUGHPUT_EPSILON && + throughput.z < VOLUME_THROUGHPUT_EPSILON) { + /* Avoid unnecessary work and precision issue when throughput gets really small. */ + break; + } + } + + if (hit) { + kernel_assert(isfinite3_safe(throughput)); + INTEGRATOR_STATE_WRITE(path, throughput) = throughput; + } + + return hit; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 5000a96c331..eb681683d6a 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -261,32 +261,34 @@ enum PathRayFlag { PATH_RAY_EMISSION = (1 << 19), /* Perform subsurface scattering. */ - PATH_RAY_SUBSURFACE = (1 << 20), + PATH_RAY_SUBSURFACE_RANDOM_WALK = (1 << 20), + PATH_RAY_SUBSURFACE_DISK = (1 << 21), + PATH_RAY_SUBSURFACE = (PATH_RAY_SUBSURFACE_RANDOM_WALK | PATH_RAY_SUBSURFACE_DISK), /* Contribute to denoising features. */ - PATH_RAY_DENOISING_FEATURES = (1 << 21), + PATH_RAY_DENOISING_FEATURES = (1 << 22), /* Render pass categories. */ - PATH_RAY_REFLECT_PASS = (1 << 22), - PATH_RAY_TRANSMISSION_PASS = (1 << 23), - PATH_RAY_VOLUME_PASS = (1 << 24), + PATH_RAY_REFLECT_PASS = (1 << 23), + PATH_RAY_TRANSMISSION_PASS = (1 << 24), + PATH_RAY_VOLUME_PASS = (1 << 25), PATH_RAY_ANY_PASS = (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS | PATH_RAY_VOLUME_PASS), /* Shadow ray is for a light or surface. */ - PATH_RAY_SHADOW_FOR_LIGHT = (1 << 25), + PATH_RAY_SHADOW_FOR_LIGHT = (1 << 26), /* A shadow catcher object was hit and the path was split into two. */ - PATH_RAY_SHADOW_CATCHER_HIT = (1 << 26), + PATH_RAY_SHADOW_CATCHER_HIT = (1 << 27), /* A shadow catcher object was hit and this path traces only shadow catchers, writing them into * their dedicated pass for later division. * * NOTE: Is not covered with `PATH_RAY_ANY_PASS` because shadow catcher does special handling * which is separate from the light passes. */ - PATH_RAY_SHADOW_CATCHER_PASS = (1 << 27), + PATH_RAY_SHADOW_CATCHER_PASS = (1 << 28), /* Path is evaluating background for an approximate shadow catcher with non-transparent film. */ - PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1 << 28), + PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1 << 29), }; /* Configure ray visibility bits for rays and objects respectively, diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index 5d968ed85e0..b6b0d72103a 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -50,6 +50,7 @@ CCL_NAMESPACE_BEGIN using namespace OSL; +static ustring u_burley("burley"); static ustring u_random_walk_fixed_radius("random_walk_fixed_radius"); static ustring u_random_walk("random_walk"); @@ -68,7 +69,10 @@ class CBSSRDFClosure : public CClosurePrimitive { void setup(ShaderData *sd, int path_flag, float3 weight) { - if (method == u_random_walk_fixed_radius) { + if (method == u_burley) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID); + } + else if (method == u_random_walk_fixed_radius) { alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); } else if (method == u_random_walk) { diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 3e0cbe3a483..b3f7fee8a63 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -885,6 +885,7 @@ ccl_device_noinline int svm_node_closure_bsdf( #endif /* __HAIR__ */ #ifdef __SUBSURFACE__ + case CLOSURE_BSSRDF_BURLEY_ID: case CLOSURE_BSSRDF_RANDOM_WALK_ID: case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: { float3 weight = sd->svm_closure_weight * mix_weight; diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 59a0e33acbc..e846e4af259 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -543,6 +543,7 @@ typedef enum ClosureType { CLOSURE_BSDF_TRANSPARENT_ID, /* BSSRDF */ + CLOSURE_BSSRDF_BURLEY_ID, CLOSURE_BSSRDF_RANDOM_WALK_ID, CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID, @@ -589,7 +590,7 @@ typedef enum ClosureType { type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) #define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) #define CLOSURE_IS_BSSRDF(type) \ - (type >= CLOSURE_BSSRDF_RANDOM_WALK_ID && type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) + (type >= CLOSURE_BSSRDF_BURLEY_ID && type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) #define CLOSURE_IS_VOLUME(type) \ (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) #define CLOSURE_IS_VOLUME_SCATTER(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 1629895ff6e..d775859d24d 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -2736,6 +2736,7 @@ NODE_DEFINE(PrincipledBsdfNode) distribution, "Distribution", distribution_enum, CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); static NodeEnum subsurface_method_enum; + subsurface_method_enum.insert("burley", CLOSURE_BSSRDF_BURLEY_ID); subsurface_method_enum.insert("random_walk_fixed_radius", CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); subsurface_method_enum.insert("random_walk", CLOSURE_BSSRDF_RANDOM_WALK_ID); @@ -3060,6 +3061,7 @@ NODE_DEFINE(SubsurfaceScatteringNode) SOCKET_IN_FLOAT(surface_mix_weight, "SurfaceMixWeight", 0.0f, SocketType::SVM_INTERNAL); static NodeEnum method_enum; + method_enum.insert("burley", CLOSURE_BSSRDF_BURLEY_ID); method_enum.insert("random_walk_fixed_radius", CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); method_enum.insert("random_walk", CLOSURE_BSSRDF_RANDOM_WALK_ID); SOCKET_ENUM(method, "Method", method_enum, CLOSURE_BSSRDF_RANDOM_WALK_ID); |