diff options
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/bvh.h | 98 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/bvh_shadow_all.h | 19 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/bvh_util.h | 162 | ||||
-rw-r--r-- | intern/cycles/kernel/closure/alloc.h | 48 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_light.h | 49 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_light_common.h | 64 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_montecarlo.h | 117 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 17 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_subsurface.h | 43 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 44 | ||||
-rw-r--r-- | intern/cycles/kernel/shaders/node_noise_texture.osl | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/shaders/node_vector_math.osl | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/shaders/stdcycles.h | 69 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_math.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_math_util.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_noisetex.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_tex_coord.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_types.h | 1 |
19 files changed, 552 insertions, 205 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index f6b4b963a7a..ea0f16c9233 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -93,6 +93,7 @@ set(SRC_BVH_HEADERS bvh/bvh_local.h bvh/bvh_traversal.h bvh/bvh_types.h + bvh/bvh_util.h bvh/bvh_volume.h bvh/bvh_volume_all.h bvh/bvh_embree.h diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 3049f243ae9..3a3f38539c5 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -29,9 +29,10 @@ # include "kernel/bvh/bvh_embree.h" #endif -CCL_NAMESPACE_BEGIN - #include "kernel/bvh/bvh_types.h" +#include "kernel/bvh/bvh_util.h" + +CCL_NAMESPACE_BEGIN #ifndef __KERNEL_OPTIX__ @@ -533,97 +534,4 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, } #endif /* __VOLUME_RECORD_ALL__ */ -/* Ray offset to avoid self intersection. - * - * This function should be used to compute a modified ray start position for - * rays leaving from a surface. */ - -ccl_device_inline float3 ray_offset(float3 P, float3 Ng) -{ -#ifdef __INTERSECTION_REFINE__ - const float epsilon_f = 1e-5f; - /* ideally this should match epsilon_f, but instancing and motion blur - * precision makes it problematic */ - const float epsilon_test = 1.0f; - const int epsilon_i = 32; - - float3 res; - - /* x component */ - if (fabsf(P.x) < epsilon_test) { - res.x = P.x + Ng.x * epsilon_f; - } - else { - uint ix = __float_as_uint(P.x); - ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i; - res.x = __uint_as_float(ix); - } - - /* y component */ - if (fabsf(P.y) < epsilon_test) { - res.y = P.y + Ng.y * epsilon_f; - } - else { - uint iy = __float_as_uint(P.y); - iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i; - res.y = __uint_as_float(iy); - } - - /* z component */ - if (fabsf(P.z) < epsilon_test) { - res.z = P.z + Ng.z * epsilon_f; - } - else { - uint iz = __float_as_uint(P.z); - iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i; - res.z = __uint_as_float(iz); - } - - return res; -#else - const float epsilon_f = 1e-4f; - return P + epsilon_f * Ng; -#endif -} - -#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__)) -/* ToDo: Move to another file? */ -ccl_device int intersections_compare(const void *a, const void *b) -{ - const Intersection *isect_a = (const Intersection *)a; - const Intersection *isect_b = (const Intersection *)b; - - if (isect_a->t < isect_b->t) - return -1; - else if (isect_a->t > isect_b->t) - return 1; - else - return 0; -} -#endif - -#if defined(__SHADOW_RECORD_ALL__) -ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits) -{ -# ifdef __KERNEL_GPU__ - /* Use bubble sort which has more friendly memory pattern on GPU. */ - bool swapped; - do { - swapped = false; - for (int j = 0; j < num_hits - 1; ++j) { - if (hits[j].t > hits[j + 1].t) { - struct Intersection tmp = hits[j]; - hits[j] = hits[j + 1]; - hits[j + 1] = tmp; - swapped = true; - } - } - --num_hits; - } while (swapped); -# else - qsort(hits, num_hits, sizeof(Intersection), intersections_compare); -# endif -} -#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */ - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index dccd257d2de..2e94b1d7c37 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -180,25 +180,10 @@ ccl_device_inline /* todo: optimize so primitive visibility flag indicates if * the primitive has a transparent shadow shader? */ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; - -#ifdef __HAIR__ - if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } -#endif - int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + const int flags = intersection_get_shader_flags(kg, isect_array); /* if no transparent shadows, all light is blocked */ - if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + if (!(flags & SD_HAS_TRANSPARENT_SHADOW)) { return true; } /* if maximum number of hits reached, block all light */ diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h new file mode 100644 index 00000000000..a694e4dc259 --- /dev/null +++ b/intern/cycles/kernel/bvh/bvh_util.h @@ -0,0 +1,162 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Ray offset to avoid self intersection. + * + * This function should be used to compute a modified ray start position for + * rays leaving from a surface. */ + +ccl_device_inline float3 ray_offset(float3 P, float3 Ng) +{ +#ifdef __INTERSECTION_REFINE__ + const float epsilon_f = 1e-5f; + /* ideally this should match epsilon_f, but instancing and motion blur + * precision makes it problematic */ + const float epsilon_test = 1.0f; + const int epsilon_i = 32; + + float3 res; + + /* x component */ + if (fabsf(P.x) < epsilon_test) { + res.x = P.x + Ng.x * epsilon_f; + } + else { + uint ix = __float_as_uint(P.x); + ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i; + res.x = __uint_as_float(ix); + } + + /* y component */ + if (fabsf(P.y) < epsilon_test) { + res.y = P.y + Ng.y * epsilon_f; + } + else { + uint iy = __float_as_uint(P.y); + iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i; + res.y = __uint_as_float(iy); + } + + /* z component */ + if (fabsf(P.z) < epsilon_test) { + res.z = P.z + Ng.z * epsilon_f; + } + else { + uint iz = __float_as_uint(P.z); + iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i; + res.z = __uint_as_float(iz); + } + + return res; +#else + const float epsilon_f = 1e-4f; + return P + epsilon_f * Ng; +#endif +} + +#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__)) +/* ToDo: Move to another file? */ +ccl_device int intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection *)a; + const Intersection *isect_b = (const Intersection *)b; + + if (isect_a->t < isect_b->t) + return -1; + else if (isect_a->t > isect_b->t) + return 1; + else + return 0; +} +#endif + +#if defined(__SHADOW_RECORD_ALL__) +ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits) +{ + kernel_assert(num_hits > 0); + +# ifdef __KERNEL_GPU__ + /* Use bubble sort which has more friendly memory pattern on GPU. */ + bool swapped; + do { + swapped = false; + for (int j = 0; j < num_hits - 1; ++j) { + if (hits[j].t > hits[j + 1].t) { + struct Intersection tmp = hits[j]; + hits[j] = hits[j + 1]; + hits[j + 1] = tmp; + swapped = true; + } + } + --num_hits; + } while (swapped); +# else + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); +# endif +} +#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */ + +/* Utility to quickly get a shader flags from an intersection. */ + +ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals *ccl_restrict kg, + const Intersection *isect) +{ + const int prim = kernel_tex_fetch(__prim_index, isect->prim); + int shader = 0; + +#ifdef __HAIR__ + if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) +#endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +#ifdef __HAIR__ + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +#endif + + return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; +} + +ccl_device_forceinline int intersection_get_shader(KernelGlobals *ccl_restrict kg, + const Intersection *isect) +{ + const int prim = kernel_tex_fetch(__prim_index, isect->prim); + int shader = 0; + +#ifdef __HAIR__ + if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) +#endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +#ifdef __HAIR__ + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +#endif + + return shader & SHADER_MASK; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h index 341d1e16eb1..99a5a675976 100644 --- a/intern/cycles/kernel/closure/alloc.h +++ b/intern/cycles/kernel/closure/alloc.h @@ -57,14 +57,24 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size) ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight) { - ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); + kernel_assert(isfinite3_safe(weight)); - if (sc == NULL) - return NULL; + const float sample_weight = fabsf(average(weight)); + + /* Use comparison this way to help dealing with non-finite weight: if the average is not finite + * we will not allocate new closure. */ + if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) { + ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); + if (sc == NULL) { + return NULL; + } + + sc->sample_weight = sample_weight; - float sample_weight = fabsf(average(weight)); - sc->sample_weight = sample_weight; - return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL; + return sc; + } + + return NULL; } #ifdef __OSL__ @@ -73,17 +83,27 @@ ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, float3 weight, void *data) { - ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); + kernel_assert(isfinite3_safe(weight)); - if (!sc) - return NULL; + const float sample_weight = fabsf(average(weight)); - memcpy((void *)sc, data, size); + /* Use comparison this way to help dealing with non-finite weight: if the average is not finite + * we will not allocate new closure. */ + if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) { + ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); + if (!sc) { + return NULL; + } - float sample_weight = fabsf(average(weight)); - sc->weight = weight; - sc->sample_weight = sample_weight; - return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL; + memcpy((void *)sc, data, size); + + sc->weight = weight; + sc->sample_weight = sample_weight; + + return sc; + } + + return NULL; } #endif diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 9650b85a5c2..42a834d2ce3 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -119,11 +119,11 @@ ccl_device_inline bool lamp_light_sample( klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); float3 axisv = make_float3( klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); - float3 D = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); + float3 Ng = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); float invarea = fabsf(klight->area.invarea); bool is_round = (klight->area.invarea < 0.0f); - if (dot(ls->P - P, D) > 0.0f) { + if (dot(ls->P - P, Ng) > 0.0f) { return false; } @@ -136,19 +136,37 @@ ccl_device_inline bool lamp_light_sample( } else { inplane = ls->P; - ls->pdf = rect_light_sample(P, &ls->P, axisu, axisv, randu, randv, true); + + float3 sample_axisu = axisu; + float3 sample_axisv = axisv; + + if (klight->area.tan_spread > 0.0f) { + if (!light_spread_clamp_area_light( + P, Ng, &ls->P, &sample_axisu, &sample_axisv, klight->area.tan_spread)) { + return false; + } + } + + ls->pdf = rect_light_sample(P, &ls->P, sample_axisu, sample_axisv, randu, randv, true); inplane = ls->P - inplane; } ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f; ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f; - ls->Ng = D; + ls->Ng = Ng; ls->D = normalize_len(ls->P - P, &ls->t); ls->eval_fac = 0.25f * invarea; + + if (klight->area.tan_spread > 0.0f) { + /* Area Light spread angle attenuation */ + ls->eval_fac *= light_spread_attenuation( + ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread); + } + if (is_round) { - ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t); + ls->pdf *= lamp_light_pdf(kg, Ng, -ls->D, ls->t); } } } @@ -283,9 +301,28 @@ ccl_device bool lamp_light_eval( ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t); } else { - ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false); + float3 sample_axisu = axisu; + float3 sample_axisv = axisv; + + if (klight->area.tan_spread > 0.0f) { + if (!light_spread_clamp_area_light( + P, Ng, &light_P, &sample_axisu, &sample_axisv, klight->area.tan_spread)) { + return false; + } + } + + ls->pdf = rect_light_sample(P, &light_P, sample_axisu, sample_axisv, 0, 0, false); } ls->eval_fac = 0.25f * invarea; + + if (klight->area.tan_spread > 0.0f) { + /* Area Light spread angle attenuation */ + ls->eval_fac *= light_spread_attenuation( + ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread); + if (ls->eval_fac == 0.0f) { + return false; + } + } } else { return false; diff --git a/intern/cycles/kernel/kernel_light_common.h b/intern/cycles/kernel/kernel_light_common.h index 39503a4b479..4a683d36226 100644 --- a/intern/cycles/kernel/kernel_light_common.h +++ b/intern/cycles/kernel/kernel_light_common.h @@ -146,6 +146,70 @@ ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot return attenuation; } +ccl_device float light_spread_attenuation(const float3 D, + const float3 lightNg, + const float tan_spread, + const float normalize_spread) +{ + /* Model a soft-box grid, computing the ratio of light not hidden by the + * slats of the grid at a given angle. (see D10594). */ + const float cos_a = -dot(D, lightNg); + const float sin_a = safe_sqrtf(1.0f - sqr(cos_a)); + const float tan_a = sin_a / cos_a; + return max((1.0f - (tan_spread * tan_a)) * normalize_spread, 0.0f); +} + +/* Compute subset of area light that actually has an influence on the shading point, to + * reduce noise with low spread. */ +ccl_device bool light_spread_clamp_area_light(const float3 P, + const float3 lightNg, + float3 *lightP, + float3 *axisu, + float3 *axisv, + const float tan_spread) +{ + /* Closest point in area light plane and distance to that plane. */ + const float3 closest_P = P - dot(lightNg, P - *lightP) * lightNg; + const float t = len(closest_P - P); + + /* Radius of circle on area light that actually affects the shading point. */ + const float radius = t / tan_spread; + + /* TODO: would be faster to store as normalized vector + length, also in rect_light_sample. */ + float len_u, len_v; + const float3 u = normalize_len(*axisu, &len_u); + const float3 v = normalize_len(*axisv, &len_v); + + /* Local uv coordinates of closest point. */ + const float closest_u = dot(u, closest_P - *lightP); + const float closest_v = dot(v, closest_P - *lightP); + + /* Compute rectangle encompassing the circle that affects the shading point, + * clamped to the bounds of the area light. */ + const float min_u = max(closest_u - radius, -len_u * 0.5f); + const float max_u = min(closest_u + radius, len_u * 0.5f); + const float min_v = max(closest_v - radius, -len_v * 0.5f); + const float max_v = min(closest_v + radius, len_v * 0.5f); + + /* Skip if rectangle is empty. */ + if (min_u >= max_u || min_v >= max_v) { + return false; + } + + /* Compute new area light center position and axes from rectangle in local + * uv coordinates. */ + const float new_center_u = 0.5f * (min_u + max_u); + const float new_center_v = 0.5f * (min_v + max_v); + const float new_len_u = max_u - min_u; + const float new_len_v = max_v - min_v; + + *lightP = *lightP + new_center_u * u + new_center_v * v; + *axisu = u * new_len_u; + *axisv = v * new_len_v; + + return true; +} + ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t) { float cos_pi = dot(Ng, I); diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h index ba25c0e24e4..ce37bd0b15e 100644 --- a/intern/cycles/kernel/kernel_montecarlo.h +++ b/intern/cycles/kernel/kernel_montecarlo.h @@ -195,31 +195,108 @@ ccl_device float2 regular_polygon_sample(float corners, float rotation, float u, ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) { - float3 R; - float NI = dot(N, I); - float NgR, threshold; - - /* Check if the incident ray is coming from behind normal N. */ - if (NI > 0) { - /* Normal reflection */ - R = (2 * NI) * N - I; - NgR = dot(Ng, R); - - /* Reflection rays may always be at least as shallow as the incoming ray. */ - threshold = min(0.9f * dot(Ng, I), 0.01f); - if (NgR >= threshold) { - return N; + float3 R = 2 * dot(N, I) * N - I; + + /* Reflection rays may always be at least as shallow as the incoming ray. */ + float threshold = min(0.9f * dot(Ng, I), 0.01f); + if (dot(Ng, R) >= threshold) { + return N; + } + + /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane. + * The X axis is found by normalizing the component of N that's orthogonal to Ng. + * The Y axis isn't actually needed. + */ + float NdotNg = dot(N, Ng); + float3 X = normalize(N - NdotNg * Ng); + + /* Keep math expressions. */ + /* clang-format off */ + /* Calculate N.z and N.x in the local coordinate system. + * + * The goal of this computation is to find a N' that is rotated towards Ng just enough + * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t. + * + * According to the standard reflection equation, + * this means that we want dot(2*dot(N', I)*N' - I, Ng) = t. + * + * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get + * 2*dot(N', I)*N'.z - I.z = t. + * + * The rotation is simple to express in the coordinate system we formed - + * since N lies in the X-Z-plane, we know that N' will also lie in the X-Z-plane, + * so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z . + * + * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2). + * + * With these simplifications, + * we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t. + * + * The only unknown here is N'.z, so we can solve for that. + * + * The equation has four solutions in general: + * + * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2)) + * We can simplify this expression a bit by grouping terms: + * + * a = I.x^2 + I.z^2 + * b = sqrt(I.x^2 * (a - t^2)) + * c = I.z*t + a + * N'.z = +-sqrt(0.5*(+-b + c)/a) + * + * Two solutions can immediately be discarded because they're negative so N' would lie in the + * lower hemisphere. + */ + /* clang-format on */ + + float Ix = dot(I, X), Iz = dot(I, Ng); + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; + + float b = safe_sqrtf(Ix2 * (a - sqr(threshold))); + float c = Iz * threshold + a; + + /* Evaluate both solutions. + * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than + * one), so check for that first. If no option is viable (might happen in extreme cases like N + * being in the wrong hemisphere), give up and return Ng. */ + float fac = 0.5f / a; + float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c); + bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f)); + bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f)); + + float2 N_new; + if (valid1 && valid2) { + /* If both are possible, do the expensive reflection-based check. */ + float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2)); + float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2)); + + float R1 = 2 * (N1.x * Ix + N1.y * Iz) * N1.y - Iz; + float R2 = 2 * (N2.x * Ix + N2.y * Iz) * N2.y - Iz; + + valid1 = (R1 >= 1e-5f); + valid2 = (R2 >= 1e-5f); + if (valid1 && valid2) { + /* If both solutions are valid, return the one with the shallower reflection since it will be + * closer to the input (if the original reflection wasn't shallow, we would not be in this + * part of the function). */ + N_new = (R1 < R2) ? N1 : N2; } + else { + /* If only one reflection is valid (= positive), pick that one. */ + N_new = (R1 > R2) ? N1 : N2; + } + } + else if (valid1 || valid2) { + /* Only one solution passes the N'.z criterium, so pick that one. */ + float Nz2 = valid1 ? N1_z2 : N2_z2; + N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2)); } else { - /* Bad incident */ - R = -I; - NgR = dot(Ng, R); - threshold = 0.01f; + return Ng; } - R = R + Ng * (threshold - NgR); /* Lift the reflection above the threshold. */ - return normalize(I * len(R) + R * len(I)); /* Find a bisector. */ + return N_new.x * X + N_new.y * Ng; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 5681510fc25..dd2390808ea 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -65,7 +65,6 @@ ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg, uint visibility = path_state_ray_visibility(kg, state); if (path_state_ao_bounce(kg, state)) { - visibility = PATH_RAY_SHADOW; ray->t = kernel_data.background.ao_distance; } @@ -416,7 +415,13 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, break; } else if (path_state_ao_bounce(kg, state)) { - break; + if (intersection_get_shader_flags(kg, &isect) & + (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) { + state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + else { + break; + } } /* Setup shader data. */ @@ -554,7 +559,13 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg, break; } else if (path_state_ao_bounce(kg, state)) { - break; + if (intersection_get_shader_flags(kg, &isect) & + (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) { + state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + else { + break; + } } /* Setup shader data. */ diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index c75958e79c5..dd922b86722 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -25,8 +25,9 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, float r, bool all) { - /* this is the veach one-sample model with balance heuristic, some pdf - * factors drop out when using balance heuristic weighting */ + /* This is the Veach one-sample model with balance heuristic, some pdf + * factors drop out when using balance heuristic weighting. For branched + * path tracing (all) we sample all closure and don't use MIS. */ float3 eval_sum = zero_float3(); float pdf_sum = 0.0f; float sample_weight_inv = 0.0f; @@ -65,6 +66,30 @@ subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, f return (pdf_sum > 0.0f) ? eval_sum / pdf_sum : zero_float3(); } +ccl_device_inline float3 subsurface_scatter_walk_eval(ShaderData *sd, + const ShaderClosure *sc, + float3 throughput, + bool all) +{ + /* This is the Veach one-sample model with balance heuristic, some pdf + * factors drop out when using balance heuristic weighting. For branched + * path tracing (all) we sample all closure and don't use MIS. */ + if (!all) { + float bssrdf_weight = 0.0f; + float weight = sc->sample_weight; + + for (int i = 0; i < sd->num_closure; i++) { + sc = &sd->closure[i]; + + if (CLOSURE_IS_BSSRDF(sc->type)) { + bssrdf_weight += sc->sample_weight; + } + } + throughput *= bssrdf_weight / weight; + } + return throughput; +} + /* replace closures with a single diffuse bsdf closure after scatter step */ ccl_device void subsurface_scatter_setup_diffuse_bsdf( KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N) @@ -437,7 +462,8 @@ ccl_device_noinline ccl_addr_space PathState *state, const ShaderClosure *sc, const float bssrdf_u, - const float bssrdf_v) + const float bssrdf_v, + bool all) { /* Sample diffuse surface scatter into the object. */ float3 D; @@ -605,6 +631,13 @@ ccl_device_noinline if (hit) { t = ray->t; } + else if (bounce == 0) { + /* Restore original position if nothing was hit after the first bounce, + * without the ray_offset() that was added to avoid self-intersection. + * Otherwise if that offset is relatively large compared to the scattering + * radius, we never go back up high enough to exit the surface. */ + ray->P = sd->P; + } /* Advance to new scatter location. */ ray->P += t * ray->D; @@ -662,7 +695,7 @@ ccl_device_noinline /* TODO: gain back performance lost from merging with disk BSSRDF. We * only need to return on hit so this indirect ray push/pop overhead * is not actually needed, but it does keep the code simpler. */ - ss_isect->weight[0] = throughput; + ss_isect->weight[0] = subsurface_scatter_walk_eval(sd, sc, throughput, all); #ifdef __SPLIT_KERNEL__ ss_isect->ray = *ray; #endif @@ -684,7 +717,7 @@ ccl_device_inline int subsurface_scatter_multi_intersect(KernelGlobals *kg, return subsurface_scatter_disk(kg, ss_isect, sd, sc, lcg_state, bssrdf_u, bssrdf_v, all); } else { - return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v); + return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v, all); } } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index df56360b1df..74fa2826cd4 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -99,27 +99,23 @@ CCL_NAMESPACE_BEGIN #define __AO__ #define __PASSES__ #define __HAIR__ - -/* Without these we get an AO render, used by OpenCL preview kernel. */ -#ifndef __KERNEL_AO_PREVIEW__ -# define __SVM__ -# define __EMISSION__ -# define __HOLDOUT__ -# define __MULTI_CLOSURE__ -# define __TRANSPARENT_SHADOWS__ -# define __BACKGROUND_MIS__ -# define __LAMP_MIS__ -# define __CAMERA_MOTION__ -# define __OBJECT_MOTION__ -# define __BAKING__ -# define __PRINCIPLED__ -# define __SUBSURFACE__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __CMJ__ -# define __SHADOW_RECORD_ALL__ -# define __BRANCHED_PATH__ -#endif +#define __SVM__ +#define __EMISSION__ +#define __HOLDOUT__ +#define __MULTI_CLOSURE__ +#define __TRANSPARENT_SHADOWS__ +#define __BACKGROUND_MIS__ +#define __LAMP_MIS__ +#define __CAMERA_MOTION__ +#define __OBJECT_MOTION__ +#define __BAKING__ +#define __PRINCIPLED__ +#define __SUBSURFACE__ +#define __VOLUME__ +#define __VOLUME_SCATTER__ +#define __CMJ__ +#define __SHADOW_RECORD_ALL__ +#define __BRANCHED_PATH__ /* Device specific features */ #ifdef __KERNEL_CPU__ @@ -895,6 +891,8 @@ enum ShaderDataFlag { SD_HAS_CONSTANT_EMISSION = (1 << 27), /* Needs to access attributes for volume rendering */ SD_NEED_VOLUME_ATTRIBUTES = (1 << 28), + /* Shader has emission */ + SD_HAS_EMISSION = (1 << 29), SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME | SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR | @@ -1501,9 +1499,9 @@ typedef struct KernelAreaLight { float axisu[3]; float invarea; float axisv[3]; - float pad1; + float tan_spread; float dir[3]; - float pad2; + float normalize_spread; } KernelAreaLight; typedef struct KernelDistantLight { diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl index 61c0216910b..01196ab633a 100644 --- a/intern/cycles/kernel/shaders/node_noise_texture.osl +++ b/intern/cycles/kernel/shaders/node_noise_texture.osl @@ -25,7 +25,7 @@ * coordinates to act as a seed since the noise functions don't have seed values. * A seed value is needed for generating distortion textures and color outputs. * The offset's components are in the range [100, 200], not too high to cause - * bad precision and not to small to be noticeable. We use float seed because + * bad precision and not too small to be noticeable. We use float seed because * OSL only support float hashes. */ diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl index 3963c23ea9c..c08d75b99ef 100644 --- a/intern/cycles/kernel/shaders/node_vector_math.osl +++ b/intern/cycles/kernel/shaders/node_vector_math.osl @@ -52,6 +52,9 @@ shader node_vector_math(string math_type = "add", else if (math_type == "faceforward") { Vector = compatible_faceforward(Vector1, Vector2, Vector3); } + else if (math_type == "multiply_add") { + Vector = Vector1 * Vector2 + Vector3; + } else if (math_type == "dot_product") { Value = dot(Vector1, Vector2); } diff --git a/intern/cycles/kernel/shaders/stdcycles.h b/intern/cycles/kernel/shaders/stdcycles.h index af7b645d9a2..dd604da68ce 100644 --- a/intern/cycles/kernel/shaders/stdcycles.h +++ b/intern/cycles/kernel/shaders/stdcycles.h @@ -84,30 +84,67 @@ closure color principled_hair(normal N, closure color henyey_greenstein(float g) BUILTIN; closure color absorption() BUILTIN; -normal ensure_valid_reflection(normal Ng, normal I, normal N) +normal ensure_valid_reflection(normal Ng, vector I, normal N) { /* The implementation here mirrors the one in kernel_montecarlo.h, * check there for an explanation of the algorithm. */ - vector R; - float NI = dot(N, I); - float NgR, threshold; - - if (NI > 0) { - R = (2 * NI) * N - I; - NgR = dot(Ng, R); - threshold = min(0.9 * dot(Ng, I), 0.01); - if (NgR >= threshold) { - return N; + + float sqr(float x) + { + return x * x; + } + + vector R = 2 * dot(N, I) * N - I; + + float threshold = min(0.9 * dot(Ng, I), 0.01); + if (dot(Ng, R) >= threshold) { + return N; + } + + float NdotNg = dot(N, Ng); + vector X = normalize(N - NdotNg * Ng); + + float Ix = dot(I, X), Iz = dot(I, Ng); + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; + + float b = sqrt(Ix2 * (a - sqr(threshold))); + float c = Iz * threshold + a; + + float fac = 0.5 / a; + float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c); + int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5)); + int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5)); + + float N_new_x, N_new_z; + if (valid1 && valid2) { + float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2); + float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2); + + float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz; + float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz; + + valid1 = (R1 >= 1e-5); + valid2 = (R2 >= 1e-5); + if (valid1 && valid2) { + N_new_x = (R1 < R2) ? N1_x : N2_x; + N_new_z = (R1 < R2) ? N1_z : N2_z; + } + else { + N_new_x = (R1 > R2) ? N1_x : N2_x; + N_new_z = (R1 > R2) ? N1_z : N2_z; } } + else if (valid1 || valid2) { + float Nz2 = valid1 ? N1_z2 : N2_z2; + N_new_x = sqrt(1.0 - Nz2); + N_new_z = sqrt(Nz2); + } else { - R = -I; - NgR = dot(Ng, R); - threshold = 0.01; + return Ng; } - R = R + Ng * (threshold - NgR); - return normalize(I * length(R) + R * length(I)); + return N_new_x * X + N_new_z * Ng; } #endif /* CCL_STDOSL_H */ diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index dda2e50f916..733ea28f9e5 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -58,7 +58,8 @@ ccl_device void svm_node_vector_math(KernelGlobals *kg, float3 vector; /* 3 Vector Operators */ - if (type == NODE_VECTOR_MATH_WRAP || type == NODE_VECTOR_MATH_FACEFORWARD) { + if (type == NODE_VECTOR_MATH_WRAP || type == NODE_VECTOR_MATH_FACEFORWARD || + type == NODE_VECTOR_MATH_MULTIPLY_ADD) { uint4 extra_node = read_node(kg, offset); c = stack_load_float3(stack, extra_node.x); } diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h index 389c44ab1da..9e654f2247f 100644 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -52,6 +52,9 @@ ccl_device void svm_vector_math(float *value, case NODE_VECTOR_MATH_FACEFORWARD: *vector = faceforward(a, b, c); break; + case NODE_VECTOR_MATH_MULTIPLY_ADD: + *vector = a * b + c; + break; case NODE_VECTOR_MATH_DOT_PRODUCT: *value = dot(a, b); break; @@ -242,12 +245,15 @@ ccl_device float3 svm_math_blackbody_color(float t) return make_float3(4.70366907f, 0.0f, 0.0f); } + /* Manually align for readability. */ + /* clang-format off */ int i = (t >= 6365.0f) ? 5 : (t >= 3315.0f) ? 4 : (t >= 1902.0f) ? 3 : (t >= 1449.0f) ? 2 : (t >= 1167.0f) ? 1 : 0; + /* clang-format on */ ccl_constant float *r = blackbody_table_r[i]; ccl_constant float *g = blackbody_table_g[i]; diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h index 920dd7d9d02..61fd9553802 100644 --- a/intern/cycles/kernel/svm/svm_noisetex.h +++ b/intern/cycles/kernel/svm/svm_noisetex.h @@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN * coordinates to act as a seed since the noise functions don't have seed values. * A seed value is needed for generating distortion textures and color outputs. * The offset's components are in the range [100, 200], not too high to cause - * bad precision and not to small to be noticeable. We use float seed because + * bad precision and not too small to be noticeable. We use float seed because * OSL only support float hashes. */ diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index 4fe940f1a67..fc46bb584be 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -370,10 +370,13 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack if (direction_type == NODE_TANGENT_UVMAP) { /* UV map */ - if (desc.offset == ATTR_STD_NOT_FOUND) - tangent = make_float3(0.0f, 0.0f, 0.0f); - else + if (desc.offset == ATTR_STD_NOT_FOUND) { + stack_store_float3(stack, tangent_offset, zero_float3()); + return; + } + else { tangent = attribute_value; + } } else { /* radial */ diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 64a8f82a094..062afcfa5ac 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -341,6 +341,7 @@ typedef enum NodeVectorMathType { NODE_VECTOR_MATH_TANGENT, NODE_VECTOR_MATH_REFRACT, NODE_VECTOR_MATH_FACEFORWARD, + NODE_VECTOR_MATH_MULTIPLY_ADD, } NodeVectorMathType; typedef enum NodeClampType { |