Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/bvh/bvh.h98
-rw-r--r--intern/cycles/kernel/bvh/bvh_shadow_all.h19
-rw-r--r--intern/cycles/kernel/bvh/bvh_util.h162
-rw-r--r--intern/cycles/kernel/closure/alloc.h48
-rw-r--r--intern/cycles/kernel/kernel_light.h49
-rw-r--r--intern/cycles/kernel/kernel_light_common.h64
-rw-r--r--intern/cycles/kernel/kernel_montecarlo.h117
-rw-r--r--intern/cycles/kernel/kernel_path.h17
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h43
-rw-r--r--intern/cycles/kernel/kernel_types.h44
-rw-r--r--intern/cycles/kernel/shaders/node_noise_texture.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_vector_math.osl3
-rw-r--r--intern/cycles/kernel/shaders/stdcycles.h69
-rw-r--r--intern/cycles/kernel/svm/svm_math.h3
-rw-r--r--intern/cycles/kernel/svm/svm_math_util.h6
-rw-r--r--intern/cycles/kernel/svm/svm_noisetex.h2
-rw-r--r--intern/cycles/kernel/svm/svm_tex_coord.h9
-rw-r--r--intern/cycles/kernel/svm/svm_types.h1
19 files changed, 552 insertions, 205 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f6b4b963a7a..ea0f16c9233 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -93,6 +93,7 @@ set(SRC_BVH_HEADERS
bvh/bvh_local.h
bvh/bvh_traversal.h
bvh/bvh_types.h
+ bvh/bvh_util.h
bvh/bvh_volume.h
bvh/bvh_volume_all.h
bvh/bvh_embree.h
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 3049f243ae9..3a3f38539c5 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -29,9 +29,10 @@
# include "kernel/bvh/bvh_embree.h"
#endif
-CCL_NAMESPACE_BEGIN
-
#include "kernel/bvh/bvh_types.h"
+#include "kernel/bvh/bvh_util.h"
+
+CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_OPTIX__
@@ -533,97 +534,4 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
}
#endif /* __VOLUME_RECORD_ALL__ */
-/* Ray offset to avoid self intersection.
- *
- * This function should be used to compute a modified ray start position for
- * rays leaving from a surface. */
-
-ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
-{
-#ifdef __INTERSECTION_REFINE__
- const float epsilon_f = 1e-5f;
- /* ideally this should match epsilon_f, but instancing and motion blur
- * precision makes it problematic */
- const float epsilon_test = 1.0f;
- const int epsilon_i = 32;
-
- float3 res;
-
- /* x component */
- if (fabsf(P.x) < epsilon_test) {
- res.x = P.x + Ng.x * epsilon_f;
- }
- else {
- uint ix = __float_as_uint(P.x);
- ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i;
- res.x = __uint_as_float(ix);
- }
-
- /* y component */
- if (fabsf(P.y) < epsilon_test) {
- res.y = P.y + Ng.y * epsilon_f;
- }
- else {
- uint iy = __float_as_uint(P.y);
- iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i;
- res.y = __uint_as_float(iy);
- }
-
- /* z component */
- if (fabsf(P.z) < epsilon_test) {
- res.z = P.z + Ng.z * epsilon_f;
- }
- else {
- uint iz = __float_as_uint(P.z);
- iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i;
- res.z = __uint_as_float(iz);
- }
-
- return res;
-#else
- const float epsilon_f = 1e-4f;
- return P + epsilon_f * Ng;
-#endif
-}
-
-#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__))
-/* ToDo: Move to another file? */
-ccl_device int intersections_compare(const void *a, const void *b)
-{
- const Intersection *isect_a = (const Intersection *)a;
- const Intersection *isect_b = (const Intersection *)b;
-
- if (isect_a->t < isect_b->t)
- return -1;
- else if (isect_a->t > isect_b->t)
- return 1;
- else
- return 0;
-}
-#endif
-
-#if defined(__SHADOW_RECORD_ALL__)
-ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
-{
-# ifdef __KERNEL_GPU__
- /* Use bubble sort which has more friendly memory pattern on GPU. */
- bool swapped;
- do {
- swapped = false;
- for (int j = 0; j < num_hits - 1; ++j) {
- if (hits[j].t > hits[j + 1].t) {
- struct Intersection tmp = hits[j];
- hits[j] = hits[j + 1];
- hits[j + 1] = tmp;
- swapped = true;
- }
- }
- --num_hits;
- } while (swapped);
-# else
- qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
-# endif
-}
-#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index dccd257d2de..2e94b1d7c37 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -180,25 +180,10 @@ ccl_device_inline
/* todo: optimize so primitive visibility flag indicates if
* the primitive has a transparent shadow shader? */
- int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
- int shader = 0;
-
-#ifdef __HAIR__
- if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
- {
- shader = kernel_tex_fetch(__tri_shader, prim);
- }
-#ifdef __HAIR__
- else {
- float4 str = kernel_tex_fetch(__curves, prim);
- shader = __float_as_int(str.z);
- }
-#endif
- int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+ const int flags = intersection_get_shader_flags(kg, isect_array);
/* if no transparent shadows, all light is blocked */
- if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+ if (!(flags & SD_HAS_TRANSPARENT_SHADOW)) {
return true;
}
/* if maximum number of hits reached, block all light */
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
new file mode 100644
index 00000000000..a694e4dc259
--- /dev/null
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+/* Ray offset to avoid self intersection.
+ *
+ * This function should be used to compute a modified ray start position for
+ * rays leaving from a surface. */
+
+ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
+{
+#ifdef __INTERSECTION_REFINE__
+ const float epsilon_f = 1e-5f;
+ /* ideally this should match epsilon_f, but instancing and motion blur
+ * precision makes it problematic */
+ const float epsilon_test = 1.0f;
+ const int epsilon_i = 32;
+
+ float3 res;
+
+ /* x component */
+ if (fabsf(P.x) < epsilon_test) {
+ res.x = P.x + Ng.x * epsilon_f;
+ }
+ else {
+ uint ix = __float_as_uint(P.x);
+ ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i;
+ res.x = __uint_as_float(ix);
+ }
+
+ /* y component */
+ if (fabsf(P.y) < epsilon_test) {
+ res.y = P.y + Ng.y * epsilon_f;
+ }
+ else {
+ uint iy = __float_as_uint(P.y);
+ iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i;
+ res.y = __uint_as_float(iy);
+ }
+
+ /* z component */
+ if (fabsf(P.z) < epsilon_test) {
+ res.z = P.z + Ng.z * epsilon_f;
+ }
+ else {
+ uint iz = __float_as_uint(P.z);
+ iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i;
+ res.z = __uint_as_float(iz);
+ }
+
+ return res;
+#else
+ const float epsilon_f = 1e-4f;
+ return P + epsilon_f * Ng;
+#endif
+}
+
+#if defined(__VOLUME_RECORD_ALL__) || (defined(__SHADOW_RECORD_ALL__) && defined(__KERNEL_CPU__))
+/* ToDo: Move to another file? */
+ccl_device int intersections_compare(const void *a, const void *b)
+{
+ const Intersection *isect_a = (const Intersection *)a;
+ const Intersection *isect_b = (const Intersection *)b;
+
+ if (isect_a->t < isect_b->t)
+ return -1;
+ else if (isect_a->t > isect_b->t)
+ return 1;
+ else
+ return 0;
+}
+#endif
+
+#if defined(__SHADOW_RECORD_ALL__)
+ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
+{
+ kernel_assert(num_hits > 0);
+
+# ifdef __KERNEL_GPU__
+ /* Use bubble sort which has more friendly memory pattern on GPU. */
+ bool swapped;
+ do {
+ swapped = false;
+ for (int j = 0; j < num_hits - 1; ++j) {
+ if (hits[j].t > hits[j + 1].t) {
+ struct Intersection tmp = hits[j];
+ hits[j] = hits[j + 1];
+ hits[j + 1] = tmp;
+ swapped = true;
+ }
+ }
+ --num_hits;
+ } while (swapped);
+# else
+ qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
+# endif
+}
+#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */
+
+/* Utility to quickly get a shader flags from an intersection. */
+
+ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals *ccl_restrict kg,
+ const Intersection *isect)
+{
+ const int prim = kernel_tex_fetch(__prim_index, isect->prim);
+ int shader = 0;
+
+#ifdef __HAIR__
+ if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE)
+#endif
+ {
+ shader = kernel_tex_fetch(__tri_shader, prim);
+ }
+#ifdef __HAIR__
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
+#endif
+
+ return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
+}
+
+ccl_device_forceinline int intersection_get_shader(KernelGlobals *ccl_restrict kg,
+ const Intersection *isect)
+{
+ const int prim = kernel_tex_fetch(__prim_index, isect->prim);
+ int shader = 0;
+
+#ifdef __HAIR__
+ if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE)
+#endif
+ {
+ shader = kernel_tex_fetch(__tri_shader, prim);
+ }
+#ifdef __HAIR__
+ else {
+ float4 str = kernel_tex_fetch(__curves, prim);
+ shader = __float_as_int(str.z);
+ }
+#endif
+
+ return shader & SHADER_MASK;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index 341d1e16eb1..99a5a675976 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -57,14 +57,24 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
{
- ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ kernel_assert(isfinite3_safe(weight));
- if (sc == NULL)
- return NULL;
+ const float sample_weight = fabsf(average(weight));
+
+ /* Use comparison this way to help dealing with non-finite weight: if the average is not finite
+ * we will not allocate new closure. */
+ if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) {
+ ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ if (sc == NULL) {
+ return NULL;
+ }
+
+ sc->sample_weight = sample_weight;
- float sample_weight = fabsf(average(weight));
- sc->sample_weight = sample_weight;
- return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
+ return sc;
+ }
+
+ return NULL;
}
#ifdef __OSL__
@@ -73,17 +83,27 @@ ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
float3 weight,
void *data)
{
- ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ kernel_assert(isfinite3_safe(weight));
- if (!sc)
- return NULL;
+ const float sample_weight = fabsf(average(weight));
- memcpy((void *)sc, data, size);
+ /* Use comparison this way to help dealing with non-finite weight: if the average is not finite
+ * we will not allocate new closure. */
+ if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) {
+ ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ if (!sc) {
+ return NULL;
+ }
- float sample_weight = fabsf(average(weight));
- sc->weight = weight;
- sc->sample_weight = sample_weight;
- return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL;
+ memcpy((void *)sc, data, size);
+
+ sc->weight = weight;
+ sc->sample_weight = sample_weight;
+
+ return sc;
+ }
+
+ return NULL;
}
#endif
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 9650b85a5c2..42a834d2ce3 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -119,11 +119,11 @@ ccl_device_inline bool lamp_light_sample(
klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
float3 axisv = make_float3(
klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
- float3 D = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
+ float3 Ng = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
float invarea = fabsf(klight->area.invarea);
bool is_round = (klight->area.invarea < 0.0f);
- if (dot(ls->P - P, D) > 0.0f) {
+ if (dot(ls->P - P, Ng) > 0.0f) {
return false;
}
@@ -136,19 +136,37 @@ ccl_device_inline bool lamp_light_sample(
}
else {
inplane = ls->P;
- ls->pdf = rect_light_sample(P, &ls->P, axisu, axisv, randu, randv, true);
+
+ float3 sample_axisu = axisu;
+ float3 sample_axisv = axisv;
+
+ if (klight->area.tan_spread > 0.0f) {
+ if (!light_spread_clamp_area_light(
+ P, Ng, &ls->P, &sample_axisu, &sample_axisv, klight->area.tan_spread)) {
+ return false;
+ }
+ }
+
+ ls->pdf = rect_light_sample(P, &ls->P, sample_axisu, sample_axisv, randu, randv, true);
inplane = ls->P - inplane;
}
ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
- ls->Ng = D;
+ ls->Ng = Ng;
ls->D = normalize_len(ls->P - P, &ls->t);
ls->eval_fac = 0.25f * invarea;
+
+ if (klight->area.tan_spread > 0.0f) {
+ /* Area Light spread angle attenuation */
+ ls->eval_fac *= light_spread_attenuation(
+ ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
+ }
+
if (is_round) {
- ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t);
+ ls->pdf *= lamp_light_pdf(kg, Ng, -ls->D, ls->t);
}
}
}
@@ -283,9 +301,28 @@ ccl_device bool lamp_light_eval(
ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t);
}
else {
- ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false);
+ float3 sample_axisu = axisu;
+ float3 sample_axisv = axisv;
+
+ if (klight->area.tan_spread > 0.0f) {
+ if (!light_spread_clamp_area_light(
+ P, Ng, &light_P, &sample_axisu, &sample_axisv, klight->area.tan_spread)) {
+ return false;
+ }
+ }
+
+ ls->pdf = rect_light_sample(P, &light_P, sample_axisu, sample_axisv, 0, 0, false);
}
ls->eval_fac = 0.25f * invarea;
+
+ if (klight->area.tan_spread > 0.0f) {
+ /* Area Light spread angle attenuation */
+ ls->eval_fac *= light_spread_attenuation(
+ ls->D, ls->Ng, klight->area.tan_spread, klight->area.normalize_spread);
+ if (ls->eval_fac == 0.0f) {
+ return false;
+ }
+ }
}
else {
return false;
diff --git a/intern/cycles/kernel/kernel_light_common.h b/intern/cycles/kernel/kernel_light_common.h
index 39503a4b479..4a683d36226 100644
--- a/intern/cycles/kernel/kernel_light_common.h
+++ b/intern/cycles/kernel/kernel_light_common.h
@@ -146,6 +146,70 @@ ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot
return attenuation;
}
+ccl_device float light_spread_attenuation(const float3 D,
+ const float3 lightNg,
+ const float tan_spread,
+ const float normalize_spread)
+{
+ /* Model a soft-box grid, computing the ratio of light not hidden by the
+ * slats of the grid at a given angle. (see D10594). */
+ const float cos_a = -dot(D, lightNg);
+ const float sin_a = safe_sqrtf(1.0f - sqr(cos_a));
+ const float tan_a = sin_a / cos_a;
+ return max((1.0f - (tan_spread * tan_a)) * normalize_spread, 0.0f);
+}
+
+/* Compute subset of area light that actually has an influence on the shading point, to
+ * reduce noise with low spread. */
+ccl_device bool light_spread_clamp_area_light(const float3 P,
+ const float3 lightNg,
+ float3 *lightP,
+ float3 *axisu,
+ float3 *axisv,
+ const float tan_spread)
+{
+ /* Closest point in area light plane and distance to that plane. */
+ const float3 closest_P = P - dot(lightNg, P - *lightP) * lightNg;
+ const float t = len(closest_P - P);
+
+ /* Radius of circle on area light that actually affects the shading point. */
+ const float radius = t / tan_spread;
+
+ /* TODO: would be faster to store as normalized vector + length, also in rect_light_sample. */
+ float len_u, len_v;
+ const float3 u = normalize_len(*axisu, &len_u);
+ const float3 v = normalize_len(*axisv, &len_v);
+
+ /* Local uv coordinates of closest point. */
+ const float closest_u = dot(u, closest_P - *lightP);
+ const float closest_v = dot(v, closest_P - *lightP);
+
+ /* Compute rectangle encompassing the circle that affects the shading point,
+ * clamped to the bounds of the area light. */
+ const float min_u = max(closest_u - radius, -len_u * 0.5f);
+ const float max_u = min(closest_u + radius, len_u * 0.5f);
+ const float min_v = max(closest_v - radius, -len_v * 0.5f);
+ const float max_v = min(closest_v + radius, len_v * 0.5f);
+
+ /* Skip if rectangle is empty. */
+ if (min_u >= max_u || min_v >= max_v) {
+ return false;
+ }
+
+ /* Compute new area light center position and axes from rectangle in local
+ * uv coordinates. */
+ const float new_center_u = 0.5f * (min_u + max_u);
+ const float new_center_v = 0.5f * (min_v + max_v);
+ const float new_len_u = max_u - min_u;
+ const float new_len_v = max_v - min_v;
+
+ *lightP = *lightP + new_center_u * u + new_center_v * v;
+ *axisu = u * new_len_u;
+ *axisv = v * new_len_v;
+
+ return true;
+}
+
ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
{
float cos_pi = dot(Ng, I);
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index ba25c0e24e4..ce37bd0b15e 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -195,31 +195,108 @@ ccl_device float2 regular_polygon_sample(float corners, float rotation, float u,
ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
{
- float3 R;
- float NI = dot(N, I);
- float NgR, threshold;
-
- /* Check if the incident ray is coming from behind normal N. */
- if (NI > 0) {
- /* Normal reflection */
- R = (2 * NI) * N - I;
- NgR = dot(Ng, R);
-
- /* Reflection rays may always be at least as shallow as the incoming ray. */
- threshold = min(0.9f * dot(Ng, I), 0.01f);
- if (NgR >= threshold) {
- return N;
+ float3 R = 2 * dot(N, I) * N - I;
+
+ /* Reflection rays may always be at least as shallow as the incoming ray. */
+ float threshold = min(0.9f * dot(Ng, I), 0.01f);
+ if (dot(Ng, R) >= threshold) {
+ return N;
+ }
+
+ /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane.
+ * The X axis is found by normalizing the component of N that's orthogonal to Ng.
+ * The Y axis isn't actually needed.
+ */
+ float NdotNg = dot(N, Ng);
+ float3 X = normalize(N - NdotNg * Ng);
+
+ /* Keep math expressions. */
+ /* clang-format off */
+ /* Calculate N.z and N.x in the local coordinate system.
+ *
+ * The goal of this computation is to find a N' that is rotated towards Ng just enough
+ * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t.
+ *
+ * According to the standard reflection equation,
+ * this means that we want dot(2*dot(N', I)*N' - I, Ng) = t.
+ *
+ * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get
+ * 2*dot(N', I)*N'.z - I.z = t.
+ *
+ * The rotation is simple to express in the coordinate system we formed -
+ * since N lies in the X-Z-plane, we know that N' will also lie in the X-Z-plane,
+ * so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z .
+ *
+ * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
+ *
+ * With these simplifications,
+ * we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
+ *
+ * The only unknown here is N'.z, so we can solve for that.
+ *
+ * The equation has four solutions in general:
+ *
+ * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
+ * We can simplify this expression a bit by grouping terms:
+ *
+ * a = I.x^2 + I.z^2
+ * b = sqrt(I.x^2 * (a - t^2))
+ * c = I.z*t + a
+ * N'.z = +-sqrt(0.5*(+-b + c)/a)
+ *
+ * Two solutions can immediately be discarded because they're negative so N' would lie in the
+ * lower hemisphere.
+ */
+ /* clang-format on */
+
+ float Ix = dot(I, X), Iz = dot(I, Ng);
+ float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+ float a = Ix2 + Iz2;
+
+ float b = safe_sqrtf(Ix2 * (a - sqr(threshold)));
+ float c = Iz * threshold + a;
+
+ /* Evaluate both solutions.
+ * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than
+ * one), so check for that first. If no option is viable (might happen in extreme cases like N
+ * being in the wrong hemisphere), give up and return Ng. */
+ float fac = 0.5f / a;
+ float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
+ bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
+ bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));
+
+ float2 N_new;
+ if (valid1 && valid2) {
+ /* If both are possible, do the expensive reflection-based check. */
+ float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
+ float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
+
+ float R1 = 2 * (N1.x * Ix + N1.y * Iz) * N1.y - Iz;
+ float R2 = 2 * (N2.x * Ix + N2.y * Iz) * N2.y - Iz;
+
+ valid1 = (R1 >= 1e-5f);
+ valid2 = (R2 >= 1e-5f);
+ if (valid1 && valid2) {
+ /* If both solutions are valid, return the one with the shallower reflection since it will be
+ * closer to the input (if the original reflection wasn't shallow, we would not be in this
+ * part of the function). */
+ N_new = (R1 < R2) ? N1 : N2;
}
+ else {
+ /* If only one reflection is valid (= positive), pick that one. */
+ N_new = (R1 > R2) ? N1 : N2;
+ }
+ }
+ else if (valid1 || valid2) {
+ /* Only one solution passes the N'.z criterium, so pick that one. */
+ float Nz2 = valid1 ? N1_z2 : N2_z2;
+ N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
}
else {
- /* Bad incident */
- R = -I;
- NgR = dot(Ng, R);
- threshold = 0.01f;
+ return Ng;
}
- R = R + Ng * (threshold - NgR); /* Lift the reflection above the threshold. */
- return normalize(I * len(R) + R * len(I)); /* Find a bisector. */
+ return N_new.x * X + N_new.y * Ng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 5681510fc25..dd2390808ea 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -65,7 +65,6 @@ ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg,
uint visibility = path_state_ray_visibility(kg, state);
if (path_state_ao_bounce(kg, state)) {
- visibility = PATH_RAY_SHADOW;
ray->t = kernel_data.background.ao_distance;
}
@@ -416,7 +415,13 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
break;
}
else if (path_state_ao_bounce(kg, state)) {
- break;
+ if (intersection_get_shader_flags(kg, &isect) &
+ (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) {
+ state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ }
+ else {
+ break;
+ }
}
/* Setup shader data. */
@@ -554,7 +559,13 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
break;
}
else if (path_state_ao_bounce(kg, state)) {
- break;
+ if (intersection_get_shader_flags(kg, &isect) &
+ (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) {
+ state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT;
+ }
+ else {
+ break;
+ }
}
/* Setup shader data. */
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index c75958e79c5..dd922b86722 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -25,8 +25,9 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float3
subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, float r, bool all)
{
- /* this is the veach one-sample model with balance heuristic, some pdf
- * factors drop out when using balance heuristic weighting */
+ /* This is the Veach one-sample model with balance heuristic, some pdf
+ * factors drop out when using balance heuristic weighting. For branched
+ * path tracing (all) we sample all closure and don't use MIS. */
float3 eval_sum = zero_float3();
float pdf_sum = 0.0f;
float sample_weight_inv = 0.0f;
@@ -65,6 +66,30 @@ subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, f
return (pdf_sum > 0.0f) ? eval_sum / pdf_sum : zero_float3();
}
+ccl_device_inline float3 subsurface_scatter_walk_eval(ShaderData *sd,
+ const ShaderClosure *sc,
+ float3 throughput,
+ bool all)
+{
+ /* This is the Veach one-sample model with balance heuristic, some pdf
+ * factors drop out when using balance heuristic weighting. For branched
+ * path tracing (all) we sample all closure and don't use MIS. */
+ if (!all) {
+ float bssrdf_weight = 0.0f;
+ float weight = sc->sample_weight;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSSRDF(sc->type)) {
+ bssrdf_weight += sc->sample_weight;
+ }
+ }
+ throughput *= bssrdf_weight / weight;
+ }
+ return throughput;
+}
+
/* replace closures with a single diffuse bsdf closure after scatter step */
ccl_device void subsurface_scatter_setup_diffuse_bsdf(
KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N)
@@ -437,7 +462,8 @@ ccl_device_noinline
ccl_addr_space PathState *state,
const ShaderClosure *sc,
const float bssrdf_u,
- const float bssrdf_v)
+ const float bssrdf_v,
+ bool all)
{
/* Sample diffuse surface scatter into the object. */
float3 D;
@@ -605,6 +631,13 @@ ccl_device_noinline
if (hit) {
t = ray->t;
}
+ else if (bounce == 0) {
+ /* Restore original position if nothing was hit after the first bounce,
+ * without the ray_offset() that was added to avoid self-intersection.
+ * Otherwise if that offset is relatively large compared to the scattering
+ * radius, we never go back up high enough to exit the surface. */
+ ray->P = sd->P;
+ }
/* Advance to new scatter location. */
ray->P += t * ray->D;
@@ -662,7 +695,7 @@ ccl_device_noinline
/* TODO: gain back performance lost from merging with disk BSSRDF. We
* only need to return on hit so this indirect ray push/pop overhead
* is not actually needed, but it does keep the code simpler. */
- ss_isect->weight[0] = throughput;
+ ss_isect->weight[0] = subsurface_scatter_walk_eval(sd, sc, throughput, all);
#ifdef __SPLIT_KERNEL__
ss_isect->ray = *ray;
#endif
@@ -684,7 +717,7 @@ ccl_device_inline int subsurface_scatter_multi_intersect(KernelGlobals *kg,
return subsurface_scatter_disk(kg, ss_isect, sd, sc, lcg_state, bssrdf_u, bssrdf_v, all);
}
else {
- return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v);
+ return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v, all);
}
}
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index df56360b1df..74fa2826cd4 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -99,27 +99,23 @@ CCL_NAMESPACE_BEGIN
#define __AO__
#define __PASSES__
#define __HAIR__
-
-/* Without these we get an AO render, used by OpenCL preview kernel. */
-#ifndef __KERNEL_AO_PREVIEW__
-# define __SVM__
-# define __EMISSION__
-# define __HOLDOUT__
-# define __MULTI_CLOSURE__
-# define __TRANSPARENT_SHADOWS__
-# define __BACKGROUND_MIS__
-# define __LAMP_MIS__
-# define __CAMERA_MOTION__
-# define __OBJECT_MOTION__
-# define __BAKING__
-# define __PRINCIPLED__
-# define __SUBSURFACE__
-# define __VOLUME__
-# define __VOLUME_SCATTER__
-# define __CMJ__
-# define __SHADOW_RECORD_ALL__
-# define __BRANCHED_PATH__
-#endif
+#define __SVM__
+#define __EMISSION__
+#define __HOLDOUT__
+#define __MULTI_CLOSURE__
+#define __TRANSPARENT_SHADOWS__
+#define __BACKGROUND_MIS__
+#define __LAMP_MIS__
+#define __CAMERA_MOTION__
+#define __OBJECT_MOTION__
+#define __BAKING__
+#define __PRINCIPLED__
+#define __SUBSURFACE__
+#define __VOLUME__
+#define __VOLUME_SCATTER__
+#define __CMJ__
+#define __SHADOW_RECORD_ALL__
+#define __BRANCHED_PATH__
/* Device specific features */
#ifdef __KERNEL_CPU__
@@ -895,6 +891,8 @@ enum ShaderDataFlag {
SD_HAS_CONSTANT_EMISSION = (1 << 27),
/* Needs to access attributes for volume rendering */
SD_NEED_VOLUME_ATTRIBUTES = (1 << 28),
+ /* Shader has emission */
+ SD_HAS_EMISSION = (1 << 29),
SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME |
SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR |
@@ -1501,9 +1499,9 @@ typedef struct KernelAreaLight {
float axisu[3];
float invarea;
float axisv[3];
- float pad1;
+ float tan_spread;
float dir[3];
- float pad2;
+ float normalize_spread;
} KernelAreaLight;
typedef struct KernelDistantLight {
diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl b/intern/cycles/kernel/shaders/node_noise_texture.osl
index 61c0216910b..01196ab633a 100644
--- a/intern/cycles/kernel/shaders/node_noise_texture.osl
+++ b/intern/cycles/kernel/shaders/node_noise_texture.osl
@@ -25,7 +25,7 @@
* coordinates to act as a seed since the noise functions don't have seed values.
* A seed value is needed for generating distortion textures and color outputs.
* The offset's components are in the range [100, 200], not too high to cause
- * bad precision and not to small to be noticeable. We use float seed because
+ * bad precision and not too small to be noticeable. We use float seed because
* OSL only support float hashes.
*/
diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl
index 3963c23ea9c..c08d75b99ef 100644
--- a/intern/cycles/kernel/shaders/node_vector_math.osl
+++ b/intern/cycles/kernel/shaders/node_vector_math.osl
@@ -52,6 +52,9 @@ shader node_vector_math(string math_type = "add",
else if (math_type == "faceforward") {
Vector = compatible_faceforward(Vector1, Vector2, Vector3);
}
+ else if (math_type == "multiply_add") {
+ Vector = Vector1 * Vector2 + Vector3;
+ }
else if (math_type == "dot_product") {
Value = dot(Vector1, Vector2);
}
diff --git a/intern/cycles/kernel/shaders/stdcycles.h b/intern/cycles/kernel/shaders/stdcycles.h
index af7b645d9a2..dd604da68ce 100644
--- a/intern/cycles/kernel/shaders/stdcycles.h
+++ b/intern/cycles/kernel/shaders/stdcycles.h
@@ -84,30 +84,67 @@ closure color principled_hair(normal N,
closure color henyey_greenstein(float g) BUILTIN;
closure color absorption() BUILTIN;
-normal ensure_valid_reflection(normal Ng, normal I, normal N)
+normal ensure_valid_reflection(normal Ng, vector I, normal N)
{
/* The implementation here mirrors the one in kernel_montecarlo.h,
* check there for an explanation of the algorithm. */
- vector R;
- float NI = dot(N, I);
- float NgR, threshold;
-
- if (NI > 0) {
- R = (2 * NI) * N - I;
- NgR = dot(Ng, R);
- threshold = min(0.9 * dot(Ng, I), 0.01);
- if (NgR >= threshold) {
- return N;
+
+ float sqr(float x)
+ {
+ return x * x;
+ }
+
+ vector R = 2 * dot(N, I) * N - I;
+
+ float threshold = min(0.9 * dot(Ng, I), 0.01);
+ if (dot(Ng, R) >= threshold) {
+ return N;
+ }
+
+ float NdotNg = dot(N, Ng);
+ vector X = normalize(N - NdotNg * Ng);
+
+ float Ix = dot(I, X), Iz = dot(I, Ng);
+ float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
+ float a = Ix2 + Iz2;
+
+ float b = sqrt(Ix2 * (a - sqr(threshold)));
+ float c = Iz * threshold + a;
+
+ float fac = 0.5 / a;
+ float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
+ int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
+ int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
+
+ float N_new_x, N_new_z;
+ if (valid1 && valid2) {
+ float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
+ float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
+
+ float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
+ float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
+
+ valid1 = (R1 >= 1e-5);
+ valid2 = (R2 >= 1e-5);
+ if (valid1 && valid2) {
+ N_new_x = (R1 < R2) ? N1_x : N2_x;
+ N_new_z = (R1 < R2) ? N1_z : N2_z;
+ }
+ else {
+ N_new_x = (R1 > R2) ? N1_x : N2_x;
+ N_new_z = (R1 > R2) ? N1_z : N2_z;
}
}
+ else if (valid1 || valid2) {
+ float Nz2 = valid1 ? N1_z2 : N2_z2;
+ N_new_x = sqrt(1.0 - Nz2);
+ N_new_z = sqrt(Nz2);
+ }
else {
- R = -I;
- NgR = dot(Ng, R);
- threshold = 0.01;
+ return Ng;
}
- R = R + Ng * (threshold - NgR);
- return normalize(I * length(R) + R * length(I));
+ return N_new_x * X + N_new_z * Ng;
}
#endif /* CCL_STDOSL_H */
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index dda2e50f916..733ea28f9e5 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -58,7 +58,8 @@ ccl_device void svm_node_vector_math(KernelGlobals *kg,
float3 vector;
/* 3 Vector Operators */
- if (type == NODE_VECTOR_MATH_WRAP || type == NODE_VECTOR_MATH_FACEFORWARD) {
+ if (type == NODE_VECTOR_MATH_WRAP || type == NODE_VECTOR_MATH_FACEFORWARD ||
+ type == NODE_VECTOR_MATH_MULTIPLY_ADD) {
uint4 extra_node = read_node(kg, offset);
c = stack_load_float3(stack, extra_node.x);
}
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 389c44ab1da..9e654f2247f 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -52,6 +52,9 @@ ccl_device void svm_vector_math(float *value,
case NODE_VECTOR_MATH_FACEFORWARD:
*vector = faceforward(a, b, c);
break;
+ case NODE_VECTOR_MATH_MULTIPLY_ADD:
+ *vector = a * b + c;
+ break;
case NODE_VECTOR_MATH_DOT_PRODUCT:
*value = dot(a, b);
break;
@@ -242,12 +245,15 @@ ccl_device float3 svm_math_blackbody_color(float t)
return make_float3(4.70366907f, 0.0f, 0.0f);
}
+ /* Manually align for readability. */
+ /* clang-format off */
int i = (t >= 6365.0f) ? 5 :
(t >= 3315.0f) ? 4 :
(t >= 1902.0f) ? 3 :
(t >= 1449.0f) ? 2 :
(t >= 1167.0f) ? 1 :
0;
+ /* clang-format on */
ccl_constant float *r = blackbody_table_r[i];
ccl_constant float *g = blackbody_table_g[i];
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index 920dd7d9d02..61fd9553802 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
* coordinates to act as a seed since the noise functions don't have seed values.
* A seed value is needed for generating distortion textures and color outputs.
* The offset's components are in the range [100, 200], not too high to cause
- * bad precision and not to small to be noticeable. We use float seed because
+ * bad precision and not too small to be noticeable. We use float seed because
* OSL only support float hashes.
*/
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 4fe940f1a67..fc46bb584be 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -370,10 +370,13 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack
if (direction_type == NODE_TANGENT_UVMAP) {
/* UV map */
- if (desc.offset == ATTR_STD_NOT_FOUND)
- tangent = make_float3(0.0f, 0.0f, 0.0f);
- else
+ if (desc.offset == ATTR_STD_NOT_FOUND) {
+ stack_store_float3(stack, tangent_offset, zero_float3());
+ return;
+ }
+ else {
tangent = attribute_value;
+ }
}
else {
/* radial */
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 64a8f82a094..062afcfa5ac 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -341,6 +341,7 @@ typedef enum NodeVectorMathType {
NODE_VECTOR_MATH_TANGENT,
NODE_VECTOR_MATH_REFRACT,
NODE_VECTOR_MATH_FACEFORWARD,
+ NODE_VECTOR_MATH_MULTIPLY_ADD,
} NodeVectorMathType;
typedef enum NodeClampType {