Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2014-12-15 19:18:01 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2014-12-25 00:50:49 +0300
commitf770bc4757a2b471d5aaee048359096c1c79a6b2 (patch)
tree853a0b93183aa814b4ec5a45f8050b47aa9c779d /intern/cycles/kernel
parent57d235d9f496fd71f5b57cef36d34fae5bf9d9ce (diff)
Cycles: Implement watertight ray/triangle intersection
Using this paper: Sven Woop, Watertight Ray/Triangle Intersection http://jcgt.org/published/0002/01/05/paper.pdf This change is expected to address quite reasonable amount of reports from the bug tracker, plus it might help reducing the noise in some scenes. Unfortunately, it's currently about 7% slower than the previous solution with pre-computed triangle plane equations, but maybe with some smart tweaks to the code (tests reshuffle, using SIMD in a nice way or so) we can avoid the speed regression. But perhaps smartest thing to do here would be to change single triangle / ray intersection with multiple triangles / ray intersections. That's how Embree does this and it's watertight single ray intersection is not any faster that this. Currently only triangle intersection is modified accordingly to the paper, in the future we would also want to modify the node / ray intersection. Reviewers: brecht, juicyfruit Subscribers: dingto, ton Differential Revision: https://developer.blender.org/D819
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_shadow.h9
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h8
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_traversal.h7
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_volume.h9
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h4
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h403
6 files changed, 324 insertions, 116 deletions
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
index 9704b0ac1b8..db7c564c4a3 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -81,6 +81,9 @@ ccl_device bool BVH_FUNCTION_NAME
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
/* traversal loop */
do {
do {
@@ -214,7 +217,7 @@ ccl_device bool BVH_FUNCTION_NAME
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
+ hit = triangle_intersect(kg, &isect_precalc, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
break;
}
#if FEATURE(BVH_MOTION)
@@ -295,6 +298,7 @@ ccl_device bool BVH_FUNCTION_NAME
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
#if defined(__KERNEL_SSE2__)
@@ -330,6 +334,8 @@ ccl_device bool BVH_FUNCTION_NAME
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+
/* scale isect->t to adjust for instancing */
for(int i = 0; i < num_hits_in_instance; i++)
(isect_array-i-1)->t *= t_fac;
@@ -342,6 +348,7 @@ ccl_device bool BVH_FUNCTION_NAME
#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
}
#if defined(__KERNEL_SSE2__)
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index e5cb60f537a..756130be3da 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -77,6 +77,9 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
/* traversal loop */
do {
do
@@ -202,7 +205,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
- triangle_intersect_subsurface(kg, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
break;
}
#if FEATURE(BVH_MOTION)
@@ -228,6 +231,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
@@ -265,6 +269,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+
#if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index 1d053ba5d08..7be22c0f3bf 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -89,6 +89,9 @@ ccl_device bool BVH_FUNCTION_NAME
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
/* traversal loop */
do {
do {
@@ -257,7 +260,7 @@ ccl_device bool BVH_FUNCTION_NAME
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg, isect, P, dir, visibility, object, primAddr);
+ hit = triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
break;
}
#if FEATURE(BVH_MOTION)
@@ -312,6 +315,7 @@ ccl_device bool BVH_FUNCTION_NAME
#else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
@@ -342,6 +346,7 @@ ccl_device bool BVH_FUNCTION_NAME
#else
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
#if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h
index 58dda7b5e06..7d6b9bdd8db 100644
--- a/intern/cycles/kernel/geom/geom_bvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_bvh_volume.h
@@ -83,6 +83,9 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
+ IsectPrecalc isect_precalc;
+ triangle_intersect_precalc(dir, &isect_precalc);
+
/* traversal loop */
do {
do {
@@ -209,7 +212,7 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
switch(type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
- triangle_intersect(kg, isect, P, dir, visibility, object, primAddr);
+ triangle_intersect(kg, &isect_precalc, isect, P, dir, visibility, object, primAddr);
break;
}
#if FEATURE(BVH_MOTION)
@@ -248,6 +251,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+
#if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
@@ -285,6 +290,8 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg,
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
+ triangle_intersect_precalc(dir, &isect_precalc);
+
#if defined(__KERNEL_SSE2__)
Psplat[0] = ssef(P.x);
Psplat[1] = ssef(P.y);
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 0390804d131..dd3928682e3 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -17,7 +17,9 @@
/* Triangle Primitive
*
- * Basic triangle with 3 vertices is used to represent mesh surfaces. */
+ * Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH
+ * ray intersection we use a precomputed triangle storage to accelerate
+ * intersection at the cost of more memory usage */
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index b965bddf330..4bb60ca78e0 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -1,6 +1,5 @@
/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
+ * Copyright 2014, Blender Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,122 +14,270 @@
* limitations under the License.
*/
-/* Triangle/Ray intersections
+/* Triangle/Ray intersections .
*
- * Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH
- * ray intersection we use a precomputed triangle storage to accelerate
- * intersection at the cost of more memory usage */
+ * For BVH ray intersection we use a precomputed triangle storage to accelerate
+ * intersection at the cost of more memory usage.
+ */
CCL_NAMESPACE_BEGIN
+/* Workaroudn stupidness of CUDA/OpenCL which doesn't allow to access indexed
+ * component of float3 value.
+ */
+#ifndef __KERNEL_CPU__
+# define IDX(vec, idx) \
+ ((idx == 0) ? ((vec).x) : ( (idx == 1) ? ((vec).y) : ((vec).z) ))
+#else
+# define IDX(vec, idx) ((vec)[idx])
+#endif
+
/* Ray-Triangle intersection for BVH traversal
*
- * Based on Sven Woop's algorithm with precomputed triangle storage */
+ * Sven Woop
+ * Watertight Ray/Triangle Intersection
+ *
+ * http://jcgt.org/published/0002/01/05/paper.pdf
+ */
+
+/* Precalculated data for the ray->tri intersection. */
+typedef struct IsectPrecalc {
+ /* Maximal dimension kz, and orthogonal dimensions. */
+ int kx, ky, kz;
+
+ /* Shear constants. */
+ float Sx, Sy, Sz;
+} IsectPrecalc;
+
+ccl_device_inline void triangle_intersect_precalc(float3 dir,
+ IsectPrecalc *isect_precalc)
+{
+ /* Calculate dimesion where the ray direction is maximal. */
+ int kz = util_max_axis(make_float3(fabsf(dir.x),
+ fabsf(dir.y),
+ fabsf(dir.z)));
+ int kx = kz + 1; if(kx == 3) kx = 0;
+ int ky = kx + 1; if(ky == 3) ky = 0;
+
+ /* Swap kx and ky dimensions to preserve winding direction of triangles. */
+ if(IDX(dir, kz) < 0.0f) {
+ int tmp = kx;
+ kx = ky;
+ ky = tmp;
+ }
-ccl_device_inline bool triangle_intersect(KernelGlobals *kg, Intersection *isect,
- float3 P, float3 dir, uint visibility, int object, int triAddr)
+ /* Calculate the shear constants. */
+ float inf_dir_z = 1.0f / IDX(dir, kz);
+ isect_precalc->Sx = IDX(dir, kx) * inf_dir_z;
+ isect_precalc->Sy = IDX(dir, ky) * inf_dir_z;
+ isect_precalc->Sz = inf_dir_z;
+
+ /* Store the dimensions. */
+ isect_precalc->kx = kx;
+ isect_precalc->ky = ky;
+ isect_precalc->kz = kz;
+}
+
+/* TODO(sergey): Make it general utility function. */
+ccl_device_inline float xor_signmast(float x, int y)
+{
+ return __int_as_float(__float_as_int(x) ^ y);
+}
+
+ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
+ const IsectPrecalc *isect_precalc,
+ Intersection *isect,
+ float3 P,
+ float3 dir,
+ uint visibility,
+ int object,
+ int triAddr)
{
- /* compute and check intersection t-value */
- float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
- float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
-
- float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
- float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
- float t = Oz * invDz;
-
- if(t > 0.0f && t < isect->t) {
- /* compute and check barycentric u */
- float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
- float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
- float u = Ox + t*Dx;
-
- if(u >= 0.0f) {
- /* compute and check barycentric v */
- float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
- float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
- float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
- float v = Oy + t*Dy;
-
- if(v >= 0.0f && u + v <= 1.0f) {
+ const int kx = isect_precalc->kx;
+ const int ky = isect_precalc->ky;
+ const int kz = isect_precalc->kz;
+ const float Sx = isect_precalc->Sx;
+ const float Sy = isect_precalc->Sy;
+ const float Sz = isect_precalc->Sz;
+
+ /* Calculate vertices relative to ray origin. */
+ float3 tri[3];
+ tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
+ tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
+ tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
+
+ const float3 A = tri[0] - P;
+ const float3 B = tri[1] - P;
+ const float3 C = tri[2] - P;
+
+ const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
+ const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
+ const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz);
+
+ /* Perform shear and scale of vertices. */
+ const float Ax = A_kx - Sx * A_kz;
+ const float Ay = A_ky - Sy * A_kz;
+ const float Bx = B_kx - Sx * B_kz;
+ const float By = B_ky - Sy * B_kz;
+ const float Cx = C_kx - Sx * C_kz;
+ const float Cy = C_ky - Sy * C_kz;
+
+ /* Calculate scaled barycentric coordinates. */
+ float U = Cx * By - Cy * Bx;
+ int sign_mask = (__float_as_int(U) & 0x80000000);
+ float V = Ax * Cy - Ay * Cx;
+ if(sign_mask != (__float_as_int(V) & 0x80000000)) {
+ return false;
+ }
+ float W = Bx * Ay - By * Ax;
+ if(sign_mask != (__float_as_int(W) & 0x80000000)) {
+ return false;
+ }
+
+ /* Calculate determinant. */
+ float det = U + V + W;
+ if(UNLIKELY(det == 0.0f)) {
+ return false;
+ }
+
+ /* Calculate scaled z−coordinates of vertices and use them to calculate
+ * the hit distance.
+ */
+ const float Az = Sz * A_kz;
+ const float Bz = Sz * B_kz;
+ const float Cz = Sz * C_kz;
+ const float T = U * Az + V * Bz + W * Cz;
+
+ if ((xor_signmast(T, sign_mask) < 0.0f) ||
+ (xor_signmast(T, sign_mask) > isect->t * xor_signmast(det, sign_mask)))
+ {
+ return false;
+ }
+
#ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+ /* visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags */
+ if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
#endif
- {
- /* record intersection */
- isect->t = t;
- isect->u = u;
- isect->v = v;
- isect->prim = triAddr;
- isect->object = object;
- isect->type = PRIMITIVE_TRIANGLE;
- return true;
- }
- }
- }
+ {
+ /* Normalize U, V, W, and T. */
+ const float inv_det = 1.0f / det;
+ isect->prim = triAddr;
+ isect->object = object;
+ isect->type = PRIMITIVE_TRIANGLE;
+ isect->u = U * inv_det;
+ isect->v = V * inv_det;
+ isect->t = T * inv_det;
+ return true;
}
-
return false;
}
/* Special ray intersection routines for subsurface scattering. In that case we
* only want to intersect with primitives in the same object, and if case of
- * multiple hits we pick a single random primitive as the intersection point. */
+ * multiple hits we pick a single random primitive as the intersection point.
+ */
#ifdef __SUBSURFACE__
-ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
- float3 P, float3 dir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
+ccl_device_inline void triangle_intersect_subsurface(
+ KernelGlobals *kg,
+ const IsectPrecalc *isect_precalc,
+ Intersection *isect_array,
+ float3 P,
+ float3 dir,
+ int object,
+ int triAddr,
+ float tmax,
+ uint *num_hits,
+ uint *lcg_state,
+ int max_hits)
{
- /* compute and check intersection t-value */
- float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
- float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
-
- float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
- float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
- float t = Oz * invDz;
-
- if(t > 0.0f && t < tmax) {
- /* compute and check barycentric u */
- float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
- float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
- float u = Ox + t*Dx;
-
- if(u >= 0.0f) {
- /* compute and check barycentric v */
- float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
- float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
- float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
- float v = Oy + t*Dy;
-
- if(v >= 0.0f && u + v <= 1.0f) {
- (*num_hits)++;
-
- int hit;
-
- if(*num_hits <= max_hits) {
- hit = *num_hits - 1;
- }
- else {
- /* reservoir sampling: if we are at the maximum number of
- * hits, randomly replace element or skip it */
- hit = lcg_step_uint(lcg_state) % *num_hits;
-
- if(hit >= max_hits)
- return;
- }
-
- /* record intersection */
- Intersection *isect = &isect_array[hit];
- isect->t = t;
- isect->u = u;
- isect->v = v;
- isect->prim = triAddr;
- isect->object = object;
- isect->type = PRIMITIVE_TRIANGLE;
- }
- }
+ const int kx = isect_precalc->kx;
+ const int ky = isect_precalc->ky;
+ const int kz = isect_precalc->kz;
+ const float Sx = isect_precalc->Sx;
+ const float Sy = isect_precalc->Sy;
+ const float Sz = isect_precalc->Sz;
+
+ /* Calculate vertices relative to ray origin. */
+ float3 tri[3];
+ int prim = kernel_tex_fetch(__prim_index, triAddr);
+ triangle_vertices(kg, prim, tri);
+
+ const float3 A = tri[0] - P;
+ const float3 B = tri[1] - P;
+ const float3 C = tri[2] - P;
+
+ const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
+ const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
+ const float C_kx = IDX(C, kx), C_ky = IDX(C, ky), C_kz = IDX(C, kz);
+
+ /* Perform shear and scale of vertices. */
+ const float Ax = A_kx - Sx * A_kz;
+ const float Ay = A_ky - Sy * A_kz;
+ const float Bx = B_kx - Sx * B_kz;
+ const float By = B_ky - Sy * B_kz;
+ const float Cx = C_kx - Sx * C_kz;
+ const float Cy = C_ky - Sy * C_kz;
+
+ /* Calculate scaled barycentric coordinates. */
+ float U = Cx * By - Cy * Bx;
+ int sign_mask = (__float_as_int(U) & 0x80000000);
+ float V = Ax * Cy - Ay * Cx;
+ if(sign_mask != (__float_as_int(V) & 0x80000000)) {
+ return;
+ }
+ float W = Bx * Ay - By * Ax;
+ if(sign_mask != (__float_as_int(W) & 0x80000000)) {
+ return;
+ }
+
+ /* Calculate determinant. */
+ float det = U + V + W;
+ if(UNLIKELY(det == 0.0f)) {
+ return;
+ }
+
+ /* Calculate scaled z−coordinates of vertices and use them to calculate
+ * the hit distance.
+ */
+ const float Az = Sz * A_kz;
+ const float Bz = Sz * B_kz;
+ const float Cz = Sz * C_kz;
+ const float T = U * Az + V * Bz + W * Cz;
+
+ if ((xor_signmast(T, sign_mask) < 0.0f) ||
+ (xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask)))
+ {
+ return;
+ }
+
+ /* Normalize U, V, W, and T. */
+ const float inv_det = 1.0f / det;
+
+ (*num_hits)++;
+ int hit;
+
+ if(*num_hits <= max_hits) {
+ hit = *num_hits - 1;
}
+ else {
+ /* reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it */
+ hit = lcg_step_uint(lcg_state) % *num_hits;
+
+ if(hit >= max_hits)
+ return;
+ }
+
+ /* record intersection */
+ Intersection *isect = &isect_array[hit];
+ isect->prim = triAddr;
+ isect->object = object;
+ isect->type = PRIMITIVE_TRIANGLE;
+ isect->u = U * inv_det;
+ isect->v = V * inv_det;
+ isect->t = T * inv_det;
}
#endif
@@ -138,7 +285,17 @@ ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersec
* far the precision is often not so good, this reintersects the primitive from
* a closer distance. */
-ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
+/* Reintersections uses the paper:
+ *
+ * Tomas Moeller
+ * Fast, minimum storage ray/triangle intersection
+ * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf
+ */
+
+ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray)
{
float3 P = ray->P;
float3 D = ray->D;
@@ -159,10 +316,17 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, cons
P = P + D*t;
- float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
- float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
- float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
- float rt = Oz * invDz;
+ float3 tri[3];
+ tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
+ tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
+ tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
+
+ float3 edge1 = tri[0] - tri[2];
+ float3 edge2 = tri[1] - tri[2];
+ float3 tvec = P - tri[2];
+ float3 qvec = cross(tvec, edge1);
+ float3 pvec = cross(D, edge2);
+ float rt = dot(edge2, qvec) / dot(edge1, pvec);
P = P + D*rt;
@@ -182,8 +346,13 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, cons
#endif
}
-/* same as above, except that isect->t is assumed to be in object space for instancing */
-ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
+/* Same as above, except that isect->t is assumed to be in object space for
+ * instancing.
+ */
+ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
+ ShaderData *sd,
+ const Intersection *isect,
+ const Ray *ray)
{
float3 P = ray->P;
float3 D = ray->D;
@@ -194,7 +363,9 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg,
+ isect->object,
+ OBJECT_INVERSE_TRANSFORM);
#endif
P = transform_point(&tfm, P);
@@ -204,10 +375,16 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
P = P + D*t;
- float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
- float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
- float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
- float rt = Oz * invDz;
+ float3 tri[3];
+ int prim = kernel_tex_fetch(__prim_index, isect->prim);
+ triangle_vertices(kg, prim, tri);
+
+ float3 edge1 = tri[0] - tri[2];
+ float3 edge2 = tri[1] - tri[2];
+ float3 tvec = P - tri[2];
+ float3 qvec = cross(tvec, edge1);
+ float3 pvec = cross(D, edge2);
+ float rt = dot(edge2, qvec) / dot(edge1, pvec);
P = P + D*rt;
@@ -215,7 +392,9 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+ Transform tfm = object_fetch_transform(kg,
+ isect->object,
+ OBJECT_TRANSFORM);
#endif
P = transform_point(&tfm, P);
@@ -227,4 +406,6 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderDat
#endif
}
+#undef IDX
+
CCL_NAMESPACE_END