Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2017-03-27 18:06:37 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2017-03-28 18:26:47 +0300
commit6ea54fe9ffe2b2514990fdf3489ca53d05ce449a (patch)
tree3026ce049c43bf8e0d56fbad57e3cbb844444fc4 /intern/cycles/kernel/geom
parent69aa6577b3dfea5d8a6d915fad7fb7650d8b6278 (diff)
Cycles: Switch to reformulated Pluecker ray/triangle intersection
The intention of this commit it to address issues mentioned in the reports T43865,T50164 and T50452. The code is based on Embree code with some extra vectorization to speed up single ray to single triangle intersection. Unfortunately, such a fix is not coming for free. There is some slowdown for AVX2 processors, mainly due to different vectorization code, which caused different number of instructions to be executed and different instructions-per-cycle counters. But on another hand this commit makes pre-AVX2 platforms such as AVX and SSE4.1 a bit faster. The prerformance goes as following: 2.78c AVX2 2.78c AVX Patch AVX2 Patch AVX BMW 05:21.09 06:05.34 05:32.97 (+3.5%) 05:34.97 (-8.5%) Classroom 16:55.36 18:24.51 17:10.41 (+1.4%) 17:15.87 (-6.3%) Fishy Cat 08:08.49 08:36.26 08:09.19 (+0.2%) 08:12.25 (-4.7% Koro 11:22.54 11:45.24 11:13.25 (-1.5%) 11:43.81 (-0.3%) Barcelone 14:18.32 16:09.46 14:15.20 (-0.4%) 14:25.15 (-10.8%) On GPU the performance is about 1.5-2% slower in my tests on GTX1080 but afraid we can't do much as a part of this chaneg here and consider it a price to pay for more proper intersection check. Made in collaboration with Maxym Dmytrychenko, big thanks to him! Reviewers: brecht, juicyfruit, lukasstockner97, dingto Differential Revision: https://developer.blender.org/D1574
Diffstat (limited to 'intern/cycles/kernel/geom')
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle_intersect.h16
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h41
2 files changed, 23 insertions, 34 deletions
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
index 971a34308f1..f74995becf5 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
@@ -168,9 +168,9 @@ float3 motion_triangle_refine_subsurface(KernelGlobals *kg,
ccl_device_inline bool motion_triangle_intersect(
KernelGlobals *kg,
- const TriangleIsectPrecalc *isect_precalc,
Intersection *isect,
float3 P,
+ float3 dir,
float time,
uint visibility,
int object,
@@ -186,10 +186,10 @@ ccl_device_inline bool motion_triangle_intersect(
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if(ray_triangle_intersect(isect_precalc,
- P,
+ if(ray_triangle_intersect(P,
+ dir,
isect->t,
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
(ssef*)verts,
#else
verts[0], verts[1], verts[2],
@@ -222,9 +222,9 @@ ccl_device_inline bool motion_triangle_intersect(
#ifdef __SUBSURFACE__
ccl_device_inline void motion_triangle_intersect_subsurface(
KernelGlobals *kg,
- const TriangleIsectPrecalc *isect_precalc,
SubsurfaceIntersection *ss_isect,
float3 P,
+ float3 dir,
float time,
int object,
int prim_addr,
@@ -242,10 +242,10 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
motion_triangle_vertices(kg, fobject, prim, time, verts);
/* Ray-triangle intersection, unoptimized. */
float t, u, v;
- if(ray_triangle_intersect(isect_precalc,
- P,
+ if(ray_triangle_intersect(P,
+ dir,
tmax,
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
(ssef*)verts,
#else
verts[0], verts[1], verts[2],
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 584d0b3508f..804e74d7e37 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -22,25 +22,16 @@
CCL_NAMESPACE_BEGIN
-/* Ray-Triangle intersection for BVH traversal
- *
- * Sven Woop
- * Watertight Ray/Triangle Intersection
- *
- * http://jcgt.org/published/0002/01/05/paper.pdf
- */
-
ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
- const TriangleIsectPrecalc *isect_precalc,
Intersection *isect,
float3 P,
+ float3 dir,
uint visibility,
int object,
int prim_addr)
{
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
-
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
#else
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
@@ -48,9 +39,10 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
#endif
float t, u, v;
- if(ray_triangle_intersect(isect_precalc,
- P, isect->t,
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ if(ray_triangle_intersect(P,
+ dir,
+ isect->t,
+#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
ssef_verts,
#else
float4_to_float3(tri_a),
@@ -86,9 +78,9 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
#ifdef __SUBSURFACE__
ccl_device_inline void triangle_intersect_subsurface(
KernelGlobals *kg,
- const TriangleIsectPrecalc *isect_precalc,
SubsurfaceIntersection *ss_isect,
float3 P,
+ float3 dir,
int object,
int prim_addr,
float tmax,
@@ -96,8 +88,7 @@ ccl_device_inline void triangle_intersect_subsurface(
int max_hits)
{
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
-
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
#else
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
@@ -105,14 +96,13 @@ ccl_device_inline void triangle_intersect_subsurface(
tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
#endif
float t, u, v;
- if(!ray_triangle_intersect(isect_precalc,
- P, tmax,
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+ if(!ray_triangle_intersect(P,
+ dir,
+ tmax,
+#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
ssef_verts,
#else
- tri_a,
- tri_b,
- tri_c,
+ tri_a, tri_b, tri_c,
#endif
&u, &v, &t))
{
@@ -150,15 +140,14 @@ ccl_device_inline void triangle_intersect_subsurface(
isect->t = t;
/* Record geometric normal. */
- /* TODO(sergey): Check whether it's faster to re-use ssef verts. */
-#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
#endif
ss_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
}
-#endif
+#endif /* __SUBSURFACE__ */
/* Refine triangle intersection to more precise hit point. For rays that travel
* far the precision is often not so good, this reintersects the primitive from