Cycles: Switch to reformulated Pluecker ray/triangle intersection

The intention of this commit is to address the issues mentioned in reports T43865, T50164 and T50452.

The code is based on Embree code, with some extra vectorization to speed up single ray to single triangle intersection.

Unfortunately, such a fix does not come for free. There is some slowdown on AVX2 processors, mainly due to the different vectorization code, which results in a different number of instructions being executed and different instructions-per-cycle counters. On the other hand, this commit makes pre-AVX2 platforms such as AVX and SSE4.1 a bit faster.

The performance is as follows:

              2.78c AVX2   2.78c AVX   Patch AVX2          Patch AVX
  BMW         05:21.09     06:05.34    05:32.97 (+3.5%)    05:34.97 (-8.5%)
  Classroom   16:55.36     18:24.51    17:10.41 (+1.4%)    17:15.87 (-6.3%)
  Fishy Cat   08:08.49     08:36.26    08:09.19 (+0.2%)    08:12.25 (-4.7%)
  Koro        11:22.54     11:45.24    11:13.25 (-1.5%)    11:43.81 (-0.3%)
  Barcelone   14:18.32     16:09.46    14:15.20 (-0.4%)    14:25.15 (-10.8%)

On GPU the performance is about 1.5-2% slower in my tests on a GTX1080, but I am afraid we can't do much about that as part of this change, and consider it a price to pay for a more proper intersection check.

Made in collaboration with Maxym Dmytrychenko, big thanks to him!

Reviewers: brecht, juicyfruit, lukasstockner97, dingto

Differential Revision: https://developer.blender.org/D1574
author: Sergey Sharybin <sergey.vfx@gmail.com> 2017-03-27 18:06:37 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2017-03-28 18:26:47 +0300
commit: 6ea54fe9ffe2b2514990fdf3489ca53d05ce449a (patch)
tree: 3026ce049c43bf8e0d56fbad57e3cbb844444fc4 /intern/cycles/kernel/bvh/qbvh_traversal.h
parent: 69aa6577b3dfea5d8a6d915fad7fb7650d8b6278 (diff)
1 files changed, 2 insertions, 9 deletions
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
index d88e0e07203..a05913c3a29 100644
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -106,9 +106,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 	                       &near_x, &near_y, &near_z,
 	                       &far_x, &far_y, &far_z);
 
-	TriangleIsectPrecalc isect_precalc;
-	ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 	/* Traversal loop. */
 	do {
 		do {
@@ -333,9 +330,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 								BVH_DEBUG_NEXT_INTERSECTION();
 								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								if(triangle_intersect(kg,
-								                      &isect_precalc,
 								                      isect,
 								                      P,
+								                      dir,
 								                      visibility,
 								                      object,
 								                      prim_addr)) {
@@ -354,9 +351,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 								BVH_DEBUG_NEXT_INTERSECTION();
 								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								if(motion_triangle_intersect(kg,
-								                             &isect_precalc,
 								                             isect,
 								                             P,
+								                             dir,
 								                             ray->time,
 								                             visibility,
 								                             object,
@@ -447,8 +444,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 					org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-					ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 					++stack_ptr;
 					kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
 					traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
@@ -489,8 +484,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 			org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
 #  endif
 
-			ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 			object = OBJECT_NONE;
 			node_addr = traversal_stack[stack_ptr].addr;
 			node_dist = traversal_stack[stack_ptr].dist;
author	Sergey Sharybin <sergey.vfx@gmail.com>	2017-03-27 18:06:37 +0300
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2017-03-28 18:26:47 +0300
commit	6ea54fe9ffe2b2514990fdf3489ca53d05ce449a (patch)
tree	3026ce049c43bf8e0d56fbad57e3cbb844444fc4 /intern/cycles/kernel/bvh/qbvh_traversal.h
parent	69aa6577b3dfea5d8a6d915fad7fb7650d8b6278 (diff)