Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey Sharybin <sergey.vfx@gmail.com> 2016-07-07 13:23:13 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2016-07-07 18:25:48 +0300
commit: a08e2179f17569abed814f734dadfebf591e7024 (patch)
tree: e83a1cb3b5bb8204c84802a08be46aa86351d0f9 /intern/cycles/kernel/geom/geom_qbvh_volume_all.h
parent: b03e66e75fb79b503d48bc21dab0d67415516d4a (diff)
Cycles: Implement unaligned nodes BVH traversal
This commit implements traversal of unaligned BVH nodes. QBVH traversal is fully SIMD-optimized and calculates the orientation for all 4 children at a time; regular BVH traversal could probably be optimized a bit more.
Diffstat (limited to 'intern/cycles/kernel/geom/geom_qbvh_volume_all.h')
-rw-r--r-- intern/cycles/kernel/geom/geom_qbvh_volume_all.h 73
1 files changed, 54 insertions, 19 deletions
diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
index 89950f17c64..8a31775fae3 100644
--- a/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
+++ b/intern/cycles/kernel/geom/geom_qbvh_volume_all.h
@@ -26,6 +26,12 @@
*
*/
+#if BVH_FEATURE(BVH_HAIR)
+# define NODE_INTERSECT qbvh_node_intersect
+#else
+# define NODE_INTERSECT qbvh_aligned_node_intersect
+#endif
+
ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
@@ -72,13 +78,17 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
ssef tnear(0.0f), tfar(isect_t);
+#if BVH_FEATURE(BVH_HAIR)
+ sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+#endif
sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
- sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#else
- sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+ sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
#endif
/* Offsets to select the side that becomes the lower or upper bound. */
@@ -108,22 +118,35 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
ssef dist;
- int traverseChild = qbvh_node_intersect(kg,
- tnear,
- tfar,
+ int traverseChild = NODE_INTERSECT(kg,
+ tnear,
+ tfar,
#ifdef __KERNEL_AVX2__
- P_idir4,
-#else
- org,
+ P_idir4,
+#endif
+#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4,
+#endif
+#if BVH_FEATURE(BVH_HAIR)
+ dir4,
#endif
- idir4,
- near_x, near_y, near_z,
- far_x, far_y, far_z,
- nodeAddr,
- &dist);
+ idir4,
+ near_x, near_y, near_z,
+ far_x, far_y, far_z,
+ nodeAddr,
+ &dist);
if(traverseChild != 0) {
- float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ float4 cnodes;
+#if BVH_FEATURE(BVH_HAIR)
+ if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
+ cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+13);
+ }
+ else
+#endif
+ {
+ cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ }
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
@@ -330,12 +353,17 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
triangle_intersect_precalc(dir, &isect_precalc);
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -389,13 +417,18 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
tfar = ssef(isect_t);
+# if BVH_FEATURE(BVH_HAIR)
+ dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
+# endif
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-# else
- org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
# endif
+# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
+ org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
+# endif
+
triangle_intersect_precalc(dir, &isect_precalc);
isect_t = tmax;
isect_array->t = isect_t;
@@ -409,3 +442,5 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
return num_hits;
}
+
+#undef NODE_INTERSECT