Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2016-10-25 15:47:34 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-10-25 15:47:34 +0300
commit064caae7b2943aa35953642fd4b15d0e9ec05a87 (patch)
treec9ee8cdfdac57a0b7b77faa16faee1cb5b64d7a9 /intern/cycles/kernel/bvh
parent81c9e0d2958a1274f8cb76386a3bafc08a181eed (diff)
Cycles: BVH-related SSE optimization
Several ideas here: - Optimize calculation of near_{x,y,z} in a way that does not require 3 if() statements per update, which avoids negative effect of wrong branch prediction. - Optimization of direction clamping for BVH. - Optimization of point/direction transform. Brings ~1.5% speedup again depending on a scene (unfortunately, this speedup can't be sum across all previous commits because speedup of each of the changes varies from scene to scene, but it still seems to be nice solid speedup of few percent on Linux and bigger speedup was reported on Windows). Once again ,thanks Maxym for inspiration! Still TODO: We have multiple places where we need to calculate near x,y,z indices in BVH, for now it's only done for main BVH traversal. Will try to move this calculation to an utility function and see if that can be easily re-used across all the BVH flavors.
Diffstat (limited to 'intern/cycles/kernel/bvh')
-rw-r--r--intern/cycles/kernel/bvh/qbvh_traversal.h46
1 files changed, 46 insertions, 0 deletions
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
index a1e154d6dcf..b9da539306b 100644
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ b/intern/cycles/kernel/bvh/qbvh_traversal.h
@@ -100,12 +100,27 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#endif
/* Offsets to select the side that becomes the lower or upper bound. */
+#ifdef __KERNEL_SSE__
+ int near_x = 0, near_y = 2, near_z = 4;
+ int far_x = 1, far_y = 3, far_z = 5;
+
+ const size_t mask = movemask(ssef(idir.m128));
+
+ const int mask_x = mask & 1;
+ const int mask_y = (mask & 2) >> 1;
+ const int mask_z = (mask & 4) >> 2;
+
+ near_x += mask_x; far_x -= mask_x;
+ near_y += mask_y; far_y -= mask_y;
+ near_z += mask_z; far_z -= mask_z;
+#else
int near_x, near_y, near_z;
int far_x, far_y, far_z;
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+#endif
IsectPrecalc isect_precalc;
triangle_intersect_precalc(dir, &isect_precalc);
@@ -427,9 +442,24 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
+#ifdef __KERNEL_SSE__
+ near_x = 0; near_y = 2; near_z = 4;
+ far_x = 1; far_y = 3; far_z = 5;
+
+ const size_t mask = movemask(ssef(idir.m128));
+
+ const int mask_x = mask & 1;
+ const int mask_y = (mask & 2) >> 1;
+ const int mask_z = (mask & 4) >> 2;
+
+ near_x += mask_x; far_x -= mask_x;
+ near_y += mask_y; far_y -= mask_y;
+ near_z += mask_z; far_z -= mask_z;
+#else
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+#endif
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
@@ -469,9 +499,25 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
# endif
+#ifdef __KERNEL_SSE__
+ near_x = 0; near_y = 2; near_z = 4;
+ far_x = 1; far_y = 3; far_z = 5;
+
+ const size_t mask = movemask(ssef(idir.m128));
+
+ const int mask_x = mask & 1;
+ const int mask_y = (mask & 2) >> 1;
+ const int mask_z = (mask & 4) >> 2;
+
+ near_x += mask_x; far_x -= mask_x;
+ near_y += mask_y; far_y -= mask_y;
+ near_z += mask_z; far_z -= mask_z;
+#else
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
+#endif
+
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));