Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2016-10-25 14:54:17 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-10-25 14:54:17 +0300
commitaf411d918e68b487155309f5c1e29bb50924b69a (patch)
treebaebdc2747689aefbc74b03481685541f7788b81 /intern
parent3e7100644861368b5ec951cf36c631e0828012f8 (diff)
Cycles: Implement SSE-optimized path of util_max_axis()
The idea here is to avoid if statements, which can cause branch mispredictions. Gives a small but measurable speedup of up to ~1%. Still nice :) Inspired by Maxym Dmytrychenko, thanks!
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/util/util_math.h9
1 files changed, 9 insertions, 0 deletions
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index b9594f7ec69..57cad39d1eb 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -1629,6 +1629,14 @@ ccl_device_inline float2 map_to_sphere(const float3 co)
ccl_device_inline int util_max_axis(float3 vec)
{
+#ifdef __KERNEL_SSE__
+ __m128 a = shuffle<0,0,1,1>(vec.m128);
+ __m128 b = shuffle<1,2,2,1>(vec.m128);
+ __m128 c = _mm_cmpgt_ps(a, b);
+ int mask = _mm_movemask_ps(c) & 0x7;
+ static const char tab[8] = {2, 2, 2, 0, 1, 2, 1, 0};
+ return tab[mask];
+#else
if(vec.x > vec.y) {
if(vec.x > vec.z)
return 0;
@@ -1641,6 +1649,7 @@ ccl_device_inline int util_max_axis(float3 vec)
else
return 2;
}
+#endif
}
CCL_NAMESPACE_END