diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-10-25 14:54:17 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-10-25 14:54:17 +0300 |
commit | af411d918e68b487155309f5c1e29bb50924b69a (patch) | |
tree | baebdc2747689aefbc74b03481685541f7788b81 /intern/cycles/util/util_math.h | |
parent | 3e7100644861368b5ec951cf36c631e0828012f8 (diff) |
Cycles: Implement SSE-optimized path of util_max_axis()
The idea here is to avoid if statements, which can cause branch
mispredictions.
Gives a small but measurable speedup of up to ~1%. Still nice :)
Inspired by Maxym Dmytrychenko, thanks!
Diffstat (limited to 'intern/cycles/util/util_math.h')
-rw-r--r-- | intern/cycles/util/util_math.h | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index b9594f7ec69..57cad39d1eb 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -1629,6 +1629,14 @@ ccl_device_inline float2 map_to_sphere(const float3 co) ccl_device_inline int util_max_axis(float3 vec) { +#ifdef __KERNEL_SSE__ + __m128 a = shuffle<0,0,1,1>(vec.m128); + __m128 b = shuffle<1,2,2,1>(vec.m128); + __m128 c = _mm_cmpgt_ps(a, b); + int mask = _mm_movemask_ps(c) & 0x7; + static const char tab[8] = {2, 2, 2, 0, 1, 2, 1, 0}; + return tab[mask]; +#else if(vec.x > vec.y) { if(vec.x > vec.z) return 0; @@ -1641,6 +1649,7 @@ ccl_device_inline int util_max_axis(float3 vec) else return 2; } +#endif } CCL_NAMESPACE_END |