From af411d918e68b487155309f5c1e29bb50924b69a Mon Sep 17 00:00:00 2001
From: Sergey Sharybin
Date: Tue, 25 Oct 2016 13:54:17 +0200
Subject: Cycles: Implement SSE-optimized path of util_max_axis()

The idea here is to avoid if statements which could cause wrong
branch prediction. Gives a bit of measurable speedup up to ~1%.

Still nice :)

Inspired by Maxym Dmytrychenko, thanks!
---
 intern/cycles/util/util_math.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'intern')

diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index b9594f7ec69..57cad39d1eb 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -1629,6 +1629,14 @@ ccl_device_inline float2 map_to_sphere(const float3 co)
 
 ccl_device_inline int util_max_axis(float3 vec)
 {
+#ifdef __KERNEL_SSE__  /* SSE path: picks the axis index via packed compares and a table lookup instead of if statements. */
+	__m128 a = shuffle<0,0,1,1>(vec.m128);  /* presumably a = (x, x, y, y); NOTE(review): confirm shuffle<> lane order */
+	__m128 b = shuffle<1,2,2,1>(vec.m128);  /* presumably b = (y, z, z, y) */
+	__m128 c = _mm_cmpgt_ps(a, b);  /* per-lane a > b; under the assumption above: (x>y, x>z, y>z, y>y) */
+	int mask = _mm_movemask_ps(c) & 0x7;  /* pack lane sign bits into an int; & 0x7 keeps lanes 0..2 only */
+	static const char tab[8] = {2, 2, 2, 0, 1, 2, 1, 0};  /* returned axis index for each 3-bit compare mask */
+	return tab[mask];
+#else  /* Non-SSE builds: nested scalar comparisons. */
 	if(vec.x > vec.y) {
 		if(vec.x > vec.z)
 			return 0;
@@ -1641,6 +1649,7 @@ ccl_device_inline int util_max_axis(float3 vec)
 		else
 			return 2;
 	}
+#endif  /* __KERNEL_SSE__ */
 }
 
 CCL_NAMESPACE_END
-- 
cgit v1.2.3