diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2013-06-19 21:54:26 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2013-06-19 21:54:26 +0400 |
commit | f811e6e3ae79be46144d33baf11dbfb618fd4315 (patch) | |
tree | dbcd3941f049fdaac479b5602cb550cc50571e05 /intern/cycles/util/util_types.h | |
parent | 16204bd64759fddc940800f39fc91461ee340424 (diff) |
Cycles: the optimized SSE BVH traversal now also works on CPUs that only support
SSE2 (all the way back to the Pentium 4), falling back to a slightly less efficient shuffle instruction when SSSE3 is not available.
Also ensure /Ox is used for Visual Studio for RelWithDebInfo builds.
Diffstat (limited to 'intern/cycles/util/util_types.h')
-rw-r--r-- | intern/cycles/util/util_types.h | 51 |
1 files changed, 46 insertions, 5 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index d0116186ee8..4fd1e9d8807 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -76,8 +76,8 @@ #define __KERNEL_SSE2__ #endif -#ifndef __KERNEL_SSSE3__ -#define __KERNEL_SSSE3__ +#ifndef __KERNEL_SSE3__ +#define __KERNEL_SSE3__ #endif #ifndef __KERNEL_SSSE3__ @@ -100,7 +100,7 @@ #include <xmmintrin.h> /* SSE 1 */ #include <emmintrin.h> /* SSE 2 */ -#ifdef __KERNEL_SSSE3__ +#ifdef __KERNEL_SSE3__ #include <pmmintrin.h> /* SSE 3 */ #endif #ifdef __KERNEL_SSSE3__ @@ -491,15 +491,56 @@ __device_inline int4 make_int4(const float3& f) #endif -#ifdef __KERNEL_SSSE3__ +#ifdef __KERNEL_SSE2__ /* SSE shuffle utility functions */ -__device_inline const __m128 shuffle8(const __m128& a, const __m128i& shuf) +#ifdef __KERNEL_SSSE3__ + +/* faster version for SSSE3 */ +typedef __m128i shuffle_swap_t; + +__device_inline const shuffle_swap_t shuffle_swap_identity(void) +{ + return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +} + +__device_inline const shuffle_swap_t shuffle_swap_swap(void) +{ + return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); +} + +__device_inline const __m128 shuffle_swap(const __m128& a, const shuffle_swap_t& shuf) { return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); } +#else + +/* somewhat slower version for SSE3 */ +typedef int shuffle_swap_t; + +__device_inline const shuffle_swap_t shuffle_swap_identity(void) +{ + return 0; +} + +__device_inline const shuffle_swap_t shuffle_swap_swap(void) +{ + return 1; +} + +__device_inline const __m128 shuffle_swap(const __m128& a, shuffle_swap_t shuf) +{ + /* shuffle value must be a constant, so we need to branch */ + if(shuf) + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + else + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 1, 0)); +} + +#endif + template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m128 
shuffle(const __m128& a, const __m128& b) { return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); |