diff options
author | Sv. Lockal <lockalsash@gmail.com> | 2015-03-07 20:25:21 +0300 |
---|---|---|
committer | Sv. Lockal <lockalsash@gmail.com> | 2015-03-07 20:25:21 +0300 |
commit | c32ded36548b26c5722803556e2b502d3dbbba8a (patch) | |
tree | ecfcbcbf3455e5f238866ce39d2b6328cc5f45d8 /intern/cycles/util/util_ssei.h | |
parent | c8fb488b087f91ba58ac16e4e76050b660122417 (diff) |
Cycles: add better specializations for SSE shuffle function and few more wrappers.
Diffstat (limited to 'intern/cycles/util/util_ssei.h')
-rw-r--r-- | intern/cycles/util/util_ssei.h | 10 |
1 files changed, 2 insertions, 8 deletions
```diff
diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h
index ddd7a858941..5f62569268c 100644
--- a/intern/cycles/util/util_ssei.h
+++ b/intern/cycles/util/util_ssei.h
@@ -190,8 +190,8 @@ __forceinline const ssei select( const int mask, const ssei& t, const ssei& f )
 // Movement/Shifting/Shuffling Functions
 ////////////////////////////////////////////////////////////////////////////////
 
-__forceinline ssei unpacklo( const ssei& a, const ssei& b ) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a.m128), _mm_castsi128_ps(b.m128))); }
-__forceinline ssei unpackhi( const ssei& a, const ssei& b ) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a.m128), _mm_castsi128_ps(b.m128))); }
+__forceinline ssei unpacklo( const ssei& a, const ssei& b ) { return _mm_unpacklo_epi32(a, b); }
+__forceinline ssei unpackhi( const ssei& a, const ssei& b ) { return _mm_unpackhi_epi32(a, b); }
 
 template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei shuffle( const ssei& a ) {
 	return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0));
@@ -201,12 +201,6 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei sh
 	return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
 }
 
-#if defined(__KERNEL_SSE3__)
-template<> __forceinline const ssei shuffle<0, 0, 2, 2>( const ssei& a ) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(a))); }
-template<> __forceinline const ssei shuffle<1, 1, 3, 3>( const ssei& a ) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(a))); }
-template<> __forceinline const ssei shuffle<0, 1, 0, 1>( const ssei& a ) { return _mm_castpd_si128(_mm_movedup_pd (_mm_castsi128_pd(a))); }
-#endif
-
 template<size_t i0> __forceinline const ssei shuffle( const ssei& b ) {
 	return shuffle<i0,i0,i0,i0>(b);
 }
```