Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util/util_avxb.h')
-rw-r--r--intern/cycles/util/util_avxb.h72
1 files changed, 17 insertions, 55 deletions
diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h
index 60d9bb44256..25ef39d39ae 100644
--- a/intern/cycles/util/util_avxb.h
+++ b/intern/cycles/util/util_avxb.h
@@ -44,23 +44,12 @@ struct avxb
// Reinterpret the stored __m256 as a 256-bit integer vector; _mm256_castps_si256 is a bit-level cast, not a numeric conversion.
__forceinline operator const __m256i( void ) const { return _mm256_castps_si256(m256); }
// Reinterpret the stored __m256 as a 256-bit double vector; _mm256_castps_pd is a bit-level cast, not a numeric conversion.
__forceinline operator const __m256d( void ) const { return _mm256_castps_pd(m256); }
- //__forceinline avxb ( bool a )
- // : m256(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
- //__forceinline avxb ( bool a, bool b)
- // : m256(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) {}
- //__forceinline avxb ( bool a, bool b, bool c, bool d)
- // : m256(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}
- //__forceinline avxb(int mask) {
- // assert(mask >= 0 && mask < 16);
- // m128 = _mm_lookupmask_ps[mask];
- //}
-
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
// Construct from the FalseTy tag: every bit of the 256-bit value is cleared.
__forceinline avxb( FalseTy ) : m256(_mm256_setzero_ps()) {}
- __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_cmpeq_epi32(_mm256_setzero_si256(), _mm256_setzero_si256()))) {}
+ __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) {} // every 32-bit lane is -1, i.e. all 256 bits set; _mm256_set1_epi32 needs only AVX
////////////////////////////////////////////////////////////////////////////////
/// Array Access
@@ -97,7 +86,21 @@ __forceinline const avxb operator ^=( avxb& a, const avxb& b ) { return a = a ^
////////////////////////////////////////////////////////////////////////////////
// Inequality is computed as the bitwise XOR of the two operands.
// NOTE(review): this is a per-lane "differs" mask only if every input lane is all-zeros or all-ones - presumably the avxb invariant; confirm.
__forceinline const avxb operator !=( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); }
-__forceinline const avxb operator ==( const avxb& a, const avxb& b ) { return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); }
+__forceinline const avxb operator ==( const avxb& a, const avxb& b ) // per-lane 32-bit integer equality of a and b, returned as a mask
+{
+#ifdef __KERNEL_AVX2__ // _mm256_cmpeq_epi32 is an AVX2 intrinsic, so the single 256-bit compare is used only here
+ return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
+#else // fallback: split into 128-bit halves, compare each with _mm_cmpeq_epi32, then rejoin
+ __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0)); // lanes 0-3 of a
+ __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1)); // lanes 4-7 of a
+ __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0)); // lanes 0-3 of b
+ __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1)); // lanes 4-7 of b
+ __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
+ __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
+ __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1); // c_lo becomes the low half, c_hi is inserted as the high half
+ return _mm256_castsi256_ps(result);
+#endif
+}
__forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
#if defined(__KERNEL_SSE41__)
@@ -114,47 +117,6 @@ __forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
// Thin wrapper over _mm256_unpacklo_ps, which interleaves the low two elements of a and b within each 128-bit half.
__forceinline const avxb unpacklo( const avxb& a, const avxb& b ) { return _mm256_unpacklo_ps(a, b); }
// Thin wrapper over _mm256_unpackhi_ps, which interleaves the high two elements of a and b within each 128-bit half.
__forceinline const avxb unpackhi( const avxb& a, const avxb& b ) { return _mm256_unpackhi_ps(a, b); }
-#define _MM256_SHUFFLE(fp7,fp6,fp5,fp4,fp3,fp2,fp1,fp0) (((fp7) << 14) | ((fp6) << 12) | ((fp5) << 10) | ((fp4) << 8) | \
- ((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
-
-template<size_t i0, size_t i1, size_t i2, size_t i3, size_t i4, size_t i5, size_t i6, size_t i7>
-__forceinline const avxb shuffle( const avxb& a ) {
- return _mm256_cvtepi32_ps(_mm256_shuffle_epi32(a, _MM256_SHUFFLE(i7, i6, i5, i4, i3, i2, i1, i0)));
-}
-
-/*
-template<> __forceinline const avxb shuffle<0, 1, 0, 1, 0, 1, 0, 1>( const avxb& a ) {
- return _mm_movelh_ps(a, a);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a ) {
- return _mm_movehl_ps(a, a);
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a, const sseb& b ) {
- return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
-}
-
-template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a, const sseb& b ) {
- return _mm_movelh_ps(a, b);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a, const sseb& b ) {
- return _mm_movehl_ps(b, a);
-}
-
-#if defined(__KERNEL_SSE3__)
-template<> __forceinline const sseb shuffle<0, 0, 2, 2>( const sseb& a ) { return _mm_moveldup_ps(a); }
-template<> __forceinline const sseb shuffle<1, 1, 3, 3>( const sseb& a ) { return _mm_movehdup_ps(a); }
-#endif
-
-#if defined(__KERNEL_SSE41__)
-template<size_t dst, size_t src, size_t clr> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); }
-template<size_t dst, size_t src> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return insert<dst, src, 0>(a, b); }
-template<size_t dst> __forceinline const sseb insert( const sseb& a, const bool b ) { return insert<dst,0>(a, sseb(b)); }
-#endif
-*/
-
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
@@ -180,7 +142,7 @@ __forceinline size_t movemask( const avxb& a ) { return _mm256_movemask_ps(a); }
// Debug helper: print `label` followed by the eight elements a[0]..a[7] on a single line via printf.
ccl_device_inline void print_avxb(const char *label, const avxb &a)
{
- printf("%s: %df %df %df %df %df %df %df %d\n",
+ printf("%s: %d %d %d %d %d %d %d %d\n",
 label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
}