Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2018-09-18 12:49:08 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2018-09-18 12:49:08 +0300
commit5aa5066c3ba8f11ad5d435e4f571b3aabd31cb4e (patch)
treef4ffa9aea6baf05cbbbaf11ae67a13848bdc62b1 /intern
parenteadf0f3d705c7f3cd2ba9e14f5568c421d88cb2a (diff)
parentd04eb330e873488d8b7cbe49da5600d6f64ea6db (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/util/util_avxb.h18
-rw-r--r--intern/cycles/util/util_avxf.h6
2 files changed, 20 insertions, 4 deletions
diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h
index b6d77857c6f..4add69bcb91 100644
--- a/intern/cycles/util/util_avxb.h
+++ b/intern/cycles/util/util_avxb.h
@@ -49,7 +49,7 @@ struct avxb
////////////////////////////////////////////////////////////////////////////////
__forceinline avxb( FalseTy ) : m256(_mm256_setzero_ps()) {}
- __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_cmpeq_epi32(_mm256_setzero_si256(), _mm256_setzero_si256()))) {}
+ __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
@@ -86,7 +86,21 @@ __forceinline const avxb operator ^=( avxb& a, const avxb& b ) { return a = a ^
////////////////////////////////////////////////////////////////////////////////
__forceinline const avxb operator !=( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); }
-__forceinline const avxb operator ==( const avxb& a, const avxb& b ) { return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); }
+__forceinline const avxb operator ==( const avxb& a, const avxb& b )
+{
+#ifdef __KERNEL_AVX2__
+ return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
+#else
+ __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
+ __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
+ __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
+ __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
+ __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
+ __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
+ __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
+ return _mm256_castsi256_ps(result);
+#endif
+}
__forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
#if defined(__KERNEL_SSE41__)
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 5596702ca20..0c15ba5cbbd 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -214,17 +214,19 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
#endif
}
__forceinline const avxf msub(const avxf& a, const avxf& b, const avxf& c) {
+#ifdef __KERNEL_AVX2__
return _mm256_fmsub_ps(a, b, c);
+#else
+ return (a*b) - c;
+#endif
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
-#ifdef __KERNEL_AVX2__
__forceinline const avxb operator <=(const avxf& a, const avxf& b) {
return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS);
}
-#endif
#endif