Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRay Molenkamp <github@lazydodo.com>2018-09-18 01:27:13 +0300
committerRay Molenkamp <github@lazydodo.com>2018-09-18 01:27:13 +0300
commit16b8d223b718925d6aadd30b802967f010076f5d (patch)
tree34f000f3ac329d5b5e9b297c5c37e4c35dfe337b /intern/cycles/util
parent51ac494f219c2a7bd2937b63c91a3261d6ba3ad9 (diff)
Cycles: Fix usage of AVX2 intrinsics in AVX kernel
While building the AVX kernel, util_avxf.h/avxb.h were using some AVX2 intrinsics, these were never called, so it wasn't a run-time issue, but the intrinsics headers on centos excluded the AVX2 prototypes when building the AVX kernel causing build errors. This commit cleans up the improper usage of the AVX2 intrinsics and provides AVX fallback implementations for future use. Differential Revision: https://developer.blender.org/D3696
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/util_avxb.h18
-rw-r--r--intern/cycles/util/util_avxf.h6
2 files changed, 20 insertions, 4 deletions
diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h
index b6d77857c6f..4add69bcb91 100644
--- a/intern/cycles/util/util_avxb.h
+++ b/intern/cycles/util/util_avxb.h
@@ -49,7 +49,7 @@ struct avxb
////////////////////////////////////////////////////////////////////////////////
__forceinline avxb( FalseTy ) : m256(_mm256_setzero_ps()) {}
- __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_cmpeq_epi32(_mm256_setzero_si256(), _mm256_setzero_si256()))) {}
+ __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
@@ -86,7 +86,21 @@ __forceinline const avxb operator ^=( avxb& a, const avxb& b ) { return a = a ^
////////////////////////////////////////////////////////////////////////////////
__forceinline const avxb operator !=( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); }
-__forceinline const avxb operator ==( const avxb& a, const avxb& b ) { return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); }
+__forceinline const avxb operator ==( const avxb& a, const avxb& b )
+{
+#ifdef __KERNEL_AVX2__
+ return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
+#else
+ __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
+ __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
+ __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
+ __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
+ __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
+ __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
+ __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
+ return _mm256_castsi256_ps(result);
+#endif
+}
__forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
#if defined(__KERNEL_SSE41__)
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 5596702ca20..0c15ba5cbbd 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -214,17 +214,19 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
#endif
}
__forceinline const avxf msub(const avxf& a, const avxf& b, const avxf& c) {
+#ifdef __KERNEL_AVX2__
return _mm256_fmsub_ps(a, b, c);
+#else
+ return (a*b) - c;
+#endif
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
-#ifdef __KERNEL_AVX2__
__forceinline const avxb operator <=(const avxf& a, const avxf& b) {
return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS);
}
-#endif
#endif