diff options
Diffstat (limited to 'intern/cycles/util/util_ssei.h')
-rw-r--r-- | intern/cycles/util/util_ssei.h | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h
index 3ec69ab3700..cd51dbff2f1 100644
--- a/intern/cycles/util/util_ssei.h
+++ b/intern/cycles/util/util_ssei.h
@@ -446,7 +446,8 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
 __forceinline const ssei shuffle(const ssei &a)
 {
 # ifdef __KERNEL_NEON__
-  return shuffle_neon<ssei, i0, i1, i2, i3>(a);
+  int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
+  return vreinterpretq_m128i_s32(result);
 # else
   return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0));
 # endif
@@ -456,7 +457,9 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
 __forceinline const ssei shuffle(const ssei &a, const ssei &b)
 {
 # ifdef __KERNEL_NEON__
-  return shuffle_neon<ssei, i0, i1, i2, i3>(a, b);
+  int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
+                                                             vreinterpretq_s32_m128i(b));
+  return vreinterpretq_m128i_s32(result);
 # else
   return _mm_castps_si128(
       _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
@@ -514,7 +517,7 @@ __forceinline const ssei vreduce_add(const ssei &v)
 __forceinline int reduce_min(const ssei &v)
 {
 # ifdef __KERNEL_NEON__
-  return vminvq_s32(v);
+  return vminvq_s32(vreinterpretq_s32_m128i(v));
 # else
   return extract<0>(vreduce_min(v));
 # endif
@@ -522,7 +525,7 @@ __forceinline int reduce_min(const ssei &v)
 __forceinline int reduce_max(const ssei &v)
 {
 # ifdef __KERNEL_NEON__
-  return vmaxvq_s32(v);
+  return vmaxvq_s32(vreinterpretq_s32_m128i(v));
 # else
   return extract<0>(vreduce_max(v));
 # endif
@@ -530,7 +533,7 @@ __forceinline int reduce_max(const ssei &v)
 __forceinline int reduce_add(const ssei &v)
 {
 # ifdef __KERNEL_NEON__
-  return vaddvq_s32(v);
+  return vaddvq_s32(vreinterpretq_s32_m128i(v));
 # else
   return extract<0>(vreduce_add(v));
 # endif