Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util/util_ssei.h')
-rw-r--r--intern/cycles/util/util_ssei.h13
1 files changed, 8 insertions, 5 deletions
diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h
index 3ec69ab3700..cd51dbff2f1 100644
--- a/intern/cycles/util/util_ssei.h
+++ b/intern/cycles/util/util_ssei.h
@@ -446,7 +446,8 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssei shuffle(const ssei &a)
{
# ifdef __KERNEL_NEON__
- return shuffle_neon<ssei, i0, i1, i2, i3>(a);
+ int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a));
+ return vreinterpretq_m128i_s32(result);
# else
return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0));
# endif
@@ -456,7 +457,9 @@ template<size_t i0, size_t i1, size_t i2, size_t i3>
__forceinline const ssei shuffle(const ssei &a, const ssei &b)
{
# ifdef __KERNEL_NEON__
- return shuffle_neon<ssei, i0, i1, i2, i3>(a, b);
+ int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a),
+ vreinterpretq_s32_m128i(b));
+ return vreinterpretq_m128i_s32(result);
# else
return _mm_castps_si128(
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
@@ -514,7 +517,7 @@ __forceinline const ssei vreduce_add(const ssei &v)
__forceinline int reduce_min(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vminvq_s32(v);
+ return vminvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_min(v));
# endif
@@ -522,7 +525,7 @@ __forceinline int reduce_min(const ssei &v)
__forceinline int reduce_max(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vmaxvq_s32(v);
+ return vmaxvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_max(v));
# endif
@@ -530,7 +533,7 @@ __forceinline int reduce_max(const ssei &v)
__forceinline int reduce_add(const ssei &v)
{
# ifdef __KERNEL_NEON__
- return vaddvq_s32(v);
+ return vaddvq_s32(vreinterpretq_s32_m128i(v));
# else
return extract<0>(vreduce_add(v));
# endif