diff options
Diffstat (limited to 'intern/cycles/util/ssei.h')
-rw-r--r-- | intern/cycles/util/ssei.h | 633 |
1 files changed, 0 insertions, 633 deletions
diff --git a/intern/cycles/util/ssei.h b/intern/cycles/util/ssei.h deleted file mode 100644 index 5caf44c967f..00000000000 --- a/intern/cycles/util/ssei.h +++ /dev/null @@ -1,633 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014-2022 Blender Foundation. */ - -#ifndef __UTIL_SSEI_H__ -#define __UTIL_SSEI_H__ - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_SSE2__ - -struct sseb; -struct ssef; - -/*! 4-wide SSE integer type. */ -struct ssei { - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { - __m128i m128; - int32_t i[4]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline ssei() - { - } - __forceinline ssei(const ssei &a) - { - m128 = a.m128; - } - __forceinline ssei &operator=(const ssei &a) - { - m128 = a.m128; - return *this; - } - - __forceinline ssei(const __m128i a) : m128(a) - { - } - __forceinline operator const __m128i &(void) const - { - return m128; - } - __forceinline operator __m128i &(void) - { - return m128; - } - - __forceinline ssei(const int a) : m128(_mm_set1_epi32(a)) - { - } - __forceinline ssei(int a, int b, int c, int d) : m128(_mm_setr_epi32(a, b, c, d)) - { - } - - __forceinline explicit ssei(const __m128 a) : m128(_mm_cvtps_epi32(a)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const int32_t &operator[](const size_t index) const - { - assert(index < 4); - return i[index]; - } - __forceinline int32_t &operator[](const size_t index) - { - assert(index < 4); - return i[index]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssei cast(const __m128 &a) -{ - return _mm_castps_si128(a); -} -__forceinline const ssei operator+(const ssei &a) -{ - return a; -} -__forceinline const ssei operator-(const ssei &a) -{ - return _mm_sub_epi32(_mm_setzero_si128(), a.m128); -} -# if defined(__KERNEL_SSSE3__) -__forceinline const ssei abs(const ssei &a) -{ - return _mm_abs_epi32(a.m128); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssei operator+(const ssei &a, const ssei &b) -{ - return _mm_add_epi32(a.m128, b.m128); -} -__forceinline const ssei operator+(const ssei &a, const int32_t &b) -{ - return a + ssei(b); -} -__forceinline const ssei operator+(const int32_t &a, const ssei &b) -{ - return ssei(a) + b; -} - -__forceinline const ssei operator-(const ssei &a, const ssei &b) -{ - return _mm_sub_epi32(a.m128, b.m128); -} -__forceinline const ssei operator-(const ssei &a, const int32_t &b) -{ - return a - ssei(b); -} -__forceinline const ssei operator-(const int32_t &a, const ssei &b) -{ - return ssei(a) - b; -} - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei operator*(const ssei &a, const ssei &b) -{ - return _mm_mullo_epi32(a.m128, b.m128); -} -__forceinline const ssei operator*(const ssei &a, const int32_t &b) -{ - return a * ssei(b); -} -__forceinline const ssei operator*(const int32_t &a, const ssei &b) -{ - return ssei(a) * b; -} -# endif - -__forceinline const ssei operator&(const ssei &a, const ssei &b) -{ - return _mm_and_si128(a.m128, b.m128); -} -__forceinline const ssei operator&(const ssei &a, const int32_t &b) -{ - return a & ssei(b); -} -__forceinline const ssei operator&(const int32_t &a, const ssei &b) -{ - return ssei(a) & b; -} - -__forceinline const ssei operator|(const ssei &a, const ssei &b) -{ - return _mm_or_si128(a.m128, b.m128); -} -__forceinline const ssei operator|(const ssei &a, const int32_t &b) -{ - return a | ssei(b); -} -__forceinline const ssei operator|(const int32_t &a, const ssei &b) -{ - return ssei(a) | b; -} - -__forceinline const ssei operator^(const ssei &a, const ssei &b) -{ - return _mm_xor_si128(a.m128, b.m128); -} -__forceinline const ssei operator^(const ssei &a, const int32_t &b) -{ - return a ^ ssei(b); -} -__forceinline const ssei operator^(const int32_t &a, const ssei &b) -{ - return ssei(a) ^ b; -} - -__forceinline const ssei operator<<(const ssei &a, const int32_t &n) -{ - return _mm_slli_epi32(a.m128, n); -} -__forceinline const ssei operator>>(const ssei &a, const int32_t &n) -{ - return _mm_srai_epi32(a.m128, n); -} - -__forceinline const ssei andnot(const ssei &a, const ssei &b) -{ - return _mm_andnot_si128(a.m128, b.m128); -} -__forceinline const ssei andnot(const sseb &a, const ssei &b) -{ - return _mm_andnot_si128(cast(a.m128), b.m128); -} -__forceinline const ssei andnot(const ssei &a, const sseb &b) -{ - return _mm_andnot_si128(a.m128, cast(b.m128)); -} - -__forceinline const ssei sra(const ssei &a, const int32_t &b) -{ - return _mm_srai_epi32(a.m128, b); -} -__forceinline const ssei srl(const ssei &a, const int32_t &b) -{ - return _mm_srli_epi32(a.m128, b); -} - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei min(const ssei &a, const ssei &b) -{ - return _mm_min_epi32(a.m128, b.m128); -} -__forceinline const ssei min(const ssei &a, const int32_t &b) -{ - return min(a, ssei(b)); -} -__forceinline const ssei min(const int32_t &a, const ssei &b) -{ - return min(ssei(a), b); -} - -__forceinline const ssei max(const ssei &a, const ssei &b) -{ - return _mm_max_epi32(a.m128, b.m128); -} -__forceinline const ssei max(const ssei &a, const int32_t &b) -{ - return max(a, ssei(b)); -} -__forceinline const ssei max(const int32_t &a, const ssei &b) -{ - return max(ssei(a), b); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei &operator+=(ssei &a, const ssei &b) -{ - return a = a + b; -} -__forceinline ssei &operator+=(ssei &a, const int32_t &b) -{ - return a = a + b; -} - -__forceinline ssei &operator-=(ssei &a, const ssei &b) -{ - return a = a - b; -} -__forceinline ssei &operator-=(ssei &a, const int32_t &b) -{ - return a = a - b; -} - -# if defined(__KERNEL_SSE41__) -__forceinline ssei &operator*=(ssei &a, const ssei &b) -{ - return a = a * b; -} -__forceinline ssei &operator*=(ssei &a, const int32_t &b) -{ - return a = a * b; -} -# endif - -__forceinline ssei &operator&=(ssei &a, const ssei &b) -{ - return a = a & b; -} -__forceinline ssei &operator&=(ssei &a, const int32_t &b) -{ - return a = a & b; -} - -__forceinline ssei &operator|=(ssei &a, const ssei &b) -{ - return a = a | b; -} -__forceinline ssei &operator|=(ssei &a, const int32_t &b) -{ - return a = a | b; -} - -__forceinline ssei &operator^=(ssei &a, const ssei &b) -{ - return a = a ^ b; -} -__forceinline ssei &operator^=(ssei &a, const int32_t &b) -{ - return a = a ^ b; -} - -__forceinline ssei &operator<<=(ssei &a, const int32_t &b) -{ - return a = a << b; -} -__forceinline ssei &operator>>=(ssei &a, const int32_t &b) -{ - return a = a >> b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator==(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator==(const ssei &a, const int32_t &b) -{ - return a == ssei(b); -} -__forceinline const sseb operator==(const int32_t &a, const ssei &b) -{ - return ssei(a) == b; -} - -__forceinline const sseb operator!=(const ssei &a, const ssei &b) -{ - return !(a == b); -} -__forceinline const sseb operator!=(const ssei &a, const int32_t &b) -{ - return a != ssei(b); -} -__forceinline const sseb operator!=(const int32_t &a, const ssei &b) -{ - return ssei(a) != b; -} - -__forceinline const sseb operator<(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmplt_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator<(const ssei &a, const int32_t &b) -{ - return a < ssei(b); -} -__forceinline const sseb operator<(const int32_t &a, const ssei &b) -{ - return ssei(a) < b; -} - -__forceinline const sseb operator>=(const ssei &a, const ssei &b) -{ - return !(a < b); -} -__forceinline const sseb operator>=(const ssei &a, const int32_t &b) -{ - return a >= ssei(b); -} -__forceinline const sseb operator>=(const int32_t &a, const ssei &b) -{ - return ssei(a) >= b; -} - -__forceinline const sseb operator>(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmpgt_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator>(const ssei &a, const int32_t &b) -{ - return a > ssei(b); -} -__forceinline const sseb operator>(const int32_t &a, const ssei &b) -{ - return ssei(a) > b; -} - -__forceinline const sseb operator<=(const ssei &a, const ssei &b) -{ - return !(a > b); -} -__forceinline const sseb operator<=(const ssei &a, const int32_t &b) -{ - return a <= ssei(b); -} -__forceinline const sseb operator<=(const int32_t &a, const ssei &b) -{ - return ssei(a) <= b; -} - -__forceinline const ssei select(const sseb &m, const ssei &t, const ssei &f) -{ -# ifdef __KERNEL_SSE41__ - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); -# else - return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); -# endif -} - -__forceinline const ssei select(const int mask, const ssei &t, const ssei &f) -{ -# if defined(__KERNEL_SSE41__) && \ - ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) - return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); -# else - return select(sseb(mask), t, f); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei unpacklo(const ssei &a, const ssei &b) -{ - return _mm_unpacklo_epi32(a, b); -} -__forceinline ssei unpackhi(const ssei &a, const ssei &b) -{ - return _mm_unpackhi_epi32(a, b); -} - -template<size_t i0, size_t i1, size_t i2, size_t i3> -__forceinline const ssei shuffle(const ssei &a) -{ -# ifdef __KERNEL_NEON__ - int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(result); -# else - return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); -# endif -} - -template<size_t i0, size_t i1, size_t i2, size_t i3> -__forceinline const ssei shuffle(const ssei &a, const ssei &b) -{ -# ifdef __KERNEL_NEON__ - int32x4_t result = shuffle_neon<int32x4_t, i0, i1, i2, i3>(vreinterpretq_s32_m128i(a), - vreinterpretq_s32_m128i(b)); - return vreinterpretq_m128i_s32(result); -# else - return _mm_castps_si128( - _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); -# endif -} - -template<size_t i0> __forceinline const ssei shuffle(const ssei &b) -{ - return shuffle<i0, i0, i0, i0>(b); -} - -# if defined(__KERNEL_SSE41__) -template<size_t src> __forceinline int extract(const ssei &b) -{ - return _mm_extract_epi32(b, src); -} -template<size_t dst> __forceinline const ssei insert(const ssei &a, const int32_t b) -{ - return _mm_insert_epi32(a, b, dst); -} -# else -template<size_t src> __forceinline int extract(const ssei &b) -{ - return b[src]; -} -template<size_t dst> __forceinline const ssei insert(const ssei &a, const int32_t b) -{ - ssei c = a; - c[dst] = b; - return c; -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Reductions -//////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei vreduce_min(const ssei &v) -{ - ssei h = min(shuffle<1, 0, 3, 2>(v), v); - return min(shuffle<2, 3, 0, 1>(h), h); -} -__forceinline const ssei vreduce_max(const ssei &v) -{ - ssei h = max(shuffle<1, 0, 3, 2>(v), v); - return max(shuffle<2, 3, 0, 1>(h), h); -} -__forceinline const ssei vreduce_add(const ssei &v) -{ - ssei h = shuffle<1, 0, 3, 2>(v) + v; - return shuffle<2, 3, 0, 1>(h) + h; -} - -__forceinline int reduce_min(const ssei &v) -{ -# ifdef __KERNEL_NEON__ - return vminvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_min(v)); -# endif -} -__forceinline int reduce_max(const ssei &v) -{ -# ifdef __KERNEL_NEON__ - return vmaxvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_max(v)); -# endif -} -__forceinline int reduce_add(const ssei &v) -{ -# ifdef __KERNEL_NEON__ - return vaddvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_add(v)); -# endif -} - -__forceinline uint32_t select_min(const ssei &v) -{ - return __bsf(movemask(v == vreduce_min(v))); -} -__forceinline uint32_t select_max(const ssei &v) -{ - return __bsf(movemask(v == vreduce_max(v))); -} - -__forceinline uint32_t select_min(const sseb &valid, const ssei &v) -{ - const ssei a = select(valid, v, ssei((int)pos_inf)); - return __bsf(movemask(valid & (a == vreduce_min(a)))); -} -__forceinline uint32_t select_max(const sseb &valid, const ssei &v) -{ - const ssei a = select(valid, v, ssei((int)neg_inf)); - return __bsf(movemask(valid & (a == vreduce_max(a)))); -} - -# else - -__forceinline int ssei_min(int a, int b) -{ - return (a < b) ? a : b; -} -__forceinline int ssei_max(int a, int b) -{ - return (a > b) ? a : b; -} -__forceinline int reduce_min(const ssei &v) -{ - return ssei_min(ssei_min(v[0], v[1]), ssei_min(v[2], v[3])); -} -__forceinline int reduce_max(const ssei &v) -{ - return ssei_max(ssei_max(v[0], v[1]), ssei_max(v[2], v[3])); -} -__forceinline int reduce_add(const ssei &v) -{ - return v[0] + v[1] + v[2] + v[3]; -} - -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Memory load and store operations -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei load4i(const void *const a) -{ - return _mm_load_si128((__m128i *)a); -} - -__forceinline void store4i(void *ptr, const ssei &v) -{ - _mm_store_si128((__m128i *)ptr, v); -} - -__forceinline void storeu4i(void *ptr, const ssei &v) -{ - _mm_storeu_si128((__m128i *)ptr, v); -} - -__forceinline void store4i(const sseb &mask, void *ptr, const ssei &i) -{ -# if defined(__KERNEL_AVX__) - _mm_maskstore_ps((float *)ptr, (__m128i)mask, _mm_castsi128_ps(i)); -# else - *(ssei *)ptr = select(mask, i, *(ssei *)ptr); -# endif -} - -__forceinline ssei load4i_nt(void *ptr) -{ -# if defined(__KERNEL_SSE41__) - return _mm_stream_load_si128((__m128i *)ptr); -# else - return _mm_load_si128((__m128i *)ptr); -# endif -} - -__forceinline void store4i_nt(void *ptr, const ssei &v) -{ -# if defined(__KERNEL_SSE41__) - _mm_stream_ps((float *)ptr, _mm_castsi128_ps(v)); -# else - _mm_store_si128((__m128i *)ptr, v); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_ssei(const char *label, const ssei &a) -{ - printf("%s: %df %df %df %d\n", label, a[0], a[1], a[2], a[3]); -} - -#endif - -CCL_NAMESPACE_END - -#endif |