Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2021-10-24 15:19:19 +0300
committerBrecht Van Lommel <brecht@blender.org>2021-10-26 16:37:04 +0300
commitfd25e883e2807a151f673b87c152a59701a0df80 (patch)
tree9441933f32ba2672ca71c58842342a9c525e123e /intern/cycles/util/sseb.h
parentd7d40745fa09061a3117bd3669c5a46bbf611eae (diff)
Cycles: remove prefix from source code file names
Remove prefix of filenames that is the same as the folder name. This used to help when #includes were using individual files, but now they are always relative to the cycles root directory and so the prefixes are redundant. For patches and branches, git merge and rebase should be able to detect the renames and move over code to the right file.
Diffstat (limited to 'intern/cycles/util/sseb.h')
-rw-r--r--intern/cycles/util/sseb.h358
1 files changed, 358 insertions, 0 deletions
diff --git a/intern/cycles/util/sseb.h b/intern/cycles/util/sseb.h
new file mode 100644
index 00000000000..6afce4f8909
--- /dev/null
+++ b/intern/cycles/util/sseb.h
@@ -0,0 +1,358 @@
+/*
+ * Copyright 2011-2013 Intel Corporation
+ * Modifications Copyright 2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0(the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_SSEB_H__
+#define __UTIL_SSEB_H__
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __KERNEL_SSE2__
+
+struct ssei;
+struct ssef;
+
+/*! 4-wide SSE bool type. */
+struct sseb {
+ typedef sseb Mask; // mask type
+ typedef ssei Int; // int type
+ typedef ssef Float; // float type
+
+ enum { size = 4 }; // number of SIMD elements
+ union {
+ __m128 m128;
+ int32_t v[4];
+ }; // data
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline sseb()
+ {
+ }
+ __forceinline sseb(const sseb &other)
+ {
+ m128 = other.m128;
+ }
+ __forceinline sseb &operator=(const sseb &other)
+ {
+ m128 = other.m128;
+ return *this;
+ }
+
+ __forceinline sseb(const __m128 input) : m128(input)
+ {
+ }
+ __forceinline operator const __m128 &(void) const
+ {
+ return m128;
+ }
+ __forceinline operator const __m128i(void) const
+ {
+ return _mm_castps_si128(m128);
+ }
+ __forceinline operator const __m128d(void) const
+ {
+ return _mm_castps_pd(m128);
+ }
+
+ __forceinline sseb(bool a)
+ : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)])
+ {
+ }
+ __forceinline sseb(bool a, bool b)
+ : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)])
+ {
+ }
+ __forceinline sseb(bool a, bool b, bool c, bool d)
+ : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)])
+ {
+ }
+ __forceinline sseb(int mask)
+ {
+ assert(mask >= 0 && mask < 16);
+ m128 = _mm_lookupmask_ps[mask];
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline sseb(FalseTy) : m128(_mm_setzero_ps())
+ {
+ }
+ __forceinline sseb(TrueTy)
+ : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())))
+ {
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator[](const size_t i) const
+ {
+ assert(i < 4);
+ return (_mm_movemask_ps(m128) >> i) & 1;
+ }
+ __forceinline int32_t &operator[](const size_t i)
+ {
+ assert(i < 4);
+ return v[i];
+ }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+/// Unary Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const sseb operator!(const sseb &a)
+{
+ return _mm_xor_ps(a, sseb(True));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Binary Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const sseb operator&(const sseb &a, const sseb &b)
+{
+ return _mm_and_ps(a, b);
+}
+__forceinline const sseb operator|(const sseb &a, const sseb &b)
+{
+ return _mm_or_ps(a, b);
+}
+__forceinline const sseb operator^(const sseb &a, const sseb &b)
+{
+ return _mm_xor_ps(a, b);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Assignment Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const sseb operator&=(sseb &a, const sseb &b)
+{
+ return a = a & b;
+}
+__forceinline const sseb operator|=(sseb &a, const sseb &b)
+{
+ return a = a | b;
+}
+__forceinline const sseb operator^=(sseb &a, const sseb &b)
+{
+ return a = a ^ b;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Comparison Operators + Select
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const sseb operator!=(const sseb &a, const sseb &b)
+{
+ return _mm_xor_ps(a, b);
+}
+__forceinline const sseb operator==(const sseb &a, const sseb &b)
+{
+ return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b));
+}
+
+__forceinline const sseb select(const sseb &m, const sseb &t, const sseb &f)
+{
+# if defined(__KERNEL_SSE41__)
+ return _mm_blendv_ps(f, t, m);
+# else
+ return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f));
+# endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Movement/Shifting/Shuffling Functions
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const sseb unpacklo(const sseb &a, const sseb &b)
+{
+ return _mm_unpacklo_ps(a, b);
+}
+__forceinline const sseb unpackhi(const sseb &a, const sseb &b)
+{
+ return _mm_unpackhi_ps(a, b);
+}
+
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+__forceinline const sseb shuffle(const sseb &a)
+{
+# ifdef __KERNEL_NEON__
+ return shuffle_neon<int32x4_t, i0, i1, i2, i3>(a);
+# else
+ return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
+# endif
+}
+
+# ifndef __KERNEL_NEON__
+template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a)
+{
+ return _mm_movelh_ps(a, a);
+}
+
+template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a)
+{
+ return _mm_movehl_ps(a, a);
+}
+# endif
+
+template<size_t i0, size_t i1, size_t i2, size_t i3>
+__forceinline const sseb shuffle(const sseb &a, const sseb &b)
+{
+# ifdef __KERNEL_NEON__
+ return shuffle_neon<int32x4_t, i0, i1, i2, i3>(a, b);
+# else
+ return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
+# endif
+}
+
+# ifndef __KERNEL_NEON__
+template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b)
+{
+ return _mm_movelh_ps(a, b);
+}
+
+template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b)
+{
+ return _mm_movehl_ps(b, a);
+}
+# endif
+
+# if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__)
+template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a)
+{
+ return _mm_moveldup_ps(a);
+}
+template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a)
+{
+ return _mm_movehdup_ps(a);
+}
+# endif
+
+# if defined(__KERNEL_SSE41__)
+template<size_t dst, size_t src, size_t clr>
+__forceinline const sseb insert(const sseb &a, const sseb &b)
+{
+# ifdef __KERNEL_NEON__
+ sseb res = a;
+ if (clr)
+ res[dst] = 0;
+ else
+ res[dst] = b[src];
+ return res;
+# else
+ return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr);
+# endif
+}
+template<size_t dst, size_t src> __forceinline const sseb insert(const sseb &a, const sseb &b)
+{
+ return insert<dst, src, 0>(a, b);
+}
+template<size_t dst> __forceinline const sseb insert(const sseb &a, const bool b)
+{
+ return insert<dst, 0>(a, sseb(b));
+}
+# endif
+
+////////////////////////////////////////////////////////////////////////////////
+/// Reduction Operations
+////////////////////////////////////////////////////////////////////////////////
+
+# if defined(__KERNEL_SSE41__)
+__forceinline uint32_t popcnt(const sseb &a)
+{
+# if defined(__KERNEL_NEON__)
+ const int32x4_t mask = {1, 1, 1, 1};
+ int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask);
+ return vaddvq_s32(t);
+# else
+ return _mm_popcnt_u32(_mm_movemask_ps(a));
+# endif
+}
+# else
+__forceinline uint32_t popcnt(const sseb &a)
+{
+ return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]);
+}
+# endif
+
+__forceinline bool reduce_and(const sseb &a)
+{
+# if defined(__KERNEL_NEON__)
+ return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4;
+# else
+ return _mm_movemask_ps(a) == 0xf;
+# endif
+}
+__forceinline bool reduce_or(const sseb &a)
+{
+# if defined(__KERNEL_NEON__)
+ return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0;
+# else
+ return _mm_movemask_ps(a) != 0x0;
+# endif
+}
+__forceinline bool all(const sseb &b)
+{
+# if defined(__KERNEL_NEON__)
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4;
+# else
+ return _mm_movemask_ps(b) == 0xf;
+# endif
+}
+__forceinline bool any(const sseb &b)
+{
+# if defined(__KERNEL_NEON__)
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0;
+# else
+ return _mm_movemask_ps(b) != 0x0;
+# endif
+}
+__forceinline bool none(const sseb &b)
+{
+# if defined(__KERNEL_NEON__)
+ return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0;
+# else
+ return _mm_movemask_ps(b) == 0x0;
+# endif
+}
+
+__forceinline uint32_t movemask(const sseb &a)
+{
+ return _mm_movemask_ps(a);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Debug Functions
+////////////////////////////////////////////////////////////////////////////////
+
+ccl_device_inline void print_sseb(const char *label, const sseb &a)
+{
+ printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]);
+}
+
+#endif
+
+CCL_NAMESPACE_END
+
+#endif