diff options
author | Thomas Dinges <blender@dingto.org> | 2013-12-28 00:30:03 +0400 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2013-12-28 00:30:21 +0400 |
commit | 1578b55c2716aba111a0a96d02b638dc8f597def (patch) | |
tree | c5d3c16a74b5554cb3ace2a274601ca20b68d9b2 /intern | |
parent | 42044a96e4274327b7e839e002939b46b959a655 (diff) |
Cycles: Move SIMD utility functions into their own file.
The recently added SSE macros for the noise texture can be moved here as well, but I will leave that for later.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/kernel_compat_cpu.h | 1 | ||||
-rw-r--r-- | intern/cycles/util/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/util/util_simd.h | 87 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 61 |
4 files changed, 89 insertions, 61 deletions
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 9d3ffcdfce2..2b786bce0de 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -21,6 +21,7 @@ #include "util_debug.h" #include "util_math.h" +#include "util_simd.h" #include "util_types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index ce5ba44abe9..389f76e6df2 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -50,6 +50,7 @@ set(SRC_HEADERS util_path.h util_progress.h util_set.h + util_simd.h util_stats.h util_string.h util_system.h diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h new file mode 100644 index 00000000000..119ea2ac211 --- /dev/null +++ b/intern/cycles/util/util_simd.h @@ -0,0 +1,87 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License + */ + +#ifndef __UTIL_SIMD_H__ +#define __UTIL_SIMD_H__ + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_SSE2__ + +/* SSE shuffle utility functions */ + +#ifdef __KERNEL_SSSE3__ + +/* faster version for SSSE3 */ +typedef __m128i shuffle_swap_t; + +ccl_device_inline const shuffle_swap_t shuffle_swap_identity(void) +{ + return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +} + +ccl_device_inline const shuffle_swap_t shuffle_swap_swap(void) +{ + return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); +} + +ccl_device_inline const __m128 shuffle_swap(const __m128& a, const shuffle_swap_t& shuf) +{ + return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); +} + +#else + +/* somewhat slower version for SSE2 */ +typedef int shuffle_swap_t; + +ccl_device_inline const shuffle_swap_t shuffle_swap_identity(void) +{ + return 0; +} + +ccl_device_inline const shuffle_swap_t shuffle_swap_swap(void) +{ + return 1; +} + +ccl_device_inline const __m128 shuffle_swap(const __m128& a, shuffle_swap_t shuf) +{ + /* shuffle value must be a constant, so we need to branch */ + if(shuf) + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + else + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 1, 0)); +} + +#endif + +template<size_t i0, size_t i1, size_t i2, size_t i3> ccl_device_inline const __m128 shuffle(const __m128& a, const __m128& b) +{ + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +} + +template<size_t i0, size_t i1, size_t i2, size_t i3> ccl_device_inline const __m128 shuffle(const __m128& b) +{ + return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); +} + +#endif /* __KERNEL_SSE2__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_SIMD_H__ */ + diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 3fa1df6ab44..a53ab38734c 100644 --- 
a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -486,67 +486,6 @@ ccl_device_inline int4 make_int4(const float3& f) #endif -#ifdef __KERNEL_SSE2__ - -/* SSE shuffle utility functions */ - -#ifdef __KERNEL_SSSE3__ - -/* faster version for SSSE3 */ -typedef __m128i shuffle_swap_t; - -ccl_device_inline const shuffle_swap_t shuffle_swap_identity(void) -{ - return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); -} - -ccl_device_inline const shuffle_swap_t shuffle_swap_swap(void) -{ - return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); -} - -ccl_device_inline const __m128 shuffle_swap(const __m128& a, const shuffle_swap_t& shuf) -{ - return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); -} - -#else - -/* somewhat slower version for SSE2 */ -typedef int shuffle_swap_t; - -ccl_device_inline const shuffle_swap_t shuffle_swap_identity(void) -{ - return 0; -} - -ccl_device_inline const shuffle_swap_t shuffle_swap_swap(void) -{ - return 1; -} - -ccl_device_inline const __m128 shuffle_swap(const __m128& a, shuffle_swap_t shuf) -{ - /* shuffle value must be a constant, so we need to branch */ - if(shuf) - return _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); - else - return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 1, 0)); -} - -#endif - -template<size_t i0, size_t i1, size_t i2, size_t i3> ccl_device_inline const __m128 shuffle(const __m128& a, const __m128& b) -{ - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); -} - -template<size_t i0, size_t i1, size_t i2, size_t i3> ccl_device_inline const __m128 shuffle(const __m128& b) -{ - return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); -} -#endif - /* Half Floats */ #ifdef __KERNEL_OPENCL__ |