diff options
author | Martijn Berger <martijn.berger@gmail.com> | 2013-11-22 17:16:47 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2013-11-22 17:42:41 +0400 |
commit | e3a79258d17e6cdca26120eab7a2c48c7c4d4a0f (patch) | |
tree | 77d59694458125dd7525faf59ed56ce505533981 /intern/cycles/util/util_types.h | |
parent | 5feb0d2bfe8f6723bf48073b1760b732bc6a5ceb (diff) |
Cycles: test code for SSE 4.1 kernel and alignment for some vector types.
This is mostly work towards enabling the __KERNEL_SSE__ option to start using
SIMD operations for vector math operations. The SSE 4.1 kernel performs about 8%
faster with that option but overall is still slower than without the option.
WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 is the cmake flag for testing this kernel.
Alignment of int3, int4, float3, float4 to 16 bytes seems to give a slight 1-2%
speedup on tested systems with the current kernel already, so it is enabled now.
Diffstat (limited to 'intern/cycles/util/util_types.h')
-rw-r--r-- | intern/cycles/util/util_types.h | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index c53d67235f6..fe743221f32 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -95,6 +95,10 @@ #include <tmmintrin.h> /* SSSE 3 */ #endif +#ifdef __KERNEL_SSE41__ +#include <smmintrin.h> /* SSE 4.1 */ +#endif + #else /* MinGW64 has conflicting declarations for these SSE headers in <windows.h>. @@ -199,7 +203,7 @@ struct ccl_align(16) int3 { __forceinline operator const __m128i&(void) const { return m128; } __forceinline operator __m128i&(void) { return m128; } #else -struct int3 { +struct ccl_align(16) int3 { int x, y, z, w; #endif @@ -219,7 +223,7 @@ struct ccl_align(16) int4 { __forceinline operator const __m128i&(void) const { return m128; } __forceinline operator __m128i&(void) { return m128; } #else -struct int4 { +struct ccl_align(16) int4 { int x, y, z, w; #endif @@ -267,7 +271,7 @@ struct ccl_align(16) float3 { __forceinline operator const __m128&(void) const { return m128; } __forceinline operator __m128&(void) { return m128; } #else -struct float3 { +struct ccl_align(16) float3 { float x, y, z, w; #endif @@ -287,7 +291,7 @@ struct ccl_align(16) float4 { __forceinline operator const __m128&(void) const { return m128; } __forceinline operator __m128&(void) { return m128; } #else -struct float4 { +struct ccl_align(16) float4 { float x, y, z, w; #endif |