Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Martijn Berger <martijn.berger@gmail.com> 2013-11-22 17:16:47 +0400
committer Brecht Van Lommel <brechtvanlommel@gmail.com> 2013-11-22 17:42:41 +0400
commit e3a79258d17e6cdca26120eab7a2c48c7c4d4a0f (patch)
tree 77d59694458125dd7525faf59ed56ce505533981 /intern/cycles/util/util_types.h
parent 5feb0d2bfe8f6723bf48073b1760b732bc6a5ceb (diff)
Cycles: test code for sse 4.1 kernel and alignment for some vector types.
This is mostly work towards enabling the __KERNEL_SSE__ option to start using SIMD operations for vector math operations. The SSE 4.1 kernel performs about 8% faster with that option, but overall it is still slower than without the option. WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 is the CMake flag for testing this kernel. Alignment of int3, int4, float3, float4 to 16 bytes seems to give a slight 1-2% speedup on tested systems with the current kernel already, so it is enabled now.
Diffstat (limited to 'intern/cycles/util/util_types.h')
-rw-r--r--intern/cycles/util/util_types.h12
1 files changed, 8 insertions, 4 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index c53d67235f6..fe743221f32 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -95,6 +95,10 @@
#include <tmmintrin.h> /* SSSE 3 */
#endif
+#ifdef __KERNEL_SSE41__
+#include <smmintrin.h> /* SSE 4.1 */
+#endif
+
#else
/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
@@ -199,7 +203,7 @@ struct ccl_align(16) int3 {
__forceinline operator const __m128i&(void) const { return m128; }
__forceinline operator __m128i&(void) { return m128; }
#else
-struct int3 {
+struct ccl_align(16) int3 {
int x, y, z, w;
#endif
@@ -219,7 +223,7 @@ struct ccl_align(16) int4 {
__forceinline operator const __m128i&(void) const { return m128; }
__forceinline operator __m128i&(void) { return m128; }
#else
-struct int4 {
+struct ccl_align(16) int4 {
int x, y, z, w;
#endif
@@ -267,7 +271,7 @@ struct ccl_align(16) float3 {
__forceinline operator const __m128&(void) const { return m128; }
__forceinline operator __m128&(void) { return m128; }
#else
-struct float3 {
+struct ccl_align(16) float3 {
float x, y, z, w;
#endif
@@ -287,7 +291,7 @@ struct ccl_align(16) float4 {
__forceinline operator const __m128&(void) const { return m128; }
__forceinline operator __m128&(void) { return m128; }
#else
-struct float4 {
+struct ccl_align(16) float4 {
float x, y, z, w;
#endif