Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartijn Berger <martijn.berger@gmail.com>2013-11-22 17:16:47 +0400
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2013-11-22 17:42:41 +0400
commite3a79258d17e6cdca26120eab7a2c48c7c4d4a0f (patch)
tree77d59694458125dd7525faf59ed56ce505533981 /intern/cycles/util/util_math.h
parent5feb0d2bfe8f6723bf48073b1760b732bc6a5ceb (diff)
Cycles: test code for sse 4.1 kernel and alignment for some vector types.
This is mostly work towards enabling the __KERNEL_SSE__ option to start using SIMD operations for vector math operations. This 4.1 kernel performs about 8% faster with that option but overall is still slower than without the option. WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 is the cmake flag for testing this kernel. Alignment of int3, int4, float3, float4 to 16 bytes seems to give a slight 1-2% speedup on tested systems with the current kernel already, so is enabled now.
Diffstat (limited to 'intern/cycles/util/util_math.h')
-rw-r--r--intern/cycles/util/util_math.h13
1 files changed, 13 insertions, 0 deletions
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 6db532faf74..851c67b1189 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -462,7 +462,11 @@ ccl_device_inline float3 operator/=(float3& a, float f)
/* Dot product of the x, y and z components of two float3 vectors.
 *
 * When both __KERNEL_SSE41__ and __KERNEL_SSE__ are defined, the product is
 * computed with the SSE4.1 _mm_dp_ps intrinsic; otherwise the scalar
 * expression is used.
 *
 * Mask 0x7F for _mm_dp_ps: the high nibble (0x7) selects lanes 0-2 as the
 * products to sum, the low nibble (0xF) writes the sum to all four result
 * lanes. _mm_cvtss_f32 then returns lane 0 as a float.
 *
 * NOTE(review): assumes lanes 0-2 of the SSE representation hold x, y, z -
 * confirm against the float3 definition.
 * NOTE(review): a and b are passed to the intrinsic directly here, while len()
 * and normalize() pass a.m128; presumably this relies on an implicit
 * float3 -> __m128 conversion - confirm. */
ccl_device_inline float dot(const float3 a, const float3 b)
{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+ return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
+#else
return a.x*b.x + a.y*b.y + a.z*b.z;
+#endif
}
ccl_device_inline float3 cross(const float3 a, const float3 b)
@@ -475,7 +479,11 @@ ccl_device_inline float3 cross(const float3 a, const float3 b)
/* Length of a float3 vector: the square root of the dot product of a with
 * itself.
 *
 * When both __KERNEL_SSE41__ and __KERNEL_SSE__ are defined, the squared
 * length is computed with _mm_dp_ps (mask 0x7F: sum the products of lanes 0-2,
 * write the sum to all result lanes), _mm_sqrt_ss takes the square root of the
 * lowest lane only, and _mm_cvtss_f32 returns that lane as a float.
 * Otherwise the scalar sqrtf(dot(a, a)) is used.
 *
 * NOTE(review): assumes lanes 0-2 of a.m128 hold x, y, z - confirm against the
 * float3 definition. */
ccl_device_inline float len(const float3 a)
{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+ return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F)));
+#else
return sqrtf(dot(a, a));
+#endif
}
ccl_device_inline float len_squared(const float3 a)
@@ -487,7 +495,12 @@ ccl_device_inline float len_squared(const float3 a)
/* Returns a divided by its own length.
 *
 * When both __KERNEL_SSE41__ and __KERNEL_SSE__ are defined, the length is
 * computed once with _mm_dp_ps (mask 0x7F: sum the products of lanes 0-2,
 * write the sum to all four lanes) followed by _mm_sqrt_ps, so `norm` holds
 * the length in every lane. _mm_div_ps then divides all four lanes of a.m128
 * by it, including the fourth lane. Otherwise the scalar a/len(a) is used.
 *
 * Neither branch checks for a zero-length input.
 *
 * NOTE(review): the SSE branch returns a __m128 from a function declared to
 * return float3; presumably float3 is constructible from __m128 - confirm.
 * NOTE(review): assumes lanes 0-2 of a.m128 hold x, y, z - confirm against the
 * float3 definition. */
ccl_device_inline float3 normalize(const float3 a)
{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+ __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
+ return _mm_div_ps(a.m128, norm);
+#else
return a/len(a);
+#endif
}
#endif