From 44924a2e5e10cf645e2a81e0041defa05b542e83 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sat, 28 Apr 2012 09:10:20 +0000 Subject: Cycles: fix for CUDA build. --- intern/cycles/util/util_math.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'intern/cycles/util') diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 25d81481d12..33e351c74e9 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -515,15 +515,6 @@ __device_inline void print_float3(const char *label, const float3& a) printf("%s: %.8f %.8f %.8f\n", label, a.x, a.y, a.z); } -__device_inline float reduce_add(const float3& a) -{ -#ifdef __KERNEL_SSE__ - return (a.x + a.y + a.z); -#else - return (a.x + a.y + a.z); -#endif -} - __device_inline float3 rcp(const float3& a) { #ifdef __KERNEL_SSE__ @@ -550,6 +541,15 @@ __device_inline bool is_zero(const float3 a) #endif } +__device_inline float reduce_add(const float3& a) +{ +#ifdef __KERNEL_SSE__ + return (a.x + a.y + a.z); +#else + return (a.x + a.y + a.z); +#endif +} + __device_inline float average(const float3 a) { return reduce_add(a)*(1.0f/3.0f); @@ -783,16 +783,6 @@ __device_inline float4 reduce_add(const float4& a) } #endif -__device_inline float reduce_add(const float4& a) -{ -#ifdef __KERNEL_SSE__ - float4 h = shuffle<1,0,3,2>(a) + a; - return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */ -#else - return ((a.x + a.y) + (a.z + a.w)); -#endif -} - __device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, a.x, a.y, a.z, a.w); @@ -811,6 +801,16 @@ __device_inline bool is_zero(const float4& a) #endif } +__device_inline float reduce_add(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = shuffle<1,0,3,2>(a) + a; + return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */ +#else + return ((a.x + a.y) + (a.z + a.w)); +#endif +} + __device_inline float average(const float4& a) { return reduce_add(a) * 0.25f; -- cgit v1.2.3