diff options
author | Hans Goudey <h.goudey@me.com> | 2020-08-12 04:59:16 +0300 |
---|---|---|
committer | Hans Goudey <h.goudey@me.com> | 2020-08-12 04:59:16 +0300 |
commit | ef11238c743e6985fe325280fb13e05d6ec27378 (patch) | |
tree | 1dd2605a676bee52296535825b99f000a9c255eb /intern/cycles/util/util_math_fast.h | |
parent | 1f768bbe4145daed111636ca09dd53b25b8d29b5 (diff) | |
parent | ec5f39208785c1bbe723054ffe69e1ac2ab470dd (diff) |
Merge branch 'master' into property-search-ui
Diffstat (limited to 'intern/cycles/util/util_math_fast.h')
-rw-r--r-- | intern/cycles/util/util_math_fast.h | 20 |
1 files changed, 10 insertions, 10 deletions
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
index e979bd9e0c0..07b0878e3d5 100644
--- a/intern/cycles/util/util_math_fast.h
+++ b/intern/cycles/util/util_math_fast.h
@@ -87,7 +87,7 @@ ccl_device_inline int fast_rint(float x)
   /* Single roundps instruction on SSE4.1+ (for gcc/clang at least). */
   return float_to_int(rintf(x));
 #else
-  /* emulate rounding by adding/substracting 0.5. */
+  /* emulate rounding by adding/subtracting 0.5. */
   return float_to_int(x + copysignf(0.5f, x));
 #endif
 }
@@ -445,12 +445,10 @@ ccl_device_inline float fast_expf(float x)
   return fast_exp2f(x / M_LN2_F);
 }
 
-#ifndef __KERNEL_GPU__
-/* MSVC seems to have a code-gen bug here in at least SSE41/AVX
- * see T78047 for details. */
-# ifdef _MSC_VER
-# pragma optimize("", off)
-# endif
+#if defined(__KERNEL_CPU__) && !defined(_MSC_VER)
+/* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see
+ * T78047 and T78869 for details. Just disable for now, it only makes
+ * a small difference in denoising performance. */
 ccl_device float4 fast_exp2f4(float4 x)
 {
   const float4 one = make_float4(1.0f);
@@ -466,14 +464,16 @@ ccl_device float4 fast_exp2f4(float4 x)
   r = madd4(x, r, make_float4(1.0f));
   return __int4_as_float4(__float4_as_int4(r) + (m << 23));
 }
-# ifdef _MSC_VER
-# pragma optimize("", on)
-# endif
 
 ccl_device_inline float4 fast_expf4(float4 x)
 {
   return fast_exp2f4(x / M_LN2_F);
 }
+#else
+ccl_device_inline float4 fast_expf4(float4 x)
+{
+  return make_float4(fast_expf(x.x), fast_expf(x.y), fast_expf(x.z), fast_expf(x.w));
+}
 #endif
 
 ccl_device_inline float fast_exp10(float x)