diff options
Diffstat (limited to 'intern/cycles/util/util_simd.h')
-rw-r--r-- | intern/cycles/util/util_simd.h | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 2d3a927f227..fd5ba1de37b 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -148,6 +148,12 @@ ccl_device_inline const __m128 fma(const __m128& a, const __m128& b, const __m12
 	return _mm_add_ps(_mm_mul_ps(a, b), c);
 }
 
+/* Per-lane a*b - c, done as a separate multiply then subtract (stand-in for fused multiply-subtract on SSE CPUs, so not actually fused). */
+ccl_device_inline const __m128 fms(const __m128& a, const __m128& b, const __m128& c)
+{
+	return _mm_sub_ps(_mm_mul_ps(a, b), c);
+}
+
 template<size_t N> ccl_device_inline const __m128 broadcast(const __m128& a)
 {
 	return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(N, N, N, N)));
@@ -168,6 +174,12 @@ ccl_device_inline const __m128 uint32_to_float(const __m128i &in)
 	return _mm_add_ps(e, d);
 }
 
+template<size_t S1, size_t S2, size_t S3, size_t S4> /* per-lane flags; S1 goes to the lowest lane (_mm_setr_epi32 order); presumably each is 0 or 1 - TODO confirm */
+ccl_device_inline const __m128 set_sign_bit(const __m128 &a)
+{ /* NOTE: builds a mask with bit 31 taken from each flag and XORs it into a, so flagged lanes have their sign toggled (not forced on, despite the name). */
+	return _mm_xor_ps(a, _mm_castsi128_ps(_mm_setr_epi32(S1 << 31, S2 << 31, S3 << 31, S4 << 31)));
+}
+
 #endif /* __KERNEL_SSE2__ */
 
 CCL_NAMESPACE_END
|