diff options
author | Thomas Dinges <blender@dingto.org> | 2014-06-13 23:13:18 +0400 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2014-06-13 23:59:12 +0400 |
commit | cd5e1ff74e4f6443f3e4b836dd23fe46b56cb7ed (patch) | |
tree | 578ee132eab87d348147e49c91e1929660558c20 /intern/cycles/util/util_half.h | |
parent | d0573ce9054e325c0ad2fbb943087e0f8b9e159a (diff) |
Cycles Refactor: Add SSE Utility code from Embree for cleaner SSE code.
This makes the code a bit easier to understand, and might come in handy
if we want to reuse more Embree code.
Differential Revision: https://developer.blender.org/D482
Code by Brecht, with fixes by Lockal, Sergey and myself.
Diffstat (limited to 'intern/cycles/util/util_half.h')
-rw-r--r-- | intern/cycles/util/util_half.h | 24 |
1 file changed, 12 insertions, 12 deletions
```diff
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index da6fae79bb9..397133618be 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -68,18 +68,18 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 	}
 #else
 	/* same as above with SSE */
-	const __m128 mm_scale = _mm_set_ps1(scale);
-	const __m128i mm_38800000 = _mm_set1_epi32(0x38800000);
-	const __m128i mm_7FFF = _mm_set1_epi32(0x7FFF);
-	const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF);
-	const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000);
-
-	__m128 mm_fscale = _mm_mul_ps(load_m128(f), mm_scale);
-	__m128i x = _mm_castps_si128(_mm_min_ps(_mm_max_ps(mm_fscale, _mm_set_ps1(0.0f)), _mm_set_ps1(65500.0f)));
-	__m128i absolute = _mm_and_si128(x, mm_7FFFFFFF);
-	__m128i Z = _mm_add_epi32(absolute, mm_C8000000);
-	__m128i result = _mm_andnot_si128(_mm_cmplt_epi32(absolute, mm_38800000), Z);
-	__m128i rh = _mm_and_si128(_mm_srai_epi32(result, 13), mm_7FFF);
+	const ssef mm_scale = ssef(scale);
+	const ssei mm_38800000 = ssei(0x38800000);
+	const ssei mm_7FFF = ssei(0x7FFF);
+	const ssei mm_7FFFFFFF = ssei(0x7FFFFFFF);
+	const ssei mm_C8000000 = ssei(0xC8000000);
+
+	ssef mm_fscale = load4f(f) * mm_scale;
+	ssei x = cast(min(max(mm_fscale, ssef(0.0f)), ssef(65500.0f)));
+	ssei absolute = x & mm_7FFFFFFF;
+	ssei Z = absolute + mm_C8000000;
+	ssei result = andnot(absolute < mm_38800000, Z);
+	ssei rh = (result >> 13) & mm_7FFF;
 
 	_mm_storel_pi((__m64*)h, _mm_castsi128_ps(_mm_packs_epi32(rh, rh)));
 #endif
```