Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thomas Dinges <blender@dingto.org> 2014-06-13 23:13:18 +0400
committer: Thomas Dinges <blender@dingto.org> 2014-06-13 23:59:12 +0400
commit: cd5e1ff74e4f6443f3e4b836dd23fe46b56cb7ed (patch)
tree: 578ee132eab87d348147e49c91e1929660558c20 /intern/cycles/util/util_half.h
parent: d0573ce9054e325c0ad2fbb943087e0f8b9e159a (diff)
Cycles Refactor: Add SSE Utility code from Embree for cleaner SSE code.
This makes the code a bit easier to understand, and might come in handy if we want to reuse more Embree code.

Differential Revision: https://developer.blender.org/D482

Code by Brecht, with fixes by Lockal, Sergey and myself.
Diffstat (limited to 'intern/cycles/util/util_half.h')
-rw-r--r-- intern/cycles/util/util_half.h | 24
1 files changed, 12 insertions, 12 deletions
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index da6fae79bb9..397133618be 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -68,18 +68,18 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
}
#else
/* same as above with SSE */
- const __m128 mm_scale = _mm_set_ps1(scale);
- const __m128i mm_38800000 = _mm_set1_epi32(0x38800000);
- const __m128i mm_7FFF = _mm_set1_epi32(0x7FFF);
- const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF);
- const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000);
-
- __m128 mm_fscale = _mm_mul_ps(load_m128(f), mm_scale);
- __m128i x = _mm_castps_si128(_mm_min_ps(_mm_max_ps(mm_fscale, _mm_set_ps1(0.0f)), _mm_set_ps1(65500.0f)));
- __m128i absolute = _mm_and_si128(x, mm_7FFFFFFF);
- __m128i Z = _mm_add_epi32(absolute, mm_C8000000);
- __m128i result = _mm_andnot_si128(_mm_cmplt_epi32(absolute, mm_38800000), Z);
- __m128i rh = _mm_and_si128(_mm_srai_epi32(result, 13), mm_7FFF);
+ const ssef mm_scale = ssef(scale);
+ const ssei mm_38800000 = ssei(0x38800000);
+ const ssei mm_7FFF = ssei(0x7FFF);
+ const ssei mm_7FFFFFFF = ssei(0x7FFFFFFF);
+ const ssei mm_C8000000 = ssei(0xC8000000);
+
+ ssef mm_fscale = load4f(f) * mm_scale;
+ ssei x = cast(min(max(mm_fscale, ssef(0.0f)), ssef(65500.0f)));
+ ssei absolute = x & mm_7FFFFFFF;
+ ssei Z = absolute + mm_C8000000;
+ ssei result = andnot(absolute < mm_38800000, Z);
+ ssei rh = (result >> 13) & mm_7FFF;
_mm_storel_pi((__m64*)h, _mm_castsi128_ps(_mm_packs_epi32(rh, rh)));
#endif