diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-01-11 03:49:51 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-01-11 03:50:53 +0400 |
commit | 4d72a5e34a2f3e530f34ddc2d48adad390254203 (patch) | |
tree | d3cf3897f27081974ba410245cb3b16bc1ea3252 /intern/cycles/util/util_types.h | |
parent | 241fccaf6a113963598e5ad040e7e72c857bea00 (diff) |
Fix T38129: Cycles viewport render displayed very bright colors as black.
This happened when a scaled pixel value exceeded the maximum value representable by a half float; the value is now clamped to 65500.0f before the float-to-half conversion.
Diffstat (limited to 'intern/cycles/util/util_types.h')
-rw-r--r-- | intern/cycles/util/util_types.h | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index a53ab38734c..2ee2f0f92e0 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -61,6 +61,8 @@ #ifndef __KERNEL_GPU__ +#define __KERNEL_SSE2__ + /* not enabled, globally applying it gives slowdown, only for testing. */ #if 0 #define __KERNEL_SSE__ @@ -516,14 +518,16 @@ ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale) /* optimized float to half for pixels: * assumes no negative, no nan, no inf, and sets denormal to 0 */ union { uint i; float f; } in; - in.f = ((*f)[i] > 0.0f)? (*f)[i] * scale: 0.0f; + float fscale = (*f)[i] * scale; + in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f; int x = in.i; int absolute = x & 0x7FFFFFFF; int Z = absolute + 0xC8000000; int result = (absolute < 0x38800000)? 0: Z; + int rshift = (result >> 13); - h[i] = ((result >> 13) & 0x7FFF); + h[i] = (rshift & 0x7FFF); } #else /* same as above with SSE */ @@ -533,7 +537,8 @@ ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale) const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF); const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000); - __m128i x = _mm_castps_si128(_mm_max_ps(_mm_mul_ps(*(__m128*)f, mm_scale), _mm_set_ps1(0.0f))); + __m128 mm_fscale = _mm_mul_ps(*(__m128*)f, mm_scale); + __m128i x = _mm_castps_si128(_mm_min_ps(_mm_max_ps(mm_fscale, _mm_set_ps1(0.0f)), _mm_set_ps1(65500.0f))); __m128i absolute = _mm_and_si128(x, mm_7FFFFFFF); __m128i Z = _mm_add_epi32(absolute, mm_C8000000); __m128i result = _mm_andnot_si128(_mm_cmplt_epi32(absolute, mm_38800000), Z); |