Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-01-11 03:49:51 +0400
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2014-01-11 03:50:53 +0400
commit: 4d72a5e34a2f3e530f34ddc2d48adad390254203 (patch)
tree: d3cf3897f27081974ba410245cb3b16bc1ea3252 /intern/cycles/util/util_types.h
parent: 241fccaf6a113963598e5ad040e7e72c857bea00 (diff)
Fix T38129: cycles viewport render display with very bright colors turning black.
This happened when exceeding the maximum value representable by half floats.
Diffstat (limited to 'intern/cycles/util/util_types.h')
-rw-r--r-- intern/cycles/util/util_types.h 11
1 file changed, 8 insertions, 3 deletions
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index a53ab38734c..2ee2f0f92e0 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -61,6 +61,8 @@
#ifndef __KERNEL_GPU__
+#define __KERNEL_SSE2__
+
/* not enabled, globally applying it gives slowdown, only for testing. */
#if 0
#define __KERNEL_SSE__
@@ -516,14 +518,16 @@ ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
/* optimized float to half for pixels:
* assumes no negative, no nan, no inf, and sets denormal to 0 */
union { uint i; float f; } in;
- in.f = ((*f)[i] > 0.0f)? (*f)[i] * scale: 0.0f;
+ float fscale = (*f)[i] * scale;
+ in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f;
int x = in.i;
int absolute = x & 0x7FFFFFFF;
int Z = absolute + 0xC8000000;
int result = (absolute < 0x38800000)? 0: Z;
+ int rshift = (result >> 13);
- h[i] = ((result >> 13) & 0x7FFF);
+ h[i] = (rshift & 0x7FFF);
}
#else
/* same as above with SSE */
@@ -533,7 +537,8 @@ ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF);
const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000);
- __m128i x = _mm_castps_si128(_mm_max_ps(_mm_mul_ps(*(__m128*)f, mm_scale), _mm_set_ps1(0.0f)));
+ __m128 mm_fscale = _mm_mul_ps(*(__m128*)f, mm_scale);
+ __m128i x = _mm_castps_si128(_mm_min_ps(_mm_max_ps(mm_fscale, _mm_set_ps1(0.0f)), _mm_set_ps1(65500.0f)));
__m128i absolute = _mm_and_si128(x, mm_7FFFFFFF);
__m128i Z = _mm_add_epi32(absolute, mm_C8000000);
__m128i result = _mm_andnot_si128(_mm_cmplt_epi32(absolute, mm_38800000), Z);