diff options
author | Sv. Lockal <lockalsash@gmail.com> | 2014-02-27 14:49:21 +0400 |
---|---|---|
committer | Sv. Lockal <lockalsash@gmail.com> | 2014-02-27 15:01:20 +0400 |
commit | 7808360c5f35c5a6371c3627758a70db431955a2 (patch) | |
tree | 0a3cb25e749917dd0021f308a5627442888b1ecb /intern/cycles/util/util_half.h | |
parent | 8badec14f97df80b328a6d8106c6cdbcafb171c0 (diff) |
Cycles: fix crash in SSE hair and half-floats on x86+vc2008
MSVC 2008 ignores the alignment attribute when assigning from an unaligned
float4 vector returned from another function. Cycles now uses unaligned
loads instead of pointer casts on win32 in x86 mode.
Diffstat (limited to 'intern/cycles/util/util_half.h')
-rw-r--r-- | intern/cycles/util/util_half.h | 18 |
1 files changed, 9 insertions, 9 deletions
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 21192024f7f..88709955b32 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __KERNEL_OPENCL__
 
-#define float4_store_half(h, f, scale) vstore_half4(*(f) * (scale), 0, h);
+#define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h);
 
 #else
 
@@ -34,24 +34,24 @@ struct half4 { half x, y, z, w; };
 
 #ifdef __KERNEL_CUDA__
 
-ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 {
-	h[0] = __float2half_rn(f->x * scale);
-	h[1] = __float2half_rn(f->y * scale);
-	h[2] = __float2half_rn(f->z * scale);
-	h[3] = __float2half_rn(f->w * scale);
+	h[0] = __float2half_rn(f.x * scale);
+	h[1] = __float2half_rn(f.y * scale);
+	h[2] = __float2half_rn(f.z * scale);
+	h[3] = __float2half_rn(f.w * scale);
 }
 
 #else
 
-ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 {
 #ifndef __KERNEL_SSE2__
 	for(int i = 0; i < 4; i++) {
 		/* optimized float to half for pixels:
 		 * assumes no negative, no nan, no inf, and sets denormal to 0 */
 		union { uint i; float f; } in;
-		float fscale = (*f)[i] * scale;
+		float fscale = f[i] * scale;
 		in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f;
 		int x = in.i;
 
@@ -70,7 +70,7 @@ ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
 	const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF);
 	const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000);
 
-	__m128 mm_fscale = _mm_mul_ps(*(__m128*)f, mm_scale);
+	__m128 mm_fscale = _mm_mul_ps(load_m128(f), mm_scale);
 	__m128i x = _mm_castps_si128(_mm_min_ps(_mm_max_ps(mm_fscale, _mm_set_ps1(0.0f)), _mm_set_ps1(65500.0f)));
 	__m128i absolute = _mm_and_si128(x, mm_7FFFFFFF);
 	__m128i Z = _mm_add_epi32(absolute, mm_C8000000);