
git.blender.org/blender.git
author    Sv. Lockal <lockalsash@gmail.com>  2014-02-27 14:49:21 +0400
committer Sv. Lockal <lockalsash@gmail.com>  2014-02-27 15:01:20 +0400
commit    7808360c5f35c5a6371c3627758a70db431955a2 (patch)
tree      0a3cb25e749917dd0021f308a5627442888b1ecb /intern/cycles/util/util_half.h
parent    8badec14f97df80b328a6d8106c6cdbcafb171c0 (diff)
Cycles: fix crash in SSE hair and half-floats on x86+vc2008
MSVC 2008 ignores the alignment attribute when assigning from an unaligned float4 vector returned from another function. Cycles now uses unaligned loads instead of casts for win32 builds in x86 mode.
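For context, a cast to __m128* produces an aligned (movaps-style) load, which faults when the compiler has spilled the returned float4 to an unaligned stack slot. A minimal standalone sketch of the two access patterns, not the Cycles source (the float4_example type and both helper names are hypothetical):

#include <xmmintrin.h>  /* SSE intrinsics */

struct float4_example { float x, y, z, w; };

/* Cast-based load: dereferencing a __m128* requires the pointer to be
 * 16-byte aligned; MSVC 2008 may place a returned float4 at an
 * unaligned address, so this pattern can crash. */
static __m128 cast_load(const struct float4_example *f)
{
	return *(const __m128 *)f;
}

/* Unaligned load: movups accepts any address, at a small cost on older
 * CPUs; this is the safe pattern the patch switches to for 32-bit
 * Windows builds. */
static __m128 unaligned_load(const struct float4_example *f)
{
	return _mm_loadu_ps(&f->x);
}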
Diffstat (limited to 'intern/cycles/util/util_half.h')
-rw-r--r--  intern/cycles/util/util_half.h | 18
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 21192024f7f..88709955b32 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_OPENCL__
-#define float4_store_half(h, f, scale) vstore_half4(*(f) * (scale), 0, h);
+#define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h);
#else
@@ -34,24 +34,24 @@ struct half4 { half x, y, z, w; };
#ifdef __KERNEL_CUDA__
-ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
- h[0] = __float2half_rn(f->x * scale);
- h[1] = __float2half_rn(f->y * scale);
- h[2] = __float2half_rn(f->z * scale);
- h[3] = __float2half_rn(f->w * scale);
+ h[0] = __float2half_rn(f.x * scale);
+ h[1] = __float2half_rn(f.y * scale);
+ h[2] = __float2half_rn(f.z * scale);
+ h[3] = __float2half_rn(f.w * scale);
}
#else
-ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
#ifndef __KERNEL_SSE2__
for(int i = 0; i < 4; i++) {
/* optimized float to half for pixels:
* assumes no negative, no nan, no inf, and sets denormal to 0 */
union { uint i; float f; } in;
- float fscale = (*f)[i] * scale;
+ float fscale = f[i] * scale;
in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f;
int x = in.i;
@@ -70,7 +70,7 @@ ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF);
const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000);
- __m128 mm_fscale = _mm_mul_ps(*(__m128*)f, mm_scale);
+ __m128 mm_fscale = _mm_mul_ps(load_m128(f), mm_scale);
__m128i x = _mm_castps_si128(_mm_min_ps(_mm_max_ps(mm_fscale, _mm_set_ps1(0.0f)), _mm_set_ps1(65500.0f)));
__m128i absolute = _mm_and_si128(x, mm_7FFFFFFF);
__m128i Z = _mm_add_epi32(absolute, mm_C8000000);
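For reference, the scalar and SSE paths implement the same truncating float-to-half conversion. The sketch below is a simplified standalone version under the assumptions stated in the code's own comment (no negatives, no NaN/Inf, denormals flushed to zero); it mirrors the constants in the hunk, where adding 0xC8000000 is the unsigned wrap-around form of subtracting the exponent re-bias 0x38000000:

#include <stdint.h>

typedef uint16_t half_t;  /* stand-in for the Cycles 'half' type */

/* Simplified float -> half: clamps into the representable half range,
 * flushes values below the smallest normal half to zero, and truncates
 * the mantissa instead of rounding to nearest. */
static half_t float_to_half_fast(float value)
{
	union { uint32_t i; float f; } in;

	/* clamp to [0, 65500], as the kernel code does before converting */
	in.f = (value > 0.0f) ? ((value < 65500.0f) ? value : 65500.0f) : 0.0f;

	uint32_t bits = in.i;

	/* below the smallest normal half (2^-14, bits 0x38800000): flush to 0 */
	if(bits < 0x38800000u)
		return 0;

	/* re-bias the exponent (127 -> 15) and drop 13 mantissa bits;
	 * subtracting 0x38000000 here corresponds to the _mm_add_epi32
	 * with 0xC8000000 in the SSE path */
	return (half_t)((bits - 0x38000000u) >> 13);
}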