From dc4d850d109f64bc0c56b53b9a50a8e9c70951fa Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Mon, 7 Aug 2017 15:30:01 +0200 Subject: Fix Windows build errors with recent Cycles SIMD refactoring. --- intern/cycles/util/util_math_float4.h | 5 ++-- intern/cycles/util/util_simd.h | 44 +++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 14 deletions(-) (limited to 'intern/cycles/util') diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h index adb9a76a434..57befea66c4 100644 --- a/intern/cycles/util/util_math_float4.h +++ b/intern/cycles/util/util_math_float4.h @@ -286,13 +286,14 @@ ccl_device_inline float4 reduce_add(const float4& a) return shuffle<2,3,0,1>(h) + h; # endif #else - return make_float4(((a.x + a.y) + (a.z + a.w))); + float sum = (a.x + a.y) + (a.z + a.w); + return make_float4(sum, sum, sum, sum); #endif } ccl_device_inline float average(const float4& a) { - return reduce_add(a)[0] * 0.25f; + return reduce_add(a).x * 0.25f; } ccl_device_inline float len(const float4& a) diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h index a2b3247b207..1a26ca697dd 100644 --- a/intern/cycles/util/util_simd.h +++ b/intern/cycles/util/util_simd.h @@ -358,33 +358,45 @@ __forceinline size_t __bscf(size_t& v) #define _MM_FROUND_CUR_DIRECTION 0x04 #undef _mm_blendv_ps -__forceinline __m128 _mm_blendv_ps( __m128 value, __m128 input, __m128 mask ) { +#define _mm_blendv_ps _mm_blendv_ps_emu +__forceinline __m128 _mm_blendv_ps_emu( __m128 value, __m128 input, __m128 mask) +{ return _mm_or_ps(_mm_and_ps(mask, input), _mm_andnot_ps(mask, value)); } #undef _mm_blend_ps -__forceinline __m128 _mm_blend_ps( __m128 value, __m128 input, const int mask ) { +#define _mm_blend_ps _mm_blend_ps_emu +__forceinline __m128 _mm_blend_ps_emu( __m128 value, __m128 input, const int mask) +{ assert(mask < 0x10); return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]); } #undef _mm_blendv_epi8 -__forceinline __m128i _mm_blendv_epi8( __m128i value, __m128i input, __m128i mask ) { +#define _mm_blendv_epi8 _mm_blendv_epi8_emu +__forceinline __m128i _mm_blendv_epi8_emu( __m128i value, __m128i input, __m128i mask) +{ return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value)); } #undef _mm_min_epi32 -__forceinline __m128i _mm_min_epi32( __m128i value, __m128i input ) { +#define _mm_min_epi32 _mm_min_epi32_emu +__forceinline __m128i _mm_min_epi32_emu( __m128i value, __m128i input) +{ return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); } #undef _mm_max_epi32 -__forceinline __m128i _mm_max_epi32( __m128i value, __m128i input ) { +#define _mm_max_epi32 _mm_max_epi32_emu +__forceinline __m128i _mm_max_epi32_emu( __m128i value, __m128i input) +{ return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input)); } #undef _mm_extract_epi32 -__forceinline int _mm_extract_epi32( __m128i input, const int index ) { - switch ( index ) { +#define _mm_extract_epi32 _mm_extract_epi32_emu +__forceinline int _mm_extract_epi32_emu( __m128i input, const int index) +{ + switch(index) { case 0: return _mm_cvtsi128_si32(input); case 1: return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(1, 1, 1, 1))); case 2: return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(2, 2, 2, 2))); @@ -394,18 +406,26 @@ __forceinline int _mm_extract_epi32( __m128i input, const int index ) { } #undef _mm_insert_epi32 -__forceinline __m128i _mm_insert_epi32( __m128i value, int input, const int index ) { +#define _mm_insert_epi32 _mm_insert_epi32_emu +__forceinline __m128i _mm_insert_epi32_emu( __m128i value, int input, const int index) +{ assert(index >= 0 && index < 4); ((int*)&value)[index] = input; return value; } #undef _mm_insert_ps -__forceinline __m128 _mm_insert_ps( __m128 value, __m128 input, const int index ) -{ assert(index < 0x100); ((float*)&value)[(index >> 4)&0x3] = ((float*)&input)[index >> 6]; return _mm_andnot_ps(_mm_lookupmask_ps[index&0xf], value); } +#define _mm_insert_ps _mm_insert_ps_emu +__forceinline __m128 _mm_insert_ps_emu( __m128 value, __m128 input, const int index) +{ + assert(index < 0x100); + ((float*)&value)[(index >> 4)&0x3] = ((float*)&input)[index >> 6]; + return _mm_andnot_ps(_mm_lookupmask_ps[index&0xf], value); +} #undef _mm_round_ps -__forceinline __m128 _mm_round_ps( __m128 value, const int flags ) +#define _mm_round_ps _mm_round_ps_emu +__forceinline __m128 _mm_round_ps_emu( __m128 value, const int flags) { - switch ( flags ) + switch(flags) { case _MM_FROUND_TO_NEAREST_INT: return _mm_cvtepi32_ps(_mm_cvtps_epi32(value)); case _MM_FROUND_TO_NEG_INF : return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(-0.5f)))); -- cgit v1.2.3