diff options
author | Brecht Van Lommel <brecht@blender.org> | 2022-07-21 16:49:00 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2022-07-25 14:27:40 +0300 |
commit | 023eb2ea7c16a00272f83d564145e28aeb9ed2b7 (patch) | |
tree | 424c8baf1d563e36e56b49141887b75149f36a6e /intern/cycles/util/math_float3.h | |
parent | d567785658349504dc98c693c8c46c30e9a60c44 (diff) |
Cycles: more closely match some math and intersection operations in Embree
This helps with debugging, and gives a slightly closer match between CPU
and CUDA/HIP/Metal renders when it comes to ray tracing precision.
Diffstat (limited to 'intern/cycles/util/math_float3.h')
-rw-r--r-- | intern/cycles/util/math_float3.h | 15 |
1 files changed, 11 insertions, 4 deletions
```diff
diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h
index c02b4cdbf0d..c408eadf195 100644
--- a/intern/cycles/util/math_float3.h
+++ b/intern/cycles/util/math_float3.h
@@ -147,8 +147,11 @@ ccl_device_inline float3 operator/(const float f, const float3 &a)
 
 ccl_device_inline float3 operator/(const float3 &a, const float f)
 {
-  float invf = 1.0f / f;
-  return a * invf;
+# if defined(__KERNEL_SSE__)
+  return float3(_mm_div_ps(a.m128, _mm_set1_ps(f)));
+# else
+  return make_float3(a.x / f, a.y / f, a.z / f);
+# endif
 }
 
 ccl_device_inline float3 operator/(const float3 &a, const float3 &b)
@@ -284,8 +287,12 @@ ccl_device_inline float dot_xy(const float3 &a, const float3 &b)
 
 ccl_device_inline float3 cross(const float3 &a, const float3 &b)
 {
-  float3 r = make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
-  return r;
+# ifdef __KERNEL_SSE__
+  return float3(shuffle<1, 2, 0, 3>(
+      msub(ssef(a), shuffle<1, 2, 0, 3>(ssef(b)), shuffle<1, 2, 0, 3>(ssef(a)) * ssef(b))));
+# else
+  return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+# endif
 }
 
 ccl_device_inline float3 normalize(const float3 &a)
```