Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Kim <pk15950@gmail.com>2022-06-17 11:27:30 +0300
committerPeter Kim <pk15950@gmail.com>2022-06-17 11:27:30 +0300
commita39532670f6b668da7be5810fb1f844b82feeba3 (patch)
tree4d6a40f6c362ca1d9b7c7031527e6e54a62589e6 /source/blender/blenlib/intern/math_base_inline.c
parent7948150ca3683dc326c37609ded322d54b832d0d (diff)
parent10981bc8c092dda48ed5228cc19108513035abf0 (diff)
Merge branch 'master' into xr-dev
Diffstat (limited to 'source/blender/blenlib/intern/math_base_inline.c')
-rw-r--r--source/blender/blenlib/intern/math_base_inline.c20
1 files changed, 17 insertions, 3 deletions
diff --git a/source/blender/blenlib/intern/math_base_inline.c b/source/blender/blenlib/intern/math_base_inline.c
index a983821f15e..4a213f5fe74 100644
--- a/source/blender/blenlib/intern/math_base_inline.c
+++ b/source/blender/blenlib/intern/math_base_inline.c
@@ -767,6 +767,20 @@ MALWAYS_INLINE __m128 _bli_math_fastpow24(const __m128 arg)
return _mm_mul_ps(x, _mm_mul_ps(x, x));
}
+MALWAYS_INLINE __m128 _bli_math_rsqrt(__m128 in)
+{
+ __m128 r = _mm_rsqrt_ps(in);
+ /* Only do additional Newton-Raphson iterations when using actual SSE
+ * code path. When we are emulating SSE on NEON via sse2neon, the
+ * additional NR iterations are already done inside _mm_rsqrt_ps
+ * emulation. */
+# if defined(__SSE2__)
+ r = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f), r),
+ _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(in, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+# endif
+ return r;
+}
+
/* Calculate powf(x, 1.0f / 2.4) */
MALWAYS_INLINE __m128 _bli_math_fastpow512(const __m128 arg)
{
@@ -776,14 +790,14 @@ MALWAYS_INLINE __m128 _bli_math_fastpow512(const __m128 arg)
*/
__m128 xf = _bli_math_fastpow(0x3f2aaaab, 0x5eb504f3, arg);
__m128 xover = _mm_mul_ps(arg, xf);
- __m128 xfm1 = _mm_rsqrt_ps(xf);
+ __m128 xfm1 = _bli_math_rsqrt(xf);
__m128 x2 = _mm_mul_ps(arg, arg);
__m128 xunder = _mm_mul_ps(x2, xfm1);
/* sqrt2 * over + 2 * sqrt2 * under */
__m128 xavg = _mm_mul_ps(_mm_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
_mm_add_ps(xover, xunder));
- xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
- xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
+ xavg = _mm_mul_ps(xavg, _bli_math_rsqrt(xavg));
+ xavg = _mm_mul_ps(xavg, _bli_math_rsqrt(xavg));
return xavg;
}