Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- source/blender/blenlib/intern/math_base_inline.c 20
-rw-r--r-- source/blender/blenlib/tests/BLI_math_color_test.cc 68
2 files changed, 85 insertions, 3 deletions
diff --git a/source/blender/blenlib/intern/math_base_inline.c b/source/blender/blenlib/intern/math_base_inline.c
index a983821f15e..4a213f5fe74 100644
--- a/source/blender/blenlib/intern/math_base_inline.c
+++ b/source/blender/blenlib/intern/math_base_inline.c
@@ -767,6 +767,20 @@ MALWAYS_INLINE __m128 _bli_math_fastpow24(const __m128 arg)
return _mm_mul_ps(x, _mm_mul_ps(x, x));
}
+MALWAYS_INLINE __m128 _bli_math_rsqrt(__m128 in)
+{
+ __m128 r = _mm_rsqrt_ps(in);
+ /* Only do the additional Newton-Raphson iteration when using the actual
+ * SSE code path. When we are emulating SSE on NEON via sse2neon, the
+ * additional NR iterations are already done inside the _mm_rsqrt_ps
+ * emulation. */
+# if defined(__SSE2__)
+ r = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f), r),
+ _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(in, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+# endif
+ return r;
+}
+
/* Calculate powf(x, 1.0f / 2.4) */
MALWAYS_INLINE __m128 _bli_math_fastpow512(const __m128 arg)
{
@@ -776,14 +790,14 @@ MALWAYS_INLINE __m128 _bli_math_fastpow512(const __m128 arg)
*/
__m128 xf = _bli_math_fastpow(0x3f2aaaab, 0x5eb504f3, arg);
__m128 xover = _mm_mul_ps(arg, xf);
- __m128 xfm1 = _mm_rsqrt_ps(xf);
+ __m128 xfm1 = _bli_math_rsqrt(xf);
__m128 x2 = _mm_mul_ps(arg, arg);
__m128 xunder = _mm_mul_ps(x2, xfm1);
/* sqrt2 * over + 2 * sqrt2 * under */
__m128 xavg = _mm_mul_ps(_mm_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
_mm_add_ps(xover, xunder));
- xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
- xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
+ xavg = _mm_mul_ps(xavg, _bli_math_rsqrt(xavg));
+ xavg = _mm_mul_ps(xavg, _bli_math_rsqrt(xavg));
return xavg;
}
diff --git a/source/blender/blenlib/tests/BLI_math_color_test.cc b/source/blender/blenlib/tests/BLI_math_color_test.cc
index 7f2c0a3f1ca..4d928477870 100644
--- a/source/blender/blenlib/tests/BLI_math_color_test.cc
+++ b/source/blender/blenlib/tests/BLI_math_color_test.cc
@@ -74,3 +74,71 @@ TEST(math_color, LinearRGBTosRGBRoundtrip)
EXPECT_NEAR(orig_linear_color, linear_color, 1e-5);
}
}
+
+TEST(math_color, linearrgb_to_srgb_v3_v3)
+{
+ float srgb_color[3];
+ {
+ const float kTolerance = 1.0e-8f;
+ const float linear_color[3] = {0.0023f, 0.0024f, 0.0025f};
+ linearrgb_to_srgb_v3_v3(srgb_color, linear_color);
+ EXPECT_NEAR(0.029716f, srgb_color[0], kTolerance);
+ EXPECT_NEAR(0.031008f, srgb_color[1], kTolerance);
+ EXPECT_NEAR(0.032300f, srgb_color[2], kTolerance);
+ }
+
+ {
+ /* SIMD implementation of linear->srgb for larger inputs
+ * is less accurate; use larger tolerance. */
+ const float kTolerance = 3.6e-5f;
+ const float linear_color[3] = {0.71f, 0.75f, 0.78f};
+ linearrgb_to_srgb_v3_v3(srgb_color, linear_color);
+ EXPECT_NEAR(0.859696f, srgb_color[0], kTolerance);
+ EXPECT_NEAR(0.880825f, srgb_color[1], kTolerance);
+ EXPECT_NEAR(0.896244f, srgb_color[2], kTolerance);
+ }
+
+ {
+ /* Not a common case, but a possible one: values beyond the 1.0 range. */
+ const float kTolerance = 2.3e-4f;
+ const float linear_color[3] = {1.5f, 2.8f, 5.6f};
+ linearrgb_to_srgb_v3_v3(srgb_color, linear_color);
+ EXPECT_NEAR(1.19418f, srgb_color[0], kTolerance);
+ EXPECT_NEAR(1.56520f, srgb_color[1], kTolerance);
+ EXPECT_NEAR(2.10771f, srgb_color[2], kTolerance);
+ }
+}
+
+TEST(math_color, srgb_to_linearrgb_v3_v3)
+{
+ float linear_color[3];
+ {
+ const float kTolerance = 1.0e-8f;
+ const float srgb_color[3] = {0.0023f, 0.0024f, 0.0025f};
+ srgb_to_linearrgb_v3_v3(linear_color, srgb_color);
+ EXPECT_NEAR(0.000178019f, linear_color[0], kTolerance);
+ EXPECT_NEAR(0.000185759f, linear_color[1], kTolerance);
+ EXPECT_NEAR(0.000193498f, linear_color[2], kTolerance);
+ }
+
+ {
+ /* SIMD implementation of srgb->linear for larger inputs
+ * is less accurate; use larger tolerance. */
+ const float kTolerance = 1.5e-7f;
+ const float srgb_color[3] = {0.71f, 0.72f, 0.73f};
+ srgb_to_linearrgb_v3_v3(linear_color, srgb_color);
+ EXPECT_NEAR(0.4623615f, linear_color[0], kTolerance);
+ EXPECT_NEAR(0.4770000f, linear_color[1], kTolerance);
+ EXPECT_NEAR(0.4919052f, linear_color[2], kTolerance);
+ }
+
+ {
+ /* Not a common case, but a possible one: values beyond the 1.0 range. */
+ const float kTolerance = 7.7e-6f;
+ const float srgb_color[3] = {1.1f, 2.5f, 5.6f};
+ srgb_to_linearrgb_v3_v3(linear_color, srgb_color);
+ EXPECT_NEAR(1.24277f, linear_color[0], kTolerance);
+ EXPECT_NEAR(8.35473f, linear_color[1], kTolerance);
+ EXPECT_NEAR(56.23833f, linear_color[2], kTolerance);
+ }
+}