diff options
-rw-r--r-- | source/blender/blenlib/intern/math_base_inline.c | 20 | ||||
-rw-r--r-- | source/blender/blenlib/tests/BLI_math_color_test.cc | 68 |
2 files changed, 85 insertions, 3 deletions
diff --git a/source/blender/blenlib/intern/math_base_inline.c b/source/blender/blenlib/intern/math_base_inline.c index a983821f15e..4a213f5fe74 100644 --- a/source/blender/blenlib/intern/math_base_inline.c +++ b/source/blender/blenlib/intern/math_base_inline.c @@ -767,6 +767,20 @@ MALWAYS_INLINE __m128 _bli_math_fastpow24(const __m128 arg) return _mm_mul_ps(x, _mm_mul_ps(x, x)); } +MALWAYS_INLINE __m128 _bli_math_rsqrt(__m128 in) +{ + __m128 r = _mm_rsqrt_ps(in); + /* Only do additional Newton-Raphson iterations when using actual SSE + * code path. When we are emulating SSE on NEON via sse2neon, the + * additional NR iterations are already done inside _mm_rsqrt_ps + * emulation. */ +# if defined(__SSE2__) + r = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f), r), + _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(in, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); +# endif + return r; +} + /* Calculate powf(x, 1.0f / 2.4) */ MALWAYS_INLINE __m128 _bli_math_fastpow512(const __m128 arg) { @@ -776,14 +790,14 @@ MALWAYS_INLINE __m128 _bli_math_fastpow512(const __m128 arg) */ __m128 xf = _bli_math_fastpow(0x3f2aaaab, 0x5eb504f3, arg); __m128 xover = _mm_mul_ps(arg, xf); - __m128 xfm1 = _mm_rsqrt_ps(xf); + __m128 xfm1 = _bli_math_rsqrt(xf); __m128 x2 = _mm_mul_ps(arg, arg); __m128 xunder = _mm_mul_ps(x2, xfm1); /* sqrt2 * over + 2 * sqrt2 * under */ __m128 xavg = _mm_mul_ps(_mm_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f), _mm_add_ps(xover, xunder)); - xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg)); - xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg)); + xavg = _mm_mul_ps(xavg, _bli_math_rsqrt(xavg)); + xavg = _mm_mul_ps(xavg, _bli_math_rsqrt(xavg)); return xavg; } diff --git a/source/blender/blenlib/tests/BLI_math_color_test.cc b/source/blender/blenlib/tests/BLI_math_color_test.cc index 7f2c0a3f1ca..4d928477870 100644 --- a/source/blender/blenlib/tests/BLI_math_color_test.cc +++ b/source/blender/blenlib/tests/BLI_math_color_test.cc @@ -74,3 +74,71 @@ TEST(math_color, LinearRGBTosRGBRoundtrip) 
EXPECT_NEAR(orig_linear_color, linear_color, 1e-5); } } + +TEST(math_color, linearrgb_to_srgb_v3_v3) +{ + float srgb_color[3]; + { + const float kTolerance = 1.0e-8f; + const float linear_color[3] = {0.0023f, 0.0024f, 0.0025f}; + linearrgb_to_srgb_v3_v3(srgb_color, linear_color); + EXPECT_NEAR(0.029716f, srgb_color[0], kTolerance); + EXPECT_NEAR(0.031008f, srgb_color[1], kTolerance); + EXPECT_NEAR(0.032300f, srgb_color[2], kTolerance); + } + + { + /* SIMD implementation of linear->srgb for larger inputs + * is less accurate; use larger tolerance. */ + const float kTolerance = 3.6e-5f; + const float linear_color[3] = {0.71f, 0.75f, 0.78f}; + linearrgb_to_srgb_v3_v3(srgb_color, linear_color); + EXPECT_NEAR(0.859696f, srgb_color[0], kTolerance); + EXPECT_NEAR(0.880825f, srgb_color[1], kTolerance); + EXPECT_NEAR(0.896244f, srgb_color[2], kTolerance); + } + + { + /* Not a common, but possible case: values beyond 1.0 range. */ + const float kTolerance = 2.3e-4f; + const float linear_color[3] = {1.5f, 2.8f, 5.6f}; + linearrgb_to_srgb_v3_v3(srgb_color, linear_color); + EXPECT_NEAR(1.19418f, srgb_color[0], kTolerance); + EXPECT_NEAR(1.56520f, srgb_color[1], kTolerance); + EXPECT_NEAR(2.10771f, srgb_color[2], kTolerance); + } +} + +TEST(math_color, srgb_to_linearrgb_v3_v3) +{ + float linear_color[3]; + { + const float kTolerance = 1.0e-8f; + const float srgb_color[3] = {0.0023f, 0.0024f, 0.0025f}; + srgb_to_linearrgb_v3_v3(linear_color, srgb_color); + EXPECT_NEAR(0.000178019f, linear_color[0], kTolerance); + EXPECT_NEAR(0.000185759f, linear_color[1], kTolerance); + EXPECT_NEAR(0.000193498f, linear_color[2], kTolerance); + } + + { + /* SIMD implementation of srgb->linear for larger inputs + * is less accurate; use larger tolerance. 
*/ + const float kTolerance = 1.5e-7f; + const float srgb_color[3] = {0.71f, 0.72f, 0.73f}; + srgb_to_linearrgb_v3_v3(linear_color, srgb_color); + EXPECT_NEAR(0.4623615f, linear_color[0], kTolerance); + EXPECT_NEAR(0.4770000f, linear_color[1], kTolerance); + EXPECT_NEAR(0.4919052f, linear_color[2], kTolerance); + } + + { + /* Not a common, but possible case: values beyond 1.0 range. */ + const float kTolerance = 7.7e-6f; + const float srgb_color[3] = {1.1f, 2.5f, 5.6f}; + srgb_to_linearrgb_v3_v3(linear_color, srgb_color); + EXPECT_NEAR(1.24277f, linear_color[0], kTolerance); + EXPECT_NEAR(8.35473f, linear_color[1], kTolerance); + EXPECT_NEAR(56.23833f, linear_color[2], kTolerance); + } +} |