Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2016-05-05 20:25:08 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2016-05-05 20:46:06 +0300
commit88b72925d0ef59dc0d67df05f4ad74742175653c (patch)
tree20e13cd7be9c3c56c242da71b6e61e32e331619b /source/blender/blenlib/intern/math_color_inline.c
parentbb6fbc64ae2bf4d1cba4a4b6f1831e6df58e2e6b (diff)
Optimize linear<->sRGB conversion for SSE2 processors
Use SSE2 intrinsics, when available, for these kinds of conversions. The result is not totally accurate, but it is accurate enough for the purposes where we do direct colorspace conversion bypassing OCIO. Partially based on code from Cycles, partially based on other online articles: https://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent. This makes projection painting on hi-res float textures smoother. This commit also enables SSE2 globally in Blender. It shouldn't bring any regressions on supported hardware (we have required SSE2 since 2.64), but we should keep an eye on it because compilers might have some bugs with that (unlikely, but possible).
Diffstat (limited to 'source/blender/blenlib/intern/math_color_inline.c')
-rw-r--r--source/blender/blenlib/intern/math_color_inline.c68
1 files changed, 62 insertions, 6 deletions
diff --git a/source/blender/blenlib/intern/math_color_inline.c b/source/blender/blenlib/intern/math_color_inline.c
index 45466226e72..c268481a5d9 100644
--- a/source/blender/blenlib/intern/math_color_inline.c
+++ b/source/blender/blenlib/intern/math_color_inline.c
@@ -28,6 +28,7 @@
*/
+#include "BLI_math_base.h"
#include "BLI_math_color.h"
#include "BLI_utildefines.h"
@@ -38,6 +39,52 @@
/******************************** Color Space ********************************/
+#ifdef __SSE2__
+
+MALWAYS_INLINE __m128 srgb_to_linearrgb_v4_simd(const __m128 c)
+{ /* Per-lane sRGB -> linear: both curve segments are computed for all four lanes, then combined using the cmp mask. */
+ __m128 cmp = _mm_cmplt_ps(c, _mm_set1_ps(0.04045f)); /* lane mask: set where c < 0.04045 */
+ __m128 lt = _mm_max_ps(_mm_mul_ps(c, _mm_set1_ps(1.0f/12.92f)), /* linear segment: c / 12.92 ... */
+ _mm_set1_ps(0.0f)); /* ... with the result clamped to be >= 0 */
+ __m128 gtebase = _mm_mul_ps(_mm_add_ps(c, _mm_set1_ps(0.055f)),
+ _mm_set1_ps(1.0f/1.055f)); /* fma */
+ __m128 gte = _bli_math_fastpow24(gtebase); /* presumably approximates gtebase^2.4; helper is defined outside this diff -- confirm */
+ return _bli_math_blend_sse(cmp, lt, gte); /* presumably selects lt where cmp is set, gte elsewhere -- TODO confirm against the helper */
+}
+
+MALWAYS_INLINE __m128 linearrgb_to_srgb_v4_simd(const __m128 c)
+{ /* Per-lane linear -> sRGB: both curve segments are computed for all four lanes, then combined using the cmp mask. */
+ __m128 cmp = _mm_cmplt_ps(c, _mm_set1_ps(0.0031308f)); /* lane mask: set where c < 0.0031308 */
+ __m128 lt = _mm_max_ps(_mm_mul_ps(c, _mm_set1_ps(12.92f)), /* linear segment: 12.92 * c ... */
+ _mm_set1_ps(0.0f)); /* ... with the result clamped to be >= 0 */
+ __m128 gte = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.055f),
+ _bli_math_fastpow512(c)), /* presumably approximates c^(5/12); helper is defined outside this diff -- confirm */
+ _mm_set1_ps(-0.055f)); /* i.e. 1.055 * fastpow512(c) - 0.055 */
+ return _bli_math_blend_sse(cmp, lt, gte); /* presumably selects lt where cmp is set, gte elsewhere -- TODO confirm against the helper */
+}
+
+MINLINE void srgb_to_linearrgb_v3_v3(float linear[3], const float srgb[3])
+{ /* SSE2 variant: converts the three sRGB components with one call to the 4-lane SIMD routine. */
+ float r[4] = {srgb[0], srgb[1], srgb[2], 0.0f}; /* copy the input and pad the unused fourth lane with zero */
+ __m128 *rv = (__m128 *)&r; /* NOTE(review): r is declared as a plain float array; confirm 16-byte alignment is guaranteed on every target, or use unaligned load/store */
+ *rv = srgb_to_linearrgb_v4_simd(*rv); /* convert in place through the vector view of r */
+ linear[0] = r[0]; /* only the first three lanes are written back; the padding lane is discarded */
+ linear[1] = r[1];
+ linear[2] = r[2];
+}
+
+MINLINE void linearrgb_to_srgb_v3_v3(float srgb[3], const float linear[3])
+{ /* SSE2 variant: converts the three linear components with one call to the 4-lane SIMD routine. */
+ float r[4] = {linear[0], linear[1], linear[2], 0.0f}; /* copy the input and pad the unused fourth lane with zero */
+ __m128 *rv = (__m128 *)&r; /* NOTE(review): r is declared as a plain float array; confirm 16-byte alignment is guaranteed on every target, or use unaligned load/store */
+ *rv = linearrgb_to_srgb_v4_simd(*rv); /* convert in place through the vector view of r */
+ srgb[0] = r[0]; /* only the first three lanes are written back; the padding lane is discarded */
+ srgb[1] = r[1];
+ srgb[2] = r[2];
+}
+
+#else /* __SSE2__ */
+
MINLINE void srgb_to_linearrgb_v3_v3(float linear[3], const float srgb[3])
{
linear[0] = srgb_to_linearrgb(srgb[0]);
@@ -51,6 +98,7 @@ MINLINE void linearrgb_to_srgb_v3_v3(float srgb[3], const float linear[3])
srgb[1] = linearrgb_to_srgb(linear[1]);
srgb[2] = linearrgb_to_srgb(linear[2]);
}
+#endif /* __SSE2__ */
MINLINE void srgb_to_linearrgb_v4(float linear[4], const float srgb[4])
{
@@ -98,10 +146,14 @@ MINLINE void srgb_to_linearrgb_predivide_v4(float linear[4], const float srgb[4]
inv_alpha = 1.0f / alpha;
}
- linear[0] = srgb_to_linearrgb(srgb[0] * inv_alpha) * alpha;
- linear[1] = srgb_to_linearrgb(srgb[1] * inv_alpha) * alpha;
- linear[2] = srgb_to_linearrgb(srgb[2] * inv_alpha) * alpha;
+ linear[0] = srgb[0] * inv_alpha;
+ linear[1] = srgb[1] * inv_alpha;
+ linear[2] = srgb[2] * inv_alpha;
linear[3] = srgb[3];
+ srgb_to_linearrgb_v3_v3(linear, linear);
+ linear[0] *= alpha;
+ linear[1] *= alpha;
+ linear[2] *= alpha;
}
MINLINE void linearrgb_to_srgb_predivide_v4(float srgb[4], const float linear[4])
@@ -117,10 +169,14 @@ MINLINE void linearrgb_to_srgb_predivide_v4(float srgb[4], const float linear[4]
inv_alpha = 1.0f / alpha;
}
- srgb[0] = linearrgb_to_srgb(linear[0] * inv_alpha) * alpha;
- srgb[1] = linearrgb_to_srgb(linear[1] * inv_alpha) * alpha;
- srgb[2] = linearrgb_to_srgb(linear[2] * inv_alpha) * alpha;
+ srgb[0] = linear[0] * inv_alpha;
+ srgb[1] = linear[1] * inv_alpha;
+ srgb[2] = linear[2] * inv_alpha;
srgb[3] = linear[3];
+ linearrgb_to_srgb_v3_v3(srgb, srgb);
+ srgb[0] *= alpha;
+ srgb[1] *= alpha;
+ srgb[2] *= alpha;
}
/* LUT accelerated conversions */