diff options
author | Campbell Barton <ideasman42@gmail.com> | 2012-06-21 13:47:30 +0400 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2012-06-21 13:47:30 +0400 |
commit | c250ab893c0b0086011d44848b66e53adcff8cad (patch) | |
tree | 0131eb22eb0726c995a4ecf6c6fa2022f7846cd9 /extern/libopenjpeg/mct.c | |
parent | 11cb213d4509b6a2733210fce934395a86543bae (diff) |
Update libopenjpeg from 1.3 to 1.5, since 1.3 has a bug saving alpha channels.
The patches/ dir is NOT applied, but perhaps we can get OSX and BSD working using the opj_config.h.
Diffstat (limited to 'extern/libopenjpeg/mct.c')
-rw-r--r-- | extern/libopenjpeg/mct.c | 42 |
1 file changed, 42 insertions, 0 deletions
diff --git a/extern/libopenjpeg/mct.c b/extern/libopenjpeg/mct.c index ca21744f3e4..870993b06d2 100644 --- a/extern/libopenjpeg/mct.c +++ b/extern/libopenjpeg/mct.c @@ -29,6 +29,10 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#ifdef __SSE__ +#include <xmmintrin.h> +#endif + #include "opj_includes.h" /* <summary> */ @@ -127,6 +131,44 @@ void mct_decode_real( int n) { int i; +#ifdef __SSE__ + __m128 vrv, vgu, vgv, vbu; + vrv = _mm_set1_ps(1.402f); + vgu = _mm_set1_ps(0.34413f); + vgv = _mm_set1_ps(0.71414f); + vbu = _mm_set1_ps(1.772f); + for (i = 0; i < (n >> 3); ++i) { + __m128 vy, vu, vv; + __m128 vr, vg, vb; + + vy = _mm_load_ps(c0); + vu = _mm_load_ps(c1); + vv = _mm_load_ps(c2); + vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); + vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); + vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); + _mm_store_ps(c0, vr); + _mm_store_ps(c1, vg); + _mm_store_ps(c2, vb); + c0 += 4; + c1 += 4; + c2 += 4; + + vy = _mm_load_ps(c0); + vu = _mm_load_ps(c1); + vv = _mm_load_ps(c2); + vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); + vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); + vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); + _mm_store_ps(c0, vr); + _mm_store_ps(c1, vg); + _mm_store_ps(c2, vb); + c0 += 4; + c1 += 4; + c2 += 4; + } + n &= 7; +#endif for(i = 0; i < n; ++i) { float y = c0[i]; float u = c1[i]; |