Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/LAVFilters.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hendrik Leppkes <h.leppkes@gmail.com> 2012-07-11 00:03:28 +0400
committer: Hendrik Leppkes <h.leppkes@gmail.com> 2012-07-11 00:03:28 +0400
commit: fc7ae01c070fd499ce44c3f35e33fdb57cb5074f (patch)
tree: 53957c673195fc1eeb44a4843454f14f897b4a25 /decoder/LAVVideo/pixconv
parent: e88cb68608e5c97d58d0598abf3397215639db44 (diff)
pixconv: don't use SSE4 instructions for YCgCo conversion
Diffstat (limited to 'decoder/LAVVideo/pixconv')
-rw-r--r-- decoder/LAVVideo/pixconv/yuv2rgb.cpp 18
1 file changed, 11 insertions, 7 deletions
diff --git a/decoder/LAVVideo/pixconv/yuv2rgb.cpp b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
index 62c1468e..4c62bd84 100644
--- a/decoder/LAVVideo/pixconv/yuv2rgb.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
@@ -227,10 +227,6 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
xmm0 = _mm_unpacklo_epi64(xmm0, xmm5); /* YYYYYYYY */
- xmm2 = coeffs->CbCr_center; /* move CbCr/CgCo to proper range */
- xmm1 = _mm_subs_epi16(xmm1, xmm2);
- xmm3 = _mm_subs_epi16(xmm3, xmm2);
-
// After this step, xmm1 & xmm3 contain 4 UV pairs, each in a 16-bit value, filling 12-bit.
if (!ycgco) {
// YCbCr conversion
@@ -244,6 +240,10 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
xmm0 = _mm_mulhi_epi16(xmm0, coeffs->cy); /* Y*cy (result is 28 bits, with 12 high-bits packed into the result) */
xmm0 = _mm_add_epi16(xmm0, coeffs->rgb_add); /* Y*cy + 16 (in case of range compression) */
+ xmm2 = coeffs->CbCr_center; /* move CbCr to proper range */
+ xmm1 = _mm_subs_epi16(xmm1, xmm2);
+ xmm3 = _mm_subs_epi16(xmm3, xmm2);
+
xmm6 = xmm1;
xmm4 = xmm3;
xmm6 = _mm_madd_epi16(xmm6, coeffs->cR_Cr); /* Result is 25 bits (12 from chroma, 13 from coeff) */
@@ -290,11 +290,15 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
xmm1 = _mm_and_si128(xmm1, xmm7); /* null out the high-order bytes to get the Cg values */
xmm2 = _mm_and_si128(xmm2, xmm7);
- xmm3 = _mm_srli_epi32(xmm3, 16); /* right shift the V values */
+ xmm3 = _mm_srli_epi32(xmm3, 16); /* right shift the Co values */
xmm4 = _mm_srli_epi32(xmm4, 16);
- xmm1 = _mm_packus_epi32(xmm1, xmm2); /* Pack Cg into xmm1 */
- xmm3 = _mm_packus_epi32(xmm3, xmm4); /* Pack Co into xmm3 */
+ xmm1 = _mm_packs_epi32(xmm1, xmm2); /* Pack Cg into xmm1 */
+ xmm3 = _mm_packs_epi32(xmm3, xmm4); /* Pack Co into xmm3 */
+
+ xmm2 = coeffs->CbCr_center; /* move CgCo to proper range */
+ xmm1 = _mm_subs_epi16(xmm1, xmm2);
+ xmm3 = _mm_subs_epi16(xmm3, xmm2);
xmm2 = xmm0;
xmm2 = _mm_subs_epi16(xmm2, xmm1); /* tmp = Y - Cg */