From bda9b20fa49975c6b9c39308818e7773eb78b411 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:15:44 -0400
Subject: swscale: un-special-case yuv2yuvX16_c().

Make yuv2yuvX16_c a function pointer for yuv2yuvX(), so that the
function pointer becomes bitdepth-independent.
---
 libswscale/ppc/swscale_altivec.c  |  6 +++-
 libswscale/swscale.c              | 63 ++++++++++++++++++++++-----------------
 libswscale/x86/swscale_template.c |  5 +++-
 3 files changed, 44 insertions(+), 30 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 7f4dfcd6f1..423297a8f0 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -403,11 +403,15 @@ static void hScale_altivec_real(int16_t *dst, int dstW,
 
 void ff_sws_init_swScale_altivec(SwsContext *c)
 {
+    enum PixelFormat dstFormat = c->dstFormat;
+
     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
         return;
 
     c->hScale       = hScale_altivec_real;
-    c->yuv2yuvX     = yuv2yuvX_altivec_real;
+    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
+        c->yuv2yuvX     = yuv2yuvX_altivec_real;
+    }
 
     /* The following list of supported dstFormat values should
      * match what's found in the body of ff_yuv2packedX_altivec() */
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index fd64b81019..35b0233f15 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -258,17 +258,20 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
             output_pixel(&aDest[i], val);
         }
     }
+#undef output_pixel
 }
 
 #define yuv2NBPS(bits, BE_LE, is_be) \
-static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
+static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                               const int16_t **lumSrc, int lumFilterSize, \
                               const int16_t *chrFilter, const int16_t **chrUSrc, \
                               const int16_t **chrVSrc, \
                               int chrFilterSize, const int16_t **alpSrc, \
-                              uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
-                              uint16_t *aDest, int dstW, int chrDstW) \
+                              uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
+                              uint8_t *_aDest, int dstW, int chrDstW) \
 { \
+    uint16_t *dest  = (uint16_t *) _dest,  *uDest = (uint16_t *) _uDest, \
+             *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
     yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                           alpSrc, \
@@ -282,24 +285,24 @@ yuv2NBPS(10, LE, 0);
 yuv2NBPS(16, BE, 1);
 yuv2NBPS(16, LE, 0);
 
-static inline void yuv2yuvX16_c(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+static inline void yuv2yuvX16_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                 const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize,
-                                const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
+                                const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
                                 enum PixelFormat dstFormat)
 {
 #define conv16(bits) \
     if (isBE(dstFormat)) { \
-        yuv2yuvX ## bits ## BE_c(lumFilter, lumSrc, lumFilterSize, \
-                               chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                               alpSrc, \
-                               dest, uDest, vDest, aDest, \
-                               dstW, chrDstW); \
+        yuv2yuvX ## bits ## BE_c(c, lumFilter, lumSrc, lumFilterSize, \
+                                 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                                 alpSrc, \
+                                 dest, uDest, vDest, aDest, \
+                                 dstW, chrDstW); \
     } else { \
-        yuv2yuvX ## bits ## LE_c(lumFilter, lumSrc, lumFilterSize, \
-                               chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                               alpSrc, \
-                               dest, uDest, vDest, aDest, \
-                               dstW, chrDstW); \
+        yuv2yuvX ## bits ## LE_c(c, lumFilter, lumSrc, lumFilterSize, \
+                                 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                                 alpSrc, \
+                                 dest, uDest, vDest, aDest, \
+                                 dstW, chrDstW); \
     }
     if (is16BPS(dstFormat)) {
         conv16(16);
@@ -1783,14 +1786,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
-                                 chrVSrcPtr, vChrFilterSize,
-                                 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest,
-                                 (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
-                                 dstFormat);
-                } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
+                if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
                     const int16_t *lumBuf = lumSrcPtr[0];
                     const int16_t *chrUBuf= chrUSrcPtr[0];
                     const int16_t *chrVBuf= chrVSrcPtr[0];
@@ -1870,9 +1866,9 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                 if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16_c(vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
+                    yuv2yuvX16_c(c, vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                                  vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                 alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
+                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW,
                                  dstFormat);
                 } else {
                     yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
@@ -1921,11 +1917,22 @@ static int swScale(SwsContext *c, const uint8_t* src[],
 
 static void sws_init_swScale_c(SwsContext *c)
 {
-    enum PixelFormat srcFormat = c->srcFormat;
+    enum PixelFormat srcFormat = c->srcFormat,
+                     dstFormat = c->dstFormat;
 
     c->yuv2nv12X    = yuv2nv12X_c;
-    c->yuv2yuv1     = yuv2yuv1_c;
-    c->yuv2yuvX     = yuv2yuvX_c;
+    if (is16BPS(dstFormat)) {
+        c->yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
+    } else if (is9_OR_10BPS(dstFormat)) {
+        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
+            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
+        } else {
+            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
+        }
+    } else {
+        c->yuv2yuv1     = yuv2yuv1_c;
+        c->yuv2yuvX     = yuv2yuvX_c;
+    }
     c->yuv2packed1  = yuv2packed1_c;
     c->yuv2packed2  = yuv2packed2_c;
     c->yuv2packedX  = yuv2packedX_c;
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index efaadaa09e..174c3fbfd6 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2185,8 +2185,10 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
 
 static void RENAME(sws_init_swScale)(SwsContext *c)
 {
-    enum PixelFormat srcFormat = c->srcFormat;
+    enum PixelFormat srcFormat = c->srcFormat,
+                     dstFormat = c->dstFormat;
 
+    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
     if (!(c->flags & SWS_BITEXACT)) {
         if (c->flags & SWS_ACCURATE_RND) {
             c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
@@ -2236,6 +2238,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
                 break;
         }
     }
+    }
 
 #if !COMPILE_TEMPLATE_MMX2
     c->hScale       = RENAME(hScale      );
-- 
cgit v1.2.3


From ea281753120f55fc3a3102ac54ba291d0ffcd43d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:37:59 -0400
Subject: swscale: remove alp/chr/lumSrcOffset.

They are hacks added to reuse the same scaling function for
different formats, and they may cause problems when SIMD
implementations of the same functions are used along with pure
C functions.
---
 libswscale/swscale.c          | 201 ++++++++++++++++--------------------------
 libswscale/swscale_internal.h |   4 -
 2 files changed, 76 insertions(+), 129 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 35b0233f15..d97c4e1259 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -979,93 +979,58 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
     }
 }
 
-static void rgb48ToY_c(uint8_t *dst, const uint8_t *src, int width,
-                       uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int r = src[i*6+0];
-        int g = src[i*6+2];
-        int b = src[i*6+4];
-
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static void rgb48ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                        const uint8_t *src1, const uint8_t *src2,
-                        int width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i = 0; i < width; i++) {
-        int r = src1[6*i + 0];
-        int g = src1[6*i + 2];
-        int b = src1[6*i + 4];
-
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static void rgb48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             int width, uint32_t *unused)
-{
-    int i;
-    assert(src1==src2);
-    for (i = 0; i < width; i++) {
-        int r= src1[12*i + 0] + src1[12*i + 6];
-        int g= src1[12*i + 2] + src1[12*i + 8];
-        int b= src1[12*i + 4] + src1[12*i + 10];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-    }
-}
-
-static void bgr48ToY_c(uint8_t *dst, const uint8_t *src, int width,
-                       uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b = src[i*6+0];
-        int g = src[i*6+2];
-        int r = src[i*6+4];
-
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static void bgr48ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                        const uint8_t *src1, const uint8_t *src2,
-                        int width, uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b = src1[6*i + 0];
-        int g = src1[6*i + 2];
-        int r = src1[6*i + 4];
-
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
-    }
-}
-
-static void bgr48ToUV_half_c(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
-                             int width, uint32_t *unused)
-{
-    int i;
-    for (i = 0; i < width; i++) {
-        int b= src1[12*i + 0] + src1[12*i + 6];
-        int g= src1[12*i + 2] + src1[12*i + 8];
-        int r= src1[12*i + 4] + src1[12*i + 10];
-
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
-    }
+#define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
+static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
+                       uint8_t *dst, const uint8_t *src, int width, \
+                       uint32_t *unused) \
+{ \
+    int i; \
+    for (i = 0; i < width; i++) { \
+        int compA = rfunc(&src[i*6+0]) >> 8; \
+        int compB = rfunc(&src[i*6+2]) >> 8; \
+        int compC = rfunc(&src[i*6+4]) >> 8; \
+ \
+        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
+    } \
+} \
+ \
+static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
+                        uint8_t *dstU, uint8_t *dstV, \
+                        const uint8_t *src1, const uint8_t *src2, \
+                        int width, uint32_t *unused) \
+{ \
+    int i; \
+    assert(src1==src2); \
+    for (i = 0; i < width; i++) { \
+        int compA = rfunc(&src1[6*i + 0]) >> 8; \
+        int compB = rfunc(&src1[6*i + 2]) >> 8; \
+        int compC = rfunc(&src1[6*i + 4]) >> 8; \
+ \
+        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
+        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
+    } \
+} \
+ \
+static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
+                            uint8_t *dstU, uint8_t *dstV, \
+                            const uint8_t *src1, const uint8_t *src2, \
+                            int width, uint32_t *unused) \
+{ \
+    int i; \
+    assert(src1==src2); \
+    for (i = 0; i < width; i++) { \
+        int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
+        int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
+        int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
+ \
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
+    } \
 }
+rgb48funcs(LE, AV_RL16, r, g, b);
+rgb48funcs(BE, AV_RB16, r, g, b);
+rgb48funcs(LE, AV_RL16, b, g, r);
+rgb48funcs(BE, AV_RB16, b, g, r);
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
 static void name ## _c(uint8_t *dst, const uint8_t *src, \
@@ -1098,6 +1063,14 @@ static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unu
     }
 }
 
+static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dst[i]= src[4*i+3];
+    }
+}
+
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
                        const uint8_t *src, const uint8_t *dummy, \
@@ -1532,8 +1505,6 @@ static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
 
-    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
-
     if (toYV12) {
         toYV12(formatConvBuffer, src, srcW, pal);
         src= formatConvBuffer;
@@ -1570,10 +1541,6 @@ static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int ds
                            const int16_t *hChrFilterPos, int hChrFilterSize,
                            uint8_t *formatConvBuffer, uint32_t *pal)
 {
-
-    src1 += c->chrSrcOffset;
-    src2 += c->chrSrcOffset;
-
     if (c->chrToYV12) {
         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
@@ -1969,10 +1936,10 @@ static void sws_init_swScale_c(SwsContext *c)
     }
     if (c->chrSrcHSubSample) {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half_c; break;
-        case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half_c; break;
+        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
+        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half_c;  break;
         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
@@ -1986,10 +1953,10 @@ static void sws_init_swScale_c(SwsContext *c)
         }
     } else {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_c; break;
-        case PIX_FMT_BGR48BE:
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_c; break;
+        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
+        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
+        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
+        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
         case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_c;  break;
         case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
         case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
@@ -2038,37 +2005,21 @@ static void sws_init_swScale_c(SwsContext *c)
     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
-    case PIX_FMT_RGB48BE:
-    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY_c; break;
-    case PIX_FMT_BGR48BE:
-    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY_c; break;
+    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
+    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
+    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
+    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
     }
     if (c->alpPixBuf) {
         switch (srcFormat) {
-        case PIX_FMT_RGB32  :
-        case PIX_FMT_RGB32_1:
-        case PIX_FMT_BGR32  :
-        case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA_c; break;
-        case PIX_FMT_Y400A  : c->alpToYV12 = yuy2ToY_c; break;
+        case PIX_FMT_BGRA:
+        case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
+        case PIX_FMT_ABGR:
+        case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
+        case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
         }
     }
 
-    switch (srcFormat) {
-    case PIX_FMT_Y400A  :
-        c->alpSrcOffset = 1;
-        break;
-    case PIX_FMT_RGB32  :
-    case PIX_FMT_BGR32  :
-        c->alpSrcOffset = 3;
-        break;
-    case PIX_FMT_RGB48LE:
-    case PIX_FMT_BGR48LE:
-        c->lumSrcOffset = 1;
-        c->chrSrcOffset = 1;
-        c->alpSrcOffset = 1;
-        break;
-    }
-
     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
         if (c->srcRange) {
             c->lumConvertRange = lumRangeFromJpeg_c;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 6aaa843015..a9adb3f07a 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -313,10 +313,6 @@ typedef struct SwsContext {
     void (*lumConvertRange)(uint16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
     void (*chrConvertRange)(uint16_t *dst1, uint16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
 
-    int lumSrcOffset; ///< Offset given to luma src pointers passed to horizontal input functions.
-    int chrSrcOffset; ///< Offset given to chroma src pointers passed to horizontal input functions.
-    int alpSrcOffset; ///< Offset given to alpha src pointers passed to horizontal input functions.
-
     int needs_hcscale; ///< Set if there are chroma planes to be converted.
 
 } SwsContext;
-- 
cgit v1.2.3


From b3b28b080f588a63d9bcf2714daa7119ece2da61 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:40:04 -0400
Subject: swscale: cosmetics.

---
 libswscale/swscale.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index d97c4e1259..f6ed45382e 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1906,8 +1906,7 @@ static void sws_init_swScale_c(SwsContext *c)
 
     c->hScale       = hScale_c;
 
-    if (c->flags & SWS_FAST_BILINEAR)
-    {
+    if (c->flags & SWS_FAST_BILINEAR) {
         c->hyscale_fast = hyscale_fast_c;
         c->hcscale_fast = hcscale_fast_c;
     }
-- 
cgit v1.2.3


From ca364a5b43044bc98a7aef001fb1543b7b13411d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:46:16 -0400
Subject: swscale: extract SWS_FULL_CHR_H_INT conditional into init code.

---
 libswscale/ppc/swscale_altivec.c  |  2 +-
 libswscale/swscale.c              | 60 ++++++++++++++-------------------------
 libswscale/x86/swscale_template.c |  6 ++++
 3 files changed, 28 insertions(+), 40 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 423297a8f0..42e965de95 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -415,7 +415,7 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
 
     /* The following list of supported dstFormat values should
      * match what's found in the body of ff_yuv2packedX_altivec() */
-    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
+    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf &&
         (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
          c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
          c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)) {
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index f6ed45382e..9705b14d00 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1770,50 +1770,28 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             } else {
                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
-                if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
+                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                     int chrAlpha= vChrFilter[2*dstY+1];
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbX_c_full(c, //FIXME write a packed1_full function
-                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr,
-                                        chrVSrcPtr, vChrFilterSize,
-                                        alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
-                                       *chrVSrcPtr, *(chrVSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL,
-                                       dest, dstW, chrAlpha, dstFormat, flags, dstY);
-                    }
-                } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
+                    c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
+                                   *chrVSrcPtr, *(chrVSrcPtr+1),
+                                   alpPixBuf ? *alpSrcPtr : NULL,
+                                   dest, dstW, chrAlpha, dstFormat, flags, dstY);
+                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
                     int lumAlpha= vLumFilter[2*dstY+1];
                     int chrAlpha= vChrFilter[2*dstY+1];
                     lumMmxFilter[2]=
                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
                     chrMmxFilter[2]=
                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbX_c_full(c, //FIXME write a packed2_full function
-                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                        alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
-                                       *chrVSrcPtr, *(chrVSrcPtr+1),
-                                       alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
-                                       dest, dstW, lumAlpha, chrAlpha, dstY);
-                    }
+                    c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
+                                   *chrVSrcPtr, *(chrVSrcPtr+1),
+                                   alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
+                                   dest, dstW, lumAlpha, chrAlpha, dstY);
                 } else { //general RGB
-                    if(flags & SWS_FULL_CHR_H_INT) {
-                        yuv2rgbX_c_full(c,
-                                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                        vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                        alpSrcPtr, dest, dstW, dstY);
-                    } else {
-                        c->yuv2packedX(c,
-                                       vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                       vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                       alpSrcPtr, dest, dstW, dstY);
-                    }
+                    c->yuv2packedX(c,
+                                   vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
+                                   vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
+                                   alpSrcPtr, dest, dstW, dstY);
                 }
             }
         } else { // hmm looks like we can't use MMX here without overwriting this array's tail
@@ -1900,9 +1878,13 @@ static void sws_init_swScale_c(SwsContext *c)
         c->yuv2yuv1     = yuv2yuv1_c;
         c->yuv2yuvX     = yuv2yuvX_c;
     }
-    c->yuv2packed1  = yuv2packed1_c;
-    c->yuv2packed2  = yuv2packed2_c;
-    c->yuv2packedX  = yuv2packedX_c;
+    if(c->flags & SWS_FULL_CHR_H_INT) {
+        c->yuv2packedX = yuv2rgbX_c_full;
+    } else {
+        c->yuv2packed1  = yuv2packed1_c;
+        c->yuv2packed2  = yuv2packed2_c;
+        c->yuv2packedX  = yuv2packedX_c;
+    }
 
     c->hScale       = hScale_c;
 
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 174c3fbfd6..bd72b75326 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2193,6 +2193,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
         if (c->flags & SWS_ACCURATE_RND) {
             c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
             c->yuv2yuvX     = RENAME(yuv2yuvX_ar    );
+            if (!(c->flags & SWS_FULL_CHR_H_INT)) {
             switch (c->dstFormat) {
             case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
             case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
@@ -2201,9 +2202,11 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
             case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
             default: break;
             }
+            }
         } else {
             c->yuv2yuv1     = RENAME(yuv2yuv1    );
             c->yuv2yuvX     = RENAME(yuv2yuvX    );
+            if (!(c->flags & SWS_FULL_CHR_H_INT)) {
             switch (c->dstFormat) {
             case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
             case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
@@ -2212,7 +2215,10 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
             case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
             default: break;
             }
+            }
+        }
         }
+        if (!(c->flags & SWS_FULL_CHR_H_INT)) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB32:
                 c->yuv2packed1 = RENAME(yuv2rgb32_1);
-- 
cgit v1.2.3


From 0fb5193156053b820579df9ab4596b950eb05d4d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 21:48:51 -0400
Subject: swscale: reindent x86 init code.

---
 libswscale/x86/swscale_template.c | 70 +++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index bd72b75326..3646ccceff 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -2189,62 +2189,62 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
                      dstFormat = c->dstFormat;
 
     if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
-    if (!(c->flags & SWS_BITEXACT)) {
-        if (c->flags & SWS_ACCURATE_RND) {
-            c->yuv2yuv1     = RENAME(yuv2yuv1_ar    );
-            c->yuv2yuvX     = RENAME(yuv2yuvX_ar    );
-            if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-            switch (c->dstFormat) {
-            case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
-            case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
-            case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
-            case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
-            case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
-            default: break;
-            }
-            }
-        } else {
-            c->yuv2yuv1     = RENAME(yuv2yuv1    );
-            c->yuv2yuvX     = RENAME(yuv2yuvX    );
-            if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-            switch (c->dstFormat) {
-            case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
-            case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
-            case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
-            case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
-            case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
-            default: break;
+        if (!(c->flags & SWS_BITEXACT)) {
+            if (c->flags & SWS_ACCURATE_RND) {
+                c->yuv2yuv1 = RENAME(yuv2yuv1_ar    );
+                c->yuv2yuvX = RENAME(yuv2yuvX_ar    );
+                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
+                    switch (c->dstFormat) {
+                    case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
+                    case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
+                    case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
+                    case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
+                    case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
+                    default: break;
+                    }
+                }
+            } else {
+                c->yuv2yuv1 = RENAME(yuv2yuv1    );
+                c->yuv2yuvX = RENAME(yuv2yuvX    );
+                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
+                    switch (c->dstFormat) {
+                    case PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
+                    case PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
+                    case PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
+                    case PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
+                    case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
+                    default: break;
+                    }
+                }
             }
-            }
-        }
         }
         if (!(c->flags & SWS_FULL_CHR_H_INT)) {
-        switch (c->dstFormat) {
-        case PIX_FMT_RGB32:
+            switch (c->dstFormat) {
+            case PIX_FMT_RGB32:
                 c->yuv2packed1 = RENAME(yuv2rgb32_1);
                 c->yuv2packed2 = RENAME(yuv2rgb32_2);
                 break;
-        case PIX_FMT_BGR24:
+            case PIX_FMT_BGR24:
                 c->yuv2packed1 = RENAME(yuv2bgr24_1);
                 c->yuv2packed2 = RENAME(yuv2bgr24_2);
                 break;
-        case PIX_FMT_RGB555:
+            case PIX_FMT_RGB555:
                 c->yuv2packed1 = RENAME(yuv2rgb555_1);
                 c->yuv2packed2 = RENAME(yuv2rgb555_2);
                 break;
-        case PIX_FMT_RGB565:
+            case PIX_FMT_RGB565:
                 c->yuv2packed1 = RENAME(yuv2rgb565_1);
                 c->yuv2packed2 = RENAME(yuv2rgb565_2);
                 break;
-        case PIX_FMT_YUYV422:
+            case PIX_FMT_YUYV422:
                 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
                 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
                 break;
-        default:
+            default:
                 break;
+            }
         }
     }
-    }
 
 #if !COMPILE_TEMPLATE_MMX2
     c->hScale       = RENAME(hScale      );
-- 
cgit v1.2.3


From df91d091747395384a7c585cee568f9949e6c9f2 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 22:27:45 -0400
Subject: swscale: integrate yuv2nv12X_c into yuv2yuvX() function pointers.

---
 libswscale/swscale.c          | 27 ++++++++++++---------------
 libswscale/swscale_internal.h |  6 ------
 2 files changed, 12 insertions(+), 21 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 9705b14d00..845af3d190 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -362,10 +362,13 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
 static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                                const int16_t **lumSrc, int lumFilterSize,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
-                               const int16_t **chrVSrc,
-                               int chrFilterSize, uint8_t *dest, uint8_t *uDest,
-                               int dstW, int chrDstW, enum PixelFormat dstFormat)
+                               const int16_t **chrVSrc, int chrFilterSize,
+                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
+                               uint8_t *vDest, uint8_t *aDest,
+                               int dstW, int chrDstW)
 {
+    enum PixelFormat dstFormat = c->dstFormat;
+
     //FIXME Optimize (just quickly written not optimized..)
     int i;
     for (i=0; i<dstW; i++) {
@@ -1743,14 +1746,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                c->yuv2nv12X(c,
-                             vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                             vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                             dest, uDest, dstW, chrDstW, dstFormat);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
+            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
                 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
@@ -1805,8 +1801,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                 yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
                             lumSrcPtr, vLumFilterSize,
                             vChrFilter+chrDstY*vChrFilterSize,
-                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                            dest, uDest, dstW, chrDstW, dstFormat);
+                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize, NULL,
+                            dest, uDest, NULL, NULL, dstW, chrDstW);
             } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
@@ -1865,8 +1861,9 @@ static void sws_init_swScale_c(SwsContext *c)
     enum PixelFormat srcFormat = c->srcFormat,
                      dstFormat = c->dstFormat;
 
-    c->yuv2nv12X    = yuv2nv12X_c;
-    if (is16BPS(dstFormat)) {
+    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+        c->yuv2yuvX     = yuv2nv12X_c;
+    } else if (is16BPS(dstFormat)) {
         c->yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
     } else if (is9_OR_10BPS(dstFormat)) {
         if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index a9adb3f07a..dcf8eb522a 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -250,12 +250,6 @@ typedef struct SwsContext {
 #endif
 
     /* function pointers for swScale() */
-    void (*yuv2nv12X  )(struct SwsContext *c,
-                        const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                        const int16_t **chrVSrc, int chrFilterSize,
-                        uint8_t *dest, uint8_t *uDest,
-                        int dstW, int chrDstW, int dstFormat);
     void (*yuv2yuv1   )(struct SwsContext *c,
                         const int16_t *lumSrc, const int16_t *chrUSrc,
                         const int16_t *chrVSrc, const int16_t *alpSrc,
-- 
cgit v1.2.3


From b73fe700253f1e93c3ca10f72fc8159d7e12aaa1 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 22:31:11 -0400
Subject: swscale: add yuv2planar/packed function typedefs.

---
 libswscale/swscale_internal.h | 74 +++++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 34 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index dcf8eb522a..013eef9e31 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -57,6 +57,41 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t* dst[], int dstStride[]);
 
+typedef void (*yuv2planar1_fn) (struct SwsContext *c,
+                                const int16_t *lumSrc, const int16_t *chrUSrc,
+                                const int16_t *chrVSrc, const int16_t *alpSrc,
+                                uint8_t *dest,
+                                uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
+                                int dstW, int chrDstW);
+typedef void (*yuv2planarX_fn) (struct SwsContext *c,
+                                const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc, int chrFilterSize,
+                                const int16_t **alpSrc,
+                                uint8_t *dest,
+                                uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
+                                int dstW, int chrDstW);
+typedef void (*yuv2packed1_fn) (struct SwsContext *c,
+                                const uint16_t *buf0,
+                                const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                const uint16_t *abuf0,
+                                uint8_t *dest,
+                                int dstW, int uvalpha, int dstFormat, int flags, int y);
+typedef void (*yuv2packed2_fn) (struct SwsContext *c,
+                                const uint16_t *buf0, const uint16_t *buf1,
+                                const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                const uint16_t *abuf0, const uint16_t *abuf1,
+                                uint8_t *dest,
+                                int dstW, int yalpha, int uvalpha, int y);
+typedef void (*yuv2packedX_fn) (struct SwsContext *c,
+                                const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc, int chrFilterSize,
+                                const int16_t **alpSrc, uint8_t *dest,
+                                int dstW, int dstY);
+
 /* This struct should be aligned on at least a 32-byte boundary. */
 typedef struct SwsContext {
     /**
@@ -250,40 +285,11 @@ typedef struct SwsContext {
 #endif
 
     /* function pointers for swScale() */
-    void (*yuv2yuv1   )(struct SwsContext *c,
-                        const int16_t *lumSrc, const int16_t *chrUSrc,
-                        const int16_t *chrVSrc, const int16_t *alpSrc,
-                        uint8_t *dest,
-                        uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        int dstW, int chrDstW);
-    void (*yuv2yuvX   )(struct SwsContext *c,
-                        const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                        const int16_t **chrVSrc, int chrFilterSize,
-                        const int16_t **alpSrc,
-                        uint8_t *dest,
-                        uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        int dstW, int chrDstW);
-    void (*yuv2packed1)(struct SwsContext *c,
-                        const uint16_t *buf0,
-                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                        const uint16_t *abuf0,
-                        uint8_t *dest,
-                        int dstW, int uvalpha, int dstFormat, int flags, int y);
-    void (*yuv2packed2)(struct SwsContext *c,
-                        const uint16_t *buf0, const uint16_t *buf1,
-                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                        const uint16_t *abuf0, const uint16_t *abuf1,
-                        uint8_t *dest,
-                        int dstW, int yalpha, int uvalpha, int y);
-    void (*yuv2packedX)(struct SwsContext *c,
-                        const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
-                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                        const int16_t **chrVSrc, int chrFilterSize,
-                        const int16_t **alpSrc, uint8_t *dest,
-                        int dstW, int dstY);
+    yuv2planar1_fn yuv2yuv1;
+    yuv2planarX_fn yuv2yuvX;
+    yuv2packed1_fn yuv2packed1;
+    yuv2packed2_fn yuv2packed2;
+    yuv2packedX_fn yuv2packedX;
 
     void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
                       int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
-- 
cgit v1.2.3


From edeb56fa31071841125f723b741858a4187ee748 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 5 Jun 2011 22:54:30 -0400
Subject: swscale: remove duplicate conversion routine in swScale().

---
 libswscale/swscale.c | 126 ++++++++++++++++++++++-----------------------------
 1 file changed, 53 insertions(+), 73 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 845af3d190..8f41547be1 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1562,6 +1562,37 @@ static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int ds
         c->chrConvertRange(dst1, dst2, dstWidth);
 }
 
+static av_always_inline void
+find_c_packed_planar_out_funcs(SwsContext *c,
+                               yuv2planar1_fn *yuv2yuv1,    yuv2planarX_fn *yuv2yuvX,
+                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
+                               yuv2packedX_fn *yuv2packedX)
+{
+    enum PixelFormat dstFormat = c->dstFormat;
+
+    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+        *yuv2yuvX     = yuv2nv12X_c;
+    } else if (is16BPS(dstFormat)) {
+        *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
+    } else if (is9_OR_10BPS(dstFormat)) {
+        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
+            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
+        } else {
+            *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
+        }
+    } else {
+        *yuv2yuv1     = yuv2yuv1_c;
+        *yuv2yuvX     = yuv2yuvX_c;
+    }
+    if(c->flags & SWS_FULL_CHR_H_INT) {
+        *yuv2packedX = yuv2rgbX_c_full;
+    } else {
+        *yuv2packed1  = yuv2packed1_c;
+        *yuv2packed2  = yuv2packed2_c;
+        *yuv2packedX  = yuv2packedX_c;
+    }
+}
+
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
@@ -1605,6 +1636,11 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
     int lastDstY;
     uint32_t *pal=c->pal_yuv;
+    yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
+    yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
+    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
+    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
+    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
 
     /* vars which will change and which we need to store back in the context */
     int dstY= c->dstY;
@@ -1741,7 +1777,14 @@ static int swScale(SwsContext *c, const uint8_t* src[],
 #if HAVE_MMX
         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
 #endif
-        if (dstY < dstH-2) {
+        if (dstY >= dstH-2) {
+            // hmm looks like we can't use MMX here without overwriting this array's tail
+            find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
+                                           &yuv2packed1, &yuv2packed2,
+                                           &yuv2packedX);
+        }
+
+        {
             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
@@ -1754,10 +1797,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                     const int16_t *chrUBuf= chrUSrcPtr[0];
                     const int16_t *chrVBuf= chrVSrcPtr[0];
                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
-                    c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
+                    yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
                                 uDest, vDest, aDest, dstW, chrDstW);
                 } else { //General YV12
-                    c->yuv2yuvX(c,
+                    yuv2yuvX(c,
                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
                                 chrVSrcPtr, vChrFilterSize,
@@ -1768,7 +1811,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
                     int chrAlpha= vChrFilter[2*dstY+1];
-                    c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
+                    yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
                                    *chrVSrcPtr, *(chrVSrcPtr+1),
                                    alpPixBuf ? *alpSrcPtr : NULL,
                                    dest, dstW, chrAlpha, dstFormat, flags, dstY);
@@ -1779,61 +1822,17 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
                     chrMmxFilter[2]=
                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
-                    c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
+                    yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
                                    *chrVSrcPtr, *(chrVSrcPtr+1),
                                    alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
                                    dest, dstW, lumAlpha, chrAlpha, dstY);
                 } else { //general RGB
-                    c->yuv2packedX(c,
+                    yuv2packedX(c,
                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
                                    alpSrcPtr, dest, dstW, dstY);
                 }
             }
-        } else { // hmm looks like we can't use MMX here without overwriting this array's tail
-            const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
-                yuv2nv12X_c(c, vLumFilter+dstY*vLumFilterSize,
-                            lumSrcPtr, vLumFilterSize,
-                            vChrFilter+chrDstY*vChrFilterSize,
-                            chrUSrcPtr, chrVSrcPtr, vChrFilterSize, NULL,
-                            dest, uDest, NULL, NULL, dstW, chrDstW);
-            } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
-                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
-                if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
-                if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) {
-                    yuv2yuvX16_c(c, vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
-                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW,
-                                 dstFormat);
-                } else {
-                    yuv2yuvX_c(c, vLumFilter+dstY*vLumFilterSize,
-                               lumSrcPtr, vLumFilterSize,
-                               vChrFilter+chrDstY*vChrFilterSize,
-                               chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                               alpSrcPtr, dest, uDest, vDest, aDest,
-                               dstW, chrDstW);
-                }
-            } else {
-                assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
-                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
-                if(flags & SWS_FULL_CHR_H_INT) {
-                    yuv2rgbX_c_full(c,
-                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                    alpSrcPtr, dest, dstW, dstY);
-                } else {
-                    yuv2packedX_c(c,
-                                  vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
-                                  vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
-                                  alpSrcPtr, dest, dstW, dstY);
-                }
-            }
         }
     }
 
@@ -1858,30 +1857,11 @@ static int swScale(SwsContext *c, const uint8_t* src[],
 
 static void sws_init_swScale_c(SwsContext *c)
 {
-    enum PixelFormat srcFormat = c->srcFormat,
-                     dstFormat = c->dstFormat;
+    enum PixelFormat srcFormat = c->srcFormat;
 
-    if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
-        c->yuv2yuvX     = yuv2nv12X_c;
-    } else if (is16BPS(dstFormat)) {
-        c->yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
-    } else if (is9_OR_10BPS(dstFormat)) {
-        if (dstFormat == PIX_FMT_YUV420P9BE || dstFormat == PIX_FMT_YUV420P9LE) {
-            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
-        } else {
-            c->yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
-        }
-    } else {
-        c->yuv2yuv1     = yuv2yuv1_c;
-        c->yuv2yuvX     = yuv2yuvX_c;
-    }
-    if(c->flags & SWS_FULL_CHR_H_INT) {
-        c->yuv2packedX = yuv2rgbX_c_full;
-    } else {
-        c->yuv2packed1  = yuv2packed1_c;
-        c->yuv2packed2  = yuv2packed2_c;
-        c->yuv2packedX  = yuv2packedX_c;
-    }
+    find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
+                                   &c->yuv2packed1, &c->yuv2packed2,
+                                   &c->yuv2packedX);
 
     c->hScale       = hScale_c;
 
-- 
cgit v1.2.3