From a27db4c349574d44c581fa484f03fb1ee816aaf9 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 12:12:50 -0400
Subject: swscale: fix function declaration keywords in x86/swscale_template.c.

Remove the inline keyword from functions that are only called through
function pointers (and thus cannot be inlined), add the av_cold
keyword to the init function, and use av_always_inline instead of
inline for functions that must be inlined for performance reasons.
---
 libswscale/x86/swscale_template.c | 383 ++++++++++++++++++++------------------
 1 file changed, 202 insertions(+), 181 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 8eb18050a2..8fad257ddf 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -70,13 +70,13 @@
         : "%"REG_d, "%"REG_S\
     );
 
-static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
-                                    const int16_t **lumSrc, int lumFilterSize,
-                                    const int16_t *chrFilter, const int16_t **chrUSrc,
-                                    const int16_t **chrVSrc,
-                                    int chrFilterSize, const int16_t **alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, int dstW, int chrDstW)
+static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
+                             const int16_t **lumSrc, int lumFilterSize,
+                             const int16_t *chrFilter, const int16_t **chrUSrc,
+                             const int16_t **chrVSrc,
+                             int chrFilterSize, const int16_t **alpSrc,
+                             uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                             uint8_t *aDest, int dstW, int chrDstW)
 {
     if (uDest) {
         x86_reg uv_off = c->uv_off;
@@ -151,13 +151,13 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         : "%"REG_a, "%"REG_d, "%"REG_S\
     );
 
-static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
-                                       const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrUSrc,
-                                       const int16_t **chrVSrc,
-                                       int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, int dstW, int chrDstW)
+static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc,
+                                int chrFilterSize, const int16_t **alpSrc,
+                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                uint8_t *aDest, int dstW, int chrDstW)
 {
     if (uDest) {
         x86_reg uv_off = c->uv_off;
@@ -171,11 +171,11 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
     YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
 }
 
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
-                                    const int16_t *chrUSrc, const int16_t *chrVSrc,
-                                    const int16_t *alpSrc,
-                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, int dstW, int chrDstW)
+static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
+                             const int16_t *chrUSrc, const int16_t *chrVSrc,
+                             const int16_t *alpSrc,
+                             uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                             uint8_t *aDest, int dstW, int chrDstW)
 {
     int p= 4;
     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
@@ -204,11 +204,11 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
     }
 }
 
-static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
-                                       const int16_t *chrUSrc, const int16_t *chrVSrc,
-                                       const int16_t *alpSrc,
-                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, int dstW, int chrDstW)
+static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
+                                const int16_t *chrUSrc, const int16_t *chrVSrc,
+                                const int16_t *alpSrc,
+                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                                uint8_t *aDest, int dstW, int chrDstW)
 {
     int p= 4;
     const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
@@ -458,12 +458,12 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
     " jb      1b                \n\t"
 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
 
-static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                          const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrUSrc,
-                                          const int16_t **chrVSrc,
-                                          int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                   const int16_t **lumSrc, int lumFilterSize,
+                                   const int16_t *chrFilter, const int16_t **chrUSrc,
+                                   const int16_t **chrVSrc,
+                                   int chrFilterSize, const int16_t **alpSrc,
+                                   uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -491,12 +491,12 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte
     }
 }
 
-static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
-                                       const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrUSrc,
-                                       const int16_t **chrVSrc,
-                                       int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc,
+                                int chrFilterSize, const int16_t **alpSrc,
+                                uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -548,12 +548,12 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
     " jb             1b             \n\t"
 #define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
 
-static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                           const int16_t **lumSrc, int lumFilterSize,
-                                           const int16_t *chrFilter, const int16_t **chrUSrc,
-                                           const int16_t **chrVSrc,
-                                           int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc,
+                                    int chrFilterSize, const int16_t **alpSrc,
+                                    uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -572,12 +572,12 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt
     YSCALEYUV2PACKEDX_END
 }
 
-static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
-                                        const int16_t **lumSrc, int lumFilterSize,
-                                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                                        const int16_t **chrVSrc,
-                                        int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
+                                 const int16_t **lumSrc, int lumFilterSize,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc,
+                                 const int16_t **chrVSrc,
+                                 int chrFilterSize, const int16_t **alpSrc,
+                                 uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -625,12 +625,12 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
     " jb             1b             \n\t"
 #define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
 
-static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                           const int16_t **lumSrc, int lumFilterSize,
-                                           const int16_t *chrFilter, const int16_t **chrUSrc,
-                                           const int16_t **chrVSrc,
-                                           int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                    const int16_t **lumSrc, int lumFilterSize,
+                                    const int16_t *chrFilter, const int16_t **chrUSrc,
+                                    const int16_t **chrVSrc,
+                                    int chrFilterSize, const int16_t **alpSrc,
+                                    uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -649,12 +649,12 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt
     YSCALEYUV2PACKEDX_END
 }
 
-static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
-                                        const int16_t **lumSrc, int lumFilterSize,
-                                        const int16_t *chrFilter, const int16_t **chrUSrc,
-                                        const int16_t **chrVSrc,
-                                        int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
+                                 const int16_t **lumSrc, int lumFilterSize,
+                                 const int16_t *chrFilter, const int16_t **chrUSrc,
+                                 const int16_t **chrVSrc,
+                                 int chrFilterSize, const int16_t **alpSrc,
+                                 uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -782,12 +782,12 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
 #define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
 #endif
 
-static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                          const int16_t **lumSrc, int lumFilterSize,
-                                          const int16_t *chrFilter, const int16_t **chrUSrc,
-                                          const int16_t **chrVSrc,
-                                          int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                   const int16_t **lumSrc, int lumFilterSize,
+                                   const int16_t *chrFilter, const int16_t **chrUSrc,
+                                   const int16_t **chrVSrc,
+                                   int chrFilterSize, const int16_t **alpSrc,
+                                   uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -806,12 +806,12 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte
     );
 }
 
-static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
-                                       const int16_t **lumSrc, int lumFilterSize,
-                                       const int16_t *chrFilter, const int16_t **chrUSrc,
-                                       const int16_t **chrVSrc,
-                                       int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc,
+                                int chrFilterSize, const int16_t **alpSrc,
+                                uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -847,12 +847,12 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
     " jb          1b            \n\t"
 #define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
 
-static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
-                                            const int16_t **lumSrc, int lumFilterSize,
-                                            const int16_t *chrFilter, const int16_t **chrUSrc,
-                                            const int16_t **chrVSrc,
-                                            int chrFilterSize, const int16_t **alpSrc,
-                                            uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
+                                     const int16_t **lumSrc, int lumFilterSize,
+                                     const int16_t *chrFilter, const int16_t **chrUSrc,
+                                     const int16_t **chrVSrc,
+                                     int chrFilterSize, const int16_t **alpSrc,
+                                     uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -868,12 +868,12 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil
     YSCALEYUV2PACKEDX_END
 }
 
-static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
-                                         const int16_t **lumSrc, int lumFilterSize,
-                                         const int16_t *chrFilter, const int16_t **chrUSrc,
-                                         const int16_t **chrVSrc,
-                                         int chrFilterSize, const int16_t **alpSrc,
-                                         uint8_t *dest, int dstW, int dstY)
+static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
+                                  const int16_t **lumSrc, int lumFilterSize,
+                                  const int16_t *chrFilter, const int16_t **chrUSrc,
+                                  const int16_t **chrVSrc,
+                                  int chrFilterSize, const int16_t **alpSrc,
+                                  uint8_t *dest, int dstW, int dstY)
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
@@ -969,12 +969,12 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
 /**
  * vertical bilinear scale YV12 to RGB
  */
-static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *buf1, const uint16_t *ubuf0,
-                                       const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                       const uint16_t *vbuf1, const uint16_t *abuf0,
-                                       const uint16_t *abuf1, uint8_t *dest,
-                                       int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
+                                const uint16_t *buf1, const uint16_t *ubuf0,
+                                const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                const uint16_t *vbuf1, const uint16_t *abuf0,
+                                const uint16_t *abuf1, uint8_t *dest,
+                                int dstW, int yalpha, int uvalpha, int y)
 {
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
@@ -1031,12 +1031,12 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *buf1, const uint16_t *ubuf0,
-                                       const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                       const uint16_t *vbuf1, const uint16_t *abuf0,
-                                       const uint16_t *abuf1, uint8_t *dest,
-                                       int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
+                                const uint16_t *buf1, const uint16_t *ubuf0,
+                                const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                const uint16_t *vbuf1, const uint16_t *abuf0,
+                                const uint16_t *abuf1, uint8_t *dest,
+                                int dstW, int yalpha, int uvalpha, int y)
 {
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
@@ -1053,12 +1053,12 @@ static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
     );
 }
 
-static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *buf1, const uint16_t *ubuf0,
-                                        const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                        const uint16_t *vbuf1, const uint16_t *abuf0,
-                                        const uint16_t *abuf1, uint8_t *dest,
-                                        int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *buf1, const uint16_t *ubuf0,
+                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                 const uint16_t *vbuf1, const uint16_t *abuf0,
+                                 const uint16_t *abuf1, uint8_t *dest,
+                                 int dstW, int yalpha, int uvalpha, int y)
 {
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
@@ -1081,12 +1081,12 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
     );
 }
 
-static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *buf1, const uint16_t *ubuf0,
-                                        const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                        const uint16_t *vbuf1, const uint16_t *abuf0,
-                                        const uint16_t *abuf1, uint8_t *dest,
-                                        int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *buf1, const uint16_t *ubuf0,
+                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                 const uint16_t *vbuf1, const uint16_t *abuf0,
+                                 const uint16_t *abuf1, uint8_t *dest,
+                                 int dstW, int yalpha, int uvalpha, int y)
 {
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
@@ -1149,12 +1149,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
 
 #define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
 
-static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
-                                         const uint16_t *buf1, const uint16_t *ubuf0,
-                                         const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                         const uint16_t *vbuf1, const uint16_t *abuf0,
-                                         const uint16_t *abuf1, uint8_t *dest,
-                                         int dstW, int yalpha, int uvalpha, int y)
+static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
+                                  const uint16_t *buf1, const uint16_t *ubuf0,
+                                  const uint16_t *ubuf1, const uint16_t *vbuf0,
+                                  const uint16_t *vbuf1, const uint16_t *abuf0,
+                                  const uint16_t *abuf1, uint8_t *dest,
+                                  int dstW, int yalpha, int uvalpha, int y)
 {
     //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
     __asm__ volatile(
@@ -1288,12 +1288,12 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
 /**
  * YV12 to RGB without scaling or interpolating
  */
-static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                       const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest,
-                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                       int flags, int y)
+static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
+                                const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                const uint16_t *abuf0, uint8_t *dest,
+                                int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1356,12 +1356,12 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
-                                       const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                       const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                       const uint16_t *abuf0, uint8_t *dest,
-                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                       int flags, int y)
+static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
+                                const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                const uint16_t *abuf0, uint8_t *dest,
+                                int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1394,12 +1394,12 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                        const uint16_t *abuf0, uint8_t *dest,
-                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                        int flags, int y)
+static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                 const uint16_t *abuf0, uint8_t *dest,
+                                 int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                 int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1444,12 +1444,12 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
     }
 }
 
-static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
-                                        const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                        const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                        const uint16_t *abuf0, uint8_t *dest,
-                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                        int flags, int y)
+static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
+                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                 const uint16_t *abuf0, uint8_t *dest,
+                                 int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                 int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1531,12 +1531,12 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
     "psraw                $7, %%mm7     \n\t"
 #define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
 
-static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
-                                         const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                         const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                         const uint16_t *abuf0, uint8_t *dest,
-                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                         int flags, int y)
+static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
+                                  const uint16_t *ubuf0, const uint16_t *ubuf1,
+                                  const uint16_t *vbuf0, const uint16_t *vbuf1,
+                                  const uint16_t *abuf0, uint8_t *dest,
+                                  int dstW, int uvalpha, enum PixelFormat dstFormat,
+                                  int flags, int y)
 {
     const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
 
@@ -1570,7 +1570,8 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
 #if !COMPILE_TEMPLATE_MMX2
 //FIXME yuy2* can read up to 7 samples too much
 
-static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src,
+                            int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm2           \n\t"
@@ -1589,7 +1590,9 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width,
     );
 }
 
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1615,7 +1618,9 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV,
+                           const uint8_t *src1, const uint8_t *src2,
+                           int width, uint32_t *unused)
 {
     __asm__ volatile(
         "mov                    %0, %%"REG_a"       \n\t"
@@ -1641,7 +1646,8 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src,
+                            int width, uint32_t *unused)
 {
     __asm__ volatile(
         "mov                  %0, %%"REG_a"         \n\t"
@@ -1659,7 +1665,9 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width,
     );
 }
 
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1685,7 +1693,9 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
     assert(src1 == src2);
 }
 
-static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV,
+                           const uint8_t *src1, const uint8_t *src2,
+                           int width, uint32_t *unused)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1710,8 +1720,8 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
     );
 }
 
-static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
-                                    const uint8_t *src, int width)
+static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
+                                              const uint8_t *src, int width)
 {
     __asm__ volatile(
         "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@@ -1736,22 +1746,23 @@ static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
     );
 }
 
-static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                    const uint8_t *src1, const uint8_t *src2,
-                                    int width, uint32_t *unused)
+static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstU, dstV, src1, width);
 }
 
-static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
-                                    const uint8_t *src1, const uint8_t *src2,
-                                    int width, uint32_t *unused)
+static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src1, const uint8_t *src2,
+                             int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 
-static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat)
+static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src,
+                                                  int width, enum PixelFormat srcFormat)
 {
 
     if(srcFormat == PIX_FMT_BGR24) {
@@ -1804,7 +1815,21 @@ static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, int wi
     );
 }
 
-static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, int width, enum PixelFormat srcFormat)
+static void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src,
+                             int width, uint32_t *unused)
+{ /* BGR24 luma entry point: forwards to the shared bgr24ToY_mmx kernel; `unused` is not read. */
+    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
+}
+
+static void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src,
+                             int width, uint32_t *unused)
+{ /* RGB24 luma entry point: same bgr24ToY_mmx kernel, selected by the PIX_FMT_RGB24 argument; `unused` is not read. */
+    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
+}
+
+static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV,
+                                                   const uint8_t *src, int width,
+                                                   enum PixelFormat srcFormat)
 {
     __asm__ volatile(
         "movq                    24(%4), %%mm6       \n\t"
@@ -1862,23 +1887,17 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uin
     );
 }
 
-static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
-{
-    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
-}
-
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV,
+                              const uint8_t *src1, const uint8_t *src2,
+                              int width, uint32_t *unused)
 {
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
     assert(src1 == src2);
 }
 
-static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
-{
-    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
-}
-
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
+static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV,
+                              const uint8_t *src1, const uint8_t *src2,
+                              int width, uint32_t *unused)
 {
     assert(src1==src2);
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
@@ -1886,8 +1905,10 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
 
 #if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
-static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
-                                  const int16_t *filter, const int16_t *filterPos, int filterSize)
+static void RENAME(hScale)(int16_t *dst, int dstW,
+                           const uint8_t *src, int srcW,
+                           int xInc, const int16_t *filter,
+                           const int16_t *filterPos, int filterSize)
 {
     assert(filterSize % 4 == 0 && filterSize>0);
     if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
@@ -2042,9 +2063,9 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 
 #if COMPILE_TEMPLATE_MMX2
-static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
-                                        int dstWidth, const uint8_t *src, int srcW,
-                                        int xInc)
+static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
+                                 int dstWidth, const uint8_t *src,
+                                 int srcW, int xInc)
 {
     int16_t *filterPos = c->hLumFilterPos;
     int16_t *filter    = c->hLumFilter;
@@ -2114,9 +2135,9 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
         dst[i] = src[srcW-1]*128;
 }
 
-static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                        int dstWidth, const uint8_t *src1,
-                                        const uint8_t *src2, int srcW, int xInc)
+static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
+                                 int dstWidth, const uint8_t *src1,
+                                 const uint8_t *src2, int srcW, int xInc)
 {
     int16_t *filterPos = c->hChrFilterPos;
     int16_t *filter    = c->hChrFilter;
@@ -2177,7 +2198,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d
 }
 #endif /* COMPILE_TEMPLATE_MMX2 */
 
-static void RENAME(sws_init_swScale)(SwsContext *c)
+static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat,
                      dstFormat = c->dstFormat;
-- 
cgit v1.2.3


From 6e5a8d3c9affbb242e39cea29bd44003361504d4 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 12:31:11 -0400
Subject: swscale: fix function declarations in swscale.c.

Remove inline keyword from functions that are never inlined.
Use av_always_inline for functions that should be force-inlined
for performance reasons. Use av_cold for init functions.
---
 libswscale/swscale.c | 67 ++++++++++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 33 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index c2259ca294..b6561408e6 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -285,13 +285,13 @@ yuv2NBPS(10, LE, 0);
 yuv2NBPS(16, BE, 1);
 yuv2NBPS(16, LE, 0);
 
-static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
-                              const int16_t **lumSrc, int lumFilterSize,
-                              const int16_t *chrFilter, const int16_t **chrUSrc,
-                              const int16_t **chrVSrc,
-                              int chrFilterSize, const int16_t **alpSrc,
-                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, int dstW, int chrDstW)
+static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
+                       const int16_t **lumSrc, int lumFilterSize,
+                       const int16_t *chrFilter, const int16_t **chrUSrc,
+                       const int16_t **chrVSrc,
+                       int chrFilterSize, const int16_t **alpSrc,
+                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                       uint8_t *aDest, int dstW, int chrDstW)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
@@ -327,16 +327,15 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
 
             aDest[i]= av_clip_uint8(val>>19);
         }
-
 }
 
-static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
-                               const int16_t **lumSrc, int lumFilterSize,
-                               const int16_t *chrFilter, const int16_t **chrUSrc,
-                               const int16_t **chrVSrc, int chrFilterSize,
-                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
-                               uint8_t *vDest, uint8_t *aDest,
-                               int dstW, int chrDstW)
+static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
+                        const int16_t **lumSrc, int lumFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
+                        const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
+                        uint8_t *vDest, uint8_t *aDest,
+                        int dstW, int chrDstW)
 {
     enum PixelFormat dstFormat = c->dstFormat;
 
@@ -857,11 +856,11 @@ static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
 }
 
-static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
-                                   const int16_t **lumSrc, int lumFilterSize,
-                                   const int16_t *chrFilter, const int16_t **chrUSrc,
-                                   const int16_t **chrVSrc, int chrFilterSize,
-                                   const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
+                            const int16_t **lumSrc, int lumFilterSize,
+                            const int16_t *chrFilter, const int16_t **chrUSrc,
+                            const int16_t **chrVSrc, int chrFilterSize,
+                            const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
     int step= c->dstFormatBpp/8;
@@ -943,7 +942,9 @@ static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
     }
 }
 
-static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
+static av_always_inline void fillPlane(uint8_t* plane, int stride,
+                                       int width, int height,
+                                       int y, uint8_t val)
 {
     int i;
     uint8_t *ptr = plane + stride*y;
@@ -1469,12 +1470,12 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
 }
 
 // *** horizontal scale Y line to temp buffer
-static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
-                           const uint8_t *src, int srcW, int xInc,
-                           const int16_t *hLumFilter,
-                           const int16_t *hLumFilterPos, int hLumFilterSize,
-                           uint8_t *formatConvBuffer,
-                           uint32_t *pal, int isAlpha)
+static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
+                                     const uint8_t *src, int srcW, int xInc,
+                                     const int16_t *hLumFilter,
+                                     const int16_t *hLumFilterPos, int hLumFilterSize,
+                                     uint8_t *formatConvBuffer,
+                                     uint32_t *pal, int isAlpha)
 {
     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(uint16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
@@ -1509,11 +1510,11 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
     }
 }
 
-static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
-                           const uint8_t *src1, const uint8_t *src2,
-                           int srcW, int xInc, const int16_t *hChrFilter,
-                           const int16_t *hChrFilterPos, int hChrFilterSize,
-                           uint8_t *formatConvBuffer, uint32_t *pal)
+static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
+                                     const uint8_t *src1, const uint8_t *src2,
+                                     int srcW, int xInc, const int16_t *hChrFilter,
+                                     const int16_t *hChrFilterPos, int hChrFilterSize,
+                                     uint8_t *formatConvBuffer, uint32_t *pal)
 {
     if (c->chrToYV12) {
         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
@@ -1826,7 +1827,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     return dstY - lastDstY;
 }
 
-static void sws_init_swScale_c(SwsContext *c)
+static av_cold void sws_init_swScale_c(SwsContext *c)
 {
     enum PixelFormat srcFormat = c->srcFormat;
 
-- 
cgit v1.2.3


From e458b536052a35fe3b0f221ca3ccb308faa84f35 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 7 Jun 2011 11:06:44 -0400
Subject: swscale: merge macros that are used only once.

This reduces source code size without affecting the binary.
---
 libswscale/swscale.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index b6561408e6..98331018c0 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -381,7 +381,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }
 }
 
-#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
+#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         int j;\
         int Y1 = 1<<18;\
@@ -413,10 +413,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             }\
             A1>>=19;\
             A2>>=19;\
-        }
-
-#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
-        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
+        }\
         if ((Y1|Y2|U|V)&256) {\
             if (Y1>255)   Y1=255; \
             else if (Y1<0)Y1=0;   \
@@ -432,7 +429,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             A2=av_clip_uint8(A2);\
         }
 
-#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
+#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
     for (i=0; i<dstW; i++) {\
         int j;\
         int Y = 0;\
@@ -458,10 +455,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             A >>=19;\
             if (A&256)\
                 A = av_clip_uint8(A);\
-        }
-
-#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
-    YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
+        }\
         Y-= c->yuv2rgb_y_offset;\
         Y*= c->yuv2rgb_y_coeff;\
         Y+= rnd;\
-- 
cgit v1.2.3


From f1e0b90c640235916d5b2f98a3b2d5a9975b740f Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 7 Jun 2011 11:22:54 -0400
Subject: swscale: use standard clipping functions.

This generates better code on some non-x86 architectures.
---
 libswscale/swscale.c | 53 ++++++++++++++++++++--------------------------------
 1 file changed, 20 insertions(+), 33 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 98331018c0..9d5a373375 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -404,6 +404,12 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         Y2>>=19;\
         U >>=19;\
         V >>=19;\
+        if ((Y1|Y2|U|V)&0x100) {\
+            Y1 = av_clip_uint8(Y1); \
+            Y2 = av_clip_uint8(Y2); \
+            U  = av_clip_uint8(U); \
+            V  = av_clip_uint8(V); \
+        }\
         if (alpha) {\
             A1 = 1<<18;\
             A2 = 1<<18;\
@@ -413,20 +419,10 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             }\
             A1>>=19;\
             A2>>=19;\
-        }\
-        if ((Y1|Y2|U|V)&256) {\
-            if (Y1>255)   Y1=255; \
-            else if (Y1<0)Y1=0;   \
-            if (Y2>255)   Y2=255; \
-            else if (Y2<0)Y2=0;   \
-            if (U>255)    U=255;  \
-            else if (U<0) U=0;    \
-            if (V>255)    V=255;  \
-            else if (V<0) V=0;    \
-        }\
-        if (alpha && ((A1|A2)&256)) {\
-            A1=av_clip_uint8(A1);\
-            A2=av_clip_uint8(A2);\
+            if ((A1|A2)&0x100) {\
+                A1 = av_clip_uint8(A1); \
+                A2 = av_clip_uint8(A2); \
+            }\
         }
 
 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
@@ -453,7 +449,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             for (j=0; j<lumFilterSize; j++)\
                 A += alpSrc[j][i     ] * lumFilter[j];\
             A >>=19;\
-            if (A&256)\
+            if (A&0x100)\
                 A = av_clip_uint8(A);\
         }\
         Y-= c->yuv2rgb_y_offset;\
@@ -463,12 +459,9 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
         B= Y +                          U*c->yuv2rgb_u2b_coeff;\
         if ((R|G|B)&(0xC0000000)) {\
-            if (R>=(256<<22))   R=(256<<22)-1; \
-            else if (R<0)R=0;   \
-            if (G>=(256<<22))   G=(256<<22)-1; \
-            else if (G<0)G=0;   \
-            if (B>=(256<<22))   B=(256<<22)-1; \
-            else if (B<0)B=0;   \
+            R = av_clip_uintp2(R, 30); \
+            G = av_clip_uintp2(G, 30); \
+            B = av_clip_uintp2(B, 30); \
         }
 
 #define YSCALE_YUV_2_GRAY16_C \
@@ -476,8 +469,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         int j;\
         int Y1 = 1<<18;\
         int Y2 = 1<<18;\
-        int U  = 1<<18;\
-        int V  = 1<<18;\
         \
         const int i2= 2*i;\
         \
@@ -487,11 +478,9 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }\
         Y1>>=11;\
         Y2>>=11;\
-        if ((Y1|Y2|U|V)&65536) {\
-            if (Y1>65535)   Y1=65535; \
-            else if (Y1<0)Y1=0;   \
-            if (Y2>65535)   Y2=65535; \
-            else if (Y2<0)Y2=0;   \
+        if ((Y1|Y2)&0x1000) {\
+            Y1 = av_clip_uint16(Y1); \
+            Y2 = av_clip_uint16(Y2); \
         }
 
 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
@@ -604,11 +593,9 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }\
         Y1>>=19;\
         Y2>>=19;\
-        if ((Y1|Y2)&256) {\
-            if (Y1>255)   Y1=255;\
-            else if (Y1<0)Y1=0;\
-            if (Y2>255)   Y2=255;\
-            else if (Y2<0)Y2=0;\
+        if ((Y1|Y2)&0x100) {\
+            Y1 = av_clip_uint8(Y1); \
+            Y2 = av_clip_uint8(Y2); \
         }\
         acc+= acc + g[Y1+d128[(i+0)&7]];\
         acc+= acc + g[Y2+d128[(i+1)&7]];\
-- 
cgit v1.2.3


From aa39f5f6d61c8c2640dd39520419264ffa1850de Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 13:29:09 -0400
Subject: swscale: extract gray16 output functions from yuv2packed[12X]().

This is part of the Great Evil Plan to simplify swscale.
---
 libswscale/swscale.c | 197 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 142 insertions(+), 55 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 9d5a373375..7398043e9e 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -381,6 +381,126 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }
 }
 
+static av_always_inline void
+yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                        const int16_t **lumSrc, int lumFilterSize,
+                        const int16_t *chrFilter, const int16_t **chrUSrc,
+                        const int16_t **chrVSrc, int chrFilterSize,
+                        const int16_t **alpSrc, uint8_t *dest, int dstW,
+                        int y, enum PixelFormat target)
+{ /* Weighted sum of lumFilterSize luma lines, stored to dest as 16-bit gray; c, chroma, alpha and y are not read. */
+    int i;
+
+#define output_pixel(pos, val) /* AV_WB16 (big-endian store) for GRAY16BE, AV_WL16 (little-endian) for any other target */ \
+        if (target == PIX_FMT_GRAY16BE) { \
+            AV_WB16(pos, val); \
+        } else { \
+            AV_WL16(pos, val); \
+        }
+    for (i = 0; i < (dstW >> 1); i++) { /* two pixels per iteration; a trailing odd pixel is not written */
+        int j;
+        int Y1 = 1 << 18; /* both accumulators start from the same 1 << 18 bias */
+        int Y2 = 1 << 18;
+        const int i2 = 2 * i; /* index of the first pixel of the pair */
+
+        for (j = 0; j < lumFilterSize; j++) { /* accumulate each source line times its filter coefficient */
+            Y1 += lumSrc[j][i2]   * lumFilter[j];
+            Y2 += lumSrc[j][i2+1] * lumFilter[j];
+        }
+        Y1 >>= 11;
+        Y2 >>= 11;
+        if ((Y1 | Y2) & 0x10000) { /* cheap bit-16 test so av_clip_uint16() only runs when a value may be out of range */
+            Y1 = av_clip_uint16(Y1);
+            Y2 = av_clip_uint16(Y2);
+        }
+        output_pixel(&dest[2 * i2 + 0], Y1); /* 2 bytes per pixel: pixel i2 starts at byte 2 * i2 */
+        output_pixel(&dest[2 * i2 + 2], Y2); /* pixel i2 + 1 follows 2 bytes later */
+    }
+}
+
+static av_always_inline void
+yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
+                        const uint16_t *buf1, const uint16_t *ubuf0,
+                        const uint16_t *ubuf1, const uint16_t *vbuf0,
+                        const uint16_t *vbuf1, const uint16_t *abuf0,
+                        const uint16_t *abuf1, uint8_t *dest, int dstW,
+                        int yalpha, int uvalpha, int y,
+                        enum PixelFormat target)
+{ /* Blends two luma lines into 16-bit gray at dest; c, the chroma/alpha buffers, uvalpha and y are not read. */
+    int yalpha1 = 4095 - yalpha; /* weight of buf0; buf1 is weighted by yalpha */
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) { /* two pixels per iteration; a trailing odd pixel is not written */
+        const int i2 = 2 * i; /* index of the first pixel of the pair */
+        int Y1 = (buf0[i2  ] * yalpha1 + buf1[i2  ] * yalpha) >> 11; /* no clipping here, unlike the _X variant */
+        int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
+
+        output_pixel(&dest[2 * i2 + 0], Y1); /* output_pixel picks the byte order from target */
+        output_pixel(&dest[2 * i2 + 2], Y2); /* next pixel, 2 bytes further on */
+    }
+}
+
+static av_always_inline void
+yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
+                        const uint16_t *ubuf0, const uint16_t *ubuf1,
+                        const uint16_t *vbuf0, const uint16_t *vbuf1,
+                        const uint16_t *abuf0, uint8_t *dest, int dstW,
+                        int uvalpha, enum PixelFormat dstFormat,
+                        int flags, int y, enum PixelFormat target)
+{ /* Single-line variant: only buf0, dest, dstW and target are read; every other argument is ignored. */
+    int i;
+
+    for (i = 0; i < (dstW >> 1); i++) { /* two pixels per iteration; a trailing odd pixel is not written */
+        const int i2 = 2 * i; /* index of the first pixel of the pair */
+        int Y1 = buf0[i2  ] << 1; /* input sample doubled, no filtering or clipping */
+        int Y2 = buf0[i2+1] << 1;
+
+        output_pixel(&dest[2 * i2 + 0], Y1); /* output_pixel picks the byte order from target */
+        output_pixel(&dest[2 * i2 + 2], Y2); /* next pixel, 2 bytes further on */
+    }
+#undef output_pixel /* the macro is #defined inside yuv2gray16_X_c_template and shared by all three templates */
+}
+
+#define YUV2PACKEDWRAPPER(name, ext, fmt) /* emits name##ext##_X_c, _2_c and _1_c, each forwarding to name##_{X,2,1}_c_template */ \
+static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
+                        const int16_t **lumSrc, int lumFilterSize, \
+                        const int16_t *chrFilter, const int16_t **chrUSrc, \
+                        const int16_t **chrVSrc, int chrFilterSize, \
+                        const int16_t **alpSrc, uint8_t *dest, int dstW, \
+                        int y) \
+{ \
+    name ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
+                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                          alpSrc, dest, dstW, y, fmt); /* fmt is passed as the template's trailing `target` argument */ \
+} \
+ \
+static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
+                        const uint16_t *buf1, const uint16_t *ubuf0, \
+                        const uint16_t *ubuf1, const uint16_t *vbuf0, \
+                        const uint16_t *vbuf1, const uint16_t *abuf0, \
+                        const uint16_t *abuf1, uint8_t *dest, int dstW, \
+                        int yalpha, int uvalpha, int y) \
+{ \
+    name ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
+                          vbuf0, vbuf1, abuf0, abuf1, \
+                          dest, dstW, yalpha, uvalpha, y, fmt); /* same forwarding, two-line variant */ \
+} \
+ \
+static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
+                        const uint16_t *ubuf0, const uint16_t *ubuf1, \
+                        const uint16_t *vbuf0, const uint16_t *vbuf1, \
+                        const uint16_t *abuf0, uint8_t *dest, int dstW, \
+                        int uvalpha, enum PixelFormat dstFormat, \
+                        int flags, int y) \
+{ \
+    name ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
+                          vbuf1, abuf0, dest, dstW, uvalpha, \
+                          dstFormat, flags, y, fmt); /* same forwarding, single-line variant */ \
+}
+
+YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE); /* defines yuv2gray16LE_X_c, yuv2gray16LE_2_c, yuv2gray16LE_1_c */
+YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE); /* defines yuv2gray16BE_X_c, yuv2gray16BE_2_c, yuv2gray16BE_1_c */
+
+
 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         int j;\
@@ -464,25 +584,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             B = av_clip_uintp2(B, 30); \
         }
 
-#define YSCALE_YUV_2_GRAY16_C \
-    for (i=0; i<(dstW>>1); i++) {\
-        int j;\
-        int Y1 = 1<<18;\
-        int Y2 = 1<<18;\
-        \
-        const int i2= 2*i;\
-        \
-        for (j=0; j<lumFilterSize; j++) {\
-            Y1 += lumSrc[j][i2] * lumFilter[j];\
-            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
-        }\
-        Y1>>=11;\
-        Y2>>=11;\
-        if ((Y1|Y2)&0x1000) {\
-            Y1 = av_clip_uint16(Y1); \
-            Y2 = av_clip_uint16(Y2); \
-        }
-
 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
     YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
     r = (type *)c->table_rV[V];   \
@@ -503,12 +604,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
         }
 
-#define YSCALE_YUV_2_GRAY16_2_C   \
-    for (i=0; i<(dstW>>1); i++) { \
-        const int i2= 2*i;       \
-        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
-        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;
-
 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
     YSCALE_YUV_2_PACKED2_C(type,alpha)\
     r = (type *)c->table_rV[V];\
@@ -529,12 +624,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             A2= abuf0[i2+1]>>7;\
         }
 
-#define YSCALE_YUV_2_GRAY16_1_C \
-    for (i=0; i<(dstW>>1); i++) {\
-        const int i2= 2*i;\
-        int Y1= buf0[i2  ]<<1;\
-        int Y2= buf0[i2+1]<<1;
-
 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
     YSCALE_YUV_2_PACKED1_C(type,alpha)\
     r = (type *)c->table_rV[V];\
@@ -605,7 +694,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
         }\
     }
 
-#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
+#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_monoblack)\
     switch(c->dstFormat) {\
     case PIX_FMT_RGB48BE:\
     case PIX_FMT_RGB48LE:\
@@ -809,22 +898,6 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
             ((uint8_t*)dest)[2*i2+3]= Y2;\
         }                \
         break;\
-    case PIX_FMT_GRAY16BE:\
-        func_g16\
-            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
-            ((uint8_t*)dest)[2*i2+1]= Y1;\
-            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
-            ((uint8_t*)dest)[2*i2+3]= Y2;\
-        }                \
-        break;\
-    case PIX_FMT_GRAY16LE:\
-        func_g16\
-            ((uint8_t*)dest)[2*i2+0]= Y1;\
-            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
-            ((uint8_t*)dest)[2*i2+2]= Y2;\
-            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
-        }                \
-        break;\
     }
 
 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
@@ -834,7 +907,7 @@ static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_MONOX_C)
 }
 
 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
@@ -1158,7 +1231,7 @@ static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
     int uvalpha1=4095-uvalpha;
     int i;
 
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
 }
 
 /**
@@ -1178,9 +1251,9 @@ static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
     const int yalpha= 4096; //FIXME ...
 
     if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
     } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
     }
 }
 
@@ -1540,9 +1613,23 @@ find_c_packed_planar_out_funcs(SwsContext *c,
     if(c->flags & SWS_FULL_CHR_H_INT) {
         *yuv2packedX = yuv2rgbX_c_full;
     } else {
-        *yuv2packed1  = yuv2packed1_c;
-        *yuv2packed2  = yuv2packed2_c;
-        *yuv2packedX  = yuv2packedX_c;
+        switch (dstFormat) {
+        case PIX_FMT_GRAY16BE:
+            *yuv2packed1 = yuv2gray16BE_1_c;
+            *yuv2packed2 = yuv2gray16BE_2_c;
+            *yuv2packedX = yuv2gray16BE_X_c;
+            break;
+        case PIX_FMT_GRAY16LE:
+            *yuv2packed1 = yuv2gray16LE_1_c;
+            *yuv2packed2 = yuv2gray16LE_2_c;
+            *yuv2packedX = yuv2gray16LE_X_c;
+            break;
+        default:
+            *yuv2packed1 = yuv2packed1_c;
+            *yuv2packed2 = yuv2packed2_c;
+            *yuv2packedX = yuv2packedX_c;
+            break;
+        }
     }
 }
 
-- 
cgit v1.2.3


From f30ee65700cc2def6447de09c91afa3f7ecc7639 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 14:50:49 -0400
Subject: swscale: change 9/10bit YUV input macros to inline functions.

Inline functions are slightly larger in source code, but
are easier to handle in source code editors. The binary code
generated is the same.
---
 libswscale/swscale.c | 64 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 23 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 7398043e9e..b505c0cec0 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1344,33 +1344,51 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
 }
 
 // FIXME Maybe dither instead.
-#define YUV_NBPS(depth, endianness, rfunc) \
-static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
-                                          const uint8_t *_srcU, const uint8_t *_srcV, \
-                                          int width, uint32_t *unused) \
+static av_always_inline void
+yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
+                          const uint8_t *_srcU, const uint8_t *_srcV,
+                          int width, enum PixelFormat origin, int depth)
+{
+    int i;
+    const uint16_t *srcU = (const uint16_t *) _srcU;
+    const uint16_t *srcV = (const uint16_t *) _srcV;
+
+#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+    for (i = 0; i < width; i++) {
+        dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
+        dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
+    }
+}
+
+static av_always_inline void
+yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
+                         int width, enum PixelFormat origin, int depth)
+{
+    int i;
+    const uint16_t *srcY = (const uint16_t*)_srcY;
+
+    for (i = 0; i < width; i++)
+        dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
+#undef input_pixel
+}
+
+#define YUV_NBPS(depth, BE_LE, origin) \
+static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+                                     const uint8_t *srcU, const uint8_t *srcV, \
+                                     int width, uint32_t *unused) \
 { \
-    int i; \
-    const uint16_t *srcU = (const uint16_t*)_srcU; \
-    const uint16_t *srcV = (const uint16_t*)_srcV; \
-    for (i = 0; i < width; i++) { \
-        dstU[i] = rfunc(&srcU[i])>>(depth-8); \
-        dstV[i] = rfunc(&srcV[i])>>(depth-8); \
-    } \
+    yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
 } \
-\
-static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, \
-                                         int width, uint32_t *unused) \
+static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
+                                    int width, uint32_t *unused) \
 { \
-    int i; \
-    const uint16_t *srcY = (const uint16_t*)_srcY; \
-    for (i = 0; i < width; i++) \
-        dstY[i] = rfunc(&srcY[i])>>(depth-8); \
-} \
+    yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
+}
 
-YUV_NBPS( 9, LE, AV_RL16)
-YUV_NBPS( 9, BE, AV_RB16)
-YUV_NBPS(10, LE, AV_RL16)
-YUV_NBPS(10, BE, AV_RB16)
+YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
+YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
+YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
+YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
 
 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
                        int width, uint32_t *unused)
-- 
cgit v1.2.3


From dbd3183935e252aaf5796638d4711cff27c75934 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Wed, 8 Jun 2011 15:12:32 -0400
Subject: swscale: change 48bit RGB input macros to inline functions.

Inline functions are slightly larger in source code, but
are easier to handle in source code editors. The binary code
generated is the same.
---
 libswscale/swscale.c | 115 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 71 insertions(+), 44 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index b505c0cec0..be42bcfbef 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1008,58 +1008,85 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride,
     }
 }
 
-#define rgb48funcs(LE_BE, rfunc, compA, compB, compC) \
-static void compA ## compB ## compC ## 48 ## LE_BE ## ToY_c( \
-                       uint8_t *dst, const uint8_t *src, int width, \
-                       uint32_t *unused) \
+static av_always_inline void
+rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
+                    enum PixelFormat origin)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+        int a = input_pixel(&src[i*6+0]) >> 8;
+        int g = input_pixel(&src[i*6+2]) >> 8;
+        int c = input_pixel(&src[i*6+4]) >> 8;
+
+#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? c : a)
+#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? a : c)
+        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+    }
+}
+
+static av_always_inline void
+rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
+                    const uint8_t *src1, const uint8_t *src2,
+                    int width, enum PixelFormat origin)
+{
+    int i;
+    assert(src1==src2);
+    for (i = 0; i < width; i++) {
+        int a = input_pixel(&src1[6*i + 0]) >> 8;
+        int g = input_pixel(&src1[6*i + 2]) >> 8;
+        int c = input_pixel(&src1[6*i + 4]) >> 8;
+
+        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+    }
+}
+
+static av_always_inline void
+rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
+                          const uint8_t *src1, const uint8_t *src2,
+                          int width, enum PixelFormat origin)
+{
+    int i;
+    assert(src1==src2);
+    for (i = 0; i < width; i++) {
+        int a = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
+        int g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
+        int c = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
+
+        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
+    }
+#undef r
+#undef b
+#undef input_pixel
+}
+
+#define rgb48funcs(pattern, BE_LE, origin) \
+static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
+                                    int width, uint32_t *unused) \
 { \
-    int i; \
-    for (i = 0; i < width; i++) { \
-        int compA = rfunc(&src[i*6+0]) >> 8; \
-        int compB = rfunc(&src[i*6+2]) >> 8; \
-        int compC = rfunc(&src[i*6+4]) >> 8; \
- \
-        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
-    } \
+    rgb48ToY_c_template(dst, src, width, origin); \
 } \
  \
-static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_c( \
-                        uint8_t *dstU, uint8_t *dstV, \
-                        const uint8_t *src1, const uint8_t *src2, \
-                        int width, uint32_t *unused) \
+static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+                                    const uint8_t *src1, const uint8_t *src2, \
+                                    int width, uint32_t *unused) \
 { \
-    int i; \
-    assert(src1==src2); \
-    for (i = 0; i < width; i++) { \
-        int compA = rfunc(&src1[6*i + 0]) >> 8; \
-        int compB = rfunc(&src1[6*i + 2]) >> 8; \
-        int compC = rfunc(&src1[6*i + 4]) >> 8; \
- \
-        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
-        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; \
-    } \
+    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
 } \
  \
-static void compA ## compB ## compC ## 48 ## LE_BE ## ToUV_half_c( \
-                            uint8_t *dstU, uint8_t *dstV, \
-                            const uint8_t *src1, const uint8_t *src2, \
-                            int width, uint32_t *unused) \
+static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
+                                    const uint8_t *src1, const uint8_t *src2, \
+                                    int width, uint32_t *unused) \
 { \
-    int i; \
-    assert(src1==src2); \
-    for (i = 0; i < width; i++) { \
-        int compA = (rfunc(&src1[12*i + 0]) >> 8) + (rfunc(&src1[12*i + 6]) >> 8); \
-        int compB = (rfunc(&src1[12*i + 2]) >> 8) + (rfunc(&src1[12*i + 8]) >> 8); \
-        int compC = (rfunc(&src1[12*i + 4]) >> 8) + (rfunc(&src1[12*i + 10]) >> 8); \
- \
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1); \
-    } \
+    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
 }
-rgb48funcs(LE, AV_RL16, r, g, b);
-rgb48funcs(BE, AV_RB16, r, g, b);
-rgb48funcs(LE, AV_RL16, b, g, r);
-rgb48funcs(BE, AV_RB16, b, g, r);
+
+rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
+rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
+rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
+rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
 
 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
 static void name ## _c(uint8_t *dst, const uint8_t *src, \
-- 
cgit v1.2.3


From e2babb9b47072ca586b4414680b8d7abf85a16e5 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 9 Jun 2011 10:00:09 -0400
Subject: swscale: rearrange code.

This way, related code in the file is grouped together instead of
being scattered all over the place.
---
 libswscale/swscale.c | 166 +++++++++++++++++++++++++--------------------------
 1 file changed, 83 insertions(+), 83 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index be42bcfbef..68508a9741 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -329,6 +329,33 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
         }
 }
 
+static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
+                       const int16_t *chrUSrc, const int16_t *chrVSrc,
+                       const int16_t *alpSrc,
+                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
+                       uint8_t *aDest, int dstW, int chrDstW)
+{
+    int i;
+    for (i=0; i<dstW; i++) {
+        int val= (lumSrc[i]+64)>>7;
+        dest[i]= av_clip_uint8(val);
+    }
+
+    if (uDest)
+        for (i=0; i<chrDstW; i++) {
+            int u=(chrUSrc[i]+64)>>7;
+            int v=(chrVSrc[i]+64)>>7;
+            uDest[i]= av_clip_uint8(u);
+            vDest[i]= av_clip_uint8(v);
+        }
+
+    if (CONFIG_SWSCALE_ALPHA && aDest)
+        for (i=0; i<dstW; i++) {
+            int val= (alpSrc[i]+64)>>7;
+            aDest[i]= av_clip_uint8(val);
+        }
+}
+
 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
                         const int16_t **lumSrc, int lumFilterSize,
                         const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -996,6 +1023,46 @@ static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
     }
 }
 
+/**
+ * vertical bilinear scale YV12 to RGB
+ */
+static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
+                          const uint16_t *buf1, const uint16_t *ubuf0,
+                          const uint16_t *ubuf1, const uint16_t *vbuf0,
+                          const uint16_t *vbuf1, const uint16_t *abuf0,
+                          const uint16_t *abuf1, uint8_t *dest, int dstW,
+                          int yalpha, int uvalpha, int y)
+{
+    int  yalpha1=4095- yalpha;
+    int uvalpha1=4095-uvalpha;
+    int i;
+
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
+}
+
+/**
+ * YV12 to RGB without scaling or interpolating
+ */
+static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
+                          const uint16_t *ubuf0, const uint16_t *ubuf1,
+                          const uint16_t *vbuf0, const uint16_t *vbuf1,
+                          const uint16_t *abuf0, uint8_t *dest, int dstW,
+                          int uvalpha, enum PixelFormat dstFormat,
+                          int flags, int y)
+{
+    const int yalpha1=0;
+    int i;
+
+    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int yalpha= 4096; //FIXME ...
+
+    if (uvalpha < 2048) {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
+    } else {
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
+    }
+}
+
 static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                        int width, int height,
                                        int y, uint8_t val)
@@ -1111,22 +1178,6 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
 
-static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dst[i]= src[4*i];
-    }
-}
-
-static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
-{
-    int i;
-    for (i=0; i<width; i++) {
-        dst[i]= src[4*i+3];
-    }
-}
-
 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
 static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
                        const uint8_t *src, const uint8_t *dummy, \
@@ -1171,6 +1222,22 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
 
+static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dst[i]= src[4*i];
+    }
+}
+
+static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        dst[i]= src[4*i+3];
+    }
+}
+
 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
 {
     int i;
@@ -1217,73 +1284,6 @@ static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
     }
 }
 
-static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
-                       const int16_t *chrUSrc, const int16_t *chrVSrc,
-                       const int16_t *alpSrc,
-                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                       uint8_t *aDest, int dstW, int chrDstW)
-{
-    int i;
-    for (i=0; i<dstW; i++) {
-        int val= (lumSrc[i]+64)>>7;
-        dest[i]= av_clip_uint8(val);
-    }
-
-    if (uDest)
-        for (i=0; i<chrDstW; i++) {
-            int u=(chrUSrc[i]+64)>>7;
-            int v=(chrVSrc[i]+64)>>7;
-            uDest[i]= av_clip_uint8(u);
-            vDest[i]= av_clip_uint8(v);
-        }
-
-    if (CONFIG_SWSCALE_ALPHA && aDest)
-        for (i=0; i<dstW; i++) {
-            int val= (alpSrc[i]+64)>>7;
-            aDest[i]= av_clip_uint8(val);
-        }
-}
-
-/**
- * vertical bilinear scale YV12 to RGB
- */
-static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
-                          const uint16_t *buf1, const uint16_t *ubuf0,
-                          const uint16_t *ubuf1, const uint16_t *vbuf0,
-                          const uint16_t *vbuf1, const uint16_t *abuf0,
-                          const uint16_t *abuf1, uint8_t *dest, int dstW,
-                          int yalpha, int uvalpha, int y)
-{
-    int  yalpha1=4095- yalpha;
-    int uvalpha1=4095-uvalpha;
-    int i;
-
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
-}
-
-/**
- * YV12 to RGB without scaling or interpolating
- */
-static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
-                          const uint16_t *ubuf0, const uint16_t *ubuf1,
-                          const uint16_t *vbuf0, const uint16_t *vbuf1,
-                          const uint16_t *abuf0, uint8_t *dest, int dstW,
-                          int uvalpha, enum PixelFormat dstFormat,
-                          int flags, int y)
-{
-    const int yalpha1=0;
-    int i;
-
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    const int yalpha= 4096; //FIXME ...
-
-    if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
-    } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
-    }
-}
-
 //FIXME yuy2* can read up to 7 samples too much
 
 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
-- 
cgit v1.2.3


From 6d4d483eee04d78ba021e84aec2ff75d9fc3a236 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 9 Jun 2011 13:59:21 -0400
Subject: swscale: de-macro'ify RGB15/16/32 input functions.

Inline functions are easier to read, maintain, modify and test,
which justifies the slightly increased source size. This patch
also adds input support for non-native-endian RGB15/16 and fixes
isSupportedOutput() so that it no longer claims support for writing
non-native-endian RGB565/555/444.
---
 libswscale/swscale.c | 278 +++++++++++++++++++++++++++++++--------------------
 libswscale/utils.c   |  29 +++++-
 2 files changed, 193 insertions(+), 114 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 68508a9741..3659b42725 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -819,10 +819,8 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
             dest+=6;\
         }\
         break;\
-    case PIX_FMT_RGB565BE:\
-    case PIX_FMT_RGB565LE:\
-    case PIX_FMT_BGR565BE:\
-    case PIX_FMT_BGR565LE:\
+    case PIX_FMT_RGB565:\
+    case PIX_FMT_BGR565:\
         {\
             const int dr1= dither_2x2_8[y&1    ][0];\
             const int dg1= dither_2x2_4[y&1    ][0];\
@@ -836,10 +834,8 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
             }\
         }\
         break;\
-    case PIX_FMT_RGB555BE:\
-    case PIX_FMT_RGB555LE:\
-    case PIX_FMT_BGR555BE:\
-    case PIX_FMT_BGR555LE:\
+    case PIX_FMT_RGB555:\
+    case PIX_FMT_BGR555:\
         {\
             const int dr1= dither_2x2_8[y&1    ][0];\
             const int dg1= dither_2x2_8[y&1    ][1];\
@@ -853,10 +849,8 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
             }\
         }\
         break;\
-    case PIX_FMT_RGB444BE:\
-    case PIX_FMT_RGB444LE:\
-    case PIX_FMT_BGR444BE:\
-    case PIX_FMT_BGR444LE:\
+    case PIX_FMT_RGB444:\
+    case PIX_FMT_BGR444:\
         {\
             const int dr1= dither_4x4_16[y&3    ][0];\
             const int dg1= dither_4x4_16[y&3    ][1];\
@@ -1155,72 +1149,126 @@ rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
 
-#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static void name ## _c(uint8_t *dst, const uint8_t *src, \
-                       int width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++) {\
-        int b= (((const type*)src)[i]>>shb)&maskb;\
-        int g= (((const type*)src)[i]>>shg)&maskg;\
-        int r= (((const type*)src)[i]>>shr)&maskr;\
-\
-        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
-    }\
+static av_always_inline void
+rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
+                       int width, enum PixelFormat origin,
+                       int shr,   int shg,   int shb, int shp,
+                       int maskr, int maskg, int maskb,
+                       int rsh,   int gsh,   int bsh, int S)
+{
+    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
+              rnd = 33 << (S - 1);
+    int i;
+
+    for (i = 0; i < width; i++) {
+#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
+                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
+                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
+        int px = input_pixel(i) >> shp;
+        int b = (px & maskb) >> shb;
+        int g = (px & maskg) >> shg;
+        int r = (px & maskr) >> shr;
+
+        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
+    }
 }
 
-BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
-BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
-BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
-BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY    , GY<<8, BY    , RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
-BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
-BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
-
-#define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static void name ## _c(uint8_t *dstU, uint8_t *dstV, \
-                       const uint8_t *src, const uint8_t *dummy, \
-                       int width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++) {\
-        int b= ((((const type*)src)[i]>>shp)&maskb)>>shb;\
-        int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
-        int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
-\
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
-    }\
-}\
-static void name ## _half_c(uint8_t *dstU, uint8_t *dstV, \
-                            const uint8_t *src, const uint8_t *dummy, \
-                            int width, uint32_t *unused)\
-{\
-    int i;\
-    for (i=0; i<width; i++) {\
-        int pix0= ((const type*)src)[2*i+0]>>shp;\
-        int pix1= ((const type*)src)[2*i+1]>>shp;\
-        int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
-        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
-        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
-        g&= maskg|(2*maskg);\
-\
-        g>>=shg;\
-\
-        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
-        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
-    }\
+static av_always_inline void
+rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
+                        const uint8_t *src, int width,
+                        enum PixelFormat origin,
+                        int shr,   int shg,   int shb, int shp,
+                        int maskr, int maskg, int maskb,
+                        int rsh,   int gsh,   int bsh, int S)
+{
+    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
+              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
+              rnd = 257 << (S - 1);
+    int i;
+
+    for (i = 0; i < width; i++) {
+        int px = input_pixel(i) >> shp;
+        int b = (px & maskb) >> shb;
+        int g = (px & maskg) >> shg;
+        int r = (px & maskr) >> shr;
+
+        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
+        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
+    }
+}
+
+static av_always_inline void
+rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
+                             const uint8_t *src, int width,
+                             enum PixelFormat origin,
+                             int shr,   int shg,   int shb, int shp,
+                             int maskr, int maskg, int maskb,
+                             int rsh,   int gsh,   int bsh, int S)
+{
+    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
+              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
+              rnd = 257 << S, maskgx = ~(maskr | maskb);
+    int i;
+
+    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
+    for (i = 0; i < width; i++) {
+        int px0 = input_pixel(2 * i + 0) >> shp;
+        int px1 = input_pixel(2 * i + 1) >> shp;
+        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
+        int rb = px0 + px1 - g;
+
+        b = (rb & maskb) >> shb;
+        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
+            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
+            g >>= shg;
+        } else {
+            g = (g  & maskg) >> shg;
+        }
+        r = (rb & maskr) >> shr;
+
+        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
+        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
+    }
+#undef input_pixel
+}
+
+#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
+                         maskg, maskb, rsh, gsh, bsh, S) \
+static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
+                          int width, uint32_t *unused) \
+{ \
+    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
+                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
+} \
+ \
+static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+                           const uint8_t *src, const uint8_t *dummy, \
+                           int width, uint32_t *unused) \
+{ \
+    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
+                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
+} \
+ \
+static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
+                                const uint8_t *src, const uint8_t *dummy, \
+                                int width, uint32_t *unused) \
+{ \
+    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
+                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
 }
 
-BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
-BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
-BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
+rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
 
 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
@@ -1979,37 +2027,45 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     }
     if (c->chrSrcHSubSample) {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break;
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break;
-        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break;
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break;
-        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_half_c;  break;
-        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break;
-        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_half_c; break;
-        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break;
-        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break;
-        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_half_c;  break;
-        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_half_c; break;
-        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break;
-        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break;
+        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
+        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
+        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
+        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
+        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
+        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
+        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
+        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
+        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
+        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
+        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
+        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
+        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
+        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
+        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
+        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
+        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
+        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
         }
     } else {
         switch(srcFormat) {
-        case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break;
-        case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break;
-        case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break;
-        case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break;
-        case PIX_FMT_RGB32  : c->chrToYV12 = bgr32ToUV_c;  break;
-        case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break;
-        case PIX_FMT_BGR24  : c->chrToYV12 = bgr24ToUV_c; break;
-        case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break;
-        case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break;
-        case PIX_FMT_BGR32  : c->chrToYV12 = rgb32ToUV_c;  break;
-        case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break;
-        case PIX_FMT_RGB24  : c->chrToYV12 = rgb24ToUV_c; break;
-        case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break;
-        case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break;
+        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
+        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
+        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
+        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
+        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
+        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
+        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
+        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
+        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
+        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
+        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
+        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
+        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
+        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
+        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
+        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
+        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
+        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
         }
     }
 
@@ -2030,13 +2086,17 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     case PIX_FMT_YUV420P16LE:
     case PIX_FMT_YUV422P16LE:
     case PIX_FMT_YUV444P16LE:
-    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c; break;
-    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c; break;
-    case PIX_FMT_BGR565   : c->lumToYV12 = bgr16ToY_c; break;
-    case PIX_FMT_BGR555   : c->lumToYV12 = bgr15ToY_c; break;
-    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c; break;
-    case PIX_FMT_RGB565   : c->lumToYV12 = rgb16ToY_c; break;
-    case PIX_FMT_RGB555   : c->lumToYV12 = rgb15ToY_c; break;
+    case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c;    break;
+    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
+    case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
+    case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
+    case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
+    case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
+    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
+    case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
+    case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
+    case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
+    case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
     case PIX_FMT_RGB8     :
     case PIX_FMT_BGR8     :
     case PIX_FMT_PAL8     :
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 827abc66d2..d552330ec5 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -75,13 +75,17 @@ const char *swscale_license(void)
         || (x)==PIX_FMT_BGR48BE     \
         || (x)==PIX_FMT_BGR48LE     \
         || (x)==PIX_FMT_BGR24       \
-        || (x)==PIX_FMT_BGR565      \
-        || (x)==PIX_FMT_BGR555      \
+        || (x)==PIX_FMT_BGR565LE    \
+        || (x)==PIX_FMT_BGR565BE    \
+        || (x)==PIX_FMT_BGR555LE    \
+        || (x)==PIX_FMT_BGR555BE    \
         || (x)==PIX_FMT_BGR32       \
         || (x)==PIX_FMT_BGR32_1     \
         || (x)==PIX_FMT_RGB24       \
-        || (x)==PIX_FMT_RGB565      \
-        || (x)==PIX_FMT_RGB555      \
+        || (x)==PIX_FMT_RGB565LE    \
+        || (x)==PIX_FMT_RGB565BE    \
+        || (x)==PIX_FMT_RGB555LE    \
+        || (x)==PIX_FMT_RGB555BE    \
         || (x)==PIX_FMT_GRAY8       \
         || (x)==PIX_FMT_Y400A       \
         || (x)==PIX_FMT_YUV410P     \
@@ -134,7 +138,22 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt)
         || (x)==PIX_FMT_YUVJ422P    \
         || (x)==PIX_FMT_YUVJ440P    \
         || (x)==PIX_FMT_YUVJ444P    \
-        || isAnyRGB(x)              \
+        || isRGBinBytes(x)          \
+        || isBGRinBytes(x)          \
+        || (x)==PIX_FMT_RGB565      \
+        || (x)==PIX_FMT_RGB555      \
+        || (x)==PIX_FMT_RGB444      \
+        || (x)==PIX_FMT_BGR565      \
+        || (x)==PIX_FMT_BGR555      \
+        || (x)==PIX_FMT_BGR444      \
+        || (x)==PIX_FMT_RGB8        \
+        || (x)==PIX_FMT_BGR8        \
+        || (x)==PIX_FMT_RGB4_BYTE   \
+        || (x)==PIX_FMT_BGR4_BYTE   \
+        || (x)==PIX_FMT_RGB4        \
+        || (x)==PIX_FMT_BGR4        \
+        || (x)==PIX_FMT_MONOBLACK   \
+        || (x)==PIX_FMT_MONOWHITE   \
         || (x)==PIX_FMT_NV12        \
         || (x)==PIX_FMT_NV21        \
         || (x)==PIX_FMT_GRAY16BE    \
-- 
cgit v1.2.3


From 6b105e3ee607b4d83f894ee0c18bbd1d6f1e996f Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 9 Jun 2011 16:25:21 -0400
Subject: swscale: extract monowhite/black output from yuv2packed[12X]_c().

This is part of the Great Evil Plan to simplify swscale.
---
 libswscale/swscale.c | 172 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 113 insertions(+), 59 deletions(-)

(limited to 'libswscale')

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 3659b42725..39aac0c67b 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -528,6 +528,104 @@ static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
 YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE);
 YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
 
+static av_always_inline void /* Vertically filters luma, adds a dither offset and packs 8 one-bit pixels per output byte. */
+yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                      const int16_t **lumSrc, int lumFilterSize,
+                      const int16_t *chrFilter, const int16_t **chrUSrc, /* chrFilter/chrUSrc are not read by this function */
+                      const int16_t **chrVSrc, int chrFilterSize, /* chrVSrc/chrFilterSize are not read by this function */
+                      const int16_t **alpSrc, uint8_t *dest, int dstW, /* alpSrc is not read by this function */
+                      int y, enum PixelFormat target) /* target selects the output polarity, see output_pixel */
+{
+    const uint8_t * const d128=dither_8x8_220[y&7]; /* dither row selected by the low 3 bits of y */
+    uint8_t *g = c->table_gU[128] + c->table_gV[128]; /* lookup base for U = V = 128; presumably yields a 0/1 bit per index -- TODO confirm */
+    int i;
+    int acc = 0; /* bit accumulator; never reset, only its low 8 bits survive the store to uint8_t dest */
+    /* output_pixel stores acc unchanged for PIX_FMT_MONOBLACK and bit-inverted for any other target. */
+#define output_pixel(pos, acc) \
+    if (target == PIX_FMT_MONOBLACK) { \
+        pos = acc; \
+    } else { \
+        pos = ~acc; \
+    }
+    for (i = 0; i < dstW - 1; i += 2) { /* two pixels per iteration */
+        int j;
+        int Y1 = 1 << 18; /* rounding bias: half of the 1 << 19 divisor applied below */
+        int Y2 = 1 << 18;
+        /* Weighted sum of the lumFilterSize source lines for both pixels. */
+        for (j = 0; j < lumFilterSize; j++) {
+            Y1 += lumSrc[j][i]   * lumFilter[j];
+            Y2 += lumSrc[j][i+1] * lumFilter[j];
+        }
+        Y1 >>= 19;
+        Y2 >>= 19;
+        if ((Y1 | Y2) & 0x100) { /* clip only when bit 8 is set in either value; NOTE(review): out-of-range values with bit 8 clear are not clipped */
+            Y1 = av_clip_uint8(Y1);
+            Y2 = av_clip_uint8(Y2);
+        }
+        acc += acc + g[Y1 + d128[(i + 0) & 7]]; /* same as acc = 2 * acc + g[...]: shift left, append the looked-up value */
+        acc += acc + g[Y2 + d128[(i + 1) & 7]];
+        if ((i & 7) == 6) { /* pixels 6 and 7 of a group of 8 were just added: emit one byte */
+            output_pixel(*dest++, acc);
+        }
+    } /* pixels after the last complete group of 8 are accumulated but never written */
+}
+
+static av_always_inline void /* Blends two luma lines, adds a dither offset and packs 8 one-bit pixels per output byte. */
+yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
+                      const uint16_t *buf1, const uint16_t *ubuf0, /* ubuf0/ubuf1 are not read by this function */
+                      const uint16_t *ubuf1, const uint16_t *vbuf0, /* vbuf0/vbuf1 are not read by this function */
+                      const uint16_t *vbuf1, const uint16_t *abuf0, /* abuf0/abuf1 are not read by this function */
+                      const uint16_t *abuf1, uint8_t *dest, int dstW,
+                      int yalpha, int uvalpha, int y, /* uvalpha is not read by this function */
+                      enum PixelFormat target) /* target selects the output polarity via output_pixel */
+{
+    const uint8_t * const d128 = dither_8x8_220[y & 7]; /* dither row selected by the low 3 bits of y */
+    uint8_t *g = c->table_gU[128] + c->table_gV[128]; /* lookup base for U = V = 128; presumably yields a 0/1 bit per index -- TODO confirm */
+    int  yalpha1 = 4095 - yalpha; /* weight applied to buf0; buf1 is weighted by yalpha */
+    int i;
+    /* Each iteration consumes 8 pixels; every step below is acc = 2 * acc + g[blend + dither]. */
+    for (i = 0; i < dstW - 7; i += 8) { /* the trailing dstW % 8 pixels are not written */
+        int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
+        acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
+        acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
+        acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
+        acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
+        acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
+        acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
+        acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
+        output_pixel(*dest++, acc); /* macro from yuv2mono_X_c_template: acc for PIX_FMT_MONOBLACK, ~acc otherwise */
+    }
+}
+
+static av_always_inline void /* Converts a single luma line to dithered 1-bit output, 8 pixels packed per byte. */
+yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
+                      const uint16_t *ubuf0, const uint16_t *ubuf1, /* ubuf0/ubuf1 are not read by this function */
+                      const uint16_t *vbuf0, const uint16_t *vbuf1, /* vbuf0/vbuf1 are not read by this function */
+                      const uint16_t *abuf0, uint8_t *dest, int dstW, /* abuf0 is not read by this function */
+                      int uvalpha, enum PixelFormat dstFormat, /* uvalpha/dstFormat are not read by this function */
+                      int flags, int y, enum PixelFormat target) /* flags is not read; target selects the output polarity */
+{
+    const uint8_t * const d128 = dither_8x8_220[y & 7]; /* dither row selected by the low 3 bits of y */
+    uint8_t *g = c->table_gU[128] + c->table_gV[128]; /* lookup base for U = V = 128; presumably yields a 0/1 bit per index -- TODO confirm */
+    int i;
+    /* Each iteration consumes 8 pixels; every step below is acc = 2 * acc + g[(luma >> 7) + dither]. */
+    for (i = 0; i < dstW - 7; i += 8) { /* the trailing dstW % 8 pixels are not written */
+        int acc =    g[(buf0[i    ] >> 7) + d128[0]];
+        acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
+        acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
+        acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
+        acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
+        acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
+        acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
+        acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
+        output_pixel(*dest++, acc); /* macro from yuv2mono_X_c_template: acc for PIX_FMT_MONOBLACK, ~acc otherwise */
+    }
+#undef output_pixel /* last user of the helper macro defined in yuv2mono_X_c_template */
+}
+
+YUV2PACKEDWRAPPER(yuv2mono, white, PIX_FMT_MONOWHITE); /* provides the yuv2monowhite_{1,2,X}_c entry points selected in find_c_packed_planar_out_funcs */
+YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK); /* provides the yuv2monoblack_{1,2,X}_c entry points selected in find_c_packed_planar_out_funcs */
+
 #define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
     for (i=0; i<(dstW>>1); i++) {\
         int j;\
@@ -677,51 +775,7 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
     b = (type *)c->table_bU[U];
 
-#define YSCALE_YUV_2_MONO2_C \
-    const uint8_t * const d128=dither_8x8_220[y&7];\
-    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
-    for (i=0; i<dstW-7; i+=8) {\
-        int acc;\
-        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
-        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
-        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
-        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
-        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
-        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
-        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
-        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
-        ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
-        dest++;\
-    }
-
-#define YSCALE_YUV_2_MONOX_C \
-    const uint8_t * const d128=dither_8x8_220[y&7];\
-    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
-    int acc=0;\
-    for (i=0; i<dstW-1; i+=2) {\
-        int j;\
-        int Y1=1<<18;\
-        int Y2=1<<18;\
-\
-        for (j=0; j<lumFilterSize; j++) {\
-            Y1 += lumSrc[j][i] * lumFilter[j];\
-            Y2 += lumSrc[j][i+1] * lumFilter[j];\
-        }\
-        Y1>>=19;\
-        Y2>>=19;\
-        if ((Y1|Y2)&0x100) {\
-            Y1 = av_clip_uint8(Y1); \
-            Y2 = av_clip_uint8(Y2); \
-        }\
-        acc+= acc + g[Y1+d128[(i+0)&7]];\
-        acc+= acc + g[Y2+d128[(i+1)&7]];\
-        if ((i&7)==6) {\
-            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
-            dest++;\
-        }\
-    }
-
-#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_monoblack)\
+#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
     switch(c->dstFormat) {\
     case PIX_FMT_RGB48BE:\
     case PIX_FMT_RGB48LE:\
@@ -897,12 +951,6 @@ YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE);
             }\
         }\
         break;\
-    case PIX_FMT_MONOBLACK:\
-    case PIX_FMT_MONOWHITE:\
-        {\
-            func_monoblack\
-        }\
-        break;\
     case PIX_FMT_YUYV422:\
         func2\
             ((uint8_t*)dest)[2*i2+0]= Y1;\
@@ -928,7 +976,7 @@ static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
 {
     int i;
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_MONOX_C)
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0))
 }
 
 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
@@ -1031,7 +1079,7 @@ static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
     int uvalpha1=4095-uvalpha;
     int i;
 
-    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_MONO2_C)
+    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0))
 }
 
 /**
@@ -1044,16 +1092,12 @@ static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
                           int uvalpha, enum PixelFormat dstFormat,
                           int flags, int y)
 {
-    const int yalpha1=0;
     int i;
 
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
-    const int yalpha= 4096; //FIXME ...
-
     if (uvalpha < 2048) {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_MONO2_C)
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0))
     } else {
-        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_MONO2_C)
+        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0))
     }
 }
 
@@ -1717,6 +1761,16 @@ find_c_packed_planar_out_funcs(SwsContext *c,
             *yuv2packed2 = yuv2gray16LE_2_c;
             *yuv2packedX = yuv2gray16LE_X_c;
             break;
+        case PIX_FMT_MONOWHITE:
+            *yuv2packed1 = yuv2monowhite_1_c;
+            *yuv2packed2 = yuv2monowhite_2_c;
+            *yuv2packedX = yuv2monowhite_X_c;
+            break;
+        case PIX_FMT_MONOBLACK:
+            *yuv2packed1 = yuv2monoblack_1_c;
+            *yuv2packed2 = yuv2monoblack_2_c;
+            *yuv2packedX = yuv2monoblack_X_c;
+            break;
         default:
             *yuv2packed1 = yuv2packed1_c;
             *yuv2packed2 = yuv2packed2_c;
-- 
cgit v1.2.3