From 87fa05a0dae629cdad390adaa1054db3f4ecc3c7 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 11 Aug 2012 04:08:15 +0100
Subject: ARM: intmath: use native-size return types for clipping functions

This avoids having the compiler redundantly mask the values to
the smaller size.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/arm/intmath.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index d5a343c95f..88e9c26b11 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -44,7 +44,7 @@ static av_always_inline av_const int FASTDIV(int a, int b)
 }
 
 #define av_clip_uint8 av_clip_uint8_arm
-static av_always_inline av_const uint8_t av_clip_uint8_arm(int a)
+static av_always_inline av_const unsigned av_clip_uint8_arm(int a)
 {
     unsigned x;
     __asm__ ("usat %0, #8,  %1" : "=r"(x) : "r"(a));
@@ -52,15 +52,15 @@ static av_always_inline av_const uint8_t av_clip_uint8_arm(int a)
 }
 
 #define av_clip_int8 av_clip_int8_arm
-static av_always_inline av_const uint8_t av_clip_int8_arm(int a)
+static av_always_inline av_const int av_clip_int8_arm(int a)
 {
-    unsigned x;
+    int x;
     __asm__ ("ssat %0, #8,  %1" : "=r"(x) : "r"(a));
     return x;
 }
 
 #define av_clip_uint16 av_clip_uint16_arm
-static av_always_inline av_const uint16_t av_clip_uint16_arm(int a)
+static av_always_inline av_const unsigned av_clip_uint16_arm(int a)
 {
     unsigned x;
     __asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a));
@@ -68,7 +68,7 @@ static av_always_inline av_const uint16_t av_clip_uint16_arm(int a)
 }
 
 #define av_clip_int16 av_clip_int16_arm
-static av_always_inline av_const int16_t av_clip_int16_arm(int a)
+static av_always_inline av_const int av_clip_int16_arm(int a)
 {
     int x;
     __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
-- 
cgit v1.2.3


From c8252e80eb1d0d0964e94b3186bfdb776fee99d6 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 12 Aug 2012 02:08:46 +0100
Subject: x86: mlpdsp: avoid taking address of void

This code contains a C array of addresses of labels defined in
inline asm.  To do this, the names must be declared as external
in C.  The declared type does not matter since only the address is
used, and for some reason, the author of the code used the 'void'
type despite taking the address of a void expression being invalid.

Changing the type to char, a reasonable choice since the alignment
of the code labels cannot be known or guaranteed, eliminates gcc
warnings and allows building with suncc.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/x86/mlpdsp.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/libavcodec/x86/mlpdsp.c b/libavcodec/x86/mlpdsp.c
index 16e38c3313..d793c7d3e0 100644
--- a/libavcodec/x86/mlpdsp.c
+++ b/libavcodec/x86/mlpdsp.c
@@ -25,21 +25,21 @@
 
 #if HAVE_7REGS
 
-extern void ff_mlp_firorder_8;
-extern void ff_mlp_firorder_7;
-extern void ff_mlp_firorder_6;
-extern void ff_mlp_firorder_5;
-extern void ff_mlp_firorder_4;
-extern void ff_mlp_firorder_3;
-extern void ff_mlp_firorder_2;
-extern void ff_mlp_firorder_1;
-extern void ff_mlp_firorder_0;
-
-extern void ff_mlp_iirorder_4;
-extern void ff_mlp_iirorder_3;
-extern void ff_mlp_iirorder_2;
-extern void ff_mlp_iirorder_1;
-extern void ff_mlp_iirorder_0;
+extern char ff_mlp_firorder_8;
+extern char ff_mlp_firorder_7;
+extern char ff_mlp_firorder_6;
+extern char ff_mlp_firorder_5;
+extern char ff_mlp_firorder_4;
+extern char ff_mlp_firorder_3;
+extern char ff_mlp_firorder_2;
+extern char ff_mlp_firorder_1;
+extern char ff_mlp_firorder_0;
+
+extern char ff_mlp_iirorder_4;
+extern char ff_mlp_iirorder_3;
+extern char ff_mlp_iirorder_2;
+extern char ff_mlp_iirorder_1;
+extern char ff_mlp_iirorder_0;
 
 static const void *firtable[9] = { &ff_mlp_firorder_0, &ff_mlp_firorder_1,
                                    &ff_mlp_firorder_2, &ff_mlp_firorder_3,
-- 
cgit v1.2.3


From 8ec0204ee4ee93218c51a86d2faa24937c8108e7 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 12 Aug 2012 02:18:41 +0100
Subject: x86: cabac: allow building with suncc

This fixes two issues preventing suncc from building this code.

The undocumented 'a' operand modifier, causing gcc to omit a $ in
front of immediate operands (as required in addresses), is not
supported by suncc.  Luckily, the also undocumented 'c' modifer
has the same effect and is supported.

On some asm statements with a large number of operands, suncc for no
obvious reason fails to correctly substitute some of the operands.
Fortunately, some of the operands in these statements are plain
numbers which can be inserted directly into the code block instead
of passed as operands.

With these changes, the code builds correctly with both gcc and
suncc.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/x86/cabac.h     | 24 +++++++++++++-----------
 libavcodec/x86/h264_i386.h | 37 ++++++++++++++++++++++++-------------
 2 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 389f155745..a74cf0bf1f 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -173,14 +173,16 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
     __asm__ volatile(
         BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
                              "%2", "%q2", "%3", "%b3",
-                             "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10", "%11")
+                             "%c6(%5)", "%c7(%5)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%8")
         : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
         : "r"(state), "r"(c),
           "i"(offsetof(CABACContext, bytestream)),
-          "i"(offsetof(CABACContext, bytestream_end)),
-          "i"(H264_NORM_SHIFT_OFFSET),
-          "i"(H264_LPS_RANGE_OFFSET),
-          "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG
+          "i"(offsetof(CABACContext, bytestream_end))
+          TABLES_ARG
         : "%"REG_c, "memory"
     );
     return bit & 1;
@@ -192,8 +194,8 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
 {
     x86_reg tmp;
     __asm__ volatile(
-        "movl        %a6(%2), %k1       \n\t"
-        "movl        %a3(%2), %%eax     \n\t"
+        "movl        %c6(%2), %k1       \n\t"
+        "movl        %c3(%2), %%eax     \n\t"
         "shl             $17, %k1       \n\t"
         "add           %%eax, %%eax     \n\t"
         "sub             %k1, %%eax     \n\t"
@@ -204,17 +206,17 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
         "sub           %%edx, %%ecx     \n\t"
         "test           %%ax, %%ax      \n\t"
         "jnz              1f            \n\t"
-        "mov         %a4(%2), %1        \n\t"
+        "mov         %c4(%2), %1        \n\t"
         "subl        $0xFFFF, %%eax     \n\t"
         "movzwl         (%1), %%edx     \n\t"
         "bswap         %%edx            \n\t"
         "shrl            $15, %%edx     \n\t"
         "addl          %%edx, %%eax     \n\t"
-        "cmp         %a5(%2), %1        \n\t"
+        "cmp         %c5(%2), %1        \n\t"
         "jge              1f            \n\t"
-        "add"OPSIZE"      $2, %a4(%2)   \n\t"
+        "add"OPSIZE"      $2, %c4(%2)   \n\t"
         "1:                             \n\t"
-        "movl          %%eax, %a3(%2)   \n\t"
+        "movl          %%eax, %c3(%2)   \n\t"
 
         : "+c"(val), "=&r"(tmp)
         : "r"(c),
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 2daa40ae52..bb881c35df 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -63,7 +63,11 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
         BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
                              "%5", "%q5", "%k0", "%b0",
-                             "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15", "%16")
+                             "%c11(%6)", "%c12(%6)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%13")
 
         "test $1, %4                            \n\t"
         " jz 4f                                 \n\t"
@@ -71,7 +75,11 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
         BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
                              "%5", "%q5", "%k0", "%b0",
-                             "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15", "%16")
+                             "%c11(%6)", "%c12(%6)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%13")
 
         "sub  %10, %1                           \n\t"
         "mov  %2, %0                            \n\t"
@@ -99,10 +107,8 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
           "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
         : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
           "i"(offsetof(CABACContext, bytestream)),
-          "i"(offsetof(CABACContext, bytestream_end)),
-          "i"(H264_NORM_SHIFT_OFFSET),
-          "i"(H264_LPS_RANGE_OFFSET),
-          "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG
+          "i"(offsetof(CABACContext, bytestream_end))
+          TABLES_ARG
         : "%"REG_c, "memory"
     );
     return coeff_count;
@@ -137,22 +143,30 @@ static int decode_significance_8x8_x86(CABACContext *c,
 
         BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
                              "%5", "%q5", "%k0", "%b0",
-                             "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16", "%18")
+                             "%c12(%7)", "%c13(%7)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%15")
 
         "mov %1, %k6                            \n\t"
         "test $1, %4                            \n\t"
         " jz 4f                                 \n\t"
 
 #ifdef BROKEN_RELOCATIONS
-        "movzbl %a17(%18, %q6), %k6\n\t"
+        "movzbl %c14(%15, %q6), %k6\n\t"
 #else
-        "movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t"
+        "movzbl "MANGLE(ff_h264_cabac_tables)"+%c14(%k6), %k6\n\t"
 #endif
         "add %11, %6                            \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
                              "%5", "%q5", "%k0", "%b0",
-                             "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16", "%18")
+                             "%c12(%7)", "%c13(%7)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%15")
 
         "mov %2, %0                             \n\t"
         "mov %1, %k6                            \n\t"
@@ -179,9 +193,6 @@ static int decode_significance_8x8_x86(CABACContext *c,
           "m"(sig_off), "m"(last_coeff_ctx_base),
           "i"(offsetof(CABACContext, bytestream)),
           "i"(offsetof(CABACContext, bytestream_end)),
-          "i"(H264_NORM_SHIFT_OFFSET),
-          "i"(H264_LPS_RANGE_OFFSET),
-          "i"(H264_MLPS_STATE_OFFSET),
           "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
         : "%"REG_c, "memory"
     );
-- 
cgit v1.2.3


From 480178a29587df8ed6d5e93bfe79e4a08a61f9e1 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 12 Aug 2012 19:45:46 +0100
Subject: x86: yadif: fix asm with suncc

Under some circumstances, suncc will use a single register for the
address of all memory operands, inserting lea instructions loading
the correct address prior to each memory operand being used in the
code. In the yadif code, the branch in the asm block bypasses such
an lea instruction, causing an incorrect address to be used in the
following load.

This patch replaces the tmpX arrays with a single array and uses a
register operand to hold its address. Although this prevents using
offsets from the stack pointer to access these locations, the code
still builds as 32-bit PIC even with old compilers.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavfilter/x86/yadif_template.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/libavfilter/x86/yadif_template.c b/libavfilter/x86/yadif_template.c
index 1de0a58e8f..3e45f4fda0 100644
--- a/libavfilter/x86/yadif_template.c
+++ b/libavfilter/x86/yadif_template.c
@@ -107,10 +107,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
                                       uint8_t *next, int w, int prefs,
                                       int mrefs, int parity, int mode)
 {
-    DECLARE_ALIGNED(16, uint8_t, tmp0)[16];
-    DECLARE_ALIGNED(16, uint8_t, tmp1)[16];
-    DECLARE_ALIGNED(16, uint8_t, tmp2)[16];
-    DECLARE_ALIGNED(16, uint8_t, tmp3)[16];
+    DECLARE_ALIGNED(16, uint8_t, tmp)[16*4];
     int x;
 
 #define FILTER\
@@ -124,9 +121,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             MOVQ"      "MM"3, "MM"4 \n\t"\
             "paddw     "MM"2, "MM"3 \n\t"\
             "psraw     $1,    "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
-            MOVQ"      "MM"0, %[tmp0] \n\t" /* c */\
-            MOVQ"      "MM"3, %[tmp1] \n\t" /* d */\
-            MOVQ"      "MM"1, %[tmp2] \n\t" /* e */\
+            MOVQ"      "MM"0,   (%[tmp]) \n\t" /* c */\
+            MOVQ"      "MM"3, 16(%[tmp]) \n\t" /* d */\
+            MOVQ"      "MM"1, 32(%[tmp]) \n\t" /* e */\
             "psubw     "MM"4, "MM"2 \n\t"\
             PABS(      MM"4", MM"2") /* temporal_diff0 */\
             LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
@@ -148,7 +145,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
             "psrlw     $1,    "MM"3 \n\t"\
             "pmaxsw    "MM"3, "MM"2 \n\t"\
-            MOVQ"      "MM"2, %[tmp3] \n\t" /* diff */\
+            MOVQ"      "MM"2, 48(%[tmp]) \n\t" /* diff */\
 \
             "paddw     "MM"0, "MM"1 \n\t"\
             "paddw     "MM"0, "MM"0 \n\t"\
@@ -179,7 +176,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             CHECK2\
 \
             /* if(p->mode<2) ... */\
-            MOVQ"    %[tmp3], "MM"6 \n\t" /* diff */\
+            MOVQ" 48(%[tmp]), "MM"6 \n\t" /* diff */\
             "cmpl      $2, %[mode] \n\t"\
             "jge       1f \n\t"\
             LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
@@ -190,9 +187,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             "paddw     "MM"5, "MM"3 \n\t"\
             "psrlw     $1,    "MM"2 \n\t" /* b */\
             "psrlw     $1,    "MM"3 \n\t" /* f */\
-            MOVQ"    %[tmp0], "MM"4 \n\t" /* c */\
-            MOVQ"    %[tmp1], "MM"5 \n\t" /* d */\
-            MOVQ"    %[tmp2], "MM"7 \n\t" /* e */\
+            MOVQ"   (%[tmp]), "MM"4 \n\t" /* c */\
+            MOVQ" 16(%[tmp]), "MM"5 \n\t" /* d */\
+            MOVQ" 32(%[tmp]), "MM"7 \n\t" /* e */\
             "psubw     "MM"4, "MM"2 \n\t" /* b-c */\
             "psubw     "MM"7, "MM"3 \n\t" /* f-e */\
             MOVQ"      "MM"5, "MM"0 \n\t"\
@@ -211,7 +208,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             "pmaxsw    "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
             "1: \n\t"\
 \
-            MOVQ"    %[tmp1], "MM"2 \n\t" /* d */\
+            MOVQ" 16(%[tmp]), "MM"2 \n\t" /* d */\
             MOVQ"      "MM"2, "MM"3 \n\t"\
             "psubw     "MM"6, "MM"2 \n\t" /* d-diff */\
             "paddw     "MM"6, "MM"3 \n\t" /* d+diff */\
@@ -219,16 +216,13 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             "pminsw    "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
             "packuswb  "MM"1, "MM"1 \n\t"\
 \
-            :[tmp0]"=m"(tmp0),\
-             [tmp1]"=m"(tmp1),\
-             [tmp2]"=m"(tmp2),\
-             [tmp3]"=m"(tmp3)\
-            :[prev] "r"(prev),\
+            ::[prev] "r"(prev),\
              [cur]  "r"(cur),\
              [next] "r"(next),\
              [prefs]"r"((x86_reg)prefs),\
              [mrefs]"r"((x86_reg)mrefs),\
-             [mode] "g"(mode)\
+             [mode] "g"(mode),\
+             [tmp]  "r"(tmp)\
         );\
         __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
         dst += STEP;\
-- 
cgit v1.2.3


From 10b83cb653e6effc80f3f0ddaaf39aea87546a6d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 12 Aug 2012 23:38:51 +0100
Subject: x86: swscale: remove disabled code

This code has been disabled since 2003.  Nobody will ever look at
it again.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libswscale/x86/rgb2rgb_template.c | 84 ---------------------------------------
 1 file changed, 84 deletions(-)

diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 3374f45908..0f2bfd0581 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -287,7 +287,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
     mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
     __asm__ volatile(
         "movq           %3, %%mm5   \n\t"
         "movq           %4, %%mm6   \n\t"
@@ -322,47 +321,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s
         : "+r" (d), "+r"(s)
         : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
     );
-#else
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq    %0, %%mm7    \n\t"
-        "movq    %1, %%mm6    \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $8, %%mm2    \n\t"
-            "psrlq         $8, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 16;
-    }
-#endif
     __asm__ volatile(SFENCE:::"memory");
     __asm__ volatile(EMMS:::"memory");
     while (s < end) {
@@ -434,7 +392,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
     mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
     __asm__ volatile(
         "movq           %3, %%mm5   \n\t"
         "movq           %4, %%mm6   \n\t"
@@ -469,47 +426,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s
         : "+r" (d), "+r"(s)
         : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
     );
-#else
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $6, %%mm1    \n\t"
-            "psrlq         $6, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $9, %%mm2    \n\t"
-            "psrlq         $9, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 16;
-    }
-#endif
     __asm__ volatile(SFENCE:::"memory");
     __asm__ volatile(EMMS:::"memory");
     while (s < end) {
-- 
cgit v1.2.3


From 90540c2d5ace46a1e9789c75fde0b1f7dbb12a9b Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 13 Aug 2012 00:53:05 +0100
Subject: x86: swscale: fix fragile memory accesses

To access data at multiple fixed offsets from a base address, this
code uses a single "m" operand and code of the form "32%0", relying on
the memory operand instantiation having no displacement, giving a final
result of the form "32(%rax)".  If the compiler uses a register and
displacement, e.g. "64(%rax)", the end result becomes "3264(%rax)",
which obviously does not work.

Replacing the "m" operands with "r" operands allows safe addition of a
displacement.  In theory, multiple memory operands could use a shared
base register with different index registers, "(%rax,%rbx)", potentially
making more efficient use of registers.  In the cases at hand, no such
sharing is possible since the addresses involved are entirely unrelated.

After this change, the code somewhat rudely accesses memory without
using a corresponding memory operand, which in some cases can lead to
unwanted "optimisations" of surrounding code.  However, the original
code also accesses memory not covered by a memory operand, so this is
not adding any defect not already present.  It is also hightly unlikely
that any such optimisations could be performed here since the memory
locations in questions are not accessed elsewhere in the same functions.

This fixes crashes with suncc.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libswscale/x86/rgb2rgb_template.c | 284 ++++++++++++++++++--------------------
 1 file changed, 137 insertions(+), 147 deletions(-)

diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 0f2bfd0581..7a641e1814 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
     __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "punpckldq    3%1, %%mm0    \n\t"
-            "movd         6%1, %%mm1    \n\t"
-            "punpckldq    9%1, %%mm1    \n\t"
-            "movd        12%1, %%mm2    \n\t"
-            "punpckldq   15%1, %%mm2    \n\t"
-            "movd        18%1, %%mm3    \n\t"
-            "punpckldq   21%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "punpckldq  3(%1), %%mm0    \n\t"
+            "movd       6(%1), %%mm1    \n\t"
+            "punpckldq  9(%1), %%mm1    \n\t"
+            "movd      12(%1), %%mm2    \n\t"
+            "punpckldq 15(%1), %%mm2    \n\t"
+            "movd      18(%1), %%mm3    \n\t"
+            "punpckldq 21(%1), %%mm3    \n\t"
             "por        %%mm7, %%mm0    \n\t"
             "por        %%mm7, %%mm1    \n\t"
             "por        %%mm7, %%mm2    \n\t"
             "por        %%mm7, %%mm3    \n\t"
-            MOVNTQ"     %%mm0,   %0     \n\t"
-            MOVNTQ"     %%mm1,  8%0     \n\t"
-            MOVNTQ"     %%mm2, 16%0     \n\t"
-            MOVNTQ"     %%mm3, 24%0"
-            :"=m"(*dest)
-            :"m"(*s)
+            MOVNTQ"     %%mm0,   (%0)   \n\t"
+            MOVNTQ"     %%mm1,  8(%0)   \n\t"
+            MOVNTQ"     %%mm2, 16(%0)   \n\t"
+            MOVNTQ"     %%mm3, 24(%0)"
+            :: "r"(dest), "r"(s)
             :"memory");
         dest += 32;
         s += 24;
@@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
             "pand  "MANGLE(mask24hhhh)", %%mm5\n\t" \
             "por        %%mm5, %%mm4    \n\t" \
  \
-            MOVNTQ"     %%mm0,   %0     \n\t" \
-            MOVNTQ"     %%mm1,  8%0     \n\t" \
-            MOVNTQ"     %%mm4, 16%0"
+            MOVNTQ"     %%mm0,   (%0)    \n\t" \
+            MOVNTQ"     %%mm1,  8(%0)    \n\t" \
+            MOVNTQ"     %%mm4, 16(%0)"
 
 
 static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
@@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 31;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq        16%1, %%mm4    \n\t"
-            "movq        24%1, %%mm5    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq       8(%1), %%mm1    \n\t"
+            "movq      16(%1), %%mm4    \n\t"
+            "movq      24(%1), %%mm5    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm1, %%mm3    \n\t"
             "movq       %%mm4, %%mm6    \n\t"
             "movq       %%mm5, %%mm7    \n\t"
             STORE_BGR24_MMX
-            :"=m"(*dest)
-            :"m"(*s)
+            :: "r"(dest), "r"(s)
             :"memory");
         dest += 24;
         s += 32;
@@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 15;
     while (s<mm_end) {
         __asm__ volatile(
-            PREFETCH"  32%1         \n\t"
-            "movq        %1, %%mm0  \n\t"
-            "movq       8%1, %%mm2  \n\t"
+            PREFETCH" 32(%1)        \n\t"
+            "movq      (%1), %%mm0  \n\t"
+            "movq     8(%1), %%mm2  \n\t"
             "movq     %%mm0, %%mm1  \n\t"
             "movq     %%mm2, %%mm3  \n\t"
             "pand     %%mm4, %%mm0  \n\t"
             "pand     %%mm4, %%mm2  \n\t"
             "paddw    %%mm1, %%mm0  \n\t"
             "paddw    %%mm3, %%mm2  \n\t"
-            MOVNTQ"   %%mm0,  %0    \n\t"
-            MOVNTQ"   %%mm2, 8%0"
-            :"=m"(*d)
-            :"m"(*s)
+            MOVNTQ"   %%mm0,  (%0)  \n\t"
+            MOVNTQ"   %%mm2, 8(%0)"
+            :: "r"(d), "r"(s)
         );
         d+=16;
         s+=16;
@@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 15;
     while (s<mm_end) {
         __asm__ volatile(
-            PREFETCH"  32%1         \n\t"
-            "movq        %1, %%mm0  \n\t"
-            "movq       8%1, %%mm2  \n\t"
+            PREFETCH" 32(%1)        \n\t"
+            "movq      (%1), %%mm0  \n\t"
+            "movq     8(%1), %%mm2  \n\t"
             "movq     %%mm0, %%mm1  \n\t"
             "movq     %%mm2, %%mm3  \n\t"
             "psrlq       $1, %%mm0  \n\t"
@@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
             "pand     %%mm6, %%mm3  \n\t"
             "por      %%mm1, %%mm0  \n\t"
             "por      %%mm3, %%mm2  \n\t"
-            MOVNTQ"   %%mm0,  %0    \n\t"
-            MOVNTQ"   %%mm2, 8%0"
-            :"=m"(*d)
-            :"m"(*s)
+            MOVNTQ"   %%mm0,  (%0)  \n\t"
+            MOVNTQ"   %%mm2, 8(%0)"
+            :: "r"(d), "r"(s)
         );
         d+=16;
         s+=16;
@@ -344,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 15;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       4(%1), %%mm3    \n\t"
+            "punpckldq  8(%1), %%mm0    \n\t"
+            "punpckldq 12(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -371,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            :: "r"(d),"r"(s),"m"(blue_16mask):"memory");
         d += 4;
         s += 16;
     }
@@ -449,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 15;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       4(%1), %%mm3    \n\t"
+            "punpckldq  8(%1), %%mm0    \n\t"
+            "punpckldq 12(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -476,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
         d += 4;
         s += 16;
     }
@@ -504,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 11;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       3(%1), %%mm3    \n\t"
+            "punpckldq  6(%1), %%mm0    \n\t"
+            "punpckldq  9(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -531,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
         d += 4;
         s += 12;
     }
@@ -561,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 15;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       3(%1), %%mm3    \n\t"
+            "punpckldq  6(%1), %%mm0    \n\t"
+            "punpckldq  9(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -588,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
         d += 4;
         s += 12;
     }
@@ -618,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 11;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       3(%1), %%mm3    \n\t"
+            "punpckldq  6(%1), %%mm0    \n\t"
+            "punpckldq  9(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -645,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
         d += 4;
         s += 12;
     }
@@ -675,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 15;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"   32%1            \n\t"
-            "movd         %1, %%mm0     \n\t"
-            "movd        3%1, %%mm3     \n\t"
-            "punpckldq   6%1, %%mm0     \n\t"
-            "punpckldq   9%1, %%mm3     \n\t"
+            PREFETCH" 32(%1)            \n\t"
+            "movd       (%1), %%mm0     \n\t"
+            "movd      3(%1), %%mm3     \n\t"
+            "punpckldq 6(%1), %%mm0     \n\t"
+            "punpckldq 9(%1), %%mm3     \n\t"
             "movq      %%mm0, %%mm1     \n\t"
             "movq      %%mm0, %%mm2     \n\t"
             "movq      %%mm3, %%mm4     \n\t"
@@ -702,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
             "por       %%mm5, %%mm3     \n\t"
             "psllq       $16, %%mm3     \n\t"
             "por       %%mm3, %%mm0     \n\t"
-            MOVNTQ"    %%mm0, %0        \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+            MOVNTQ"    %%mm0, (%0)      \n\t"
+            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
         d += 4;
         s += 12;
     }
@@ -749,10 +745,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 7;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq        (%1), %%mm1    \n\t"
+            "movq        (%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -780,9 +776,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "movq       %%mm0, %%mm6    \n\t"
             "movq       %%mm3, %%mm7    \n\t"
 
-            "movq         8%1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq         8%1, %%mm2    \n\t"
+            "movq       8(%1), %%mm0    \n\t"
+            "movq       8(%1), %%mm1    \n\t"
+            "movq       8(%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -808,7 +804,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
 
             :"=m"(*d)
-            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+            :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
             :"memory");
         /* borrowed 32 to 24 */
         __asm__ volatile(
@@ -824,8 +820,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
 
             STORE_BGR24_MMX
 
-            :"=m"(*d)
-            :"m"(*s)
+            :: "r"(d), "m"(*s)
             :"memory");
         d += 24;
         s += 8;
@@ -852,10 +847,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 7;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq        (%1), %%mm1    \n\t"
+            "movq        (%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -883,9 +878,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "movq       %%mm0, %%mm6    \n\t"
             "movq       %%mm3, %%mm7    \n\t"
 
-            "movq         8%1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq         8%1, %%mm2    \n\t"
+            "movq       8(%1), %%mm0    \n\t"
+            "movq       8(%1), %%mm1    \n\t"
+            "movq       8(%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -910,7 +905,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm4, %%mm3    \n\t"
             "por        %%mm5, %%mm3    \n\t"
             :"=m"(*d)
-            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+            :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
             :"memory");
         /* borrowed 32 to 24 */
         __asm__ volatile(
@@ -926,8 +921,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
 
             STORE_BGR24_MMX
 
-            :"=m"(*d)
-            :"m"(*s)
+            :: "r"(d), "m"(*s)
             :"memory");
         d += 24;
         s += 8;
@@ -959,8 +953,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     "movq       %%mm0, %%mm3    \n\t"                               \
     "punpcklwd  %%mm2, %%mm0    \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
     "punpckhwd  %%mm2, %%mm3    \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
-    MOVNTQ"     %%mm0,  %0      \n\t"                               \
-    MOVNTQ"     %%mm3, 8%0      \n\t"                               \
+    MOVNTQ"     %%mm0,  (%0)    \n\t"                               \
+    MOVNTQ"     %%mm3, 8(%0)    \n\t"                               \
 
 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
@@ -975,10 +969,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 3;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq        (%1), %%mm1    \n\t"
+            "movq        (%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -986,8 +980,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
             "psrlq         $2, %%mm1    \n\t"
             "psrlq         $7, %%mm2    \n\t"
             PACK_RGB32
-            :"=m"(*d)
-            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+            ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
             :"memory");
         d += 16;
         s += 4;
@@ -1017,10 +1010,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 3;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq        (%1), %%mm1    \n\t"
+            "movq        (%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -1028,8 +1021,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
             "psrlq         $3, %%mm1    \n\t"
             "psrlq         $8, %%mm2    \n\t"
             PACK_RGB32
-            :"=m"(*d)
-            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+            ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
             :"memory");
         d += 16;
         s += 4;
@@ -1957,8 +1949,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                        int srcStride1, int srcStride2,
                                        int dstStride1, int dstStride2)
 {
-    x86_reg y;
-    int x,w,h;
+    x86_reg x, y;
+    int w,h;
     w=width/2; h=height/2;
     __asm__ volatile(
         PREFETCH" %0    \n\t"
@@ -1970,11 +1962,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
         x=0;
         for (;x<w-31;x+=32) {
             __asm__ volatile(
-                PREFETCH"   32%1        \n\t"
-                "movq         %1, %%mm0 \n\t"
-                "movq        8%1, %%mm2 \n\t"
-                "movq       16%1, %%mm4 \n\t"
-                "movq       24%1, %%mm6 \n\t"
+                PREFETCH"   32(%1,%2)        \n\t"
+                "movq         (%1,%2), %%mm0 \n\t"
+                "movq        8(%1,%2), %%mm2 \n\t"
+                "movq       16(%1,%2), %%mm4 \n\t"
+                "movq       24(%1,%2), %%mm6 \n\t"
                 "movq      %%mm0, %%mm1 \n\t"
                 "movq      %%mm2, %%mm3 \n\t"
                 "movq      %%mm4, %%mm5 \n\t"
@@ -1987,16 +1979,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                 "punpckhbw %%mm5, %%mm5 \n\t"
                 "punpcklbw %%mm6, %%mm6 \n\t"
                 "punpckhbw %%mm7, %%mm7 \n\t"
-                MOVNTQ"    %%mm0,   %0  \n\t"
-                MOVNTQ"    %%mm1,  8%0  \n\t"
-                MOVNTQ"    %%mm2, 16%0  \n\t"
-                MOVNTQ"    %%mm3, 24%0  \n\t"
-                MOVNTQ"    %%mm4, 32%0  \n\t"
-                MOVNTQ"    %%mm5, 40%0  \n\t"
-                MOVNTQ"    %%mm6, 48%0  \n\t"
-                MOVNTQ"    %%mm7, 56%0"
-                :"=m"(d[2*x])
-                :"m"(s1[x])
+                MOVNTQ"    %%mm0,   (%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm1,  8(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm2, 16(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm3, 24(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm4, 32(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm5, 40(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm6, 48(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm7, 56(%0,%2,2)"
+                :: "r"(d), "r"(s1), "r"(x)
                 :"memory");
         }
         for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
@@ -2007,11 +1998,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
         x=0;
         for (;x<w-31;x+=32) {
             __asm__ volatile(
-                PREFETCH"   32%1        \n\t"
-                "movq         %1, %%mm0 \n\t"
-                "movq        8%1, %%mm2 \n\t"
-                "movq       16%1, %%mm4 \n\t"
-                "movq       24%1, %%mm6 \n\t"
+                PREFETCH"   32(%1,%2)        \n\t"
+                "movq         (%1,%2), %%mm0 \n\t"
+                "movq        8(%1,%2), %%mm2 \n\t"
+                "movq       16(%1,%2), %%mm4 \n\t"
+                "movq       24(%1,%2), %%mm6 \n\t"
                 "movq      %%mm0, %%mm1 \n\t"
                 "movq      %%mm2, %%mm3 \n\t"
                 "movq      %%mm4, %%mm5 \n\t"
@@ -2024,16 +2015,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                 "punpckhbw %%mm5, %%mm5 \n\t"
                 "punpcklbw %%mm6, %%mm6 \n\t"
                 "punpckhbw %%mm7, %%mm7 \n\t"
-                MOVNTQ"    %%mm0,   %0  \n\t"
-                MOVNTQ"    %%mm1,  8%0  \n\t"
-                MOVNTQ"    %%mm2, 16%0  \n\t"
-                MOVNTQ"    %%mm3, 24%0  \n\t"
-                MOVNTQ"    %%mm4, 32%0  \n\t"
-                MOVNTQ"    %%mm5, 40%0  \n\t"
-                MOVNTQ"    %%mm6, 48%0  \n\t"
-                MOVNTQ"    %%mm7, 56%0"
-                :"=m"(d[2*x])
-                :"m"(s2[x])
+                MOVNTQ"    %%mm0,   (%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm1,  8(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm2, 16(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm3, 24(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm4, 32(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm5, 40(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm6, 48(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm7, 56(%0,%2,2)"
+                :: "r"(d), "r"(s2), "r"(x)
                 :"memory");
         }
         for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
-- 
cgit v1.2.3


From d752509b7437beefac079021fc5ecbd4e27df6b5 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 5 Aug 2012 22:22:10 +0100
Subject: Use log2(x) instead of log(x) / log(2)

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 avconv.c             | 2 +-
 avprobe.c            | 2 +-
 libavcodec/imc.c     | 4 ++--
 libavcodec/snowenc.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/avconv.c b/avconv.c
index 2bc9d82378..83ff8b45e1 100644
--- a/avconv.c
+++ b/avconv.c
@@ -820,7 +820,7 @@ static void print_report(int is_last_report, int64_t timer_start)
                 if (qp >= 0 && qp < FF_ARRAY_ELEMS(qp_histogram))
                     qp_histogram[qp]++;
                 for (j = 0; j < 32; j++)
-                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log(qp_histogram[j] + 1) / log(2)));
+                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log2(qp_histogram[j] + 1)));
             }
             if (enc->flags&CODEC_FLAG_PSNR) {
                 int j;
diff --git a/avprobe.c b/avprobe.c
index 28746bf18e..8107e54531 100644
--- a/avprobe.c
+++ b/avprobe.c
@@ -468,7 +468,7 @@ static char *value_string(char *buf, int buf_size, double val, const char *unit)
         int index;
 
         if (unit == unit_byte_str && use_byte_value_binary_prefix) {
-            index = (int) (log(val)/log(2)) / 10;
+            index = (int) log2(val) / 10;
             index = av_clip(index, 0, FF_ARRAY_ELEMS(binary_unit_prefixes) - 1);
             val  /= pow(2, index * 10);
             prefix_string = binary_unit_prefixes[index];
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 95495bff96..e452baf39b 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -342,7 +342,7 @@ static void imc_decode_level_coefficients(IMCContext *q, int *levlCoeffBuf,
     // maybe some frequency division thingy
 
     flcoeffs1[0] = 20000.0 / pow (2, levlCoeffBuf[0] * 0.18945); // 0.18945 = log2(10) * 0.05703125
-    flcoeffs2[0] = log(flcoeffs1[0]) / log(2);
+    flcoeffs2[0] = log2f(flcoeffs1[0]);
     tmp  = flcoeffs1[0];
     tmp2 = flcoeffs2[0];
 
@@ -414,7 +414,7 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx,
         highest = FFMAX(highest, chctx->flcoeffs1[i]);
 
     for (i = 0; i < BANDS - 1; i++)
-        chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) / log(2);
+        chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2f(chctx->flcoeffs5[i]);
     chctx->flcoeffs4[BANDS - 1] = limit;
 
     highest = highest * 0.25;
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index 2eaf923540..481dc1a503 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -1529,7 +1529,7 @@ static void update_last_header_values(SnowContext *s){
 }
 
 static int qscale2qlog(int qscale){
-    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
+    return rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA))
            + 61*QROOT/8; ///< 64 > 60
 }
 
-- 
cgit v1.2.3


From fe5fba44c5b2600633a691ea0dc817e3c4940aa6 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 13 Aug 2012 19:01:19 +0200
Subject: configure: Fix typo in mpeg2video/svq1 decoder dependency declaration

---
 configure | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index a284b771a2..bc60b8b8b0 100755
--- a/configure
+++ b/configure
@@ -1412,7 +1412,7 @@ mpeg1video_encoder_select="aandcttables mpegvideo"
 mpeg2_dxva2_hwaccel_deps="dxva2api_h"
 mpeg2_dxva2_hwaccel_select="dxva2 mpeg2video_decoder"
 mpeg2_vaapi_hwaccel_select="vaapi mpeg2video_decoder"
-mpeg2video_encoder_select="mpegvideo"
+mpeg2video_decoder_select="mpegvideo"
 mpeg2video_encoder_select="aandcttables mpegvideo"
 mpeg4_decoder_select="h263_decoder mpeg4video_parser"
 mpeg4_encoder_select="h263_encoder"
@@ -1442,7 +1442,7 @@ shorten_decoder_select="golomb"
 sipr_decoder_select="lsp"
 snow_decoder_select="dwt"
 snow_encoder_select="aandcttables dwt mpegvideo"
-svq1_encoder_select="mpegvideo"
+svq1_decoder_select="mpegvideo"
 svq1_encoder_select="aandcttables mpegvideo"
 svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel mpegvideo"
 svq3_decoder_suggest="zlib"
-- 
cgit v1.2.3


From fb96c1c5fe6b0131a7229e8b89222192465bf298 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Mon, 13 Aug 2012 19:46:04 +0200
Subject: rtmp: handle bytes read reports

0x03 (bytes read report) is a known type and should be safely ignored
beside in debug situations.
---
 libavformat/rtmpproto.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 5fb5ac4eef..1db3152e8d 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -1200,6 +1200,9 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
 #endif
 
     switch (pkt->type) {
+    case RTMP_PT_BYTES_READ:
+        av_dlog(s, "received bytes read report\n");
+        break;
     case RTMP_PT_CHUNK_SIZE:
         if ((ret = handle_chunk_size(s, pkt)) < 0)
             return ret;
-- 
cgit v1.2.3


From cee03436e6f1e3d4893841698e73caa92f2a53c9 Mon Sep 17 00:00:00 2001
From: Boris Maksalov <boris.maksalov@yandex.ru>
Date: Fri, 10 Aug 2012 09:50:35 +0100
Subject: proresenc: use the edge emulation buffer

Prevents reading past the end of frame buffer.

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavcodec/proresenc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/proresenc.c b/libavcodec/proresenc.c
index 374ff45c70..40644bb3b1 100644
--- a/libavcodec/proresenc.c
+++ b/libavcodec/proresenc.c
@@ -251,24 +251,24 @@ static void get_slice_data(ProresContext *ctx, const uint16_t *src,
             ctx->dsp.fdct(esrc, elinesize, blocks);
             blocks += 64;
             if (blocks_per_mb > 2) {
-                ctx->dsp.fdct(src + 8, linesize, blocks);
+                ctx->dsp.fdct(esrc + 8, elinesize, blocks);
                 blocks += 64;
             }
-            ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
+            ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
             blocks += 64;
             if (blocks_per_mb > 2) {
-                ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
+                ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
                 blocks += 64;
             }
         } else {
             ctx->dsp.fdct(esrc, elinesize, blocks);
             blocks += 64;
-            ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
+            ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
             blocks += 64;
             if (blocks_per_mb > 2) {
-                ctx->dsp.fdct(src + 8, linesize, blocks);
+                ctx->dsp.fdct(esrc + 8, elinesize, blocks);
                 blocks += 64;
-                ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
+                ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
                 blocks += 64;
             }
         }
-- 
cgit v1.2.3


From 0af85d57c1e6207abd99ed8da61c6b7529ebfefc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Tue, 14 Aug 2012 01:29:15 +0300
Subject: avprobe: Include libm.h for the log2 fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 avprobe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/avprobe.c b/avprobe.c
index 8107e54531..4e68313201 100644
--- a/avprobe.c
+++ b/avprobe.c
@@ -26,6 +26,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/dict.h"
+#include "libavutil/libm.h"
 #include "libavdevice/avdevice.h"
 #include "cmdutils.h"
 
-- 
cgit v1.2.3


From ad08dfd594187d2de40b89d3b15cb8c43751532f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Mon, 13 Aug 2012 13:00:24 +0200
Subject: build: Factor out mpegvideo encoding dependencies to
 CONFIG_MPEGVIDEOENC

A new hidden config variable is added for the codecs that depend on
the mpegvideo encoding parts.
---
 configure           | 20 +++++++++++---------
 libavcodec/Makefile | 44 ++++++++++++++------------------------------
 2 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/configure b/configure
index bc60b8b8b0..5ffb9a4ccb 100755
--- a/configure
+++ b/configure
@@ -1226,6 +1226,7 @@ CONFIG_EXTRA="
     lpc
     mpegaudiodsp
     mpegvideo
+    mpegvideoenc
     nettle
     rtpdec
     sinewin
@@ -1334,6 +1335,7 @@ dct_select="rdft"
 mdct_select="fft"
 rdft_select="fft"
 mpegaudiodsp_select="dct"
+mpegvideoenc_select="mpegvideo"
 
 # decoders / encoders / hardware accelerators
 aac_decoder_select="mdct sinewin"
@@ -1353,7 +1355,7 @@ cavs_decoder_select="golomb mpegvideo"
 cook_decoder_select="mdct sinewin"
 cscd_decoder_suggest="zlib"
 dca_decoder_select="mdct"
-dnxhd_encoder_select="aandcttables mpegvideo"
+dnxhd_encoder_select="aandcttables mpegvideoenc"
 dxa_decoder_select="zlib"
 eac3_decoder_select="ac3_decoder"
 eac3_encoder_select="mdct ac3dsp"
@@ -1370,9 +1372,9 @@ flv_decoder_select="h263_decoder"
 flv_encoder_select="h263_encoder"
 fraps_decoder_select="huffman"
 h261_decoder_select="mpegvideo"
-h261_encoder_select="aandcttables mpegvideo"
+h261_encoder_select="aandcttables mpegvideoenc"
 h263_decoder_select="h263_parser mpegvideo"
-h263_encoder_select="aandcttables mpegvideo"
+h263_encoder_select="aandcttables mpegvideoenc"
 h263_vaapi_hwaccel_select="vaapi h263_decoder"
 h263i_decoder_select="h263_decoder"
 h263p_encoder_select="h263_encoder"
@@ -1386,10 +1388,10 @@ iac_decoder_select="fft mdct sinewin"
 imc_decoder_select="fft mdct sinewin"
 jpegls_decoder_select="golomb"
 jpegls_encoder_select="golomb"
-ljpeg_encoder_select="aandcttables mpegvideo"
+ljpeg_encoder_select="aandcttables mpegvideoenc"
 loco_decoder_select="golomb"
 mdec_decoder_select="mpegvideo"
-mjpeg_encoder_select="aandcttables mpegvideo"
+mjpeg_encoder_select="aandcttables mpegvideoenc"
 mlp_decoder_select="mlp_parser"
 mp1_decoder_select="mpegaudiodsp"
 mp1float_decoder_select="mpegaudiodsp"
@@ -1408,12 +1410,12 @@ mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h"
 mpeg_xvmc_decoder_select="mpegvideo_decoder"
 mpeg1_vdpau_decoder_select="vdpau mpeg1video_decoder"
 mpeg1video_decoder_select="mpegvideo"
-mpeg1video_encoder_select="aandcttables mpegvideo"
+mpeg1video_encoder_select="aandcttables mpegvideoenc"
 mpeg2_dxva2_hwaccel_deps="dxva2api_h"
 mpeg2_dxva2_hwaccel_select="dxva2 mpeg2video_decoder"
 mpeg2_vaapi_hwaccel_select="vaapi mpeg2video_decoder"
 mpeg2video_decoder_select="mpegvideo"
-mpeg2video_encoder_select="aandcttables mpegvideo"
+mpeg2video_encoder_select="aandcttables mpegvideoenc"
 mpeg4_decoder_select="h263_decoder mpeg4video_parser"
 mpeg4_encoder_select="h263_encoder"
 mpeg4_vaapi_hwaccel_select="vaapi mpeg4_decoder"
@@ -1441,9 +1443,9 @@ rv40_decoder_select="golomb h264chroma h264pred h264qpel mpegvideo"
 shorten_decoder_select="golomb"
 sipr_decoder_select="lsp"
 snow_decoder_select="dwt"
-snow_encoder_select="aandcttables dwt mpegvideo"
+snow_encoder_select="aandcttables dwt mpegvideoenc"
 svq1_decoder_select="mpegvideo"
-svq1_encoder_select="aandcttables mpegvideo"
+svq1_encoder_select="aandcttables mpegvideoenc"
 svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel mpegvideo"
 svq3_decoder_suggest="zlib"
 theora_decoder_select="vp3_decoder"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f2e4d710e7..2e04d10e9f 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -52,6 +52,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP)            += mpegaudiodsp.o                \
                                           mpegaudiodsp_fixed.o          \
                                           mpegaudiodsp_float.o
 OBJS-$(CONFIG_MPEGVIDEO)               += mpegvideo.o mpegvideo_motion.o
+OBJS-$(CONFIG_MPEGVIDEOENC)            += mpegvideo_enc.o mpeg12data.o  \
+                                          motion_est.o ratecontrol.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
@@ -127,9 +129,7 @@ OBJS-$(CONFIG_DCA_DECODER)             += dcadec.o dca.o dcadsp.o      \
                                           dca_parser.o synth_filter.o
 OBJS-$(CONFIG_DFA_DECODER)             += dfa.o
 OBJS-$(CONFIG_DNXHD_DECODER)           += dnxhddec.o dnxhddata.o
-OBJS-$(CONFIG_DNXHD_ENCODER)           += dnxhdenc.o dnxhddata.o       \
-                                          mpegvideo_enc.o motion_est.o \
-                                          ratecontrol.o mpeg12data.o
+OBJS-$(CONFIG_DNXHD_ENCODER)           += dnxhdenc.o dnxhddata.o
 OBJS-$(CONFIG_DPX_DECODER)             += dpx.o
 OBJS-$(CONFIG_DPX_ENCODER)             += dpxenc.o
 OBJS-$(CONFIG_DSICINAUDIO_DECODER)     += dsicinav.o
@@ -176,17 +176,13 @@ OBJS-$(CONFIG_GIF_ENCODER)             += gif.o lzwenc.o
 OBJS-$(CONFIG_GSM_DECODER)             += gsmdec.o gsmdec_data.o msgsmdec.o
 OBJS-$(CONFIG_GSM_MS_DECODER)          += gsmdec.o gsmdec_data.o msgsmdec.o
 OBJS-$(CONFIG_H261_DECODER)            += h261dec.o h261.o error_resilience.o
-OBJS-$(CONFIG_H261_ENCODER)            += h261enc.o h261.o             \
-                                          mpegvideo_enc.o motion_est.o \
-                                          ratecontrol.o mpeg12data.o
+OBJS-$(CONFIG_H261_ENCODER)            += h261enc.o h261.o
 OBJS-$(CONFIG_H263_DECODER)            += h263dec.o h263.o ituh263dec.o        \
                                           mpeg4video.o mpeg4videodec.o flvdec.o\
                                           intelh263dec.o  error_resilience.o
 OBJS-$(CONFIG_H263_VAAPI_HWACCEL)      += vaapi_mpeg4.o
-OBJS-$(CONFIG_H263_ENCODER)            += mpegvideo_enc.o mpeg4video.o      \
-                                          mpeg4videoenc.o motion_est.o      \
-                                          ratecontrol.o h263.o ituh263enc.o \
-                                          flvenc.o mpeg12data.o             \
+OBJS-$(CONFIG_H263_ENCODER)            += mpeg4videoenc.o mpeg4video.o  \
+                                          h263.o ituh263enc.o flvenc.o  \
                                           error_resilience.o
 OBJS-$(CONFIG_H264_DECODER)            += h264.o                               \
                                           h264_loopfilter.o h264_direct.o      \
@@ -216,9 +212,7 @@ OBJS-$(CONFIG_JV_DECODER)              += jvdec.o
 OBJS-$(CONFIG_KGV1_DECODER)            += kgv1dec.o
 OBJS-$(CONFIG_KMVC_DECODER)            += kmvc.o
 OBJS-$(CONFIG_LAGARITH_DECODER)        += lagarith.o lagarithrac.o
-OBJS-$(CONFIG_LJPEG_ENCODER)           += ljpegenc.o mjpegenc.o mjpeg.o \
-                                          mpegvideo_enc.o motion_est.o  \
-                                          ratecontrol.o mpeg12data.o
+OBJS-$(CONFIG_LJPEG_ENCODER)           += ljpegenc.o mjpegenc.o mjpeg.o
 OBJS-$(CONFIG_LOCO_DECODER)            += loco.o
 OBJS-$(CONFIG_MACE3_DECODER)           += mace.o
 OBJS-$(CONFIG_MACE6_DECODER)           += mace.o
@@ -226,9 +220,7 @@ OBJS-$(CONFIG_MDEC_DECODER)            += mdec.o mpeg12.o mpeg12data.o \
                                           error_resilience.o
 OBJS-$(CONFIG_MIMIC_DECODER)           += mimic.o
 OBJS-$(CONFIG_MJPEG_DECODER)           += mjpegdec.o mjpeg.o
-OBJS-$(CONFIG_MJPEG_ENCODER)           += mjpegenc.o mjpeg.o           \
-                                          mpegvideo_enc.o motion_est.o \
-                                          ratecontrol.o mpeg12data.o
+OBJS-$(CONFIG_MJPEG_ENCODER)           += mjpegenc.o mjpeg.o
 OBJS-$(CONFIG_MJPEGB_DECODER)          += mjpegbdec.o mjpegdec.o mjpeg.o
 OBJS-$(CONFIG_MLP_DECODER)             += mlpdec.o mlpdsp.o
 OBJS-$(CONFIG_MMVIDEO_DECODER)         += mmvideo.o
@@ -266,17 +258,13 @@ OBJS-$(CONFIG_MPC8_DECODER)            += mpc8.o mpc.o mpegaudiodec.o      \
 OBJS-$(CONFIG_MPEG_XVMC_DECODER)       += mpegvideo_xvmc.o
 OBJS-$(CONFIG_MPEG1VIDEO_DECODER)      += mpeg12.o mpeg12data.o \
                                           error_resilience.o
-OBJS-$(CONFIG_MPEG1VIDEO_ENCODER)      += mpeg12enc.o mpegvideo_enc.o \
-                                          motion_est.o ratecontrol.o  \
-                                          mpeg12.o mpeg12data.o       \
+OBJS-$(CONFIG_MPEG1VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o          \
                                           error_resilience.o
 OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL)     += dxva2_mpeg2.o
 OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)     += vaapi_mpeg2.o
 OBJS-$(CONFIG_MPEG2VIDEO_DECODER)      += mpeg12.o mpeg12data.o \
                                           error_resilience.o
-OBJS-$(CONFIG_MPEG2VIDEO_ENCODER)      += mpeg12enc.o mpegvideo_enc.o \
-                                          motion_est.o ratecontrol.o  \
-                                          mpeg12.o mpeg12data.o       \
+OBJS-$(CONFIG_MPEG2VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o          \
                                           error_resilience.o
 OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)     += vaapi_mpeg4.o
 OBJS-$(CONFIG_MSMPEG4V1_DECODER)       += msmpeg4.o msmpeg4data.o
@@ -366,10 +354,8 @@ OBJS-$(CONFIG_SMACKER_DECODER)         += smacker.o
 OBJS-$(CONFIG_SMC_DECODER)             += smc.o
 OBJS-$(CONFIG_SNOW_DECODER)            += snowdec.o snow.o rangecoder.o
 OBJS-$(CONFIG_SNOW_ENCODER)            += snowenc.o snow.o rangecoder.o    \
-                                          motion_est.o ratecontrol.o       \
-                                          h263.o                           \
-                                          error_resilience.o ituh263enc.o  \
-                                          mpegvideo_enc.o mpeg12data.o
+                                          h263.o ituh263enc.o          \
+                                          error_resilience.o
 OBJS-$(CONFIG_SOL_DPCM_DECODER)        += dpcm.o
 OBJS-$(CONFIG_SP5X_DECODER)            += sp5xdec.o mjpegdec.o mjpeg.o
 OBJS-$(CONFIG_SRT_DECODER)             += srtdec.o ass.o
@@ -378,10 +364,8 @@ OBJS-$(CONFIG_SUNRAST_ENCODER)         += sunrastenc.o
 OBJS-$(CONFIG_SVQ1_DECODER)            += svq1dec.o svq1.o h263.o \
                                           error_resilience.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o    \
-                                          motion_est.o h263.o \
-                                          error_resilience.o \
-                                          ituh263enc.o mpegvideo_enc.o   \
-                                          ratecontrol.o mpeg12data.o
+                                          h263.o ituh263enc.o \
+                                          error_resilience.o
 OBJS-$(CONFIG_SVQ3_DECODER)            += h264.o svq3.o                       \
                                           h264_loopfilter.o h264_direct.o     \
                                           h264_sei.o h264_ps.o h264_refs.o    \
-- 
cgit v1.2.3


From cfc680ab3982f5c9e4240a9bb69859a49f420113 Mon Sep 17 00:00:00 2001
From: Sebastien Zwickert <dilaroga@free.fr>
Date: Mon, 13 Aug 2012 20:17:45 +0200
Subject: vda: Reuse the bitstream buffer and reallocate it only if needed

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/vda.c      |  2 ++
 libavcodec/vda.h      | 15 +++++++++++++++
 libavcodec/vda_h264.c | 37 +++++++++++--------------------------
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/libavcodec/vda.c b/libavcodec/vda.c
index 3c03dcd3e0..d962c61a3b 100644
--- a/libavcodec/vda.c
+++ b/libavcodec/vda.c
@@ -226,6 +226,8 @@ int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
 
     pthread_mutex_destroy(&vda_ctx->queue_mutex);
 
+    av_freep(&vda_ctx->priv_bitstream);
+
     if (kVDADecoderNoErr != status)
         return status;
 
diff --git a/libavcodec/vda.h b/libavcodec/vda.h
index 79fbfe86ac..ec7d2f75b6 100644
--- a/libavcodec/vda.h
+++ b/libavcodec/vda.h
@@ -138,6 +138,21 @@ struct vda_context {
     * - decoding: Set/Unset by user.
     */
     OSType              cv_pix_fmt_type;
+
+    /**
+    * The current bitstream buffer.
+    */
+    uint8_t             *priv_bitstream;
+
+    /**
+    * The current size of the bitstream.
+    */
+    int                 priv_bitstream_size;
+
+    /**
+    * The reference size used for fast reallocation.
+    */
+    int                 priv_allocated_size;
 };
 
 /** Create the video decoder. */
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index eb7e5c7dc3..a3f8c117c8 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -21,29 +21,18 @@
  */
 
 #include "h264.h"
-#include "h264data.h"
-
 #include "vda_internal.h"
 
-/* This structure is used to store the bitstream of the current frame. */
-struct vda_picture_context {
-    uint8_t *bitstream;
-    int      bitstream_size;
-};
-
 static int start_frame(AVCodecContext *avctx,
                        av_unused const uint8_t *buffer,
                        av_unused uint32_t size)
 {
-    const H264Context *h                = avctx->priv_data;
     struct vda_context *vda_ctx         = avctx->hwaccel_context;
-    struct vda_picture_context *pic_ctx = h->s.current_picture_ptr->f.hwaccel_picture_private;
 
     if (!vda_ctx->decoder)
         return -1;
 
-    pic_ctx->bitstream      = NULL;
-    pic_ctx->bitstream_size = 0;
+    vda_ctx->priv_bitstream_size = 0;
 
     return 0;
 }
@@ -52,24 +41,24 @@ static int decode_slice(AVCodecContext *avctx,
                         const uint8_t *buffer,
                         uint32_t size)
 {
-    H264Context *h                      = avctx->priv_data;
     struct vda_context *vda_ctx         = avctx->hwaccel_context;
-    struct vda_picture_context *pic_ctx = h->s.current_picture_ptr->f.hwaccel_picture_private;
     void *tmp;
 
     if (!vda_ctx->decoder)
         return -1;
 
-    tmp = av_realloc(pic_ctx->bitstream, pic_ctx->bitstream_size+size+4);
+    tmp = av_fast_realloc(vda_ctx->priv_bitstream,
+                          &vda_ctx->priv_allocated_size,
+                          vda_ctx->priv_bitstream_size + size + 4);
     if (!tmp)
         return AVERROR(ENOMEM);
 
-    pic_ctx->bitstream = tmp;
+    vda_ctx->priv_bitstream = tmp;
 
-    AV_WB32(pic_ctx->bitstream + pic_ctx->bitstream_size, size);
-    memcpy(pic_ctx->bitstream + pic_ctx->bitstream_size + 4, buffer, size);
+    AV_WB32(vda_ctx->priv_bitstream + vda_ctx->priv_bitstream_size, size);
+    memcpy(vda_ctx->priv_bitstream + vda_ctx->priv_bitstream_size + 4, buffer, size);
 
-    pic_ctx->bitstream_size += size + 4;
+    vda_ctx->priv_bitstream_size += size + 4;
 
     return 0;
 }
@@ -78,22 +67,19 @@ static int end_frame(AVCodecContext *avctx)
 {
     H264Context *h                      = avctx->priv_data;
     struct vda_context *vda_ctx         = avctx->hwaccel_context;
-    struct vda_picture_context *pic_ctx = h->s.current_picture_ptr->f.hwaccel_picture_private;
     AVFrame *frame                      = &h->s.current_picture_ptr->f;
     int status;
 
-    if (!vda_ctx->decoder || !pic_ctx->bitstream)
+    if (!vda_ctx->decoder || !vda_ctx->priv_bitstream)
         return -1;
 
-    status = ff_vda_decoder_decode(vda_ctx, pic_ctx->bitstream,
-                                   pic_ctx->bitstream_size,
+    status = ff_vda_decoder_decode(vda_ctx, vda_ctx->priv_bitstream,
+                                   vda_ctx->priv_bitstream_size,
                                    frame->reordered_opaque);
 
     if (status)
         av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
 
-    av_freep(&pic_ctx->bitstream);
-
     return status;
 }
 
@@ -105,5 +91,4 @@ AVHWAccel ff_h264_vda_hwaccel = {
     .start_frame    = start_frame,
     .decode_slice   = decode_slice,
     .end_frame      = end_frame,
-    .priv_data_size = sizeof(struct vda_picture_context),
 };
-- 
cgit v1.2.3


From 3c37970637c0e3f1576cae58feed05497e383fc4 Mon Sep 17 00:00:00 2001
From: Sebastien Zwickert <dilaroga@gmail.com>
Date: Tue, 14 Aug 2012 11:45:29 +0200
Subject: vda: support synchronous decoding

Note that the symbols used to run the hardware decoder in asynchronous mode
have been marked deprecated and will be dropped at a future version bump.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/vda.c          | 105 ++++++++++++++++++++++++++++------------------
 libavcodec/vda.h          |  47 +++++++++++++++++++--
 libavcodec/vda_h264.c     |  11 +++--
 libavcodec/vda_internal.h |   4 ++
 libavcodec/version.h      |   3 ++
 5 files changed, 123 insertions(+), 47 deletions(-)

diff --git a/libavcodec/vda.c b/libavcodec/vda.c
index d962c61a3b..3ef8af46dc 100644
--- a/libavcodec/vda.c
+++ b/libavcodec/vda.c
@@ -20,8 +20,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <pthread.h>
-#include <CoreFoundation/CFDictionary.h>
 #include <CoreFoundation/CFNumber.h>
 #include <CoreFoundation/CFData.h>
 #include <CoreFoundation/CFString.h>
@@ -29,6 +27,9 @@
 #include "libavutil/avutil.h"
 #include "vda_internal.h"
 
+#if FF_API_VDA_ASYNC
+#include <CoreFoundation/CFDictionary.h>
+
 /* helper to create a dictionary according to the given pts */
 static CFDictionaryRef vda_dictionary_with_pts(int64_t i_pts)
 {
@@ -77,6 +78,7 @@ static void vda_clear_queue(struct vda_context *vda_ctx)
 
     pthread_mutex_unlock(&vda_ctx->queue_mutex);
 }
+#endif
 
 /* Decoder callback that adds the VDA frame to the queue in display order. */
 static void vda_decoder_callback(void *vda_hw_ctx,
@@ -86,8 +88,6 @@ static void vda_decoder_callback(void *vda_hw_ctx,
                                  CVImageBufferRef image_buffer)
 {
     struct vda_context *vda_ctx = vda_hw_ctx;
-    vda_frame *new_frame;
-    vda_frame *queue_walker;
 
     if (!image_buffer)
         return;
@@ -95,35 +95,42 @@ static void vda_decoder_callback(void *vda_hw_ctx,
     if (vda_ctx->cv_pix_fmt_type != CVPixelBufferGetPixelFormatType(image_buffer))
         return;
 
-    if (!(new_frame = av_mallocz(sizeof(vda_frame))))
-        return;
-    new_frame->next_frame = NULL;
-    new_frame->cv_buffer  = CVPixelBufferRetain(image_buffer);
-    new_frame->pts        = vda_pts_from_dictionary(user_info);
-
-    pthread_mutex_lock(&vda_ctx->queue_mutex);
-
-    queue_walker = vda_ctx->queue;
-
-    if (!queue_walker || new_frame->pts < queue_walker->pts) {
-        /* we have an empty queue, or this frame earlier than the current queue head */
-        new_frame->next_frame = queue_walker;
-        vda_ctx->queue        = new_frame;
+    if (vda_ctx->use_sync_decoding) {
+        vda_ctx->cv_buffer = CVPixelBufferRetain(image_buffer);
     } else {
-        /* walk the queue and insert this frame where it belongs in display order */
-        vda_frame *next_frame;
-        while (1) {
-            next_frame = queue_walker->next_frame;
-            if (!next_frame || new_frame->pts < next_frame->pts) {
-                new_frame->next_frame    = next_frame;
-                queue_walker->next_frame = new_frame;
-                break;
+        vda_frame *new_frame;
+        vda_frame *queue_walker;
+
+        if (!(new_frame = av_mallocz(sizeof(vda_frame))))
+            return;
+        new_frame->next_frame = NULL;
+        new_frame->cv_buffer  = CVPixelBufferRetain(image_buffer);
+        new_frame->pts        = vda_pts_from_dictionary(user_info);
+
+        pthread_mutex_lock(&vda_ctx->queue_mutex);
+
+        queue_walker = vda_ctx->queue;
+
+        if (!queue_walker || new_frame->pts < queue_walker->pts) {
+            /* we have an empty queue, or this frame earlier than the current queue head */
+            new_frame->next_frame = queue_walker;
+            vda_ctx->queue        = new_frame;
+        } else {
+            /* walk the queue and insert this frame where it belongs in display order */
+            vda_frame *next_frame;
+            while (1) {
+                next_frame = queue_walker->next_frame;
+                if (!next_frame || new_frame->pts < next_frame->pts) {
+                    new_frame->next_frame    = next_frame;
+                    queue_walker->next_frame = new_frame;
+                    break;
+                }
+                queue_walker = next_frame;
             }
-            queue_walker = next_frame;
         }
-    }
 
-    pthread_mutex_unlock(&vda_ctx->queue_mutex);
+        pthread_mutex_unlock(&vda_ctx->queue_mutex);
+    }
 }
 
 int ff_vda_create_decoder(struct vda_context *vda_ctx,
@@ -140,7 +147,9 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
     CFMutableDictionaryRef io_surface_properties;
     CFNumberRef cv_pix_fmt;
 
+#if FF_API_VDA_ASYNC
     pthread_mutex_init(&vda_ctx->queue_mutex, NULL);
+#endif
 
     /* Each VCL NAL in the bistream sent to the decoder
      * is preceeded by a 4 bytes length header.
@@ -209,10 +218,7 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
     CFRelease(cv_pix_fmt);
     CFRelease(buffer_attributes);
 
-    if (kVDADecoderNoErr != status)
-        return status;
-
-    return 0;
+    return status;
 }
 
 int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
@@ -222,18 +228,17 @@ int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
     if (vda_ctx->decoder)
         status = VDADecoderDestroy(vda_ctx->decoder);
 
+#if FF_API_VDA_ASYNC
     vda_clear_queue(vda_ctx);
-
     pthread_mutex_destroy(&vda_ctx->queue_mutex);
+#endif
 
     av_freep(&vda_ctx->priv_bitstream);
 
-    if (kVDADecoderNoErr != status)
-        return status;
-
-    return 0;
+    return status;
 }
 
+#if FF_API_VDA_ASYNC
 vda_frame *ff_vda_queue_pop(struct vda_context *vda_ctx)
 {
     vda_frame *top_frame;
@@ -273,8 +278,26 @@ int ff_vda_decoder_decode(struct vda_context *vda_ctx,
     CFRelease(user_info);
     CFRelease(coded_frame);
 
-    if (kVDADecoderNoErr != status)
-        return status;
+    return status;
+}
+#endif
+
+int ff_vda_sync_decode(struct vda_context *vda_ctx)
+{
+    OSStatus status;
+    CFDataRef coded_frame;
+    uint32_t flush_flags = 1 << 0; ///< kVDADecoderFlush_emitFrames
+
+    coded_frame = CFDataCreate(kCFAllocatorDefault,
+                               vda_ctx->priv_bitstream,
+                               vda_ctx->priv_bitstream_size);
+
+    status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, NULL);
+
+    if (kVDADecoderNoErr == status)
+        status = VDADecoderFlush(vda_ctx->decoder, flush_flags);
+
+    CFRelease(coded_frame);
 
-    return 0;
+    return status;
 }
diff --git a/libavcodec/vda.h b/libavcodec/vda.h
index ec7d2f75b6..4daaa1463a 100644
--- a/libavcodec/vda.h
+++ b/libavcodec/vda.h
@@ -29,7 +29,12 @@
  * Public libavcodec VDA header.
  */
 
+#include "libavcodec/version.h"
+
+#if FF_API_VDA_ASYNC
 #include <pthread.h>
+#endif
+
 #include <stdint.h>
 
 // emmintrin.h is unable to compile with -std=c99 -Werror=missing-prototypes
@@ -47,8 +52,11 @@
  * @{
  */
 
+#if FF_API_VDA_ASYNC
 /**
- *  This structure is used to store a decoded frame information and data.
+ * This structure is used to store decoded frame information and data.
+ *
+ * @deprecated Use synchronous decoding mode.
  */
 typedef struct vda_frame {
     /**
@@ -75,6 +83,7 @@ typedef struct vda_frame {
     */
     struct vda_frame    *next_frame;
 } vda_frame;
+#endif
 
 /**
  * This structure is used to provide the necessary configurations and data
@@ -91,9 +100,28 @@ struct vda_context {
     */
     VDADecoder          decoder;
 
+    /**
+    * The Core Video pixel buffer that contains the current image data.
+    *
+    * encoding: unused
+    * decoding: Set by libavcodec. Unset by user.
+    */
+    CVPixelBufferRef    cv_buffer;
+
+    /**
+    * Use the hardware decoder in synchronous mode.
+    *
+    * encoding: unused
+    * decoding: Set by user.
+    */
+    int                 use_sync_decoding;
+
+#if FF_API_VDA_ASYNC
     /**
     * VDA frames queue ordered by presentation timestamp.
     *
+    * @deprecated Use synchronous decoding mode.
+    *
     * - encoding: unused
     * - decoding: Set/Unset by libavcodec.
     */
@@ -102,10 +130,13 @@ struct vda_context {
     /**
     * Mutex for locking queue operations.
     *
+    * @deprecated Use synchronous decoding mode.
+    *
     * - encoding: unused
     * - decoding: Set/Unset by libavcodec.
     */
     pthread_mutex_t     queue_mutex;
+#endif
 
     /**
     * The frame width.
@@ -163,11 +194,21 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
 /** Destroy the video decoder. */
 int ff_vda_destroy_decoder(struct vda_context *vda_ctx);
 
-/** Return the top frame of the queue. */
+#if FF_API_VDA_ASYNC
+/**
+* Return the top frame of the queue.
+*
+* @deprecated Use synchronous decoding mode.
+*/
 vda_frame *ff_vda_queue_pop(struct vda_context *vda_ctx);
 
-/** Release the given frame. */
+/**
+* Release the given frame.
+*
+* @deprecated Use synchronous decoding mode.
+*/
 void ff_vda_release_vda_frame(vda_frame *frame);
+#endif
 
 /**
  * @}
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index a3f8c117c8..e621973432 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -73,9 +73,14 @@ static int end_frame(AVCodecContext *avctx)
     if (!vda_ctx->decoder || !vda_ctx->priv_bitstream)
         return -1;
 
-    status = ff_vda_decoder_decode(vda_ctx, vda_ctx->priv_bitstream,
-                                   vda_ctx->priv_bitstream_size,
-                                   frame->reordered_opaque);
+    if (vda_ctx->use_sync_decoding) {
+        status = ff_vda_sync_decode(vda_ctx);
+        frame->data[3] = (void*)vda_ctx->cv_buffer;
+    } else {
+        status = ff_vda_decoder_decode(vda_ctx, vda_ctx->priv_bitstream,
+                                       vda_ctx->priv_bitstream_size,
+                                       frame->reordered_opaque);
+    }
 
     if (status)
         av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
diff --git a/libavcodec/vda_internal.h b/libavcodec/vda_internal.h
index 364ebfb53e..f5d24c21d7 100644
--- a/libavcodec/vda_internal.h
+++ b/libavcodec/vda_internal.h
@@ -31,11 +31,15 @@
  * @{
  */
 
+#if FF_API_VDA_ASYNC
 /** Send frame data to the hardware decoder. */
 int ff_vda_decoder_decode(struct vda_context *vda_ctx,
                           uint8_t *bitstream,
                           int bitstream_size,
                           int64_t frame_pts);
+#endif
+
+int ff_vda_sync_decode(struct vda_context *vda_ctx);
 
 /* @} */
 
diff --git a/libavcodec/version.h b/libavcodec/version.h
index a57369a9c4..a5ff119806 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -79,5 +79,8 @@
 #ifndef FF_API_CODEC_ID
 #define FF_API_CODEC_ID          (LIBAVCODEC_VERSION_MAJOR < 55)
 #endif
+#ifndef FF_API_VDA_ASYNC
+#define FF_API_VDA_ASYNC         (LIBAVCODEC_VERSION_MAJOR < 55)
+#endif
 
 #endif /* AVCODEC_VERSION_H */
-- 
cgit v1.2.3


From dc87ac55ab81b7242228cbfba997f627bbeb7349 Mon Sep 17 00:00:00 2001
From: Sebastien Zwickert <dilaroga@gmail.com>
Date: Tue, 14 Aug 2012 10:42:52 +0200
Subject: vda: Merge implementation into one file

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/Makefile       |   3 +-
 libavcodec/vda.c          | 303 ----------------------------------------------
 libavcodec/vda_h264.c     | 289 ++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/vda_internal.h |  46 -------
 4 files changed, 285 insertions(+), 356 deletions(-)
 delete mode 100644 libavcodec/vda.c
 delete mode 100644 libavcodec/vda_internal.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 2e04d10e9f..d69ef720e4 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -58,7 +58,6 @@ RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
 OBJS-$(CONFIG_VAAPI)                   += vaapi.o
-OBJS-$(CONFIG_VDA)                     += vda.o
 OBJS-$(CONFIG_VDPAU)                   += vdpau.o
 OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
 
@@ -698,7 +697,7 @@ SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER)  += libschroedinger.h
 SKIPHEADERS-$(CONFIG_MPEG_XVMC_DECODER) += xvmc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_internal.h
-SKIPHEADERS-$(CONFIG_VDA)              += vda.h vda_internal.h
+SKIPHEADERS-$(CONFIG_VDA)              += vda.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h
 SKIPHEADERS-$(HAVE_W32THREADS)         += w32pthreads.h
 
diff --git a/libavcodec/vda.c b/libavcodec/vda.c
deleted file mode 100644
index 3ef8af46dc..0000000000
--- a/libavcodec/vda.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * VDA hardware acceleration
- *
- * copyright (c) 2011 Sebastien Zwickert
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <CoreFoundation/CFNumber.h>
-#include <CoreFoundation/CFData.h>
-#include <CoreFoundation/CFString.h>
-
-#include "libavutil/avutil.h"
-#include "vda_internal.h"
-
-#if FF_API_VDA_ASYNC
-#include <CoreFoundation/CFDictionary.h>
-
-/* helper to create a dictionary according to the given pts */
-static CFDictionaryRef vda_dictionary_with_pts(int64_t i_pts)
-{
-    CFStringRef key           = CFSTR("FF_VDA_DECODER_PTS_KEY");
-    CFNumberRef value         = CFNumberCreate(kCFAllocatorDefault,
-                                               kCFNumberSInt64Type, &i_pts);
-    CFDictionaryRef user_info = CFDictionaryCreate(kCFAllocatorDefault,
-                                                   (const void **)&key,
-                                                   (const void **)&value,
-                                                   1,
-                                                   &kCFTypeDictionaryKeyCallBacks,
-                                                   &kCFTypeDictionaryValueCallBacks);
-    CFRelease(value);
-    return user_info;
-}
-
-/* helper to retrieve the pts from the given dictionary */
-static int64_t vda_pts_from_dictionary(CFDictionaryRef user_info)
-{
-    CFNumberRef pts;
-    int64_t outValue = 0;
-
-    if (!user_info)
-        return 0;
-
-    pts = CFDictionaryGetValue(user_info, CFSTR("FF_VDA_DECODER_PTS_KEY"));
-
-    if (pts)
-        CFNumberGetValue(pts, kCFNumberSInt64Type, &outValue);
-
-    return outValue;
-}
-
-/* Remove and release all frames from the queue. */
-static void vda_clear_queue(struct vda_context *vda_ctx)
-{
-    vda_frame *top_frame;
-
-    pthread_mutex_lock(&vda_ctx->queue_mutex);
-
-    while (vda_ctx->queue) {
-        top_frame      = vda_ctx->queue;
-        vda_ctx->queue = top_frame->next_frame;
-        ff_vda_release_vda_frame(top_frame);
-    }
-
-    pthread_mutex_unlock(&vda_ctx->queue_mutex);
-}
-#endif
-
-/* Decoder callback that adds the VDA frame to the queue in display order. */
-static void vda_decoder_callback(void *vda_hw_ctx,
-                                 CFDictionaryRef user_info,
-                                 OSStatus status,
-                                 uint32_t infoFlags,
-                                 CVImageBufferRef image_buffer)
-{
-    struct vda_context *vda_ctx = vda_hw_ctx;
-
-    if (!image_buffer)
-        return;
-
-    if (vda_ctx->cv_pix_fmt_type != CVPixelBufferGetPixelFormatType(image_buffer))
-        return;
-
-    if (vda_ctx->use_sync_decoding) {
-        vda_ctx->cv_buffer = CVPixelBufferRetain(image_buffer);
-    } else {
-        vda_frame *new_frame;
-        vda_frame *queue_walker;
-
-        if (!(new_frame = av_mallocz(sizeof(vda_frame))))
-            return;
-        new_frame->next_frame = NULL;
-        new_frame->cv_buffer  = CVPixelBufferRetain(image_buffer);
-        new_frame->pts        = vda_pts_from_dictionary(user_info);
-
-        pthread_mutex_lock(&vda_ctx->queue_mutex);
-
-        queue_walker = vda_ctx->queue;
-
-        if (!queue_walker || new_frame->pts < queue_walker->pts) {
-            /* we have an empty queue, or this frame earlier than the current queue head */
-            new_frame->next_frame = queue_walker;
-            vda_ctx->queue        = new_frame;
-        } else {
-            /* walk the queue and insert this frame where it belongs in display order */
-            vda_frame *next_frame;
-            while (1) {
-                next_frame = queue_walker->next_frame;
-                if (!next_frame || new_frame->pts < next_frame->pts) {
-                    new_frame->next_frame    = next_frame;
-                    queue_walker->next_frame = new_frame;
-                    break;
-                }
-                queue_walker = next_frame;
-            }
-        }
-
-        pthread_mutex_unlock(&vda_ctx->queue_mutex);
-    }
-}
-
-int ff_vda_create_decoder(struct vda_context *vda_ctx,
-                          uint8_t *extradata,
-                          int extradata_size)
-{
-    OSStatus status = kVDADecoderNoErr;
-    CFNumberRef height;
-    CFNumberRef width;
-    CFNumberRef format;
-    CFDataRef avc_data;
-    CFMutableDictionaryRef config_info;
-    CFMutableDictionaryRef buffer_attributes;
-    CFMutableDictionaryRef io_surface_properties;
-    CFNumberRef cv_pix_fmt;
-
-#if FF_API_VDA_ASYNC
-    pthread_mutex_init(&vda_ctx->queue_mutex, NULL);
-#endif
-
-    /* Each VCL NAL in the bistream sent to the decoder
-     * is preceeded by a 4 bytes length header.
-     * Change the avcC atom header if needed, to signal headers of 4 bytes. */
-    if (extradata_size >= 4 && (extradata[4] & 0x03) != 0x03) {
-        uint8_t *rw_extradata;
-
-        if (!(rw_extradata = av_malloc(extradata_size)))
-            return AVERROR(ENOMEM);
-
-        memcpy(rw_extradata, extradata, extradata_size);
-
-        rw_extradata[4] |= 0x03;
-
-        avc_data = CFDataCreate(kCFAllocatorDefault, rw_extradata, extradata_size);
-
-        av_freep(&rw_extradata);
-    } else {
-        avc_data = CFDataCreate(kCFAllocatorDefault, extradata, extradata_size);
-    }
-
-    config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                            4,
-                                            &kCFTypeDictionaryKeyCallBacks,
-                                            &kCFTypeDictionaryValueCallBacks);
-
-    height   = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->height);
-    width    = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->width);
-    format   = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->format);
-
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_Height, height);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_Width, width);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_SourceFormat, format);
-    CFDictionarySetValue(config_info, kVDADecoderConfiguration_avcCData, avc_data);
-
-    buffer_attributes = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                                  2,
-                                                  &kCFTypeDictionaryKeyCallBacks,
-                                                  &kCFTypeDictionaryValueCallBacks);
-    io_surface_properties = CFDictionaryCreateMutable(kCFAllocatorDefault,
-                                                      0,
-                                                      &kCFTypeDictionaryKeyCallBacks,
-                                                      &kCFTypeDictionaryValueCallBacks);
-    cv_pix_fmt      = CFNumberCreate(kCFAllocatorDefault,
-                                     kCFNumberSInt32Type,
-                                     &vda_ctx->cv_pix_fmt_type);
-    CFDictionarySetValue(buffer_attributes,
-                         kCVPixelBufferPixelFormatTypeKey,
-                         cv_pix_fmt);
-    CFDictionarySetValue(buffer_attributes,
-                         kCVPixelBufferIOSurfacePropertiesKey,
-                         io_surface_properties);
-
-    status = VDADecoderCreate(config_info,
-                              buffer_attributes,
-                              vda_decoder_callback,
-                              vda_ctx,
-                              &vda_ctx->decoder);
-
-    CFRelease(height);
-    CFRelease(width);
-    CFRelease(format);
-    CFRelease(avc_data);
-    CFRelease(config_info);
-    CFRelease(io_surface_properties);
-    CFRelease(cv_pix_fmt);
-    CFRelease(buffer_attributes);
-
-    return status;
-}
-
-int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
-{
-    OSStatus status = kVDADecoderNoErr;
-
-    if (vda_ctx->decoder)
-        status = VDADecoderDestroy(vda_ctx->decoder);
-
-#if FF_API_VDA_ASYNC
-    vda_clear_queue(vda_ctx);
-    pthread_mutex_destroy(&vda_ctx->queue_mutex);
-#endif
-
-    av_freep(&vda_ctx->priv_bitstream);
-
-    return status;
-}
-
-#if FF_API_VDA_ASYNC
-vda_frame *ff_vda_queue_pop(struct vda_context *vda_ctx)
-{
-    vda_frame *top_frame;
-
-    if (!vda_ctx->queue)
-        return NULL;
-
-    pthread_mutex_lock(&vda_ctx->queue_mutex);
-    top_frame      = vda_ctx->queue;
-    vda_ctx->queue = top_frame->next_frame;
-    pthread_mutex_unlock(&vda_ctx->queue_mutex);
-
-    return top_frame;
-}
-
-void ff_vda_release_vda_frame(vda_frame *frame)
-{
-    if (frame) {
-        CVPixelBufferRelease(frame->cv_buffer);
-        av_freep(&frame);
-    }
-}
-
-int ff_vda_decoder_decode(struct vda_context *vda_ctx,
-                          uint8_t *bitstream,
-                          int bitstream_size,
-                          int64_t frame_pts)
-{
-    OSStatus status = kVDADecoderNoErr;
-    CFDictionaryRef user_info;
-    CFDataRef coded_frame;
-
-    coded_frame = CFDataCreate(kCFAllocatorDefault, bitstream, bitstream_size);
-    user_info   = vda_dictionary_with_pts(frame_pts);
-    status      = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, user_info);
-
-    CFRelease(user_info);
-    CFRelease(coded_frame);
-
-    return status;
-}
-#endif
-
-int ff_vda_sync_decode(struct vda_context *vda_ctx)
-{
-    OSStatus status;
-    CFDataRef coded_frame;
-    uint32_t flush_flags = 1 << 0; ///< kVDADecoderFlush_emitFrames
-
-    coded_frame = CFDataCreate(kCFAllocatorDefault,
-                               vda_ctx->priv_bitstream,
-                               vda_ctx->priv_bitstream_size);
-
-    status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, NULL);
-
-    if (kVDADecoderNoErr == status)
-        status = VDADecoderFlush(vda_ctx->decoder, flush_flags);
-
-    CFRelease(coded_frame);
-
-    return status;
-}
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index e621973432..2d2d228745 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -20,8 +20,182 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <CoreFoundation/CFNumber.h>
+#include <CoreFoundation/CFData.h>
+#include <CoreFoundation/CFString.h>
+
+#include "libavutil/avutil.h"
 #include "h264.h"
-#include "vda_internal.h"
+#include "vda.h"
+
+#if FF_API_VDA_ASYNC
+#include <CoreFoundation/CFDictionary.h>
+
+/* helper to create a dictionary according to the given pts */
+static CFDictionaryRef vda_dictionary_with_pts(int64_t i_pts)
+{
+    CFStringRef key           = CFSTR("FF_VDA_DECODER_PTS_KEY");
+    CFNumberRef value         = CFNumberCreate(kCFAllocatorDefault,
+                                               kCFNumberSInt64Type, &i_pts);
+    CFDictionaryRef user_info = CFDictionaryCreate(kCFAllocatorDefault,
+                                                   (const void **)&key,
+                                                   (const void **)&value,
+                                                   1,
+                                                   &kCFTypeDictionaryKeyCallBacks,
+                                                   &kCFTypeDictionaryValueCallBacks);
+    CFRelease(value);
+    return user_info;
+}
+
+/* helper to retrieve the pts from the given dictionary */
+static int64_t vda_pts_from_dictionary(CFDictionaryRef user_info)
+{
+    CFNumberRef pts;
+    int64_t outValue = 0;
+
+    if (!user_info)
+        return 0;
+
+    pts = CFDictionaryGetValue(user_info, CFSTR("FF_VDA_DECODER_PTS_KEY"));
+
+    if (pts)
+        CFNumberGetValue(pts, kCFNumberSInt64Type, &outValue);
+
+    return outValue;
+}
+
+/* Remove and release all frames from the queue. */
+static void vda_clear_queue(struct vda_context *vda_ctx)
+{
+    vda_frame *top_frame;
+
+    pthread_mutex_lock(&vda_ctx->queue_mutex);
+
+    while (vda_ctx->queue) {
+        top_frame      = vda_ctx->queue;
+        vda_ctx->queue = top_frame->next_frame;
+        ff_vda_release_vda_frame(top_frame);
+    }
+
+    pthread_mutex_unlock(&vda_ctx->queue_mutex);
+}
+
+static int vda_decoder_decode(struct vda_context *vda_ctx,
+                              uint8_t *bitstream,
+                              int bitstream_size,
+                              int64_t frame_pts)
+{
+    OSStatus status = kVDADecoderNoErr;
+    CFDictionaryRef user_info;
+    CFDataRef coded_frame;
+
+    coded_frame = CFDataCreate(kCFAllocatorDefault, bitstream, bitstream_size);
+    user_info   = vda_dictionary_with_pts(frame_pts);
+    status      = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, user_info);
+
+    CFRelease(user_info);
+    CFRelease(coded_frame);
+
+    return status;
+}
+
+vda_frame *ff_vda_queue_pop(struct vda_context *vda_ctx)
+{
+    vda_frame *top_frame;
+
+    if (!vda_ctx->queue)
+        return NULL;
+
+    pthread_mutex_lock(&vda_ctx->queue_mutex);
+    top_frame      = vda_ctx->queue;
+    vda_ctx->queue = top_frame->next_frame;
+    pthread_mutex_unlock(&vda_ctx->queue_mutex);
+
+    return top_frame;
+}
+
+void ff_vda_release_vda_frame(vda_frame *frame)
+{
+    if (frame) {
+        CVPixelBufferRelease(frame->cv_buffer);
+        av_freep(&frame);
+    }
+}
+#endif
+
+/* Decoder callback that adds the VDA frame to the queue in display order. */
+static void vda_decoder_callback(void *vda_hw_ctx,
+                                 CFDictionaryRef user_info,
+                                 OSStatus status,
+                                 uint32_t infoFlags,
+                                 CVImageBufferRef image_buffer)
+{
+    struct vda_context *vda_ctx = vda_hw_ctx;
+
+    if (!image_buffer)
+        return;
+
+    if (vda_ctx->cv_pix_fmt_type != CVPixelBufferGetPixelFormatType(image_buffer))
+        return;
+
+    if (vda_ctx->use_sync_decoding) {
+        vda_ctx->cv_buffer = CVPixelBufferRetain(image_buffer);
+    } else {
+        vda_frame *new_frame;
+        vda_frame *queue_walker;
+
+        if (!(new_frame = av_mallocz(sizeof(vda_frame))))
+            return;
+        new_frame->next_frame = NULL;
+        new_frame->cv_buffer  = CVPixelBufferRetain(image_buffer);
+        new_frame->pts        = vda_pts_from_dictionary(user_info);
+
+        pthread_mutex_lock(&vda_ctx->queue_mutex);
+
+        queue_walker = vda_ctx->queue;
+
+        if (!queue_walker || new_frame->pts < queue_walker->pts) {
+            /* we have an empty queue, or this frame earlier than the current queue head */
+            new_frame->next_frame = queue_walker;
+            vda_ctx->queue        = new_frame;
+        } else {
+            /* walk the queue and insert this frame where it belongs in display order */
+            vda_frame *next_frame;
+            while (1) {
+                next_frame = queue_walker->next_frame;
+                if (!next_frame || new_frame->pts < next_frame->pts) {
+                    new_frame->next_frame    = next_frame;
+                    queue_walker->next_frame = new_frame;
+                    break;
+                }
+                queue_walker = next_frame;
+            }
+        }
+
+        pthread_mutex_unlock(&vda_ctx->queue_mutex);
+    }
+}
+
+static int vda_sync_decode(struct vda_context *vda_ctx)
+{
+    OSStatus status;
+    CFDataRef coded_frame;
+    uint32_t flush_flags = 1 << 0; ///< kVDADecoderFlush_emitFrames
+
+    coded_frame = CFDataCreate(kCFAllocatorDefault,
+                               vda_ctx->priv_bitstream,
+                               vda_ctx->priv_bitstream_size);
+
+    status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, NULL);
+
+    if (kVDADecoderNoErr == status)
+        status = VDADecoderFlush(vda_ctx->decoder, flush_flags);
+
+    CFRelease(coded_frame);
+
+    return status;
+}
+
 
 static int start_frame(AVCodecContext *avctx,
                        av_unused const uint8_t *buffer,
@@ -74,12 +248,12 @@ static int end_frame(AVCodecContext *avctx)
         return -1;
 
     if (vda_ctx->use_sync_decoding) {
-        status = ff_vda_sync_decode(vda_ctx);
+        status = vda_sync_decode(vda_ctx);
         frame->data[3] = (void*)vda_ctx->cv_buffer;
     } else {
-        status = ff_vda_decoder_decode(vda_ctx, vda_ctx->priv_bitstream,
-                                       vda_ctx->priv_bitstream_size,
-                                       frame->reordered_opaque);
+        status = vda_decoder_decode(vda_ctx, vda_ctx->priv_bitstream,
+                                    vda_ctx->priv_bitstream_size,
+                                    frame->reordered_opaque);
     }
 
     if (status)
@@ -88,6 +262,111 @@ static int end_frame(AVCodecContext *avctx)
     return status;
 }
 
+int ff_vda_create_decoder(struct vda_context *vda_ctx,
+                          uint8_t *extradata,
+                          int extradata_size)
+{
+    OSStatus status = kVDADecoderNoErr;
+    CFNumberRef height;
+    CFNumberRef width;
+    CFNumberRef format;
+    CFDataRef avc_data;
+    CFMutableDictionaryRef config_info;
+    CFMutableDictionaryRef buffer_attributes;
+    CFMutableDictionaryRef io_surface_properties;
+    CFNumberRef cv_pix_fmt;
+
+#if FF_API_VDA_ASYNC
+    pthread_mutex_init(&vda_ctx->queue_mutex, NULL);
+#endif
+
+    /* Each VCL NAL in the bistream sent to the decoder
+     * is preceeded by a 4 bytes length header.
+     * Change the avcC atom header if needed, to signal headers of 4 bytes. */
+    if (extradata_size >= 4 && (extradata[4] & 0x03) != 0x03) {
+        uint8_t *rw_extradata;
+
+        if (!(rw_extradata = av_malloc(extradata_size)))
+            return AVERROR(ENOMEM);
+
+        memcpy(rw_extradata, extradata, extradata_size);
+
+        rw_extradata[4] |= 0x03;
+
+        avc_data = CFDataCreate(kCFAllocatorDefault, rw_extradata, extradata_size);
+
+        av_freep(&rw_extradata);
+    } else {
+        avc_data = CFDataCreate(kCFAllocatorDefault, extradata, extradata_size);
+    }
+
+    config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                            4,
+                                            &kCFTypeDictionaryKeyCallBacks,
+                                            &kCFTypeDictionaryValueCallBacks);
+
+    height   = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->height);
+    width    = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->width);
+    format   = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &vda_ctx->format);
+
+    CFDictionarySetValue(config_info, kVDADecoderConfiguration_Height, height);
+    CFDictionarySetValue(config_info, kVDADecoderConfiguration_Width, width);
+    CFDictionarySetValue(config_info, kVDADecoderConfiguration_SourceFormat, format);
+    CFDictionarySetValue(config_info, kVDADecoderConfiguration_avcCData, avc_data);
+
+    buffer_attributes = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                                  2,
+                                                  &kCFTypeDictionaryKeyCallBacks,
+                                                  &kCFTypeDictionaryValueCallBacks);
+    io_surface_properties = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                                      0,
+                                                      &kCFTypeDictionaryKeyCallBacks,
+                                                      &kCFTypeDictionaryValueCallBacks);
+    cv_pix_fmt      = CFNumberCreate(kCFAllocatorDefault,
+                                     kCFNumberSInt32Type,
+                                     &vda_ctx->cv_pix_fmt_type);
+    CFDictionarySetValue(buffer_attributes,
+                         kCVPixelBufferPixelFormatTypeKey,
+                         cv_pix_fmt);
+    CFDictionarySetValue(buffer_attributes,
+                         kCVPixelBufferIOSurfacePropertiesKey,
+                         io_surface_properties);
+
+    status = VDADecoderCreate(config_info,
+                              buffer_attributes,
+                              vda_decoder_callback,
+                              vda_ctx,
+                              &vda_ctx->decoder);
+
+    CFRelease(height);
+    CFRelease(width);
+    CFRelease(format);
+    CFRelease(avc_data);
+    CFRelease(config_info);
+    CFRelease(io_surface_properties);
+    CFRelease(cv_pix_fmt);
+    CFRelease(buffer_attributes);
+
+    return status;
+}
+
+int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
+{
+    OSStatus status = kVDADecoderNoErr;
+
+    if (vda_ctx->decoder)
+        status = VDADecoderDestroy(vda_ctx->decoder);
+
+#if FF_API_VDA_ASYNC
+    vda_clear_queue(vda_ctx);
+    pthread_mutex_destroy(&vda_ctx->queue_mutex);
+#endif
+
+    av_freep(&vda_ctx->priv_bitstream);
+
+    return status;
+}
+
 AVHWAccel ff_h264_vda_hwaccel = {
     .name           = "h264_vda",
     .type           = AVMEDIA_TYPE_VIDEO,
diff --git a/libavcodec/vda_internal.h b/libavcodec/vda_internal.h
deleted file mode 100644
index f5d24c21d7..0000000000
--- a/libavcodec/vda_internal.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * VDA hardware acceleration
- *
- * copyright (c) 2011 Sebastien Zwickert
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_VDA_INTERNAL_H
-#define AVCODEC_VDA_INTERNAL_H
-
-#include "vda.h"
-
-/**
- * @addtogroup VDA_Decoding
- *
- * @{
- */
-
-#if FF_API_VDA_ASYNC
-/** Send frame data to the hardware decoder. */
-int ff_vda_decoder_decode(struct vda_context *vda_ctx,
-                          uint8_t *bitstream,
-                          int bitstream_size,
-                          int64_t frame_pts);
-#endif
-
-int ff_vda_sync_decode(struct vda_context *vda_ctx);
-
-/* @} */
-
-#endif /* AVCODEC_VDA_INTERNAL_H */
-- 
cgit v1.2.3


From 694be29f13622c6b38c747c3bb14c93c95435c4d Mon Sep 17 00:00:00 2001
From: Sebastien Zwickert <dilaroga@gmail.com>
Date: Tue, 14 Aug 2012 11:47:39 +0200
Subject: vda: better frame allocation

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/vda_h264.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index 2d2d228745..5c8f5679f9 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -144,7 +144,7 @@ static void vda_decoder_callback(void *vda_hw_ctx,
         vda_frame *new_frame;
         vda_frame *queue_walker;
 
-        if (!(new_frame = av_mallocz(sizeof(vda_frame))))
+        if (!(new_frame = av_mallocz(sizeof(*new_frame))))
             return;
         new_frame->next_frame = NULL;
         new_frame->cv_buffer  = CVPixelBufferRetain(image_buffer);
-- 
cgit v1.2.3


From 063910f54d04c08b85b7b07ad7368e15b943a722 Mon Sep 17 00:00:00 2001
From: Sebastien Zwickert <dilaroga@gmail.com>
Date: Tue, 14 Aug 2012 11:48:17 +0200
Subject: vda: cosmetics: fix Doxygen comment formatting

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/vda.h | 156 +++++++++++++++++++++++++++----------------------------
 1 file changed, 78 insertions(+), 78 deletions(-)

diff --git a/libavcodec/vda.h b/libavcodec/vda.h
index 4daaa1463a..f0ec2bfec3 100644
--- a/libavcodec/vda.h
+++ b/libavcodec/vda.h
@@ -60,27 +60,27 @@
  */
 typedef struct vda_frame {
     /**
-    * The PTS of the frame.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * The PTS of the frame.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     int64_t             pts;
 
     /**
-    * The CoreVideo buffer that contains the decoded data.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * The CoreVideo buffer that contains the decoded data.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     CVPixelBufferRef    cv_buffer;
 
     /**
-    * A pointer to the next frame.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * A pointer to the next frame.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     struct vda_frame    *next_frame;
 } vda_frame;
 #endif
@@ -93,96 +93,96 @@ typedef struct vda_frame {
  */
 struct vda_context {
     /**
-    * VDA decoder object.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * VDA decoder object.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     VDADecoder          decoder;
 
     /**
-    * The Core Video pixel buffer that contains the current image data.
-    *
-    * encoding: unused
-    * decoding: Set by libavcodec. Unset by user.
-    */
+     * The Core Video pixel buffer that contains the current image data.
+     *
+     * encoding: unused
+     * decoding: Set by libavcodec. Unset by user.
+     */
     CVPixelBufferRef    cv_buffer;
 
     /**
-    * Use the hardware decoder in synchronous mode.
-    *
-    * encoding: unused
-    * decoding: Set by user.
-    */
+     * Use the hardware decoder in synchronous mode.
+     *
+     * encoding: unused
+     * decoding: Set by user.
+     */
     int                 use_sync_decoding;
 
 #if FF_API_VDA_ASYNC
     /**
-    * VDA frames queue ordered by presentation timestamp.
-    *
-    * @deprecated Use synchronous decoding mode.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * VDA frames queue ordered by presentation timestamp.
+     *
+     * @deprecated Use synchronous decoding mode.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     vda_frame           *queue;
 
     /**
-    * Mutex for locking queue operations.
-    *
-    * @deprecated Use synchronous decoding mode.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * Mutex for locking queue operations.
+     *
+     * @deprecated Use synchronous decoding mode.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     pthread_mutex_t     queue_mutex;
 #endif
 
     /**
-    * The frame width.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by user.
-    */
+     * The frame width.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by user.
+     */
     int                 width;
 
     /**
-    * The frame height.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by user.
-    */
+     * The frame height.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by user.
+     */
     int                 height;
 
     /**
-    * The frame format.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by user.
-    */
+     * The frame format.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by user.
+     */
     int                 format;
 
     /**
-    * The pixel format for output image buffers.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by user.
-    */
+     * The pixel format for output image buffers.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by user.
+     */
     OSType              cv_pix_fmt_type;
 
     /**
-    * The current bitstream buffer.
-    */
+     * The current bitstream buffer.
+     */
     uint8_t             *priv_bitstream;
 
     /**
-    * The current size of the bitstream.
-    */
+     * The current size of the bitstream.
+     */
     int                 priv_bitstream_size;
 
     /**
-    * The reference size used for fast reallocation.
-    */
+     * The reference size used for fast reallocation.
+     */
     int                 priv_allocated_size;
 };
 
@@ -196,17 +196,17 @@ int ff_vda_destroy_decoder(struct vda_context *vda_ctx);
 
 #if FF_API_VDA_ASYNC
 /**
-* Return the top frame of the queue.
-*
-* @deprecated Use synchronous decoding mode.
-*/
+ * Return the top frame of the queue.
+ *
+ * @deprecated Use synchronous decoding mode.
+ */
 vda_frame *ff_vda_queue_pop(struct vda_context *vda_ctx);
 
 /**
-* Release the given frame.
-*
-* @deprecated Use synchronous decoding mode.
-*/
+ * Release the given frame.
+ *
+ * @deprecated Use synchronous decoding mode.
+ */
 void ff_vda_release_vda_frame(vda_frame *frame);
 #endif
 
-- 
cgit v1.2.3


From a4b8fc3a8a4ec8e704885d3097b6bfbdc7827b6e Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 13 Aug 2012 23:03:37 +0100
Subject: g723.1: simplify normalize_bits()

This function is always called with a non-negative argument, so
those special cases are not needed.  In the places the argument
might be zero, the return value for a zero argument does not matter
since it would then be used to scale an array full of zeros.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/g723_1.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/libavcodec/g723_1.c b/libavcodec/g723_1.c
index 37b7ff5f09..fd03ed97d4 100644
--- a/libavcodec/g723_1.c
+++ b/libavcodec/g723_1.c
@@ -265,13 +265,6 @@ static int16_t square_root(int val)
  */
 static int normalize_bits(int num, int width)
 {
-    if (!num)
-        return 0;
-    if (num == -1)
-        return width;
-    if (num < 0)
-        num = ~num;
-
     return width - av_log2(num) - 1;
 }
 
-- 
cgit v1.2.3


From 0d230e9312a676266bd6fa3478032db4860221a7 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 13 Aug 2012 23:50:10 +0100
Subject: g723.1: simplify scale_vector()

It is impossible for bits to be 15 here so the special case is
not needed.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/g723_1.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/libavcodec/g723_1.c b/libavcodec/g723_1.c
index fd03ed97d4..793b2d3dd9 100644
--- a/libavcodec/g723_1.c
+++ b/libavcodec/g723_1.c
@@ -283,12 +283,8 @@ static int scale_vector(int16_t *dst, const int16_t *vector, int length)
     max   = FFMIN(max, 0x7FFF);
     bits  = normalize_bits(max, 15);
 
-    if (bits == 15)
-        for (i = 0; i < length; i++)
-            dst[i] = vector[i] * 0x7fff >> 3;
-    else
-        for (i = 0; i < length; i++)
-            dst[i] = vector[i] << bits >> 3;
+    for (i = 0; i < length; i++)
+        dst[i] = vector[i] << bits >> 3;
 
     return bits - 3;
 }
-- 
cgit v1.2.3