From 49b4ac58249746b3564052782f372056d90e3511 Mon Sep 17 00:00:00 2001
From: XhmikosR <xhmikosr@users.sourceforge.net>
Date: Wed, 25 Aug 2010 23:17:34 +0000
Subject: Update ffmpeg: VP6 x86 DSP and SSE imdct_half moved to yasm, global mm_flags removed

git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@2349 10f7b99b-c216-0410-bff0-8a66a9350fd8
---
 src/filters/transform/MPCVideoDec/ffmpeg/Makefile  |   4 +-
 .../transform/MPCVideoDec/ffmpeg/Makefile_2010     |   4 +-
 src/filters/transform/MPCVideoDec/ffmpeg/config.h  |  34 +++-
 .../MPCVideoDec/ffmpeg/libavcodec/dsputil.c        |   3 -
 .../MPCVideoDec/ffmpeg/libavcodec/dsputil.h        |  12 +-
 .../MPCVideoDec/ffmpeg/libavcodec/h263dec.c        |   2 +-
 .../MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c       |   4 +-
 .../MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c      |   9 +-
 .../MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c        |   3 +-
 .../MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c        |   8 +
 .../MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h        |   8 +
 .../transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c  |   2 +-
 .../MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c         |   2 +-
 .../MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c      |   2 +
 .../ffmpeg/libavcodec/x86/dsputil_mmx.c            |  16 +-
 .../MPCVideoDec/ffmpeg/libavcodec/x86/fft.c        |   8 +-
 .../MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c   |   4 +-
 .../MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm  | 202 ++++++++++++++++++++-
 .../MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c    | 103 +----------
 .../ffmpeg/libavcodec/x86/h264dsp_mmx.c            |   2 +-
 .../ffmpeg/libavcodec/x86/mpegvideo_mmx.c          |   2 +
 .../MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c |   2 +-
 .../MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm  | 173 ++++++++++++++++++
 .../ffmpeg/libavcodec/x86/vp56dsp_init.c           |  47 +++++
 .../MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c | 108 -----------
 .../MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h |  30 ---
 .../ffmpeg/libavcodec/x86/vp6dsp_sse2.c            |  98 ----------
 .../ffmpeg/libavcodec/x86/vp6dsp_sse2.h            |  30 ---
 .../ffmpeg/libavcodec/x86/vp8dsp-init.c            |   8 +-
 .../MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm   |  50 ++---
 .../MPCVideoDec/ffmpeg/libavcore/imgutils.c        |  26 +--
 .../MPCVideoDec/ffmpeg/libavcore/imgutils.h        |  35 ++++
 .../MPCVideoDec/ffmpeg/libavutil/common.h          |   7 +
 33 files changed, 567 insertions(+), 481 deletions(-)
 create mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm
 create mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c
 delete mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c
 delete mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h
 delete mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c
 delete mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h

diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
index 7c1807620..666048770 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
@@ -155,8 +155,7 @@ SRCS_C=\
     $(LAVC_DIR)/x86/vc1dsp_mmx.c \
     $(LAVC_DIR)/x86/vp3dsp_mmx.c \
     $(LAVC_DIR)/x86/vp3dsp_sse2.c \
-    $(LAVC_DIR)/x86/vp6dsp_mmx.c \
-    $(LAVC_DIR)/x86/vp6dsp_sse2.c \
+    $(LAVC_DIR)/x86/vp56dsp_init.c \
     $(LAVC_DIR)/x86/vp8dsp-init.c \
 \
     $(LAVCORE_DIR)/avcore_utils.c \
@@ -187,6 +186,7 @@ SRCS_YASM=\
     $(LAVC_DIR)/x86/h264_intrapred.asm \
     $(LAVC_DIR)/x86/h264_weight_sse2.asm \
     $(LAVC_DIR)/x86/vc1dsp_yasm.asm \
+    $(LAVC_DIR)/x86/vp56dsp.asm \
     $(LAVC_DIR)/x86/vp8dsp.asm \
     $(LAVC_DIR)/x86/x86util.asm
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
index b722d5960..27b4f57f1 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
@@ -155,8 +155,7 @@ SRCS_C=\
     $(LAVC_DIR)/x86/vc1dsp_mmx.c \
     $(LAVC_DIR)/x86/vp3dsp_mmx.c \
     $(LAVC_DIR)/x86/vp3dsp_sse2.c \
-    $(LAVC_DIR)/x86/vp6dsp_mmx.c \
-    $(LAVC_DIR)/x86/vp6dsp_sse2.c \
+    $(LAVC_DIR)/x86/vp56dsp_init.c \
     $(LAVC_DIR)/x86/vp8dsp-init.c \
 \
     $(LAVCORE_DIR)/avcore_utils.c \
@@ -187,6 +186,7 @@ SRCS_YASM=\
     $(LAVC_DIR)/x86/h264_intrapred.asm \
     $(LAVC_DIR)/x86/h264_weight_sse2.asm \
     $(LAVC_DIR)/x86/vc1dsp_yasm.asm \
+    $(LAVC_DIR)/x86/vp56dsp.asm \
     $(LAVC_DIR)/x86/vp8dsp.asm \
     $(LAVC_DIR)/x86/x86util.asm
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/config.h b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
index d3b203c61..507cca37f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/config.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
@@ -48,11 +48,12 @@
 // registry switch is not read)
 //#define USE_DPRINTF 1
 
-#define FFMPEG_LICENSE "GPL version 2.1 or later"
+#define FFMPEG_CONFIGURATION "ffdshow custom"
+#define FFMPEG_LICENSE "GPL version 2 or later"
 #define CC_TYPE "gcc"
 #define CC_VERSION __VERSION__
 
-#define ASMALIGN(ZEROBITS) ".align 1 << " #ZEROBITS "\n\t"
+#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t"
 
 // MPC custom code for linking with MSVC
 #if defined(__GNUC__) && ARCH_X86_64
@@ -62,7 +63,36 @@
 #endif
 #define EXTERN_ASM _
 
+#define ARCH_ALPHA 0
+#define ARCH_ARM 0
+#define ARCH_AVR32 0
+#define ARCH_AVR32_AP 0
+#define ARCH_AVR32_UC 0
+#define ARCH_BFIN 0
+#define ARCH_IA64 0
+#define ARCH_M68K 0
+#define ARCH_MIPS 0
+#define ARCH_MIPS64 0
+#define ARCH_PARISC 0
+#define ARCH_PPC 0
+#define ARCH_PPC64 0
+#define ARCH_S390 0
+#define ARCH_SH4 0
+#define ARCH_SPARC 0
+#define ARCH_SPARC64 0
+#define ARCH_TOMI 0
+
 #define HAVE_ALTIVEC 0
+#define HAVE_ARMV5TE 0
+#define HAVE_ARMV6 0
+#define HAVE_ARMV6T2 0
+#define HAVE_ARMVFP 0
+#define HAVE_IWMMXT 0
+#define HAVE_MMI 0
+#define HAVE_NEON 0
+#define HAVE_PPC4XX 0
+#define HAVE_VIS 0
+
 #define HAVE_ALTIVEC_H 0
 #define HAVE_BIGENDIAN 0
 #define HAVE_BSWAP 1
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
index eff067ad6..e4a4a7ad6 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
@@ -4417,9 +4417,6 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
         c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
         c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
     }
-    if (CONFIG_VP6_DECODER) {
-        c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
-    }
 
     c->h261_loop_filter= h261_loop_filter_c;
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
index 778d3dfc1..cfd1b7f33 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
@@ -357,9 +357,6 @@ typedef struct DSPContext {
     void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
     void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
 
-    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
-                             const int16_t *h_weights,const int16_t *v_weights);
-
     /* assume len is a multiple of 4, and arrays are 16-byte aligned */
     void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
     void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
@@ -604,7 +601,6 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
 /* should be defined by architectures supporting
    one or more MultiMedia extension */
 int mm_support(void);
-extern int mm_flags;
 
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
 
@@ -629,16 +625,10 @@ static inline void emms(void)
  #endif
 }
 
-
-#define emms_c() \
-{\
-    if (mm_flags & FF_MM_MMX)\
-        emms();\
-}
+#define emms_c() emms()
 
 #else
 
-#define mm_flags 0
 #define mm_support() 0
 
 #endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
index 5793dd1af..503c3b6ba 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
@@ -542,7 +542,7 @@ retry:
 #endif
 
 #if HAVE_MMX
-    if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & FF_MM_MMX)){
+    if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (mm_support() & FF_MM_MMX)){
         avctx->idct_algo= FF_IDCT_XVIDMMX;
         avctx->coded_width= 0; // force reinit
 //        dsputil_init(&s->dsp, avctx);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
index 05999713b..fcb4f2011 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
@@ -1024,7 +1024,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
     if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
         return -1;
 
-    id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
+    id = get_bits_long(&s->gb, 32);
     id = av_be2ne32(id);
     len -= 6;
 
@@ -1114,7 +1114,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
     /* Apple MJPEG-A */
     if ((s->start_code == APP1) && (len > (0x28 - 8)))
     {
-        id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
+        id = get_bits_long(&s->gb, 32);
         id = av_be2ne32(id);
         len -= 4;
         if (id == AV_RL32("mjpg")) /* Apple MJPEG-A */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
index 4171490ae..489c66f1d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
@@ -937,7 +937,14 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             return -1;
 
         s->current_picture_ptr= pic;
-        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
+        //FIXME use only the vars from current_pic
+        if(s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO) {
+            if(s->picture_structure == PICT_FRAME)
+                s->current_picture_ptr->top_field_first= s->top_field_first;
+            else
+                s->current_picture_ptr->top_field_first= (s->picture_structure == PICT_TOP_FIELD) == s->first_field;
+        } else
+            s->current_picture_ptr->top_field_first= s->top_field_first;
         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
     }
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
index 4983fa7fa..f71ddf2d5 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
@@ -1395,8 +1395,7 @@ return -1;
 #endif
 
     if(s->msmpeg4_version==1){
-        int start_code;
-        start_code = (get_bits(&s->gb, 16)<<16) | get_bits(&s->gb, 16);
+        int start_code = get_bits_long(&s->gb, 32);
         if(start_code!=0x00000100){
             av_log(s->avctx, AV_LOG_ERROR, "invalid startcode\n");
             return -1;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
index f9da3d78d..d67604b01 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
@@ -82,5 +82,13 @@ void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec)
     } else {
         s->edge_filter_hor = vp6_edge_filter_hor;
         s->edge_filter_ver = vp6_edge_filter_ver;
+
+        if (CONFIG_VP6_DECODER) {
+            s->vp6_filter_diag4= ff_vp6_filter_diag4_c;
+        }
     }
+
+    #if HAVE_MMX
+    ff_vp56dsp_init_x86(s, codec);
+    #endif
 }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h
index 2d6941fa2..74a9cb530 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h
@@ -22,13 +22,21 @@
 #define AVCODEC_VP56DSP_H
 
 #include <stdint.h>
+#include "avcodec.h"
 
 typedef struct VP56DSPContext {
     void (*edge_filter_hor)(uint8_t *yuv, int stride, int t);
     void (*edge_filter_ver)(uint8_t *yuv, int stride, int t);
+
+    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
+                             const int16_t *h_weights,const int16_t *v_weights);
 } VP56DSPContext;
 
+void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
+                           const int16_t *h_weights, const int16_t *v_weights);
+
 void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec);
 void ff_vp56dsp_init_arm(VP56DSPContext *s, enum CodecID codec);
+void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec);
 
 #endif /* AVCODEC_VP56DSP_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
index de3be4084..57d357023 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
@@ -559,7 +559,7 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
             vp6_filter_hv4(dst, src+offset1, stride, stride,
                            vp6_block_copy_filter[select][y8]);
         } else {
-            s->dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride,
+            s->vp56dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride,
                              vp6_block_copy_filter[select][x8],
                              vp6_block_copy_filter[select][y8]);
         }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c
index 69a11ee18..1119b5670 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c
@@ -22,7 +22,7 @@
  */
 
 #include "libavutil/common.h"
-#include "dsputil.h"
+#include "vp56dsp.h"
 
 
 void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c
index 663be3c57..e96e3a93c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c
@@ -137,4 +137,6 @@ int mm_support(void)
         (rval&FF_MM_3DNOWEXT) ? "3DNowExt ":"");
 #endif
     return rval;
+
+    /* TODO: allow overriding with ffdshow settings for disabling extensions */
 }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
index 4add01fe9..c4939ec65 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
@@ -30,15 +30,11 @@
 #include "dsputil_mmx.h"
 #include "vp3dsp_mmx.h"
 #include "vp3dsp_sse2.h"
-#include "vp6dsp_mmx.h"
-#include "vp6dsp_sse2.h"
 #include "idct_xvid.h"
 
 //#undef NDEBUG
 //#include <assert.h>
 
-int mm_flags; /* multimedia extension flags */
-
 /* pixel operations */
 DECLARE_ALIGNED(8,  const uint64_t, ff_bone) = 0x0101010101010101ULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
@@ -2504,7 +2500,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
-    mm_flags = mm_support();
+    int mm_flags = mm_support();
 
     if (avctx->dsp_mask) {
         if (avctx->dsp_mask & FF_MM_FORCE)
@@ -2626,10 +2622,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx;
         c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx;
 
-        if (CONFIG_VP6_DECODER) {
-            c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
-        }
-
         if (mm_flags & FF_MM_MMX2) {
             c->prefetch = prefetch_mmx2;
 
@@ -2812,10 +2804,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             H264_QPEL_FUNCS(3, 1, sse2);
             H264_QPEL_FUNCS(3, 2, sse2);
             H264_QPEL_FUNCS(3, 3, sse2);
-
-            if (CONFIG_VP6_DECODER) {
-                c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
-            }
         }
 #if HAVE_SSSE3
         if(mm_flags & FF_MM_SSSE3){
@@ -2898,7 +2886,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 #if CONFIG_H264DSP
 void ff_h264dsp_init_x86(H264DSPContext *c)
 {
-    mm_flags = mm_support();
+    int mm_flags = mm_support();
 
     if (mm_flags & FF_MM_MMX) {
         c->h264_idct_dc_add=
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
index dba8c3faf..eb5c65ecb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
@@ -21,16 +21,12 @@
 
 av_cold void ff_fft_init_mmx(FFTContext *s)
 {
-/* Crashes on 64-bit?
- * ToDo: verify if that is still the case with the current code and with GCC 4.4.x and above
- */
-#if HAVE_YASM && ARCH_X86_32
+#if HAVE_YASM
     int has_vectors = mm_support();
     if (has_vectors & FF_MM_SSE && HAVE_SSE) {
         /* SSE for P3/P4/K8 */
         s->imdct_calc  = ff_imdct_calc_sse;
-        /* crashes DTS decoder */
-        //s->imdct_half  = ff_imdct_half_sse;
+        s->imdct_half  = ff_imdct_half_sse;
         s->fft_permute = ff_fft_permute_sse;
         s->fft_calc    = ff_fft_calc_sse;
     } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
index 8226ae962..9a8108bdd 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
@@ -56,7 +56,7 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z)
 void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
     x86_reg j, k;
-    long n = 1 << s->mdct_bits;
+    long n = s->mdct_size;
     long n2 = n >> 1;
     long n4 = n >> 2;
     long n8 = n >> 3;
@@ -147,7 +147,7 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
 void ff_imdct_calc_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
     x86_reg j, k;
-    long n = 1 << s->mdct_bits;
+    long n = s->mdct_size;
     long n4 = n >> 2;
 
     ff_imdct_half_3dn2(s, output+n4, input);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
index 23a360fa6..31176d6c9 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
@@ -29,6 +29,23 @@
 
 %include "x86inc.asm"
 
+%ifdef ARCH_X86_64
+%define pointer resq
+%else
+%define pointer resd
+%endif
+
+struc FFTContext ; field offsets for the asm below; presumably has to mirror the start of the C FFTContext - confirm whenever that struct changes
+    .nbits:    resd 1
+    .reverse:  resd 1
+    .revtab:   pointer 1 ; pointer-sized field: resq on x86_64, resd otherwise (see the pointer define above)
+    .tmpbuf:   pointer 1
+    .mdctsize: resd 1 ; read by imdct_half_sse and used there as a byte offset
+    .mdctbits: resd 1
+    .tcos:     pointer 1
+    .tsin:     pointer 1
+endstruc
+
 SECTION_RODATA
 
 %define M_SQRT1_2 0.70710678118654752440
@@ -428,6 +445,16 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
 %define SECTION_REL
 %endif
 
+%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
+    lea r2, [dispatch_tab%1] ; arg1 = suffix selecting which dispatch table to use
+    mov r2, [r2 + (%2q-2)*gprsize] ; arg2 = register holding nbits; table index is nbits-2
+%ifdef PIC
+    lea r3, [$$] ; PIC build: add the section start to the loaded table entry
+    add r2, r3
+%endif
+    call r2 ; indirect call to the selected fft routine
+%endmacro ; FFT_DISPATCH
+
 %macro DECL_FFT 2-3 ; nbits, cpu, suffix
 %xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL
 %if %1==5
@@ -464,13 +491,7 @@ section .text
 ; On x86_32, this function does the register saving and restoring for all of fft.
 ; The others pass args in registers and don't spill anything.
 cglobal fft_dispatch%3%2, 2,5,8, z, nbits
-    lea r2, [dispatch_tab%3%2]
-    mov r2, [r2 + (nbitsq-2)*gprsize]
-%ifdef PIC
-    lea r3, [$$]
-    add r2, r3
-%endif
-    call r2
+    FFT_DISPATCH %3%2, nbits
     RET
 %endmacro ; DECL_FFT
 
@@ -481,3 +502,170 @@ DECL_FFT 4, _3dn, _interleave
 DECL_FFT 4, _3dn2
 DECL_FFT 4, _3dn2, _interleave
 
+INIT_XMM
+%undef mulps
+%undef addps
+%undef subps
+%undef unpcklps
+%undef unpckhps
+
+%macro PREROTATER 5 ;-2*k, 2*k, input+n4, tcos+n8, tsin+n8
+    movaps   xmm0, [%3+%2*4] ; { z[k].re,    z[k].im,    z[k+1].re,  z[k+1].im  }
+    movaps   xmm1, [%3+%1*4-0x10] ; { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im }
+    movaps   xmm2, xmm0
+    shufps   xmm0, xmm1, 0x88 ; { z[k].re,    z[k+1].re,  z[-k-2].re, z[-k-1].re }
+    shufps   xmm1, xmm2, 0x77 ; { z[-k-1].im, z[-k-2].im, z[k+1].im,  z[k].im    }
+    movlps   xmm4, [%4+%2*2]
+    movlps   xmm5, [%5+%2*2+0x0]
+    movhps   xmm4, [%4+%1*2-0x8] ; { cos[k],     cos[k+1],   cos[-k-2],  cos[-k-1]  }
+    movhps   xmm5, [%5+%1*2-0x8] ; { sin[k],     sin[k+1],   sin[-k-2],  sin[-k-1]  }
+    movaps   xmm2, xmm0
+    movaps   xmm3, xmm1
+    mulps    xmm0, xmm5 ; re*sin
+    mulps    xmm1, xmm4 ; im*cos
+    mulps    xmm2, xmm4 ; re*cos
+    mulps    xmm3, xmm5 ; im*sin
+    subps    xmm1, xmm0 ; -> re
+    addps    xmm2, xmm3 ; -> im
+    movaps   xmm0, xmm1
+    unpcklps xmm1, xmm2 ; { z[k],    z[k+1]  } left in xmm1
+    unpckhps xmm0, xmm2 ; { z[-k-2], z[-k-1] } left in xmm0
+%endmacro
+
+%macro PREROTATEW 3 ;addr1, addr2, xmm
+    movlps   %1,   %3 ; low 8 bytes of the xmm register go to addr1
+    movhps   %2,   %3 ; high 8 bytes of the xmm register go to addr2
+%endmacro
+
+%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
+    movaps   xmm6, [%4+%1*2] ; A = 16 bytes at arg4 + 2*j (xmm6 and xmm7 are scratch here)
+    movaps   %2,   [%4+%1*2+0x10] ; B = the following 16 bytes
+    movaps   %3,   xmm6
+    movaps   xmm7, %2
+    mulps    xmm6, [%5+%1*1] ; A * table at arg5 (tcos+n8 when invoked from POSROTATESHUF)
+    mulps    %2,   [%6+%1*1] ; B * table at arg6 (tsin+n8 when invoked from POSROTATESHUF)
+    mulps    %3,   [%6+%1*1] ; A * arg6 table
+    mulps    xmm7, [%5+%1*1] ; B * arg5 table
+    subps    %2,   xmm6 ; arg2 register = B*arg6 - A*arg5
+    addps    %3,   xmm7 ; arg3 register = A*arg6 + B*arg5
+%endmacro
+
+%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
+.post: ; each pass rewrites 32 bytes of z at offset 2*j and 32 bytes at offset 2*k
+    CMUL     %1,   xmm0, xmm1, %3, %4, %5
+    CMUL     %2,   xmm4, xmm5, %3, %4, %5
+    shufps   xmm1, xmm1, 0x1b ; 0x1b reverses the order of the four floats
+    shufps   xmm5, xmm5, 0x1b
+    movaps   xmm6, xmm4
+    unpckhps xmm4, xmm1
+    unpcklps xmm6, xmm1
+    movaps   xmm2, xmm0
+    unpcklps xmm0, xmm5
+    unpckhps xmm2, xmm5
+    movaps   [%3+%2*2],      xmm6
+    movaps   [%3+%2*2+0x10], xmm4
+    movaps   [%3+%1*2],      xmm0
+    movaps   [%3+%1*2+0x10], xmm2
+    sub      %2,   0x10 ; k steps downwards
+    add      %1,   0x10 ; j steps upwards; flags from this add drive the branch
+    jl       .post ; repeat while j is still negative
+%endmacro
+
+cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input
+%ifdef ARCH_X86_64
+%define rrevtab r10
+%define rtcos   r11
+%define rtsin   r12
+    push  r10 ; r10-r14 are used below in addition to the 7 GPRs requested from cglobal
+    push  r11
+    push  r12
+    push  r13
+    push  r14
+%else
+%define rrevtab r6 ; x86_32: rrevtab and rtsin share r6, so the pointers are spilled to the stack below
+%define rtsin   r6
+%define rtcos   r5
+%endif
+    mov   r3d, [r0+FFTContext.mdctsize]
+    add   r2, r3 ; input advanced by mdctsize bytes (input+n4 in the C version this replaces)
+    shr   r3, 1
+    mov   rtcos, [r0+FFTContext.tcos]
+    mov   rtsin, [r0+FFTContext.tsin]
+    add   rtcos, r3 ; tcos and tsin advanced by mdctsize/2 bytes (tcos+n8, tsin+n8 in the C version)
+    add   rtsin, r3
+%ifndef ARCH_X86_64
+    push  rtcos
+    push  rtsin
+%endif
+    shr   r3, 1
+    mov   rrevtab, [r0+FFTContext.revtab]
+    add   rrevtab, r3 ; revtab advanced by mdctsize/4 bytes (revtab+n8 in the C version)
+%ifndef ARCH_X86_64
+    push  rrevtab ; stack is now [esp]=revtab, [esp+4]=tsin, [esp+8]=tcos
+%endif
+
+    sub   r3, 4 ; r3 is the 2*k argument of PREROTATER, counting down by 4 per pass
+%ifdef ARCH_X86_64
+    xor   r4, r4
+    sub   r4, r3 ; r4 = -r3, the -2*k argument
+%endif
+.pre: ; pre-rotation loop
+%ifndef ARCH_X86_64
+;unspill
+    xor   r4, r4
+    sub   r4, r3 ; r4 is clobbered in the store step below, so it is recomputed every pass
+    mov   rtsin, [esp+4]
+    mov   rtcos, [esp+8]
+%endif
+
+    PREROTATER r4, r3, r2, rtcos, rtsin
+%ifdef ARCH_X86_64
+    movzx  r5,  word [rrevtab+r4*1-4] ; four 16-bit revtab entries give the output slots for xmm0 and xmm1
+    movzx  r6,  word [rrevtab+r4*1-2]
+    movzx  r13, word [rrevtab+r3*1]
+    movzx  r14, word [rrevtab+r3*1+2]
+    PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0 ; each slot is 8 bytes wide
+    PREROTATEW [r1+r13*8], [r1+r14*8], xmm1
+    add    r4, 4
+%else
+    mov    r6, [esp] ; reload revtab; this overwrites rtsin, which shares r6
+    movzx  r5, word [r6+r4*1-4]
+    movzx  r4, word [r6+r4*1-2]
+    PREROTATEW [r1+r5*8], [r1+r4*8], xmm0
+    movzx  r5, word [r6+r3*1]
+    movzx  r4, word [r6+r3*1+2]
+    PREROTATEW [r1+r5*8], [r1+r4*8], xmm1
+%endif
+    sub    r3, 4
+    jns    .pre ; continue while r3 has not gone negative
+
+    mov  r5, r0 ; keep s, since r0 and r1 become the fft arguments
+    mov  r6, r1 ; keep output
+    mov  r0, r1 ; fft argument z = output
+    mov  r1d, [r5+FFTContext.nbits]
+
+    FFT_DISPATCH _sse, r1
+
+    mov  r0d, [r5+FFTContext.mdctsize]
+    add  r6, r0 ; output advanced by mdctsize bytes (z+n8 in the C version)
+    shr  r0, 1
+%ifndef ARCH_X86_64
+%define rtcos r2
+%define rtsin r3
+    mov  rtcos, [esp+8] ; reload the spilled table pointers into free registers
+    mov  rtsin, [esp+4]
+%endif
+    neg  r0 ; j = -(mdctsize/2)
+    mov  r1, -16
+    sub  r1, r0 ; k = mdctsize/2 - 16
+    POSROTATESHUF r0, r1, r6, rtcos, rtsin
+%ifdef ARCH_X86_64
+    pop  r14
+    pop  r13
+    pop  r12
+    pop  r11
+    pop  r10
+%else
+    add esp, 12 ; drop the three spilled pointers
+%endif
+    RET
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c
index 726e186b5..c4082b15b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c
@@ -71,111 +71,10 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z)
     memcpy(z, s->tmp_buf, n*sizeof(FFTComplex));
 }
 
-void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
-    av_unused x86_reg i, j, k, l;
-    long n = 1 << s->mdct_bits;
-    long n2 = n >> 1;
-    long n4 = n >> 2;
-    long n8 = n >> 3;
-    const uint16_t *revtab = s->revtab + n8;
-    const FFTSample *tcos = s->tcos;
-    const FFTSample *tsin = s->tsin;
-    FFTComplex *z = (FFTComplex *)output;
-
-    /* pre rotation */
-    for(k=n8-2; k>=0; k-=2) {
-        __asm__ volatile(
-            "movaps     (%2,%1,2), %%xmm0 \n" // { z[k].re,    z[k].im,    z[k+1].re,  z[k+1].im  }
-            "movaps  -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im }
-            "movaps        %%xmm0, %%xmm2 \n"
-            "shufps $0x88, %%xmm1, %%xmm0 \n" // { z[k].re,    z[k+1].re,  z[-k-2].re, z[-k-1].re }
-            "shufps $0x77, %%xmm2, %%xmm1 \n" // { z[-k-1].im, z[-k-2].im, z[k+1].im,  z[k].im    }
-            "movlps       (%3,%1), %%xmm4 \n"
-            "movlps       (%4,%1), %%xmm5 \n"
-            "movhps     -8(%3,%0), %%xmm4 \n" // { cos[k],     cos[k+1],   cos[-k-2],  cos[-k-1]  }
-            "movhps     -8(%4,%0), %%xmm5 \n" // { sin[k],     sin[k+1],   sin[-k-2],  sin[-k-1]  }
-            "movaps        %%xmm0, %%xmm2 \n"
-            "movaps        %%xmm1, %%xmm3 \n"
-            "mulps         %%xmm5, %%xmm0 \n" // re*sin
-            "mulps         %%xmm4, %%xmm1 \n" // im*cos
-            "mulps         %%xmm4, %%xmm2 \n" // re*cos
-            "mulps         %%xmm5, %%xmm3 \n" // im*sin
-            "subps         %%xmm0, %%xmm1 \n" // -> re
-            "addps         %%xmm3, %%xmm2 \n" // -> im
-            "movaps        %%xmm1, %%xmm0 \n"
-            "unpcklps      %%xmm2, %%xmm1 \n" // { z[k],    z[k+1]  }
-            "unpckhps      %%xmm2, %%xmm0 \n" // { z[-k-2], z[-k-1] }
-            ::"r"(-4*k), "r"(4*k),
-              "r"(input+n4), "r"(tcos+n8), "r"(tsin+n8)
-        );
-#if ARCH_X86_64
-        // if we have enough regs, don't let gcc make the luts latency-bound
-        // but if not, latency is faster than spilling
-        __asm__("movlps %%xmm0, %0 \n"
-            "movhps %%xmm0, %1 \n"
-            "movlps %%xmm1, %2 \n"
-            "movhps %%xmm1, %3 \n"
-            :"=m"(z[revtab[-k-2]]),
-             "=m"(z[revtab[-k-1]]),
-             "=m"(z[revtab[ k  ]]),
-             "=m"(z[revtab[ k+1]])
-        );
-#else
-        __asm__("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]]));
-        __asm__("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]]));
-        __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k  ]]));
-        __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]]));
-#endif
-    }
-
-    ff_fft_dispatch_sse(z, s->nbits);
-
-    /* post rotation + reinterleave + reorder */
-
-#define CMUL(j,xmm0,xmm1)\
-        "movaps   (%2,"#j",2), %%xmm6 \n"\
-        "movaps 16(%2,"#j",2), "#xmm0"\n"\
-        "movaps        %%xmm6, "#xmm1"\n"\
-        "movaps        "#xmm0",%%xmm7 \n"\
-        "mulps      (%3,"#j"), %%xmm6 \n"\
-        "mulps      (%4,"#j"), "#xmm0"\n"\
-        "mulps      (%4,"#j"), "#xmm1"\n"\
-        "mulps      (%3,"#j"), %%xmm7 \n"\
-        "subps         %%xmm6, "#xmm0"\n"\
-        "addps         %%xmm7, "#xmm1"\n"
-
-    j = -n2;
-    k = n2-16;
-    __asm__ volatile(
-        "1: \n"
-        CMUL(%0, %%xmm0, %%xmm1)
-        CMUL(%1, %%xmm4, %%xmm5)
-        "shufps    $0x1b, %%xmm1, %%xmm1 \n"
-        "shufps    $0x1b, %%xmm5, %%xmm5 \n"
-        "movaps   %%xmm4, %%xmm6 \n"
-        "unpckhps %%xmm1, %%xmm4 \n"
-        "unpcklps %%xmm1, %%xmm6 \n"
-        "movaps   %%xmm0, %%xmm2 \n"
-        "unpcklps %%xmm5, %%xmm0 \n"
-        "unpckhps %%xmm5, %%xmm2 \n"
-        "movaps   %%xmm6,   (%2,%1,2) \n"
-        "movaps   %%xmm4, 16(%2,%1,2) \n"
-        "movaps   %%xmm0,   (%2,%0,2) \n"
-        "movaps   %%xmm2, 16(%2,%0,2) \n"
-        "sub $16, %1 \n"
-        "add $16, %0 \n"
-        "jl 1b \n"
-        :"+&r"(j), "+&r"(k)
-        :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8)
-        :"memory"
-    );
-}
-
 void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
     x86_reg j, k;
-    long n = 1 << s->mdct_bits;
+    long n = s->mdct_size;
     long n4 = n >> 2;
 
     ff_imdct_half_sse(s, output+n4, input);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
index 35a016b2f..4b2e54603 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
@@ -2368,7 +2368,7 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s
 #if CONFIG_H264PRED
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
 {
-    mm_flags = mm_support();
+    int mm_flags = mm_support();
 
 #if HAVE_YASM
     if (mm_flags & FF_MM_MMX) {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
index f9a8847de..75ec4b2cf 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
@@ -625,6 +625,8 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
 
 void MPV_common_init_mmx(MpegEncContext *s)
 {
+    int mm_flags = mm_support();
+
     if (mm_flags & FF_MM_MMX) {
         const int dct_algo = s->avctx->dct_algo;
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
index 3ce097894..eb3ad2c32 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
@@ -714,7 +714,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
 #endif
 
 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
-    mm_flags = mm_support();
+    int mm_flags = mm_support();
 
     dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
     dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm
new file mode 100644
index 000000000..1b3165e54
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm
@@ -0,0 +1,173 @@
+;******************************************************************************
+;* MMX/SSE2-optimized functions for the VP6 decoder
+;* Copyright (C) 2009  Sebastien Lucas <sebastien.lucas@gmail.com>
+;* Copyright (C) 2009  Zuxy Meng <zuxy.meng@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+cextern pw_64
+
+SECTION .text
+
+%macro DIAG4_MMX 6 ; %1 = base pointer, %2-%5 = offsets of the 4 taps, %6 = destination pointer
+    movq          m0, [%1+%2]    ; 8 bytes of tap 0
+    movq          m1, [%1+%3]    ; 8 bytes of tap 1
+    movq          m3, m0         ; keep copies so the high 4 bytes can be widened too
+    movq          m4, m1
+    punpcklbw     m0, m7         ; low 4 bytes -> words (m7 is zeroed by the caller)
+    punpcklbw     m1, m7
+    punpckhbw     m3, m7         ; high 4 bytes -> words
+    punpckhbw     m4, m7
+    pmullw        m0, [rsp+8*11] ; src[x-8 ] * biweight [0]
+    pmullw        m1, [rsp+8*12] ; src[x   ] * biweight [1]
+    pmullw        m3, [rsp+8*11] ; src[x-8 ] * biweight [0]
+    pmullw        m4, [rsp+8*12] ; src[x   ] * biweight [1]
+    paddw         m0, m1
+    paddw         m3, m4
+    movq          m1, [%1+%4]    ; 8 bytes of tap 2
+    movq          m2, [%1+%5]    ; 8 bytes of tap 3
+    movq          m4, m1         ; copies for the high 4 bytes, as for taps 0/1
+    movq          m5, m2
+    punpcklbw     m1, m7         ; low 4 bytes -> words
+    punpcklbw     m2, m7
+    punpckhbw     m4, m7         ; high 4 bytes -> words (punpcklbw here would reuse the low half)
+    punpckhbw     m5, m7
+    pmullw        m1, [rsp+8*13] ; src[x+8 ] * biweight [2]
+    pmullw        m2, [rsp+8*14] ; src[x+16] * biweight [3]
+    pmullw        m4, [rsp+8*13] ; src[x+8 ] * biweight [2]
+    pmullw        m5, [rsp+8*14] ; src[x+16] * biweight [3]
+    paddw         m1, m2
+    paddw         m4, m5
+    paddsw        m0, m1         ; low half: sum of all 4 weighted taps (saturating)
+    paddsw        m3, m4         ; high half: sum of all 4 weighted taps (saturating)
+    paddsw        m0, m6         ; Add 64
+    paddsw        m3, m6         ; Add 64
+    psraw         m0, 7
+    psraw         m3, 7
+    packuswb      m0, m3         ; low + high words -> 8 unsigned saturated bytes
+    movq        [%6], m0
+%endmacro
+
+%macro DIAG4_SSE2 6 ; %1 = base pointer, %2-%5 = offsets of the 4 taps, %6 = destination pointer
+    movq          m0, [%1+%2]    ; 8 bytes of tap 0
+    movq          m1, [%1+%3]    ; 8 bytes of tap 1
+    punpcklbw     m0, m7         ; bytes -> words (m7 is zeroed by the caller)
+    punpcklbw     m1, m7
+    pmullw        m0, m4         ; src[x-8 ] * biweight [0]
+    pmullw        m1, m5         ; src[x   ] * biweight [1]
+    paddw         m0, m1
+    movq          m1, [%1+%4]    ; 8 bytes of tap 2
+    movq          m2, [%1+%5]    ; 8 bytes of tap 3
+    punpcklbw     m1, m7
+    punpcklbw     m2, m7
+    pmullw        m1, m6         ; src[x+8 ] * biweight [2]
+    pmullw        m2, m3         ; src[x+16] * biweight [3]
+    paddw         m1, m2
+    paddsw        m0, m1         ; sum of all 4 weighted taps (saturating)
+    paddsw        m0, [pw_64]    ; Add 64
+    psraw         m0, 7
+    packuswb      m0, m0         ; words -> unsigned saturated bytes, result in the low 8 bytes
+    movq        [%6], m0         ; store 8 output bytes
+%endmacro
+
+%macro SPLAT4REGS_MMX 0 ; in: m3 = 4 words w0..w3; out: each word broadcast and spilled to [rsp+8*11..14]
+    movq         m5, m3          ; copy, used below for w2/w3
+    punpcklwd    m3, m3          ; m3 = w0 w0 w1 w1
+    movq         m4, m3
+    punpckldq    m3, m3          ; m3 = w0 x4
+    punpckhdq    m4, m4          ; m4 = w1 x4
+    punpckhwd    m5, m5          ; m5 = w2 w2 w3 w3
+    movq         m6, m5
+    punpckhdq    m6, m6          ; m6 = w3 x4
+    punpckldq    m5, m5          ; m5 = w2 x4
+    movq [rsp+8*11], m3          ; weight slots read back by DIAG4_MMX
+    movq [rsp+8*12], m4
+    movq [rsp+8*13], m5
+    movq [rsp+8*14], m6
+%endmacro
+
+%macro SPLAT4REGS_SSE2 0 ; in: low qword of m3 = 4 words w0..w3; out: m4/m5/m6/m3 = w0/w1/w2/w3 broadcast
+    pshuflw      m4, m3, 0x0     ; low 4 words = w0
+    pshuflw      m5, m3, 0x55    ; low 4 words = w1
+    pshuflw      m6, m3, 0xAA    ; low 4 words = w2
+    pshuflw      m3, m3, 0xFF    ; low 4 words = w3 (done last because it overwrites the source)
+    punpcklqdq   m4, m4          ; duplicate the low qword into the high qword
+    punpcklqdq   m5, m5
+    punpcklqdq   m6, m6
+    punpcklqdq   m3, m3
+%endmacro
+
+%macro vp6_filter_diag4 2
+; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
+;                                const int16_t h_weights[4], const int16_t v_weights[4])
+cglobal vp6_filter_diag4_%1, 5, 7, %2
+    mov          r5, rsp         ; backup stack pointer
+    and         rsp, ~(mmsize-1) ; align stack
+%ifidn %1, sse2
+    sub         rsp, 8*11        ; scratch: 11 rows of 8 filtered bytes
+%else
+    sub         rsp, 8*15        ; scratch: 11 rows of 8 bytes + 4 splatted weight qwords
+    movq         m6, [pw_64]     ; rounding constant, kept in a register for DIAG4_MMX
+%endif
+%ifdef ARCH_X86_64
+    movsxd       r2, r2d         ; sign-extend the int stride for pointer arithmetic
+%endif
+
+    sub          r1, r2          ; start one row above src
+
+    pxor         m7, m7          ; zero register used for byte->word unpacking
+    movq         m3, [r3]        ; load the 4 horizontal weights
+    SPLAT4REGS
+
+    mov          r3, rsp         ; r3 = write pointer into the scratch rows
+    mov          r6, 11          ; 11 rows are filtered horizontally
+.nextrow
+    DIAG4        r1, -1, 0, 1, 2, r3
+    add          r3, 8
+    add          r1, r2
+    dec          r6
+    jnz .nextrow
+
+    movq         m3, [r4]        ; load the 4 vertical weights
+    SPLAT4REGS
+
+    lea          r3, [rsp+8]     ; second scratch row; the -8 tap reads the first one
+    mov          r6, 8           ; 8 output rows
+.nextcol
+    DIAG4        r3, -8, 0, 8, 16, r0
+    add          r3, 8
+    add          r0, r2
+    dec          r6
+    jnz .nextcol
+
+    mov         rsp, r5          ; restore stack pointer
+    RET
+%endmacro
+
+INIT_MMX
+%define DIAG4      DIAG4_MMX
+%define SPLAT4REGS SPLAT4REGS_MMX
+vp6_filter_diag4 mmx,  0
+
+INIT_XMM
+%define DIAG4      DIAG4_SSE2
+%define SPLAT4REGS SPLAT4REGS_SSE2
+vp6_filter_diag4 sse2, 8
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c
new file mode 100644
index 000000000..5120ed231
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c
@@ -0,0 +1,47 @@
+/*
+ * VP6 MMX/SSE2 optimizations
+ * Copyright (C) 2009  Sebastien Lucas <sebastien.lucas@gmail.com>
+ * Copyright (C) 2009  Zuxy Meng <zuxy.meng@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/vp56dsp.h"
+
+void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride,
+                             const int16_t *h_weights,const int16_t *v_weights);
+void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
+                              const int16_t *h_weights,const int16_t *v_weights);
+
+av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec)
+{
+#if HAVE_YASM /* without yasm this function leaves c untouched */
+    int mm_flags = mm_support();
+
+    if (CONFIG_VP6_DECODER && codec == CODEC_ID_VP6) {
+        if (mm_flags & FF_MM_MMX) {
+            c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
+        }
+
+        if (mm_flags & FF_MM_SSE2) { /* checked last so SSE2 overrides MMX when both flags are set */
+            c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
+        }
+    }
+#endif
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c
deleted file mode 100644
index 905b3a7f0..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * @file
- * MMX-optimized functions for the VP6 decoder
- *
- * Copyright (C) 2009  Sebastien Lucas <sebastien.lucas@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
-#include "vp6dsp_mmx.h"
-
-
-#define DIAG4_MMX(in1,in2,in3,in4)                                            \
-    "movq  "#in1"(%0), %%mm0             \n\t"                                \
-    "movq  "#in2"(%0), %%mm1             \n\t"                                \
-    "movq       %%mm0, %%mm3             \n\t"                                \
-    "movq       %%mm1, %%mm4             \n\t"                                \
-    "punpcklbw  %%mm7, %%mm0             \n\t"                                \
-    "punpcklbw  %%mm7, %%mm1             \n\t"                                \
-    "punpckhbw  %%mm7, %%mm3             \n\t"                                \
-    "punpckhbw  %%mm7, %%mm4             \n\t"                                \
-    "pmullw     0(%2), %%mm0             \n\t" /* src[x-8 ] * biweight [0] */ \
-    "pmullw     8(%2), %%mm1             \n\t" /* src[x   ] * biweight [1] */ \
-    "pmullw     0(%2), %%mm3             \n\t" /* src[x-8 ] * biweight [0] */ \
-    "pmullw     8(%2), %%mm4             \n\t" /* src[x   ] * biweight [1] */ \
-    "paddw      %%mm1, %%mm0             \n\t"                                \
-    "paddw      %%mm4, %%mm3             \n\t"                                \
-    "movq  "#in3"(%0), %%mm1             \n\t"                                \
-    "movq  "#in4"(%0), %%mm2             \n\t"                                \
-    "movq       %%mm1, %%mm4             \n\t"                                \
-    "movq       %%mm2, %%mm5             \n\t"                                \
-    "punpcklbw  %%mm7, %%mm1             \n\t"                                \
-    "punpcklbw  %%mm7, %%mm2             \n\t"                                \
-    "punpckhbw  %%mm7, %%mm4             \n\t"                                \
-    "punpckhbw  %%mm7, %%mm5             \n\t"                                \
-    "pmullw    16(%2), %%mm1             \n\t" /* src[x+8 ] * biweight [2] */ \
-    "pmullw    24(%2), %%mm2             \n\t" /* src[x+16] * biweight [3] */ \
-    "pmullw    16(%2), %%mm4             \n\t" /* src[x+8 ] * biweight [2] */ \
-    "pmullw    24(%2), %%mm5             \n\t" /* src[x+16] * biweight [3] */ \
-    "paddw      %%mm2, %%mm1             \n\t"                                \
-    "paddw      %%mm5, %%mm4             \n\t"                                \
-    "paddsw     %%mm1, %%mm0             \n\t"                                \
-    "paddsw     %%mm4, %%mm3             \n\t"                                \
-    "paddsw     %%mm6, %%mm0             \n\t" /* Add 64 */                   \
-    "paddsw     %%mm6, %%mm3             \n\t" /* Add 64 */                   \
-    "psraw         $7, %%mm0             \n\t"                                \
-    "psraw         $7, %%mm3             \n\t"                                \
-    "packuswb   %%mm3, %%mm0             \n\t"                                \
-    "movq       %%mm0,  (%1)             \n\t"
-
-void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride,
-                             const int16_t *h_weights, const int16_t *v_weights)
-{
-    uint8_t tmp[8*11], *t = tmp;
-    int16_t weights[4*4];
-    int i;
-    src -= stride;
-
-    for (i=0; i<4*4; i++)
-        weights[i] = h_weights[i>>2];
-
-    __asm__ volatile(
-    "pxor %%mm7, %%mm7                   \n\t"
-    "movq "MANGLE(ff_pw_64)", %%mm6      \n\t"
-    "1:                                  \n\t"
-    DIAG4_MMX(-1,0,1,2)
-    "add  $8, %1                         \n\t"
-    "add  %3, %0                         \n\t"
-    "decl %4                             \n\t"
-    "jnz 1b                              \n\t"
-    : "+r"(src), "+r"(t)
-    : "r"(weights), "r"((x86_reg)stride), "r"(11)
-    : "memory");
-
-    t = tmp + 8;
-    for (i=0; i<4*4; i++)
-        weights[i] = v_weights[i>>2];
-
-    __asm__ volatile(
-    "pxor %%mm7, %%mm7                   \n\t"
-    "movq "MANGLE(ff_pw_64)", %%mm6      \n\t"
-    "1:                                  \n\t"
-    DIAG4_MMX(-8,0,8,16)
-    "add  $8, %0                         \n\t"
-    "add  %3, %1                         \n\t"
-    "decl %4                             \n\t"
-    "jnz 1b                              \n\t"
-    : "+r"(t), "+r"(dst)
-    : "r"(weights), "r"((x86_reg)stride), "r"(8)
-    : "memory");
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h
deleted file mode 100644
index 743bc4361..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * vp6dsp MMX function declarations
- * Copyright (c) 2009  Sebastien Lucas <sebastien.lucas@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_X86_VP6DSP_MMX_H
-#define AVCODEC_X86_VP6DSP_MMX_H
-
-#include <stdint.h>
-
-void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride,
-                             const int16_t *h_weights,const int16_t *v_weights);
-
-#endif /* AVCODEC_X86_VP6DSP_MMX_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c
deleted file mode 100644
index bfd733aa7..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * @file
- * SSE2-optimized functions for the VP6 decoder
- *
- * Copyright (C) 2009  Zuxy Meng <zuxy.meng@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
-#include "vp6dsp_sse2.h"
-
-#define DIAG4_SSE2(in1,in2,in3,in4)                                           \
-    "movq  "#in1"(%0), %%xmm0            \n\t"                                \
-    "movq  "#in2"(%0), %%xmm1            \n\t"                                \
-    "punpcklbw %%xmm7, %%xmm0            \n\t"                                \
-    "punpcklbw %%xmm7, %%xmm1            \n\t"                                \
-    "pmullw    %%xmm4, %%xmm0            \n\t" /* src[x-8 ] * biweight [0] */ \
-    "pmullw    %%xmm5, %%xmm1            \n\t" /* src[x   ] * biweight [1] */ \
-    "paddw     %%xmm1, %%xmm0            \n\t"                                \
-    "movq  "#in3"(%0), %%xmm1            \n\t"                                \
-    "movq  "#in4"(%0), %%xmm2            \n\t"                                \
-    "punpcklbw %%xmm7, %%xmm1            \n\t"                                \
-    "punpcklbw %%xmm7, %%xmm2            \n\t"                                \
-    "pmullw    %%xmm6, %%xmm1            \n\t" /* src[x+8 ] * biweight [2] */ \
-    "pmullw    %%xmm3, %%xmm2            \n\t" /* src[x+16] * biweight [3] */ \
-    "paddw     %%xmm2, %%xmm1            \n\t"                                \
-    "paddsw     %%xmm1, %%xmm0           \n\t"                                \
-    "paddsw "MANGLE(ff_pw_64)", %%xmm0   \n\t" /* Add 64 */                   \
-    "psraw         $7, %%xmm0            \n\t"                                \
-    "packuswb  %%xmm0, %%xmm0            \n\t"                                \
-    "movq      %%xmm0,   (%1)            \n\t"                                \
-
-void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
-                              const int16_t *h_weights,const int16_t *v_weights)
-{
-    uint8_t tmp[8*11], *t = tmp;
-    src -= stride;
-
-    __asm__ volatile(
-    "pxor           %%xmm7, %%xmm7       \n\t"
-    "movq               %4, %%xmm3       \n\t"
-    "pshuflw    $0, %%xmm3, %%xmm4       \n\t"
-    "punpcklqdq     %%xmm4, %%xmm4       \n\t"
-    "pshuflw   $85, %%xmm3, %%xmm5       \n\t"
-    "punpcklqdq     %%xmm5, %%xmm5       \n\t"
-    "pshuflw  $170, %%xmm3, %%xmm6       \n\t"
-    "punpcklqdq     %%xmm6, %%xmm6       \n\t"
-    "pshuflw  $255, %%xmm3, %%xmm3       \n\t"
-    "punpcklqdq     %%xmm3, %%xmm3       \n\t"
-    "1:                                  \n\t"
-    DIAG4_SSE2(-1,0,1,2)
-    "add  $8, %1                         \n\t"
-    "add  %2, %0                         \n\t"
-    "decl %3                             \n\t"
-    "jnz 1b                              \n\t"
-    : "+r"(src), "+r"(t)
-    : "g"((x86_reg)stride), "r"(11), "m"(*(const int64_t*)h_weights)
-    : "memory");
-
-    t = tmp + 8;
-
-    __asm__ volatile(
-    "movq               %4, %%xmm3       \n\t"
-    "pshuflw    $0, %%xmm3, %%xmm4       \n\t"
-    "punpcklqdq     %%xmm4, %%xmm4       \n\t"
-    "pshuflw   $85, %%xmm3, %%xmm5       \n\t"
-    "punpcklqdq     %%xmm5, %%xmm5       \n\t"
-    "pshuflw  $170, %%xmm3, %%xmm6       \n\t"
-    "punpcklqdq     %%xmm6, %%xmm6       \n\t"
-    "pshuflw  $255, %%xmm3, %%xmm3       \n\t"
-    "punpcklqdq     %%xmm3, %%xmm3       \n\t"
-    "1:                                  \n\t"
-    DIAG4_SSE2(-8,0,8,16)
-    "add  $8, %0                         \n\t"
-    "add  %2, %1                         \n\t"
-    "decl %3                             \n\t"
-    "jnz 1b                              \n\t"
-    : "+r"(t), "+r"(dst)
-    : "g"((x86_reg)stride), "r"(8), "m"(*(const int64_t*)v_weights)
-    : "memory");
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h
deleted file mode 100644
index a30089a3e..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * vp6dsp SSE2 function declarations
- * Copyright (c) 2009  Zuxy Meng <zuxy.meng@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_X86_VP6DSP_SSE2_H
-#define AVCODEC_X86_VP6DSP_SSE2_H
-
-#include <stdint.h>
-
-void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
-                             const int16_t *h_weights,const int16_t *v_weights);
-
-#endif /* AVCODEC_X86_VP6DSP_SSE2_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
index 40fd0e4e3..ed5cf4602 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
@@ -282,7 +282,7 @@ DECLARE_LOOP_FILTER(sse4)
 
 av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
 {
-    mm_flags = mm_support();
+    int mm_flags = mm_support();
 
 #if HAVE_YASM
     if (mm_flags & FF_MM_MMX) {
@@ -313,14 +313,12 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
     /* note that 4-tap width=16 functions are missing because w=16
      * is only used for luma, and luma is always a copy or sixtap. */
     if (mm_flags & FF_MM_MMX2) {
-#if ARCH_X86_32
         VP8_LUMA_MC_FUNC(0, 16, mmxext);
         VP8_MC_FUNC(1, 8, mmxext);
         VP8_MC_FUNC(2, 4, mmxext);
         VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
         VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
         VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
-#endif
 
         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
@@ -344,12 +342,10 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
     }
 
     if (mm_flags & (FF_MM_SSE2|FF_MM_SSE2SLOW)) {
-#if ARCH_X86_32
         VP8_LUMA_MC_FUNC(0, 16, sse2);
         VP8_MC_FUNC(1, 8, sse2);
         VP8_BILINEAR_MC_FUNC(0, 16, sse2);
         VP8_BILINEAR_MC_FUNC(1, 8, sse2);
-#endif
 
         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
 
@@ -373,14 +369,12 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
     }
 
     if (mm_flags & FF_MM_SSSE3) {
-#if ARCH_X86_32
         VP8_LUMA_MC_FUNC(0, 16, ssse3);
         VP8_MC_FUNC(1, 8, ssse3);
         VP8_MC_FUNC(2, 4, ssse3);
         VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
         VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
         VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
-#endif
 
         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
index 6999e87b6..8cdbb3c7a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
@@ -211,7 +211,7 @@ cglobal put_vp8_epel%1_h6_ssse3, 6, 6, %2
     ; go to next line
     add       r0, r1
     add       r2, r3
-    dec       r4            ; next row
+    dec      r4d            ; next row
     jg .nextrow
     REP_RET
 
@@ -242,7 +242,7 @@ cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3
     ; go to next line
     add       r0, r1
     add       r2, r3
-    dec       r4            ; next row
+    dec      r4d            ; next row
     jg .nextrow
     REP_RET
 
@@ -281,7 +281,7 @@ cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2
     ; go to next line
     add        r0, r1
     add        r2, r3
-    dec        r4                          ; next row
+    dec       r4d                          ; next row
     jg .nextrow
     REP_RET
 
@@ -328,7 +328,7 @@ cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2
     ; go to next line
     add        r0, r1
     add        r2, r3
-    dec        r4                          ; next row
+    dec       r4d                          ; next row
     jg .nextrow
     REP_RET
 %endmacro
@@ -381,7 +381,7 @@ cglobal put_vp8_epel4_h4_mmxext, 6, 6
     ; go to next line
     add        r0, r1
     add        r2, r3
-    dec        r4                          ; next row
+    dec       r4d                          ; next row
     jg .nextrow
     REP_RET
 
@@ -438,7 +438,7 @@ cglobal put_vp8_epel4_h6_mmxext, 6, 6
     ; go to next line
     add        r0, r1
     add        r2, r3
-    dec        r4                          ; next row
+    dec       r4d                          ; next row
     jg .nextrow
     REP_RET
 
@@ -486,7 +486,7 @@ cglobal put_vp8_epel8_h4_sse2, 6, 6, 10
     ; go to next line
     add       r0, r1
     add       r2, r3
-    dec       r4            ; next row
+    dec      r4d            ; next row
     jg .nextrow
     REP_RET
 
@@ -548,7 +548,7 @@ cglobal put_vp8_epel8_h6_sse2, 6, 6, 14
     ; go to next line
     add       r0, r1
     add       r2, r3
-    dec       r4            ; next row
+    dec      r4d            ; next row
     jg .nextrow
     REP_RET
 
@@ -601,7 +601,7 @@ cglobal put_vp8_epel%2_v4_%1, 7, 7, %3
     ; go to next line
     add       r0, r1
     add       r2, r3
-    dec       r4                           ; next row
+    dec      r4d                           ; next row
     jg .nextrow
     REP_RET
 
@@ -666,7 +666,7 @@ cglobal put_vp8_epel%2_v6_%1, 7, 7, %3
     ; go to next line
     add       r0, r1
     add       r2, r3
-    dec       r4                           ; next row
+    dec      r4d                           ; next row
     jg .nextrow
     REP_RET
 %endmacro
@@ -718,7 +718,7 @@ cglobal put_vp8_bilinear%2_v_%1, 7,7,%3
 
     lea       r0, [r0+r1*2]
     lea       r2, [r2+r3*2]
-    sub       r4, 2
+    sub      r4d, 2
     jg .nextrow
     REP_RET
 
@@ -764,7 +764,7 @@ cglobal put_vp8_bilinear%2_h_%1, 7,7,%3
 
     lea       r0, [r0+r1*2]
     lea       r2, [r2+r3*2]
-    sub       r4, 2
+    sub      r4d, 2
     jg .nextrow
     REP_RET
 %endmacro
@@ -807,7 +807,7 @@ cglobal put_vp8_bilinear%1_v_ssse3, 7,7
 
     lea       r0, [r0+r1*2]
     lea       r2, [r2+r3*2]
-    sub       r4, 2
+    sub      r4d, 2
     jg .nextrow
     REP_RET
 
@@ -843,7 +843,7 @@ cglobal put_vp8_bilinear%1_h_ssse3, 7,7
 
     lea       r0, [r0+r1*2]
     lea       r2, [r2+r3*2]
-    sub       r4, 2
+    sub      r4d, 2
     jg .nextrow
     REP_RET
 %endmacro
@@ -1470,8 +1470,8 @@ VP8_DC_WHT sse
     pshufb         %1, %3
 %endmacro
 
-%macro SIMPLE_LOOPFILTER 3
-cglobal vp8_%2_loop_filter_simple_%1, 3, %3
+%macro SIMPLE_LOOPFILTER 4
+cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4
 %if mmsize == 8 ; mmx/mmxext
     mov            r3, 2
 %endif
@@ -1612,21 +1612,21 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3
 
 INIT_MMX
 %define SPLATB_REG SPLATB_REG_MMX
-SIMPLE_LOOPFILTER mmx,    v, 4
-SIMPLE_LOOPFILTER mmx,    h, 5
+SIMPLE_LOOPFILTER mmx,    v, 4, 0
+SIMPLE_LOOPFILTER mmx,    h, 5, 0
 %define SPLATB_REG SPLATB_REG_MMXEXT
-SIMPLE_LOOPFILTER mmxext, v, 4
-SIMPLE_LOOPFILTER mmxext, h, 5
+SIMPLE_LOOPFILTER mmxext, v, 4, 0
+SIMPLE_LOOPFILTER mmxext, h, 5, 0
 INIT_XMM
 %define SPLATB_REG SPLATB_REG_SSE2
 %define WRITE_8W   WRITE_8W_SSE2
-SIMPLE_LOOPFILTER sse2,   v, 3
-SIMPLE_LOOPFILTER sse2,   h, 5
+SIMPLE_LOOPFILTER sse2,   v, 3, 8
+SIMPLE_LOOPFILTER sse2,   h, 5, 8
 %define SPLATB_REG SPLATB_REG_SSSE3
-SIMPLE_LOOPFILTER ssse3,  v, 3
-SIMPLE_LOOPFILTER ssse3,  h, 5
+SIMPLE_LOOPFILTER ssse3,  v, 3, 8
+SIMPLE_LOOPFILTER ssse3,  h, 5, 8
 %define WRITE_8W   WRITE_8W_SSE4
-SIMPLE_LOOPFILTER sse4,   h, 5
+SIMPLE_LOOPFILTER sse4,   h, 5, 8
 
 ;-----------------------------------------------------------------------------
 ; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
index 84db01ae7..ebaeff16c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
@@ -29,21 +29,12 @@ int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane)
     const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
     int max_step     [4];       /* max pixel step for each plane */
     int max_step_comp[4];       /* the component for each plane which has the max pixel step */
-    int s, i;
+    int s;
 
     if (desc->flags & PIX_FMT_BITSTREAM)
         return (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
 
-    memset(max_step     , 0, sizeof(max_step     ));
-    memset(max_step_comp, 0, sizeof(max_step_comp));
-    for (i = 0; i < 4; i++) {
-        const AVComponentDescriptor *comp = &(desc->comp[i]);
-        if ((comp->step_minus1+1) > max_step[comp->plane]) {
-            max_step     [comp->plane] = comp->step_minus1+1;
-            max_step_comp[comp->plane] = i;
-        }
-    }
-
+    av_fill_image_max_pixsteps(max_step, max_step_comp, desc);
     s = (max_step_comp[plane] == 1 || max_step_comp[plane] == 2) ? desc->log2_chroma_w : 0;
     return max_step[plane] * (((width + (1 << s) - 1)) >> s);
 }
@@ -65,16 +56,7 @@ int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
         return 0;
     }
 
-    memset(max_step     , 0, sizeof(max_step     ));
-    memset(max_step_comp, 0, sizeof(max_step_comp));
-    for (i = 0; i < 4; i++) {
-        const AVComponentDescriptor *comp = &(desc->comp[i]);
-        if ((comp->step_minus1+1) > max_step[comp->plane]) {
-            max_step     [comp->plane] = comp->step_minus1+1;
-            max_step_comp[comp->plane] = i;
-        }
-    }
-
+    av_fill_image_max_pixsteps(max_step, max_step_comp, desc);
     for (i = 0; i < 4; i++) {
         int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
         linesizes[i] = max_step[i] * (((width + (1 << s) - 1)) >> s);
@@ -132,7 +114,7 @@ int av_check_image_size(unsigned int w, unsigned int h, int log_offset, void *lo
 {
     ImgUtils imgutils = { &imgutils_class, log_offset, log_ctx };
 
-    if((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8)
+    if ((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8)
         return 0;
 
     av_log(&imgutils, AV_LOG_ERROR, "Picture size %ux%u is invalid\n", w, h);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
index c2cf6eb53..8e08d4738 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
@@ -24,9 +24,44 @@
  * misc image utilities
  */
 
+#include "libavutil/pixdesc.h"
 #include "libavutil/pixfmt.h"
 #include "avcore.h"
 
+/**
+ * Compute the max pixel step for each plane of an image with a
+ * format described by pixdesc.
+ *
+ * The pixel step is the distance in bytes between the first byte of
+ * the group of bytes which describes a pixel component and the first
+ * byte of the next such group in the same plane for the same component.
+ *
+ * @param max_pixsteps array of 4 ints, zeroed and then filled with the
+ * max pixel step of each plane. A plane may hold several components, so
+ * max_pixsteps[plane] is the largest step among that plane's components.
+ * @param max_pixstep_comps array of 4 ints, zeroed and then filled, per
+ * plane, with the index in pixdesc->comp of the component which has the
+ * max pixel step. May be NULL, in which case it is left untouched.
+ * @param pixdesc pixel format descriptor to scan; it is dereferenced unchecked.
+ */
+static inline void av_fill_image_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
+                                              const AVPixFmtDescriptor *pixdesc)
+{
+    int i;
+    memset(max_pixsteps, 0, 4*sizeof(max_pixsteps[0])); /* array params decay to pointers, so the size is spelled as 4 elements */
+    if (max_pixstep_comps)
+        memset(max_pixstep_comps, 0, 4*sizeof(max_pixstep_comps[0])); /* planes never referenced below keep 0 */
+
+    for (i = 0; i < 4; i++) { /* visit all four component slots of the descriptor */
+        const AVComponentDescriptor *comp = &(pixdesc->comp[i]);
+        if ((comp->step_minus1+1) > max_pixsteps[comp->plane]) { /* strict '>': on equal steps the lower component index is kept */
+            max_pixsteps[comp->plane] = comp->step_minus1+1; /* comp->plane indexes the array unchecked; presumably always < 4 */
+            if (max_pixstep_comps)
+                max_pixstep_comps[comp->plane] = i;
+        }
+    }
+}
+
 /**
  * Compute the size of an image line with format pix_fmt and width
  * width for the plane plane.
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
index 13fc7cdc2..5d5e0f2c4 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
@@ -35,6 +35,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "attributes.h"
+#include "libavutil/avconfig.h"
 
 #if defined(_MSC_VER) & !defined(__cplusplus)
 #    define inline __inline
@@ -42,6 +43,12 @@
 
 #ifdef HAVE_AV_CONFIG_H
 
+#if AV_HAVE_BIGENDIAN /* AV_NE(be, le): expands to (be) when AV_HAVE_BIGENDIAN is nonzero, else to (le) */
+#   define AV_NE(be, le) (be) /* the unselected argument is dropped at preprocessing time, never evaluated */
+#else /* NOTE(review): AV_HAVE_BIGENDIAN is presumably supplied by libavutil/avconfig.h - confirm */
+#   define AV_NE(be, le) (le)
+#endif /* AV_HAVE_BIGENDIAN */
+
 //rounded division & shift
 #define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
 /* assume b>0 */
-- 
cgit v1.2.3