Updated FFmpeg

git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1792 10f7b99b-c216-0410-bff0-8a66a9350fd8
author: clsid2 <clsid2@users.sourceforge.net> 2010-04-19 23:29:00 +0400
committer: clsid2 <clsid2@users.sourceforge.net> 2010-04-19 23:29:00 +0400
commit: e220e2c9e47b5c1f98eec2c8deead4074fa07c76 (patch)
tree: cf616685c536ce5089d76c46dfe3df14b17a3924 /src/filters/transform/MPCVideoDec
parent: 0b7b1d06b32b43517f7b584207533ecfe5ce20fa (diff)
155 files changed, 7913 insertions, 10242 deletions
diff --git a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj
index a6728816e..04ac5372e 100644
--- a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj
+++ b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj
@@ -203,7 +203,7 @@
 			<Tool
 				Name="VCLinkerTool"
 				LinkLibraryDependencies="false"
-				AdditionalDependencies="decssRU.lib dsutilRU.lib filtersRU.lib strmbaseRU.lib Winmm.lib zlibR.lib basevideofilterRU.lib libavcodec_gcc.lib libgcc.a libmingwex.a d3d9.lib"
+				AdditionalDependencies="dsutilRU.lib filtersRU.lib strmbaseRU.lib Winmm.lib zlibR.lib basevideofilterRU.lib libavcodec_gcc.lib libgcc.a libmingwex.a d3d9.lib"
 				OutputFile="..\..\..\..\bin\x86\$(ProjectName).ax"
 				AdditionalLibraryDirectories="..\..\..\..\lib"
 				ModuleDefinitionFile="MPCVideoDec.def"
diff --git a/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp b/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp
index d87a0f92a..7e0ed8612 100644
--- a/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp
+++ b/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp
@@ -37,8 +37,6 @@ extern "C"
 {
 	#include "FfmpegContext.h"
 	#include "libswscale/swscale.h"
-
-	extern void init_libvo();
 }
 
 #include "../../../DSUtil/DSUtil.h"
@@ -638,9 +636,6 @@ CMPCVideoDecFilter::CMPCVideoDecFilter(LPUNKNOWN lpunk, HRESULT* phr)
 	avcodec_init();
 	avcodec_register_all();
 	av_log_set_callback(LogLibAVCodec);
-#if INCLUDE_MPC_VIDEO_DECODER
-	init_libvo();
-#endif
 
 	EnumWindows(EnumFindProcessWnd, (LPARAM)&hWnd);
 	DetectVideoCard(hWnd);
diff --git a/src/filters/transform/MPCVideoDec/ffImgfmt.cpp b/src/filters/transform/MPCVideoDec/ffImgfmt.cpp
index 1959b476b..7718cd9a4 100644
--- a/src/filters/transform/MPCVideoDec/ffImgfmt.cpp
+++ b/src/filters/transform/MPCVideoDec/ffImgfmt.cpp
@@ -24,8 +24,6 @@
 #include <moreuuids.h>
 #include "char_t.h"
 
-
-
 const TcspInfo cspInfos[]=
 {
  {
@@ -288,9 +286,9 @@ const TcspFcc cspFccs[]=
 
 char_t* csp_getName(int csp,char_t *buf,size_t len)
 {
- return csp_getName(csp_getInfo(csp),csp,buf,len);
+ return csp_getName2(csp_getInfo(csp),csp,buf,len);
 }
-char_t* csp_getName(const TcspInfo *cspInfo,int csp,char_t *buf,size_t len)
+char_t* csp_getName2(const TcspInfo *cspInfo,int csp,char_t *buf,size_t len)
 {
  const char_t *colorspaceName=cspInfo?cspInfo->name:_l("unknown");
  _sntprintf_s(buf,
@@ -970,7 +968,6 @@ int csp_bestMatch(int inCSP,int wantedCSPS,int *rank)
 }
 
 
-
 bool TcspInfos::TsortFc::operator ()(const TcspInfo* &csp1,const TcspInfo* &csp2)
 {
  int rank1;csp_bestMatch(csp,csp1->id,&rank1);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
index 4dda3b7d2..26d79fe53 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
@@ -21,6 +21,7 @@ SRCS_C=\
 		$(LAVC_DIR)/ac3tab.c \
 		$(LAVC_DIR)/adpcm.c \
 		$(LAVC_DIR)/allcodecs.c \
+		$(LAVC_DIR)/avfft.c \
 		$(LAVC_DIR)/bitstream.c \
 		$(LAVC_DIR)/cabac.c \
 		$(LAVC_DIR)/CompilatorVersion.c\
@@ -37,6 +38,7 @@ SRCS_C=\
 		$(LAVC_DIR)/h263_parser.c \
 		$(LAVC_DIR)/h263dec.c \
 		$(LAVC_DIR)/h264.c \
+		$(LAVC_DIR)/h264dsp.c \
 		$(LAVC_DIR)/h264_cabac.c \
 		$(LAVC_DIR)/h264_cavlc.c \
 		$(LAVC_DIR)/h264_direct.c \
@@ -58,8 +60,8 @@ SRCS_C=\
 		$(LAVC_DIR)/jpeglsdec.c \
 		$(LAVC_DIR)/jrevdct.c \
 		$(LAVC_DIR)/libamr.c \
+		$(LAVC_DIR)/lpc.c \
 		$(LAVC_DIR)/mdct.c \
-		$(LAVC_DIR)/mdct_tablegen.c \
 		$(LAVC_DIR)/mjpeg.c \
 		$(LAVC_DIR)/mjpegdec.c \
 		$(LAVC_DIR)/mlp.c \
@@ -90,7 +92,6 @@ SRCS_C=\
 		$(LAVC_DIR)/svq1.c \
 		$(LAVC_DIR)/svq1dec.c \
 		$(LAVC_DIR)/svq3.c \
-		$(LAVC_DIR)/tableprint.c \
 		$(LAVC_DIR)/utils.c \
 		$(LAVC_DIR)/vc1.c \
 		$(LAVC_DIR)/vc1dec.c \
@@ -111,9 +112,11 @@ SRCS_C=\
 		$(LAVC_DIR)/amr_float/sp_dec.c \
 	\
 		$(LAVU_DIR)/crc.c \
+		$(LAVU_DIR)/intfloat_readwrite.c \
 		$(LAVU_DIR)/log.c \
 		$(LAVU_DIR)/mathematics.c \
 		$(LAVU_DIR)/mem.c \
+		$(LAVU_DIR)/pixdesc.c \
 		$(LAVU_DIR)/random_seed.c \
 		$(LAVU_DIR)/rational.c \
 		$(LAVU_DIR)/lfg.c \
@@ -133,14 +136,12 @@ SRCS_C=\
 		$(LAVC_DIR)/x86/vp3dsp_sse2.c \
 		$(LAVC_DIR)/x86/vp6dsp_mmx.c \
 		$(LAVC_DIR)/x86/vp6dsp_sse2.c
-
+	
 SRCS_C+=\
-		$(LIBV_DIR)/aclib.c \
-		$(LIBV_DIR)/libvoinit.c \
 		$(LSWS_DIR)/rgb2rgb.c \
 		$(LSWS_DIR)/yuv2rgb.c \
 		$(LSWS_DIR)/swscale.c \
-		$(LSWS_DIR)/libswscalew32thread.c
+		$(LSWS_DIR)/sww32thread.c
 		
 ifneq ($(64BIT),yes)
 SRCS_C+=\
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/config.h b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
index 27413f2d5..a2e01124b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/config.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
@@ -1,20 +1,33 @@
+#ifndef FFMPEG_CONFIG_H
+#define FFMPEG_CONFIG_H
+
 #ifdef __GNUC__
 	#define HAVE_INLINE_ASM 1
-  #define HAVE_MMX 1
-  #define HAVE_SSE 1
-  #define HAVE_SSSE3 1
-  #define HAVE_AMD3DNOW 1
-  #define HAVE_AMD3DNOWEXT 1
-  
-  #define ARCH_X86 1  
-  #ifdef ARCH_X86_64
-    #define HAVE_FAST_64BIT 1
-    #define HAVE_CMOV 1
-    #define HAVE_FAST_CMOV 1
-  #endif
+	#define HAVE_MMX 1
+	#define HAVE_MMX2 1
+	#define HAVE_SSE 1
+	#define HAVE_SSSE3 1
+	#define HAVE_AMD3DNOW 1
+	#define HAVE_AMD3DNOWEXT 1
+
+	#define ARCH_X86 1
+
+	#ifdef ARCH_X86_64
+		#define HAVE_FAST_64BIT 1
+		#define HAVE_CMOV 1
+		#define HAVE_FAST_CMOV 1
+		#define HAVE_STRUCT_TIMESPEC 1
+	#else
+		#define ARCH_X86_32 1
+		#define ARCH_X86_64 0
+	#endif
+
+	#define PTW32_STATIC_LIB 1
+	#define restrict restrict
 #else
 	#define HAVE_INLINE_ASM 0
 	#define HAVE_MMX 0
+	#define HAVE_MMX2 0
 	#define HAVE_SSE 0
 	#define HAVE_SSSE3 0
 	#define HAVE_AMD3DNOW 0
@@ -23,50 +36,67 @@
 	#define ARCH_X86_32 0
 	#define ARCH_X86_64 0
 	#define HAVE_FAST_64BIT 0
-  #define HAVE_CMOV 0
-  #define HAVE_FAST_CMOV 0
+	#define HAVE_CMOV 0
+	#define HAVE_FAST_CMOV 0
+
+	#define restrict
+	#define __asm__ __asm
 #endif
 
-#define HAVE_TEN_OPERANDS 1
-#define HAVE_EBP_AVAILABLE 1
-#define HAVE_EBX_AVAILABLE 1
+#define FFMPEG_LICENSE "GPL version 2 or later" /* GPL build, see CONFIG_GPL; "2.1" is the LGPL's version number */
+#define CC_TYPE "gcc"
+#define CC_VERSION __VERSION__
 
-#define HAVE_BIGENDIAN 0
+#define ASMALIGN(ZEROBITS) ".align 1 << " #ZEROBITS "\n\t"
 
-#define HAVE_FAST_UNALIGNED 1
-
-#ifdef __GNUC__
-  #define HAVE_ATTRIBUTE_PACKED 1
-#else
-  #define HAVE_ATTRIBUTE_PACKED 0
-#endif
-
-#ifndef __GNUC__
-  #define EMULATE_FAST_INT
-#endif
+#define EXTERN_PREFIX "_"
+#define EXTERN_ASM _
 
+#define HAVE_ALTIVEC 0
+#define HAVE_ALTIVEC_H 0
+#define HAVE_BIGENDIAN 0
 #define HAVE_BSWAP 1
+#define HAVE_EBP_AVAILABLE 1
+#define HAVE_EBX_AVAILABLE 1
+#define HAVE_FAST_CLZ 0
+#define HAVE_FAST_UNALIGNED 1
+#define HAVE_LOCAL_ALIGNED_16 1
+#define HAVE_LOCAL_ALIGNED_8 1
 #define HAVE_MALLOC_H 1
 #define HAVE_MEMALIGN 1
+#define HAVE_TEN_OPERANDS 1
 #define HAVE_THREADS 1
 #define HAVE_W32THREADS 1
 #define HAVE_YASM 1
 
-#define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"
+#ifdef __GNUC__
+	#define HAVE_ATTRIBUTE_PACKED 1
+	#define HAVE_ATTRIBUTE_MAY_ALIAS 1
+#else
+	#define HAVE_ATTRIBUTE_PACKED 0
+	#define HAVE_ATTRIBUTE_MAY_ALIAS 0
+	#define EMULATE_FAST_INT
+#endif
 
-//#define CONFIG_AUDIO_NONSHORT 1
+#define CONFIG_DWT 0
 #define CONFIG_HARDCODED_TABLES 0
 #define CONFIG_GPL 1
 #define CONFIG_GRAY 1
+#define CONFIG_H264DSP 1
 #define CONFIG_LIBAMR_NB 1
-#define CONFIG_LIBAMR_NB_FIXED 0
 #define CONFIG_LIBXVID 0
+#define CONFIG_LPC 0
+#define CONFIG_MDCT 1
 #define CONFIG_MPEGAUDIO_HP 1
 #define CONFIG_SMALL 0
 #define CONFIG_ZLIB 1
 
 #define CONFIG_DECODERS 1
-#define CONFIG_ENCODERS 0	
+#define CONFIG_ENCODERS 0
+#define CONFIG_SWSCALE 1
+#define CONFIG_SWSCALE_ALPHA 1
+#define CONFIG_POSTPROC 0
+#define CONFIG_RUNTIME_CPUDETECT 1
 
 #define CONFIG_AASC_DECODER 0
 #define CONFIG_AMV_DECODER 1
@@ -132,7 +162,7 @@
 #define CONFIG_WMV1_DECODER 1
 #define CONFIG_WMV2_DECODER 1
 #define CONFIG_WMV3_DECODER 1
-#define CONFIG_WNV1_DECODER 1
+#define CONFIG_WNV1_DECODER 0
 #define CONFIG_XL_DECODER 0
 #define CONFIG_ZLIB_DECODER 0
 #define CONFIG_ZMBV_DECODER 0
@@ -221,3 +251,5 @@
 #define CONFIG_MPEGAUDIO_PARSER 0
 #define CONFIG_MPEG4VIDEO_PARSER 0
 #define CONFIG_MLP_PARSER 1
+
+#endif /* FFMPEG_CONFIG_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
new file mode 100644
index 000000000..a5132a3f6
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
@@ -0,0 +1,8 @@
+The following files have MPC-specific custom code (compared to ffdshow):
+
+* Makefile
+* config.h
+* libavcodec/avcodec.h
+* libavcodec/dsputil.c
+* libavcodec/mpeg12.c
+* libavutil/log.h
+\ No newline at end of file
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/ffImgfmt.h b/src/filters/transform/MPCVideoDec/ffmpeg/ffImgfmt.h
index f12d85a69..cc5d526ba 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/ffImgfmt.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/ffImgfmt.h
@@ -1,6 +1,8 @@
 #ifndef _FFIMGFMT_H_
 #define _FFIMGFMT_H_
 
+#include "libavutil/pixfmt.h"
+
 //================================ ffdshow ==================================
 // the RGB related values in this enum refers to the "memory byte order" (byte order as stored in memory).
 // under x86 architecture (little endians), the byte order is stored reversed (comparing to the write order),
@@ -100,90 +102,7 @@ static __inline int csp_xvid4_2ffdshow(int csp)
   }
 }
 
-//================================= libavcodec ===================================
-
-/**
- * Pixel format. Notes:
- *
- * PIX_FMT_RGB32 is handled in an endian-specific manner. An RGBA
- * color is put together as:
- *  (A << 24) | (R << 16) | (G << 8) | B
- * This is stored as BGRA on little-endian CPU architectures and ARGB on
- * big-endian CPUs.
- *
- * When the pixel format is palettized RGB (PIX_FMT_PAL8), the palettized
- * image data is stored in AVFrame.data[0]. The palette is transported in
- * AVFrame.data[1], is 1024 bytes long (256 4-byte entries) and is
- * formatted the same as in PIX_FMT_RGB32 described above (i.e., it is
- * also endian-specific). Note also that the individual RGB palette
- * components stored in AVFrame.data[1] should be in the range 0..255.
- * This is important as many custom PAL8 video codecs that were designed
- * to run on the IBM VGA graphics adapter use 6-bit palette components.
- *
- * For all the 8bit per pixel formats, an RGB32 palette is in data[1] like
- * for pal8. This palette is filled in automatically by the function
- * allocating the picture.
- *
- * Note, make sure that all newly added big endian formats have pix_fmt&1==1
- *       and that all newly added little endian formats have pix_fmt&1==0
- *       this allows simpler detection of big vs little endian.
- */
-enum PixelFormat {
-    PIX_FMT_NONE= -1,
-    PIX_FMT_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
-    PIX_FMT_YUYV422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
-    PIX_FMT_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
-    PIX_FMT_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
-    PIX_FMT_YUV422P,   ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
-    PIX_FMT_YUV444P,   ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
-    PIX_FMT_RGB32,     ///< packed RGB 8:8:8, 32bpp, (msb)8A 8R 8G 8B(lsb), in CPU endianness
-    PIX_FMT_YUV410P,   ///< planar YUV 4:1:0,  9bpp, (1 Cr & Cb sample per 4x4 Y samples)
-    PIX_FMT_YUV411P,   ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
-    PIX_FMT_RGB565,    ///< packed RGB 5:6:5, 16bpp, (msb)   5R 6G 5B(lsb), in CPU endianness
-    PIX_FMT_RGB555,    ///< packed RGB 5:5:5, 16bpp, (msb)1A 5R 5G 5B(lsb), in CPU endianness, most significant bit to 0
-    PIX_FMT_GRAY8,     ///<        Y        ,  8bpp
-    PIX_FMT_MONOWHITE, ///<        Y        ,  1bpp, 0 is white, 1 is black
-    PIX_FMT_MONOBLACK, ///<        Y        ,  1bpp, 0 is black, 1 is white
-    PIX_FMT_PAL8,      ///< 8 bit with PIX_FMT_RGB32 palette
-    PIX_FMT_YUVJ420P,  ///< planar YUV 4:2:0, 12bpp, full scale (JPEG)
-    PIX_FMT_YUVJ422P,  ///< planar YUV 4:2:2, 16bpp, full scale (JPEG)
-    PIX_FMT_YUVJ444P,  ///< planar YUV 4:4:4, 24bpp, full scale (JPEG)
-    PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing
-    PIX_FMT_XVMC_MPEG2_IDCT,
-    PIX_FMT_UYVY422,   ///< packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
-    PIX_FMT_UYYVYY411, ///< packed YUV 4:1:1, 12bpp, Cb Y0 Y1 Cr Y2 Y3
-    PIX_FMT_BGR32,     ///< packed RGB 8:8:8, 32bpp, (msb)8A 8B 8G 8R(lsb), in CPU endianness
-    PIX_FMT_BGR565,    ///< packed RGB 5:6:5, 16bpp, (msb)   5B 6G 5R(lsb), in CPU endianness
-    PIX_FMT_BGR555,    ///< packed RGB 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), in CPU endianness, most significant bit to 1
-    PIX_FMT_BGR8,      ///< packed RGB 3:3:2,  8bpp, (msb)2B 3G 3R(lsb)
-    PIX_FMT_BGR4,      ///< packed RGB 1:2:1,  4bpp, (msb)1B 2G 1R(lsb)
-    PIX_FMT_BGR4_BYTE, ///< packed RGB 1:2:1,  8bpp, (msb)1B 2G 1R(lsb)
-    PIX_FMT_RGB8,      ///< packed RGB 3:3:2,  8bpp, (msb)2R 3G 3B(lsb)
-    PIX_FMT_RGB4,      ///< packed RGB 1:2:1,  4bpp, (msb)1R 2G 1B(lsb)
-    PIX_FMT_RGB4_BYTE, ///< packed RGB 1:2:1,  8bpp, (msb)1R 2G 1B(lsb)
-    PIX_FMT_NV12,      ///< planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 for UV
-    PIX_FMT_NV21,      ///< as above, but U and V bytes are swapped
-
-    PIX_FMT_RGB32_1,   ///< packed RGB 8:8:8, 32bpp, (msb)8R 8G 8B 8A(lsb), in CPU endianness
-    PIX_FMT_BGR32_1,   ///< packed RGB 8:8:8, 32bpp, (msb)8B 8G 8R 8A(lsb), in CPU endianness
-
-    PIX_FMT_GRAY16BE,  ///<        Y        , 16bpp, big-endian
-    PIX_FMT_GRAY16LE,  ///<        Y        , 16bpp, little-endian
-    PIX_FMT_YUV440P,   ///< planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
-    PIX_FMT_YUVJ440P,  ///< planar YUV 4:4:0 full scale (JPEG)
-    PIX_FMT_YUVA420P,  ///< planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
-    PIX_FMT_RGB48BE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, big-endian
-    PIX_FMT_RGB48LE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, little-endian
-    PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
-};
-
-#define PIX_FMT_RGBA PIX_FMT_BGR32
-#define PIX_FMT_BGRA PIX_FMT_RGB32
-#define PIX_FMT_ARGB PIX_FMT_BGR32_1
-#define PIX_FMT_ABGR PIX_FMT_RGB32_1
-#define PIX_FMT_GRAY16 PIX_FMT_GRAY16LE
-#define PIX_FMT_RGB48 PIX_FMT_RGB48LE
-
+//================================= ffmpeg ===================================
 static __inline int csp_lavc2ffdshow(enum PixelFormat pix_fmt)
 {
  switch (pix_fmt)
@@ -205,6 +124,7 @@ static __inline int csp_lavc2ffdshow(enum PixelFormat pix_fmt)
    case PIX_FMT_RGB565  :return FF_CSP_RGB16;
    case PIX_FMT_PAL8    :return FF_CSP_PAL8;
    case PIX_FMT_GRAY8   :return FF_CSP_Y800;
+   case PIX_FMT_NV12    :return FF_CSP_NV12;
    default              :return FF_CSP_NULL;
   }
 }
@@ -226,6 +146,7 @@ static __inline enum PixelFormat csp_ffdshow2lavc(int pix_fmt)
    case FF_CSP_RGB16:return PIX_FMT_RGB565;
    case FF_CSP_PAL8:return PIX_FMT_PAL8;
    case FF_CSP_Y800:return PIX_FMT_GRAY8;
+   case FF_CSP_NV12:return PIX_FMT_NV12;
    default         :return PIX_FMT_NB;
   }
 }
@@ -538,7 +459,7 @@ static __inline int csp_supXvid(int x)
 
 bool csp_inFOURCCmask(int x,FOURCC fcc);
 
-extern char_t* csp_getName(const TcspInfo *cspInfo,int csp,char_t *buf,size_t len);
+extern char_t* csp_getName2(const TcspInfo *cspInfo,int csp,char_t *buf,size_t len);
 extern char_t* csp_getName(int csp,char_t *buf,size_t len);
 extern int csp_bestMatch(int inCSP,int wantedCSPS,int *rank=NULL);
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/aac_ac3_parser.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/aac_ac3_parser.c
index 839608875..8a73e709a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/aac_ac3_parser.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/aac_ac3_parser.c
@@ -71,24 +71,32 @@ get_next:
     *poutbuf_size = buf_size;
 
     /* update codec info */
-    avctx->sample_rate = s->sample_rate;
     if(s->codec_id)
         avctx->codec_id = s->codec_id;
 
-    /* allow downmixing to stereo (or mono for AC-3) */
-    if(avctx->request_channels > 0 &&
-            avctx->request_channels < s->channels &&
-            (avctx->request_channels <= 2 ||
-            (avctx->request_channels == 1 &&
-            (avctx->codec_id == CODEC_ID_AC3 ||
-             avctx->codec_id == CODEC_ID_EAC3)))) {
-        avctx->channels = avctx->request_channels;
-    } else if (avctx->codec_id != CODEC_ID_AAC || s->channels) {
-        avctx->channels = s->channels;
-        avctx->channel_layout = s->channel_layout;
+    /* Due to backwards compatible HE-AAC the sample rate, channel count,
+       and total number of samples found in an AAC ADTS header are not
+       reliable. Bit rate is still accurate because the total frame duration in
+       seconds is still correct (as is the number of bits in the frame). */
+    if (avctx->codec_id != CODEC_ID_AAC) {
+        avctx->sample_rate = s->sample_rate;
+
+        /* allow downmixing to stereo (or mono for AC-3) */
+        if(avctx->request_channels > 0 &&
+                avctx->request_channels < s->channels &&
+                (avctx->request_channels <= 2 ||
+                (avctx->request_channels == 1 &&
+                (avctx->codec_id == CODEC_ID_AC3 ||
+                 avctx->codec_id == CODEC_ID_EAC3)))) {
+            avctx->channels = avctx->request_channels;
+        } else {
+            avctx->channels = s->channels;
+            avctx->channel_layout = s->channel_layout;
+        }
+        avctx->frame_size = s->samples;
     }
+
     avctx->bit_rate = s->bit_rate;
-    avctx->frame_size = s->samples;
 
     return i;
 }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c
index 36ca31fad..8c2387e07 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c
@@ -216,7 +216,7 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
     if (avctx->error_recognition >= FF_ER_CAREFUL) {
         s->input_buffer = av_mallocz(AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
         if (!s->input_buffer)
-            return AVERROR_NOMEM;
+            return AVERROR(ENOMEM);
     }
 
     avctx->sample_fmt = SAMPLE_FMT_S16;
@@ -817,41 +817,54 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
     if (s->eac3 && (!blk || get_bits1(gbc))) {
         s->spx_in_use = get_bits1(gbc);
         if (s->spx_in_use) {
-            int begf, endf;
-            int spx_end_subband;
+            int dst_start_freq, dst_end_freq, src_start_freq,
+                start_subband, end_subband;
 
             /* determine which channels use spx */
             if (s->channel_mode == AC3_CHMODE_MONO) {
-                s->channel_in_spx[1] = 1;
+                s->channel_uses_spx[1] = 1;
             } else {
                 for (ch = 1; ch <= fbw_channels; ch++)
-                    s->channel_in_spx[ch] = get_bits1(gbc);
+                    s->channel_uses_spx[ch] = get_bits1(gbc);
             }
 
-            s->spx_copy_start_freq = get_bits(gbc, 2) * 12 + 25;
-            begf = get_bits(gbc, 3);
-            endf = get_bits(gbc, 3);
-            s->spx_start_subband = begf < 6 ? begf+2 : 2*begf-3;
-            spx_end_subband      = endf < 4 ? endf+5 : 2*endf+3;
-            if (s->spx_start_subband >= spx_end_subband) {
-                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension range (%d >= %d)\n",
-                       s->spx_start_subband, spx_end_subband);
+            /* get the frequency bins of the spx copy region and the spx start
+               and end subbands */
+            dst_start_freq = get_bits(gbc, 2);
+            start_subband  = get_bits(gbc, 3) + 2;
+            if (start_subband > 7)
+                start_subband += start_subband - 7;
+            end_subband    = get_bits(gbc, 3) + 5;
+            if (end_subband   > 7)
+                end_subband   += end_subband   - 7;
+            dst_start_freq = dst_start_freq * 12 + 25;
+            src_start_freq = start_subband  * 12 + 25;
+            dst_end_freq   = end_subband    * 12 + 25;
+
+            /* check validity of spx ranges */
+            if (start_subband >= end_subband) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+                       "range (%d >= %d)\n", start_subband, end_subband);
                 return -1;
             }
-            s->spx_start_freq    = s->spx_start_subband * 12 + 25;
-            s->spx_end_freq      = spx_end_subband      * 12 + 25;
-            if (s->spx_copy_start_freq >= s->spx_start_freq) {
-                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension copy start bin (%d >= %d)\n",
-                       s->spx_copy_start_freq, s->spx_start_freq);
+            if (dst_start_freq >= src_start_freq) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+                       "copy start bin (%d >= %d)\n", dst_start_freq, src_start_freq);
                 return -1;
             }
+
+            s->spx_dst_start_freq = dst_start_freq;
+            s->spx_src_start_freq = src_start_freq;
+            s->spx_dst_end_freq   = dst_end_freq;
+
             decode_band_structure(gbc, blk, s->eac3, 0,
-                                  s->spx_start_subband, spx_end_subband,
+                                  start_subband, end_subband,
                                   ff_eac3_default_spx_band_struct,
-                                  &s->num_spx_bands, s->spx_band_sizes);
+                                  &s->num_spx_bands,
+                                  s->spx_band_sizes);
         } else {
             for (ch = 1; ch <= fbw_channels; ch++) {
-                s->channel_in_spx[ch] = 0;
+                s->channel_uses_spx[ch] = 0;
                 s->first_spx_coords[ch] = 1;
             }
         }
@@ -860,15 +873,16 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
     /* spectral extension coordinates */
     if (s->spx_in_use) {
         for (ch = 1; ch <= fbw_channels; ch++) {
-            if (s->channel_in_spx[ch]) {
+            if (s->channel_uses_spx[ch]) {
                 if (s->first_spx_coords[ch] || get_bits1(gbc)) {
-                    int bin;
                     float spx_blend;
-                    int master_spx_coord;
+                    int bin, master_spx_coord;
+
                     s->first_spx_coords[ch] = 0;
-                    spx_blend = get_bits(gbc, 5) / 32.0f;
+                    spx_blend = get_bits(gbc, 5) * (1.0f/32);
                     master_spx_coord = get_bits(gbc, 2) * 3;
-                    bin = s->spx_start_freq;
+
+                    bin = s->spx_src_start_freq;
                     for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
                         int bandsize;
                         int spx_coord_exp, spx_coord_mant;
@@ -876,25 +890,23 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
 
                         /* calculate blending factors */
                         bandsize = s->spx_band_sizes[bnd];
-                        nratio = ((float)((bin + (bandsize >> 1))) / s->spx_end_freq) - spx_blend;
+                        nratio = ((float)((bin + (bandsize >> 1))) / s->spx_dst_end_freq) - spx_blend;
                         nratio = av_clipf(nratio, 0.0f, 1.0f);
-                        nblend = sqrt(       nratio);
-                        sblend = sqrt(1.0f - nratio);
-                        nblend *= 1.73205077648f; // scale noise to give unity variance
+                        nblend = sqrtf(3.0f * nratio); // noise is scaled by sqrt(3) to give unity variance
+                        sblend = sqrtf(1.0f - nratio);
                         bin += bandsize;
 
                         /* decode spx coordinates */
                         spx_coord_exp  = get_bits(gbc, 4);
                         spx_coord_mant = get_bits(gbc, 2);
-                        if (spx_coord_exp == 15)
-                            spx_coord = spx_coord_mant / 4.0f;
-                        else
-                            spx_coord = (spx_coord_mant + 4) / 8.0f;
-                        spx_coord /= (float)(1 << (spx_coord_exp + master_spx_coord));
+                        if (spx_coord_exp == 15) spx_coord_mant <<= 1;
+                        else                     spx_coord_mant += 4;
+                        spx_coord_mant <<= (25 - spx_coord_exp - master_spx_coord);
+                        spx_coord = spx_coord_mant * (1.0f/(1<<23));
 
                         /* multiply noise and signal blending factors by spx coordinate */
-                        s->spx_noise_blend [ch][bnd] = nblend * spx_coord * 32.0f;
-                        s->spx_signal_blend[ch][bnd] = sblend * spx_coord * 32.0f;
+                        s->spx_noise_blend [ch][bnd] = nblend * spx_coord;
+                        s->spx_signal_blend[ch][bnd] = sblend * spx_coord;
                     }
                 }
             } else {
@@ -939,7 +951,7 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
 
             /* coupling frequency range */
             cpl_start_subband = get_bits(gbc, 4);
-            cpl_end_subband = s->spx_in_use ? s->spx_start_subband - 1 :
+            cpl_end_subband = s->spx_in_use ? (s->spx_src_start_freq - 37) / 12 :
                                               get_bits(gbc, 4) + 3;
             if (cpl_start_subband >= cpl_end_subband) {
                 av_log(s->avctx, AV_LOG_ERROR, "invalid coupling range (%d >= %d)\n",
@@ -1015,8 +1027,8 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
             s->num_rematrixing_bands = 4;
             if (cpl_in_use && s->start_freq[CPL_CH] <= 61) {
                 s->num_rematrixing_bands -= 1 + (s->start_freq[CPL_CH] == 37);
-            } else if (s->spx_in_use && s->spx_start_freq <= 61) {
-                s->num_rematrixing_bands -= 1;
+            } else if (s->spx_in_use && s->spx_src_start_freq <= 61) {
+                s->num_rematrixing_bands--;
             }
             for(bnd=0; bnd<s->num_rematrixing_bands; bnd++)
                 s->rematrixing_flags[bnd] = get_bits1(gbc);
@@ -1042,8 +1054,8 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
             int prev = s->end_freq[ch];
             if (s->channel_in_cpl[ch])
                 s->end_freq[ch] = s->start_freq[CPL_CH];
-            else if (s->channel_in_spx[ch])
-                s->end_freq[ch] = s->spx_start_freq;
+            else if (s->channel_uses_spx[ch])
+                s->end_freq[ch] = s->spx_src_start_freq;
             else {
                 int bandwidth_code = get_bits(gbc, 6);
                 if (bandwidth_code > 60) {
@@ -1256,8 +1268,9 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
     }
 
     /* apply spectral extension to high frequency bins */
-    if (s->spx_in_use)
+    if (s->spx_in_use) {
         ff_eac3_apply_spectral_extension(s);
+    }
 
     /* downmix and MDCT. order depends on whether block switching is used for
        any channel in this block. this is because coefficients for the long
@@ -1320,21 +1333,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
     *data_size = 0;
     err = parse_frame_header(s);
 
-    /* check that reported frame size fits in input buffer */
-    if(!err && s->frame_size > buf_size) {
-        av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
-        err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
-    }
-
-    /* check for crc mismatch */
-    if(err != AAC_AC3_PARSE_ERROR_FRAME_SIZE && avctx->error_recognition >= FF_ER_CAREFUL) {
-        if(av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, &buf[2], s->frame_size-2)) {
-            av_log(avctx, AV_LOG_ERROR, "frame CRC mismatch\n");
-            err = AAC_AC3_PARSE_ERROR_CRC;
-        }
-    }
-
-    if(err && err != AAC_AC3_PARSE_ERROR_CRC) {
+    if (err) {
         switch(err) {
             case AAC_AC3_PARSE_ERROR_SYNC:
                 av_log(avctx, AV_LOG_ERROR, "frame sync error\n");
@@ -1362,6 +1361,18 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
                 av_log(avctx, AV_LOG_ERROR, "invalid header\n");
                 break;
         }
+    } else {
+        /* check that reported frame size fits in input buffer */
+        if (s->frame_size > buf_size) {
+            av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
+            err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
+        } else if (avctx->error_recognition >= FF_ER_CAREFUL) {
+            /* check for crc mismatch */
+            if (av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, &buf[2], s->frame_size-2)) {
+                av_log(avctx, AV_LOG_ERROR, "frame CRC mismatch\n");
+                err = AAC_AC3_PARSE_ERROR_CRC;
+            }
+        }
     }
 
     /* if frame is ok, set audio parameters */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.h
index e982d4aca..bd555bdde 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.h
@@ -22,6 +22,29 @@
 /**
  * @file libavcodec/ac3.h
  * Common code between the AC-3 and E-AC-3 decoders.
+ *
+ * Summary of MDCT Coefficient Grouping:
+ * The individual MDCT coefficient indices are often referred to in the
+ * (E-)AC-3 specification as frequency bins.  These bins are grouped together
+ * into subbands of 12 coefficients each.  The subbands are grouped together
+ * into bands as defined in the bitstream by the band structures, which
+ * determine the number of bands and the size of each band.  The full spectrum
+ * of 256 frequency bins is divided into 1 DC bin + 21 subbands = 253 bins.
+ * This system of grouping coefficients is used for channel bandwidth, stereo
+ * rematrixing, channel coupling, enhanced coupling, and spectral extension.
+ *
+ * +-+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+-+
+ * |1|  |12|  |  [12|12|12|12]  |  |  |  |  |  |  |  |  |  |  |  |  |3|
+ * +-+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+-+
+ * ~~~  ~~~~     ~~~~~~~~~~~~~                                      ~~~
+ *  |     |            |                                             |
+ *  |     |            |                    3 unused frequency bins--+
+ *  |     |            |
+ *  |     |            +--1 band containing 4 subbands
+ *  |     |
+ *  |     +--1 subband of 12 frequency bins
+ *  |
+ *  +--DC frequency bin
  */
 
 #ifndef AVCODEC_AC3DEC_H
@@ -31,6 +54,7 @@
 #include "ac3.h"
 #include "get_bits.h"
 #include "dsputil.h"
+#include "fft.h"
 
 /* override ac3.h to include coupling channel */
 #undef AC3_MAX_CHANNELS
@@ -92,15 +116,14 @@ typedef struct {
 ///@defgroup spx spectral extension
 ///@{
     int spx_in_use;                             ///< spectral extension in use              (spxinu)
-    uint8_t channel_in_spx[AC3_MAX_CHANNELS];   ///< channel in spectral extension          (chinspx)
+    uint8_t channel_uses_spx[AC3_MAX_CHANNELS]; ///< channel uses spectral extension        (chinspx)
     int8_t spx_atten_code[AC3_MAX_CHANNELS];    ///< spx attenuation code                   (spxattencod)
-    int spx_start_subband;                      ///< spx beginning frequency band           (spxbegf)
-    int spx_start_freq;                         ///< spx start frequency bin
-    int spx_end_freq;                           ///< spx end frequency bin
-    int spx_copy_start_freq;                    ///< spx starting frequency for copying     (copystartmant)
+    int spx_src_start_freq;                     ///< spx start frequency bin
+    int spx_dst_end_freq;                       ///< spx end frequency bin
+    int spx_dst_start_freq;                     ///< spx starting frequency bin for copying (copystartmant)
+                                                ///< the copy region ends at the start of the spx region.
     int num_spx_bands;                          ///< number of spx bands                    (nspxbnds)
-    uint8_t spx_band_struct[SPX_MAX_BANDS];     ///< spectral extension band structure      (spxbndstrc)
-    uint8_t spx_band_sizes[SPX_MAX_BANDS];      ///< number of bins in each band            (spxbndsztab)
+    uint8_t spx_band_sizes[SPX_MAX_BANDS];      ///< number of bins in each spx band
     uint8_t first_spx_coords[AC3_MAX_CHANNELS]; ///< first spx coordinates states           (firstspxcos)
     float spx_noise_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS]; ///< spx noise blending factor  (nblendfact)
     float spx_signal_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS];///< spx signal blending factor (sblendfact)
@@ -175,12 +198,12 @@ typedef struct {
 ///@}
 
 ///@defgroup arrays aligned arrays
-    DECLARE_ALIGNED_16(int,   fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///> fixed-point transform coefficients
-    DECLARE_ALIGNED_16(float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
-    DECLARE_ALIGNED_16(float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
-    DECLARE_ALIGNED_16(float, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
-    DECLARE_ALIGNED_16(float, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
-    DECLARE_ALIGNED_16(float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
+    DECLARE_ALIGNED(16, int,   fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///< fixed-point transform coefficients
+    DECLARE_ALIGNED(16, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
+    DECLARE_ALIGNED(16, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
+    DECLARE_ALIGNED(16, float, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
+    DECLARE_ALIGNED(16, float, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
+    DECLARE_ALIGNED(16, float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
 ///@}
 } AC3DecodeContext;
 
@@ -196,6 +219,9 @@ int ff_eac3_parse_header(AC3DecodeContext *s);
  */
 void ff_eac3_decode_transform_coeffs_aht_ch(AC3DecodeContext *s, int ch);
 
+void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2],
+                      int out_ch, int in_ch, int len);
+
 /**
  * Apply spectral extension to each channel by copying lower frequency
  * coefficients to higher frequency bins and applying side information to
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec_data.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec_data.c
index 57bed8de6..abe359b88 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec_data.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec_data.c
@@ -70,42 +70,3 @@ const uint8_t ff_eac3_default_cpl_band_struct[18] =
  */
 const uint8_t ff_eac3_default_spx_band_struct[17] =
 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
-
-/**
- * Table E.25: Spectral Extension Attenuation Table
- * ff_eac3_spx_atten_tab[code][bin]=pow(2.0,(bin+1)*(code+1)/-15.0);
- */
-const float ff_eac3_spx_atten_tab[32][3] = {
-    { 0.954841603910416503f, 0.911722488558216804f, 0.870550563296124125f },
-    { 0.911722488558216804f, 0.831237896142787758f, 0.757858283255198995f },
-    { 0.870550563296124125f, 0.757858283255198995f, 0.659753955386447100f },
-    { 0.831237896142787758f, 0.690956439983888004f, 0.574349177498517438f },
-    { 0.793700525984099792f, 0.629960524947436595f, 0.500000000000000000f },
-    { 0.757858283255198995f, 0.574349177498517438f, 0.435275281648062062f },
-    { 0.723634618720189082f, 0.523647061410313364f, 0.378929141627599553f },
-    { 0.690956439983888004f, 0.477420801955208307f, 0.329876977693223550f },
-    { 0.659753955386447100f, 0.435275281648062062f, 0.287174588749258719f },
-    { 0.629960524947436595f, 0.396850262992049896f, 0.250000000000000000f },
-    { 0.601512518041058319f, 0.361817309360094541f, 0.217637640824031003f },
-    { 0.574349177498517438f, 0.329876977693223550f, 0.189464570813799776f },
-    { 0.548412489847312945f, 0.300756259020529160f, 0.164938488846611775f },
-    { 0.523647061410313364f, 0.274206244923656473f, 0.143587294374629387f },
-    { 0.500000000000000000f, 0.250000000000000000f, 0.125000000000000000f },
-    { 0.477420801955208307f, 0.227930622139554201f, 0.108818820412015502f },
-    { 0.455861244279108402f, 0.207809474035696939f, 0.094732285406899888f },
-    { 0.435275281648062062f, 0.189464570813799776f, 0.082469244423305887f },
-    { 0.415618948071393879f, 0.172739109995972029f, 0.071793647187314694f },
-    { 0.396850262992049896f, 0.157490131236859149f, 0.062500000000000000f },
-    { 0.378929141627599553f, 0.143587294374629387f, 0.054409410206007751f },
-    { 0.361817309360094541f, 0.130911765352578369f, 0.047366142703449930f },
-    { 0.345478219991944002f, 0.119355200488802049f, 0.041234622211652958f },
-    { 0.329876977693223550f, 0.108818820412015502f, 0.035896823593657347f },
-    { 0.314980262473718298f, 0.099212565748012460f, 0.031250000000000000f },
-    { 0.300756259020529160f, 0.090454327340023621f, 0.027204705103003875f },
-    { 0.287174588749258719f, 0.082469244423305887f, 0.023683071351724965f },
-    { 0.274206244923656473f, 0.075189064755132290f, 0.020617311105826479f },
-    { 0.261823530705156682f, 0.068551561230914118f, 0.017948411796828673f },
-    { 0.250000000000000000f, 0.062500000000000000f, 0.015625000000000000f },
-    { 0.238710400977604098f, 0.056982655534888536f, 0.013602352551501938f },
-    { 0.227930622139554201f, 0.051952368508924235f, 0.011841535675862483f }
-};
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec_data.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec_data.h
index 877ea0bde..9ed7c7318 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec_data.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec_data.h
@@ -30,6 +30,5 @@ extern const uint8_t ff_ac3_rematrix_band_tab[5];
 extern const uint8_t ff_eac3_hebap_tab[64];
 extern const uint8_t ff_eac3_default_cpl_band_struct[18];
 extern const uint8_t ff_eac3_default_spx_band_struct[17];
-extern const float   ff_eac3_spx_atten_tab[32][3];
 
 #endif /* AVCODEC_AC3DEC_DATA_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/allcodecs.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/allcodecs.c
index b3a10e214..7b08bfbc8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/allcodecs.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/allcodecs.c
@@ -47,112 +47,113 @@ void avcodec_register_all(void)
     initialized = 1;
 
     /* video codecs */
-    //REGISTER_DECODER (AASC, aasc);
+    REGISTER_DECODER (AASC, aasc);
     REGISTER_DECODER (AMV, amv);
-    //REGISTER_DECODER (ASV1, asv1);
-    //REGISTER_DECODER (ASV2, asv2);
-    //REGISTER_DECODER (AVS, avs);
-    //REGISTER_DECODER (CAVS, cavs);
-    //REGISTER_DECODER (CINEPAK, cinepak);
-    //REGISTER_DECODER (COREPNG, corepng);
-    //REGISTER_DECODER (CSCD, cscd);
-    //REGISTER_DECODER (CYUV, cyuv);
-    //REGISTER_ENCDEC  (DVVIDEO, dvvideo);
-    //REGISTER_DECODER (EIGHTBPS, eightbps);
-    //REGISTER_ENCDEC  (FFV1, ffv1);
-    //REGISTER_ENCDEC  (FFVHUFF, ffvhuff);
-    REGISTER_DECODER  (FLV, flv);
-    //REGISTER_DECODER (FRAPS, fraps);
-    //REGISTER_ENCDEC  (H261, h261);
-    REGISTER_DECODER  (H263, h263);
-    //REGISTER_DECODER (H263I, h263i);
-    //REGISTER_ENCODER (H263P, h263p);
+    REGISTER_DECODER (ASV1, asv1);
+    REGISTER_DECODER (ASV2, asv2);
+    REGISTER_DECODER (AVS, avs);
+    REGISTER_DECODER (CAVS, cavs);
+    REGISTER_DECODER (CINEPAK, cinepak);
+    REGISTER_DECODER (COREPNG, corepng);
+    REGISTER_DECODER (CSCD, cscd);
+    REGISTER_DECODER (CYUV, cyuv);
+    REGISTER_ENCDEC  (DVVIDEO, dvvideo);
+    REGISTER_DECODER (EIGHTBPS, eightbps);
+    REGISTER_ENCDEC  (FFV1, ffv1);
+    REGISTER_ENCDEC  (FFVHUFF, ffvhuff);
+    REGISTER_ENCDEC  (FLV, flv);
+    REGISTER_DECODER (FRAPS, fraps);
+    REGISTER_ENCDEC  (H261, h261);
+    REGISTER_ENCDEC  (H263, h263);
+    REGISTER_DECODER (H263I, h263i);
+    REGISTER_ENCODER (H263P, h263p);
     REGISTER_DECODER (H264, h264);
-    //REGISTER_ENCDEC  (HUFFYUV, huffyuv);
-    //REGISTER_DECODER (INDEO2, indeo2);
-    //REGISTER_DECODER (INDEO3, indeo3);
-    //REGISTER_DECODER (JPEGLS, jpegls);
-    //REGISTER_ENCODER (LJPEG, ljpeg);
-    //REGISTER_DECODER (LOCO, loco);
-    //REGISTER_ENCDEC  (MJPEG, mjpeg);
-    //REGISTER_DECODER (MJPEGB, mjpegb);
-    //REGISTER_ENCDEC  (MPEG1VIDEO, mpeg1video);
-    REGISTER_DECODER  (MPEG2VIDEO, mpeg2video);
-    REGISTER_DECODER  (MPEG4, mpeg4);
-    //REGISTER_DECODER (MPEGVIDEO, mpegvideo);
-    REGISTER_DECODER  (MSMPEG4V1, msmpeg4v1);
-    REGISTER_DECODER  (MSMPEG4V2, msmpeg4v2);
-    REGISTER_DECODER  (MSMPEG4V3, msmpeg4v3);
-    //REGISTER_DECODER (MSRLE, msrle);
-    //REGISTER_DECODER (MSVIDEO1, msvideo1);
-    //REGISTER_DECODER (MSZH, mszh);
-    //REGISTER_ENCDEC  (PNG, png);
-    //REGISTER_DECODER (QPEG, qpeg);
-    //REGISTER_DECODER (QTRLE, qtrle);
-    //REGISTER_DECODER (RPZA, rpza);
+    REGISTER_ENCDEC  (HUFFYUV, huffyuv);
+    REGISTER_DECODER (INDEO2, indeo2);
+    REGISTER_DECODER (INDEO3, indeo3);
+    REGISTER_DECODER (JPEGLS, jpegls);
+    REGISTER_ENCODER (LJPEG, ljpeg);
+    REGISTER_DECODER (LOCO, loco);
+    REGISTER_ENCDEC  (MJPEG, mjpeg);
+    REGISTER_DECODER (MJPEGB, mjpegb);
+    REGISTER_ENCDEC  (MPEG1VIDEO, mpeg1video);
+    REGISTER_ENCDEC  (MPEG2VIDEO, mpeg2video);
+    REGISTER_ENCDEC  (MPEG4, mpeg4);
+    REGISTER_DECODER (MPEGVIDEO, mpegvideo);
+    REGISTER_ENCDEC  (MSMPEG4V1, msmpeg4v1);
+    REGISTER_ENCDEC  (MSMPEG4V2, msmpeg4v2);
+    REGISTER_ENCDEC  (MSMPEG4V3, msmpeg4v3);
+    REGISTER_DECODER (MSRLE, msrle);
+    REGISTER_DECODER (MSVIDEO1, msvideo1);
+    REGISTER_DECODER (MSZH, mszh);
+    REGISTER_ENCDEC  (PNG, png);
+    REGISTER_DECODER (QPEG, qpeg);
+    REGISTER_DECODER (QTRLE, qtrle);
+    REGISTER_DECODER (RPZA, rpza);
     REGISTER_DECODER (RV10, rv10);
     REGISTER_DECODER (RV20, rv20);
     REGISTER_DECODER (RV30, rv30);
     REGISTER_DECODER (RV40, rv40);
-    //REGISTER_DECODER (SP5X, sp5x);
+    REGISTER_DECODER (SP5X, sp5x);
     REGISTER_DECODER (SVQ1, svq1);
     REGISTER_DECODER (SVQ3, svq3);
     REGISTER_DECODER (THEORA, theora);
-    //REGISTER_DECODER (TRUEMOTION1, truemotion1);
-    //REGISTER_DECODER (TRUEMOTION2, truemotion2);
-    //REGISTER_DECODER (TSCC, tscc);
-    //REGISTER_DECODER (ULTI, ulti);
+    REGISTER_DECODER (TRUEMOTION1, truemotion1);
+    REGISTER_DECODER (TRUEMOTION2, truemotion2);
+    REGISTER_DECODER (TSCC, tscc);
+    REGISTER_DECODER (ULTI, ulti);
     REGISTER_DECODER (VC1, vc1);
-    //REGISTER_DECODER (VCR1, vcr1);
+    REGISTER_DECODER (VCR1, vcr1);
     REGISTER_DECODER (VP3, vp3);
     REGISTER_DECODER (VP5, vp5);
     REGISTER_DECODER (VP6, vp6);
     REGISTER_DECODER (VP6A, vp6a);
     REGISTER_DECODER (VP6F, vp6f);
-    REGISTER_DECODER  (WMV1, wmv1);
-    REGISTER_DECODER  (WMV2, wmv2);
+    REGISTER_ENCDEC  (WMV1, wmv1);
+    REGISTER_ENCDEC  (WMV2, wmv2);
     REGISTER_DECODER (WMV3, wmv3);
-    //REGISTER_DECODER (WNV1, wnv1);
-    //REGISTER_DECODER (XL, xl);
-    //REGISTER_DECODER (ZLIB, zlib);
-    //REGISTER_DECODER (ZMBV, zmbv);
+    REGISTER_DECODER (WNV1, wnv1);
+    REGISTER_DECODER (XL, xl);
+    REGISTER_DECODER (ZLIB, zlib);
+    REGISTER_DECODER (ZMBV, zmbv);
 
     /* audio codecs */
     //REGISTER_DECODER (AAC, aac);
-    REGISTER_DECODER  (AC3, ac3);
-    //REGISTER_DECODER (ATRAC3, atrac3);
-    //REGISTER_DECODER (COOK, cook);
-    //REGISTER_DECODER (DCA, dca);
+    REGISTER_ENCDEC  (AC3, ac3);
+    REGISTER_DECODER (ATRAC3, atrac3);
+    REGISTER_DECODER (COOK, cook);
+    REGISTER_DECODER (DCA, dca);
     REGISTER_DECODER (EAC3, eac3);
-    //REGISTER_DECODER (FLAC, flac);
-    //REGISTER_DECODER (IMC, imc);
-    //REGISTER_DECODER (MACE3, mace3);
-    //REGISTER_DECODER (MACE6, mace6);
+    REGISTER_DECODER (FLAC, flac);
+    REGISTER_DECODER (IMC, imc);
+    REGISTER_DECODER (MACE3, mace3);
+    REGISTER_DECODER (MACE6, mace6);
     REGISTER_DECODER (MLP, mlp);
-    //REGISTER_DECODER (MP1, mp1);
-    //REGISTER_DECODER (MP2, mp2);
-    //REGISTER_DECODER (MP3, mp3);
-    //REGISTER_DECODER (MSGSM, msgsm);
+    REGISTER_DECODER (MP1, mp1);
+    REGISTER_DECODER (MP2, mp2);
+    REGISTER_DECODER (MP3, mp3);
+    REGISTER_DECODER (MSGSM, msgsm);
     REGISTER_DECODER (NELLYMOSER, nellymoser);
-    //REGISTER_DECODER (QDM2, qdm2);
-    //REGISTER_DECODER (RA_144, ra_144);
-    //REGISTER_DECODER (RA_288, ra_288);
+    REGISTER_DECODER (QDM2, qdm2);
+    REGISTER_DECODER (RA_144, ra_144);
+    REGISTER_DECODER (RA_288, ra_288);
     REGISTER_DECODER (TRUEHD, truehd);
-    //REGISTER_DECODER (TRUESPEECH, truespeech);
-    //REGISTER_DECODER (TTA, tta);
-    //REGISTER_DECODER (VORBIS, vorbis);
-    //REGISTER_DECODER (WMAV1, wmav1);
-    //REGISTER_DECODER (WMAV2, wmav2);
+    REGISTER_DECODER (TRUESPEECH, truespeech);
+    REGISTER_DECODER (TTA, tta);
+    REGISTER_DECODER (VORBIS, vorbis);
+    REGISTER_DECODER (WMAV1, wmav1);
+    REGISTER_DECODER (WMAV2, wmav2);
+    REGISTER_DECODER (WAVPACK, wavpack);
 
     /* pcm codecs */
-    //REGISTER_DECODER (PCM_ALAW, pcm_alaw);
-    //REGISTER_DECODER (PCM_MULAW,pcm_mulaw);
+    REGISTER_DECODER (PCM_ALAW, pcm_alaw);
+    REGISTER_DECODER (PCM_MULAW,pcm_mulaw);
 
     /* ADPCM codecs */
     REGISTER_DECODER (ADPCM_4XM, adpcm_4xm);
     REGISTER_DECODER (ADPCM_CT, adpcm_ct);
     REGISTER_DECODER (ADPCM_EA, adpcm_ea);
-    //REGISTER_DECODER (ADPCM_G726, adpcm_g726);
+    REGISTER_DECODER (ADPCM_G726, adpcm_g726);
     REGISTER_DECODER (ADPCM_IMA_AMV, adpcm_ima_amv);
     REGISTER_DECODER (ADPCM_IMA_DK3, adpcm_ima_dk3);
     REGISTER_DECODER (ADPCM_IMA_DK4, adpcm_ima_dk4);
@@ -167,15 +168,15 @@ void avcodec_register_all(void)
     REGISTER_DECODER (ADPCM_SWF, adpcm_swf);
     REGISTER_DECODER (ADPCM_XA, adpcm_xa);
     REGISTER_DECODER (ADPCM_YAMAHA, adpcm_yamaha);
-    
+
     /* external libraries */
     REGISTER_DECODER (LIBAMR_NB, libamr_nb);
 
     /* parsers */
     //REGISTER_PARSER  (AAC, aac);
     REGISTER_PARSER  (AC3, ac3);
-    //REGISTER_PARSER  (DCA, dca);
+    REGISTER_PARSER  (DCA, dca);
     REGISTER_PARSER  (MLP, mlp);
-    //REGISTER_PARSER  (MPEGAUDIO, mpegaudio);    
+    REGISTER_PARSER  (MPEGAUDIO, mpegaudio);    
 }
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
index c539552c3..12c34fed3 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
@@ -44,7 +44,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 48
+#define LIBAVCODEC_VERSION_MINOR 62
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
@@ -327,6 +327,9 @@ typedef struct RcOverride{
 #define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
 #define CODEC_FLAG2_NON_LINEAR_QUANT 0x00010000 ///< Use MPEG-2 nonlinear quantizer.
 #define CODEC_FLAG2_BIT_RESERVOIR 0x00020000 ///< Use a bit reservoir when encoding if possible
+#define CODEC_FLAG2_MBTREE        0x00040000 ///< Use macroblock tree ratecontrol (x264 only)
+#define CODEC_FLAG2_PSY           0x00080000 ///< Use psycho visual optimizations.
+#define CODEC_FLAG2_SSIM          0x00100000 ///< Compute SSIM during encoding, error[] values are undefined.
 
 /* Unsupported options :
  *              Syntax Arithmetic coding (SAC)
@@ -337,8 +340,9 @@ typedef struct RcOverride{
 
 #define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< Decoder can use draw_horiz_band callback.
 /**
- * Codec uses get_buffer() for allocating buffers.
- * direct rendering method 1
+ * Codec uses get_buffer() for allocating buffers and supports custom allocators.
+ * If not set, it might not use get_buffer() at all or use operations that
+ * assume the buffer was allocated by avcodec_default_get_buffer.
  */
 #define CODEC_CAP_DR1             0x0002
 /* If 'parse_only' field is true, then avcodec_parse_frame() can be used. */
@@ -373,7 +377,7 @@ typedef struct RcOverride{
  */
 #define CODEC_CAP_SUBFRAMES        0x0100
 /**
- * Codec supports frame-based multithreading.
+ * Codec supports frame-level multithreading.
  */
 #define CODEC_CAP_FRAME_THREADS    0x0200
 
@@ -635,7 +639,8 @@ typedef struct AVPanScan{
     short *dct_coeff;\
 \
     /**\
-     * motion referece frame index\
+     * motion reference frame index\
+     * the order in which these are stored can depend on the codec.\
      * - encoding: Set by user.\
      * - decoding: Set by libavcodec.\
      */\
@@ -686,6 +691,7 @@ typedef struct AVPanScan{
 #define FF_QSCALE_TYPE_MPEG1 0
 #define FF_QSCALE_TYPE_MPEG2 1
 #define FF_QSCALE_TYPE_H264  2
+#define FF_QSCALE_TYPE_VP56  3
 
 #define FF_BUFFER_TYPE_INTERNAL 1
 #define FF_BUFFER_TYPE_USER     2 ///< direct rendering buffers (image is (de)allocated by user)
@@ -706,6 +712,58 @@ typedef struct AVPanScan{
 #define FF_BUFFER_HINTS_PRESERVE 0x04 // User must not alter buffer content.
 #define FF_BUFFER_HINTS_REUSABLE 0x08 // Codec will reuse the buffer (update).
 
+typedef struct AVPacket {
+    /**
+     * Presentation timestamp in AVStream->time_base units; the time at which
+     * the decompressed packet will be presented to the user.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     * pts MUST be larger than or equal to dts as presentation cannot happen
+     * before decompression, unless one wants to view hex dumps. Some formats misuse
+     * the terms dts and pts/cts to mean something different. Such timestamps
+     * must be converted to true pts/dts before they are stored in AVPacket.
+     */
+    int64_t pts;
+    /**
+     * Decompression timestamp in AVStream->time_base units; the time at which
+     * the packet is decompressed.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     */
+    int64_t dts;
+    uint8_t *data;                          /* NOTE(review): presumably the packet payload buffer -- confirm */
+    int   size;                             /* NOTE(review): presumably the length of data in bytes -- confirm */
+    int   stream_index;
+    int   flags;                            /* NOTE(review): presumably AV_PKT_FLAG_* bits (e.g. AV_PKT_FLAG_KEY) -- confirm */
+    /**
+     * Duration of this packet in AVStream->time_base units, 0 if unknown.
+     * Equals next_pts - this_pts in presentation order.
+     */
+    int   duration;
+    void  (*destruct)(struct AVPacket *);   /* callback handed the packet itself; NOTE(review): presumably releases it -- confirm */
+    void  *priv;
+    int64_t pos;                            ///< byte position in stream, -1 if unknown
+
+    /**
+     * Time difference in AVStream->time_base units from the pts of this
+     * packet to the point at which the output from the decoder has converged
+     * independent from the availability of previous frames. That is, the
+     * frames are virtually identical no matter if decoding started from
+     * the very first frame or from this keyframe.
+     * Is AV_NOPTS_VALUE if unknown.
+     * This field is not the display duration of the current packet.
+     *
+     * The purpose of this field is to allow seeking in streams that have no
+     * keyframes in the conventional sense. It corresponds to the
+     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
+     * essential for some types of subtitle streams to ensure that all
+     * subtitles are correctly displayed after seeking.
+     */
+    int64_t convergence_duration;
+} AVPacket;
+#define AV_PKT_FLAG_KEY   0x0001
+#if LIBAVCODEC_VERSION_MAJOR < 53
+#define PKT_FLAG_KEY AV_PKT_FLAG_KEY
+#endif
+
 /**
  * Audio Video Frame.
  * New fields can be added to the end of FF_COMMON_FRAME with minor version
@@ -1024,6 +1082,7 @@ typedef struct AVCodecContext {
 #define FF_BUG_HPEL_CHROMA      2048
 #define FF_BUG_DC_CLIP          4096
 #define FF_BUG_MS               8192 ///< Work around various bugs in Microsoft's broken decoders.
+#define FF_BUG_TRUNCATED       16384
 //#define FF_BUG_FAKE_SCALABILITY 16 //Autodetection should work 100%.
 
     /**
@@ -1081,7 +1140,7 @@ typedef struct AVCodecContext {
     /**
      * Called at the beginning of each frame to get a buffer for it.
      * If pic.reference is set then the frame will be read later by libavcodec.
-     * avcodec_align_dimensions() should be used to find the required width and
+     * avcodec_align_dimensions2() should be used to find the required width and
      * height, as they normally need to be rounded up to the next multiple of 16.
      * if CODEC_CAP_DR1 is not set then get_buffer() must call
      * avcodec_default_get_buffer() instead of providing buffers allocated by
@@ -1318,7 +1377,9 @@ typedef struct AVCodecContext {
     /* lower 16 bits - CPU features */
 #define FF_MM_MMX      0x0001 ///< standard MMX
 #define FF_MM_3DNOW    0x0004 ///< AMD 3DNOW
+#if LIBAVCODEC_VERSION_MAJOR < 53
 #define FF_MM_MMXEXT   0x0002 ///< SSE integer functions or AMD MMX ext
+#endif
 #define FF_MM_MMX2     0x0002 ///< SSE integer functions or AMD MMX ext
 #define FF_MM_SSE      0x0008 ///< SSE functions
 #define FF_MM_SSE2     0x0010 ///< PIV SSE2 functions
@@ -1834,11 +1895,21 @@ typedef struct AVCodecContext {
      */
      int profile;
 #define FF_PROFILE_UNKNOWN -99
+
 #define FF_PROFILE_AAC_MAIN 0
 #define FF_PROFILE_AAC_LOW  1
 #define FF_PROFILE_AAC_SSR  2
 #define FF_PROFILE_AAC_LTP  3
 
+#define FF_PROFILE_H264_BASELINE    66
+#define FF_PROFILE_H264_MAIN        77
+#define FF_PROFILE_H264_EXTENDED    88
+#define FF_PROFILE_H264_HIGH        100
+#define FF_PROFILE_H264_HIGH_10     110
+#define FF_PROFILE_H264_HIGH_422    122
+#define FF_PROFILE_H264_HIGH_444    244
+#define FF_PROFILE_H264_CAVLC_444   44
+
     /**
      * level
      * - encoding: Set by user.
@@ -2161,13 +2232,6 @@ typedef struct AVCodecContext {
     int64_t reordered_opaque2; /* ffdshow custom code */
     int64_t reordered_opaque3; /* ffdshow custom code */
 
-    int is_copy;  /* ffmpeg-mt */
-    int thread_type;  /* ffmpeg-mt */
-#define FF_THREAD_FRAME   1 //< Decode more than one frame at once
-#define FF_THREAD_SLICE   2 //< Decode more than one part of a single frame at once
-#define FF_THREAD_DEFAULT 3 //< Use both if possible.
-
-    int active_thread_type; /* ffmpeg-mt */
     /**
      * Bits per sample/pixel of internal libavcodec pixel/sample format.
      * This field is applicable only when sample_fmt is SAMPLE_FMT_S32.
@@ -2205,13 +2269,6 @@ typedef struct AVCodecContext {
     float rc_min_vbv_overflow_use;
 
     /**
-     * Hardware accelerator in use
-     * - encoding: unused.
-     * - decoding: Set by libavcodec
-     */
-    struct AVHWAccel *hwaccel;
-
-    /**
      * For some codecs, the time base is closer to the field rate than the frame rate.
      * Most notably, H.264 and MPEG-2 specify time_base as half of frame duration
      * if no telecine is used ...
@@ -2275,6 +2332,86 @@ typedef struct AVCodecContext {
      */
     int (*execute2)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg, int jobnr, int threadnr), void *arg2, int *ret, int count);
 
+    /**
+     * explicit P-frame weighted prediction analysis method
+     * 0: off
+     * 1: fast blind weighting (one reference duplicate with -1 offset)
+     * 2: smart weighting (full fade detection analysis)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int weighted_p_pred;
+
+    /**
+     * AQ mode
+     * 0: Disabled
+     * 1: Variance AQ (complexity mask)
+     * 2: Auto-variance AQ (experimental)
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    int aq_mode;
+
+    /**
+     * AQ strength
+     * Reduces blocking and blurring in flat and textured areas.
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    float aq_strength;
+
+    /**
+     * PSY RD
+     * Strength of psychovisual optimization
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    float psy_rd;
+
+    /**
+     * PSY trellis
+     * Strength of psychovisual optimization
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    float psy_trellis;
+
+    /**
+     * RC lookahead
+     * Number of frames for frametype and ratecontrol lookahead
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    int rc_lookahead;
+
+    /**
+     * Whether this is a copy of the context which had init() called on it.
+     * This is used by multithreading - shared tables and picture pointers
+     * should be freed from the original context only.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+    int is_copy;
+
+    /**
+     * Which multithreading methods to use.
+     * Use of FF_THREAD_FRAME will increase decoding delay by one frame per thread,
+     * so clients which require strictly conforming DTS must not use it.
+     *
+     * - encoding: Set by user, otherwise the default is used.
+     * - decoding: Set by user, otherwise the default is used.
+     */
+    int thread_type;
+#define FF_THREAD_FRAME   1 ///< Decode more than one frame at once
+#define FF_THREAD_SLICE   2 ///< Decode more than one part of a single frame at once
+
+    /**
+     * Which multithreading methods are actually active at the moment.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+    int active_thread_type;
+    
     /* ffdshow custom stuff (begin) */
     
     /**
@@ -2310,6 +2447,7 @@ typedef struct AVCodecContext {
      */
     int isDVD;
 
+    int got_first_frame; /* ffmpeg-mt */
     /* ffdshow custom stuff (end) */
 } AVCodecContext;
 
@@ -2443,6 +2581,10 @@ typedef struct AVCodec {
     const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
     const enum SampleFormat *sample_fmts;   ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
     const int64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
+    
+    /* ffmpeg-mt */
+    int (*init_thread_copy)(AVCodecContext *);
+    int (*update_thread_context)(AVCodecContext *dst, AVCodecContext *src);
 } AVCodec;
 
 /**
@@ -2455,6 +2597,7 @@ typedef struct AVPicture {
 } AVPicture;
 
 void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int *v_shift);
+const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt);
 void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
 
 /* external high level API */
@@ -2554,7 +2697,28 @@ FF_EXPORT AVFrame *avcodec_alloc_frame(void);
 FF_EXPORT int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic);
 FF_EXPORT void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic);
 FF_EXPORT int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic);
+
+/**
+ * Returns the amount of padding in pixels which the get_buffer callback must
+ * provide around the edge of the image for codecs which do not have the
+ * CODEC_FLAG_EMU_EDGE flag.
+ *
+ * @return Required padding in pixels.
+ */
+unsigned avcodec_get_edge_width(void);
+/**
+ * Modifies width and height values so that they will result in a memory
+ * buffer that is acceptable for the codec if you do not use any horizontal
+ * padding.
+ */
 void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
+/**
+ * Modifies width and height values so that they will result in a memory
+ * buffer that is acceptable for the codec if you also ensure that all
+ * line sizes are a multiple of the respective linesize_align[i].
+ */
+void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
+                               int linesize_align[4]);
 
 /**
  * Checks if the given dimension of a picture is valid, meaning that all
@@ -2569,7 +2733,6 @@ enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum
 
 FF_EXPORT int avcodec_thread_init(AVCodecContext *s, int thread_count);
 FF_EXPORT void avcodec_thread_free(AVCodecContext *s);
-int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
 int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
 int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count);
 const char* avcodec_get_current_idct(AVCodecContext *avctx);
@@ -3029,28 +3192,15 @@ int av_picture_crop(AVPicture *dst, const AVPicture *src,
 int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, enum PixelFormat pix_fmt,
             int padtop, int padbottom, int padleft, int padright, int *color);
 
+/**
+ * Encodes extradata length to a buffer. Used by xiph codecs.
+ *
+ * @param s buffer to write to; must be at least (v/255+1) bytes long
+ * @param v size of extradata in bytes
+ * @return number of bytes written to the buffer.
+ */
 unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
 
-/* error handling */
-#if EINVAL > 0
-#define AVERROR(e) (-(e)) /**< Returns a negative error code from a POSIX error code, to return from library functions. */
-#define AVUNERROR(e) (-(e)) /**< Returns a POSIX error code from a library function error return value. */
-#else
-/* Some platforms have E* and errno already negated. */
-#define AVERROR(e) (e)
-#define AVUNERROR(e) (e)
-#endif
-#define AVERROR_UNKNOWN     AVERROR(EINVAL)  /**< unknown error */
-#define AVERROR_IO          AVERROR(EIO)     /**< I/O error */
-#define AVERROR_NUMEXPECTED AVERROR(EDOM)    /**< Number syntax expected in filename. */
-#define AVERROR_INVALIDDATA AVERROR(EINVAL)  /**< invalid data found */
-#define AVERROR_NOMEM       AVERROR(ENOMEM)  /**< not enough memory */
-#define AVERROR_NOFMT       AVERROR(EILSEQ)  /**< unknown format */
-#define AVERROR_NOTSUPP     AVERROR(ENOSYS)  /**< Operation not supported. */
-#define AVERROR_NOENT       AVERROR(ENOENT)  /**< No such file or directory. */
-#define AVERROR_EOF         AVERROR(EPIPE)   /**< End of file. */
-#define AVERROR_PATCHWELCOME    -MKTAG('P','A','W','E') /**< Not yet implemented in FFmpeg. Patches welcome. */
-
 /**
  * Logs a generic warning message about a missing feature. This function is
  * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avfft.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avfft.c
new file mode 100644
index 000000000..25fc4e095
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avfft.c
@@ -0,0 +1,142 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+#include "avfft.h"
+#include "fft.h"
+
+/* FFT */
+
+FFTContext *av_fft_init(int nbits, int inverse)
+{
+    FFTContext *s = av_malloc(sizeof(*s));
+    /* A non-zero return from ff_fft_init() is treated as failure: free s and return NULL. */
+    if (s && ff_fft_init(s, nbits, inverse))
+        av_freep(&s);
+
+    return s;
+}
+
+void av_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    s->fft_permute(s, z); /* forwards to the context's fft_permute function pointer; s is dereferenced, so it must not be NULL */
+}
+
+void av_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    s->fft_calc(s, z); /* forwards to the context's fft_calc function pointer; s is dereferenced, so it must not be NULL */
+}
+
+void av_fft_end(FFTContext *s)
+{
+    if (!s)
+        return;        /* a NULL context is accepted and ignored */
+    ff_fft_end(s);     /* finalise through ff_fft_end() first ... */
+    av_free(s);        /* ... then free the context structure itself */
+}
+
+#if CONFIG_MDCT
+
+FFTContext *av_mdct_init(int nbits, int inverse, double scale)
+{
+    FFTContext *s = av_malloc(sizeof(*s));
+    /* A non-zero return from ff_mdct_init() is treated as failure: free s and return NULL. */
+    if (s && ff_mdct_init(s, nbits, inverse, scale))
+        av_freep(&s);
+
+    return s;
+}
+
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->imdct_calc(s, output, input); /* forwards to the context's imdct_calc function pointer; s must not be NULL */
+}
+
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->imdct_half(s, output, input); /* forwards to the context's imdct_half function pointer; s must not be NULL */
+}
+
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->mdct_calc(s, output, input); /* forwards to the context's mdct_calc function pointer; s must not be NULL */
+}
+
+void av_mdct_end(FFTContext *s)
+{
+    if (!s)
+        return;        /* a NULL context is accepted and ignored */
+    ff_mdct_end(s);    /* finalise through ff_mdct_end() first ... */
+    av_free(s);        /* ... then free the context structure itself */
+}
+
+#endif /* CONFIG_MDCT */
+
+#if CONFIG_RDFT
+
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
+{
+    RDFTContext *s = av_malloc(sizeof(*s));
+    /* A non-zero return from ff_rdft_init() is treated as failure: free s and return NULL. */
+    if (s && ff_rdft_init(s, nbits, trans))
+        av_freep(&s);
+
+    return s;
+}
+
+void av_rdft_calc(RDFTContext *s, FFTSample *data)
+{
+    ff_rdft_calc(s, data); /* public entry point: passes both arguments unchanged to ff_rdft_calc() */
+}
+
+void av_rdft_end(RDFTContext *s)
+{
+    if (!s)
+        return;        /* a NULL context is accepted and ignored */
+    ff_rdft_end(s);    /* finalise through ff_rdft_end() first ... */
+    av_free(s);        /* ... then free the context structure itself */
+}
+
+#endif /* CONFIG_RDFT */
+
+#if CONFIG_DCT
+
+DCTContext *av_dct_init(int nbits, enum DCTTransformType type)
+{
+    DCTContext *s = av_malloc(sizeof(*s));
+    /* A non-zero return from ff_dct_init() is treated as failure: free s and return NULL. */
+    if (s && ff_dct_init(s, nbits, type))
+        av_freep(&s);
+
+    return s;
+}
+
+void av_dct_calc(DCTContext *s, FFTSample *data)
+{
+    ff_dct_calc(s, data); /* public entry point: passes both arguments unchanged to ff_dct_calc() */
+}
+
+void av_dct_end(DCTContext *s)
+{
+    if (!s)
+        return;        /* a NULL context is accepted and ignored */
+    ff_dct_end(s);     /* finalise through ff_dct_end() first ... */
+    av_free(s);        /* ... then free the context structure itself */
+}
+
+#endif /* CONFIG_DCT */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avfft.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avfft.h
new file mode 100644
index 000000000..623f0a33b
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avfft.h
@@ -0,0 +1,99 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVFFT_H
+#define AVCODEC_AVFFT_H
+
+typedef float FFTSample;
+
+typedef struct FFTComplex {
+    FFTSample re, im;
+} FFTComplex;
+
+typedef struct FFTContext FFTContext;
+
+/**
+ * Set up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+FFTContext *av_fft_init(int nbits, int inverse);
+
+/**
+ * Do the permutation needed BEFORE calling av_fft_calc().
+ */
+void av_fft_permute(FFTContext *s, FFTComplex *z);
+
+/**
+ * Do a complex FFT with the parameters defined in av_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+void av_fft_calc(FFTContext *s, FFTComplex *z);
+
+void av_fft_end(FFTContext *s);
+
+FFTContext *av_mdct_init(int nbits, int inverse, double scale);
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_end(FFTContext *s);
+
+/* Real Discrete Fourier Transform */
+
+enum RDFTransformType {
+    DFT_R2C,
+    IDFT_C2R,
+    IDFT_R2C,
+    DFT_C2R,
+};
+
+typedef struct RDFTContext RDFTContext;
+
+/**
+ * Set up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans);
+void av_rdft_calc(RDFTContext *s, FFTSample *data);
+void av_rdft_end(RDFTContext *s);
+
+/* Discrete Cosine Transform */
+
+typedef struct DCTContext DCTContext;
+
+enum DCTTransformType {
+    DCT_II = 0,
+    DCT_III,
+    DCT_I,
+    DST_I,
+};
+
+/**
+ * Sets up DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ * @param type            the type of transform
+ * @note the first element of the input of DST-I is ignored
+ */
+DCTContext *av_dct_init(int nbits, enum DCTTransformType type);
+void av_dct_calc(DCTContext *s, FFTSample *data);
+void av_dct_end (DCTContext *s);
+
+#endif /* AVCODEC_AVFFT_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/bitstream.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/bitstream.c
index 4337c84b3..c1dc7479f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/bitstream.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/bitstream.c
@@ -2,6 +2,7 @@
  * Common bit i/o utils
  * Copyright (c) 2000, 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 Loren Merritt
  *
  * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
  *
@@ -30,6 +31,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "put_bits.h"
+#include "malloc.h"
 
 const uint8_t ff_log2_run[32]={
  0, 0, 0, 0, 1, 1, 1, 1,
@@ -116,108 +118,126 @@ static int alloc_table(VLC *vlc, int size, int use_static)
     return index;
 }
 
-static int build_table(VLC *vlc, int table_nb_bits,
-                       int nb_codes,
-                       const void *bits, int bits_wrap, int bits_size,
-                       const void *codes, int codes_wrap, int codes_size,
-                       const void *symbols, int symbols_wrap, int symbols_size,
-                       uint32_t code_prefix, int n_prefix, int flags)
+static av_always_inline uint32_t bitswap_32(uint32_t x) { /* byte-swaps x, mapping each byte through av_reverse[] (presumably the per-byte bit-reversal table) */
+    return (uint32_t)av_reverse[x&0xFF]<<24 /* cast before shifting: the shift must be done in uint32_t, not in a promoted signed int */
+         | (uint32_t)av_reverse[(x>>8)&0xFF]<<16
+         | (uint32_t)av_reverse[(x>>16)&0xFF]<<8
+         | (uint32_t)av_reverse[x>>24];
+}
+
+typedef struct {
+    uint8_t bits;    /**< length of the codeword in bits */
+    uint16_t symbol; /**< value stored in the decoding table entry for this codeword */
+    /** codeword, with the first bit-to-be-read in the msb
+     * (even if intended for a little-endian bitstream reader) */
+    uint32_t code;
+} VLCcode;
+
+static int compare_vlcspec(const void *a, const void *b)
+{
+    const VLCcode *lhs = a, *rhs = b; /* qsort() comparator: orders VLCcode entries by (code >> 1) */
+    return (lhs->code >> 1) - (rhs->code >> 1); /* each operand is below 2^31 after the shift */
+}
+
+/**
+ * Build VLC decoding tables suitable for use with get_vlc().
+ *
+ * @param vlc            the context to be initted
+ *
+ * @param table_nb_bits  max length of vlc codes to store directly in this table
+ *                       (Longer codes are delegated to subtables.)
+ *
+ * @param nb_codes       number of elements in codes[]
+ *
+ * @param codes          descriptions of the vlc codes
+ *                       These must be ordered such that codes going into the same subtable are contiguous.
+ *                       Sorting by VLCcode.code is sufficient, though not necessary.
+ */
+static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
+                       VLCcode *codes, int flags)
 {
-    int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2, symbol;
+    int table_size, table_index, index, code_prefix, symbol, subtable_bits;
+    int i, j, k, n, nb, inc;
     uint32_t code;
     VLC_TYPE (*table)[2];
 
     table_size = 1 << table_nb_bits;
     table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
 #ifdef DEBUG_VLC
-    av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d code_prefix=%x n=%d\n",
-           table_index, table_size, code_prefix, n_prefix);
+    av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n",
+           table_index, table_size);
 #endif
     if (table_index < 0)
         return -1;
     table = &vlc->table[table_index];
 
-    for(i=0;i<table_size;i++) {
+    for (i = 0; i < table_size; i++) {
         table[i][1] = 0; //bits
         table[i][0] = -1; //codes
     }
 
     /* first pass: map codes and compute auxillary table sizes */
-    for(i=0;i<nb_codes;i++) {
-        GET_DATA(n, bits, i, bits_wrap, bits_size);
-        GET_DATA(code, codes, i, codes_wrap, codes_size);
-        /* we accept tables with holes */
-        if (n <= 0)
-            continue;
-        if (!symbols)
-            symbol = i;
-        else
-            GET_DATA(symbol, symbols, i, symbols_wrap, symbols_size);
+    for (i = 0; i < nb_codes; i++) {
+        n = codes[i].bits;
+        code = codes[i].code;
+        symbol = codes[i].symbol;
 #if defined(DEBUG_VLC) && 0
         av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
 #endif
-        /* if code matches the prefix, it is in the table */
-        n -= n_prefix;
-        if(flags & INIT_VLC_LE)
-            code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (1 << n_prefix)-1);
-        else
-            code_prefix2= code >> n;
-        if (n > 0 && code_prefix2 == code_prefix) {
-            if (n <= table_nb_bits) {
-                /* no need to add another table */
-                j = (code << (table_nb_bits - n)) & (table_size - 1);
-                nb = 1 << (table_nb_bits - n);
-                for(k=0;k<nb;k++) {
-                    if(flags & INIT_VLC_LE)
-                        j = (code >> n_prefix) + (k<<n);
+        if (n <= table_nb_bits) {
+            /* no need to add another table */
+            j = code >> (32 - table_nb_bits);
+            nb = 1 << (table_nb_bits - n);
+            inc = 1;
+            if (flags & INIT_VLC_LE) {
+                j = bitswap_32(code);
+                inc = 1 << n;
+            }
+            for (k = 0; k < nb; k++) {
 #ifdef DEBUG_VLC
-                    av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
-                           j, i, n);
+                av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
+                       j, i, n);
 #endif
-                    if (table[j][1] /*bits*/ != 0) {
-                        av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
-                        return -1;
-                    }
-                    table[j][1] = n; //bits
-                    table[j][0] = symbol;
-                    j++;
+                if (table[j][1] /*bits*/ != 0) {
+                    av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
+                    return -1;
                 }
-            } else {
-                n -= table_nb_bits;
-                j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1);
-#ifdef DEBUG_VLC
-                av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
-                       j, n);
-#endif
-                /* compute table size */
-                n1 = -table[j][1]; //bits
-                if (n > n1)
-                    n1 = n;
-                table[j][1] = -n1; //bits
+                table[j][1] = n; //bits
+                table[j][0] = symbol;
+                j += inc;
             }
-        }
-    }
-
-    /* second pass : fill auxillary tables recursively */
-    for(i=0;i<table_size;i++) {
-        n = table[i][1]; //bits
-        if (n < 0) {
-            n = -n;
-            if (n > table_nb_bits) {
-                n = table_nb_bits;
-                table[i][1] = -n; //bits
+        } else {
+            /* fill auxiliary table recursively */
+            n -= table_nb_bits;
+            code_prefix = code >> (32 - table_nb_bits);
+            subtable_bits = n;
+            codes[i].bits = n;
+            codes[i].code = code << table_nb_bits;
+            for (k = i+1; k < nb_codes; k++) {
+                n = codes[k].bits - table_nb_bits;
+                if (n <= 0)
+                    break;
+                code = codes[k].code;
+                if (code >> (32 - table_nb_bits) != code_prefix)
+                    break;
+                codes[k].bits = n;
+                codes[k].code = code << table_nb_bits;
+                subtable_bits = FFMAX(subtable_bits, n);
             }
-            index = build_table(vlc, n, nb_codes,
-                                bits, bits_wrap, bits_size,
-                                codes, codes_wrap, codes_size,
-                                symbols, symbols_wrap, symbols_size,
-                                (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i),
-                                n_prefix + table_nb_bits, flags);
+            subtable_bits = FFMIN(subtable_bits, table_nb_bits);
+            j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix;
+            table[j][1] = -subtable_bits;
+#ifdef DEBUG_VLC
+            av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
+                   j, codes[i].bits + table_nb_bits);
+#endif
+            index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
             if (index < 0)
                 return -1;
             /* note: realloc has been done, so reload tables */
             table = &vlc->table[table_index];
-            table[i][0] = index; //code
+            table[j][0] = index; //code
+            i = k-1;
         }
     }
     return table_index;
@@ -256,6 +276,13 @@ int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
              const void *symbols, int symbols_wrap, int symbols_size,
              int flags)
 {
+    #if __STDC_VERSION__ >= 199901L
+    VLCcode buf[nb_codes];
+    #else
+    VLCcode *buf = (VLCcode*)alloca(sizeof(VLCcode)*nb_codes);
+    #endif
+    int i, j;
+
     vlc->bits = nb_bits;
     if(flags & INIT_VLC_USE_NEW_STATIC){
         if(vlc->table_size && vlc->table_size == vlc->table_allocated){
@@ -273,11 +300,31 @@ int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
     av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes);
 #endif
 
-    if (build_table(vlc, nb_bits, nb_codes,
-                    bits, bits_wrap, bits_size,
-                    codes, codes_wrap, codes_size,
-                    symbols, symbols_wrap, symbols_size,
-                    0, 0, flags) < 0) {
+    assert(symbols_size <= 2 || !symbols);
+    j = 0;
+#define COPY(condition)\
+    for (i = 0; i < nb_codes; i++) {\
+        GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\
+        if (!(condition))\
+            continue;\
+        GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\
+        if (flags & INIT_VLC_LE)\
+            buf[j].code = bitswap_32(buf[j].code);\
+        else\
+            buf[j].code <<= 32 - buf[j].bits;\
+        if (symbols)\
+            GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\
+        else\
+            buf[j].symbol = i;\
+        j++;\
+    }
+    COPY(buf[j].bits > nb_bits);
+    // qsort is the slowest part of init_vlc, and could probably be improved or avoided
+    qsort(buf, j, sizeof(VLCcode), compare_vlcspec);
+    COPY(buf[j].bits && buf[j].bits <= nb_bits);
+    nb_codes = j;
+
+    if (build_table(vlc, nb_bits, nb_codes, buf, flags) < 0) {
         av_freep(&vlc->table);
         return -1;
     }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/cook.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/cook.c
index 58a7cc74b..8992f8935 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/cook.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/cook.c
@@ -52,6 +52,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "bytestream.h"
+#include "fft.h"
 
 #include "cookdata.h"
 
@@ -149,7 +150,7 @@ typedef struct cook {
     /* data buffers */
 
     uint8_t*            decoded_bytes_buffer;
-    DECLARE_ALIGNED_16(float,mono_mdct_output)[2048];
+    DECLARE_ALIGNED(16, float,mono_mdct_output)[2048];
     float               decode_buffer_1[1024];
     float               decode_buffer_2[1024];
     float               decode_buffer_0[1060]; /* static allocation for joint decode */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
index 6ee66cd74..389c9d093 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
@@ -31,19 +31,14 @@
 #include "dsputil.h"
 #include "simple_idct.h"
 #include "faandct.h"
-#include "faanidct.h"
+//#include "faanidct.h"
 #include "mathops.h"
 #include "mpegvideo.h"
 #include "config.h"
-
-/* vorbis.c */
-void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
-
-/* ac3dec.c */
-void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
-
-/* pngdec.c */
-void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
+#include "lpc.h"
+#include "ac3dec.h"
+#include "vorbis.h"
+#include "png.h"
 
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
@@ -77,7 +72,7 @@ const uint8_t ff_zigzag248_direct[64] = {
 };
 
 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
-DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16)[64];
+DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
 
 const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,   2,  3,  8,  9, 16, 17,
@@ -330,7 +325,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     return s;
 }
 
-
 /* draw the edges of width 'w' of an image of size width, height */
 //FIXME check that this is ok for mpeg4 interlaced
 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
@@ -548,6 +542,27 @@ static void put_signed_pixels_clamped_c(const DCTELEM *block,
     }
 }
 
+static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                    int line_size)
+{
+    int row, col;
+
+    /*
+     * Store an 8x8 block of coefficients as pixels, row by row.
+     * Each value is written by plain assignment to uint8_t, i.e.
+     * it is converted without any clamping to the 0..255 range.
+     * Rows are 8 coefficients apart in block and line_size bytes
+     * apart in pixels.
+     */
+    for (row = 0; row < 8; row++) {
+        for (col = 0; col < 8; col++)
+            pixels[col] = block[col];
+
+        pixels += line_size;
+        block  += 8;
+    }
+}
+
 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                           int line_size)
 {
@@ -639,6 +654,42 @@ static int sum_abs_dctelem_c(DCTELEM *block)
     return sum;
 }
 
+static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    uint8_t *row = block;
+    /* Set the first 16 bytes of each of h rows to value; rows are line_size bytes apart. */
+    for (; h > 0; h--) {
+        memset(row, value, 16);
+        row += line_size;
+    }
+}
+
+static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    uint8_t *row = block;
+    /* Set the first 8 bytes of each of h rows to value; rows are line_size bytes apart. */
+    for (; h > 0; h--) {
+        memset(row, value, 8);
+        row += line_size;
+    }
+}
+
+static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
+{
+    /* 2x upscale of an 8x8 block: each source byte fills one uint16_t in two adjacent rows. */
+    uint16_t *upper = (uint16_t *) dst;
+    uint16_t *lower = (uint16_t *)(dst + linesize);
+    int row, col;
+    for (row = 0; row < 8; row++, src += 8) {
+        for (col = 0; col < 8; col++) {
+            const uint16_t doubled = src[col] * 0x0101; /* same byte in both halves */
+            upper[col] = lower[col] = doubled;
+        }
+        upper += linesize; /* uint16_t stride, i.e. two byte-rows of dst */
+        lower += linesize;
+    }
+}
+
 #if 0
 
 #define PIXOP2(OPNAME, OP) \
@@ -2543,76 +2594,6 @@ H264_MC(avg_, 16)
 #undef op2_put
 #endif
 
-#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
-#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
-#define H264_WEIGHT(W,H) \
-static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
-    int y; \
-    offset <<= log2_denom; \
-    if(log2_denom) offset += 1<<(log2_denom-1); \
-    for(y=0; y<H; y++, block += stride){ \
-        op_scale1(0); \
-        op_scale1(1); \
-        if(W==2) continue; \
-        op_scale1(2); \
-        op_scale1(3); \
-        if(W==4) continue; \
-        op_scale1(4); \
-        op_scale1(5); \
-        op_scale1(6); \
-        op_scale1(7); \
-        if(W==8) continue; \
-        op_scale1(8); \
-        op_scale1(9); \
-        op_scale1(10); \
-        op_scale1(11); \
-        op_scale1(12); \
-        op_scale1(13); \
-        op_scale1(14); \
-        op_scale1(15); \
-    } \
-} \
-static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
-    int y; \
-    offset = ((offset + 1) | 1) << log2_denom; \
-    for(y=0; y<H; y++, dst += stride, src += stride){ \
-        op_scale2(0); \
-        op_scale2(1); \
-        if(W==2) continue; \
-        op_scale2(2); \
-        op_scale2(3); \
-        if(W==4) continue; \
-        op_scale2(4); \
-        op_scale2(5); \
-        op_scale2(6); \
-        op_scale2(7); \
-        if(W==8) continue; \
-        op_scale2(8); \
-        op_scale2(9); \
-        op_scale2(10); \
-        op_scale2(11); \
-        op_scale2(12); \
-        op_scale2(13); \
-        op_scale2(14); \
-        op_scale2(15); \
-    } \
-}
-
-H264_WEIGHT(16,16)
-H264_WEIGHT(16,8)
-H264_WEIGHT(8,16)
-H264_WEIGHT(8,8)
-H264_WEIGHT(8,4)
-H264_WEIGHT(4,8)
-H264_WEIGHT(4,4)
-H264_WEIGHT(4,2)
-H264_WEIGHT(2,4)
-H264_WEIGHT(2,2)
-
-#undef op_scale1
-#undef op_scale2
-#undef H264_WEIGHT
-
 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     int i;
@@ -2633,8 +2614,6 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
 
 #if CONFIG_CAVS_DECODER
 /* AVS specific */
-void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
-
 void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
     put_pixels8_c(dst, src, stride, 8);
 }
@@ -2649,26 +2628,16 @@ void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
 }
 #endif /* CONFIG_CAVS_DECODER */
 
-void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
-
 #if CONFIG_VC1_DECODER
 /* VC-1 specific */
-void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
-
-void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
     put_pixels8_c(dst, src, stride, 8);
 }
-void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
     avg_pixels8_c(dst, src, stride, 8);
 }
 #endif /* CONFIG_VC1_DECODER */
 
-void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
-
-#if CONFIG_RV30_DECODER
-void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
-#endif /* CONFIG_RV30_DECODER */
-
 #if CONFIG_RV40_DECODER
 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
     put_pixels16_xy2_c(dst, src, stride, 16);
@@ -2682,8 +2651,6 @@ static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
     avg_pixels8_xy2_c(dst, src, stride, 8);
 }
-
-void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
 #endif /* CONFIG_RV40_DECODER */
 
 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
@@ -2864,179 +2831,6 @@ static void h261_loop_filter_c(uint8_t *src, int stride){
     }
 }
 
-static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
-{
-    int i, d;
-    for( i = 0; i < 4; i++ ) {
-        if( tc0[i] < 0 ) {
-            pix += 4*ystride;
-            continue;
-        }
-        for( d = 0; d < 4; d++ ) {
-            const int p0 = pix[-1*xstride];
-            const int p1 = pix[-2*xstride];
-            const int p2 = pix[-3*xstride];
-            const int q0 = pix[0];
-            const int q1 = pix[1*xstride];
-            const int q2 = pix[2*xstride];
-
-            if( FFABS( p0 - q0 ) < alpha &&
-                FFABS( p1 - p0 ) < beta &&
-                FFABS( q1 - q0 ) < beta ) {
-
-                int tc = tc0[i];
-                int i_delta;
-
-                if( FFABS( p2 - p0 ) < beta ) {
-                    if(tc0[i])
-                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
-                    tc++;
-                }
-                if( FFABS( q2 - q0 ) < beta ) {
-                    if(tc0[i])
-                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
-                    tc++;
-                }
-
-                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
-                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
-            }
-            pix += ystride;
-        }
-    }
-}
-static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
-}
-static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
-}
-
-static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
-{
-    int d;
-    for( d = 0; d < 16; d++ ) {
-        const int p2 = pix[-3*xstride];
-        const int p1 = pix[-2*xstride];
-        const int p0 = pix[-1*xstride];
-
-        const int q0 = pix[ 0*xstride];
-        const int q1 = pix[ 1*xstride];
-        const int q2 = pix[ 2*xstride];
-
-        if( FFABS( p0 - q0 ) < alpha &&
-            FFABS( p1 - p0 ) < beta &&
-            FFABS( q1 - q0 ) < beta ) {
-
-            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
-                if( FFABS( p2 - p0 ) < beta)
-                {
-                    const int p3 = pix[-4*xstride];
-                    /* p0', p1', p2' */
-                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
-                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
-                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
-                } else {
-                    /* p0' */
-                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
-                }
-                if( FFABS( q2 - q0 ) < beta)
-                {
-                    const int q3 = pix[3*xstride];
-                    /* q0', q1', q2' */
-                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
-                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
-                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
-                } else {
-                    /* q0' */
-                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
-                }
-            }else{
-                /* p0', q0' */
-                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
-                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
-            }
-        }
-        pix += ystride;
-    }
-}
-static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
-}
-static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
-}
-
-static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
-{
-    int i, d;
-    for( i = 0; i < 4; i++ ) {
-        const int tc = tc0[i];
-        if( tc <= 0 ) {
-            pix += 2*ystride;
-            continue;
-        }
-        for( d = 0; d < 2; d++ ) {
-            const int p0 = pix[-1*xstride];
-            const int p1 = pix[-2*xstride];
-            const int q0 = pix[0];
-            const int q1 = pix[1*xstride];
-
-            if( FFABS( p0 - q0 ) < alpha &&
-                FFABS( p1 - p0 ) < beta &&
-                FFABS( q1 - q0 ) < beta ) {
-
-                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
-
-                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
-                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
-            }
-            pix += ystride;
-        }
-    }
-}
-static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
-}
-static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
-    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
-}
-
-static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
-{
-    int d;
-    for( d = 0; d < 8; d++ ) {
-        const int p0 = pix[-1*xstride];
-        const int p1 = pix[-2*xstride];
-        const int q0 = pix[0];
-        const int q1 = pix[1*xstride];
-
-        if( FFABS( p0 - q0 ) < alpha &&
-            FFABS( p1 - p0 ) < beta &&
-            FFABS( q1 - q0 ) < beta ) {
-
-            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
-            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
-        }
-        pix += ystride;
-    }
-}
-static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
-}
-static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
-{
-    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
-}
-
 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
 {
     int s, i;
@@ -3391,7 +3185,7 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
         case FF_CMP_NSSE:
             cmp[i]= c->nsse[i];
             break;
-#if CONFIG_SNOW_ENCODER
+#if CONFIG_DWT
         case FF_CMP_W53:
             cmp[i]= c->w53[i];
             break;
@@ -3677,8 +3471,7 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_
 
 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
-    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8];
-    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
 
     assert(h==8);
 
@@ -3742,8 +3535,7 @@ static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
 
 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
-    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8];
-    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
     int sum=0, i;
 
     assert(h==8);
@@ -3759,9 +3551,8 @@ static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
 
 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
-    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64*2/8];
-    DCTELEM * const temp= (DCTELEM*)aligned_temp;
-    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
+    DCTELEM * const bak = temp+64;
     int sum=0, i;
 
     assert(h==8);
@@ -3784,12 +3575,9 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
     const uint8_t *scantable= s->intra_scantable.permutated;
-    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8];
-    DECLARE_ALIGNED_16(uint64_t, aligned_src1)[8];
-    DECLARE_ALIGNED_16(uint64_t, aligned_src2)[8];
-    DCTELEM * const temp= (DCTELEM*)aligned_temp;
-    uint8_t * const lsrc1 = (uint8_t*)aligned_src1;
-    uint8_t * const lsrc2 = (uint8_t*)aligned_src2;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
+    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
     int i, last, run, bits, level, distortion, start_i;
     const int esc_length= s->ac_esc_length;
     uint8_t * length;
@@ -3863,8 +3651,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
     const uint8_t *scantable= s->intra_scantable.permutated;
-    DECLARE_ALIGNED_16(uint64_t, aligned_temp)[sizeof(DCTELEM)*64/8];
-    DCTELEM * const temp= (DCTELEM*)aligned_temp;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
     int i, last, run, bits, level, start_i;
     const int esc_length= s->ac_esc_length;
     uint8_t * length;
@@ -4041,7 +3828,6 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c
     }
 }
 
-#if CONFIG_AAC_DECODER
 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                  int len)
 {
@@ -4093,7 +3879,6 @@ static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
         dst[i+3] = sv[0][3] * mul;
     }
 }
-#endif
 
 static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                 int len)
@@ -4123,6 +3908,51 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul,
         dst[i] = src[i] * mul;
 }
 
+/* Clamp one float passed as its raw 32-bit pattern; used only when min < 0 < max. */
+static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, uint32_t maxi, uint32_t maxisign)
+{
+    /* mini/maxi are the raw bits of the bounds; maxisign is maxi with its sign bit flipped */
+    if(a > mini) return mini;
+    else if((a^(1U<<31)) > maxisign) return maxi; /* 1U: 1<<31 overflows a signed int (UB) */
+    else return a;
+}
+
+/* Clip src into dst against [*min, *max] via clipf_c_one(); writes 8 elements per iteration. */
+static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
+    int i;
+    uint32_t mini = *(uint32_t*)min, maxi = *(uint32_t*)max; /* raw bit patterns of the bounds */
+    uint32_t maxisign = maxi ^ (1U<<31); /* 1U: shifting a signed 1 into bit 31 is undefined */
+    uint32_t *dsti = (uint32_t*)dst;
+    const uint32_t *srci = (const uint32_t*)src;
+    for(i=0; i<len; i+=8) {
+        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
+        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
+        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
+        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
+        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
+        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
+        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
+        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
+    }
+}
+/* Clip each float of src into dst so it lies in [min, max]; elements are written in groups
+ * of 8. A range with min < 0 < max is handed to vector_clipf_c_opposite_sign(). */
+static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
+    int base, lane;
+    if(min < 0 && max > 0) {
+        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
+        return;
+    }
+
+    /* generic path: float clamp through av_clipf(), still 8 elements per outer iteration */
+    for(base = 0; base < len; base += 8) {
+        const float *in  = src + base;
+        float       *out = dst + base;
+        for(lane = 0; lane < 8; lane++)
+            out[lane] = av_clipf(in[lane], min, max);
+    }
+}
+
 static av_always_inline int float_to_int16_one(const float *src){
     int_fast32_t tmp = *(const int32_t*)src;
     if(tmp & 0xf0000){
@@ -4154,6 +3984,26 @@ void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, i
     }
 }
 
+/* Sum of (v1[i] * v2[i]) >> shift over `order` elements; each product is shifted before adding. */
+static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
+{
+    int acc = 0;
+    /* keep the post-decrement test so the trip count matches while (order--) */
+    for (; order--; v1++, v2++)
+        acc += (*v1 * *v2) >> shift;
+    return acc;
+}
+
+static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
+{   /* returns sum(v1[i] * v2[i]) over the old v1 values and updates v1[i] += mul * v3[i] in place */
+    int acc = 0;
+    for (; order--; v1++, v2++, v3++) {
+        acc += *v1 * *v2;   /* read v1 before it is modified on the next line */
+        *v1 += mul * *v3;
+    }
+    return acc;
+}
+
 #define W0 2048
 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
@@ -4306,11 +4156,11 @@ av_cold void dsputil_static_init(void)
 
 int ff_check_alignment(void){
     static int did_fail=0;
-    DECLARE_ALIGNED_16(int, aligned);
+    DECLARE_ALIGNED(16, int, aligned);
 
     if((intptr_t)&aligned & 15){
         if(!did_fail){
-#if HAVE_MMX
+#if HAVE_MMX || HAVE_ALTIVEC
             av_log(NULL, AV_LOG_ERROR,
                 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                 "and may be very slow or crash. This is not a bug in libavcodec,\n"
@@ -4382,11 +4232,13 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
             c->idct_add= ff_wmv2_idct_add_c;
             c->idct    = ff_wmv2_idct_c;
             c->idct_permutation_type= FF_NO_IDCT_PERM;
+#if 0
         }else if(avctx->idct_algo==FF_IDCT_FAAN){
             c->idct_put= ff_faanidct_put;
             c->idct_add= ff_faanidct_add;
             c->idct    = ff_faanidct;
             c->idct_permutation_type= FF_NO_IDCT_PERM;
+#endif
         }else{ //accurate/default
             c->idct_put= ff_simple_idct_put;
             c->idct_add= ff_simple_idct_add;
@@ -4395,21 +4247,11 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
         }
     }
 
-    if (CONFIG_H264_DECODER) {
-        c->h264_idct_add= ff_h264_idct_add_c;
-        c->h264_idct8_add= ff_h264_idct8_add_c;
-        c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
-        c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
-        c->h264_idct_add16     = ff_h264_idct_add16_c;
-        c->h264_idct8_add4     = ff_h264_idct8_add4_c;
-        c->h264_idct_add8      = ff_h264_idct_add8_c;
-        c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
-    }
-
     c->get_pixels = get_pixels_c;
     c->diff_pixels = diff_pixels_c;
     c->put_pixels_clamped = put_pixels_clamped_c;
     c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+//    c->put_pixels_nonclamped = put_pixels_nonclamped_c;
     c->add_pixels_clamped = add_pixels_clamped_c;
     c->add_pixels8 = add_pixels8_c;
     c->add_pixels4 = add_pixels4_c;
@@ -4421,6 +4263,10 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
     c->pix_sum = pix_sum_c;
     c->pix_norm1 = pix_norm1_c;
 
+//    c->fill_block_tab[0] = fill_block16_c;
+//    c->fill_block_tab[1] = fill_block8_c;
+//    c->scale_block = scale_block_c;
+
     /* TODO [0] 16  [1] 8 */
     c->pix_abs[0][0] = pix_abs16_c;
     c->pix_abs[0][1] = pix_abs16_x2_c;
@@ -4523,27 +4369,6 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
     c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
     c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
 
-    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
-    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
-    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
-    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
-    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
-    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
-    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
-    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
-    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
-    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
-    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
-    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
-    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
-    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
-    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
-    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
-    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
-    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
-    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
-    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
-
     c->draw_edges = draw_edges_c;
 
 #if CONFIG_CAVS_DECODER
@@ -4583,6 +4408,7 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
     c->name[0]= name ## 16_c;\
     c->name[1]= name ## 8x8_c;
 
+#if CONFIG_ENCODERS
     SET_CMP_FUNC(hadamard8_diff)
     c->hadamard8_diff[4]= hadamard8_intra16_c;
     c->hadamard8_diff[5]= hadamard8_intra8x8_c;
@@ -4591,7 +4417,9 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
 #if CONFIG_GPL
     SET_CMP_FUNC(dct264_sad)
 #endif
+#endif /* CONFIG_ENCODERS */
     c->sad[0]= pix_abs16_c;
+#if CONFIG_ENCODERS
     c->sad[1]= pix_abs8_c;
     c->sse[0]= sse16_c;
     c->sse[1]= sse8_c;
@@ -4607,37 +4435,27 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
     c->vsse[5]= vsse_intra8_c;
     c->nsse[0]= nsse16_c;
     c->nsse[1]= nsse8_c;
-#if CONFIG_SNOW_ENCODER
-    c->w53[0]= w53_16_c;
-    c->w53[1]= w53_8_c;
-    c->w97[0]= w97_16_c;
-    c->w97[1]= w97_8_c;
+#if CONFIG_DWT
+    ff_dsputil_init_dwt(c);
 #endif
 
     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
+#endif /* CONFIG_ENCODERS */
 
     c->add_bytes= add_bytes_c;
     c->add_bytes_l2= add_bytes_l2_c;
     c->diff_bytes= diff_bytes_c;
+#if CONFIG_HUFFYUV_DECODER
     c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
     c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
     c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
+#endif
     c->bswap_buf= bswap_buf;
 #if CONFIG_PNG_DECODER
     c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
 #endif
 
-    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
-    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
-    c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
-    c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
-    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
-    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
-    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
-    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
-    c->h264_loop_filter_strength= NULL;
-
     if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
         c->h263_h_loop_filter= h263_h_loop_filter_c;
         c->h263_v_loop_filter= h263_v_loop_filter_c;
@@ -4646,6 +4464,7 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
     if (CONFIG_VP3_DECODER) {
         c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
         c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
+        c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
     }
     if (CONFIG_VP6_DECODER) {
         c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
@@ -4653,8 +4472,10 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
 
     c->h261_loop_filter= h261_loop_filter_c;
 
+#if CONFIG_ENCODERS
     c->try_8x8basis= try_8x8basis_c;
     c->add_8x8basis= add_8x8basis_c;
+#endif
 
 #if CONFIG_VORBIS_DECODER
     c->vorbis_inverse_coupling = vorbis_inverse_coupling;
@@ -4662,17 +4483,37 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
 #if CONFIG_AC3_DECODER
     c->ac3_downmix = ff_ac3_downmix_c;
 #endif
+#if CONFIG_LPC
+    c->lpc_compute_autocorr = ff_lpc_compute_autocorr;
+#endif
+#if CONFIG_ATRAC3_DECODER | CONFIG_VORBIS_DECODER
     c->vector_fmul = vector_fmul_c;
+#endif
+#if CONFIG_WMAV1_DECODER | CONFIG_WMAV2_DECODER
     c->vector_fmul_reverse = vector_fmul_reverse_c;
     c->vector_fmul_add = vector_fmul_add_c;
+#endif
+#if CONFIG_AAC_DECODER | CONFIG_AC3_DECODER | CONFIG_ATRAC1_DECODER | CONFIG_VORBIS_DECODER
     c->vector_fmul_window = ff_vector_fmul_window_c;
+#endif
+#if CONFIG_AC3_DECODER | CONFIG_DCA_DECODER
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
+#endif
+#if CONFIG_IMC_DECODER | CONFIG_NELLYMOSER_DECODER
+//    c->vector_clipf = vector_clipf_c;
     c->float_to_int16 = ff_float_to_int16_c;
+#endif
+#if CONFIG_AAC_DECODER | CONFIG_AC3_DECODER | CONFIG_DCA_DECODER | CONFIG_VORBIS_DECODER
     c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
+#endif
+//    c->scalarproduct_int16 = scalarproduct_int16_c;
+//    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
 #if CONFIG_AAC_DECODER
     c->scalarproduct_float = scalarproduct_float_c;
 #endif
+#if CONFIG_AAC_DECODER | CONFIG_WMAV1_DECODER | CONFIG_WMAV2_DECODER
     c->butterflies_float = butterflies_float_c;
+#endif
 #if CONFIG_AAC_DECODER
     c->vector_fmul_scalar = vector_fmul_scalar_c;
 
@@ -4683,10 +4524,12 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
     c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
 #endif
 
+#if CONFIG_ENCODERS
     c->shrink[0]= ff_img_copy_plane;
     c->shrink[1]= ff_shrink22;
     c->shrink[2]= ff_shrink44;
     c->shrink[3]= ff_shrink88;
+#endif
 
     c->prefetch= just_return;
 
@@ -4734,6 +4577,7 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
     }
 }
 
+#if 0
 // avcodec_get_current_idct,avcodec_get_encoder_info by h.yamagata
 // It's caller's responsibility to check avctx->priv_data is MpegEncContext*.
 const char* avcodec_get_current_idct(AVCodecContext *avctx)
@@ -4773,3 +4617,4 @@ void avcodec_get_encoder_info(AVCodecContext *avctx,int *xvid_build,int *divx_ve
     *divx_build = s->divx_build;
     *lavc_build = s->lavc_build;
 }
+#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
index 3786102dd..22c91827f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
@@ -37,8 +37,6 @@
 //#define DEBUG
 /* dct code */
 typedef short DCTELEM;
-typedef int DWTELEM;
-typedef short IDWTELEM;
 
 void fdct_ifast (DCTELEM *data);
 void fdct_ifast248 (DCTELEM *data);
@@ -88,6 +86,7 @@ extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
 void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
 void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
 void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
 
 void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
 void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
@@ -96,6 +95,16 @@ void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
 void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
                            const int16_t *h_weights, const int16_t *v_weights);
 
+/* CAVS functions */
+void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+
+/* VC1 functions */
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+
 /* 1/2^n downscaling functions from imgconvert.c */
 void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
 void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
@@ -133,8 +142,8 @@ typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const ui
 typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
 typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
 typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
-typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
-typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
+
+typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h);
 
 #define DEF_OLD_QPEL(name)\
 void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
@@ -165,10 +174,6 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
 // although currently h<4 is not used as functions with width <8 are neither used nor implemented
 typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
 
-
-// for snow slices
-typedef struct slice_buffer_s slice_buffer;
-
 /**
  * Scantable.
  */
@@ -193,6 +198,7 @@ typedef struct DSPContext {
     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
     void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
     void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+//    void (*put_pixels_nonclamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
     void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
     void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
     void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
@@ -321,9 +327,6 @@ typedef struct DSPContext {
     qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
     qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
 
-    h264_weight_func weight_h264_pixels_tab[10];
-    h264_biweight_func biweight_h264_pixels_tab[10];
-
     /* AVS specific */
     qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
     qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
@@ -351,19 +354,6 @@ typedef struct DSPContext {
     void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
     void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
 
-    void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
-    /* v/h_loop_filter_luma_intra: align 16 */
-    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
-    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
-    void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
-    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
-    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
-    // h264_loop_filter_strength: simd only. the C version is inlined in h264.c
-    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
-                                      int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
-
     void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
     void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
 
@@ -372,6 +362,7 @@ typedef struct DSPContext {
     void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
     void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
 
+    void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
     void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
     void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
 
@@ -381,6 +372,8 @@ typedef struct DSPContext {
     /* assume len is a multiple of 4, and arrays are 16-byte aligned */
     void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
     void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
+    /* no alignment needed */
+//    void (*lpc_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
     void (*vector_fmul)(float *dst, const float *src, int len);
     void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
@@ -390,6 +383,8 @@ typedef struct DSPContext {
     void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
     void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
+//    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
+#if CONFIG_AAC_DECODER
     /**
      * Multiply a vector of floats by a scalar float.  Source and
      * destination vectors must overlap exactly or not at all.
@@ -433,6 +428,7 @@ typedef struct DSPContext {
      * @param len length of vectors, multiple of 4
      */
     float (*scalarproduct_float)(const float *v1, const float *v2, int len);
+#endif
     /**
      * Calculate the sum and difference of two vectors of floats.
      * @param v1  first input vector, sum output, 16-byte aligned
@@ -495,21 +491,6 @@ typedef struct DSPContext {
     void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
 #define EDGE_WIDTH 16
 
-    /* h264 functions */
-    /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them
-       NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them
-        The reason for above, is that no 2 out of one list may use a different permutation.
-    */
-    void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
-    void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
-    void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
-    void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
-    void (*h264_dct)(DCTELEM block[4][4]);
-    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
-
     void (*prefetch)(void *mem, int stride, int h);
 
     void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
@@ -548,6 +529,20 @@ typedef struct DSPContext {
     void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
            int * range, int * sum,  int edges);
 
+    /**
+     * Calculate scalar product of two vectors.
+     * @param len length of vectors, should be multiple of 16
+     * @param shift number of bits to discard from product
+     */
+//    int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
+    /* ape functions */
+    /**
+     * Calculate scalar product of v1 and v2,
+     * and v1[i] += v3[i] * mul
+     * @param len length of vectors, should be multiple of 16
+     */
+//    int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, int16_t *v2, int16_t *v3, int len, int mul);
+
     /* rv30 functions */
     qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
     qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
@@ -622,8 +617,14 @@ extern int mm_flags;
 
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
 
-#define DECLARE_ALIGNED_16(t, v, ...) DECLARE_ALIGNED(16, t, v)
-#define DECLARE_ALIGNED_8(t, v, ...)  DECLARE_ALIGNED(8, t, v)
+void ff_dsputil_init_dwt(DSPContext *c);
+void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
 
 #if HAVE_MMX
 
@@ -656,222 +657,26 @@ static inline void emms(void)
 #   define STRIDE_ALIGN 8
 #endif
 
-/* PSNR */
-void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
-              int orig_linesize[3], int coded_linesize,
-              AVCodecContext *avctx);
+#define LOCAL_ALIGNED(a, t, v, s, ...)                          \
+    uint8_t la_##v[sizeof(t s __VA_ARGS__) + (a)];              \
+    t (*v) __VA_ARGS__ = (void *)FFALIGN((uintptr_t)la_##v, a)
 
-/* FFT computation */
-
-/* NOTE: soon integer code will be added, so you must use the
-   FFTSample type */
-typedef float FFTSample;
-
-typedef struct FFTComplex {
-    FFTSample re, im;
-} FFTComplex;
-
-typedef struct FFTContext {
-    int nbits;
-    int inverse;
-    uint16_t *revtab;
-    FFTComplex *exptab;
-    FFTComplex *exptab1; /* only used by SSE code */
-    FFTComplex *tmp_buf;
-    int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
-    int mdct_bits; /* n = 2^nbits */
-    /* pre/post rotation tables */
-    FFTSample *tcos;
-    FFTSample *tsin;
-    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
-    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
-    void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
-    void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
-    void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
-    int split_radix;
-    int permutation;
-#define FF_MDCT_PERM_NONE       0
-#define FF_MDCT_PERM_INTERLEAVE 1
-} FFTContext;
-
-#if CONFIG_HARDCODED_TABLES
-#define COSTABLE_CONST const
-#define SINTABLE_CONST const
-#define SINETABLE_CONST const
+#if HAVE_LOCAL_ALIGNED_8
+#   define LOCAL_ALIGNED_8(t, v, s, ...) DECLARE_ALIGNED(8, t, v) s __VA_ARGS__
 #else
-#define COSTABLE_CONST
-#define SINTABLE_CONST
-#define SINETABLE_CONST
+#   define LOCAL_ALIGNED_8(t, v, s, ...) LOCAL_ALIGNED(8, t, v, s, __VA_ARGS__)
 #endif
 
-#define COSTABLE(size) \
-    COSTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_cos_##size)[size/2]
-#define SINTABLE(size) \
-    SINTABLE_CONST DECLARE_ALIGNED_16(FFTSample, ff_sin_##size)[size/2]
-#define SINETABLE(size) \
-    SINETABLE_CONST DECLARE_ALIGNED_16(float, ff_sine_##size)[size]
-extern COSTABLE(16);
-extern COSTABLE(32);
-extern COSTABLE(64);
-extern COSTABLE(128);
-extern COSTABLE(256);
-extern COSTABLE(512);
-extern COSTABLE(1024);
-extern COSTABLE(2048);
-extern COSTABLE(4096);
-extern COSTABLE(8192);
-extern COSTABLE(16384);
-extern COSTABLE(32768);
-extern COSTABLE(65536);
-extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17];
-
-/**
- * Initializes the cosine table in ff_cos_tabs[index]
- * \param index index in ff_cos_tabs array of the table to initialize
- */
-void ff_init_ff_cos_tabs(int index);
-
-extern SINTABLE(16);
-extern SINTABLE(32);
-extern SINTABLE(64);
-extern SINTABLE(128);
-extern SINTABLE(256);
-extern SINTABLE(512);
-extern SINTABLE(1024);
-extern SINTABLE(2048);
-extern SINTABLE(4096);
-extern SINTABLE(8192);
-extern SINTABLE(16384);
-extern SINTABLE(32768);
-extern SINTABLE(65536);
-
-/**
- * Sets up a complex FFT.
- * @param nbits           log2 of the length of the input array
- * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
- */
-int ff_fft_init(FFTContext *s, int nbits, int inverse);
-void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
-void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
-
-void ff_fft_init_mmx(FFTContext *s);
-
-/**
- * Do the permutation needed BEFORE calling ff_fft_calc().
- */
-static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
-{
-    s->fft_permute(s, z);
-}
-/**
- * Do a complex FFT with the parameters defined in ff_fft_init(). The
- * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
- */
-static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
-{
-    s->fft_calc(s, z);
-}
-void ff_fft_end(FFTContext *s);
-
-/* MDCT computation */
-
-static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
-    s->imdct_calc(s, output, input);
-}
-static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
-    s->imdct_half(s, output, input);
-}
-
-static inline void ff_mdct_calc(FFTContext *s, FFTSample *output,
-                                const FFTSample *input)
-{
-    s->mdct_calc(s, output, input);
-}
-
-/**
- * Generate a Kaiser-Bessel Derived Window.
- * @param   window  pointer to half window
- * @param   alpha   determines window shape
- * @param   n       size of half window
- */
-void ff_kbd_window_init(float *window, float alpha, int n);
-
-/**
- * Generate a sine window.
- * @param   window  pointer to half window
- * @param   n       size of half window
- */
-void ff_sine_window_init(float *window, int n);
-/**
- * initialize the specified entry of ff_sine_windows
- */
-void ff_init_ff_sine_windows(int index);
-extern SINETABLE(  32);
-extern SINETABLE(  64);
-extern SINETABLE( 128);
-extern SINETABLE( 256);
-extern SINETABLE( 512);
-extern SINETABLE(1024);
-extern SINETABLE(2048);
-extern SINETABLE(4096);
-extern SINETABLE_CONST float * const ff_sine_windows[13];
-
-int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
-void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
-void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
-void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
-void ff_mdct_end(FFTContext *s);
-
-/* Real Discrete Fourier Transform */
-
-enum RDFTransformType {
-    RDFT,
-    IRDFT,
-    RIDFT,
-    IRIDFT,
-};
-
-typedef struct {
-    int nbits;
-    int inverse;
-    int sign_convention;
-
-    /* pre/post rotation tables */
-    const FFTSample *tcos;
-    SINTABLE_CONST FFTSample *tsin;
-    FFTContext fft;
-} RDFTContext;
-
-/**
- * Sets up a real FFT.
- * @param nbits           log2 of the length of the input array
- * @param trans           the type of transform
- */
-int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
-void ff_rdft_calc(RDFTContext *s, FFTSample *data);
-void ff_rdft_end(RDFTContext *s);
-
-/* Discrete Cosine Transform */
-
-typedef struct {
-    int nbits;
-    int inverse;
-    FFTSample *data;
-    RDFTContext rdft;
-    const float *costab;
-    FFTSample *csc2;
-} DCTContext;
+#if HAVE_LOCAL_ALIGNED_16
+#   define LOCAL_ALIGNED_16(t, v, s, ...) DECLARE_ALIGNED(16, t, v) s __VA_ARGS__
+#else
+#   define LOCAL_ALIGNED_16(t, v, s, ...) LOCAL_ALIGNED(16, t, v, s, __VA_ARGS__)
+#endif
 
-/**
- * Sets up (Inverse)DCT.
- * @param nbits           log2 of the length of the input array
- * @param inverse         >0 forward transform, <0 inverse transform
- */
-int  ff_dct_init(DCTContext *s, int nbits, int inverse);
-void ff_dct_calc(DCTContext *s, FFTSample *data);
-void ff_dct_end (DCTContext *s);
+/* PSNR */
+void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
+              int orig_linesize[3], int coded_linesize,
+              AVCodecContext *avctx);
 
 #define WRAPPER8_16(name8, name16)\
 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
@@ -972,4 +777,42 @@ static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride,
 
 const char* avcodec_get_current_idct_mmx(AVCodecContext *avctx,DSPContext *c);
 
+/* ffdshow custom code begin */
+#ifndef D3DCOLOR_DEFINED
+typedef uint32_t D3DCOLOR;
+#define D3DCOLOR_DEFINED
+#endif
+
+#define D3DCOLOR_ARGB(a,r,g,b) \
+    ((D3DCOLOR)((((a)&0xff)<<24)|(((r)&0xff)<<16)|(((g)&0xff)<<8)|((b)&0xff)))
+
+static const double Rec601_Kr = 0.299;
+static const double Rec601_Kb = 0.114;
+static const double Rec601_Kg = 0.587;
+static const double Rec709_Kr = 0.2125;
+static const double Rec709_Kb = 0.0721;
+static const double Rec709_Kg = 0.7154;
+
+static inline uint32_t YCrCbToRGB_Rec709(uint8_t A, uint8_t Y, uint8_t Cr, uint8_t Cb)
+{
+
+  double rp = Y + 2*(Cr-128)*(1.0-Rec709_Kr);
+  double gp = Y - 2*(Cb-128)*(1.0-Rec709_Kb)*Rec709_Kb/Rec709_Kg - 2*(Cr-128)*(1.0-Rec709_Kr)*Rec709_Kr/Rec709_Kg;
+  double bp = Y + 2*(Cb-128)*(1.0-Rec709_Kb);
+
+  return D3DCOLOR_ARGB(A, (uint8_t)fabs(rp), (uint8_t)fabs(gp), (uint8_t)fabs(bp));
+}
+
+static inline uint32_t YCrCbToRGB_Rec601(uint8_t A, uint8_t Y, uint8_t Cr, uint8_t Cb)
+{
+
+  double rp = Y + 2*(Cr-128)*(1.0-Rec601_Kr);
+  double gp = Y - 2*(Cb-128)*(1.0-Rec601_Kb)*Rec601_Kb/Rec601_Kg - 2*(Cr-128)*(1.0-Rec601_Kr)*Rec601_Kr/Rec601_Kg;
+  double bp = Y + 2*(Cb-128)*(1.0-Rec601_Kb);
+
+  return D3DCOLOR_ARGB(A, (uint8_t)fabs(rp), (uint8_t)fabs(gp), (uint8_t)fabs(bp));
+}
+
+/* ffdshow custom code end */
+
 #endif /* AVCODEC_DSPUTIL_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec.c
index 79f3c3b49..52d15c83f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec.c
@@ -66,48 +66,47 @@ typedef enum {
 void ff_eac3_apply_spectral_extension(AC3DecodeContext *s)
 {
     int bin, bnd, ch, i;
-    uint8_t wrapflag[SPX_MAX_BANDS]={0,}, num_copy_sections, copy_sizes[SPX_MAX_BANDS];
+    uint8_t wrapflag[SPX_MAX_BANDS]={1,0,}, num_copy_sections, copy_sizes[SPX_MAX_BANDS];
     float rms_energy[SPX_MAX_BANDS];
 
     /* Set copy index mapping table. Set wrap flags to apply a notch filter at
        wrap points later on. */
-    wrapflag[0] = 1;
-    bin = s->spx_copy_start_freq;
+    bin = s->spx_dst_start_freq;
     num_copy_sections = 0;
     for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
         int copysize;
         int bandsize = s->spx_band_sizes[bnd];
-        if ((bin + bandsize) > s->spx_start_freq) {
-            copy_sizes[num_copy_sections++] = bin - s->spx_copy_start_freq;
-            bin = s->spx_copy_start_freq;
+        if (bin + bandsize > s->spx_src_start_freq) {
+            copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
+            bin = s->spx_dst_start_freq;
             wrapflag[bnd] = 1;
         }
         for (i = 0; i < bandsize; i += copysize) {
-            if (bin == s->spx_start_freq) {
-                copy_sizes[num_copy_sections++] = bin - s->spx_copy_start_freq;
-                bin = s->spx_copy_start_freq;
+            if (bin == s->spx_src_start_freq) {
+                copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
+                bin = s->spx_dst_start_freq;
             }
-            copysize = FFMIN(bandsize - i, s->spx_start_freq - bin);
+            copysize = FFMIN(bandsize - i, s->spx_src_start_freq - bin);
             bin += copysize;
         }
     }
-    copy_sizes[num_copy_sections++] = bin - s->spx_copy_start_freq;
+    copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
 
     for (ch = 1; ch <= s->fbw_channels; ch++) {
-        if (!s->channel_in_spx[ch])
+        if (!s->channel_uses_spx[ch])
             continue;
 
         /* Copy coeffs from normal bands to extension bands */
-        bin = s->spx_start_freq;
+        bin = s->spx_src_start_freq;
         for (i = 0; i < num_copy_sections; i++) {
             memcpy(&s->transform_coeffs[ch][bin],
-                   &s->transform_coeffs[ch][s->spx_copy_start_freq],
+                   &s->transform_coeffs[ch][s->spx_dst_start_freq],
                    copy_sizes[i]*sizeof(float));
             bin += copy_sizes[i];
         }
 
         /* Calculate RMS energy for each SPX band. */
-        bin = s->spx_start_freq;
+        bin = s->spx_src_start_freq;
         for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
             int bandsize = s->spx_band_sizes[bnd];
             float accum = 0.0f;
@@ -115,14 +114,14 @@ void ff_eac3_apply_spectral_extension(AC3DecodeContext *s)
                 float coeff = s->transform_coeffs[ch][bin++];
                 accum += coeff * coeff;
             }
-            rms_energy[bnd] = sqrt(accum / (float)bandsize);
+            rms_energy[bnd] = sqrtf(accum / bandsize);
         }
 
         /* Apply a notch filter at transitions between normal and extension
            bands and at all wrap points. */
         if (s->spx_atten_code[ch] >= 0) {
             const float *atten_tab = ff_eac3_spx_atten_tab[s->spx_atten_code[ch]];
-            bin = s->spx_start_freq - 2;
+            bin = s->spx_src_start_freq - 2;
             for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
                 if (wrapflag[bnd]) {
                     float *coeffs = &s->transform_coeffs[ch][bin];
@@ -139,12 +138,12 @@ void ff_eac3_apply_spectral_extension(AC3DecodeContext *s)
         /* Apply noise-blended coefficient scaling based on previously
            calculated RMS energy, blending factors, and SPX coordinates for
            each band. */
-        bin = s->spx_start_freq;
+        bin = s->spx_src_start_freq;
         for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
-            float nscale = s->spx_noise_blend[ch][bnd] * rms_energy[bnd];
+            float nscale = s->spx_noise_blend[ch][bnd] * rms_energy[bnd] * (1.0f/(1<<31));
             float sscale = s->spx_signal_blend[ch][bnd];
             for (i = 0; i < s->spx_band_sizes[bnd]; i++) {
-                float noise  = nscale * (((int)av_lfg_get(&s->dith_state))/(float)(1<<31));
+                float noise  = nscale * (int32_t)av_lfg_get(&s->dith_state);
                 s->transform_coeffs[ch][bin]   *= sscale;
                 s->transform_coeffs[ch][bin++] += noise;
             }
@@ -152,6 +151,7 @@ void ff_eac3_apply_spectral_extension(AC3DecodeContext *s)
     }
 }
 
+
 /** lrint(M_SQRT2*cos(2*M_PI/12)*(1<<23)) */
 #define COEFF_0 10273905LL
 
@@ -578,10 +578,11 @@ int ff_eac3_parse_header(AC3DecodeContext *s)
 
     /* spectral extension attenuation data */
     for (ch = 1; ch <= s->fbw_channels; ch++) {
-        if (parse_spx_atten_data && get_bits1(gbc))
+        if (parse_spx_atten_data && get_bits1(gbc)) {
             s->spx_atten_code[ch] = get_bits(gbc, 5);
-        else
+        } else {
             s->spx_atten_code[ch] = -1;
+        }
     }
 
     /* block start information */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec_data.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec_data.c
index 6c6a55180..9e9f8a254 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec_data.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec_data.c
@@ -1093,3 +1093,42 @@ const uint8_t ff_eac3_frm_expstr[32][6] = {
 {    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D25,  EXP_REUSE},
 {    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D45},
 };
+
+/**
+ * Table E.25: Spectral Extension Attenuation Table
+ * ff_eac3_spx_atten_tab[code][bin]=pow(2.0,(bin+1)*(code+1)/-15.0);
+ */
+const float ff_eac3_spx_atten_tab[32][3] = {
+    { 0.954841603910416503f, 0.911722488558216804f, 0.870550563296124125f },
+    { 0.911722488558216804f, 0.831237896142787758f, 0.757858283255198995f },
+    { 0.870550563296124125f, 0.757858283255198995f, 0.659753955386447100f },
+    { 0.831237896142787758f, 0.690956439983888004f, 0.574349177498517438f },
+    { 0.793700525984099792f, 0.629960524947436595f, 0.500000000000000000f },
+    { 0.757858283255198995f, 0.574349177498517438f, 0.435275281648062062f },
+    { 0.723634618720189082f, 0.523647061410313364f, 0.378929141627599553f },
+    { 0.690956439983888004f, 0.477420801955208307f, 0.329876977693223550f },
+    { 0.659753955386447100f, 0.435275281648062062f, 0.287174588749258719f },
+    { 0.629960524947436595f, 0.396850262992049896f, 0.250000000000000000f },
+    { 0.601512518041058319f, 0.361817309360094541f, 0.217637640824031003f },
+    { 0.574349177498517438f, 0.329876977693223550f, 0.189464570813799776f },
+    { 0.548412489847312945f, 0.300756259020529160f, 0.164938488846611775f },
+    { 0.523647061410313364f, 0.274206244923656473f, 0.143587294374629387f },
+    { 0.500000000000000000f, 0.250000000000000000f, 0.125000000000000000f },
+    { 0.477420801955208307f, 0.227930622139554201f, 0.108818820412015502f },
+    { 0.455861244279108402f, 0.207809474035696939f, 0.094732285406899888f },
+    { 0.435275281648062062f, 0.189464570813799776f, 0.082469244423305887f },
+    { 0.415618948071393879f, 0.172739109995972029f, 0.071793647187314694f },
+    { 0.396850262992049896f, 0.157490131236859149f, 0.062500000000000000f },
+    { 0.378929141627599553f, 0.143587294374629387f, 0.054409410206007751f },
+    { 0.361817309360094541f, 0.130911765352578369f, 0.047366142703449930f },
+    { 0.345478219991944002f, 0.119355200488802049f, 0.041234622211652958f },
+    { 0.329876977693223550f, 0.108818820412015502f, 0.035896823593657347f },
+    { 0.314980262473718298f, 0.099212565748012460f, 0.031250000000000000f },
+    { 0.300756259020529160f, 0.090454327340023621f, 0.027204705103003875f },
+    { 0.287174588749258719f, 0.082469244423305887f, 0.023683071351724965f },
+    { 0.274206244923656473f, 0.075189064755132290f, 0.020617311105826479f },
+    { 0.261823530705156682f, 0.068551561230914118f, 0.017948411796828673f },
+    { 0.250000000000000000f, 0.062500000000000000f, 0.015625000000000000f },
+    { 0.238710400977604098f, 0.056982655534888536f, 0.013602352551501938f },
+    { 0.227930622139554201f, 0.051952368508924235f, 0.011841535675862483f }
+};
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec_data.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec_data.h
index 76dd15456..133183398 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec_data.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eac3dec_data.h
@@ -31,5 +31,6 @@ extern const int16_t ff_eac3_gaq_remap_2_4_b[9][2];
 
 extern const int16_t (* const ff_eac3_mantissa_vq[8])[6];
 extern const uint8_t ff_eac3_frm_expstr[32][6];
+extern const float   ff_eac3_spx_atten_tab[32][3];
 
 #endif /* AVCODEC_EAC3DEC_DATA_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/error_resilience.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/error_resilience.c
index 80c7a5c4d..86755909e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/error_resilience.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/error_resilience.c
@@ -30,13 +30,52 @@
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
+#include "h264.h"
+#include "rectangle.h"
 
-static void decode_mb(MpegEncContext *s){
+/*
+ * H264 redefines mb_intra so it is not mistakenly used (it is uninitialized in h264),
+ * but error concealment must support both h264 and h263, thus we must undo this
+ */
+#undef mb_intra
+
+static void decode_mb(MpegEncContext *s, int ref){
     s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* s->linesize  ) + s->mb_x * 16;
     s->dest[1] = s->current_picture.data[1] + (s->mb_y * (16>>s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16>>s->chroma_x_shift);
     s->dest[2] = s->current_picture.data[2] + (s->mb_y * (16>>s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16>>s->chroma_x_shift);
 
+    if(CONFIG_H264_DECODER && s->codec_id == CODEC_ID_H264){
+        H264Context *h= (void*)s;
+        h->mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+        memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache));
+        assert(ref>=0);
+        if(ref >= h->ref_count[0]) //FIXME it is possible albeit uncommon that slice references differ between slices, we take the easy approach and ignore it for now. If this turns out to have any relevance in practice then correct remapping should be added
+            ref=0;
+        fill_rectangle(&s->current_picture.ref_index[0][4*h->mb_xy], 2, 2, 2, ref, 1);
+        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
+        fill_rectangle(h->mv_cache[0][ scan8[0] ], 4, 4, 8, pack16to32(s->mv[0][0][0],s->mv[0][0][1]), 4);
+        assert(!FRAME_MBAFF);
+        ff_h264_hl_decode_mb(h);
+    }else{
+        assert(ref==0);
     MPV_decode_mb(s, s->block);
+    }
+}
+
+/**
+ * @param stride the number of MVs to get to the next row
+ * @param mv_step the number of MVs per row or column in a macroblock
+ */
+static void set_mv_strides(MpegEncContext *s, int *mv_step, int *stride){
+    if(s->codec_id == CODEC_ID_H264){
+        H264Context *h= (void*)s;
+        assert(s->quarter_sample);
+        *mv_step= 4;
+        *stride= h->b_stride;
+    }else{
+        *mv_step= 2;
+        *stride= s->b8_stride;
+    }
 }
 
 /**
@@ -197,8 +236,11 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i
  * @param h     height in 8 pixel blocks
  */
 static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
-    int b_x, b_y;
+    int b_x, b_y, mvx_stride, mvy_stride;
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    set_mv_strides(s, &mvx_stride, &mvy_stride);
+    mvx_stride >>= is_luma;
+    mvy_stride *= mvx_stride;
 
     for(b_y=0; b_y<h; b_y++){
         for(b_x=0; b_x<w-1; b_x++){
@@ -210,8 +252,8 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st
             int left_damage =  left_status&(DC_ERROR|AC_ERROR|MV_ERROR);
             int right_damage= right_status&(DC_ERROR|AC_ERROR|MV_ERROR);
             int offset= b_x*8 + b_y*stride*8;
-            int16_t *left_mv=  s->current_picture.motion_val[0][s->b8_stride*(b_y<<(1-is_luma)) + ( b_x   <<(1-is_luma))];
-            int16_t *right_mv= s->current_picture.motion_val[0][s->b8_stride*(b_y<<(1-is_luma)) + ((b_x+1)<<(1-is_luma))];
+            int16_t *left_mv=  s->current_picture.motion_val[0][mvy_stride*b_y + mvx_stride* b_x   ];
+            int16_t *right_mv= s->current_picture.motion_val[0][mvy_stride*b_y + mvx_stride*(b_x+1)];
 
             if(!(left_damage||right_damage)) continue; // both undamaged
 
@@ -257,8 +299,11 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st
  * @param h     height in 8 pixel blocks
  */
 static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
-    int b_x, b_y;
+    int b_x, b_y, mvx_stride, mvy_stride;
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    set_mv_strides(s, &mvx_stride, &mvy_stride);
+    mvx_stride >>= is_luma;
+    mvy_stride *= mvx_stride;
 
     for(b_y=0; b_y<h-1; b_y++){
         for(b_x=0; b_x<w; b_x++){
@@ -270,8 +315,8 @@ static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st
             int top_damage =      top_status&(DC_ERROR|AC_ERROR|MV_ERROR);
             int bottom_damage= bottom_status&(DC_ERROR|AC_ERROR|MV_ERROR);
             int offset= b_x*8 + b_y*stride*8;
-            int16_t *top_mv=    s->current_picture.motion_val[0][s->b8_stride*( b_y   <<(1-is_luma)) + (b_x<<(1-is_luma))];
-            int16_t *bottom_mv= s->current_picture.motion_val[0][s->b8_stride*((b_y+1)<<(1-is_luma)) + (b_x<<(1-is_luma))];
+            int16_t *top_mv=    s->current_picture.motion_val[0][mvy_stride* b_y    + mvx_stride*b_x];
+            int16_t *bottom_mv= s->current_picture.motion_val[0][mvy_stride*(b_y+1) + mvx_stride*b_x];
 
             if(!(top_damage||bottom_damage)) continue; // both undamaged
 
@@ -324,7 +369,9 @@ static void guess_mv(MpegEncContext *s){
     const int mb_width = s->mb_width;
     const int mb_height= s->mb_height;
     int i, depth, num_avail;
-    int mb_x, mb_y;
+    int mb_x, mb_y, mot_step, mot_stride;
+
+    set_mv_strides(s, &mot_step, &mot_stride);
 
     num_avail=0;
     for(i=0; i<s->mb_num; i++){
@@ -359,7 +406,7 @@ static void guess_mv(MpegEncContext *s){
                 s->mb_y= mb_y;
                 s->mv[0][0][0]= 0;
                 s->mv[0][0][1]= 0;
-                decode_mb(s);
+                decode_mb(s, 0);
             }
         }
         return;
@@ -379,12 +426,12 @@ int score_sum=0;
                 for(mb_x=0; mb_x<s->mb_width; mb_x++){
                     const int mb_xy= mb_x + mb_y*s->mb_stride;
                     int mv_predictor[8][2]={{0}};
+                    int ref[8]={0};
                     int pred_count=0;
                     int j;
                     int best_score=256*256*256*64;
                     int best_pred=0;
-                    const int mot_stride= s->b8_stride;
-                    const int mot_index= mb_x*2 + mb_y*2*mot_stride;
+                    const int mot_index= (mb_x + mb_y*mot_stride) * mot_step;
                     int prev_x= s->current_picture.motion_val[0][mot_index][0];
                     int prev_y= s->current_picture.motion_val[0][mot_index][1];
 
@@ -411,62 +458,75 @@ int score_sum=0;
                     none_left=0;
 
                     if(mb_x>0 && fixed[mb_xy-1]){
-                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - 2][0];
-                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - 2][1];
+                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - mot_step][0];
+                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - mot_step][1];
+                        ref         [pred_count]   = s->current_picture.ref_index[0][4*(mb_xy-1)];
                         pred_count++;
                     }
                     if(mb_x+1<mb_width && fixed[mb_xy+1]){
-                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index + 2][0];
-                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index + 2][1];
+                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index + mot_step][0];
+                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index + mot_step][1];
+                        ref         [pred_count]   = s->current_picture.ref_index[0][4*(mb_xy+1)];
                         pred_count++;
                     }
                     if(mb_y>0 && fixed[mb_xy-mb_stride]){
-                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - mot_stride*2][0];
-                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - mot_stride*2][1];
+                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - mot_stride*mot_step][0];
+                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - mot_stride*mot_step][1];
+                        ref         [pred_count]   = s->current_picture.ref_index[0][4*(mb_xy-s->mb_stride)];
                         pred_count++;
                     }
                     if(mb_y+1<mb_height && fixed[mb_xy+mb_stride]){
-                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index + mot_stride*2][0];
-                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index + mot_stride*2][1];
+                        mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index + mot_stride*mot_step][0];
+                        mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index + mot_stride*mot_step][1];
+                        ref         [pred_count]   = s->current_picture.ref_index[0][4*(mb_xy+s->mb_stride)];
                         pred_count++;
                     }
                     if(pred_count==0) continue;
 
                     if(pred_count>1){
-                        int sum_x=0, sum_y=0;
-                        int max_x, max_y, min_x, min_y;
+                        int sum_x=0, sum_y=0, sum_r=0;
+                        int max_x, max_y, min_x, min_y, max_r, min_r;
 
                         for(j=0; j<pred_count; j++){
                             sum_x+= mv_predictor[j][0];
                             sum_y+= mv_predictor[j][1];
+                            sum_r+= ref[j];
+                            if(j && ref[j] != ref[j-1])
+                                goto skip_mean_and_median;
                         }
 
                         /* mean */
                         mv_predictor[pred_count][0] = sum_x/j;
                         mv_predictor[pred_count][1] = sum_y/j;
+                        ref         [pred_count]    = sum_r/j;
 
                         /* median */
                         if(pred_count>=3){
-                            min_y= min_x= 99999;
-                            max_y= max_x=-99999;
+                            min_y= min_x= min_r= 99999;
+                            max_y= max_x= max_r=-99999;
                         }else{
-                            min_x=min_y=max_x=max_y=0;
+                            min_x=min_y=max_x=max_y=min_r=max_r=0;
                         }
                         for(j=0; j<pred_count; j++){
                             max_x= FFMAX(max_x, mv_predictor[j][0]);
                             max_y= FFMAX(max_y, mv_predictor[j][1]);
+                            max_r= FFMAX(max_r, ref[j]);
                             min_x= FFMIN(min_x, mv_predictor[j][0]);
                             min_y= FFMIN(min_y, mv_predictor[j][1]);
+                            min_r= FFMIN(min_r, ref[j]);
                         }
                         mv_predictor[pred_count+1][0] = sum_x - max_x - min_x;
                         mv_predictor[pred_count+1][1] = sum_y - max_y - min_y;
+                        ref         [pred_count+1]    = sum_r - max_r - min_r;
 
                         if(pred_count==4){
                             mv_predictor[pred_count+1][0] /= 2;
                             mv_predictor[pred_count+1][1] /= 2;
+                            ref         [pred_count+1]    /= 2;
                         }
                         pred_count+=2;
                     }
+skip_mean_and_median:
 
                     /* zero MV */
                     pred_count++;
@@ -474,6 +534,7 @@ int score_sum=0;
                     /* last MV */
                     mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index][0];
                     mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index][1];
+                    ref         [pred_count]   = s->current_picture.ref_index[0][4*mb_xy];
                     pred_count++;
 
                     s->mv_dir = MV_DIR_FORWARD;
@@ -493,7 +554,10 @@ int score_sum=0;
                         s->current_picture.motion_val[0][mot_index][0]= s->mv[0][0][0]= mv_predictor[j][0];
                         s->current_picture.motion_val[0][mot_index][1]= s->mv[0][0][1]= mv_predictor[j][1];
 
-                        decode_mb(s);
+                        if(ref[j]<0) //predictor intra or otherwise not available
+                            continue;
+
+                        decode_mb(s, ref[j]);
 
                         if(mb_x>0 && fixed[mb_xy-1]){
                             int k;
@@ -522,11 +586,16 @@ int score_sum=0;
                         }
                     }
 score_sum+= best_score;
-//FIXME no need to set s->current_picture.motion_val[0][mot_index][0] explicit
-                    s->current_picture.motion_val[0][mot_index][0]= s->mv[0][0][0]= mv_predictor[best_pred][0];
-                    s->current_picture.motion_val[0][mot_index][1]= s->mv[0][0][1]= mv_predictor[best_pred][1];
+                    s->mv[0][0][0]= mv_predictor[best_pred][0];
+                    s->mv[0][0][1]= mv_predictor[best_pred][1];
+
+                    for(i=0; i<mot_step; i++)
+                        for(j=0; j<mot_step; j++){
+                            s->current_picture.motion_val[0][mot_index+i+j*mot_stride][0]= s->mv[0][0][0];
+                            s->current_picture.motion_val[0][mot_index+i+j*mot_stride][1]= s->mv[0][0][1];
+                        }
 
-                    decode_mb(s);
+                    decode_mb(s, ref[best_pred]);
 
 
                     if(s->mv[0][0][0] != prev_x || s->mv[0][0][1] != prev_y){
@@ -565,6 +634,12 @@ static int is_intra_more_likely(MpegEncContext *s){
             undamaged_count++;
     }
 
+    if(s->codec_id == CODEC_ID_H264){
+        H264Context *h= (void*)s;
+        if(h->ref_count[0] <= 0 || !h->ref_list[0][0].data[0])
+            return 1;
+    }
+
     if(undamaged_count < 5) return 0; //almost all MBs damaged -> use temporal prediction
 
     skip_amount= FFMAX(undamaged_count/50, 1); //check only upto 50 MBs
@@ -680,13 +755,14 @@ void ff_er_frame_end(MpegEncContext *s){
     Picture *pic= s->current_picture_ptr;
 
     if(!s->error_recognition || s->error_count==0 || s->avctx->lowres ||
+       s->picture_structure != PICT_FRAME || // we don't support ER of field pictures yet, though it should not crash if enabled
        s->error_count==3*s->mb_width*(s->avctx->skip_top + s->avctx->skip_bottom)) return;
 
     if(s->current_picture.motion_val[0] == NULL){
         av_log(s->avctx, AV_LOG_ERROR, "Warning MVs not available\n");
 
         for(i=0; i<2; i++){
-            pic->ref_index[i]= av_mallocz(size * sizeof(uint8_t));
+            pic->ref_index[i]= av_mallocz(s->mb_stride * s->mb_height * 4 * sizeof(uint8_t));
             pic->motion_val_base[i]= av_mallocz((size+4) * 2 * sizeof(uint16_t));
             pic->motion_val[i]= pic->motion_val_base[i]+4;
         }
@@ -694,11 +770,6 @@ void ff_er_frame_end(MpegEncContext *s){
         s->current_picture= *s->current_picture_ptr;
     }
 
-    for(i=0; i<2; i++){
-        if(pic->ref_index[i])
-            memset(pic->ref_index[i], 0, size * sizeof(uint8_t));
-    }
-
     if(s->avctx->debug&FF_DEBUG_ER){
         for(mb_y=0; mb_y<s->mb_height; mb_y++){
             for(mb_x=0; mb_x<s->mb_width; mb_x++){
@@ -896,7 +967,7 @@ void ff_er_frame_end(MpegEncContext *s){
 
             s->mb_x= mb_x;
             s->mb_y= mb_y;
-            decode_mb(s);
+            decode_mb(s, 0/*FIXME h264 partitioned slices need this set*/);
         }
     }
 
@@ -938,7 +1009,7 @@ void ff_er_frame_end(MpegEncContext *s){
                 s->dsp.clear_blocks(s->block[0]);
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
-                decode_mb(s);
+                decode_mb(s, 0);
             }
         }
     }else
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eval.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eval.h
index 5481b1407..63ee99b60 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eval.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/eval.h
@@ -1,6 +1,4 @@
 /*
- * simple arithmetic expression evaluator
- *
  * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
  *
  * This file is part of FFmpeg.
@@ -22,15 +20,18 @@
 
 /**
  * @file libavcodec/eval.h
- * eval header.
+ * simple arithmetic expression evaluator
  */
 
 #ifndef AVCODEC_EVAL_H
 #define AVCODEC_EVAL_H
 
+typedef struct AVExpr AVExpr;
+
 /**
  * Parses and evaluates an expression.
- * Note, this is significantly slower than ff_parse_eval()
+ * Note, this is significantly slower than ff_eval_expr().
+ *
  * @param s expression as a zero terminated string for example "1+2^3+5*5+sin(2/3)"
  * @param func1 NULL terminated array of function pointers for functions which take 1 argument
  * @param func2 NULL terminated array of function pointers for functions which take 2 arguments
@@ -42,15 +43,14 @@
  * @param opaque a pointer which will be passed to all functions from func1 and func2
  * @return the value of the expression
  */
-double ff_eval2(const char *s, const double *const_value, const char * const *const_name,
-               double (**func1)(void *, double), const char **func1_name,
-               double (**func2)(void *, double, double), const char **func2_name,
+double ff_parse_and_eval_expr(const char *s, const double *const_value, const char * const *const_name,
+               double (* const *func1)(void *, double), const char * const *func1_name,
+               double (* const *func2)(void *, double, double), const char * const *func2_name,
                void *opaque, const char **error);
 
-typedef struct ff_expr_s AVEvalExpr;
-
 /**
- * Parses a expression.
+ * Parses an expression.
+ *
  * @param s expression as a zero terminated string for example "1+2^3+5*5+sin(2/3)"
  * @param func1 NULL terminated array of function pointers for functions which take 1 argument
  * @param func2 NULL terminated array of function pointers for functions which take 2 arguments
@@ -58,21 +58,27 @@ typedef struct ff_expr_s AVEvalExpr;
  * @param func1_name NULL terminated array of zero terminated strings of func1 identifers
  * @param func2_name NULL terminated array of zero terminated strings of func2 identifers
  * @param error pointer to a char* which is set to an error message if something goes wrong
- * @return AVEvalExpr which must be freed with ff_eval_free by the user when it is not needed anymore
+ * @return AVExpr which must be freed with ff_free_expr() by the user when it is not needed anymore
  *         NULL if anything went wrong
  */
-AVEvalExpr * ff_parse(const char *s, const char * const *const_name,
-               double (**func1)(void *, double), const char **func1_name,
-               double (**func2)(void *, double, double), const char **func2_name,
+AVExpr *ff_parse_expr(const char *s, const char * const *const_name,
+               double (* const *func1)(void *, double), const char * const *func1_name,
+               double (* const *func2)(void *, double, double), const char * const *func2_name,
                const char **error);
+
 /**
  * Evaluates a previously parsed expression.
+ *
  * @param const_value a zero terminated array of values for the identifers from ff_parse const_name
  * @param opaque a pointer which will be passed to all functions from func1 and func2
  * @return the value of the expression
  */
-double ff_parse_eval(AVEvalExpr * e, const double *const_value, void *opaque);
-void ff_eval_free(AVEvalExpr * e);
+double ff_eval_expr(AVExpr * e, const double *const_value, void *opaque);
+
+/**
+ * Frees a parsed expression previously created with ff_parse_expr().
+ */
+void ff_free_expr(AVExpr *e);
 
 /**
  * Parses the string in numstr and returns its value as a double. If
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/fft.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/fft.c
index 150a59aeb..db7cf5816 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/fft.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/fft.c
@@ -26,7 +26,10 @@
  * FFT/IFFT transforms.
  */
 
-#include "dsputil.h"
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
 
 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
 #if !CONFIG_HARDCODED_TABLES
@@ -99,12 +102,18 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
 
     s->fft_permute = ff_fft_permute_c;
     s->fft_calc    = ff_fft_calc_c;
+#if CONFIG_MDCT
     s->imdct_calc  = ff_imdct_calc_c;
     s->imdct_half  = ff_imdct_half_c;
     s->mdct_calc   = ff_mdct_calc_c;
+#endif
     s->exptab1     = NULL;
     s->split_radix = 1;
 
+    #if HAVE_MMX && HAVE_YASM && ARCH_X86_32
+    ff_fft_init_mmx(s);
+    #endif
+	
     if (s->split_radix) {
         for(j=4; j<=nbits; j++) {
             ff_init_ff_cos_tabs(j);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/fft.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/fft.h
new file mode 100644
index 000000000..f3f5710ea
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/fft.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FFT_H
+#define AVCODEC_FFT_H
+
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/mem.h"
+#include "avfft.h"
+
+/* FFT computation */
+
+struct FFTContext {
+    int nbits;
+    int inverse;
+    uint16_t *revtab;
+    FFTComplex *exptab;
+    FFTComplex *exptab1; /* only used by SSE code */
+    FFTComplex *tmp_buf;
+    int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
+    int mdct_bits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
+    void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    int split_radix;
+    int permutation;
+#define FF_MDCT_PERM_NONE       0
+#define FF_MDCT_PERM_INTERLEAVE 1
+};
+
+#if CONFIG_HARDCODED_TABLES
+#define COSTABLE_CONST const
+#define SINTABLE_CONST const
+#define SINETABLE_CONST const
+#else
+#define COSTABLE_CONST
+#define SINTABLE_CONST
+#define SINETABLE_CONST
+#endif
+
+#define COSTABLE(size) \
+    COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2]
+#define SINTABLE(size) \
+    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
+#define SINETABLE(size) \
+    SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
+extern COSTABLE(16);
+extern COSTABLE(32);
+extern COSTABLE(64);
+extern COSTABLE(128);
+extern COSTABLE(256);
+extern COSTABLE(512);
+extern COSTABLE(1024);
+extern COSTABLE(2048);
+extern COSTABLE(4096);
+extern COSTABLE(8192);
+extern COSTABLE(16384);
+extern COSTABLE(32768);
+extern COSTABLE(65536);
+extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17];
+
+/**
+ * Initializes the cosine table in ff_cos_tabs[index]
+ * @param index index in ff_cos_tabs array of the table to initialize
+ */
+void ff_init_ff_cos_tabs(int index);
+
+extern SINTABLE(16);
+extern SINTABLE(32);
+extern SINTABLE(64);
+extern SINTABLE(128);
+extern SINTABLE(256);
+extern SINTABLE(512);
+extern SINTABLE(1024);
+extern SINTABLE(2048);
+extern SINTABLE(4096);
+extern SINTABLE(8192);
+extern SINTABLE(16384);
+extern SINTABLE(32768);
+extern SINTABLE(65536);
+
+/**
+ * Sets up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+int ff_fft_init(FFTContext *s, int nbits, int inverse);
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
+
+void ff_fft_init_altivec(FFTContext *s);
+void ff_fft_init_mmx(FFTContext *s);
+void ff_fft_init_arm(FFTContext *s);
+
+/**
+ * Do the permutation needed BEFORE calling ff_fft_calc().
+ */
+static av_always_inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    s->fft_permute(s, z);
+}
+/**
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+static av_always_inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    s->fft_calc(s, z);
+}
+void ff_fft_end(FFTContext *s);
+
+/* MDCT computation */
+
+static av_always_inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->imdct_calc(s, output, input);
+}
+static av_always_inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->imdct_half(s, output, input);
+}
+
+static av_always_inline void ff_mdct_calc(FFTContext *s, FFTSample *output,
+                                const FFTSample *input)
+{
+    s->mdct_calc(s, output, input);
+}
+
+/**
+ * Generate a Kaiser-Bessel Derived Window.
+ * @param   window  pointer to half window
+ * @param   alpha   determines window shape
+ * @param   n       size of half window
+ */
+void ff_kbd_window_init(float *window, float alpha, int n);
+
+/**
+ * Generate a sine window.
+ * @param   window  pointer to half window
+ * @param   n       size of half window
+ */
+void ff_sine_window_init(float *window, int n);
+
+/**
+ * initialize the specified entry of ff_sine_windows
+ */
+void ff_init_ff_sine_windows(int index);
+extern SINETABLE(  32);
+extern SINETABLE(  64);
+extern SINETABLE( 128);
+extern SINETABLE( 256);
+extern SINETABLE( 512);
+extern SINETABLE(1024);
+extern SINETABLE(2048);
+extern SINETABLE(4096);
+extern SINETABLE_CONST float * const ff_sine_windows[13];
+
+int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_end(FFTContext *s);
+
+/* Real Discrete Fourier Transform */
+
+struct RDFTContext {
+    int nbits;
+    int inverse;
+    int sign_convention;
+
+    /* pre/post rotation tables */
+    const FFTSample *tcos;
+    SINTABLE_CONST FFTSample *tsin;
+    FFTContext fft;
+    void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
+};
+
+/**
+ * Sets up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
+void ff_rdft_end(RDFTContext *s);
+
+static av_always_inline void ff_rdft_calc(RDFTContext *s, FFTSample *data)
+{
+    s->rdft_calc(s, data);
+}
+
+/* Discrete Cosine Transform */
+
+struct DCTContext {
+    int nbits;
+    int inverse;
+    RDFTContext rdft;
+    const float *costab;
+    FFTSample *csc2;
+    void (*dct_calc)(struct DCTContext *s, FFTSample *data);
+};
+
+/**
+ * Sets up DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+int  ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
+void ff_dct_calc(DCTContext *s, FFTSample *data);
+void ff_dct_end (DCTContext *s);
+
+#endif /* AVCODEC_FFT_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flv.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flv.h
index e12b3b026..eb10f2260 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flv.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flv.h
@@ -20,6 +20,10 @@
 #ifndef AVCODEC_FLV_H
 #define AVCODEC_FLV_H
 
+#include "mpegvideo.h"
+#include "get_bits.h"
+#include "put_bits.h"
+
 void ff_flv_encode_picture_header(MpegEncContext * s, int picture_number);
 void ff_flv2_encode_ac_esc(PutBitContext *pb, int slevel, int level, int run, int last);
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/get_bits.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/get_bits.h
index c284ab50e..947459d15 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/get_bits.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/get_bits.h
@@ -45,27 +45,6 @@
 //#define A32_BITSTREAM_READER
 #endif
 
-#if ARCH_X86
-// avoid +32 for shift optimization (gcc should do that ...)
-static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
-    __asm__ ("sarl %1, %0\n\t"
-         : "+r" (a)
-         : "ic" ((uint8_t)(-s))
-    );
-    return a;
-}
-static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
-    __asm__ ("shrl %1, %0\n\t"
-         : "+r" (a)
-         : "ic" ((uint8_t)(-s))
-    );
-    return a;
-}
-#else
-#    define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
-#    define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
-#endif
-
 /* bit input */
 /* buffer, buffer_end and size_in_bits must be present and used by every reader */
 typedef struct GetBitContext {
@@ -183,10 +162,10 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
 
 # ifdef ALT_BITSTREAM_READER_LE
 #   define SHOW_UBITS(name, gb, num)\
-        ((name##_cache) & (NEG_USR32(0xffffffff,num)))
+        zero_extend(name##_cache, num)
 
 #   define SHOW_SBITS(name, gb, num)\
-        NEG_SSR32((name##_cache)<<(32-(num)), num)
+        sign_extend(name##_cache, num)
 # else
 #   define SHOW_UBITS(name, gb, num)\
         NEG_USR32(name##_cache, num)
@@ -439,7 +418,7 @@ static inline void skip_bits1(GetBitContext *s){
  * reads 0-32 bits.
  */
 static inline unsigned int get_bits_long(GetBitContext *s, int n){
-    if(n<=17) return get_bits(s, n);
+    if(n<=MIN_CACHE_BITS) return get_bits(s, n);
     else{
 #ifdef ALT_BITSTREAM_READER_LE
         int ret= get_bits(s, 16);
@@ -462,7 +441,7 @@ static inline int get_sbits_long(GetBitContext *s, int n) {
  * shows 0-32 bits.
  */
 static inline unsigned int show_bits_long(GetBitContext *s, int n){
-    if(n<=17) return show_bits(s, n);
+    if(n<=MIN_CACHE_BITS) return show_bits(s, n);
     else{
         GetBitContext gb= *s;
         return get_bits_long(&gb, n);
@@ -547,9 +526,9 @@ void free_vlc(VLC *vlc);
 
 /**
  *
- * if the vlc code is invalid and max_depth=1 than no bits will be removed
- * if the vlc code is invalid and max_depth>1 than the number of bits removed
- * is undefined
+ * If the vlc code is invalid and max_depth=1, then no bits will be removed.
+ * If the vlc code is invalid and max_depth>1, then the number of bits removed
+ * is undefined.
  */
 #define GET_VLC(code, name, gb, table, bits, max_depth)\
 {\
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h
index 3021a5600..e7719d65b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h
@@ -37,6 +37,7 @@ extern "C" {
 #include <stdint.h>
 #include "get_bits.h"
 #include "put_bits.h"
+#include "libavutil/common.h"
 
 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h261dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h261dec.c
index 3287f5e42..fb98c8f3f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h261dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h261dec.c
@@ -28,6 +28,7 @@
 #include "dsputil.h"
 #include "avcodec.h"
 #include "mpegvideo.h"
+#include "h263.h"
 #include "h261.h"
 #include "h261data.h"
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.c
index a07c654e1..7e1009d2a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.c
@@ -71,10 +71,10 @@ void ff_h263_update_motion_val(MpegEncContext * s){
                 s->p_field_mv_table[i][0][mb_xy][0]= s->mv[0][i][0];
                 s->p_field_mv_table[i][0][mb_xy][1]= s->mv[0][i][1];
             }
-            s->current_picture.ref_index[0][xy           ]=
-            s->current_picture.ref_index[0][xy        + 1]= s->field_select[0][0];
-            s->current_picture.ref_index[0][xy + wrap    ]=
-            s->current_picture.ref_index[0][xy + wrap + 1]= s->field_select[0][1];
+            s->current_picture.ref_index[0][4*mb_xy    ]=
+            s->current_picture.ref_index[0][4*mb_xy + 1]= s->field_select[0][0];
+            s->current_picture.ref_index[0][4*mb_xy + 2]=
+            s->current_picture.ref_index[0][4*mb_xy + 3]= s->field_select[0][1];
         }
 
         /* no update if 8X8 because it has been done during parsing */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h
index 9adc17843..7502ecfcb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h
@@ -20,6 +20,12 @@
 #ifndef AVCODEC_H263_H
 #define AVCODEC_H263_H
 
+#include <stdint.h>
+#include "libavutil/rational.h"
+#include "get_bits.h"
+#include "mpegvideo.h"
+#include "rl.h"
+
 // The defines below define the number of bits that are read at once for
 // reading vlc values. Changing these may improve speed and data cache needs
 // be aware though that decreasing them may need the number of stages that is
@@ -79,7 +85,6 @@ int h263_decode_picture_header(MpegEncContext *s);
 int ff_h263_decode_gob_header(MpegEncContext *s);
 void ff_h263_update_motion_val(MpegEncContext * s);
 void ff_h263_loop_filter(MpegEncContext * s);
-void ff_set_qscale(MpegEncContext * s, int qscale);
 int ff_h263_decode_mba(MpegEncContext *s);
 void ff_h263_encode_mba(MpegEncContext *s);
 void ff_init_qscale_tab(MpegEncContext *s);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263_parser.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263_parser.c
index 44f632e92..e1936c1ed 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263_parser.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263_parser.c
@@ -20,11 +20,12 @@
  */
 
 /**
- * @file h263_parser.c
+ * @file libavcodec/h263_parser.c
  * H.263 parser
  */
 
 #include "parser.h"
+#include "h263_parser.h"
 
 int ff_h263_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
     int vop_found, i;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
index 4313e8b4b..ffb380bc2 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
@@ -278,7 +278,7 @@ static int decode_slice(MpegEncContext *s){
     }
 
     if(s->workaround_bugs&FF_BUG_AUTODETECT){
-        if(s->padding_bug_score > -2 && !s->data_partitioning /*&& (s->divx_version || !s->resync_marker)*/)
+        if(s->padding_bug_score > -2 && !s->data_partitioning /*&& (s->divx_version>=0 || !s->resync_marker)*/)
             s->workaround_bugs |=  FF_BUG_NO_PADDING;
         else
             s->workaround_bugs &= ~FF_BUG_NO_PADDING;
@@ -414,26 +414,26 @@ retry:
 
     avctx->has_b_frames= !s->low_delay;
 
-    if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){
+    if(s->xvid_build==-1 && s->divx_version==-1 && s->lavc_build==-1){
         if(s->stream_codec_tag == AV_RL32("XVID") ||
            s->codec_tag == AV_RL32("XVID") || s->codec_tag == AV_RL32("XVIX") ||
            s->codec_tag == AV_RL32("RMP4"))
-            s->xvid_build= -1;
+            s->xvid_build= 0;
 #if 0
         if(s->codec_tag == AV_RL32("DIVX") && s->vo_type==0 && s->vol_control_parameters==1
            && s->padding_bug_score > 0 && s->low_delay) // XVID with modified fourcc
-            s->xvid_build= -1;
+            s->xvid_build= 0;
 #endif
     }
 
-    if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){
+    if(s->xvid_build==-1 && s->divx_version==-1 && s->lavc_build==-1){
         if(s->codec_tag == AV_RL32("DIVX") && s->vo_type==0 && s->vol_control_parameters==0)
             s->divx_version= 400; //divx 4
     }
 
-    if(s->xvid_build && s->divx_version){
+    if(s->xvid_build>=0 && s->divx_version>=0){
         s->divx_version=
-        s->divx_build= 0;
+        s->divx_build= -1;
     }
 
     if(s->workaround_bugs&FF_BUG_AUTODETECT){
@@ -452,16 +452,16 @@ retry:
             s->workaround_bugs|= FF_BUG_QPEL_CHROMA2;
         }
 
-        if(s->xvid_build && s->xvid_build<=3)
+        if(s->xvid_build<=3U)
             s->padding_bug_score= 256*256*256*64;
 
-        if(s->xvid_build && s->xvid_build<=1)
+        if(s->xvid_build<=1U)
             s->workaround_bugs|= FF_BUG_QPEL_CHROMA;
 
-        if(s->xvid_build && s->xvid_build<=12)
+        if(s->xvid_build<=12U)
             s->workaround_bugs|= FF_BUG_EDGE;
 
-        if(s->xvid_build && s->xvid_build<=32)
+        if(s->xvid_build<=32U)
             s->workaround_bugs|= FF_BUG_DC_CLIP;
 
 #define SET_QPEL_FUNC(postfix1, postfix2) \
@@ -469,30 +469,30 @@ retry:
     s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;\
     s->dsp.avg_ ## postfix1 = ff_avg_ ## postfix2;
 
-        if(s->lavc_build && s->lavc_build<4653)
+        if(s->lavc_build<4653U)
             s->workaround_bugs|= FF_BUG_STD_QPEL;
 
-        if(s->lavc_build && s->lavc_build<4655)
+        if(s->lavc_build<4655U)
             s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE;
 
-        if(s->lavc_build && s->lavc_build<4670){
+        if(s->lavc_build<4670U){
             s->workaround_bugs|= FF_BUG_EDGE;
         }
 
-        if(s->lavc_build && s->lavc_build<=4712)
+        if(s->lavc_build<=4712U)
             s->workaround_bugs|= FF_BUG_DC_CLIP;
 
-        if(s->divx_version)
+        if(s->divx_version>=0)
             s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE;
 //printf("padding_bug_score: %d\n", s->padding_bug_score);
         if(s->divx_version==501 && s->divx_build==20020416)
             s->padding_bug_score= 256*256*256*64;
 
-        if(s->divx_version && s->divx_version<500){
+        if(s->divx_version<500U){
             s->workaround_bugs|= FF_BUG_EDGE;
         }
 
-        if(s->divx_version)
+        if(s->divx_version>=0)
             s->workaround_bugs|= FF_BUG_HPEL_CHROMA;
 #if 0
         if(s->divx_version==500)
@@ -501,11 +501,11 @@ retry:
         /* very ugly XVID padding bug detection FIXME/XXX solve this differently
          * Let us hope this at least works.
          */
-        if(   s->resync_marker==0 && s->data_partitioning==0 && s->divx_version==0
+        if(   s->resync_marker==0 && s->data_partitioning==0 && s->divx_version==-1
            && s->codec_id==CODEC_ID_MPEG4 && s->vo_type==0)
             s->workaround_bugs|= FF_BUG_NO_PADDING;
 
-        if(s->lavc_build && s->lavc_build<4609) //FIXME not sure about the version num but a 4609 file seems ok
+        if(s->lavc_build<4609U) //FIXME not sure about the version num but a 4609 file seems ok
             s->workaround_bugs|= FF_BUG_NO_PADDING;
 #endif
     }
@@ -540,7 +540,7 @@ retry:
 #endif
 
 #if HAVE_MMX
-    if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & FF_MM_MMX)){
+    if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & FF_MM_MMX)){
         avctx->idct_algo= FF_IDCT_XVIDMMX;
         avctx->coded_width= 0; // force reinit
 //        dsputil_init(&s->dsp, avctx);
@@ -642,6 +642,7 @@ retry:
         }
 
     assert(s->bitstream_buffer_size==0);
+frame_end:
     /* divx 5.01+ bistream reorder stuff */
     if(s->codec_id==CODEC_ID_MPEG4 && s->divx_packed){
         int current_pos= get_bits_count(&s->gb)>>3;
@@ -656,7 +657,7 @@ retry:
                 }
             }
         }
-        if(s->gb.buffer == s->bitstream_buffer && buf_size>7 && s->xvid_build){ //xvid style
+        if(s->gb.buffer == s->bitstream_buffer && buf_size>7 && s->xvid_build>=0){ //xvid style
             startcode_found=1;
             current_pos=0;
         }
@@ -676,7 +677,6 @@ retry:
 intrax8_decoded:
     ff_er_frame_end(s);
 
-frame_end:
     MPV_frame_end(s);
 
 assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
index 3490e3af1..c3a5f4f70 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
@@ -51,15 +51,12 @@ static const uint8_t div6[52]={
 };
 
 void ff_h264_write_back_intra_pred_mode(H264Context *h){
-    const int mb_xy= h->mb_xy;
+    int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
 
-    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
-    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
-    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
-    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
-    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
-    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
-    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
+    AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
+    mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
+    mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
+    mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
 }
 
 /**
@@ -155,11 +152,11 @@ const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_l
 # if HAVE_FAST_64BIT
 #   define RS 7
     for(i=0; i+1<length; i+=9){
-        if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
+        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
 # else
 #   define RS 3
     for(i=0; i+1<length; i+=5){
-        if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
+        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
 # endif
             continue;
         if(i>0 && !src[i]) i--;
@@ -427,21 +424,21 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
                     x_offset, y_offset, qpix_put, chroma_put);
 
         if(h->use_weight == 2){
-            int weight0 = h->implicit_weight[refn0][refn1];
+            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
             int weight1 = 64 - weight0;
             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
         }else{
             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
-                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
-                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
+                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
+                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
-                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
+                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
+                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
-                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
+                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
+                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
         }
     }else{
         int list = list1 ? 1 : 0;
@@ -452,12 +449,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
                     qpix_put, chroma_put);
 
         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
-                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
+                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
         if(h->use_weight_chroma){
             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
+                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
-                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
+                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
         }
     }
 }
@@ -470,7 +467,7 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                            int list0, int list1){
     if((h->use_weight==2 && list0 && list1
-        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
+        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
        || h->use_weight==1)
         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                          x_offset, y_offset, qpix_put, chroma_put,
@@ -599,7 +596,7 @@ static void free_tables(H264Context *h){
     av_freep(&h->list_counts);
 
     av_freep(&h->mb2b_xy);
-    av_freep(&h->mb2b8_xy);
+    av_freep(&h->mb2br_xy);
 
     for(i = 0; i < MAX_THREADS; i++) {
         hx = h->thread_context[i];
@@ -617,7 +614,7 @@ static void free_tables(H264Context *h){
 
 static void init_dequant8_coeff_table(H264Context *h){
     int i,q,x;
-    const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
+    const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
     h->dequant8_coeff[0] = h->dequant8_buffer[0];
     h->dequant8_coeff[1] = h->dequant8_buffer[1];
 
@@ -640,7 +637,7 @@ static void init_dequant8_coeff_table(H264Context *h){
 
 static void init_dequant4_coeff_table(H264Context *h){
     int i,j,q,x;
-    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
+    const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
     for(i=0; i<6; i++ ){
         h->dequant4_coeff[i] = h->dequant4_buffer[i];
         for(j=0; j<i; j++){
@@ -683,33 +680,33 @@ static void init_dequant_tables(H264Context *h){
 int ff_h264_alloc_tables(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int big_mb_num= s->mb_stride * (s->mb_height+1);
+    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
     int x,y;
 
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t), fail)
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)
 
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 32 * sizeof(uint8_t), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
 
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
 
     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
 
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
-    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
+    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
     for(y=0; y<s->mb_height; y++){
         for(x=0; x<s->mb_width; x++){
             const int mb_xy= x + y*s->mb_stride;
             const int b_xy = 4*x + 4*y*h->b_stride;
-            const int b8_xy= 2*x + 2*y*h->b8_stride;
 
             h->mb2b_xy [mb_xy]= b_xy;
-            h->mb2b8_xy[mb_xy]= b8_xy;
+            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
         }
     }
 
@@ -727,16 +724,17 @@ fail:
 /**
  * Mimic alloc_tables(), but for every context thread.
  */
-static void clone_tables(H264Context *dst, H264Context *src){
-    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
+static void clone_tables(H264Context *dst, H264Context *src, int i){
+    MpegEncContext * const s = &src->s;
+    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
     dst->non_zero_count           = src->non_zero_count;
     dst->slice_table              = src->slice_table;
     dst->cbp_table                = src->cbp_table;
     dst->mb2b_xy                  = src->mb2b_xy;
-    dst->mb2b8_xy                 = src->mb2b8_xy;
+    dst->mb2br_xy                 = src->mb2br_xy;
     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
-    dst->mvd_table[0]             = src->mvd_table[0];
-    dst->mvd_table[1]             = src->mvd_table[1];
+    dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
+    dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
     dst->direct_table             = src->direct_table;
     dst->list_counts              = src->list_counts;
 
@@ -752,6 +750,9 @@ static int context_init(H264Context *h){
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
 
+    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
+    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
+
     return 0;
 fail:
     return -1; // free_tables will clean up for us
@@ -764,6 +765,7 @@ static av_cold void common_init(H264Context *h){
     s->height = s->avctx->height;
     s->codec_id= s->avctx->codec->id;
 
+    ff_h264dsp_init(&h->h264dsp);
     ff_h264_pred_init(&h->hpc, s->codec_id);
 
     h->dequant_coeff_pps= -1;
@@ -814,15 +816,22 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
 
     ff_h264_decode_init_vlc();
 
+    h->thread_context[0] = h;
+    h->outputed_poc = INT_MIN;
+    h->prev_poc_msb= 1<<16;
+    h->x264_build = -1;
+    ff_h264_reset_sei(h);
+
     /* ffdshow custom code (begin) */
     h->is_avc = avcodec_h264_decode_init_is_avc(avctx);
     h->got_avcC = 0;
     /* ffdshow custom code (end) */
 
-    h->thread_context[0] = h;
-    h->outputed_poc = INT_MIN;
-    h->prev_poc_msb= 1<<16;
-    ff_h264_reset_sei(h);
+    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
+        s->avctx->has_b_frames = h->sps.num_reorder_frames;
+        s->low_delay = 0;
+    }
+
     return 0;
 }
 
@@ -861,9 +870,8 @@ int ff_h264_frame_start(H264Context *h){
         if(!h->thread_context[i]->s.obmc_scratchpad)
             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
 
-    /* some macroblocks will be accessed before they're available */
-    if(FRAME_MBAFF || s->avctx->thread_count > 1)
-        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
+    /* some macroblocks can be accessed before they're available in case of lost slices, MBAFF or threading */
+    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
 
 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
 
@@ -921,11 +929,8 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
 
 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
     MpegEncContext * const s = &h->s;
-    int temp8, i;
-    uint64_t temp64;
     int deblock_left;
     int deblock_top;
-    int mb_xy;
     int top_idx = 1;
     uint8_t *top_border_m1;
     uint8_t *top_border;
@@ -940,9 +945,8 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c
     }
 
     if(h->deblocking_filter == 2) {
-        mb_xy = h->mb_xy;
-        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
-        deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
+        deblock_left = h->left_type[0];
+        deblock_top  = h->top_type;
     } else {
         deblock_left = (s->mb_x > 0);
         deblock_top =  (s->mb_y > !!MB_FIELD);
@@ -1064,8 +1068,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             idct_dc_add =
                             idct_add    = s->dsp.add_pixels8;
                         }else{
-                            idct_dc_add = s->dsp.h264_idct8_dc_add;
-                            idct_add    = s->dsp.h264_idct8_add;
+                            idct_dc_add = h->h264dsp.h264_idct8_dc_add;
+                            idct_add    = h->h264dsp.h264_idct8_add;
                         }
                         for(i=0; i<16; i+=4){
                             uint8_t * const ptr= dest_y + block_offset[i];
@@ -1089,8 +1093,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             idct_dc_add =
                             idct_add    = s->dsp.add_pixels4;
                         }else{
-                            idct_dc_add = s->dsp.h264_idct_dc_add;
-                            idct_add    = s->dsp.h264_idct_add;
+                            idct_dc_add = h->h264dsp.h264_idct_dc_add;
+                            idct_add    = h->h264dsp.h264_idct_add;
                         }
                         for(i=0; i<16; i++){
                             uint8_t * const ptr= dest_y + block_offset[i];
@@ -1141,7 +1145,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
             hl_motion(h, dest_y, dest_cb, dest_cr,
                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
+                      h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
         }
 
 
@@ -1158,7 +1162,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             }
                         }
                     }else{
-                         s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+                         h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                     }
                 }else if(h->cbp&15){
                     if(transform_bypass){
@@ -1171,9 +1175,9 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                         }
                     }else{
                         if(IS_8x8DCT(mb_type)){
-                            s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+                            h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                         }else{
-                            s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+                            h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                         }
                     }
                 }
@@ -1204,8 +1208,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                 if(is_h264){
-                    idct_add = s->dsp.h264_idct_add;
-                    idct_dc_add = s->dsp.h264_idct_dc_add;
+                    idct_add = h->h264dsp.h264_idct_add;
+                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                     for(i=16; i<16+8; i++){
                         if(h->non_zero_count_cache[ scan8[i] ])
                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
@@ -1223,25 +1227,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
             }
         }
     }
-#if ENABLE_SLICE_MT_PATCH
-    if((s->avctx->thread_count < 2) && (h->cbp || IS_INTRA(mb_type)))
-        s->dsp.clear_blocks(h->mb);
-#else
     if(h->cbp || IS_INTRA(mb_type))
         s->dsp.clear_blocks(h->mb);
-#endif
-
-    if(h->deblocking_filter && 0) {
-        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
-        fill_filter_caches(h, mb_type); //FIXME don't fill stuff which isn't used by filter_mb
-        h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
-        h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
-        if (!simple && FRAME_MBAFF) {
-            ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
-        } else {
-            ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
-        }
-    }
 }
 
 /**
@@ -1289,16 +1276,16 @@ static int pred_weight_table(H264Context *h){
 
             luma_weight_flag= get_bits1(&s->gb);
             if(luma_weight_flag){
-                h->luma_weight[list][i]= get_se_golomb(&s->gb);
-                h->luma_offset[list][i]= get_se_golomb(&s->gb);
-                if(   h->luma_weight[list][i] != luma_def
-                   || h->luma_offset[list][i] != 0) {
+                h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
+                h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
+                if(   h->luma_weight[i][list][0] != luma_def
+                   || h->luma_weight[i][list][1] != 0) {
                     h->use_weight= 1;
                     h->luma_weight_flag[list]= 1;
                 }
             }else{
-                h->luma_weight[list][i]= luma_def;
-                h->luma_offset[list][i]= 0;
+                h->luma_weight[i][list][0]= luma_def;
+                h->luma_weight[i][list][1]= 0;
             }
 
             if(CHROMA){
@@ -1306,10 +1293,10 @@ static int pred_weight_table(H264Context *h){
                 if(chroma_weight_flag){
                     int j;
                     for(j=0; j<2; j++){
-                        h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
-                        h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
-                        if(   h->chroma_weight[list][i][j] != chroma_def
-                           || h->chroma_offset[list][i][j] != 0) {
+                        h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
+                        h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
+                        if(   h->chroma_weight[i][list][j][0] != chroma_def
+                           || h->chroma_weight[i][list][j][1] != 0) {
                             h->use_weight_chroma= 1;
                             h->chroma_weight_flag[list]= 1;
                         }
@@ -1317,8 +1304,8 @@ static int pred_weight_table(H264Context *h){
                 }else{
                     int j;
                     for(j=0; j<2; j++){
-                        h->chroma_weight[list][i][j]= chroma_def;
-                        h->chroma_offset[list][i][j]= 0;
+                        h->chroma_weight[i][list][j][0]= chroma_def;
+                        h->chroma_weight[i][list][j][1]= 0;
                     }
                 }
             }
@@ -1329,43 +1316,62 @@ static int pred_weight_table(H264Context *h){
     return 0;
 }
 
-static void implicit_weight_table(H264Context *h){
+/**
+ * Initialize implicit_weight table.
+ * @param field 0/1 initializes the weights of that field for interlaced MBAFF,
+ *              -1 initializes the rest
+ */
+static void implicit_weight_table(H264Context *h, int field){
     MpegEncContext * const s = &h->s;
-    int ref0, ref1, i;
-    int cur_poc = s->current_picture_ptr->poc;
+    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
 
     for (i = 0; i < 2; i++) {
         h->luma_weight_flag[i]   = 0;
         h->chroma_weight_flag[i] = 0;
     }
 
-    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
+    if(field < 0){
+        cur_poc = s->current_picture_ptr->poc;
+    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
         h->use_weight= 0;
         h->use_weight_chroma= 0;
         return;
     }
+        ref_start= 0;
+        ref_count0= h->ref_count[0];
+        ref_count1= h->ref_count[1];
+    }else{
+        cur_poc = s->current_picture_ptr->field_poc[field];
+        ref_start= 16;
+        ref_count0= 16+2*h->ref_count[0];
+        ref_count1= 16+2*h->ref_count[1];
+    }
 
     h->use_weight= 2;
     h->use_weight_chroma= 2;
     h->luma_log2_weight_denom= 5;
     h->chroma_log2_weight_denom= 5;
 
-    for(ref0=0; ref0 < h->ref_count[0]; ref0++){
+    for(ref0=ref_start; ref0 < ref_count0; ref0++){
         int poc0 = h->ref_list[0][ref0].poc;
-        for(ref1=0; ref1 < h->ref_count[1]; ref1++){
+        for(ref1=ref_start; ref1 < ref_count1; ref1++){
             int poc1 = h->ref_list[1][ref1].poc;
             int td = av_clip(poc1 - poc0, -128, 127);
+            int w= 32;
             if(td){
                 int tb = av_clip(cur_poc - poc0, -128, 127);
                 int tx = (16384 + (FFABS(td) >> 1)) / td;
-                int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
-                if(dist_scale_factor < -64 || dist_scale_factor > 128)
-                    h->implicit_weight[ref0][ref1] = 32;
-                else
-                    h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
-            }else
-                h->implicit_weight[ref0][ref1] = 32;
+                int dist_scale_factor = (tb*tx + 32) >> 8;
+                if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
+                    w = 64 - dist_scale_factor;
+            }
+            if(field<0){
+                h->implicit_weight[ref0][ref1][0]=
+                h->implicit_weight[ref0][ref1][1]= w;
+            }else{
+                h->implicit_weight[ref0][ref1][field]=w;
+            }
         }
     }
 }
@@ -1483,9 +1489,8 @@ static int init_poc(H264Context *h){
  * initialize scan tables
  */
 static void init_scan_tables(H264Context *h){
-    MpegEncContext * const s = &h->s;
     int i;
-    if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
+    if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
     }else{
@@ -1496,7 +1501,7 @@ static void init_scan_tables(H264Context *h){
 #undef T
         }
     }
-    if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
+    if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){
         memcpy(h->zigzag_scan8x8,       ff_zigzag_direct,     64*sizeof(uint8_t));
         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
@@ -1567,7 +1572,7 @@ static void field_end(H264Context *h){
 /**
  * Replicates H264 "master" context to thread contexts.
  */
-static void clone_slice(H264Context *dst, H264Context *src, int full) /* ffdshow custom code */
+static void clone_slice(H264Context *dst, H264Context *src)
 {
     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
@@ -1589,63 +1594,6 @@ static void clone_slice(H264Context *dst, H264Context *src, int full) /* ffdshow
 
     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
-
-    /* ffdshow custom code (begin) */
-    if(!full)
-        return;
-
-    dst->slice_type            = src->slice_type;
-
-    dst->mb_linesize           = src->mb_linesize;
-
-    dst->sps                   = src->sps;
-    dst->pps                   = src->pps;
-
-    dst->mb_mbaff              = src->mb_mbaff;
-    dst->mb_aff_frame          = src->mb_aff_frame;
-
-    dst->s.me.qpel_put         = src->s.me.qpel_put;
-    dst->s.me.qpel_avg         = src->s.me.qpel_avg;
-
-    /* weighted motion pred */
-
-    dst->use_weight            = src->use_weight;
-    if(dst->use_weight != 0) {
-        dst->use_weight_chroma        = src->use_weight_chroma;
-        dst->luma_log2_weight_denom   = src->luma_log2_weight_denom;
-        dst->chroma_log2_weight_denom = src->chroma_log2_weight_denom;
-
-        memcpy(dst->luma_weight,     src->luma_weight,     sizeof(src->luma_weight));
-        memcpy(dst->luma_offset,     src->luma_offset,     sizeof(src->luma_offset));
-        memcpy(dst->chroma_weight,   src->chroma_weight,   sizeof(src->chroma_weight));
-        memcpy(dst->chroma_offset,   src->chroma_offset,   sizeof(src->chroma_offset));
-
-        memcpy(dst->implicit_weight, src->implicit_weight, sizeof(src->implicit_weight));
-    }
-
-    /* deblocking */
-    dst->deblocking_filter     = src->deblocking_filter;
-    dst->slice_alpha_c0_offset = src->slice_alpha_c0_offset;
-    dst->slice_beta_offset     = src->slice_beta_offset;
-    dst->last_qscale_diff      = src->last_qscale_diff;
-    memcpy(dst->ref2frm, src->ref2frm, sizeof(src->ref2frm));
-
-    dst->slice_num             = src->slice_num;
-    dst->slice_type_nos        = src->slice_type_nos;
-
-    // FIXME not sure if below are necessary
-    dst->direct_spatial_mv_pred= src->direct_spatial_mv_pred;
-    dst->s.qscale              = src->s.qscale;
-    dst->b_stride              = src->b_stride;
-    dst->b8_stride             = src->b8_stride;
-    dst->list_count            = src->list_count;
-    dst->emu_edge_height       = src->emu_edge_height;
-    dst->emu_edge_width        = src->emu_edge_width;
-    dst->s.picture_structure   = src->s.picture_structure;
-    dst->s.codec_id              = src->s.codec_id;
-    dst->s.flags               = src->s.flags;
-    dst->s.flags2              = src->s.flags2;
-    /* ffdshow custom code (end) */
 }
 
 /**
@@ -1709,11 +1657,6 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     h->slice_type_nos= slice_type & 3;
 
     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
-    if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
-        av_log(h->s.avctx, AV_LOG_ERROR,
-               "B picture before any references, skipping\n");
-        return -1;
-    }
 
     pps_id= get_ue_golomb(&s->gb);
     if(pps_id>=MAX_PPS_COUNT){
@@ -1732,6 +1675,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     }
     h->sps = *h0->sps_buffers[h->pps.sps_id];
 
+    s->avctx->profile = h->sps.profile_idc;
+    s->avctx->level   = h->sps.level_idc;
+    s->avctx->refs    = h->sps.ref_frame_count;
+
     if(h == h0 && h->dequant_coeff_pps != pps_id){
         h->dequant_coeff_pps = pps_id;
         init_dequant_tables(h);
@@ -1741,7 +1688,6 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
 
     h->b_stride=  s->mb_width*4;
-    h->b8_stride= s->mb_width*2;
 
     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
     if(h->sps.frame_mbs_only_flag)
@@ -1750,7 +1696,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
 
     if (s->context_initialized
-        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
+        && (   s->width != s->avctx->width || s->height != s->avctx->height
+            || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
         if(h != h0)
             return -1;   // width / height changed during parallelized decoding
         free_tables(h);
@@ -1776,16 +1723,11 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         }
 
         if(h->sps.timing_info_present_flag){
-            #if __STDC_VERSION__ >= 199901L
-            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
-            #else
-            s->avctx->time_base.num = h->sps.num_units_in_tick * 2;
-            s->avctx->time_base.den = h->sps.time_scale;
-            #endif
-            if(h->x264_build > 0 && h->x264_build < 44)
-                s->avctx->time_base.den *= 2;
+            int64_t den= h->sps.time_scale;
+            if(h->x264_build < 44U)
+                den *= 2;
             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
-                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
+                      h->sps.num_units_in_tick * 2, den, 1<<30);
         }
 
         if (MPV_common_init(s) < 0)
@@ -1801,10 +1743,11 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
+            c->h264dsp = h->h264dsp;
             c->sps = h->sps;
             c->pps = h->pps;
             init_scan_tables(c);
-            clone_tables(c, h);
+            clone_tables(c, h, i);
         }
 
         for(i = 0; i < s->avctx->thread_count; i++)
@@ -1912,7 +1855,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         }
     }
     if(h != h0)
-        clone_slice(h, h0, 0); /* ffdshow custom code */
+        clone_slice(h, h0);
 
     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
 
@@ -2008,9 +1951,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
         pred_weight_table(h);
-    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
-        implicit_weight_table(h);
-    else {
+    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
+        implicit_weight_table(h, -1);
+    }else {
         h->use_weight = 0;
         for (i = 0; i < 2; i++) {
             h->luma_weight_flag[i]   = 0;
@@ -2021,9 +1964,15 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     if(h->nal_ref_idc)
         ff_h264_decode_ref_pic_marking(h0, &s->gb);
 
-    if(FRAME_MBAFF)
+    if(FRAME_MBAFF){
         ff_h264_fill_mbaff_ref_list(h);
 
+        if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
+            implicit_weight_table(h, 0);
+            implicit_weight_table(h, 1);
+        }
+    }
+
     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
         ff_h264_direct_dist_scale_factor(h);
     ff_h264_direct_ref_list_init(h);
@@ -2153,8 +2102,6 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
 
-    s->avctx->refs= h->sps.ref_frame_count;
-
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
                h->slice_num,
@@ -2187,6 +2134,222 @@ int ff_h264_get_slice_type(const H264Context *h)
     }
 }
 
+/**
+ * Fill the neighbour index/type, nnz, cbp, ref and mv caches of the current macroblock for the loop filter.
+ * @return non zero if the loop filter can be skipped
+ */
+static int fill_filter_caches(H264Context *h, int mb_type){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= h->mb_xy;
+    int top_xy, left_xy[2];
+    int top_type, left_type[2];
+
+    top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
+
+    //FIXME deblocking could skip the intra and nnz parts.
+
+    /* Wow, what a mess, why didn't they simplify the interlacing & intra
+     * stuff, I can't imagine that these complex rules are worth it. */
+
+    left_xy[1] = left_xy[0] = mb_xy-1;
+    if(FRAME_MBAFF){ /* adjust the neighbour indices when field/frame coding of the neighbours differs */
+        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
+        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
+        if(s->mb_y&1){
+            if (left_mb_field_flag != curr_mb_field_flag) {
+                left_xy[0] -= s->mb_stride;
+            }
+        }else{
+            if(curr_mb_field_flag){
+                top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
+            }
+            if (left_mb_field_flag != curr_mb_field_flag) {
+                left_xy[1] += s->mb_stride;
+            }
+        }
+    }
+
+    h->top_mb_xy = top_xy; /* store the neighbour indices in the context */
+    h->left_mb_xy[0] = left_xy[0];
+    h->left_mb_xy[1] = left_xy[1];
+    {
+        //for sufficiently low qp, filtering wouldn't do anything
+        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
+        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
+        int qp = s->current_picture.qscale_table[mb_xy];
+        if(qp <= qp_thresh
+           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
+           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
+            if(!FRAME_MBAFF)
+                return 1; /* qp and the averages with left/top are all <= qp_thresh: filter can be skipped */
+            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
+               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
+                return 1;
+        }
+    }
+
+    top_type     = s->current_picture.mb_type[top_xy]    ;
+    left_type[0] = s->current_picture.mb_type[left_xy[0]];
+    left_type[1] = s->current_picture.mb_type[left_xy[1]];
+    if(h->deblocking_filter == 2){ /* mode 2: neighbours belonging to another slice get type 0 */
+        if(h->slice_table[top_xy     ] != h->slice_num) top_type= 0;
+        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
+    }else{ /* otherwise only neighbours whose slice_table entry is 0xFFFF get type 0 */
+        if(h->slice_table[top_xy     ] == 0xFFFF) top_type= 0;
+        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
+    }
+    h->top_type    = top_type    ;
+    h->left_type[0]= left_type[0];
+    h->left_type[1]= left_type[1];
+
+    if(IS_INTRA(mb_type)) /* intra MBs return before the nnz/ref/mv caches below are filled */
+        return 0;
+
+    AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); /* copy this MB's stored nnz values into the cache */
+    AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
+    AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
+    AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
+    AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
+
+    h->cbp= h->cbp_table[mb_xy];
+
+    {
+        int list;
+        for(list=0; list<h->list_count; list++){
+            int8_t *ref;
+            int y, b_stride;
+            int16_t (*mv_dst)[2];
+            int16_t (*mv_src)[2];
+
+            if(!USES_LIST(mb_type, list)){ /* list unused by this MB: zero the mvs and mark the refs LIST_NOT_USED */
+                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
+                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+                continue;
+            }
+
+            ref = &s->current_picture.ref_index[list][4*mb_xy];
+            {
+                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
+                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
+                ref += 2;
+                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
+                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
+            }
+
+            b_stride = h->b_stride;
+            mv_dst   = &h->mv_cache[list][scan8[0]];
+            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
+            for(y=0; y<4; y++){ /* copy the four mv rows of this MB into the cache */
+                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
+            }
+
+        }
+    }
+
+
+/*
+0 . T T. T T T T
+1 L . .L . . . .
+2 L . .L . . . .
+3 . T TL . . . .
+4 L . .L . . . .
+5 L . .. . . . .
+*/
+//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
+    if(top_type){
+        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
+    }
+
+    if(left_type[0]){
+        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
+        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
+        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
+        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
+    }
+
+    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
+    if(!CABAC && h->pps.transform_8x8_mode){
+        if(IS_8x8DCT(top_type)){
+            h->non_zero_count_cache[4+8*0]=
+            h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
+            h->non_zero_count_cache[6+8*0]=
+            h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
+        }
+        if(IS_8x8DCT(left_type[0])){
+            h->non_zero_count_cache[3+8*1]=
+            h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
+        }
+        if(IS_8x8DCT(left_type[1])){
+            h->non_zero_count_cache[3+8*3]=
+            h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
+        }
+
+        if(IS_8x8DCT(mb_type)){
+            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
+            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
+
+            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
+            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
+
+            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
+            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
+
+            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
+            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
+        }
+    }
+
+    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ /* load mv/ref of the top and left neighbours into the cache borders */
+        int list;
+        for(list=0; list<h->list_count; list++){
+            if(USES_LIST(top_type, list)){
+                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
+                const int b8_xy= 4*top_xy + 2;
+                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
+                h->ref_cache[list][scan8[0] + 0 - 1*8]=
+                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
+                h->ref_cache[list][scan8[0] + 2 - 1*8]=
+                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
+            }else{
+                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
+                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+            }
+
+            if(!IS_INTERLACED(mb_type^left_type[0])){ /* left column only when current and left MB agree on interlacing */
+                if(USES_LIST(left_type[0], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
+                    const int b8_xy= 4*left_xy[0] + 1;
+                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
+                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
+                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
+                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
+                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
+                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
+                    h->ref_cache[list][scan8[0] - 1 +16 ]=
+                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
+                }else{
+                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
+                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
+                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
+                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
+                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
+                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
+                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
+                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
 static void loop_filter(H264Context *h){
     MpegEncContext * const s = &h->s;
     uint8_t  *dest_y, *dest_cb, *dest_cr;
@@ -2197,7 +2360,7 @@ static void loop_filter(H264Context *h){
     if(h->deblocking_filter) {
         for(mb_x= 0; mb_x<s->mb_width; mb_x++){
             for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
-                int list, mb_xy, mb_type;
+                int mb_xy, mb_type;
                 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                 h->slice_num= h->slice_table[mb_xy];
                 mb_type= s->current_picture.mb_type[mb_xy];
@@ -2242,6 +2405,19 @@ static void loop_filter(H264Context *h){
     h->slice_type= old_slice_type;
     s->mb_x= 0;
     s->mb_y= end_mb_y - FRAME_MBAFF;
+    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
+    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
+}
+
+static void predict_field_decoding_flag(H264Context *h){ // sets h->mb_mbaff and h->mb_field_decoding_flag from the mb_type of a neighbouring MB in the same slice
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; // index of the MB at (s->mb_x, s->mb_y)
+    int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) // first choice: the MB at mb_xy-1, if slice_table says it belongs to the current slice
+                ? s->current_picture.mb_type[mb_xy-1]
+                : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) // second choice: the MB one mb_stride earlier, under the same slice test
+                ? s->current_picture.mb_type[mb_xy-s->mb_stride]
+                : 0; // neither candidate is in the current slice: fall back to type 0
+    h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; // both fields get the same 0/1 value
 }
 
 static int decode_slice(struct AVCodecContext *avctx, void *arg){
@@ -2284,8 +2460,12 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
             }
             eos = get_cabac_terminate( &h->cabac );
 
+            if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                return 0;
+            }
             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
-                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%Id)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
+                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                 return -1;
             }
@@ -2297,6 +2477,8 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
                 ++s->mb_y;
                 if(FIELD_OR_MBAFF_PICTURE) {
                     ++s->mb_y;
+                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
+                        predict_field_decoding_flag(h);
                 }
             }
 
@@ -2335,6 +2517,8 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
                 ++s->mb_y;
                 if(FIELD_OR_MBAFF_PICTURE) {
                     ++s->mb_y;
+                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
+                        predict_field_decoding_flag(h);
                 }
                 if(s->mb_y >= s->mb_height){
                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
@@ -2414,184 +2598,6 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
     return -1; //not reached
 }
 
-/* ffdshow custom code */
-#if ENABLE_SLICE_MT_PATCH
-static void copy_context_to_mb(H264mb *dst, H264Context *src)
-{
-    dst->mb_x                           = src->s.mb_x;
-    dst->mb_y                           = src->s.mb_y;
-    dst->qscale                         = src->s.qscale;
-    dst->chroma_qp[0]                   = src->chroma_qp[0];
-    dst->chroma_qp[1]                   = src->chroma_qp[1];
-    dst->chroma_pred_mode               = src->chroma_pred_mode;
-    dst->intra16x16_pred_mode           = src->intra16x16_pred_mode;
-    dst->topleft_samples_available      = src->topleft_samples_available;
-    dst->topright_samples_available     = src->topright_samples_available;
-
-    memcpy(dst->mb,                       src->mb,                       sizeof(src->mb));
-    memcpy(dst->intra4x4_pred_mode_cache, src->intra4x4_pred_mode_cache, sizeof(src->intra4x4_pred_mode_cache));
-    memcpy(dst->non_zero_count_cache,     src->non_zero_count_cache,     sizeof(src->non_zero_count_cache));
-
-    if(src->slice_type != FF_I_TYPE && src->slice_type != FF_SI_TYPE) {
-        memcpy(dst->sub_mb_type,              src->sub_mb_type,              sizeof(src->sub_mb_type));
-        memcpy(dst->mv_cache,                 src->mv_cache,                 sizeof(src->mv_cache));
-        memcpy(dst->mvd_cache,                src->mvd_cache,                sizeof(src->mvd_cache));
-        memcpy(dst->direct_cache,             src->direct_cache,             sizeof(src->direct_cache));
-        memcpy(dst->ref_cache,                src->ref_cache,                sizeof(src->ref_cache));
-    }
-
-    dst->top_mb_xy                      = src->top_mb_xy;
-    dst->left_mb_xy[0]                  = src->left_mb_xy[0];
-    dst->left_mb_xy[1]                  = src->left_mb_xy[1];
-    dst->cbp                            = src->cbp;
-    
-    dst->mb_xy                          = src->mb_xy;
-
-    /* FIXME not necessary? */
-    dst->top_samples_available          = src->top_samples_available;
-    dst->left_samples_available         = src->left_samples_available;
-}
-
-static void copy_mb_to_context(H264Context *dst, H264mb *src)
-{
-    dst->s.mb_x                         = src->mb_x;
-    dst->s.mb_y                         = src->mb_y;
-    dst->s.qscale                       = src->qscale;
-    dst->chroma_qp[0]                   = src->chroma_qp[0];
-    dst->chroma_qp[1]                   = src->chroma_qp[1];
-    dst->chroma_pred_mode               = src->chroma_pred_mode;
-    dst->intra16x16_pred_mode           = src->intra16x16_pred_mode;
-    dst->topleft_samples_available      = src->topleft_samples_available;
-    dst->topright_samples_available     = src->topright_samples_available;
-
-    memcpy(dst->mb,                       src->mb,                       sizeof(src->mb));
-    memcpy(dst->intra4x4_pred_mode_cache, src->intra4x4_pred_mode_cache, sizeof(src->intra4x4_pred_mode_cache));
-    memcpy(dst->non_zero_count_cache,     src->non_zero_count_cache,     sizeof(src->non_zero_count_cache));
-
-    if(dst->slice_type != FF_I_TYPE && dst->slice_type != FF_SI_TYPE) {
-        memcpy(dst->sub_mb_type,              src->sub_mb_type,              sizeof(src->sub_mb_type));
-        memcpy(dst->mv_cache,                 src->mv_cache,                 sizeof(src->mv_cache));
-        memcpy(dst->mvd_cache,                src->mvd_cache,                sizeof(src->mvd_cache));
-        memcpy(dst->direct_cache,             src->direct_cache,             sizeof(src->direct_cache));
-        memcpy(dst->ref_cache,                src->ref_cache,                sizeof(src->ref_cache));
-    }
-
-    /* Needed for deblocking */
-
-    dst->top_mb_xy                      = src->top_mb_xy;
-    dst->left_mb_xy[0]                  = src->left_mb_xy[0];
-    dst->left_mb_xy[1]                  = src->left_mb_xy[1];
-    dst->cbp                            = src->cbp;
-    
-    dst->mb_xy                          = src->mb_xy;
-
-    /* FIXME not necessary? */
-    dst->top_samples_available          = src->top_samples_available;
-    dst->left_samples_available         = src->left_samples_available;
-}
-
-#define MAXBLOCKS 128
-
-static int decode_mb_parallelized(struct AVCodecContext *avctx, void *arg)
-{
-    H264Context *h = *(void**)arg;
-    H264Context *h0 = avctx->priv_data;
-    MpegEncContext * const s = &h->s;
-    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
-    int i, ret;
-
-    if(h0 == h) {
-        /* first thread does entropy decode */
-
-        for(i = 0; i < MAXBLOCKS; i++) {
-            ret = ff_h264_decode_mb_cabac(h);
-            if(ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
-                av_log(h->s.avctx, AV_LOG_ERROR,
-                   "error while decoding MB %d %d, bytestream (%td)\n",
-                   s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
-                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y,
-                    (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
-                return -1;
-            }
-
-            copy_context_to_mb(h->blocks[h->phaze] + i, h);
-            if(++s->mb_x >= s->mb_width) {
-                s->mb_x = 0;
-                ++s->mb_y;
-            }
-
-            if(get_cabac_terminate(&h->cabac) || s->mb_y >= s->mb_height)
-                return i + 1;
-        }
-        return 0;
-
-    } else {
-        /* second thread does hl decode */
-
-        for(i = 0; i < h0->todecode; i++) {
-            copy_mb_to_context(h, h0->blocks[!h0->phaze] + i);
-            ff_h264_hl_decode_mb(h);
-        }
-        return 0;
-    }
-}
-
-static int decode_slice2(struct AVCodecContext *avctx, void *arg){
-    H264Context *h = *(void**)arg;
-    MpegEncContext * const s = &h->s;
-    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
-    H264Context *h2 = h->thread_context[1];
-    int i, rv[2];
-
-    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
-                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (CONFIG_H264_ENCODER && s->encoding);
-
-    clone_slice(h2, h, 1);
-
-    if(!h->blocks[0]) {
-        h->blocks[0] = av_malloc(sizeof(H264mb) * MAXBLOCKS);
-        h->blocks[1] = av_malloc(sizeof(H264mb) * MAXBLOCKS);
-    }
-
-    s->mb_skip_run= -1;
-
-    /* realign */
-    align_get_bits( &s->gb );
-
-    /* init cabac */
-    ff_init_cabac_states( &h->cabac);
-    ff_init_cabac_decoder( &h->cabac,
-               s->gb.buffer + get_bits_count(&s->gb)/8,
-               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
-    ff_h264_init_cabac_states(h);
-
-    h->todecode = 0;
-
-    while(1) {
-    avctx->execute(avctx, (void *)decode_mb_parallelized,
-               (void **)h->thread_context, rv, 2, sizeof(void*));
-
-    h->phaze = !h->phaze;
-
-    if(rv[0] == -1)
-        return -1;
-    else if(rv[0] == 0) 
-        h->todecode = MAXBLOCKS;
-    else
-        break;
-    }
-
-    for(i = 0; i < rv[0]; i++) {
-      copy_mb_to_context(h2, h->blocks[!h->phaze] + i);
-      ff_h264_hl_decode_mb(h2);
-    }
-
-    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
-    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
-    return 0;
-}
-#endif /* ENABLE_SLICE_MT_PATCH */
-
 /**
  * Call decode_slice() for each context.
  *
@@ -2605,16 +2611,7 @@ static void execute_decode_slices(H264Context *h, int context_count){
     int i;
 
     if(context_count == 1) {
-        /* ffdshow custom code, interlacing is not supported in multithreading mode */
-        #if ENABLE_SLICE_MT_PATCH
-        if(avctx->thread_count > 1 && h->pps.cabac && !(FIELD_OR_MBAFF_PICTURE)) {
-            decode_slice2(avctx, &h);
-        } else {
-            decode_slice(avctx, &h);
-        }
-        #else 
         decode_slice(avctx, &h);
-        #endif
     } else {
         for(i = 1; i < context_count; i++) {
             hx = h->thread_context[i];
@@ -2702,8 +2699,15 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
         if (ptr==NULL || dst_length < 0){
             return -1;
         }
+        i= buf_index + consumed;
+        if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
+           buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
+            s->workaround_bugs |= FF_BUG_TRUNCATED;
+
+        if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
         while(ptr[dst_length - 1] == 0 && dst_length > 0)
             dst_length--;
+        }
         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
 
         if(s->avctx->debug&FF_DEBUG_STARTCODE){
@@ -2889,7 +2893,7 @@ static int decode_frame(AVCodecContext *avctx,
 
         for (i = 0; i < cnt; i++) {
             nalsize = AV_RB16(p) + 2;
-            if(decode_nal_units(h, p, nalsize)  < 0) {
+            if(decode_nal_units(h, p, nalsize) < 0) { // mpc-hc fix for files created by certain digital cameras
                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
                 return -1;
             }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h
index aaa704fd5..c6650a13d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h
@@ -28,12 +28,11 @@
 #ifndef AVCODEC_H264_H
 #define AVCODEC_H264_H
 
-/* currently broken! */
-#define ENABLE_SLICE_MT_PATCH 0
-
+#include "libavutil/intreadwrite.h"
 #include "dsputil.h"
 #include "cabac.h"
 #include "mpegvideo.h"
+#include "h264dsp.h"
 #include "h264pred.h"
 #include "rectangle.h"
 
@@ -63,6 +62,8 @@
 
 #define ALLOW_NOCHROMA
 
+#define FMO 0
+
 /**
  * The maximum number of slices supported by the decoder.
  * must be a power of 2
@@ -262,56 +263,11 @@ typedef struct MMCO{
 } MMCO;
 
 /**
- *
- */
-
-typedef struct H264mb {
-    int mb_x, mb_y;
-    int qscale;
-    int chroma_qp[2]; //QPc
-    int chroma_pred_mode;
-    int intra16x16_pred_mode;
-    unsigned int topleft_samples_available;
-    unsigned int topright_samples_available;
-    int8_t intra4x4_pred_mode_cache[5*8];
-
-    uint8_t non_zero_count_cache[6*8];
-
-    int16_t mv_cache[2][5*8][2];
-    int8_t ref_cache[2][5*8];
-
-    int cbp;
-    int top_mb_xy;
-    int left_mb_xy[2];
-    
-    int mb_xy;
-
-    unsigned int sub_mb_type[4];
-
-    DCTELEM mb[16*24];
-    unsigned int top_samples_available,left_samples_available;
-    int16_t mvd_cache[2][5*8][2];
-    uint8_t direct_cache[5*8];
-} H264mb;
-
-
-/**
  * H264Context
  */
 typedef struct H264Context{
     MpegEncContext s;
-    int nal_ref_idc;
-    int nal_unit_type;
-    uint8_t *rbsp_buffer[2];
-    unsigned int rbsp_buffer_size[2];
-
-    /**
-      * Used to parse AVC variant of h264
-      */
-    int is_avc; ///< this flag is != 0 if codec is avc1
-    int got_avcC; ///< flag used to parse avcC data only once
-    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
-
+    H264DSPContext h264dsp;
     int chroma_qp[2]; //QPc
 
     int qp_thresh;      ///< QP threshold to skip loopfilter
@@ -323,27 +279,33 @@ typedef struct H264Context{
     int chroma_pred_mode;
     int intra16x16_pred_mode;
 
+    int topleft_mb_xy;
     int top_mb_xy;
+    int topright_mb_xy;
     int left_mb_xy[2];
 
+    int topleft_type;
     int top_type;
+    int topright_type;
     int left_type[2];
 
+    const uint8_t * left_block;
+    int topleft_partition;
+
     int8_t intra4x4_pred_mode_cache[5*8];
-    int8_t (*intra4x4_pred_mode)[8];
+    int8_t (*intra4x4_pred_mode);
     H264PredContext hpc;
     unsigned int topleft_samples_available;
     unsigned int top_samples_available;
     unsigned int topright_samples_available;
     unsigned int left_samples_available;
     uint8_t (*top_borders[2])[16+2*8];
-    uint8_t left_border[2*(17+2*9)];
 
     /**
      * non zero coeff count cache.
      * is 64 if not available.
      */
-    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache)[6*8];
+    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[6*8];
 
     /*
     .UU.YYYY
@@ -356,8 +318,8 @@ typedef struct H264Context{
     /**
      * Motion vector cache.
      */
-    DECLARE_ALIGNED_16(int16_t, mv_cache)[2][5*8][2];
-    DECLARE_ALIGNED_8(int8_t, ref_cache)[2][5*8];
+    DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2];
+    DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8];
 #define LIST_NOT_USED -1 //FIXME rename?
 #define PART_NOT_AVAILABLE -2
 
@@ -378,9 +340,8 @@ typedef struct H264Context{
     int block_offset[2*(16+8)];
 
     uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
-    uint32_t *mb2b8_xy;
+    uint32_t *mb2br_xy;
     int b_stride; //FIXME use s->b4_stride
-    int b8_stride;
 
     int mb_linesize;   ///< may be equal to s->linesize or s->linesize*2, for mbaff
     int mb_uvlinesize;
@@ -388,29 +349,19 @@ typedef struct H264Context{
     int emu_edge_width;
     int emu_edge_height;
 
-    int halfpel_flag;
-    int thirdpel_flag;
-
-    int unknown_svq3_flag;
-    int next_slice_index;
-
-    SPS *sps_buffers[MAX_SPS_COUNT];
     SPS sps; ///< current sps
 
-    PPS *pps_buffers[MAX_PPS_COUNT];
     /**
      * current pps
      */
     PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
 
-    uint32_t dequant4_buffer[6][52][16];
+    uint32_t dequant4_buffer[6][52][16]; //FIXME should these be moved down?
     uint32_t dequant8_buffer[2][52][64];
     uint32_t (*dequant4_coeff[6])[16];
     uint32_t (*dequant8_coeff[2])[64];
-    int dequant_coeff_pps;     ///< reinit tables when pps changes
 
     int slice_num;
-    uint16_t *slice_table_base;
     uint16_t *slice_table;     ///< slice_table_base + 2*mb_stride + 1
     int slice_type;
     int slice_type_nos;        ///< S free slice type (SI/SP are remapped to I/P)
@@ -421,49 +372,21 @@ typedef struct H264Context{
     int mb_field_decoding_flag;
     int mb_mbaff;              ///< mb_aff_frame && mb_field_decoding_flag
 
-    DECLARE_ALIGNED_8(uint16_t, sub_mb_type)[4];
-
-    //POC stuff
-    int poc_lsb;
-    int poc_msb;
-    int delta_poc_bottom;
-    int delta_poc[2];
-    int frame_num;
-    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
-    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
-    int frame_num_offset;         ///< for POC type 2
-    int prev_frame_num_offset;    ///< for POC type 2
-    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
-
-    /**
-     * frame_num for frames or 2*frame_num+1 for field pics.
-     */
-    int curr_pic_num;
-
-    /**
-     * max_frame_num or 2*max_frame_num for field pics.
-     */
-    int max_pic_num;
+    DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];
 
     //Weighted pred stuff
     int use_weight;
     int use_weight_chroma;
     int luma_log2_weight_denom;
     int chroma_log2_weight_denom;
-    int luma_weight[2][48];
-    int luma_offset[2][48];
-    int chroma_weight[2][48][2];
-    int chroma_offset[2][48][2];
-    int implicit_weight[48][48];
-
-    //deblock
-    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0
-    int slice_alpha_c0_offset;
-    int slice_beta_offset;
-
-    int redundant_pic_count;
+    //The following 2 can be changed to int8_t but that causes a slowdown of 10 CPU cycles
+    int luma_weight[48][2][2];
+    int chroma_weight[48][2][2][2];
+    int implicit_weight[48][48][2];
 
     int direct_spatial_mv_pred;
+    int col_parity;
+    int col_fieldoff;
     int dist_scale_factor[16];
     int dist_scale_factor_field[2][32];
     int map_col_to_list0[2][16+32];
@@ -472,27 +395,13 @@ typedef struct H264Context{
     /**
      * num_ref_idx_l0/1_active_minus1 + 1
      */
-    unsigned int list_count;
     uint8_t *list_counts;            ///< Array of list_count per MB specifying the slice type
     unsigned int ref_count[2];   ///< counts frames or fields, depending on current mb mode
-    Picture *short_ref[32];
-    Picture *long_ref[32];
-    Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
+    unsigned int list_count;
     Picture ref_list[2][48];         /**< 0..15: frame refs, 16..47: mbaff field refs.
                                           Reordered version of default_ref_list
                                           according to picture reordering in slice header */
     int ref2frm[MAX_SLICES][2][64];  ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1
-    Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
-    int outputed_poc;
-
-    /**
-     * memory management control operations buffer.
-     */
-    MMCO mmco[MAX_MMCO_COUNT];
-    int mmco_index;
-
-    int long_ref_count;  ///< number of actual long term references
-    int short_ref_count; ///< number of actual short term references
 
     //data partitioning
     GetBitContext intra_gb;
@@ -500,7 +409,7 @@ typedef struct H264Context{
     GetBitContext *intra_gb_ptr;
     GetBitContext *inter_gb_ptr;
 
-    DECLARE_ALIGNED_16(DCTELEM, mb)[16*24];
+    DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
     DCTELEM mb_padding[256];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
 
     /**
@@ -508,7 +417,6 @@ typedef struct H264Context{
      */
     CABACContext cabac;
     uint8_t      cabac_state[460];
-    int          cabac_init_idc;
 
     /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
     uint16_t     *cbp_table;
@@ -518,8 +426,8 @@ typedef struct H264Context{
     /* chroma_pred_mode for i4x4 or i16x16, else 0 */
     uint8_t     *chroma_pred_mode_table;
     int         last_qscale_diff;
-    int16_t     (*mvd_table[2])[2];
-    DECLARE_ALIGNED_16(int16_t, mvd_cache)[2][5*8][2];
+    uint8_t     (*mvd_table[2])[2];
+    DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2];
     uint8_t     *direct_table;
     uint8_t     direct_cache[5*8];
 
@@ -538,6 +446,79 @@ typedef struct H264Context{
 
     int x264_build;
 
+    int mb_xy;
+
+    int is_complex;
+
+    //deblock
+    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0
+    int slice_alpha_c0_offset;
+    int slice_beta_offset;
+
+//=============================================================
+    //Things below are not used in the MB or more inner code
+
+    int nal_ref_idc;
+    int nal_unit_type;
+    uint8_t *rbsp_buffer[2];
+    unsigned int rbsp_buffer_size[2];
+
+    /**
+     * Used to parse AVC variant of h264
+     */
+    int is_avc; ///< this flag is != 0 if codec is avc1
+    int got_avcC; ///< flag used to parse avcC data only once
+    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
+
+    SPS *sps_buffers[MAX_SPS_COUNT];
+    PPS *pps_buffers[MAX_PPS_COUNT];
+
+    int dequant_coeff_pps;     ///< reinit tables when pps changes
+
+    uint16_t *slice_table_base;
+
+
+    //POC stuff
+    int poc_lsb;
+    int poc_msb;
+    int delta_poc_bottom;
+    int delta_poc[2];
+    int frame_num;
+    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
+    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
+    int frame_num_offset;         ///< for POC type 2
+    int prev_frame_num_offset;    ///< for POC type 2
+    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
+
+    /**
+     * frame_num for frames or 2*frame_num+1 for field pics.
+     */
+    int curr_pic_num;
+
+    /**
+     * max_frame_num or 2*max_frame_num for field pics.
+     */
+    int max_pic_num;
+
+    int redundant_pic_count;
+
+    Picture *short_ref[32];
+    Picture *long_ref[32];
+    Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
+    Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
+    int outputed_poc;
+
+    /**
+     * memory management control operations buffer.
+     */
+    MMCO mmco[MAX_MMCO_COUNT];
+    int mmco_index;
+
+    int long_ref_count;  ///< number of actual long term references
+    int short_ref_count; ///< number of actual short term references
+
+    int          cabac_init_idc;
+
     /**
      * @defgroup multithreading Members for slice based multithreading
      * @{
@@ -566,10 +547,6 @@ typedef struct H264Context{
     int last_slice_type;
     /** @} */
 
-    int mb_xy;
-
-    uint32_t svq3_watermark_key;
-
     /**
      * pic_struct in picture timing SEI message
      */
@@ -609,8 +586,6 @@ typedef struct H264Context{
      */
     int sei_recovery_frame_cnt;
 
-    int is_complex;
-
     int luma_weight_flag[2];   ///< 7.4.3.2 luma_weight_lX_flag
     int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag
 
@@ -618,12 +593,12 @@ typedef struct H264Context{
     int sei_buffering_period_present;  ///< Buffering period SEI flag
     int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
 
-#if ENABLE_SLICE_MT_PATCH    
-    /* experimental */
-    int phaze;
-    int todecode;
-    H264mb *blocks[2];
-#endif
+    //SVQ3 specific fields
+    int halfpel_flag;
+    int thirdpel_flag;
+    int unknown_svq3_flag;
+    int next_slice_index;
+    uint32_t svq3_watermark_key;
 
     /* ffdshow custom stuff */
     int has_to_drop_first_non_ref;    // Workaround Haali's media splitter (http://forum.doom9.org/showthread.php?p=1226434#post1226434)
@@ -668,7 +643,7 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length);
  * @param consumed is the number of bytes used as input
  * @param length is the length of the array
  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
- * @returns decoded bytes, might be src+1 if no escapes
+ * @return decoded bytes, might be src+1 if no escapes
  */
 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length);
 
@@ -730,13 +705,13 @@ av_cold void ff_h264_decode_init_vlc(void);
 
 /**
  * decodes a macroblock
- * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
  */
 int ff_h264_decode_mb_cavlc(H264Context *h);
 
 /**
  * decodes a CABAC coded macroblock
- * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
  */
 int ff_h264_decode_mb_cabac(H264Context *h);
 
@@ -786,6 +761,14 @@ static av_always_inline uint32_t pack16to32(int a, int b){
 #endif
 }
 
+static av_always_inline uint16_t pack8to16(int a, int b){ // combines a and b into one 16-bit value; which one lands in the high byte depends on HAVE_BIGENDIAN
+#if HAVE_BIGENDIAN
+   return (b&0xFF) + (a<<8); // b masked into bits 0..7, a shifted into bits 8 and up (not masked; the uint16_t return type truncates)
+#else
+   return (a&0xFF) + (b<<8); // a masked into bits 0..7, b shifted into bits 8 and up (not masked; the uint16_t return type truncates)
+#endif
+}
+
 /**
  * gets the chroma qp.
  */
@@ -795,14 +778,10 @@ static inline int get_chroma_qp(H264Context *h, int t, int qscale){
 
 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
 
-static void fill_decode_caches(H264Context *h, int mb_type){
+static void fill_decode_neighbors(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
-    int topleft_type, top_type, topright_type, left_type[2];
-    const uint8_t * left_block;
-    int topleft_partition= -1;
-    int i;
     static const uint8_t left_block_options[4][16]={
         {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
         {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
@@ -810,6 +789,8 @@ static void fill_decode_caches(H264Context *h, int mb_type){
         {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
     };
 
+    h->topleft_partition= -1;
+
     top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
 
     /* Wow, what a mess, why didn't they simplify the interlacing & intra
@@ -818,7 +799,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
     topleft_xy = top_xy - 1;
     topright_xy= top_xy + 1;
     left_xy[1] = left_xy[0] = mb_xy-1;
-    left_block = left_block_options[0];
+    h->left_block = left_block_options[0];
     if(FRAME_MBAFF){
         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
@@ -827,12 +808,12 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
                 if (curr_mb_field_flag) {
                     left_xy[1] += s->mb_stride;
-                    left_block = left_block_options[3];
+                    h->left_block = left_block_options[3];
                 } else {
                     topleft_xy += s->mb_stride;
                     // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
-                    topleft_partition = 0;
-                    left_block = left_block_options[1];
+                    h->topleft_partition = 0;
+                    h->left_block = left_block_options[1];
                 }
             }
         }else{
@@ -842,26 +823,62 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                 top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
             }
             if (left_mb_field_flag != curr_mb_field_flag) {
-                left_xy[1] = left_xy[0] = mb_xy - 1;
                 if (curr_mb_field_flag) {
                     left_xy[1] += s->mb_stride;
-                    left_block = left_block_options[3];
+                    h->left_block = left_block_options[3];
                 } else {
-                    left_block = left_block_options[2];
+                    h->left_block = left_block_options[2];
                 }
             }
         }
     }
 
-    h->top_mb_xy = top_xy;
+    h->topleft_mb_xy = topleft_xy;
+    h->top_mb_xy     = top_xy;
+    h->topright_mb_xy= topright_xy;
     h->left_mb_xy[0] = left_xy[0];
     h->left_mb_xy[1] = left_xy[1];
-        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
-        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
-        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
-        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
-        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
+    //FIXME do we need all in the context?
+
+    h->topleft_type = s->current_picture.mb_type[topleft_xy] ;
+    h->top_type     = s->current_picture.mb_type[top_xy]     ;
+    h->topright_type= s->current_picture.mb_type[topright_xy];
+    h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ;
+    h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ;
+
+    if(FMO){
+    if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0;
+    if(h->slice_table[top_xy     ] != h->slice_num) h->top_type     = 0;
+    if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
+    }else{
+        if(h->slice_table[topleft_xy ] != h->slice_num){
+            h->topleft_type = 0;
+            if(h->slice_table[top_xy     ] != h->slice_num) h->top_type     = 0;
+            if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
+        }
+    }
+    if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0;
+}
+
+static void fill_decode_caches(H264Context *h, int mb_type){
+    MpegEncContext * const s = &h->s;
+    int topleft_xy, top_xy, topright_xy, left_xy[2];
+    int topleft_type, top_type, topright_type, left_type[2];
+    const uint8_t * left_block= h->left_block;
+    int i;
 
+    topleft_xy   = h->topleft_mb_xy ;
+    top_xy       = h->top_mb_xy     ;
+    topright_xy  = h->topright_mb_xy;
+    left_xy[0]   = h->left_mb_xy[0] ;
+    left_xy[1]   = h->left_mb_xy[1] ;
+    topleft_type = h->topleft_type  ;
+    top_type     = h->top_type      ;
+    topright_type= h->topright_type ;
+    left_type[0] = h->left_type[0]  ;
+    left_type[1] = h->left_type[1]  ;
+
+    if(!IS_SKIP(mb_type)){
         if(IS_INTRA(mb_type)){
             int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
             h->topleft_samples_available=
@@ -885,8 +902,8 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                         h->left_samples_available&= 0xFF5F;
                     }
                 }else{
-                    int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
-                                    ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
+                    int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride];
+
                     assert(left_xy[0] == left_xy[1]);
                     if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                         h->topleft_samples_available&= 0xDF5F;
@@ -908,35 +925,21 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 
             if(IS_INTRA4x4(mb_type)){
                 if(IS_INTRA4x4(top_type)){
-                    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
-                    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
-                    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
-                    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
+                    AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
                 }else{
-                    int pred;
-                    if(!(top_type & type_mask))
-                        pred= -1;
-                    else{
-                        pred= 2;
-                    }
                     h->intra4x4_pred_mode_cache[4+8*0]=
                     h->intra4x4_pred_mode_cache[5+8*0]=
                     h->intra4x4_pred_mode_cache[6+8*0]=
-                    h->intra4x4_pred_mode_cache[7+8*0]= pred;
+                    h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask);
                 }
                 for(i=0; i<2; i++){
                     if(IS_INTRA4x4(left_type[i])){
-                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
-                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
+                        int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]];
+                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]];
+                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]];
                     }else{
-                        int pred;
-                        if(!(left_type[i] & type_mask))
-                            pred= -1;
-                        else{
-                            pred= 2;
-                        }
                         h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
-                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
+                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask);
                     }
                 }
             }
@@ -953,7 +956,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
+        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
             h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
             h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
 
@@ -965,7 +968,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 
             h->non_zero_count_cache[1+8*3]=
             h->non_zero_count_cache[2+8*3]=
-            *(uint32_t*)&h->non_zero_count_cache[4+8*0]= CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
+            AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040);
     }
 
     for (i=0; i<2; i++) {
@@ -986,32 +989,25 @@ static void fill_decode_caches(H264Context *h, int mb_type){
         // top_cbp
         if(top_type) {
             h->top_cbp = h->cbp_table[top_xy];
-        } else if(IS_INTRA(mb_type)) {
-            h->top_cbp = 0x1C0;
         } else {
-            h->top_cbp = 0;
+            h->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
         }
         // left_cbp
         if (left_type[0]) {
-            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
-        } else if(IS_INTRA(mb_type)) {
-            h->left_cbp = 0x1C0;
+            h->left_cbp = (h->cbp_table[left_xy[0]] & 0x1f0)
+                        |  ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
+                        | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
         } else {
-            h->left_cbp = 0;
-        }
-        if (left_type[0]) {
-            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
-        }
-        if (left_type[1]) {
-            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
+            h->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
         }
     }
+    }
 
 #if 1
-    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
+    if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
         int list;
         for(list=0; list<h->list_count; list++){
-            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){
+            if(!USES_LIST(mb_type, list)){
                 /*if(!h->mv_cache_clean[list]){
                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
@@ -1019,134 +1015,135 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                 }*/
                 continue;
             }
+            assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
+
             h->mv_cache_clean[list]= 0;
 
             if(USES_LIST(top_type, list)){
                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
-                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                     h->ref_cache[list][scan8[0] + 0 - 1*8]=
-                    h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
+                    h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 2];
                     h->ref_cache[list][scan8[0] + 2 - 1*8]=
-                    h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
+                    h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 3];
             }else{
                 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
-                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
+                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
             }
 
+            if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
             for(i=0; i<2; i++){
                 int cache_idx = scan8[0] - 1 + i*2*8;
                 if(USES_LIST(left_type[i], list)){
                     const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
-                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
-                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
-                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
-                        h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
-                        h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
+                    const int b8_xy= 4*left_xy[i] + 1;
+                    AV_COPY32(h->mv_cache[list][cache_idx  ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]);
+                    AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]);
+                        h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + (left_block[0+i*2]&~1)];
+                        h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + (left_block[1+i*2]&~1)];
                 }else{
-                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
-                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
+                    AV_ZERO32(h->mv_cache [list][cache_idx  ]);
+                    AV_ZERO32(h->mv_cache [list][cache_idx+8]);
                     h->ref_cache[list][cache_idx  ]=
                     h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                 }
             }
-
-            if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF)
-                continue;
-
-            if(USES_LIST(topleft_type, list)){
-                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
-                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
-                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
-                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
             }else{
-                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
-                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                if(USES_LIST(left_type[0], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
+                    const int b8_xy= 4*left_xy[0] + 1;
+                    AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]);
+                    h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_index[list][b8_xy + (left_block[0]&~1)];
+                }else{
+                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1]);
+                    h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                }
             }
 
             if(USES_LIST(topright_type, list)){
                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
-                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
-                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
-                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+                AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]);
+                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][4*topright_xy + 2];
             }else{
-                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
+                AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]);
                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
             }
+            if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){
+                if(USES_LIST(topleft_type, list)){
+                    const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride);
+                    const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2);
+                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]);
+                    h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+                }else{
+                    AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]);
+                    h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                }
+            }
 
-            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
+            if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
                 continue;
 
-            h->ref_cache[list][scan8[5 ]+1] =
-            h->ref_cache[list][scan8[7 ]+1] =
-            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
+            if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) {
             h->ref_cache[list][scan8[4 ]] =
             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
-            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
-            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
-            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
-            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
-            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
+            AV_ZERO32(h->mv_cache [list][scan8[4 ]]);
+            AV_ZERO32(h->mv_cache [list][scan8[12]]);
 
             if( CABAC ) {
                 /* XXX beurk, Load mvd */
                 if(USES_LIST(top_type, list)){
-                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
-                    AV_COPY128(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
+                    const int b_xy= h->mb2br_xy[top_xy];
+                    AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
                 }else{
-                    AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
+                    AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
                 }
                 if(USES_LIST(left_type[0], list)){
-                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
-                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
-                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
+                    const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
+                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]);
+                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]);
                 }else{
-                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
-                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
+                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
+                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
                 }
                 if(USES_LIST(left_type[1], list)){
-                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
-                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
-                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
+                    const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
+                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]);
+                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]);
                 }else{
-                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
-                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
+                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
+                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
                 }
-                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
-                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
-                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
-                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
-                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
-
+                AV_ZERO16(h->mvd_cache [list][scan8[4 ]]);
+                AV_ZERO16(h->mvd_cache [list][scan8[12]]);
                 if(h->slice_type_nos == FF_B_TYPE){
-                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
+                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);
 
                     if(IS_DIRECT(top_type)){
-                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
+                        AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
                     }else if(IS_8X8(top_type)){
-                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
-                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
-                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
+                        int b8_xy = 4*top_xy;
+                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2];
+                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3];
                     }else{
-                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
+                        AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1));
                     }
 
                     if(IS_DIRECT(left_type[0]))
-                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
+                        h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
                     else if(IS_8X8(left_type[0]))
-                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
+                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)];
                     else
-                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;
+                        h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1;
 
                     if(IS_DIRECT(left_type[1]))
-                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
+                        h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1;
                     else if(IS_8X8(left_type[1]))
-                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
+                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)];
                     else
-                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
+                        h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1;
                 }
             }
-
+            }
             if(FRAME_MBAFF){
 #define MAP_MVS\
                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
@@ -1164,7 +1161,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                         h->ref_cache[list][idx] <<= 1;\
                         h->mv_cache[list][idx][1] /= 2;\
-                        h->mvd_cache[list][idx][1] /= 2;\
+                        h->mvd_cache[list][idx][1] >>=1;\
                     }
                     MAP_MVS
 #undef MAP_F2F
@@ -1187,218 +1184,6 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 }
 
 /**
- *
- * @returns non zero if the loop filter can be skiped
- */
-static int fill_filter_caches(H264Context *h, int mb_type){
-    MpegEncContext * const s = &h->s;
-    const int mb_xy= h->mb_xy;
-    int top_xy, left_xy[2];
-    int top_type, left_type[2];
-    int i;
-
-    top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
-
-    //FIXME deblocking could skip the intra and nnz parts.
-
-    /* Wow, what a mess, why didn't they simplify the interlacing & intra
-     * stuff, I can't imagine that these complex rules are worth it. */
-
-    left_xy[1] = left_xy[0] = mb_xy-1;
-    if(FRAME_MBAFF){
-        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
-        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
-        if(s->mb_y&1){
-            if (left_mb_field_flag != curr_mb_field_flag) {
-                left_xy[0] -= s->mb_stride;
-            }
-        }else{
-            if(curr_mb_field_flag){
-                top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
-            }
-            if (left_mb_field_flag != curr_mb_field_flag) {
-                left_xy[1] += s->mb_stride;
-            }
-        }
-    }
-
-    h->top_mb_xy = top_xy;
-    h->left_mb_xy[0] = left_xy[0];
-    h->left_mb_xy[1] = left_xy[1];
-    {
-        //for sufficiently low qp, filtering wouldn't do anything
-        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
-        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
-        int qp = s->current_picture.qscale_table[mb_xy];
-        if(qp <= qp_thresh
-           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
-           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
-            if(!FRAME_MBAFF)
-                return 1;
-            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
-               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
-                return 1;
-        }
-    }
-
-    if(h->deblocking_filter == 2){
-        h->top_type    = top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
-        h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
-        h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
-    }else{
-        h->top_type    = top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
-        h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
-        h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
-    }
-    if(IS_INTRA(mb_type))
-        return 0;
-
-    AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
-    AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
-    *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]);
-    *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]);
-    AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
-
-    h->cbp= h->cbp_table[mb_xy];
-
-    {
-        int list;
-        for(list=0; list<h->list_count; list++){
-            int8_t *ref;
-            int y, b_stride;
-            int16_t (*mv_dst)[2];
-            int16_t (*mv_src)[2];
-
-            if(!USES_LIST(mb_type, list)){
-                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
-                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
-                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] =
-                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
-                *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101;
-                continue;
-            }
-
-            ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
-            {
-                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
-                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
-                ref += h->b8_stride;
-                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
-                *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
-            }
-
-            b_stride = h->b_stride;
-            mv_dst   = &h->mv_cache[list][scan8[0]];
-            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
-            for(y=0; y<4; y++){
-                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
-            }
-
-        }
-    }
-
-
-/*
-0 . T T. T T T T
-1 L . .L . . . .
-2 L . .L . . . .
-3 . T TL . . . .
-4 L . .L . . . .
-5 L . .. . . . .
-*/
-//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
-    if(top_type){
-        *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
-    }
-
-    if(left_type[0]){
-        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
-        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
-        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
-        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
-    }
-
-    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
-    if(!CABAC && h->pps.transform_8x8_mode){
-        if(IS_8x8DCT(top_type)){
-            h->non_zero_count_cache[4+8*0]=
-            h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
-            h->non_zero_count_cache[6+8*0]=
-            h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
-        }
-        if(IS_8x8DCT(left_type[0])){
-            h->non_zero_count_cache[3+8*1]=
-            h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
-        }
-        if(IS_8x8DCT(left_type[1])){
-            h->non_zero_count_cache[3+8*3]=
-            h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
-        }
-
-        if(IS_8x8DCT(mb_type)){
-            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
-            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
-
-            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
-            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
-
-            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
-            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
-
-            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
-            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
-        }
-    }
-
-    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
-        int list;
-        for(list=0; list<h->list_count; list++){
-            if(USES_LIST(top_type, list)){
-                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
-                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
-                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
-                h->ref_cache[list][scan8[0] + 0 - 1*8]=
-                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
-                h->ref_cache[list][scan8[0] + 2 - 1*8]=
-                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
-            }else{
-                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
-                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101;
-            }
-
-            if(!IS_INTERLACED(mb_type^left_type[0])){
-                if(USES_LIST(left_type[0], list)){
-                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
-                    const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
-                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0];
-                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1];
-                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2];
-                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3];
-                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
-                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]];
-                    h->ref_cache[list][scan8[0] - 1 +16 ]=
-                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]];
-                }else{
-                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]=
-                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]=
-                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]=
-                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0;
-                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
-                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
-                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
-                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
-                }
-            }
-        }
-    }
-
-    return 0;
-}
-
-/**
  * gets the predicted intra4x4 prediction mode.
  */
 static inline int pred_intra_mode(H264Context *h, int n){
@@ -1418,19 +1203,19 @@ static inline void write_back_non_zero_count(H264Context *h){
 
     AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]);
     AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]);
-    *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]);
-    *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]);
+    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]);
+    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]);
     AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]);
 }
 
 static inline void write_back_motion(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
-    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
-    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
+    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy
+    const int b8_xy= 4*h->mb_xy;
     int list;
 
     if(!USES_LIST(mb_type, 0))
-        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
+        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
 
     for(list=0; list<h->list_count; list++){
         int y, b_stride;
@@ -1447,51 +1232,42 @@ static inline void write_back_motion(H264Context *h, int mb_type){
             AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
         }
         if( CABAC ) {
-            int16_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy];
-            int16_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
+            uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
+            uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
             if(IS_SKIP(mb_type))
-                fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 4);
-            else
-            for(y=0; y<4; y++){
-                AV_COPY128(mvd_dst + y*b_stride, mvd_src + 8*y);
+                AV_ZERO128(mvd_dst);
+            else{
+            AV_COPY64(mvd_dst, mvd_src + 8*3);
+                AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
+                AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
+                AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
             }
         }
 
         {
             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
-            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
-            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
-            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
-            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
+            ref_index[0+0*2]= h->ref_cache[list][scan8[0]];
+            ref_index[1+0*2]= h->ref_cache[list][scan8[4]];
+            ref_index[0+1*2]= h->ref_cache[list][scan8[8]];
+            ref_index[1+1*2]= h->ref_cache[list][scan8[12]];
         }
     }
 
     if(h->slice_type_nos == FF_B_TYPE && CABAC){
         if(IS_8X8(mb_type)){
-            uint8_t *direct_table = &h->direct_table[b8_xy];
-            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
-            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
-            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
+            uint8_t *direct_table = &h->direct_table[4*h->mb_xy];
+            direct_table[1] = h->sub_mb_type[1]>>1;
+            direct_table[2] = h->sub_mb_type[2]>>1;
+            direct_table[3] = h->sub_mb_type[3]>>1;
         }
     }
 }
 
 static inline int get_dct8x8_allowed(H264Context *h){
     if(h->sps.direct_8x8_inference_flag)
-        return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
+        return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
     else
-        return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
-}
-
-static void predict_field_decoding_flag(H264Context *h){
-    MpegEncContext * const s = &h->s;
-    const int mb_xy= h->mb_xy;
-    int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
-                ? s->current_picture.mb_type[mb_xy-1]
-                : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
-                ? s->current_picture.mb_type[mb_xy-s->mb_stride]
-                : 0;
-    h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
+        return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
 }
 
 /**
@@ -1511,9 +1287,11 @@ static void decode_mb_skip(H264Context *h){
     if( h->slice_type_nos == FF_B_TYPE )
     {
         // just for fill_caches. pred_direct_motion will set the real mb_type
-        mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
-
+        mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
+        if(h->direct_spatial_mv_pred){
+            fill_decode_neighbors(h, mb_type);
         fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
+        }
         ff_h264_pred_direct_motion(h, &mb_type);
         mb_type|= MB_TYPE_SKIP;
     }
@@ -1522,6 +1300,7 @@ static void decode_mb_skip(H264Context *h){
         int mx, my;
         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
 
+        fill_decode_neighbors(h, mb_type);
         fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
         pred_pskip_motion(h, &mx, &my);
         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_cabac.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_cabac.c
index 7b2c8e87c..ffcefe6fc 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_cabac.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_cabac.c
@@ -688,30 +688,30 @@ static const int8_t cabac_context_init_PB[3][460][2] =
 void ff_h264_init_cabac_states(H264Context *h) {
     MpegEncContext * const s = &h->s;
     int i;
+    const int8_t (*tab)[2];
+
+    if( h->slice_type_nos == FF_I_TYPE ) tab = cabac_context_init_I;
+    else                                 tab = cabac_context_init_PB[h->cabac_init_idc];
 
     /* calculate pre-state */
     for( i= 0; i < 460; i++ ) {
-        int pre;
-        if( h->slice_type_nos == FF_I_TYPE )
-            pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
-        else
-            pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
+        int pre = 2*(((tab[i][0] * s->qscale) >>4 ) + tab[i][1]) - 127;
 
-        if( pre <= 63 )
-            h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
-        else
-            h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
+        pre^= pre>>31;
+        if(pre > 124)
+            pre= 124 + (pre&1);
+
+        h->cabac_state[i] =  pre;
     }
 }
 
 static int decode_cabac_field_decoding_flag(H264Context *h) {
     MpegEncContext * const s = &h->s;
-    const long mba_xy = h->mb_xy - 1L;
     const long mbb_xy = h->mb_xy - 2L*s->mb_stride;
 
     unsigned long ctx = 0;
 
-    ctx += (s->current_picture.mb_type[mba_xy]>>7)&(h->slice_table[mba_xy] == h->slice_num);
+    ctx += h->mb_field_decoding_flag & !!s->mb_x; //for FMO use instead, with mba_xy = h->mb_xy - 1: (s->current_picture.mb_type[mba_xy]>>7)&(h->slice_table[mba_xy] == h->slice_num);
     ctx += (s->current_picture.mb_type[mbb_xy]>>7)&(h->slice_table[mbb_xy] == h->slice_num);
 
     return get_cabac_noinline( &h->cabac, &(h->cabac_state+70)[ctx] );
@@ -722,13 +722,10 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
     int mb_type;
 
     if(intra_slice){
-        MpegEncContext * const s = &h->s;
-        const int mba_xy = h->left_mb_xy[0];
-        const int mbb_xy = h->top_mb_xy;
         int ctx=0;
-        if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
+        if( h->left_type[0] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
             ctx++;
-        if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
+        if( h->top_type     & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
             ctx++;
         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
             return 0;   /* I4x4 */
@@ -750,44 +747,6 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
     return mb_type;
 }
 
-static int decode_cabac_mb_type_b( H264Context *h ) {
-    MpegEncContext * const s = &h->s;
-
-        const int mba_xy = h->left_mb_xy[0];
-        const int mbb_xy = h->top_mb_xy;
-        int ctx = 0;
-        int bits;
-        assert(h->slice_type_nos == FF_B_TYPE);
-
-        if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
-            ctx++;
-        if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
-            ctx++;
-
-        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
-            return 0; /* B_Direct_16x16 */
-
-        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
-            return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
-        }
-
-        bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
-        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
-        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
-        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
-        if( bits < 8 )
-            return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
-        else if( bits == 13 ) {
-            return decode_cabac_intra_mb_type(h, 32, 0) + 23;
-        } else if( bits == 14 )
-            return 11; /* B_L1_L0_8x16 */
-        else if( bits == 15 )
-            return 22; /* B_8x8 */
-
-        bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
-        return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
-}
-
 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
     MpegEncContext * const s = &h->s;
     int mba_xy, mbb_xy;
@@ -834,10 +793,7 @@ static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
 
-    if( mode >= pred_mode )
-        return mode + 1;
-    else
-        return mode;
+    return mode + ( mode >= pred_mode );
 }
 
 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
@@ -847,10 +803,10 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
     int ctx = 0;
 
     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
-    if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
+    if( h->left_type[0] && h->chroma_pred_mode_table[mba_xy] != 0 )
         ctx++;
 
-    if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
+    if( h->top_type     && h->chroma_pred_mode_table[mbb_xy] != 0 )
         ctx++;
 
     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
@@ -867,17 +823,17 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
 static int decode_cabac_mb_cbp_luma( H264Context *h) {
     int cbp_b, cbp_a, ctx, cbp = 0;
 
-    cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
-    cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
+    cbp_a = h->left_cbp;
+    cbp_b = h->top_cbp;
 
     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
-    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
+    cbp += get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
-    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
+    cbp += get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
-    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
+    cbp += get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
-    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
+    cbp += get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
     return cbp;
 }
 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
@@ -898,22 +854,7 @@ static int decode_cabac_mb_cbp_chroma( H264Context *h) {
     if( cbp_b == 2 ) ctx += 2;
     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
 }
-static int decode_cabac_mb_dqp( H264Context *h) {
-    int   ctx= h->last_qscale_diff != 0;
-    int   val = 0;
-
-    while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
-        ctx= 2+(ctx>>1);
-        val++;
-        if(val > 102) //prevent infinite loop
-            return INT_MIN;
-    }
 
-    if( val&0x01 )
-        return   (val + 1)>>1 ;
-    else
-        return -((val + 1)>>1);
-}
 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
         return 0;   /* 8x8 */
@@ -940,10 +881,6 @@ static int decode_cabac_b_mb_sub_type( H264Context *h ) {
     return type;
 }
 
-static inline int decode_cabac_mb_transform_size( H264Context *h ) {
-    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
-}
-
 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
     int refa = h->ref_cache[list][scan8[n] - 1];
     int refb = h->ref_cache[list][scan8[n] - 8];
@@ -951,9 +888,9 @@ static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
     int ctx  = 0;
 
     if( h->slice_type_nos == FF_B_TYPE) {
-        if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
+        if( refa > 0 && !(h->direct_cache[scan8[n] - 1]&(MB_TYPE_DIRECT2>>1)) )
             ctx++;
-        if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
+        if( refb > 0 && !(h->direct_cache[scan8[n] - 8]&(MB_TYPE_DIRECT2>>1)) )
             ctx += 2;
     } else {
         if( refa > 0 )
@@ -972,22 +909,21 @@ static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
     return ref;
 }
 
-static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
-    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
-               abs( h->mvd_cache[list][scan8[n] - 8][l] );
-    int ctxbase = (l == 0) ? 40 : 47;
+static int decode_cabac_mb_mvd( H264Context *h, int ctxbase, int amvd, int *mvda) {
     int mvd;
-    int ctx = (amvd>2) + (amvd>32);
 
-    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
+    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+((amvd-3)>>(INT_BIT-1))+((amvd-33)>>(INT_BIT-1))+2])){
+//  readable equivalent of the branchless index above: if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+(amvd>2)+(amvd>32)])){
+        *mvda= 0;
         return 0;
+    }
 
     mvd= 1;
-    ctx= 3;
-    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
+    ctxbase+= 3;
+    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase] ) ) {
+        if( mvd < 4 )
+            ctxbase++;
         mvd++;
-        if( ctx < 6 )
-            ctx++;
     }
 
     if( mvd >= 9 ) {
@@ -1001,13 +937,25 @@ static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
             }
         }
         while( k-- ) {
-            if( get_cabac_bypass( &h->cabac ) )
-                mvd += 1 << k;
+            mvd += get_cabac_bypass( &h->cabac )<<k;
         }
-    }
+        *mvda=mvd < 70 ? mvd : 70;
+    }else
+        *mvda=mvd;
     return get_cabac_bypass_sign( &h->cabac, -mvd );
 }
 
+#define DECODE_CABAC_MB_MVD( h,  list,  n )\
+{\
+    int amvd0 = h->mvd_cache[list][scan8[n] - 1][0] +\
+                h->mvd_cache[list][scan8[n] - 8][0];\
+    int amvd1 = h->mvd_cache[list][scan8[n] - 1][1] +\
+                h->mvd_cache[list][scan8[n] - 8][1];\
+\
+    mx += decode_cabac_mb_mvd( h, 40, amvd0, &mpx );\
+    my += decode_cabac_mb_mvd( h, 47, amvd1, &mpy );\
+}
+
 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
     int nza, nzb;
     int ctx = 0;
@@ -1223,54 +1171,17 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
 
 }
 
-#if !CONFIG_SMALL
-static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
-    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
+static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
+    decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1);
 }
 
 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
 }
-#endif
-
-static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
-#if CONFIG_SMALL
-    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
-#else
-    if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
-    else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
-#endif
-}
-
-static inline void compute_mb_neighbors(H264Context *h)
-{
-    MpegEncContext * const s = &h->s;
-    const int mb_xy  = h->mb_xy;
-    h->top_mb_xy     = mb_xy - s->mb_stride;
-    h->left_mb_xy[0] = mb_xy - 1;
-    if(FRAME_MBAFF){
-        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
-        const int top_pair_xy      = pair_xy     - s->mb_stride;
-        const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
-        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
-        const int curr_mb_field_flag = MB_FIELD;
-        const int bottom = (s->mb_y & 1);
-
-        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
-            h->top_mb_xy -= s->mb_stride;
-        }
-        if (!left_mb_field_flag == curr_mb_field_flag) {
-            h->left_mb_xy[0] = pair_xy - 1;
-        }
-    } else if (FIELD_PICTURE) {
-        h->top_mb_xy -= s->mb_stride;
-    }
-    return;
-}
 
 /**
  * decodes a macroblock
- * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
  */
 int ff_h264_decode_mb_cabac(H264Context *h) {
     MpegEncContext * const s = &h->s;
@@ -1279,20 +1190,11 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
     int dct8x8_allowed= h->pps.transform_8x8_mode;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
-    
-    /* ffdshow custom code */
-    #if ENABLE_SLICE_MT_PATCH
-    if(s->avctx->thread_count > 1) {
-        s->dsp.clear_blocks(h->mb);
-    }
-    #endif
 
     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     if( h->slice_type_nos != FF_I_TYPE ) {
         int skip;
         /* a skipped mb needs the aff flag from the following mb */
-        if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
-            predict_field_decoding_flag(h);
         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
             skip = h->next_mb_skipped;
         else
@@ -1324,17 +1226,43 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
 
     h->prev_mb_skipped = 0;
 
-    compute_mb_neighbors(h);
+    fill_decode_neighbors(h, -(MB_FIELD));
 
     if( h->slice_type_nos == FF_B_TYPE ) {
-        mb_type = decode_cabac_mb_type_b( h );
-        if( mb_type < 23 ){
-            partition_count= b_mb_type_info[mb_type].partition_count;
-            mb_type=         b_mb_type_info[mb_type].type;
+        int ctx = 0;
+        assert(h->slice_type_nos == FF_B_TYPE);
+
+        if( !IS_DIRECT( h->left_type[0]-1 ) )
+            ctx++;
+        if( !IS_DIRECT( h->top_type-1 ) )
+            ctx++;
+
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) ){
+            mb_type= 0; /* B_Direct_16x16 */
+        }else if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
+            mb_type= 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
         }else{
-            mb_type -= 23;
-            goto decode_intra_mb;
+            int bits;
+            bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
+            bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
+            bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
+            bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
+            if( bits < 8 ){
+                mb_type= bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
+            }else if( bits == 13 ){
+                mb_type= decode_cabac_intra_mb_type(h, 32, 0);
+                goto decode_intra_mb;
+            }else if( bits == 14 ){
+                mb_type= 11; /* B_L1_L0_8x16 */
+            }else if( bits == 15 ){
+                mb_type= 22; /* B_8x8 */
+            }else{
+                bits= ( bits<<1 ) + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
+                mb_type= bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
+            }
         }
+            partition_count= b_mb_type_info[mb_type].partition_count;
+            mb_type=         b_mb_type_info[mb_type].type;
     } else if( h->slice_type_nos == FF_P_TYPE ) {
         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
             /* P-type */
@@ -1409,7 +1337,7 @@ decode_intra_mb:
     if( IS_INTRA( mb_type ) ) {
         int i, pred_mode;
         if( IS_INTRA4x4( mb_type ) ) {
-            if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
+            if( dct8x8_allowed && get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ) ) {
                 mb_type |= MB_TYPE_8x8DCT;
                 for( i = 0; i < 16; i+=4 ) {
                     int pred = pred_intra_mode( h, i );
@@ -1454,11 +1382,8 @@ decode_intra_mb:
                 h->ref_cache[1][scan8[4]] =
                 h->ref_cache[0][scan8[12]] =
                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
-                if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
                     for( i = 0; i < 4; i++ )
-                        if( IS_DIRECT(h->sub_mb_type[i]) )
-                            fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
-                }
+                        fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, (h->sub_mb_type[i]>>1)&0xFF, 1 );
             }
         } else {
             for( i = 0; i < 4; i++ ) {
@@ -1495,7 +1420,7 @@ decode_intra_mb:
             for(i=0; i<4; i++){
                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
                 if(IS_DIRECT(h->sub_mb_type[i])){
-                    fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
+                    fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 2);
                     continue;
                 }
 
@@ -1507,11 +1432,9 @@ decode_intra_mb:
                         int mx, my;
                         const int index= 4*i + block_width*j;
                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
-                        int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
-                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
-
-                        mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
-                        my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
+                        uint8_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
+                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
+                        DECODE_CABAC_MB_MVD( h, list, index)
                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
                         if(IS_SUB_8X8(sub_mb_type)){
@@ -1521,43 +1444,41 @@ decode_intra_mb:
                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
 
                             mvd_cache[ 1 ][0]=
-                            mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
+                            mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mpx;
                             mvd_cache[ 1 ][1]=
-                            mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
+                            mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= mpy;
                         }else if(IS_SUB_8X4(sub_mb_type)){
                             mv_cache[ 1 ][0]= mx;
                             mv_cache[ 1 ][1]= my;
 
-                            mvd_cache[ 1 ][0]= mx - mpx;
-                            mvd_cache[ 1 ][1]= my - mpy;
+                            mvd_cache[ 1 ][0]=  mpx;
+                            mvd_cache[ 1 ][1]= mpy;
                         }else if(IS_SUB_4X8(sub_mb_type)){
                             mv_cache[ 8 ][0]= mx;
                             mv_cache[ 8 ][1]= my;
 
-                            mvd_cache[ 8 ][0]= mx - mpx;
-                            mvd_cache[ 8 ][1]= my - mpy;
+                            mvd_cache[ 8 ][0]= mpx;
+                            mvd_cache[ 8 ][1]= mpy;
                         }
                         mv_cache[ 0 ][0]= mx;
                         mv_cache[ 0 ][1]= my;
 
-                        mvd_cache[ 0 ][0]= mx - mpx;
-                        mvd_cache[ 0 ][1]= my - mpy;
+                        mvd_cache[ 0 ][0]= mpx;
+                        mvd_cache[ 0 ][1]= mpy;
                     }
                 }else{
-                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
-                    uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
-                    p[0] = p[1] = p[8] = p[9] = 0;
-                    pd[0]= pd[1]= pd[8]= pd[9]= 0;
+                    fill_rectangle(h->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4);
+                    fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 2);
                 }
             }
         }
     } else if( IS_DIRECT(mb_type) ) {
         ff_h264_pred_direct_motion(h, &mb_type);
-        fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
-        fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+        fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 2);
+        fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 2);
         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     } else {
-        int list, mx, my, i, mpx, mpy;
+        int list, i;
         if(IS_16X16(mb_type)){
             for(list=0; list<h->list_count; list++){
                 if(IS_DIR(mb_type, 0, list)){
@@ -1571,21 +1492,18 @@ decode_intra_mb:
                     }else
                         ref=0;
                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
-                }else
-                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
+                }
             }
             for(list=0; list<h->list_count; list++){
                 if(IS_DIR(mb_type, 0, list)){
-                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
-
-                    mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
-                    my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
+                    int mx,my,mpx,mpy;
+                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
+                    DECODE_CABAC_MB_MVD( h, list, 0)
                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
-                    fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
+                    fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack8to16(mpx,mpy), 2);
                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
-                }else
-                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
+                }
             }
         }
         else if(IS_16X8(mb_type)){
@@ -1609,15 +1527,15 @@ decode_intra_mb:
             for(list=0; list<h->list_count; list++){
                 for(i=0; i<2; i++){
                     if(IS_DIR(mb_type, i, list)){
-                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
-                        mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
-                        my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
+                        int mx,my,mpx,mpy;
+                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
+                        DECODE_CABAC_MB_MVD( h, list, 8*i)
                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
-                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack8to16(mpx,mpy), 2);
                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
                     }else{
-                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 2);
                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
                     }
                 }
@@ -1644,15 +1562,15 @@ decode_intra_mb:
             for(list=0; list<h->list_count; list++){
                 for(i=0; i<2; i++){
                     if(IS_DIR(mb_type, i, list)){
-                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
-                        mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
-                        my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
+                        int mx,my,mpx,mpy;
+                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
+                        DECODE_CABAC_MB_MVD( h, list, 4*i)
 
                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
-                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack8to16(mpx,mpy), 2);
                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
                     }else{
-                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 2);
                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
                     }
                 }
@@ -1674,15 +1592,13 @@ decode_intra_mb:
     h->cbp_table[mb_xy] = h->cbp = cbp;
 
     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
-        if( decode_cabac_mb_transform_size( h ) )
-            mb_type |= MB_TYPE_8x8DCT;
+        mb_type |= MB_TYPE_8x8DCT * get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
     }
     s->current_picture.mb_type[mb_xy]= mb_type;
 
     if( cbp || IS_INTRA16x16( mb_type ) ) {
         const uint8_t *scan, *scan8x8, *dc_scan;
         const uint32_t *qmul;
-        int dqp;
 
         if(IS_INTERLACED(mb_type)){
             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
@@ -1694,29 +1610,45 @@ decode_intra_mb:
             dc_scan= luma_dc_zigzag_scan;
         }
 
-        h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
-        if( dqp == INT_MIN ){
-            av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
-            return -1;
-        }
-        s->qscale += dqp;
-        if(((unsigned)s->qscale) > 51){
-            if(s->qscale<0) s->qscale+= 52;
-            else            s->qscale-= 52;
-        }
-        h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
-        h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
+        // decode_cabac_mb_dqp
+        if(get_cabac_noinline( &h->cabac, &h->cabac_state[60 + (h->last_qscale_diff != 0)])){
+            int val = 1;
+            int ctx= 2;
+
+            while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
+                ctx= 3;
+                val++;
+                if(val > 102){ //prevent infinite loop
+                    av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
+                    return -1;
+                }
+            }
+
+            if( val&0x01 )
+                val=   (val + 1)>>1 ;
+            else
+                val= -((val + 1)>>1);
+            h->last_qscale_diff = val;
+            s->qscale += val;
+            if(((unsigned)s->qscale) > 51){
+                if(s->qscale<0) s->qscale+= 52;
+                else            s->qscale-= 52;
+            }
+            h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
+            h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
+        }else
+            h->last_qscale_diff=0;
 
         if( IS_INTRA16x16( mb_type ) ) {
             int i;
             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
-            decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
+            decode_cabac_residual_dc( h, h->mb, 0, 0, dc_scan, 16);
 
             if( cbp&15 ) {
                 qmul = h->dequant4_coeff[0][s->qscale];
                 for( i = 0; i < 16; i++ ) {
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
-                    decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
+                    decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
                 }
             } else {
                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
@@ -1726,7 +1658,7 @@ decode_intra_mb:
             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
                 if( cbp & (1<<i8x8) ) {
                     if( IS_8x8DCT(mb_type) ) {
-                        decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
+                        decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8,
                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
                     } else {
                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
@@ -1734,7 +1666,7 @@ decode_intra_mb:
                             const int index = 4*i8x8 + i4x4;
                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
 //START_TIMER
-                            decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
+                            decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16);
 //STOP_TIMER("decode_residual")
                         }
                     }
@@ -1749,7 +1681,7 @@ decode_intra_mb:
             int c;
             for( c = 0; c < 2; c++ ) {
                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
-                decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
+                decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, 4);
             }
         }
 
@@ -1760,7 +1692,7 @@ decode_intra_mb:
                 for( i = 0; i < 4; i++ ) {
                     const int index = 16 + 4 * c + i;
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
-                    decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
+                    decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
                 }
             }
         } else {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_cavlc.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_cavlc.c
index d2c4fbc05..ccca67f74 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_cavlc.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_cavlc.c
@@ -431,8 +431,13 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
                     level_code= prefix + get_bits(gb, 4); //part
             }else{
                 level_code= 30 + get_bits(gb, prefix-3); //part
-                if(prefix>=16)
+                if(prefix>=16){
+                    if(prefix > 25+3){
+                        av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
+                        return -1;
+                    }
                     level_code += (1<<(prefix-3))-4096;
+                }
             }
 
             if(trailing_ones < 3) level_code += 2;
@@ -535,13 +540,6 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
     int dct8x8_allowed= h->pps.transform_8x8_mode;
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
-    
-    /* ffdshow custom code */
-    #if ENABLE_SLICE_MT_PATCH
-    if(s->avctx->thread_count > 1) {
-        s->dsp.clear_blocks(h->mb);
-    }
-    #endif
 
     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
@@ -554,8 +552,6 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
             if(FRAME_MBAFF && (s->mb_y&1) == 0){
                 if(s->mb_skip_run==0)
                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
-                else
-                    predict_field_decoding_flag(h);
             }
             decode_mb_skip(h);
             return 0;
@@ -630,6 +626,7 @@ decode_intra_mb:
         h->ref_count[1] <<= 1;
     }
 
+    fill_decode_neighbors(h, mb_type);
     fill_decode_caches(h, mb_type);
 
     //mb_pred
@@ -797,22 +794,18 @@ decode_intra_mb:
                                 return -1;
                             }
                         }
-                    }else
-                        val= LIST_NOT_USED&0xFF;
                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
+                    }
             }
             for(list=0; list<h->list_count; list++){
-                unsigned int val;
                 if(IS_DIR(mb_type, 0, list)){
                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
                     mx += get_se_golomb(&s->gb);
                     my += get_se_golomb(&s->gb);
                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
-                    val= pack16to32(mx,my);
-                }else
-                    val=0;
-                fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
+                }
             }
         }
         else if(IS_16X8(mb_type)){
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_direct.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_direct.c
index 43b0d0d8a..a0a313565 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_direct.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_direct.c
@@ -30,7 +30,6 @@
 #include "avcodec.h"
 #include "mpegvideo.h"
 #include "h264.h"
-#include "h264_mvpred.h"
 #include "rectangle.h"
 
 //#undef NDEBUG
@@ -71,7 +70,7 @@ static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field,
     Picture * const ref1 = &h->ref_list[1][0];
     int j, old_ref, rfield;
     int start= mbafi ? 16                      : 0;
-    int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
+    int end  = mbafi ? 16+2*h->ref_count[0]    : h->ref_count[0];
     int interl= mbafi || s->picture_structure != PICT_FRAME;
 
     /* bogus; fills in for missing frames */
@@ -87,10 +86,10 @@ static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field,
                 poc= (poc&~3) + rfield + 1;
 
             for(j=start; j<end; j++){
-                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
+                if(4*h->ref_list[0][j].frame_num + (h->ref_list[0][j].reference&3) == poc){
                     int cur_ref= mbafi ? (j-16)^field : j;
                     map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
-                    if(rfield == field)
+                    if(rfield == field || !interl)
                         map[list][old_ref] = cur_ref;
                     break;
                 }
@@ -120,42 +119,108 @@ void ff_h264_direct_ref_list_init(H264Context * const h){
 
     cur->mbaff= FRAME_MBAFF;
 
+    h->col_fieldoff= 0;
+    if(s->picture_structure == PICT_FRAME){
+        int cur_poc = s->current_picture_ptr->poc;
+        int *col_poc = h->ref_list[1]->field_poc;
+        h->col_parity= (FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc));
+        ref1sidx=sidx= h->col_parity;
+    }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){ // FL -> FL & differ parity
+        h->col_fieldoff= s->mb_stride*(2*(h->ref_list[1][0].reference) - 3);
+    }
+
     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
         return;
 
     for(list=0; list<2; list++){
         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
+        if(FRAME_MBAFF)
         for(field=0; field<2; field++)
             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
     }
 }
 
-void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type){
+static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){
     MpegEncContext * const s = &h->s;
-    int b8_stride = h->b8_stride;
+    int b8_stride = 2;
     int b4_stride = h->b_stride;
     int mb_xy = h->mb_xy;
     int mb_type_col[2];
     const int16_t (*l1mv0)[2], (*l1mv1)[2];
     const int8_t *l1ref0, *l1ref1;
     const int is_b8x8 = IS_8X8(*mb_type);
-    unsigned int sub_mb_type;
+    unsigned int sub_mb_type= MB_TYPE_L0L1;
     int i8, i4;
+    int ref[2];
+    int mv[2];
+    int list;
 
     assert(h->ref_list[1][0].reference&3);
 
 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
 
+
+    /* ref = min(neighbors) */
+    for(list=0; list<2; list++){
+        int left_ref = h->ref_cache[list][scan8[0] - 1];
+        int top_ref  = h->ref_cache[list][scan8[0] - 8];
+        int refc = h->ref_cache[list][scan8[0] - 8 + 4];
+        const int16_t *C= h->mv_cache[list][ scan8[0] - 8 + 4];
+        if(refc == PART_NOT_AVAILABLE){
+            refc = h->ref_cache[list][scan8[0] - 8 - 1];
+            C    = h-> mv_cache[list][scan8[0] - 8 - 1];
+        }
+        ref[list] = FFMIN3((unsigned)left_ref, (unsigned)top_ref, (unsigned)refc);
+        if(ref[list] >= 0){
+            //this is just pred_motion() but with the cases removed that cannot happen for direct blocks
+            const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
+            const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
+
+            int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]);
+            if(match_count > 1){ //most common
+                mv[list]= pack16to32(mid_pred(A[0], B[0], C[0]),
+                                     mid_pred(A[1], B[1], C[1]) );
+            }else {
+                assert(match_count==1);
+                if(left_ref==ref[list]){
+                    mv[list]= AV_RN32A(A);
+                }else if(top_ref==ref[list]){
+                    mv[list]= AV_RN32A(B);
+                }else{
+                    mv[list]= AV_RN32A(C);
+                }
+            }
+        }else{
+            int mask= ~(MB_TYPE_L0 << (2*list));
+            mv[list] = 0;
+            ref[list] = -1;
+            if(!is_b8x8)
+                *mb_type &= mask;
+            sub_mb_type &= mask;
+        }
+    }
+    if(ref[0] < 0 && ref[1] < 0){
+        ref[0] = ref[1] = 0;
+        if(!is_b8x8)
+            *mb_type |= MB_TYPE_L0L1;
+        sub_mb_type |= MB_TYPE_L0L1;
+    }
+
+    if(!(is_b8x8|mv[0]|mv[1])){
+        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
+        fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
+        fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
+        fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+        *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2;
+        return;
+    }
+
     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
-            int cur_poc = s->current_picture_ptr->poc;
-            int *col_poc = h->ref_list[1]->field_poc;
-            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
-            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
+            mb_xy= s->mb_x + ((s->mb_y&~1) + h->col_parity)*s->mb_stride;
             b8_stride = 0;
-        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
-            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
-            mb_xy += s->mb_stride*fieldoff;
+        }else{
+            mb_xy += h->col_fieldoff; // non zero for FL -> FL & differ parity
         }
         goto single_col;
     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
@@ -163,99 +228,60 @@ void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type){
             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
-            b8_stride *= 3;
+            b8_stride = 2+4*s->mb_stride;
             b4_stride *= 6;
-            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
+
+            sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                 && !is_b8x8){
-                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
-                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
+                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_DIRECT2; /* B_16x8 */
             }else{
-                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
-                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
+                *mb_type   |= MB_TYPE_8x8;
             }
         }else{                                           //     AFR/FR    -> AFR/FR
 single_col:
             mb_type_col[0] =
             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
-            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
-                /* FIXME save sub mb types from previous frames (or derive from MVs)
-                * so we know exactly what block size to use */
-                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
-                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
-            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
-                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
-                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
+
+            sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+            if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
+                *mb_type   |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_16x16 */
+            }else if(!is_b8x8 && (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16))){
+                *mb_type   |= MB_TYPE_DIRECT2 | (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16));
             }else{
-                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
-                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
+                if(!h->sps.direct_8x8_inference_flag){
+                    /* FIXME save sub mb types from previous frames (or derive from MVs)
+                    * so we know exactly what block size to use */
+                    sub_mb_type += (MB_TYPE_8x8-MB_TYPE_16x16); /* B_SUB_4x4 */
+                }
+                *mb_type   |= MB_TYPE_8x8;
             }
         }
     }
 
     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
-    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
-    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
+    l1ref0 = &h->ref_list[1][0].ref_index [0][4*mb_xy];
+    l1ref1 = &h->ref_list[1][0].ref_index [1][4*mb_xy];
     if(!b8_stride){
         if(s->mb_y&1){
-            l1ref0 += h->b8_stride;
-            l1ref1 += h->b8_stride;
+            l1ref0 += 2;
+            l1ref1 += 2;
             l1mv0  +=  2*b4_stride;
             l1mv1  +=  2*b4_stride;
         }
     }
 
-    if(h->direct_spatial_mv_pred){
-        int ref[2];
-        int mv[2][2];
-        int list;
-
-        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
-
-        /* ref = min(neighbors) */
-        for(list=0; list<2; list++){
-            int refa = h->ref_cache[list][scan8[0] - 1];
-            int refb = h->ref_cache[list][scan8[0] - 8];
-            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
-            if(refc == PART_NOT_AVAILABLE)
-                refc = h->ref_cache[list][scan8[0] - 8 - 1];
-            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
-            if(ref[list] < 0)
-                ref[list] = -1;
-        }
-
-        if(ref[0] < 0 && ref[1] < 0){
-            ref[0] = ref[1] = 0;
-            mv[0][0] = mv[0][1] =
-            mv[1][0] = mv[1][1] = 0;
-        }else{
-            for(list=0; list<2; list++){
-                if(ref[list] >= 0)
-                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
-                else
-                    mv[list][0] = mv[list][1] = 0;
-            }
-        }
-
-        if(ref[1] < 0){
-            if(!is_b8x8)
-                *mb_type &= ~MB_TYPE_L1;
-            sub_mb_type &= ~MB_TYPE_L1;
-        }else if(ref[0] < 0){
-            if(!is_b8x8)
-                *mb_type &= ~MB_TYPE_L0;
-            sub_mb_type &= ~MB_TYPE_L0;
-        }
 
         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
+            int n=0;
             for(i8=0; i8<4; i8++){
                 int x8 = i8&1;
                 int y8 = i8>>1;
                 int xy8 = x8+y8*b8_stride;
                 int xy4 = 3*x8+y8*b4_stride;
-                int a=0, b=0;
+                int a,b;
 
                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                     continue;
@@ -263,40 +289,46 @@ single_col:
 
                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
-                if(!IS_INTRA(mb_type_col[y8])
+                if(!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref
                    && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                        || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
+                    a=b=0;
                     if(ref[0] > 0)
-                        a= pack16to32(mv[0][0],mv[0][1]);
+                        a= mv[0];
                     if(ref[1] > 0)
-                        b= pack16to32(mv[1][0],mv[1][1]);
+                        b= mv[1];
+                    n++;
                 }else{
-                    a= pack16to32(mv[0][0],mv[0][1]);
-                    b= pack16to32(mv[1][0],mv[1][1]);
+                    a= mv[0];
+                    b= mv[1];
                 }
                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
             }
+            if(!is_b8x8 && !(n&3))
+                *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2;
         }else if(IS_16X16(*mb_type)){
-            int a=0, b=0;
+            int a,b;
 
             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
-            if(!IS_INTRA(mb_type_col[0])
+            if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref
                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
-                       && (h->x264_build>33 || !h->x264_build)))){
+                       && h->x264_build>33U))){
+                a=b=0;
                 if(ref[0] > 0)
-                    a= pack16to32(mv[0][0],mv[0][1]);
+                    a= mv[0];
                 if(ref[1] > 0)
-                    b= pack16to32(mv[1][0],mv[1][1]);
+                    b= mv[1];
             }else{
-                a= pack16to32(mv[0][0],mv[0][1]);
-                b= pack16to32(mv[1][0],mv[1][1]);
+                a= mv[0];
+                b= mv[1];
             }
             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
         }else{
+            int n=0;
             for(i8=0; i8<4; i8++){
                 const int x8 = i8&1;
                 const int y8 = i8>>1;
@@ -305,16 +337,17 @@ single_col:
                     continue;
                 h->sub_mb_type[i8] = sub_mb_type;
 
-                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
-                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
+                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, mv[0], 4);
+                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, mv[1], 4);
                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
 
+                assert(b8_stride==2);
                 /* col_zero_flag */
-                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
-                                              || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
-                                                  && (h->x264_build>33 || !h->x264_build)))){
-                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
+                if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && (   l1ref0[i8] == 0
+                                              || (l1ref0[i8] < 0 && l1ref1[i8] == 0
+                                                  && h->x264_build>33U))){
+                    const int16_t (*l1mv)[2]= l1ref0[i8] == 0 ? l1mv0 : l1mv1;
                     if(IS_SUB_8X8(sub_mb_type)){
                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
@@ -322,36 +355,119 @@ single_col:
                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                             if(ref[1] == 0)
                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
+                            n+=4;
                         }
-                    }else
+                    }else{
+                        int m=0;
                     for(i4=0; i4<4; i4++){
                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                             if(ref[0] == 0)
-                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
+                                AV_ZERO32(h->mv_cache[0][scan8[i8*4+i4]]);
                             if(ref[1] == 0)
-                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
+                                AV_ZERO32(h->mv_cache[1][scan8[i8*4+i4]]);
+                            m++;
                         }
                     }
+                    if(!(m&3))
+                        h->sub_mb_type[i8]+= MB_TYPE_16x16 - MB_TYPE_8x8;
+                    n+=m;
+                    }
                 }
             }
+            if(!is_b8x8 && !(n&15))
+                *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2;
         }
-    }else{ /* direct temporal mv pred */
+}
+
+static void pred_temp_direct_motion(H264Context * const h, int *mb_type){
+    MpegEncContext * const s = &h->s;
+    int b8_stride = 2;
+    int b4_stride = h->b_stride;
+    int mb_xy = h->mb_xy;
+    int mb_type_col[2];
+    const int16_t (*l1mv0)[2], (*l1mv1)[2];
+    const int8_t *l1ref0, *l1ref1;
+    const int is_b8x8 = IS_8X8(*mb_type);
+    unsigned int sub_mb_type;
+    int i8, i4;
+
+    assert(h->ref_list[1][0].reference&3);
+
+    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
+        if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
+            mb_xy= s->mb_x + ((s->mb_y&~1) + h->col_parity)*s->mb_stride;
+            b8_stride = 0;
+        }else{
+            mb_xy += h->col_fieldoff; // non zero for FL -> FL & differ parity
+        }
+        goto single_col;
+    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
+        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
+            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
+            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
+            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
+            b8_stride = 2+4*s->mb_stride;
+            b4_stride *= 6;
+
+            sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+
+            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
+                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
+                && !is_b8x8){
+                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
+            }else{
+                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
+            }
+        }else{                                           //     AFR/FR    -> AFR/FR
+single_col:
+            mb_type_col[0] =
+            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
+
+            sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+            if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
+                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
+            }else if(!is_b8x8 && (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16))){
+                *mb_type   |= MB_TYPE_L0L1|MB_TYPE_DIRECT2 | (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16));
+            }else{
+                if(!h->sps.direct_8x8_inference_flag){
+                    /* FIXME save sub mb types from previous frames (or derive from MVs)
+                    * so we know exactly what block size to use */
+                    sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
+                }
+                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
+            }
+        }
+    }
+
+    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
+    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
+    l1ref0 = &h->ref_list[1][0].ref_index [0][4*mb_xy];
+    l1ref1 = &h->ref_list[1][0].ref_index [1][4*mb_xy];
+    if(!b8_stride){
+        if(s->mb_y&1){
+            l1ref0 += 2;
+            l1ref1 += 2;
+            l1mv0  +=  2*b4_stride;
+            l1mv1  +=  2*b4_stride;
+        }
+    }
+
+    {
         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
         const int *dist_scale_factor = h->dist_scale_factor;
-        int ref_offset= 0;
+        int ref_offset;
 
         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
         }
-        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
-            ref_offset += 16;
+        ref_offset = (h->ref_list[1][0].mbaff<<4) & (mb_type_col[0]>>3); //if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0])) ref_offset=16 else 0
 
         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
-            /* FIXME assumes direct_8x8_inference == 1 */
             int y_shift  = 2*!IS_INTERLACED(*mb_type);
+            assert(h->sps.direct_8x8_inference_flag);
 
             for(i8=0; i8<4; i8++){
                 const int x8 = i8&1;
@@ -434,11 +550,12 @@ single_col:
                     continue;
                 }
 
-                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
+                assert(b8_stride == 2);
+                ref0 = l1ref0[i8];
                 if(ref0 >= 0)
-                    ref0 = map_col_to_list0[0][ref0];
+                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                 else{
-                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
+                    ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];
                     l1mv= l1mv1;
                 }
                 scale = dist_scale_factor[ref0];
@@ -456,10 +573,18 @@ single_col:
                     int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                     mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                     mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
-                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
-                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
+                    AV_WN32A(h->mv_cache[1][scan8[i8*4+i4]],
+                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]));
                 }
             }
         }
     }
 }
+
+void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type){
+    /* Forward to the spatial or temporal direct-mode predictor. */
+    if(!h->direct_spatial_mv_pred)
+        pred_temp_direct_motion(h, mb_type);
+    else
+        pred_spatial_direct_motion(h, mb_type);
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_dxva.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_dxva.c
index 1e882039c..2c2b6b240 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_dxva.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_dxva.c
@@ -49,16 +49,16 @@ static void fill_dxva_slice_long(H264Context *h){
 	
 	// Fill prediction weights
 	memset (pSlice->Weights, 0, sizeof(pSlice->Weights));
-	for(j=0; j<2; j++){
-		for(i=0; i<h->ref_count[j]; i++){
+	for(i=0; i<2; i++){
+		for(j=0; j<h->ref_count[i]; j++){
 			//         L0&L1          Y,Cb,Cr  Weight,Offset
 			// Weights  [2]    [32]     [3]         [2]
-			pSlice->Weights[j][i][0][0] = h->luma_weight[j][i];
-			pSlice->Weights[j][i][0][1] = h->luma_offset[j][i];
+			pSlice->Weights[i][j][0][0] = h->luma_weight[j][i][0];
+			pSlice->Weights[i][j][0][1] = h->luma_weight[j][i][1];
 
 			for(k=0; k<2; k++){
-				pSlice->Weights[j][i][k+1][0] = h->chroma_weight[j][i][k];
-				pSlice->Weights[j][i][k+1][1] = h->chroma_offset[j][i][k];
+				pSlice->Weights[i][j][k+1][0] = h->chroma_weight[j][i][k][0];
+				pSlice->Weights[i][j][k+1][1] = h->chroma_weight[j][i][k][1];
 			}
 		}
 	}
@@ -123,15 +123,15 @@ int decode_slice_header_noexecute (H264Context *h){
         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
     }
 
-	// ==> Start patch MPC DXVA
+    // ==> Start patch MPC DXVA
     h->first_mb_in_slice= get_ue_golomb(&s->gb);
-	// <== End patch MPC DXVA
+    // <== End patch MPC DXVA
 
     if(h->first_mb_in_slice == 0){ //FIXME better field boundary detection
         if(h0->current_slice && FIELD_PICTURE){
-			// ==> Start patch MPC DXVA
+            // ==> Start patch MPC DXVA
             field_end_noexecute(h);
-			// <== End patch MPC DXVA
+            // <== End patch MPC DXVA
         }
 
         h0->current_slice = 0;
@@ -162,11 +162,6 @@ int decode_slice_header_noexecute (H264Context *h){
     h->slice_type_nos= slice_type & 3;
 
     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
-    if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
-        av_log(h->s.avctx, AV_LOG_ERROR,
-               "B picture before any references, skipping\n");
-        return -1;
-    }
 
     pps_id= get_ue_golomb(&s->gb);
     if(pps_id>=MAX_PPS_COUNT){
@@ -194,7 +189,6 @@ int decode_slice_header_noexecute (H264Context *h){
     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
 
     h->b_stride=  s->mb_width*4;
-    h->b8_stride= s->mb_width*2;
 
     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
     if(h->sps.frame_mbs_only_flag)
@@ -203,7 +197,8 @@ int decode_slice_header_noexecute (H264Context *h){
         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
 
     if (s->context_initialized
-        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
+        && (   s->width != s->avctx->width || s->height != s->avctx->height
+            || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
         if(h != h0)
             return -1;   // width / height changed during parallelized decoding
         free_tables(h);
@@ -229,16 +224,11 @@ int decode_slice_header_noexecute (H264Context *h){
         }
 
         if(h->sps.timing_info_present_flag){
-            #if __STDC_VERSION__ >= 199901L
-            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
-            #else
-            s->avctx->time_base.num = h->sps.num_units_in_tick * 2;
-            s->avctx->time_base.den = h->sps.time_scale;
-            #endif
-            if(h->x264_build > 0 && h->x264_build < 44)
-                s->avctx->time_base.den *= 2;
+            int64_t den= h->sps.time_scale;
+            if(h->x264_build < 44U)
+                den *= 2;
             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
-                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
+                      h->sps.num_units_in_tick * 2, den, 1<<30);
         }
 
         if (MPV_common_init(s) < 0)
@@ -257,7 +247,7 @@ int decode_slice_header_noexecute (H264Context *h){
             c->sps = h->sps;
             c->pps = h->pps;
             init_scan_tables(c);
-            clone_tables(c, h);
+            clone_tables(c, h, i);
         }
 
         for(i = 0; i < s->avctx->thread_count; i++)
@@ -364,8 +354,8 @@ int decode_slice_header_noexecute (H264Context *h){
             return -1;
         }
     }
-    if(h != h0)
-        clone_slice(h, h0, 0); /* ffdshow custom code */
+    //if(h != h0)
+    //    clone_slice(h, h0); /* ffdshow custom code */
 
     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
 
@@ -461,9 +451,9 @@ int decode_slice_header_noexecute (H264Context *h){
     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
         pred_weight_table(h);
-    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
-        implicit_weight_table(h);
-    else {
+    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
+        implicit_weight_table(h, -1);
+    }else {
         h->use_weight = 0;
         for (i = 0; i < 2; i++) {
             h->luma_weight_flag[i]   = 0;
@@ -474,9 +464,15 @@ int decode_slice_header_noexecute (H264Context *h){
     if(h->nal_ref_idc)
         ff_h264_decode_ref_pic_marking(h0, &s->gb);
 
-    if(FRAME_MBAFF)
+    if(FRAME_MBAFF){
         ff_h264_fill_mbaff_ref_list(h);
 
+        if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
+            implicit_weight_table(h, 0);
+            implicit_weight_table(h, 1);
+        }
+    }
+
     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
         ff_h264_direct_dist_scale_factor(h);
     ff_h264_direct_ref_list_init(h);
@@ -615,7 +611,7 @@ int decode_slice_header_noexecute (H264Context *h){
 
     s->avctx->refs= h->sps.ref_frame_count;
 
-	fill_dxva_slice_long(h);
+    fill_dxva_slice_long(h);
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
@@ -627,7 +623,7 @@ int decode_slice_header_noexecute (H264Context *h){
                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
                h->ref_count[0], h->ref_count[1],
                s->qscale,
-               h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
+               h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
                h->use_weight,
                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
@@ -638,7 +634,7 @@ int decode_slice_header_noexecute (H264Context *h){
 }
 
 
-static int decode_nal_units_noexecute(H264Context *h, uint8_t *buf, int buf_size){
+static int decode_nal_units_noexecute(H264Context *h, const uint8_t *buf, int buf_size){
     MpegEncContext * const s = &h->s;
     AVCodecContext * const avctx= s->avctx;
     int buf_index=0;
@@ -737,11 +733,14 @@ static int decode_nal_units_noexecute(H264Context *h, uint8_t *buf, int buf_size
             hx->inter_gb_ptr= &hx->s.gb;
             hx->s.data_partitioning = 0;
 
-			// ==> Start patch MPC DXVA
-			hx->ref_pic_flag = (h->nal_ref_idc != 0);
+            // ==> Start patch MPC DXVA
+            hx->ref_pic_flag = (h->nal_ref_idc != 0);
             if((err = decode_slice_header_noexecute(hx)))
                break;
-			// <== End patch MPC DXVA
+            // <== End patch MPC DXVA
+
+            avctx->profile = hx->sps.profile_idc;
+            avctx->level   = hx->sps.level_idc;
 
             s->current_picture_ptr->key_frame |=
                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
@@ -758,10 +757,14 @@ static int decode_nal_units_noexecute(H264Context *h, uint8_t *buf, int buf_size
             init_get_bits(&hx->s.gb, ptr, bit_length);
             hx->intra_gb_ptr=
             hx->inter_gb_ptr= NULL;
-			// ==> Start patch MPC DXVA
+            // ==> Start patch MPC DXVA
             if ((err = decode_slice_header_noexecute(hx)) < 0)
                 break;
-			// <== End patch MPC DXVA
+            // <== End patch MPC DXVA
+
+            avctx->profile = hx->sps.profile_idc;
+            avctx->level   = hx->sps.level_idc;
+
             hx->s.data_partitioning = 1;
 
             break;
@@ -814,9 +817,9 @@ static int decode_nal_units_noexecute(H264Context *h, uint8_t *buf, int buf_size
         }
 
         if(context_count == h->max_contexts) {
-			// ==> Start patch MPC DXVA
-			// execute_decode_slices(h, context_count);
-			// <== End patch MPC DXVA
+            // ==> Start patch MPC DXVA
+            // execute_decode_slices(h, context_count);
+            // <== End patch MPC DXVA
             context_count = 0;
         }
 
@@ -841,18 +844,13 @@ static int decode_nal_units_noexecute(H264Context *h, uint8_t *buf, int buf_size
 }
 
 
-int av_h264_decode_frame(AVCodecContext *avctx,
-						 int* nOutPOC, int64_t* rtStartTime,
-                             const uint8_t *buf, int buf_size)
+int av_h264_decode_frame(struct AVCodecContext* avctx, int* nOutPOC, int64_t* rtStartTime, const uint8_t *buf, int buf_size)
 {
     H264Context *h = avctx->priv_data;
     MpegEncContext *s = &h->s;
+    //AVFrame *pict = data;
     int buf_index;
 
-	// ==> Start patch MPC DXVA
-	if (nOutPOC) *nOutPOC = -1;
-	// <== End patch MPC DXVA
-
     s->flags= avctx->flags;
     s->flags2= avctx->flags2;
 
@@ -874,10 +872,12 @@ int av_h264_decode_frame(AVCodecContext *avctx,
             h->delayed_pic[i] = h->delayed_pic[i+1];
 
         if(out){
-			// ==> Start patch MPC DXVA
-			if (nOutPOC)	 *nOutPOC		= out->poc;
-			if (rtStartTime) *rtStartTime	= out->reordered_opaque;
-			// <== End patch MPC DXVA
+            // ==> Start patch MPC DXVA
+            //*data_size = sizeof(AVFrame);
+            //*pict= *(AVFrame*)out;
+            if (nOutPOC)	 *nOutPOC		= out->poc;
+            if (rtStartTime) *rtStartTime	= out->reordered_opaque;
+            // <== End patch MPC DXVA
         }
 
         return 0;
@@ -893,7 +893,7 @@ int av_h264_decode_frame(AVCodecContext *avctx,
 
         for (i = 0; i < cnt; i++) {
             nalsize = AV_RB16(p) + 2;
-            if(decode_nal_units(h, p, nalsize)  < 0) {
+            if(decode_nal_units(h, p, nalsize)  != nalsize) {
                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
                 return -1;
             }
@@ -922,9 +922,9 @@ int av_h264_decode_frame(AVCodecContext *avctx,
     }
     /* ffdshow custom code (end) */
 
-	// ==> Start patch MPC DXVA
-	buf_index=decode_nal_units_noexecute(h, buf, buf_size);
-	// <== End patch MPC DXVA
+    // ==> Start patch MPC DXVA
+    buf_index=decode_nal_units_noexecute(h, buf, buf_size);
+    // <== End patch MPC DXVA
     if(buf_index < 0)
         return -1;
 
@@ -939,15 +939,15 @@ int av_h264_decode_frame(AVCodecContext *avctx,
         Picture *cur = s->current_picture_ptr;
         int i, pics, out_of_order, out_idx;
 
-		// ==> Start patch MPC DXVA
-		field_end_noexecute(h);
-		// <== End patch MPC DXVA
+        // ==> Start patch MPC DXVA
+        field_end_noexecute(h);
+        // <== End patch MPC DXVA
 
         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
             /* Wait for second field. */
-			// ==> Start patch MPC DXVA
-			//*data_size = 0;
-			// <== End patch MPC DXVA
+            // ==> Start patch MPC DXVA
+            //*data_size = 0;
+            // <== End patch MPC DXVA
 
         } else {
             cur->interlaced_frame = 0;
@@ -1069,34 +1069,29 @@ int av_h264_decode_frame(AVCodecContext *avctx,
                 for(i=out_idx; h->delayed_pic[i]; i++)
                     h->delayed_pic[i] = h->delayed_pic[i+1];
             }
-			// ==> Start patch MPC DXVA
+            // ==> Start patch MPC DXVA
             if(!out_of_order && pics > s->avctx->has_b_frames){
-
+                //*data_size = sizeof(AVFrame);
 
                 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
                     h->outputed_poc = INT_MIN;
                 } else
                     h->outputed_poc = out->poc;
-				if (nOutPOC)     *nOutPOC		= out->poc;
-				if (rtStartTime) *rtStartTime	= out->reordered_opaque;
+                //*pict= *(AVFrame*)out;
+                if (nOutPOC)     *nOutPOC		= out->poc;
+                if (rtStartTime) *rtStartTime	= out->reordered_opaque;
+                
             }else{
                 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
             }
-			// <== End patch MPC DXVA
+            // <== End patch MPC DXVA
         }
     }
 
-	// ==> Start patch MPC DXVA
+    // ==> Start patch MPC DXVA
     //assert(pict->data[0] || !*data_size);
     //ff_print_debug_info(s, pict);
-	// <== End patch MPC DXVA
-
-    /* ffdshow custom code (begin) */
-    //pict->h264_poc_decoded = h->poc_lsb + h->poc_msb;
-    //pict->h264_poc_outputed = h->outputed_poc;
-    //pict->h264_frame_num_decoded = h-> frame_num;
-    //pict->h264_max_frame_num = 1 << h->sps.log2_max_frame_num;
-    /* ffdshow custom code (end) */
+    // <== End patch MPC DXVA
 
     return get_consumed_bytes(s, buf_index, buf_size);
 }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_loopfilter.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_loopfilter.c
index 1a2ceb816..3129e4889 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_loopfilter.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_loopfilter.c
@@ -25,6 +25,7 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
+#include "libavutil/intreadwrite.h"
 #include "internal.h"
 #include "dsputil.h"
 #include "avcodec.h"
@@ -111,9 +112,9 @@ static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t
         tc[1] = tc0_table[index_a][bS[1]];
         tc[2] = tc0_table[index_a][bS[2]];
         tc[3] = tc0_table[index_a][bS[3]];
-        h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
+        h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
     } else {
-        h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
+        h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
     }
 }
 static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
@@ -128,9 +129,9 @@ static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t
         tc[1] = tc0_table[index_a][bS[1]]+1;
         tc[2] = tc0_table[index_a][bS[2]]+1;
         tc[3] = tc0_table[index_a][bS[3]]+1;
-        h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
+        h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
     } else {
-        h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
+        h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -281,9 +282,9 @@ static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t
         tc[1] = tc0_table[index_a][bS[1]];
         tc[2] = tc0_table[index_a][bS[2]];
         tc[3] = tc0_table[index_a][bS[3]];
-        h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
+        h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
     } else {
-        h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
+        h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -299,9 +300,9 @@ static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t
         tc[1] = tc0_table[index_a][bS[1]]+1;
         tc[2] = tc0_table[index_a][bS[2]]+1;
         tc[3] = tc0_table[index_a][bS[3]]+1;
-        h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
+        h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
     } else {
-        h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
+        h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -313,7 +314,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
 
     mb_xy = h->mb_xy;
 
-    if(!h->top_type || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
+    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
         return;
     }
@@ -340,15 +341,13 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
         int16_t bS4[4] = {4,4,4,4};
         int16_t bS3[4] = {3,3,3,3};
         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
+        if(left_type)
+            filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
         if( IS_8x8DCT(mb_type) ) {
-            if(left_type)
-                filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
             filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
         } else {
-            if(left_type)
-                filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
             filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h);
             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
             filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h);
@@ -369,49 +368,44 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
         filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
         return;
     } else {
-        DECLARE_ALIGNED_8(int16_t, bS)[2][4][4];
-        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
+        LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
         int edges;
         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
             edges = 4;
-            bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
+            AV_WN64A(bS[0][0], 0x0002000200020002ULL);
+            AV_WN64A(bS[0][2], 0x0002000200020002ULL);
+            AV_WN64A(bS[1][0], 0x0002000200020002ULL);
+            AV_WN64A(bS[1][2], 0x0002000200020002ULL);
         } else {
-            int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
-                             (mb_type & MB_TYPE_16x8) ? 1 : 0;
-            int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
-                             && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16))
-                             ? 3 : 0;
-            int step = IS_8x8DCT(mb_type) ? 2 : 1;
-            edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
-            s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
+            int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
+            int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
+            int step =  1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
+            edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
+            h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
                                               h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
         }
-        if( IS_INTRA(h->left_type[0]) )
-            bSv[0][0] = 0x0004000400040004ULL;
+        if( IS_INTRA(left_type) )
+            AV_WN64A(bS[0][0], 0x0004000400040004ULL);
         if( IS_INTRA(h->top_type) )
-            bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
+            AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL);
 
 #define FILTER(hv,dir,edge)\
-        if(bSv[dir][edge]) {\
+        if(AV_RN64A(bS[dir][edge])) {                                   \
             filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\
             if(!(edge&1)) {\
                 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
                 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
             }\
         }
+        if(left_type)
+            FILTER(v,0,0);
         if( edges == 1 ) {
-            if(left_type)
-                FILTER(v,0,0);
             FILTER(h,1,0);
         } else if( IS_8x8DCT(mb_type) ) {
-            if(left_type)
-                FILTER(v,0,0);
             FILTER(v,0,2);
             FILTER(h,1,0);
             FILTER(h,1,2);
         } else {
-            if(left_type)
-                FILTER(v,0,0);
             FILTER(v,0,1);
             FILTER(v,0,2);
             FILTER(v,0,3);
@@ -427,9 +421,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
 static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
     int v;
 
-    v = h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx] |
-        h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
-        FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
+    v= h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx];
+    if(!v && h->ref_cache[0][b_idx]!=-1)
+        v= h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
+           FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
 
     if(h->list_count==2){
         if(!v)
@@ -482,16 +477,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
             int j;
 
             for(j=0; j<2; j++, mbn_xy += s->mb_stride){
-                DECLARE_ALIGNED_8(int16_t, bS)[4];
+                DECLARE_ALIGNED(8, int16_t, bS)[4];
                 int qp;
                 if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) {
-                    *(uint64_t*)bS= 0x0003000300030003ULL;
+                    AV_WN64A(bS, 0x0003000300030003ULL);
                 } else {
-                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8; //FIXME  8x8dct?
+                    if(!CABAC && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])){
+                        bS[0]= 1+((h->cbp_table[mbn_xy] & 4)||h->non_zero_count_cache[scan8[0]+0]);
+                        bS[1]= 1+((h->cbp_table[mbn_xy] & 4)||h->non_zero_count_cache[scan8[0]+1]);
+                        bS[2]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+2]);
+                        bS[3]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+3]);
+                    }else{
+                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8;
                     int i;
                     for( i = 0; i < 4; i++ ) {
                         bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]);
                     }
+                    }
                 }
                 // Do not use s->qscale as luma quantizer because it has not the same
                 // value in IPCM macroblocks.
@@ -505,21 +507,21 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
                                 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h);
             }
         }else{
-            DECLARE_ALIGNED_8(int16_t, bS)[4];
+            DECLARE_ALIGNED(8, int16_t, bS)[4];
             int qp;
 
             if( IS_INTRA(mb_type|mbm_type)) {
-                *(uint64_t*)bS= 0x0003000300030003ULL;
+                AV_WN64A(bS, 0x0003000300030003ULL);
                 if (   (!IS_INTERLACED(mb_type|mbm_type))
                     || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
                 )
-                    *(uint64_t*)bS= 0x0004000400040004ULL;
+                    AV_WN64A(bS, 0x0004000400040004ULL);
             } else {
-                int i, l;
+                int i;
                 int mv_done;
 
-                if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) { //FIXME not posible left
-                    *(uint64_t*)bS= 0x0001000100010001ULL;
+                if( dir && FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) {
+                    AV_WN64A(bS, 0x0001000100010001ULL);
                     mv_done = 1;
                 }
                 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
@@ -582,20 +584,20 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
 
     /* Calculate bS */
     for( edge = 1; edge < edges; edge++ ) {
-        DECLARE_ALIGNED_8(int16_t, bS)[4];
+        DECLARE_ALIGNED(8, int16_t, bS)[4];
         int qp;
 
         if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type)
             continue;
 
         if( IS_INTRA(mb_type)) {
-            *(uint64_t*)bS= 0x0003000300030003ULL;
+            AV_WN64A(bS, 0x0003000300030003ULL);
         } else {
-            int i, l;
+            int i;
             int mv_done;
 
             if( edge & mask_edge ) {
-                *(uint64_t*)bS= 0;
+                AV_ZERO64(bS);
                 mv_done = 1;
             }
             else if( mask_par0 ) {
@@ -658,7 +660,6 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
     int first_vertical_edge_done = 0;
     av_unused int dir;
-    int list;
 
     if (FRAME_MBAFF
             // and current and left pair do not have the same interlaced type
@@ -668,7 +669,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         /* First vertical edge is different in MBAFF frames
          * There are 8 different bS to compute and 2 different Qp
          */
-        DECLARE_ALIGNED_8(int16_t, bS)[8];
+        DECLARE_ALIGNED(8, int16_t, bS)[8];
         int qp[2];
         int bqp[2];
         int rqp[2];
@@ -676,10 +677,10 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
         int i;
         first_vertical_edge_done = 1;
 
-        if( IS_INTRA(mb_type) )
-            *(uint64_t*)&bS[0]=
-            *(uint64_t*)&bS[4]= 0x0004000400040004ULL;
-        else {
+        if( IS_INTRA(mb_type) ) {
+            AV_WN64A(&bS[0], 0x0004000400040004ULL);
+            AV_WN64A(&bS[4], 0x0004000400040004ULL);
+        } else {
             static const uint8_t offset[2][2][8]={
                 {
                     {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1},
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_mvpred.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_mvpred.h
index c4e99c244..0a89b880b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_mvpred.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_mvpred.h
@@ -43,32 +43,33 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
      * make mbaff happy, so we can't move all this logic to fill_caches */
     if(FRAME_MBAFF){
 
-#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
-                const int x4 = X4, y4 = Y4;\
-                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
+#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)\
+                const int xy = XY, y4 = Y4;\
+                const int mb_type = mb_types[xy+(y4>>2)*s->mb_stride];\
                 if(!USES_LIST(mb_type,list))\
                     return LIST_NOT_USED;\
-                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
+                mv = s->current_picture_ptr->motion_val[list][h->mb2b_xy[xy]+3 + y4*h->b_stride];\
                 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
-                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
+                return s->current_picture_ptr->ref_index[list][4*xy+1 + (y4&~1)] REF_OP;
 
         if(topright_ref == PART_NOT_AVAILABLE
            && i >= scan8[0]+8 && (i&7)==4
            && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
             const uint32_t *mb_types = s->current_picture_ptr->mb_type;
             const int16_t *mv;
-            *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
+            AV_ZERO32(h->mv_cache[list][scan8[0]-2]);
             *C = h->mv_cache[list][scan8[0]-2];
 
             if(!MB_FIELD
-               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
-                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
+               && IS_INTERLACED(h->left_type[0])){
+                SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5));
+                assert(h->left_mb_xy[0] == h->left_mb_xy[1]);
             }
             if(MB_FIELD
-               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
+               && !IS_INTERLACED(h->left_type[0])){
                 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
-                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
+                SET_DIAG_MV(/2, <<1, h->left_mb_xy[i>=36], ((i>>2))&3);
             }
         }
 #undef SET_DIAG_MV
@@ -220,8 +221,8 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
     tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
 
     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
-       || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
-       || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
+       || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ]))
+       || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){
 
         *mx = *my = 0;
         return;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_parser.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_parser.c
index 08cb8f9fb..85008108c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_parser.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_parser.c
@@ -185,6 +185,9 @@ static inline int parse_nal_units(AVCodecParserContext *s,
             h->sps = *h->sps_buffers[h->pps.sps_id];
             h->frame_num = get_bits(&h->s.gb, h->sps.log2_max_frame_num);
 
+            avctx->profile = h->sps.profile_idc;
+            avctx->level   = h->sps.level_idc;
+
             if(h->sps.frame_mbs_only_flag){
                 h->s.picture_structure= PICT_FRAME;
             }else{
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c
index 7fe8f0f08..bd4b074a3 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c
@@ -176,7 +176,7 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
     if(sps->timing_info_present_flag){
         sps->num_units_in_tick = get_bits_long(&s->gb, 32);
         sps->time_scale = get_bits_long(&s->gb, 32);
-        if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){
+        if(!sps->num_units_in_tick || !sps->time_scale){
             av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick invalid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick);
             return -1;
         }
@@ -291,13 +291,6 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
     sps->scaling_matrix_present = 0;
     sps->colorspace = 2; /* ffdshow custom code */
 
-    // ==> Start patch MPC Fidelity Range Extensions stuff
-    sps->chroma_format_idc = 1;
-    sps->bit_depth_luma   = 8;  // bit_depth_luma_minus8
-    sps->bit_depth_chroma = 8;  // bit_depth_chroma_minus8
-    sps->residual_color_transform_flag = 0;
-    // <== End patch MPC
-	
     if(sps->profile_idc >= 100){ //high profile
         sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
         if(sps->chroma_format_idc == 3)
@@ -356,6 +349,10 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
         sps->mb_aff= 0;
 
     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
+    if(!sps->frame_mbs_only_flag && !sps->direct_8x8_inference_flag){
+        av_log(h->s.avctx, AV_LOG_ERROR, "This stream was generated by a broken encoder, invalid 8x8 inference\n");
+        goto fail;
+    }
 
 #ifndef ALLOW_INTERLACE
     if(sps->mb_aff)
@@ -471,10 +468,6 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
 |   slice_group_change_direction_flag               |1  |u(1)    |
 |   slice_group_change_rate_minus1                  |1  |ue(v)   |
 #endif
-			// ==> Start patch MPC
-			pps->slice_group_change_direction_flag=get_bits1(&s->gb);        //    |1  |u(1)    |
-			pps->slice_group_change_rate_minus1=get_ue_golomb(&s->gb);        //      |1  |ue(v)   |
-			// <== End patch MPC
             break;
         case 6:
 #if 0
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_refs.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_refs.c
index 183f01526..98e7e2a81 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_refs.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_refs.c
@@ -315,20 +315,14 @@ void ff_h264_fill_mbaff_ref_list(H264Context *h){
             field[1].reference = PICT_BOTTOM_FIELD;
             field[1].poc= field[1].field_poc[1];
 
-            h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
-            h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
+            h->luma_weight[16+2*i][list][0] = h->luma_weight[16+2*i+1][list][0] = h->luma_weight[i][list][0];
+            h->luma_weight[16+2*i][list][1] = h->luma_weight[16+2*i+1][list][1] = h->luma_weight[i][list][1];
             for(j=0; j<2; j++){
-                h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
-                h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
+                h->chroma_weight[16+2*i][list][j][0] = h->chroma_weight[16+2*i+1][list][j][0] = h->chroma_weight[i][list][j][0];
+                h->chroma_weight[16+2*i][list][j][1] = h->chroma_weight[16+2*i+1][list][j][1] = h->chroma_weight[i][list][j][1];
             }
         }
     }
-    for(j=0; j<h->ref_count[1]; j++){
-        for(i=0; i<h->ref_count[0]; i++)
-            h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
-        memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
-        memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
-    }
 }
 
 /**
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_sei.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_sei.c
index f98448130..2d1f21e9e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_sei.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_sei.c
@@ -109,7 +109,7 @@ static int decode_unregistered_user_data(H264Context *h, int size){
 
     user_data[i]= 0;
     e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
-    if(e==1 && build>=0)
+    if(e==1 && build>0)
         h->x264_build= build;
 
     if(s->avctx->debug & FF_DEBUG_BUGS)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264data.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264data.h
index c08478eb0..bbe553e87 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264data.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264data.h
@@ -197,7 +197,7 @@ static const PMbInfo p_sub_mb_type_info[4]={
 };
 
 static const PMbInfo b_mb_type_info[23]={
-{MB_TYPE_DIRECT2                                                   , 1, },
+{MB_TYPE_DIRECT2|MB_TYPE_L0L1                                      , 1, },
 {MB_TYPE_16x16|MB_TYPE_P0L0                                       , 1, },
 {MB_TYPE_16x16             |MB_TYPE_P0L1                          , 1, },
 {MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1                          , 1, },
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264dsp.c
new file mode 100644
index 000000000..93cdc21a7
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264dsp.c
@@ -0,0 +1,320 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/h264dsp.c
+ * H.264 / AVC / MPEG4 part10 DSP functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include <stdint.h>
+#include "avcodec.h"
+#include "h264dsp.h"
+
+#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom ) /* in place: weight one sample, add offset, shift down, clip via av_clip_uint8 */
+#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) /* blend src[x] and dst[x] with their own weights, result stored in dst[x] */
+#define H264_WEIGHT(W,H) /* defines weight_h264_pixels<W>x<H>_c and biweight_h264_pixels<W>x<H>_c for a W-wide, H-high block */ \
+static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
+    int y; \
+    offset <<= log2_denom; /* bring offset to the same scale as block[x]*weight */ \
+    if(log2_denom) offset += 1<<(log2_denom-1); /* add half of 1<<log2_denom so the shift in op_scale1 rounds */ \
+    for(y=0; y<H; y++, block += stride){ /* one iteration per row; rows are stride bytes apart */ \
+        op_scale1(0); \
+        op_scale1(1); \
+        if(W==2) continue; /* W is a macro constant: 2-wide rows are finished here */ \
+        op_scale1(2); \
+        op_scale1(3); \
+        if(W==4) continue; /* 4-wide rows are finished here */ \
+        op_scale1(4); \
+        op_scale1(5); \
+        op_scale1(6); \
+        op_scale1(7); \
+        if(W==8) continue; /* 8-wide rows are finished here; only W==16 falls through */ \
+        op_scale1(8); \
+        op_scale1(9); \
+        op_scale1(10); \
+        op_scale1(11); \
+        op_scale1(12); \
+        op_scale1(13); \
+        op_scale1(14); \
+        op_scale1(15); \
+    } \
+} \
+static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+    int y; \
+    offset = ((offset + 1) | 1) << log2_denom; /* adds 1, forces the low bit on, then scales by 1<<log2_denom; presumably the rounding term for the >>(log2_denom+1) in op_scale2 -- TODO confirm */ \
+    for(y=0; y<H; y++, dst += stride, src += stride){ /* dst and src advance by the same stride */ \
+        op_scale2(0); \
+        op_scale2(1); \
+        if(W==2) continue; /* same width cut-offs as in the weight function above */ \
+        op_scale2(2); \
+        op_scale2(3); \
+        if(W==4) continue; \
+        op_scale2(4); \
+        op_scale2(5); \
+        op_scale2(6); \
+        op_scale2(7); \
+        if(W==8) continue; \
+        op_scale2(8); \
+        op_scale2(9); \
+        op_scale2(10); \
+        op_scale2(11); \
+        op_scale2(12); \
+        op_scale2(13); \
+        op_scale2(14); \
+        op_scale2(15); \
+    } \
+}
+
+H264_WEIGHT(16,16) /* each line below expands to one weight_ and one biweight_ function for that WxH size */
+H264_WEIGHT(16,8)
+H264_WEIGHT(8,16)
+H264_WEIGHT(8,8)
+H264_WEIGHT(8,4)
+H264_WEIGHT(4,8)
+H264_WEIGHT(4,4)
+H264_WEIGHT(4,2)
+H264_WEIGHT(2,4)
+H264_WEIGHT(2,2)
+
+#undef op_scale1 /* the helper macros are only needed while expanding H264_WEIGHT */
+#undef op_scale2
+#undef H264_WEIGHT
+
+static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
+{ /* tc0-clipped luma edge filter: xstride steps across the edge (p* at negative offsets, q* from pix[0] on), ystride steps to the next position along it */
+    int i, d;
+    for( i = 0; i < 4; i++ ) { /* 4 groups of 4 positions; tc0[i] is the clip value for group i */
+        if( tc0[i] < 0 ) { /* negative entry: leave the whole group untouched */
+            pix += 4*ystride;
+            continue;
+        }
+        for( d = 0; d < 4; d++ ) {
+            const int p0 = pix[-1*xstride];
+            const int p1 = pix[-2*xstride];
+            const int p2 = pix[-3*xstride];
+            const int q0 = pix[0];
+            const int q1 = pix[1*xstride];
+            const int q2 = pix[2*xstride];
+
+            if( FFABS( p0 - q0 ) < alpha && /* filter only where the step across the edge is below alpha and the step on each side is below beta */
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                int tc = tc0[i]; /* clip range for the p0/q0 delta; grows by 1 for each side that passes the p2/q2 test below */
+                int i_delta;
+
+                if( FFABS( p2 - p0 ) < beta ) {
+                    if(tc0[i]) /* p1 is rewritten only when the group's tc0 is non-zero; the correction is clipped to +-tc0[i] */
+                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
+                    tc++;
+                }
+                if( FFABS( q2 - q0 ) < beta ) {
+                    if(tc0[i]) /* same rule for q1 on the other side of the edge */
+                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
+                    tc++;
+                }
+
+                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); /* uses the p/q values read before any of the writes above */
+                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
+                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
+            }
+            pix += ystride;
+        }
+    }
+}
+static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{ /* "v" variant: xstride=stride, ystride=1, so p/q samples are one stride apart and positions advance byte by byte */
+    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{ /* "h" variant: xstride=1, ystride=stride, so p/q samples are adjacent bytes and positions advance by one stride */
+    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
+}
+
+static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
+{ /* luma edge filter without tc0 clipping: xstride steps across the edge, ystride steps to the next of 16 positions */
+    int d;
+    for( d = 0; d < 16; d++ ) {
+        const int p2 = pix[-3*xstride];
+        const int p1 = pix[-2*xstride];
+        const int p0 = pix[-1*xstride];
+
+        const int q0 = pix[ 0*xstride];
+        const int q1 = pix[ 1*xstride];
+        const int q2 = pix[ 2*xstride];
+
+        if( FFABS( p0 - q0 ) < alpha && /* same alpha/beta gate as the tc0-clipped filter */
+            FFABS( p1 - p0 ) < beta &&
+            FFABS( q1 - q0 ) < beta ) {
+
+            if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ /* small step across the edge: each side may get the three-sample update */
+                if( FFABS( p2 - p0 ) < beta)
+                {
+                    const int p3 = pix[-4*xstride]; /* fourth sample on the p side, read only in this branch */
+                    /* p0', p1', p2' */
+                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                } else {
+                    /* p0' */
+                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                }
+                if( FFABS( q2 - q0 ) < beta)
+                {
+                    const int q3 = pix[3*xstride]; /* fourth sample on the q side, read only in this branch */
+                    /* q0', q1', q2' */
+                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                } else {
+                    /* q0' */
+                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                }
+            }else{ /* larger step: only the two samples next to the edge are changed */
+                /* p0', q0' */
+                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+            }
+        }
+        pix += ystride;
+    }
+}
+static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{ /* "v" variant: xstride=stride, ystride=1 */
+    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
+}
+static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{ /* "h" variant: xstride=1, ystride=stride */
+    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
+}
+
+static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
+{ /* tc0-clipped chroma edge filter: xstride steps across the edge, ystride steps along it; only p0 and q0 are ever written */
+    int i, d;
+    for( i = 0; i < 4; i++ ) { /* 4 groups of 2 positions; tc0[i] is the clip value for group i */
+        const int tc = tc0[i];
+        if( tc <= 0 ) { /* zero or negative clip value: leave the group untouched */
+            pix += 2*ystride;
+            continue;
+        }
+        for( d = 0; d < 2; d++ ) {
+            const int p0 = pix[-1*xstride];
+            const int p1 = pix[-2*xstride];
+            const int q0 = pix[0];
+            const int q1 = pix[1*xstride];
+
+            if( FFABS( p0 - q0 ) < alpha && /* step across the edge below alpha, step on each side below beta */
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); /* correction limited to +-tc */
+
+                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
+                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
+            }
+            pix += ystride;
+        }
+    }
+}
+static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{ /* "v" variant: xstride=stride, ystride=1 */
+    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
+}
+static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{ /* "h" variant: xstride=1, ystride=stride */
+    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
+}
+
+static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
+{ /* chroma edge filter without tc0 clipping: xstride steps across the edge, ystride steps to the next of 8 positions */
+    int d;
+    for( d = 0; d < 8; d++ ) {
+        const int p0 = pix[-1*xstride];
+        const int p1 = pix[-2*xstride];
+        const int q0 = pix[0];
+        const int q1 = pix[1*xstride];
+
+        if( FFABS( p0 - q0 ) < alpha && /* same alpha/beta gate as the other filters in this file */
+            FFABS( p1 - p0 ) < beta &&
+            FFABS( q1 - q0 ) < beta ) {
+
+            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
+            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
+        }
+        pix += ystride;
+    }
+}
+static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{ /* "v" variant: xstride=stride, ystride=1 */
+    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
+}
+static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{ /* "h" variant: xstride=1, ystride=stride */
+    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
+}
+
+void ff_h264dsp_init(H264DSPContext *c)
+{ /* Fill *c with the C implementations; when HAVE_MMX is set, ff_h264dsp_init_x86() is then called on the same context */
+    c->h264_idct_add= ff_h264_idct_add_c; /* IDCT hooks: the ff_h264_idct*_c functions are defined outside this file */
+    c->h264_idct8_add= ff_h264_idct8_add_c;
+    c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
+    c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
+    c->h264_idct_add16     = ff_h264_idct_add16_c;
+    c->h264_idct8_add4     = ff_h264_idct8_add4_c;
+    c->h264_idct_add8      = ff_h264_idct_add8_c;
+    c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
+
+    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; /* table index selects the block size: 0=16x16 ... 9=2x2, same order in both tables */
+    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
+    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
+    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
+    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
+    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
+    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
+    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
+    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
+    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
+    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
+    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
+    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
+    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
+    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
+    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
+    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
+    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
+    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
+    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
+
+    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; /* deblocking hooks: the static wrappers defined earlier in this file */
+    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
+    c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
+    c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
+    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
+    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
+    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
+    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
+    c->h264_loop_filter_strength= NULL; /* no C implementation is installed for this hook; callers must handle NULL */
+
+    #if HAVE_MMX
+    ff_h264dsp_init_x86(c); /* presumably replaces some of the pointers above with optimized versions -- confirm in the x86 init code */
+    #endif
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264dsp.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264dsp.h
new file mode 100644
index 000000000..c5121b41c
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264dsp.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/h264dsp.h
+ * H.264 DSP functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_H264DSP_H
+#define AVCODEC_H264DSP_H
+
+#include <stdint.h>
+#include "dsputil.h"
+
+//typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
+typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); /* single-source weighting; the C versions in h264dsp.c modify block in place */
+typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); /* two-source weighting; the C versions in h264dsp.c blend src into dst using weights/weightd */
+
+/**
+ * Context for storing H.264 DSP functions
+ */
+typedef struct H264DSPContext{
+    /* weighted MC */
+    h264_weight_func weight_h264_pixels_tab[10]; /* as filled by ff_h264dsp_init(): 0=16x16 1=16x8 2=8x16 3=8x8 4=8x4 5=4x8 6=4x4 7=4x2 8=2x4 9=2x2 */
+    h264_biweight_func biweight_h264_pixels_tab[10]; /* same index order as weight_h264_pixels_tab */
+
+    /* loop filter */
+    void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
+    /* v/h_loop_filter_luma_intra: align 16 */
+    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+    void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
+    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
+    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
+    // h264_loop_filter_strength: SIMD only (ff_h264dsp_init() sets it to NULL); the C version is inlined in h264.c
+    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
+                                      int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
+
+    /* IDCT */
+    /* NOTE: if you implement any of h264_idct8_add, h264_idct8_add4, then you must implement all of them.
+       NOTE: if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8, then you must implement all of them.
+       The reason is that no two functions from the same list may use different permutations.
+    */
+    void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
+    void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
+    void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
+    void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
+    void (*h264_dct)(DCTELEM block[4][4]); /* NOTE(review): ff_h264dsp_init() in h264dsp.c does not assign this pointer -- confirm it is set elsewhere before any use */
+    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+}H264DSPContext;
+
+void ff_h264dsp_init(H264DSPContext *c); /* installs the C implementations from h264dsp.c */
+void ff_h264dsp_init_arm(H264DSPContext *c); /* NOTE(review): declared, but not called from ff_h264dsp_init() in h264dsp.c -- confirm whether it is built in this tree */
+void ff_h264dsp_init_ppc(H264DSPContext *c); /* NOTE(review): declared, but not called from ff_h264dsp_init() in h264dsp.c -- confirm whether it is built in this tree */
+void ff_h264dsp_init_x86(H264DSPContext *c); /* called from ff_h264dsp_init() when HAVE_MMX is set */
+
+#endif /* AVCODEC_H264DSP_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
index 6584fae99..55891b45f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
@@ -32,6 +32,9 @@
 
 #include "avcodec.h"
 #include "dsputil.h"
+#include "internal.h"
+#include "imgconvert.h"
+#include "libavutil/pixdesc.h"
 
 #if HAVE_MMX
 #include "x86/mmx.h"
@@ -51,14 +54,10 @@
 #define FF_PIXEL_PALETTE  2  /**< one components containing indexes for a palette */
 
 typedef struct PixFmtInfo {
-    const char *name;
     uint8_t nb_channels;     /**< number of channels (including alpha) */
     uint8_t color_type;      /**< color type (see FF_COLOR_xxx constants) */
     uint8_t pixel_type;      /**< pixel storage type (see FF_PIXEL_xxx constants) */
     uint8_t is_alpha : 1;    /**< true if alpha can be specified */
-    uint8_t is_hwaccel : 1;  /**< true if this is an HW accelerated format */
-    uint8_t x_chroma_shift;  /**< X chroma subsampling factor is 2 ^ shift */
-    uint8_t y_chroma_shift;  /**< Y chroma subsampling factor is 2 ^ shift */
     uint8_t depth;           /**< bit depth of the color components */
 } PixFmtInfo;
 
@@ -67,203 +66,219 @@ typedef struct PixFmtInfo {
 static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
     /* YUV formats */
     [PIX_FMT_YUV420P] = {
-        .name = "yuv420p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 1,
     },
     [PIX_FMT_YUV422P] = {
-        .name = "yuv422p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUV444P] = {
-        .name = "yuv444p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUYV422] = {
-        .name = "yuyv422",
         .nb_channels = 1,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 0,
     },
     [PIX_FMT_UYVY422] = {
-        .name = "uyvy422",
         .nb_channels = 1,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUV410P] = {
-        .name = "yuv410p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 2, .y_chroma_shift = 2,
     },
     [PIX_FMT_YUV411P] = {
-        .name = "yuv411p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 2, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUV440P] = {
-        .name = "yuv440p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 1,
     },
+    [PIX_FMT_YUV420P16LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 16,
+    },
+    [PIX_FMT_YUV422P16LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 16,
+    },
+    [PIX_FMT_YUV444P16LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 16,
+    },
+    [PIX_FMT_YUV420P16BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 16,
+    },
+    [PIX_FMT_YUV422P16BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 16,
+    },
+    [PIX_FMT_YUV444P16BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_YUV,
+        .pixel_type = FF_PIXEL_PLANAR,
+        .depth = 16,
+    },
+
 
     /* YUV formats with alpha plane */
     [PIX_FMT_YUVA420P] = {
-        .name = "yuva420p",
         .nb_channels = 4,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 1,
     },
 
     /* JPEG YUV */
     [PIX_FMT_YUVJ420P] = {
-        .name = "yuvj420p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV_JPEG,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 1,
     },
     [PIX_FMT_YUVJ422P] = {
-        .name = "yuvj422p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV_JPEG,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUVJ444P] = {
-        .name = "yuvj444p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV_JPEG,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUVJ440P] = {
-        .name = "yuvj440p",
         .nb_channels = 3,
         .color_type = FF_COLOR_YUV_JPEG,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 1,
     },
 
     /* RGB formats */
     [PIX_FMT_RGB24] = {
-        .name = "rgb24",
         .nb_channels = 3,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_BGR24] = {
-        .name = "bgr24",
         .nb_channels = 3,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
-    [PIX_FMT_RGB32] = {
-        .name = "rgb32",
+    [PIX_FMT_ARGB] = {
         .nb_channels = 4, .is_alpha = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_RGB48BE] = {
-        .name = "rgb48be",
         .nb_channels = 3,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 16,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_RGB48LE] = {
-        .name = "rgb48le",
         .nb_channels = 3,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 16,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
-    [PIX_FMT_RGB565] = {
-        .name = "rgb565",
+    [PIX_FMT_RGB565BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 5,
+    },
+    [PIX_FMT_RGB565LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 5,
+    },
+    [PIX_FMT_RGB555BE] = {
         .nb_channels = 3,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 5,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
-    [PIX_FMT_RGB555] = {
-        .name = "rgb555",
+    [PIX_FMT_RGB555LE] = {
         .nb_channels = 3,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 5,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_RGB444BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
+    },
+    [PIX_FMT_RGB444LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
     },
 
     /* gray / mono formats */
     [PIX_FMT_GRAY16BE] = {
-        .name = "gray16be",
         .nb_channels = 1,
         .color_type = FF_COLOR_GRAY,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 16,
     },
     [PIX_FMT_GRAY16LE] = {
-        .name = "gray16le",
         .nb_channels = 1,
         .color_type = FF_COLOR_GRAY,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 16,
     },
     [PIX_FMT_GRAY8] = {
-        .name = "gray",
         .nb_channels = 1,
         .color_type = FF_COLOR_GRAY,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
     },
     [PIX_FMT_MONOWHITE] = {
-        .name = "monow",
         .nb_channels = 1,
         .color_type = FF_COLOR_GRAY,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 1,
     },
     [PIX_FMT_MONOBLACK] = {
-        .name = "monob",
         .nb_channels = 1,
         .color_type = FF_COLOR_GRAY,
         .pixel_type = FF_PIXEL_PLANAR,
@@ -272,124 +287,119 @@ static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
 
     /* paletted formats */
     [PIX_FMT_PAL8] = {
-        .name = "pal8",
         .nb_channels = 4, .is_alpha = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PALETTE,
         .depth = 8,
     },
     [PIX_FMT_UYYVYY411] = {
-        .name = "uyyvyy411",
         .nb_channels = 1,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 2, .y_chroma_shift = 0,
     },
-    [PIX_FMT_BGR32] = {
-        .name = "bgr32",
+    [PIX_FMT_ABGR] = {
         .nb_channels = 4, .is_alpha = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
-    [PIX_FMT_BGR565] = {
-        .name = "bgr565",
+    [PIX_FMT_BGR565BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 5,
+    },
+    [PIX_FMT_BGR565LE] = {
         .nb_channels = 3,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 5,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
-    [PIX_FMT_BGR555] = {
-        .name = "bgr555",
+    [PIX_FMT_BGR555BE] = {
         .nb_channels = 3,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 5,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
+    },
+    [PIX_FMT_BGR555LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 5,
+    },
+    [PIX_FMT_BGR444BE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
+    },
+    [PIX_FMT_BGR444LE] = {
+        .nb_channels = 3,
+        .color_type = FF_COLOR_RGB,
+        .pixel_type = FF_PIXEL_PACKED,
+        .depth = 4,
     },
     [PIX_FMT_RGB8] = {
-        .name = "rgb8",
         .nb_channels = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_RGB4] = {
-        .name = "rgb4",
         .nb_channels = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 4,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_RGB4_BYTE] = {
-        .name = "rgb4_byte",
         .nb_channels = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_BGR8] = {
-        .name = "bgr8",
         .nb_channels = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_BGR4] = {
-        .name = "bgr4",
         .nb_channels = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 4,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_BGR4_BYTE] = {
-        .name = "bgr4_byte",
         .nb_channels = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_NV12] = {
-        .name = "nv12",
         .nb_channels = 2,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 1,
     },
     [PIX_FMT_NV21] = {
-        .name = "nv12",
         .nb_channels = 2,
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 1,
     },
 
-    [PIX_FMT_BGR32_1] = {
-        .name = "bgr32_1",
+    [PIX_FMT_BGRA] = {
         .nb_channels = 4, .is_alpha = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
-    [PIX_FMT_RGB32_1] = {
-        .name = "rgb32_1",
+    [PIX_FMT_RGBA] = {
         .nb_channels = 4, .is_alpha = 1,
         .color_type = FF_COLOR_RGB,
         .pixel_type = FF_PIXEL_PACKED,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
 };
 
@@ -398,329 +408,300 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB];
 
 void avpicture_init_pixfmtinfo(void)
 {
- pix_fmt_info[PIX_FMT_YUV420P].name = "yuv420p";
  pix_fmt_info[PIX_FMT_YUV420P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUV420P].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_YUV420P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUV420P].depth = 8;
- pix_fmt_info[PIX_FMT_YUV420P].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_YUV420P].y_chroma_shift = 1;
 
- pix_fmt_info[PIX_FMT_YUV422P].name = "yuv422p";
  pix_fmt_info[PIX_FMT_YUV422P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUV422P].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_YUV422P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUV422P].depth = 8,
- pix_fmt_info[PIX_FMT_YUV422P].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_YUV422P].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_YUV444P].name = "yuv444p";
  pix_fmt_info[PIX_FMT_YUV444P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUV444P].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_YUV444P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUV444P].depth = 8;
- pix_fmt_info[PIX_FMT_YUV444P].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_YUV444P].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_YUYV422].name = "yuyv422";
  pix_fmt_info[PIX_FMT_YUYV422].nb_channels = 1;
  pix_fmt_info[PIX_FMT_YUYV422].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_YUYV422].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_YUYV422].depth = 8;
- pix_fmt_info[PIX_FMT_YUYV422].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_YUYV422].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_UYVY422].name = "uyvy422";
  pix_fmt_info[PIX_FMT_UYVY422].nb_channels = 1;
  pix_fmt_info[PIX_FMT_UYVY422].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_UYVY422].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_UYVY422].depth = 8;
- pix_fmt_info[PIX_FMT_UYVY422].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_UYVY422].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_YUV410P].name = "yuv410p";
  pix_fmt_info[PIX_FMT_YUV410P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUV410P].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_YUV410P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUV410P].depth = 8;
- pix_fmt_info[PIX_FMT_YUV410P].x_chroma_shift = 2;
- pix_fmt_info[PIX_FMT_YUV410P].y_chroma_shift = 2;
 
- pix_fmt_info[PIX_FMT_YUV411P].name = "yuv411p";
  pix_fmt_info[PIX_FMT_YUV411P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUV411P].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_YUV411P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUV411P].depth = 8;
- pix_fmt_info[PIX_FMT_YUV411P].x_chroma_shift = 2;
- pix_fmt_info[PIX_FMT_YUV411P].y_chroma_shift = 0;
  
- pix_fmt_info[PIX_FMT_YUV440P].name = "yuv440p";
  pix_fmt_info[PIX_FMT_YUV440P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUV440P].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_YUV440P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUV440P].depth = 8;
- pix_fmt_info[PIX_FMT_YUV440P].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_YUV440P].y_chroma_shift = 1;
  
- pix_fmt_info[PIX_FMT_YUVA420P].name = "yuva420p";
+ pix_fmt_info[PIX_FMT_YUV420P16LE].nb_channels = 3; /* newly added planar YUV entries with depth 16: LE variants first */
+ pix_fmt_info[PIX_FMT_YUV420P16LE].color_type = FF_COLOR_YUV;
+ pix_fmt_info[PIX_FMT_YUV420P16LE].pixel_type = FF_PIXEL_PLANAR;
+ pix_fmt_info[PIX_FMT_YUV420P16LE].depth = 16;
+ 
+ pix_fmt_info[PIX_FMT_YUV422P16LE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_YUV422P16LE].color_type = FF_COLOR_YUV;
+ pix_fmt_info[PIX_FMT_YUV422P16LE].pixel_type = FF_PIXEL_PLANAR;
+ pix_fmt_info[PIX_FMT_YUV422P16LE].depth = 16;
+ 
+ pix_fmt_info[PIX_FMT_YUV444P16LE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_YUV444P16LE].color_type = FF_COLOR_YUV;
+ pix_fmt_info[PIX_FMT_YUV444P16LE].pixel_type = FF_PIXEL_PLANAR;
+ pix_fmt_info[PIX_FMT_YUV444P16LE].depth = 16;
+ 
+ pix_fmt_info[PIX_FMT_YUV420P16BE].nb_channels = 3; /* the same three depth-16 entries, BE variants */
+ pix_fmt_info[PIX_FMT_YUV420P16BE].color_type = FF_COLOR_YUV;
+ pix_fmt_info[PIX_FMT_YUV420P16BE].pixel_type = FF_PIXEL_PLANAR;
+ pix_fmt_info[PIX_FMT_YUV420P16BE].depth = 16;
+ 
+ pix_fmt_info[PIX_FMT_YUV422P16BE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_YUV422P16BE].color_type = FF_COLOR_YUV;
+ pix_fmt_info[PIX_FMT_YUV422P16BE].pixel_type = FF_PIXEL_PLANAR;
+ pix_fmt_info[PIX_FMT_YUV422P16BE].depth = 16;
+ 
+ pix_fmt_info[PIX_FMT_YUV444P16BE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_YUV444P16BE].color_type = FF_COLOR_YUV;
+ pix_fmt_info[PIX_FMT_YUV444P16BE].pixel_type = FF_PIXEL_PLANAR;
+ pix_fmt_info[PIX_FMT_YUV444P16BE].depth = 16;
+ 
+ /* YUV formats with alpha plane */
  pix_fmt_info[PIX_FMT_YUVA420P].nb_channels = 4;
  pix_fmt_info[PIX_FMT_YUVA420P].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_YUVA420P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUVA420P].depth = 8;
- pix_fmt_info[PIX_FMT_YUVA420P].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_YUVA420P].y_chroma_shift = 1;
 
  /* JPEG YUV */
- pix_fmt_info[PIX_FMT_YUVJ420P].name = "yuvj420p";
  pix_fmt_info[PIX_FMT_YUVJ420P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUVJ420P].color_type = FF_COLOR_YUV_JPEG;
  pix_fmt_info[PIX_FMT_YUVJ420P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUVJ420P].depth = 8;
- pix_fmt_info[PIX_FMT_YUVJ420P].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_YUVJ420P].y_chroma_shift = 1;
 
- pix_fmt_info[PIX_FMT_YUVJ422P].name = "yuvj422p";
  pix_fmt_info[PIX_FMT_YUVJ422P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUVJ422P].color_type = FF_COLOR_YUV_JPEG;
  pix_fmt_info[PIX_FMT_YUVJ422P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUVJ422P].depth = 8;
- pix_fmt_info[PIX_FMT_YUVJ422P].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_YUVJ422P].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_YUVJ444P].name = "yuvj444p";
  pix_fmt_info[PIX_FMT_YUVJ444P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUVJ444P].color_type = FF_COLOR_YUV_JPEG;
  pix_fmt_info[PIX_FMT_YUVJ444P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUVJ444P].depth = 8;
- pix_fmt_info[PIX_FMT_YUVJ444P].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_YUVJ444P].y_chroma_shift = 0;
  
- pix_fmt_info[PIX_FMT_YUVJ440P].name = "yuvj440p";
  pix_fmt_info[PIX_FMT_YUVJ440P].nb_channels = 3;
  pix_fmt_info[PIX_FMT_YUVJ440P].color_type = FF_COLOR_YUV_JPEG;
  pix_fmt_info[PIX_FMT_YUVJ440P].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_YUVJ440P].depth = 8;
- pix_fmt_info[PIX_FMT_YUVJ440P].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_YUVJ440P].y_chroma_shift = 1;
 
  /* RGB formats */
- pix_fmt_info[PIX_FMT_RGB24].name = "rgb24";
  pix_fmt_info[PIX_FMT_RGB24].nb_channels = 3;
  pix_fmt_info[PIX_FMT_RGB24].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_RGB24].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_RGB24].depth = 8;
- pix_fmt_info[PIX_FMT_RGB24].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB24].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_BGR24].name = "bgr24";
  pix_fmt_info[PIX_FMT_BGR24].nb_channels = 3;
  pix_fmt_info[PIX_FMT_BGR24].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_BGR24].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_BGR24].depth = 8;
- pix_fmt_info[PIX_FMT_BGR24].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_BGR24].y_chroma_shift = 0;
-
- pix_fmt_info[PIX_FMT_RGB32].name = "rgb32";
- pix_fmt_info[PIX_FMT_RGB32].nb_channels = 4;
- pix_fmt_info[PIX_FMT_RGB32].is_alpha = 1;
- pix_fmt_info[PIX_FMT_RGB32].color_type = FF_COLOR_RGB;
- pix_fmt_info[PIX_FMT_RGB32].pixel_type = FF_PIXEL_PACKED;
- pix_fmt_info[PIX_FMT_RGB32].depth = 8;
- pix_fmt_info[PIX_FMT_RGB32].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB32].y_chroma_shift = 0;
+
+ pix_fmt_info[PIX_FMT_ARGB].nb_channels = 4; /* stands in for the PIX_FMT_RGB32 entry removed just above */
+ pix_fmt_info[PIX_FMT_ARGB].is_alpha = 1;
+ pix_fmt_info[PIX_FMT_ARGB].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_ARGB].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_ARGB].depth = 8;
  
- pix_fmt_info[PIX_FMT_RGB48BE].name = "rgb48be";
  pix_fmt_info[PIX_FMT_RGB48BE].nb_channels = 3;
  pix_fmt_info[PIX_FMT_RGB48BE].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_RGB48BE].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_RGB48BE].depth = 16;
- pix_fmt_info[PIX_FMT_RGB48BE].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB48BE].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_RGB48LE].name = "rgb48le";
  pix_fmt_info[PIX_FMT_RGB48LE].nb_channels = 3;
  pix_fmt_info[PIX_FMT_RGB48LE].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_RGB48LE].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_RGB48LE].depth = 16;
- pix_fmt_info[PIX_FMT_RGB48LE].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB48LE].y_chroma_shift = 0; 
  
- pix_fmt_info[PIX_FMT_RGB565].name = "rgb565";
- pix_fmt_info[PIX_FMT_RGB565].nb_channels = 3;
- pix_fmt_info[PIX_FMT_RGB565].color_type = FF_COLOR_RGB;
- pix_fmt_info[PIX_FMT_RGB565].pixel_type = FF_PIXEL_PACKED;
- pix_fmt_info[PIX_FMT_RGB565].depth = 5;
- pix_fmt_info[PIX_FMT_RGB565].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB565].y_chroma_shift = 0;
-
- pix_fmt_info[PIX_FMT_RGB555].name = "rgb555";
- pix_fmt_info[PIX_FMT_RGB555].nb_channels = 3;
- pix_fmt_info[PIX_FMT_RGB555].color_type = FF_COLOR_RGB;
- pix_fmt_info[PIX_FMT_RGB555].pixel_type = FF_PIXEL_PACKED;
- pix_fmt_info[PIX_FMT_RGB555].depth = 5;
- pix_fmt_info[PIX_FMT_RGB555].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB555].y_chroma_shift = 0;
+ pix_fmt_info[PIX_FMT_RGB565BE].nb_channels = 3; /* RGB565 / RGB555 are now filled in as explicit BE and LE entries */
+ pix_fmt_info[PIX_FMT_RGB565BE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_RGB565BE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_RGB565BE].depth = 5;
+ 
+ pix_fmt_info[PIX_FMT_RGB565LE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_RGB565LE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_RGB565LE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_RGB565LE].depth = 5;
+
+ pix_fmt_info[PIX_FMT_RGB555BE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_RGB555BE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_RGB555BE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_RGB555BE].depth = 5;
+ 
+ pix_fmt_info[PIX_FMT_RGB555LE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_RGB555LE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_RGB555LE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_RGB555LE].depth = 5;
+ 
+ pix_fmt_info[PIX_FMT_RGB444BE].nb_channels = 3; /* newly added packed RGB entries with depth 4 */
+ pix_fmt_info[PIX_FMT_RGB444BE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_RGB444BE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_RGB444BE].depth = 4;
+ 
+ pix_fmt_info[PIX_FMT_RGB444LE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_RGB444LE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_RGB444LE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_RGB444LE].depth = 4;
 
  /* gray / mono formats */
- pix_fmt_info[PIX_FMT_GRAY16BE].name = "gray16be";
  pix_fmt_info[PIX_FMT_GRAY16BE].nb_channels = 1;
  pix_fmt_info[PIX_FMT_GRAY16BE].color_type = FF_COLOR_GRAY;
  pix_fmt_info[PIX_FMT_GRAY16BE].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_GRAY16BE].depth = 16;
 
- pix_fmt_info[PIX_FMT_GRAY16LE].name = "gray16le";
  pix_fmt_info[PIX_FMT_GRAY16LE].nb_channels = 1;
  pix_fmt_info[PIX_FMT_GRAY16LE].color_type = FF_COLOR_GRAY;
  pix_fmt_info[PIX_FMT_GRAY16LE].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_GRAY16LE].depth = 16;
 
- pix_fmt_info[PIX_FMT_GRAY8].name = "gray";
  pix_fmt_info[PIX_FMT_GRAY8].nb_channels = 1;
  pix_fmt_info[PIX_FMT_GRAY8].color_type = FF_COLOR_GRAY;
  pix_fmt_info[PIX_FMT_GRAY8].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_GRAY8].depth = 8;
 
- pix_fmt_info[PIX_FMT_MONOWHITE].name = "monow";
  pix_fmt_info[PIX_FMT_MONOWHITE].nb_channels = 1;
  pix_fmt_info[PIX_FMT_MONOWHITE].color_type = FF_COLOR_GRAY;
  pix_fmt_info[PIX_FMT_MONOWHITE].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_MONOWHITE].depth = 1;
 
- pix_fmt_info[PIX_FMT_MONOBLACK].name = "monob";
  pix_fmt_info[PIX_FMT_MONOBLACK].nb_channels = 1;
  pix_fmt_info[PIX_FMT_MONOBLACK].color_type = FF_COLOR_GRAY;
  pix_fmt_info[PIX_FMT_MONOBLACK].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_MONOBLACK].depth = 1;
 
  /* paletted formats */
- pix_fmt_info[PIX_FMT_PAL8].name = "pal8";
  pix_fmt_info[PIX_FMT_PAL8].nb_channels = 4;
  pix_fmt_info[PIX_FMT_PAL8].is_alpha = 1;
  pix_fmt_info[PIX_FMT_PAL8].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_PAL8].pixel_type = FF_PIXEL_PALETTE;
  pix_fmt_info[PIX_FMT_PAL8].depth = 8;
 
- pix_fmt_info[PIX_FMT_UYYVYY411].name = "uyyvyy411";
  pix_fmt_info[PIX_FMT_UYYVYY411].nb_channels = 1;
  pix_fmt_info[PIX_FMT_UYYVYY411].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_UYYVYY411].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_UYYVYY411].depth = 8;
- pix_fmt_info[PIX_FMT_UYYVYY411].x_chroma_shift = 2;
- pix_fmt_info[PIX_FMT_UYYVYY411].y_chroma_shift = 0;
-
- pix_fmt_info[PIX_FMT_BGR32].name = "bgr32";
- pix_fmt_info[PIX_FMT_BGR32].nb_channels = 4;
- pix_fmt_info[PIX_FMT_BGR32].is_alpha = 1;
- pix_fmt_info[PIX_FMT_BGR32].color_type = FF_COLOR_RGB;
- pix_fmt_info[PIX_FMT_BGR32].pixel_type = FF_PIXEL_PACKED;
- pix_fmt_info[PIX_FMT_BGR32].depth = 8;
- pix_fmt_info[PIX_FMT_BGR32].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_BGR32].y_chroma_shift = 0;
-
- pix_fmt_info[PIX_FMT_BGR565].name = "bgr565";
- pix_fmt_info[PIX_FMT_BGR565].nb_channels = 3;
- pix_fmt_info[PIX_FMT_BGR565].color_type = FF_COLOR_RGB;
- pix_fmt_info[PIX_FMT_BGR565].pixel_type = FF_PIXEL_PACKED;
- pix_fmt_info[PIX_FMT_BGR565].depth = 5;
- pix_fmt_info[PIX_FMT_BGR565].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_BGR565].y_chroma_shift = 0;
-
- pix_fmt_info[PIX_FMT_BGR555].name = "bgr555";
- pix_fmt_info[PIX_FMT_BGR555].nb_channels = 3;
- pix_fmt_info[PIX_FMT_BGR555].color_type = FF_COLOR_RGB;
- pix_fmt_info[PIX_FMT_BGR555].pixel_type = FF_PIXEL_PACKED;
- pix_fmt_info[PIX_FMT_BGR555].depth = 5;
- pix_fmt_info[PIX_FMT_BGR555].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_BGR555].y_chroma_shift = 0;
-
- pix_fmt_info[PIX_FMT_RGB8].name = "rgb8";
+
+ pix_fmt_info[PIX_FMT_ABGR].nb_channels = 4; /* stands in for the PIX_FMT_BGR32 entry removed just above */
+ pix_fmt_info[PIX_FMT_ABGR].is_alpha = 1;
+ pix_fmt_info[PIX_FMT_ABGR].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_ABGR].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_ABGR].depth = 8;
+
+ pix_fmt_info[PIX_FMT_BGR565BE].nb_channels = 3; /* BGR565 / BGR555 are now filled in as explicit BE and LE entries */
+ pix_fmt_info[PIX_FMT_BGR565BE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_BGR565BE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_BGR565BE].depth = 5;
+ 
+ pix_fmt_info[PIX_FMT_BGR565LE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_BGR565LE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_BGR565LE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_BGR565LE].depth = 5;
+
+ pix_fmt_info[PIX_FMT_BGR555BE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_BGR555BE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_BGR555BE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_BGR555BE].depth = 5;
+ 
+ pix_fmt_info[PIX_FMT_BGR555LE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_BGR555LE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_BGR555LE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_BGR555LE].depth = 5;
+ 
+ pix_fmt_info[PIX_FMT_BGR444BE].nb_channels = 3; /* newly added packed BGR entries with depth 4 */
+ pix_fmt_info[PIX_FMT_BGR444BE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_BGR444BE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_BGR444BE].depth = 4;
+ 
+ pix_fmt_info[PIX_FMT_BGR444LE].nb_channels = 3;
+ pix_fmt_info[PIX_FMT_BGR444LE].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_BGR444LE].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_BGR444LE].depth = 4;
+
  pix_fmt_info[PIX_FMT_RGB8].nb_channels = 1;
  pix_fmt_info[PIX_FMT_RGB8].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_RGB8].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_RGB8].depth = 8;
- pix_fmt_info[PIX_FMT_RGB8].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB8].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_RGB4].name = "rgb4";
  pix_fmt_info[PIX_FMT_RGB4].nb_channels = 1;
  pix_fmt_info[PIX_FMT_RGB4].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_RGB4].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_RGB4].depth = 4;
- pix_fmt_info[PIX_FMT_RGB4].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB4].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_RGB4_BYTE].name = "rgb4_byte";
  pix_fmt_info[PIX_FMT_RGB4_BYTE].nb_channels = 1;
  pix_fmt_info[PIX_FMT_RGB4_BYTE].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_RGB4_BYTE].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_RGB4_BYTE].depth = 8;
- pix_fmt_info[PIX_FMT_RGB4_BYTE].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB4_BYTE].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_BGR8].name = "bgr8";
  pix_fmt_info[PIX_FMT_BGR8].nb_channels = 1;
  pix_fmt_info[PIX_FMT_BGR8].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_BGR8].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_BGR8].depth = 8;
- pix_fmt_info[PIX_FMT_BGR8].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_BGR8].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_BGR4].name = "bgr4";
  pix_fmt_info[PIX_FMT_BGR4].nb_channels = 1;
  pix_fmt_info[PIX_FMT_BGR4].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_BGR4].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_BGR4].depth = 4;
- pix_fmt_info[PIX_FMT_BGR4].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_BGR4].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_BGR4_BYTE].name = "bgr4_byte";
  pix_fmt_info[PIX_FMT_BGR4_BYTE].nb_channels = 1;
  pix_fmt_info[PIX_FMT_BGR4_BYTE].color_type = FF_COLOR_RGB;
  pix_fmt_info[PIX_FMT_BGR4_BYTE].pixel_type = FF_PIXEL_PACKED;
  pix_fmt_info[PIX_FMT_BGR4_BYTE].depth = 8;
- pix_fmt_info[PIX_FMT_BGR4_BYTE].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_BGR4_BYTE].y_chroma_shift = 0;
 
- pix_fmt_info[PIX_FMT_NV12].name = "nv12";
  pix_fmt_info[PIX_FMT_NV12].nb_channels = 2;
  pix_fmt_info[PIX_FMT_NV12].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_NV12].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_NV12].depth = 8;
- pix_fmt_info[PIX_FMT_NV12].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_NV12].y_chroma_shift = 1;
 
- pix_fmt_info[PIX_FMT_NV21].name = "nv12";
  pix_fmt_info[PIX_FMT_NV21].nb_channels = 2;
  pix_fmt_info[PIX_FMT_NV21].color_type = FF_COLOR_YUV;
  pix_fmt_info[PIX_FMT_NV21].pixel_type = FF_PIXEL_PLANAR;
  pix_fmt_info[PIX_FMT_NV21].depth = 8;
- pix_fmt_info[PIX_FMT_NV21].x_chroma_shift = 1;
- pix_fmt_info[PIX_FMT_NV21].y_chroma_shift = 1;
-
- pix_fmt_info[PIX_FMT_BGR32_1].name = "bgr32_1";
- pix_fmt_info[PIX_FMT_BGR32_1].nb_channels = 4;
- pix_fmt_info[PIX_FMT_BGR32_1].is_alpha = 1;
- pix_fmt_info[PIX_FMT_BGR32_1].color_type = FF_COLOR_RGB;
- pix_fmt_info[PIX_FMT_BGR32_1].pixel_type = FF_PIXEL_PACKED;
- pix_fmt_info[PIX_FMT_BGR32_1].depth = 8;
- pix_fmt_info[PIX_FMT_BGR32_1].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_BGR32_1].y_chroma_shift = 0;
-
- pix_fmt_info[PIX_FMT_RGB32_1].name = "rgb32_1";
- pix_fmt_info[PIX_FMT_RGB32_1].nb_channels = 4;
- pix_fmt_info[PIX_FMT_RGB32_1].is_alpha = 1;
- pix_fmt_info[PIX_FMT_RGB32_1].color_type = FF_COLOR_RGB;
- pix_fmt_info[PIX_FMT_RGB32_1].pixel_type = FF_PIXEL_PACKED;
- pix_fmt_info[PIX_FMT_RGB32_1].depth = 8;
- pix_fmt_info[PIX_FMT_RGB32_1].x_chroma_shift = 0;
- pix_fmt_info[PIX_FMT_RGB32_1].y_chroma_shift = 0;
+
+ pix_fmt_info[PIX_FMT_BGRA].nb_channels = 4; /* stands in for the PIX_FMT_BGR32_1 entry removed just above */
+ pix_fmt_info[PIX_FMT_BGRA].is_alpha = 1;
+ pix_fmt_info[PIX_FMT_BGRA].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_BGRA].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_BGRA].depth = 8;
+
+ pix_fmt_info[PIX_FMT_RGBA].nb_channels = 4; /* stands in for the PIX_FMT_RGB32_1 entry removed just above */
+ pix_fmt_info[PIX_FMT_RGBA].is_alpha = 1;
+ pix_fmt_info[PIX_FMT_RGBA].color_type = FF_COLOR_RGB;
+ pix_fmt_info[PIX_FMT_RGBA].pixel_type = FF_PIXEL_PACKED;
+ pix_fmt_info[PIX_FMT_RGBA].depth = 8;
 }
 #endif
 
-void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift)
+void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int *v_shift)
+{ /* Writes the descriptor's chroma shifts to *h_shift / *v_shift; pix_fmt is used as a table index with no range check. */
+    *h_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_w; /* horizontal shift now read from the descriptor table */
+    *v_shift = av_pix_fmt_descriptors[pix_fmt].log2_chroma_h; /* vertical shift, same source */
+}
+
+const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt)
 {
-    *h_shift = pix_fmt_info[pix_fmt].x_chroma_shift;
-    *v_shift = pix_fmt_info[pix_fmt].y_chroma_shift;
+    if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB) /* outside [0, PIX_FMT_NB): there is no name to return */
+        return NULL;
+    else
+        return av_pix_fmt_descriptors[pix_fmt].name; /* name string is taken from the descriptor table */
 }
 
 int ff_set_systematic_pal(uint32_t pal[256], enum PixelFormat pix_fmt){
@@ -764,89 +745,36 @@ int ff_set_systematic_pal(uint32_t pal[256], enum PixelFormat pix_fmt){
 
 int ff_fill_linesize(AVPicture *picture, enum PixelFormat pix_fmt, int width)
 {
-    int w2;
-    const PixFmtInfo *pinfo;
+    int i;
+    const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; /* pix_fmt indexes the table directly; no range check here */
+    int max_plane_step     [4]; /* per plane: largest (step_minus1 + 1) among the components stored in it */
+    int max_plane_step_comp[4]; /* per plane: index of the component that supplied that maximum */
 
     memset(picture->linesize, 0, sizeof(picture->linesize));
 
-    pinfo = &pix_fmt_info[pix_fmt];
-    switch(pix_fmt) {
-    case PIX_FMT_YUV420P:
-    case PIX_FMT_YUV422P:
-    case PIX_FMT_YUV444P:
-    case PIX_FMT_YUV410P:
-    case PIX_FMT_YUV411P:
-    case PIX_FMT_YUV440P:
-    case PIX_FMT_YUVJ420P:
-    case PIX_FMT_YUVJ422P:
-    case PIX_FMT_YUVJ444P:
-    case PIX_FMT_YUVJ440P:
-        w2 = (width + (1 << pinfo->x_chroma_shift) - 1) >> pinfo->x_chroma_shift;
-        picture->linesize[0] = width;
-        picture->linesize[1] = w2;
-        picture->linesize[2] = w2;
-        break;
-    case PIX_FMT_YUVA420P:
-        w2 = (width + (1 << pinfo->x_chroma_shift) - 1) >> pinfo->x_chroma_shift;
-        picture->linesize[0] = width;
-        picture->linesize[1] = w2;
-        picture->linesize[2] = w2;
-        picture->linesize[3] = width;
-        break;
-    case PIX_FMT_NV12:
-    case PIX_FMT_NV21:
-        w2 = (width + (1 << pinfo->x_chroma_shift) - 1) >> pinfo->x_chroma_shift;
-        picture->linesize[0] = width;
-        picture->linesize[1] = 2 * w2;
-        break;
-    case PIX_FMT_RGB24:
-    case PIX_FMT_BGR24:
-        picture->linesize[0] = width * 3;
-        break;
-    case PIX_FMT_RGB32:
-    case PIX_FMT_BGR32:
-    case PIX_FMT_RGB32_1:
-    case PIX_FMT_BGR32_1:
-        picture->linesize[0] = width * 4;
-        break;
-    case PIX_FMT_RGB48BE:
-    case PIX_FMT_RGB48LE:
-        picture->linesize[0] = width * 6;
-        break;
-    case PIX_FMT_GRAY16BE:
-    case PIX_FMT_GRAY16LE:
-    case PIX_FMT_BGR555:
-    case PIX_FMT_BGR565:
-    case PIX_FMT_RGB555:
-    case PIX_FMT_RGB565:
-    case PIX_FMT_YUYV422:
-        picture->linesize[0] = width * 2;
-        break;
-    case PIX_FMT_UYVY422:
-        picture->linesize[0] = width * 2;
-        break;
-    case PIX_FMT_UYYVYY411:
-        picture->linesize[0] = width + width/2;
-        break;
-    case PIX_FMT_RGB4:
-    case PIX_FMT_BGR4:
-        picture->linesize[0] = width / 2;
-        break;
-    case PIX_FMT_MONOWHITE:
-    case PIX_FMT_MONOBLACK:
-        picture->linesize[0] = (width + 7) >> 3;
-        break;
-    case PIX_FMT_PAL8:
-    case PIX_FMT_RGB8:
-    case PIX_FMT_BGR8:
-    case PIX_FMT_RGB4_BYTE:
-    case PIX_FMT_BGR4_BYTE:
-    case PIX_FMT_GRAY8:
-        picture->linesize[0] = width;
-        break;
-    default:
+    if (desc->flags & PIX_FMT_HWACCEL) /* HWACCEL-flagged formats are rejected; linesize[] stays all-zero from the memset */
         return -1;
+
+    if (desc->flags & PIX_FMT_BITSTREAM) { /* BITSTREAM-flagged formats: only linesize[0] is filled in */
+        picture->linesize[0] = (width * (desc->comp[0].step_minus1+1) + 7) >> 3; /* ceil(width * step / 8); presumably the step is in bits here - TODO confirm */
+        return 0;
     }
+
+    memset(max_plane_step     , 0, sizeof(max_plane_step     )); /* both scratch arrays start at zero */
+    memset(max_plane_step_comp, 0, sizeof(max_plane_step_comp));
+    for (i = 0; i < 4; i++) { /* visit all four component slots of the descriptor */
+        const AVComponentDescriptor *comp = &(desc->comp[i]);
+        if ((comp->step_minus1+1) > max_plane_step[comp->plane]) { /* strictly larger only, so the earliest component wins ties */
+            max_plane_step     [comp->plane] = comp->step_minus1+1;
+            max_plane_step_comp[comp->plane] = i;
+        }
+    }
+
+    for (i = 0; i < 4; i++) { /* one linesize per plane slot */
+        int s = (max_plane_step_comp[i] == 1 || max_plane_step_comp[i] == 2) ? desc->log2_chroma_w : 0; /* planes whose widest component is #1 or #2 use the chroma width shift */
+        picture->linesize[i] = max_plane_step[i] * (((width + (1 << s) - 1)) >> s); /* step * ceil(width / 2^s) */
+    }
+
     return 0;
 }
 
@@ -854,9 +782,8 @@ int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
                     int height)
 {
     int size, h2, size2;
-    const PixFmtInfo *pinfo;
+    const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
 
-    pinfo = &pix_fmt_info[pix_fmt];
     size = picture->linesize[0] * height;
     switch(pix_fmt) {
     case PIX_FMT_YUV420P:
@@ -869,7 +796,13 @@ int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
     case PIX_FMT_YUVJ422P:
     case PIX_FMT_YUVJ444P:
     case PIX_FMT_YUVJ440P:
-        h2 = (height + (1 << pinfo->y_chroma_shift) - 1) >> pinfo->y_chroma_shift;
+    case PIX_FMT_YUV420P16LE:
+    case PIX_FMT_YUV422P16LE:
+    case PIX_FMT_YUV444P16LE:
+    case PIX_FMT_YUV420P16BE:
+    case PIX_FMT_YUV422P16BE:
+    case PIX_FMT_YUV444P16BE:
+        h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
         size2 = picture->linesize[1] * h2;
         picture->data[0] = ptr;
         picture->data[1] = picture->data[0] + size;
@@ -877,7 +810,7 @@ int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
         picture->data[3] = NULL;
         return size + 2 * size2;
     case PIX_FMT_YUVA420P:
-        h2 = (height + (1 << pinfo->y_chroma_shift) - 1) >> pinfo->y_chroma_shift;
+        h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
         size2 = picture->linesize[1] * h2;
         picture->data[0] = ptr;
         picture->data[1] = picture->data[0] + size;
@@ -886,7 +819,7 @@ int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
         return 2 * size + 2 * size2;
     case PIX_FMT_NV12:
     case PIX_FMT_NV21:
-        h2 = (height + (1 << pinfo->y_chroma_shift) - 1) >> pinfo->y_chroma_shift;
+        h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
         size2 = picture->linesize[1] * h2;
         picture->data[0] = ptr;
         picture->data[1] = picture->data[0] + size;
@@ -895,18 +828,26 @@ int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
         return size + size2;
     case PIX_FMT_RGB24:
     case PIX_FMT_BGR24:
-    case PIX_FMT_RGB32:
-    case PIX_FMT_BGR32:
-    case PIX_FMT_RGB32_1:
-    case PIX_FMT_BGR32_1:
+    case PIX_FMT_ARGB:
+    case PIX_FMT_ABGR:
+    case PIX_FMT_RGBA:
+    case PIX_FMT_BGRA:
     case PIX_FMT_RGB48BE:
     case PIX_FMT_RGB48LE:
     case PIX_FMT_GRAY16BE:
     case PIX_FMT_GRAY16LE:
-    case PIX_FMT_BGR555:
-    case PIX_FMT_BGR565:
-    case PIX_FMT_RGB555:
-    case PIX_FMT_RGB565:
+    case PIX_FMT_BGR444BE:
+    case PIX_FMT_BGR444LE:
+    case PIX_FMT_BGR555BE:
+    case PIX_FMT_BGR555LE:
+    case PIX_FMT_BGR565BE:
+    case PIX_FMT_BGR565LE:
+    case PIX_FMT_RGB444BE:
+    case PIX_FMT_RGB444LE:
+    case PIX_FMT_RGB555BE:
+    case PIX_FMT_RGB555LE:
+    case PIX_FMT_RGB565BE:
+    case PIX_FMT_RGB565LE:
     case PIX_FMT_YUYV422:
     case PIX_FMT_UYVY422:
     case PIX_FMT_UYYVYY411:
@@ -914,6 +855,7 @@ int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
     case PIX_FMT_BGR4:
     case PIX_FMT_MONOWHITE:
     case PIX_FMT_MONOBLACK:
+    case PIX_FMT_Y400A:
         picture->data[0] = ptr;
         picture->data[1] = NULL;
         picture->data[2] = NULL;
@@ -957,6 +899,7 @@ int ff_get_plane_bytewidth(enum PixelFormat pix_fmt, int width, int plane)
 {
     int bits;
     const PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
+    const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
 
     pf = &pix_fmt_info[pix_fmt];
     switch(pf->pixel_type) {
@@ -964,10 +907,18 @@ int ff_get_plane_bytewidth(enum PixelFormat pix_fmt, int width, int plane)
         switch(pix_fmt) {
         case PIX_FMT_YUYV422:
         case PIX_FMT_UYVY422:
-        case PIX_FMT_RGB565:
-        case PIX_FMT_RGB555:
-        case PIX_FMT_BGR565:
-        case PIX_FMT_BGR555:
+        case PIX_FMT_RGB565BE:
+        case PIX_FMT_RGB565LE:
+        case PIX_FMT_RGB555BE:
+        case PIX_FMT_RGB555LE:
+        case PIX_FMT_RGB444BE:
+        case PIX_FMT_RGB444LE:
+        case PIX_FMT_BGR565BE:
+        case PIX_FMT_BGR565LE:
+        case PIX_FMT_BGR555BE:
+        case PIX_FMT_BGR555LE:
+        case PIX_FMT_BGR444BE:
+        case PIX_FMT_BGR444LE:
             bits = 16;
             break;
         case PIX_FMT_UYYVYY411:
@@ -981,7 +932,7 @@ int ff_get_plane_bytewidth(enum PixelFormat pix_fmt, int width, int plane)
         break;
     case FF_PIXEL_PLANAR:
             if (plane == 1 || plane == 2)
-                width= -((-width)>>pf->x_chroma_shift);
+                width= -((-width)>>desc->log2_chroma_w);
 
             return (width * pf->depth + 7) >> 3;
         break;
@@ -999,6 +950,7 @@ void av_picture_copy(AVPicture *dst, const AVPicture *src,
 {
     int i;
     const PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
+    const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
 
     switch(pf->pixel_type) {
     case FF_PIXEL_PACKED:
@@ -1008,7 +960,7 @@ void av_picture_copy(AVPicture *dst, const AVPicture *src,
             int bwidth = ff_get_plane_bytewidth(pix_fmt, width, i);
             h = height;
             if (i == 1 || i == 2) {
-                h= -((-height)>>pf->y_chroma_shift);
+                h= -((-height)>>desc->log2_chroma_h);
             }
             ff_img_copy_plane(dst->data[i], dst->linesize[i],
                            src->data[i], src->linesize[i],
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ituh263dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ituh263dec.c
index fc997f080..5337aab6b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ituh263dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ituh263dec.c
@@ -563,7 +563,7 @@ not_coded:
 
 static int h263_skip_b_part(MpegEncContext *s, int cbp)
 {
-    DECLARE_ALIGNED(16, DCTELEM, dblock)[64];
+    LOCAL_ALIGNED_16(DCTELEM, dblock, [64]);
     int i, mbi;
 
     /* we have to set s->mb_intra to zero to decode B-part of PB-frame correctly
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/lpc.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/lpc.c
new file mode 100644
index 000000000..844db037c
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/lpc.c
@@ -0,0 +1,26 @@
+/**
+ * LPC utility code
+ * Copyright (c) 2006  Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/lls.h"
+#include "dsputil.h"
+
+#define LPC_USE_DOUBLE
+#include "lpc.h"
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/lpc.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/lpc.h
new file mode 100644
index 000000000..c8053d0d7
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/lpc.h
@@ -0,0 +1,81 @@
+/**
+ * LPC utility code
+ * Copyright (c) 2006  Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_LPC_H
+#define AVCODEC_LPC_H
+
+#include <stdint.h>
+
+#ifdef LPC_USE_DOUBLE /* set by the includer before this header, as lpc.c does */
+#define LPC_TYPE double
+#else /* default element type when LPC_USE_DOUBLE is not defined */
+#define LPC_TYPE float
+#endif
+
+/**
+ * Levinson-Durbin recursion.
+ * Produces LPC coefficients from autocorrelation data; pass i stores its
+ * i+1 coefficients at lpc + i*lpc_stride, derived from the previous pass.
+ * @param normalize nonzero: autoc[0] seeds the error term dividing each r
+ * @param fail      nonzero: return -1 if the last autoc value read is 0,
+ *                  the initial error is <= 0 or the running error is < 0
+ * @return 0 on success, -1 only when fail is nonzero and a check trips
+ */
+static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order,
+                                    LPC_TYPE *lpc, int lpc_stride, int fail,
+                                    int normalize)
+{
+    int i, j;
+    /* 0 so fail without normalize yields -1 instead of reading garbage */
+    LPC_TYPE err = 0;
+    LPC_TYPE *lpc_last = lpc;
+
+    if (normalize)
+        err = *autoc++;
+    if (fail && (autoc[max_order - 1] == 0 || err <= 0))
+        return -1;
+
+    for(i=0; i<max_order; i++) {
+        LPC_TYPE r = -autoc[i];
+
+        if (normalize) {
+            for(j=0; j<i; j++)
+                r -= lpc_last[j] * autoc[i-j-1];
+            r /= err;
+            err *= 1.0 - (r * r);
+        }
+        lpc[i] = r;
+        /* update the earlier coefficients pairwise from both ends */
+        for(j=0; j < (i+1)>>1; j++) {
+            LPC_TYPE f = lpc_last[    j];
+            LPC_TYPE b = lpc_last[i-1-j];
+            lpc[    j] = f + r * b;
+            lpc[i-1-j] = b + r * f;
+        }
+        if (fail && err < 0)
+            return -1;
+        lpc_last = lpc;
+        lpc += lpc_stride;
+    }
+    return 0;
+}
+
+#endif /* AVCODEC_LPC_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mathops.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mathops.h
index acccd645b..f03e35ab1 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mathops.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mathops.h
@@ -116,6 +116,13 @@ static inline av_const int sign_extend(int val, unsigned bits)
 }
 #endif
 
+#ifndef zero_extend /* skipped when zero_extend is already defined as a macro */
+static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
+{ /* returns val with everything above its low `bits` bits cleared */
+    return (val << (INT_BIT - bits)) >> (INT_BIT - bits); /* shift out, shift zeros in */
+} /* NOTE(review): bits == 0 would shift by INT_BIT; presumably never passed - confirm */
+#endif
+
 #ifndef COPY3_IF_LT
 #define COPY3_IF_LT(x, y, a, b, c, d)\
 if ((y) < (x)) {\
@@ -125,5 +132,13 @@ if ((y) < (x)) {\
 }
 #endif
 
+#ifndef NEG_SSR32 /* a cast to int32_t, shifted right by 32-s bits */
+#   define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#endif
+
+#ifndef NEG_USR32 /* a cast to uint32_t, shifted right by 32-s bits */
+#   define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+
 #endif /* AVCODEC_MATHOPS_H */
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct.c
index 3bf953134..557a4c12e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct.c
@@ -18,7 +18,12 @@
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
-#include "dsputil.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/mathematics.h"
+#include "fft.h"
 
 /**
  * @file libavcodec/mdct.c
@@ -56,7 +61,7 @@ av_cold void ff_kbd_window_init(float *window, float alpha, int n)
        window[i] = sqrt(local_window[i] / sum);
 }
 
-//#include "mdct_tablegen.h"
+#include "mdct_tablegen.h"
 
 /**
  * init MDCT or IMDCT computation.
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct_tablegen.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct_tablegen.c
deleted file mode 100644
index f04502f18..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct_tablegen.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Generate a header file for hardcoded MDCT tables
- *
- * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdlib.h>
-#define CONFIG_HARDCODED_TABLES 0
-#define av_cold
-#define SINETABLE_CONST
-#define SINETABLE(size) \
-    float ff_sine_##size[size]
-#define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-#include "mdct_tablegen.h"
-#include "tableprint.h"
-
-void tableinit(void)
-{
-    int i;
-    for (i = 5; i <= 12; i++)
-        ff_init_ff_sine_windows(i);
-}
-
-#define SINE_TABLE_DEF(size) \
-    { \
-        "SINETABLE("#size")", \
-        write_float_array, \
-        ff_sine_##size, \
-        size \
-    },
-
-const struct tabledef tables[] = {
-    SINE_TABLE_DEF(  32)
-    SINE_TABLE_DEF(  64)
-    SINE_TABLE_DEF( 128)
-    SINE_TABLE_DEF( 256)
-    SINE_TABLE_DEF( 512)
-    SINE_TABLE_DEF(1024)
-    SINE_TABLE_DEF(2048)
-    SINE_TABLE_DEF(4096)
-    { NULL }
-};
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct_tablegen.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct_tablegen.h
index af652e745..1722c3b4d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct_tablegen.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mdct_tablegen.h
@@ -24,6 +24,7 @@
 // do not use libavutil/mathematics.h since this is compiled both
 // for the host and the target and config.h is only valid for the target
 #include <math.h>
+#include "../libavutil/attributes.h"
 
 #if !CONFIG_HARDCODED_TABLES
 SINETABLE(  32);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h
index b786d8e77..fef4e5ccb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h
@@ -84,7 +84,7 @@ typedef struct MJpegDecodeContext {
     int got_picture;                                ///< we found a SOF and picture is valid, too.
     int linesize[MAX_COMPONENTS];                   ///< linesize << interlaced
     int8_t *qscale_table;
-    DECLARE_ALIGNED_16(DCTELEM, block)[64];
+    DECLARE_ALIGNED(16, DCTELEM, block)[64];
     DCTELEM (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
     uint8_t *last_nnz[MAX_COMPONENTS];
     uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlp_parser.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlp_parser.c
index 34c1850fe..1e7ea2616 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlp_parser.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlp_parser.c
@@ -297,5 +297,5 @@ AVCodecParser mlp_parser = {
     sizeof(MLPParseContext),
     mlp_init,
     mlp_parse,
-    NULL,
+    ff_parse_close,
 };
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdsp.c
index 129a01187..386f463d1 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdsp.c
@@ -55,8 +55,6 @@ static void ff_mlp_filter_channel(int32_t *state, const int32_t *coeff,
     }
 }
 
-void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
-
 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx)
 {
     c->mlp_filter_channel = ff_mlp_filter_channel;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
index 9837ffcb4..9315d8efe 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
@@ -120,7 +120,7 @@ void ff_mpeg12_common_init(MpegEncContext *s)
 {
 
     s->y_dc_scale_table=
-    s->c_dc_scale_table= mpeg2_dc_scale_table[s->intra_dc_precision];
+    s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
 
 }
 
@@ -698,7 +698,9 @@ static inline int mpeg1_decode_block_inter(MpegEncContext *s,
             if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
                 goto end;
         }
-
+#if MIN_CACHE_BITS < 19
+        UPDATE_CACHE(re, &s->gb);
+#endif
         /* now quantify & encode AC coefficients */
         for(;;) {
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
@@ -738,9 +740,14 @@ static inline int mpeg1_decode_block_inter(MpegEncContext *s,
             }
 
             block[j] = level;
+#if MIN_CACHE_BITS < 19
+            UPDATE_CACHE(re, &s->gb);
+#endif
             if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
                 break;
+#if MIN_CACHE_BITS >= 19
             UPDATE_CACHE(re, &s->gb);
+#endif
         }
 end:
         LAST_SKIP_BITS(re, &s->gb, 2);
@@ -773,6 +780,9 @@ static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *bloc
             if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
                 goto end;
         }
+#if MIN_CACHE_BITS < 19
+        UPDATE_CACHE(re, &s->gb);
+#endif
 
         /* now quantify & encode AC coefficients */
         for(;;) {
@@ -809,9 +819,14 @@ static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *bloc
             }
 
             block[j] = level;
+#if MIN_CACHE_BITS < 19
+            UPDATE_CACHE(re, &s->gb);
+#endif
             if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
                 break;
+#if MIN_CACHE_BITS >= 19
             UPDATE_CACHE(re, &s->gb);
+#endif
         }
 end:
         LAST_SKIP_BITS(re, &s->gb, 2);
@@ -856,6 +871,9 @@ static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
             if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
                 goto end;
         }
+#if MIN_CACHE_BITS < 19
+        UPDATE_CACHE(re, &s->gb);
+#endif
 
         /* now quantify & encode AC coefficients */
         for(;;) {
@@ -889,9 +907,14 @@ static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
 
             mismatch ^= level;
             block[j] = level;
+#if MIN_CACHE_BITS < 19
+            UPDATE_CACHE(re, &s->gb);
+#endif
             if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
                 break;
+#if MIN_CACHE_BITS >= 19
             UPDATE_CACHE(re, &s->gb);
+#endif
         }
 end:
         LAST_SKIP_BITS(re, &s->gb, 2);
@@ -926,6 +949,9 @@ static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
         if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
             goto end;
     }
+#if MIN_CACHE_BITS < 19
+    UPDATE_CACHE(re, &s->gb);
+#endif
 
     /* now quantify & encode AC coefficients */
     for(;;) {
@@ -954,9 +980,14 @@ static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
         }
 
         block[j] = level;
+#if MIN_CACHE_BITS < 19
+        UPDATE_CACHE(re, &s->gb);
+#endif
         if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
             break;
+#if MIN_CACHE_BITS >=19
         UPDATE_CACHE(re, &s->gb);
+#endif
     }
 end:
     LAST_SKIP_BITS(re, &s->gb, 2);
@@ -1735,6 +1766,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
         if(s->current_picture.motion_val[0] && !s->encoding){ //note motion_val is normally NULL unless we want to extract the MVs
             const int wrap = s->b8_stride;
             int xy = s->mb_x*2 + s->mb_y*2*wrap;
+            int b8_xy= 4*(s->mb_x + s->mb_y*s->mb_stride);
             int motion_x, motion_y, dir, i;
 
             for(i=0; i<2; i++){
@@ -1753,11 +1785,12 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
                     s->current_picture.motion_val[dir][xy    ][1] = motion_y;
                     s->current_picture.motion_val[dir][xy + 1][0] = motion_x;
                     s->current_picture.motion_val[dir][xy + 1][1] = motion_y;
-                    s->current_picture.ref_index [dir][xy    ]=
-                    s->current_picture.ref_index [dir][xy + 1]= s->field_select[dir][i];
+                    s->current_picture.ref_index [dir][b8_xy    ]=
+                    s->current_picture.ref_index [dir][b8_xy + 1]= s->field_select[dir][i];
                     assert(s->field_select[dir][i]==0 || s->field_select[dir][i]==1);
                 }
                 xy += wrap;
+                b8_xy +=2;
             }
         }
 
@@ -2186,7 +2219,7 @@ int ff_mpeg1_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size,
                     return i-3;
                 }
             }
-            if(s && state == PICTURE_START_CODE){
+            if(pc->frame_start_found == 0 && s && state == PICTURE_START_CODE){
                 ff_fetch_timestamp(s, i-3, 1);
             }
         }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12decdata.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12decdata.h
index b3da369b1..9890afd16 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12decdata.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12decdata.h
@@ -21,7 +21,7 @@
  */
 
 /**
- * @file mpeg12decdata.h
+ * @file libavcodec/mpeg12decdata.h
  * MPEG1/2 decoder tables.
  */
 
@@ -90,35 +90,4 @@ static const uint8_t non_linear_qscale[32] = {
     56,64,72,80,88,96,104,112,
 };
 
-static const uint8_t mpeg2_dc_scale_table1[128]={
-//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-};
-
-static const uint8_t mpeg2_dc_scale_table2[128]={
-//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-};
-
-static const uint8_t mpeg2_dc_scale_table3[128]={
-//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-};
-
-static const uint8_t * const mpeg2_dc_scale_table[4]={
-    ff_mpeg1_dc_scale_table,
-    mpeg2_dc_scale_table1,
-    mpeg2_dc_scale_table2,
-    mpeg2_dc_scale_table3,
-};
-
 #endif /* AVCODEC_MPEG12DECDATA_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video.c
index a647960a5..dd4dd8ad9 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video.c
@@ -137,7 +137,7 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
     } else if(IS_INTERLACED(colocated_mb_type)){
         s->mv_type = MV_TYPE_FIELD;
         for(i=0; i<2; i++){
-            int field_select= s->next_picture.ref_index[0][s->block_index[2*i]];
+            int field_select= s->next_picture.ref_index[0][4*mb_index + 2*i];
             s->field_select[0][i]= field_select;
             s->field_select[1][i]= i;
             if(s->top_field_first){
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video.h
index bf8762657..aab32364e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video.h
@@ -23,6 +23,11 @@
 #ifndef AVCODEC_MPEG4VIDEO_H
 #define AVCODEC_MPEG4VIDEO_H
 
+#include <stdint.h>
+#include "get_bits.h"
+#include "mpegvideo.h"
+#include "rl.h"
+
 // shapes
 #define RECT_SHAPE       0
 #define BIN_SHAPE        1
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video_parser.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video_parser.c
index cde2499ab..6e39b598d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video_parser.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4video_parser.c
@@ -22,6 +22,7 @@
 
 #include "parser.h"
 #include "mpegvideo.h"
+#include "mpeg4video.h"
 #include "mpeg4video_parser.h"
 
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4videodec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4videodec.c
index fc9006413..6d2c4d59d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4videodec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg4videodec.c
@@ -435,7 +435,7 @@ int mpeg4_decode_video_packet_header(MpegEncContext *s)
 /**
  * gets the average motion vector for a GMC MB.
  * @param n either 0 for the x component or 1 for y
- * @returns the average MV for a GMC MB
+ * @return the average MV for a GMC MB
  */
 static inline int get_amv(MpegEncContext *s, int n){
     int x, y, mb_v, sum, dx, dy, shift;
@@ -2085,7 +2085,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
      }
      /* detect buggy encoders which don't set the low_delay flag (divx4/xvid/opendivx)*/
      // note we cannot detect divx5 without b-frames easily (although it's buggy too)
-     if(s->vo_type==0 && s->vol_control_parameters==0 && s->divx_version==0 && s->picture_number==0){
+     if(s->vo_type==0 && s->vol_control_parameters==0 && s->divx_version==-1 && s->picture_number==0){
          av_log(s->avctx, AV_LOG_ERROR, "looks like this file was encoded with (divx4/(old)xvid/opendivx) -> forcing low_delay flag\n");
          s->low_delay=1;
      }
@@ -2124,7 +2124,7 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
     startcode = 0xff;
     for(;;) {
         if(get_bits_count(gb) >= gb->size_in_bits){
-            if(gb->size_in_bits==8 && (s->divx_version || s->xvid_build)){
+            if(gb->size_in_bits==8 && (s->divx_version>=0 || s->xvid_build>=0)){
                 av_log(s->avctx, AV_LOG_ERROR, "frame skip %d\n", gb->size_in_bits);
                 return FRAME_SKIPPED; //divx bug
             }else
@@ -2200,6 +2200,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
     int ret;
     static int done = 0;
 
+    s->divx_version=
+    s->divx_build=
+    s->xvid_build=
+    s->lavc_build= -1;
+
     if((ret=ff_h263_decode_init(avctx)) < 0)
         return ret;
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
index 72c0892d8..67085eb55 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
@@ -27,6 +27,7 @@
  * The simplest mpeg encoder (well, it was the simplest!).
  */
 
+#include "libavutil/intmath.h"
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -74,6 +75,48 @@ const uint8_t ff_mpeg1_dc_scale_table[128]={
     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
 };
 
+static const uint8_t mpeg2_dc_scale_table1[128]={ // all 128 entries are 4
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+};
+
+static const uint8_t mpeg2_dc_scale_table2[128]={ // all 128 entries are 2
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const uint8_t mpeg2_dc_scale_table3[128]={ // all 128 entries are 1
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+const uint8_t * const ff_mpeg2_dc_scale_table[4]={ // indexed by intra_dc_precision in ff_mpeg12_common_init()
+    ff_mpeg1_dc_scale_table, // index 0 reuses the MPEG-1 table
+    mpeg2_dc_scale_table1,   // constant 4
+    mpeg2_dc_scale_table2,   // constant 2
+    mpeg2_dc_scale_table3,   // constant 1
+};
+
+const enum PixelFormat ff_pixfmt_list_420[] = { // PIX_FMT_NONE-terminated list
+    PIX_FMT_YUV420P,
+    PIX_FMT_NONE // terminator
+};
+
+const enum PixelFormat ff_hwaccel_pixfmt_list_420[] = { // PIX_FMT_NONE-terminated list
+    PIX_FMT_DXVA2_VLD,
+    PIX_FMT_VAAPI_VLD,
+    PIX_FMT_YUV420P, // same software format as ff_pixfmt_list_420, listed after the two *_VLD entries
+    PIX_FMT_NONE // terminator
+};
 
 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
     int i;
@@ -220,14 +263,14 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
             for(i=0; i<2; i++){
                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t), fail)
                 pic->motion_val[i]= pic->motion_val_base[i]+4;
-                FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], b8_array_size * sizeof(uint8_t), fail)
+                FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], 4*mb_array_size * sizeof(uint8_t), fail)
             }
             pic->motion_subsample_log2= 2;
         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
             for(i=0; i<2; i++){
                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t), fail)
                 pic->motion_val[i]= pic->motion_val_base[i]+4;
-                FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], b8_array_size * sizeof(uint8_t), fail)
+                FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], 4*mb_array_size * sizeof(uint8_t), fail)
             }
             pic->motion_subsample_log2= 3;
         }
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.h
index d011a943e..0d347f85b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.h
@@ -696,6 +696,7 @@ int ff_find_unused_picture(MpegEncContext *s, int shared);
 void ff_denoise_dct(MpegEncContext *s, DCTELEM *block);
 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
 const uint8_t *ff_find_start_code(const uint8_t *p, const uint8_t *end, uint32_t *state);
+void ff_set_qscale(MpegEncContext * s, int qscale);
 
 void ff_er_frame_start(MpegEncContext *s);
 void ff_er_frame_end(MpegEncContext *s);
@@ -714,6 +715,9 @@ void ff_copy_picture(Picture *dst, Picture *src);
  */
 int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared);
 
+extern const enum PixelFormat ff_pixfmt_list_420[];
+extern const enum PixelFormat ff_hwaccel_pixfmt_list_420[];
+
 static inline void ff_update_block_index(MpegEncContext *s){
     const int block_size= 8>>s->avctx->lowres;
 
@@ -764,6 +768,7 @@ int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
 
 /* mpeg12.c */
 extern const uint8_t ff_mpeg1_dc_scale_table[128];
+extern const uint8_t * const ff_mpeg2_dc_scale_table[4];
 
 void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number);
 void mpeg1_encode_mb(MpegEncContext *s,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
index 32c0a95a6..de48a6d23 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
@@ -1732,9 +1732,7 @@ int ff_msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                             if(s->qscale<8){
                                 ll= SHOW_UBITS(re, &s->gb, 3); SKIP_BITS(re, &s->gb, 3);
                                 if(ll==0){
-                                    if(SHOW_UBITS(re, &s->gb, 1)) av_log(s->avctx, AV_LOG_ERROR, "cool a new vlc code ,contact the ffmpeg developers and upload the file\n");
-                                    SKIP_BITS(re, &s->gb, 1);
-                                    ll=8;
+                                    ll= 8+SHOW_UBITS(re, &s->gb, 1); SKIP_BITS(re, &s->gb, 1);
                                 }
                             }else{
                                 ll=2;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c
index 942f8de30..d11d91ca4 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c
@@ -36,6 +36,7 @@
 #include "libavutil/random_seed.h"
 #include "avcodec.h"
 #include "dsputil.h"
+#include "fft.h"
 
 #define ALT_BITSTREAM_READER_LE
 #include "get_bits.h"
@@ -47,7 +48,7 @@
 
 typedef struct NellyMoserDecodeContext {
     AVCodecContext* avctx;
-    DECLARE_ALIGNED_16(float,float_buf)[NELLY_SAMPLES];
+    DECLARE_ALIGNED(16, float,float_buf)[NELLY_SAMPLES];
     float           state[128];
     AVLFG           random_state;
     GetBitContext   gb;
@@ -55,7 +56,7 @@ typedef struct NellyMoserDecodeContext {
     float           scale_bias;
     DSPContext      dsp;
     FFTContext      imdct_ctx;
-    DECLARE_ALIGNED_16(float,imdct_out)[NELLY_BUF_LEN * 2];
+    DECLARE_ALIGNED(16, float,imdct_out)[NELLY_BUF_LEN * 2];
 } NellyMoserDecodeContext;
 
 static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/options.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/options.c
index ae6ee4785..2d547e750 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/options.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/options.c
@@ -55,6 +55,7 @@ void avcodec_get_context_defaults(AVCodecContext *s){
 
     s->palctrl = NULL;
     s->reget_buffer= avcodec_default_reget_buffer;
+    s->reordered_opaque= AV_NOPTS_VALUE;
     
     s->bit_rate= 800*1000;
     s->bit_rate_tolerance= s->bit_rate*10;
@@ -102,6 +103,7 @@ void avcodec_get_context_defaults(AVCodecContext *s){
     s->inter_quant_bias= FF_DEFAULT_QUANT_BIAS;
     s->rc_max_available_vbv_use = 1.0/3;
     s->rc_min_vbv_overflow_use = 3;
+    s->bidir_refine = 1;
 }
 
 AVCodecContext *avcodec_alloc_context(void){
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c
index 3b5097950..860f26f54 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c
@@ -244,7 +244,7 @@ void av_parser_close(AVCodecParserContext *s)
 
 /**
  * combines the (truncated) bitstream to a complete frame
- * @returns -1 if no complete frame could be created, AVERROR(ENOMEM) if there was a memory allocation error
+ * @return -1 if no complete frame could be created, AVERROR(ENOMEM) if there was a memory allocation error
  */
 int ff_combine_frame(ParseContext *pc, int next, const uint8_t **buf, int *buf_size)
 {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ratecontrol.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ratecontrol.h
index 9a070e997..b07b90977 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ratecontrol.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ratecontrol.h
@@ -24,7 +24,7 @@
 #define AVCODEC_RATECONTROL_H
 
 /**
- * @file ratecontrol.h
+ * @file libavcodec/ratecontrol.h
  * ratecontrol header.
  */
 
@@ -85,7 +85,7 @@ typedef struct RateControlContext{
     void *non_lavc_opaque;        ///< context for non lavc rc code (for example xvid)
     float dry_run_qscale;         ///< for xvid rc
     int last_picture_number;      ///< for xvid rc
-    AVEvalExpr * rc_eq_eval;
+    AVExpr * rc_eq_eval;
 }RateControlContext;
 
 struct MpegEncContext;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rectangle.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rectangle.h
index f0574261e..ca9b36f85 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rectangle.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rectangle.h
@@ -20,7 +20,7 @@
  */
 
 /**
- * @file rectangle.h
+ * @file libavcodec/rectangle.h
  * useful rectangle filling function
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
@@ -37,11 +37,11 @@
  * fill a rectangle.
  * @param h height of the rectangle, should be a constant
  * @param w width of the rectangle, should be a constant
- * @param size the size of val (1 or 4), should be a constant
+ * @param size the size of val (1, 2 or 4), should be a constant
  */
 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
     uint8_t *p= (uint8_t*)vp;
-    assert(size==1 || size==4);
+    assert(size==1 || size==2 || size==4);
     assert(w<=4);
 
     w      *= size;
@@ -58,7 +58,7 @@ static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride,
         *(uint16_t*)(p + 2*stride)= v;
         *(uint16_t*)(p + 3*stride)= v;
     }else if(w==4){
-        const uint32_t v= size==4 ? val : val*0x01010101;
+        const uint32_t v= size==4 ? val : size==2 ? val*0x00010001 : val*0x01010101;
         *(uint32_t*)(p + 0*stride)= v;
         if(h==1) return;
         *(uint32_t*)(p + 1*stride)= v;
@@ -68,7 +68,7 @@ static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride,
     }else if(w==8){
     //gcc can't optimize 64bit math on x86_32
 #if HAVE_FAST_64BIT
-        const uint64_t v= val*0x0100000001ULL;
+        const uint64_t v=  size==2 ? val*0x0001000100010001ULL : val*0x0100000001ULL;
         *(uint64_t*)(p + 0*stride)= v;
         if(h==1) return;
         *(uint64_t*)(p + 1*stride)= v;
@@ -87,16 +87,17 @@ static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride,
         *(uint64_t*)(p + 0+3*stride)= v;
         *(uint64_t*)(p + 8+3*stride)= v;
 #else
-        *(uint32_t*)(p + 0+0*stride)= val;
-        *(uint32_t*)(p + 4+0*stride)= val;
+        const uint32_t v= size==2 ? val*0x00010001 : val;
+        *(uint32_t*)(p + 0+0*stride)= v;
+        *(uint32_t*)(p + 4+0*stride)= v;
         if(h==1) return;
-        *(uint32_t*)(p + 0+1*stride)= val;
-        *(uint32_t*)(p + 4+1*stride)= val;
+        *(uint32_t*)(p + 0+1*stride)= v;
+        *(uint32_t*)(p + 4+1*stride)= v;
         if(h==2) return;
-        *(uint32_t*)(p + 0+2*stride)= val;
-        *(uint32_t*)(p + 4+2*stride)= val;
-        *(uint32_t*)(p + 0+3*stride)= val;
-        *(uint32_t*)(p + 4+3*stride)= val;
+        *(uint32_t*)(p + 0+2*stride)= v;
+        *(uint32_t*)(p + 4+2*stride)= v;
+        *(uint32_t*)(p + 0+3*stride)= v;
+        *(uint32_t*)(p + 4+3*stride)= v;
     }else if(w==16){
         *(uint32_t*)(p + 0+0*stride)= val;
         *(uint32_t*)(p + 4+0*stride)= val;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c
index 11397049b..4e7995f5b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c
@@ -28,6 +28,7 @@
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
+#include "mpeg4video.h"
 #include "h263.h"
 
 //#define DEBUG
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h
index c199ea97f..5cefe935d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h
@@ -111,7 +111,7 @@ typedef struct RV34DecContext{
     int      *deblock_coefs; ///< deblock coefficients for each macroblock
 
     /** 8x8 block available flags (for MV prediction) */
-    DECLARE_ALIGNED_8(uint32_t, avail_cache)[3*4];
+    DECLARE_ALIGNED(8, uint32_t, avail_cache)[3*4];
 
     int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
     int (*decode_mb_info)(struct RV34DecContext *r);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c
index 36d263f39..6c2381b5b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c
@@ -478,7 +478,7 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
         */
 
         for (m = 0; m < 2; m++) {
-            if (s->mb_x > 0 && h->intra4x4_pred_mode[mb_xy - 1][0] != -1) {
+            if (s->mb_x > 0 && h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1]+6] != -1) {
                 for (i = 0; i < 4; i++) {
                     *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - 1 + i*h->b_stride];
                 }
@@ -489,18 +489,18 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
             }
             if (s->mb_y > 0) {
                 memcpy(h->mv_cache[m][scan8[0] - 1*8], s->current_picture.motion_val[m][b_xy - h->b_stride], 4*2*sizeof(int16_t));
-                memset(&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1, 4);
+                memset(&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
 
                 if (s->mb_x < (s->mb_width - 1)) {
                     *(uint32_t *) h->mv_cache[m][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride + 4];
                     h->ref_cache[m][scan8[0] + 4 - 1*8] =
-                        (h->intra4x4_pred_mode[mb_xy - s->mb_stride + 1][0] == -1 ||
-                         h->intra4x4_pred_mode[mb_xy - s->mb_stride    ][4] == -1) ? PART_NOT_AVAILABLE : 1;
+                        (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride + 1]+6] == -1 ||
+                         h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride    ]  ] == -1) ? PART_NOT_AVAILABLE : 1;
                 }else
                     h->ref_cache[m][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
                 if (s->mb_x > 0) {
                     *(uint32_t *) h->mv_cache[m][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride - 1];
-                    h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] == -1) ? PART_NOT_AVAILABLE : 1;
+                    h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride - 1]+3] == -1) ? PART_NOT_AVAILABLE : 1;
                 }else
                     h->ref_cache[m][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
             }else
@@ -540,17 +540,17 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
         if (mb_type == 8) {
             if (s->mb_x > 0) {
                 for (i = 0; i < 4; i++) {
-                    h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[mb_xy - 1][i];
+                    h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1]+6-i];
                 }
                 if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1) {
                     h->left_samples_available = 0x5F5F;
                 }
             }
             if (s->mb_y > 0) {
-                h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][4];
-                h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][5];
-                h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][6];
-                h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][3];
+                h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]+0];
+                h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]+1];
+                h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]+2];
+                h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride]+3];
 
                 if (h->intra4x4_pred_mode_cache[4+8*0] == -1) {
                     h->top_samples_available = 0x33FF;
@@ -624,7 +624,7 @@ static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
         }
     }
     if (!IS_INTRA4x4(mb_type)) {
-        memset(h->intra4x4_pred_mode[mb_xy], DC_PRED, 8);
+        memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy], DC_PRED, 8);
     }
     if (!IS_SKIP(mb_type) || s->pict_type == FF_B_TYPE) {
         memset(h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
@@ -774,14 +774,14 @@ static int svq3_decode_slice_header(H264Context *h)
 
     /* reset intra predictors and invalidate motion vector references */
     if (s->mb_x > 0) {
-        memset(h->intra4x4_pred_mode[mb_xy - 1], -1, 4*sizeof(int8_t));
-        memset(h->intra4x4_pred_mode[mb_xy - s->mb_x], -1, 8*sizeof(int8_t)*s->mb_x);
+        memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy - 1      ]+3, -1, 4*sizeof(int8_t));
+        memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy - s->mb_x]  , -1, 8*sizeof(int8_t)*s->mb_x);
     }
     if (s->mb_y > 0) {
-        memset(h->intra4x4_pred_mode[mb_xy - s->mb_stride], -1, 8*sizeof(int8_t)*(s->mb_width - s->mb_x));
+        memset(h->intra4x4_pred_mode+h->mb2br_xy[mb_xy - s->mb_stride], -1, 8*sizeof(int8_t)*(s->mb_width - s->mb_x));
 
         if (s->mb_x > 0) {
-            h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] = -1;
+            h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - s->mb_stride - 1]+3] = -1;
         }
     }
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/tableprint.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/tableprint.c
deleted file mode 100644
index bca94e8d4..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/tableprint.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Generate a file for hardcoded tables
- *
- * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdio.h>
-#include <inttypes.h>
-#include "tableprint.h"
-
-#define WRITE_1D_FUNC(name, type, fmtstr, linebrk)\
-void write_##name##_array(const void *arg, int len, int dummy)\
-{\
-    const type *data = arg;\
-    int i;\
-    printf("   ");\
-    for (i = 0; i < len - 1; i++) {\
-       printf(" "fmtstr",", data[i]);\
-       if ((i & linebrk) == linebrk) printf("\n   ");\
-    }\
-    printf(" "fmtstr"\n", data[i]);\
-}
-
-WRITE_1D_FUNC(int8,   int8_t,   "%3"PRIi8, 15)
-WRITE_1D_FUNC(uint32, uint32_t, "0x%08"PRIx32, 7)
-WRITE_1D_FUNC(float,  float,    "%.18e", 3)
-
-#define WRITE_2D_FUNC(name, type)\
-void write_##name##_2d_array(const void *arg, int len, int len2)\
-{\
-    const type *data = arg;\
-    int i;\
-    printf("    {\n");\
-    for (i = 0; i < len; i++) {\
-        write_##name##_array(data + i * len2, len2, 0);\
-        printf(i == len - 1 ? "    }\n" : "    }, {\n");\
-    }\
-}
-
-WRITE_2D_FUNC(int8,   int8_t)
-WRITE_2D_FUNC(uint32, uint32_t)
-
-// ==> Start patch MPC
-//int main(int argc, char *argv[])
-//{
-//    int i;
-//
-//    printf("/* This file was generated by libavcodec/tableprint */\n");
-//    printf("#include <stdint.h>/n");
-//    tableinit();
-//
-//    for (i = 0; tables[i].declaration; i++) {
-//        printf(tables[i].declaration);
-//        printf(" = {\n");
-//        tables[i].printfunc(tables[i].data, tables[i].size, tables[i].size2);
-//        printf("};\n");
-//    }
-//    return 0;
-//}
-// <== End patch MPC
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/tableprint.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/tableprint.h
deleted file mode 100644
index 706e29653..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/tableprint.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Generate a file for hardcoded tables
- *
- * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_TABLEPRINT_H
-#define AVCODEC_TABLEPRINT_H
-
-#include <stdint.h>
-
-/**
- * \defgroup printfuncs Predefined functions for printing tables
- *
- * \{
- */
-void write_int8_array     (const void *, int, int);
-void write_uint32_array   (const void *, int, int);
-void write_float_array    (const void *, int, int);
-void write_int8_2d_array  (const void *, int, int);
-void write_uint32_2d_array(const void *, int, int);
-/** \} */ // end of printfuncs group
-
-struct tabledef {
-    /** String that declares the array. Adding " = { ..." after it should
-     * make a valid initializer, adding "extern" before and ";" if possible
-     * should make a valid extern declaration. */
-    const char *declaration;
-    /** Function used to print the table data (i.e. the part in {}).
-     * Should be one of the predefined write_*_array functions. */
-    void (*printfunc)(const void *, int, int);
-    /** Pointer passed to the printfunc, usually a pointer to the start
-     * of the array to be printed. */
-    const void *data;
-    int size;   ///< size of the first dimension of the array
-    int size2;  ///< size of the second dimension of the array if any
-};
-
-/** Initializes all the tables described in the tables array */
-void tableinit(void);
-/** Describes the tables that should be printed */
-extern const struct tabledef tables[];
-
-#endif /* AVCODEC_TABLEPRINT_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
index d1a9b941f..04daf734f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
@@ -27,6 +27,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/crc.h"
+#include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "dsputil.h"
 #include "imgconvert.h"
@@ -100,6 +101,11 @@ void avcodec_register(AVCodec *codec)
     codec->next = NULL;
 }
 
+unsigned avcodec_get_edge_width(void)
+{
+    return EDGE_WIDTH;
+}
+
 void avcodec_set_dimensions(AVCodecContext *s, int width, int height){
     s->coded_width = width;
     s->coded_height= height;
@@ -118,7 +124,7 @@ typedef struct InternalBuffer{
 
 #define INTERNAL_BUFFER_SIZE 32
 
-void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
+void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int linesize_align[4]){
     int w_align= 1;
     int h_align= 1;
 
@@ -174,6 +180,36 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
     *height= FFALIGN(*height, h_align);
     if(s->codec_id == CODEC_ID_H264)
         *height+=2; // some of the optimized chroma MC reads one line too much
+
+    linesize_align[0] =
+    linesize_align[1] =
+    linesize_align[2] =
+    linesize_align[3] = STRIDE_ALIGN;
+//STRIDE_ALIGN is 8 for SSE* but this does not work for SVQ1 chroma planes
+//we could change STRIDE_ALIGN to 16 for x86/sse but it would increase the
+//picture size unnecessarily in some cases. The solution here is not
+//pretty and better ideas are welcome!
+#if HAVE_MMX
+    if(s->codec_id == CODEC_ID_SVQ1 || s->codec_id == CODEC_ID_VP5 ||
+       s->codec_id == CODEC_ID_VP6 || s->codec_id == CODEC_ID_VP6F ||
+       s->codec_id == CODEC_ID_VP6A) {
+        linesize_align[0] =
+        linesize_align[1] =
+        linesize_align[2] = 16;
+    }
+#endif
+}
+
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
+    int chroma_shift = av_pix_fmt_descriptors[s->pix_fmt].log2_chroma_w;
+    int linesize_align[4];
+    int align;
+    avcodec_align_dimensions2(s, width, height, linesize_align);
+    align = FFMAX(linesize_align[0], linesize_align[3]);
+    linesize_align[1] <<= chroma_shift;
+    linesize_align[2] <<= chroma_shift;
+    align = FFMAX3(align, linesize_align[1], linesize_align[2]);
+    *width=FFALIGN(*width, align);
 }
 
 int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h){
@@ -238,7 +274,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
 
         avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
 
-        avcodec_align_dimensions(s, &w, &h);
+        avcodec_align_dimensions2(s, &w, &h, stride_align);
 
         if(!(s->flags&CODEC_FLAG_EMU_EDGE)){
             w+= EDGE_WIDTH*2;
@@ -254,16 +290,6 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
 
             unaligned = 0;
             for (i=0; i<4; i++){
-//STRIDE_ALIGN is 8 for SSE* but this does not work for SVQ1 chroma planes
-//we could change STRIDE_ALIGN to 16 for x86/sse but it would increase the
-//picture size unneccessarily in some cases. The solution here is not
-//pretty and better ideas are welcome!
-#if HAVE_MMX
-                if(s->codec_id == CODEC_ID_SVQ1)
-                    stride_align[i]= 16;
-                else
-#endif
-                stride_align[i] = STRIDE_ALIGN;
                 unaligned |= picture.linesize[i] % stride_align[i];
             }
         } while (unaligned);
@@ -580,7 +606,7 @@ int attribute_align_arg avcodec_decode_audio2(AVCodecContext *avctx, int16_t *sa
     return ret;
 }
 
-int avcodec_close(AVCodecContext *avctx)
+av_cold int avcodec_close(AVCodecContext *avctx)
 {
     /* If there is a user-supplied mutex locking routine, call it. */
     if (ff_lockmgr_cb) {
@@ -598,10 +624,12 @@ int avcodec_close(AVCodecContext *avctx)
 
     //if (HAVE_THREADS && avctx->thread_opaque)
     //    avcodec_thread_free(avctx);
-    if (avctx->codec->close)
+    if (avctx->codec && avctx->codec->close)
         avctx->codec->close(avctx);
     avcodec_default_free_buffers(avctx);
     av_freep(&avctx->priv_data);
+    if(avctx->codec && avctx->codec->encode)
+        av_freep(&avctx->extradata);
     avctx->codec = NULL;
     //entangled_thread_counter--; /* ffdshow custom comment out */
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c
index ea15b4018..38e01fb16 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c
@@ -781,9 +781,11 @@ int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
 
     if(v->interlace){
         v->fcm = decode012(gb);
-		// ==> Start patch MPC
-        if(v->fcm && !v->allow_interlaced) return -1; // interlaced frames/fields are not implemented
-		// <== End patch MPC
+        if(v->fcm){
+            if(!v->warn_interlaced++)
+                av_log(v->s.avctx, AV_LOG_ERROR, "Interlaced frames/fields support is not implemented\n");
+            return -1;
+        }
     }
     switch(get_unary(gb, 0, 4)) {
     case 0:
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
index c38c494c1..65338e39d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
@@ -29,6 +29,7 @@
 #include "dsputil.h"
 #include "avcodec.h"
 #include "mpegvideo.h"
+#include "h263.h"
 #include "vc1.h"
 #include "vc1data.h"
 #include "vc1acdata.h"
@@ -36,7 +37,6 @@
 #include "unary.h"
 #include "simple_idct.h"
 #include "mathops.h"
-#include "h263.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -2992,11 +2992,9 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
         avctx->idct_algo=FF_IDCT_WMV2;
     }
 
-    if(ff_h263_decode_init(avctx) < 0)
+    if(ff_msmpeg4_decode_init(avctx) < 0)
         return -1;
     if (vc1_init_common(v) < 0) return -1;
-    // only for ff_msmp4_mb_i_table
-    if (ff_msmpeg4_decode_init(avctx) < 0) return -1;
 
     avctx->coded_width = avctx->width;
     avctx->coded_height = avctx->height;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c
index 85896dbdd..31c976d6c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c
@@ -581,10 +581,6 @@ VC1_MSPEL_MC(op_avg, avg_)
 
 /* pixel functions - really are entry points to vc1_mspel_mc */
 
-/* this one is defined in dsputil.c */
-void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-
 #define PUT_VC1_MSPEL(a, b)\
 static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
      put_vc1_mspel_mc(dst, src, stride, a, b, rnd);                         \
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vorbis.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vorbis.h
new file mode 100644
index 000000000..ce9bead42
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vorbis.h
@@ -0,0 +1,48 @@
+/*
+ * copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VORBIS_H
+#define AVCODEC_VORBIS_H
+
+#include "avcodec.h"
+
+extern const float ff_vorbis_floor1_inverse_db_table[256];
+extern const float * const ff_vorbis_vwin[8];
+extern const uint8_t ff_vorbis_channel_layout_offsets[8][8];
+extern const int64_t ff_vorbis_channel_layouts[9];
+
+typedef struct {
+    uint_fast16_t x;
+    uint_fast16_t sort;
+    uint_fast16_t low;
+    uint_fast16_t high;
+} vorbis_floor1_entry;
+
+void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values);
+unsigned int ff_vorbis_nth_root(unsigned int x, unsigned int n); // x^(1/n)
+int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num);
+void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values,
+                                  uint_fast16_t * y_list, int * flag,
+                                  int multiplier, float * out, int samples);
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
+
+#define ilog(i) av_log2(2*(i))
+
+#endif /* AVCODEC_VORBIS_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c
index e3595e718..0aa93d727 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c
@@ -44,23 +44,10 @@
 
 static av_cold int vp3_decode_end(AVCodecContext *avctx);
 
-typedef struct Coeff {
-    struct Coeff *next;
-    DCTELEM coeff;
-    uint8_t index;
-} Coeff;
-
 //FIXME split things out into their own arrays
 typedef struct Vp3Fragment {
-    Coeff *next_coeff;
-    /* address of first pixel taking into account which plane the fragment
-     * lives on as well as the plane stride */
-    int first_pixel;
-    /* this is the macroblock that the fragment belongs to */
-    uint16_t macroblock;
+    int16_t dc;
     uint8_t coding_method;
-    int8_t motion_x;
-    int8_t motion_y;
     uint8_t qpi;
 } Vp3Fragment;
 
@@ -70,6 +57,11 @@ typedef struct Vp3Fragment {
 #define SB_PARTIALLY_CODED  1
 #define SB_FULLY_CODED      2
 
+// This is the maximum length of a single long bit run that can be encoded
+// for superblock coding or block qps. Theora special-cases this to read a
+// bit instead of flipping the current bit to allow for runs longer than 4129.
+#define MAXIMUM_LONG_BIT_RUN 4129
+
 #define MODE_INTER_NO_MV      0
 #define MODE_INTRA            1
 #define MODE_INTER_PLUS_MV    2
@@ -124,6 +116,13 @@ static const int ModeAlphabet[6][CODING_MODE_COUNT] =
 
 };
 
+static const uint8_t hilbert_offset[16][2] = {
+    {0,0}, {1,0}, {1,1}, {0,1},
+    {0,2}, {0,3}, {1,3}, {1,2},
+    {2,2}, {2,3}, {3,3}, {3,2},
+    {3,1}, {2,1}, {2,0}, {3,0}
+};
+
 #define MIN_DEQUANT_VAL 2
 
 typedef struct Vp3DecodeContext {
@@ -131,12 +130,14 @@ typedef struct Vp3DecodeContext {
     int theora, theora_tables;
     int version;
     int width, height;
+    int chroma_x_shift, chroma_y_shift;
     AVFrame golden_frame;
     AVFrame last_frame;
     AVFrame current_frame;
     int keyframe;
     DSPContext dsp;
     int flipped_image;
+    int last_slice_end;
 
     int qps[3];
     int nqps;
@@ -145,8 +146,10 @@ typedef struct Vp3DecodeContext {
     int superblock_count;
     int y_superblock_width;
     int y_superblock_height;
+    int y_superblock_count;
     int c_superblock_width;
     int c_superblock_height;
+    int c_superblock_count;
     int u_superblock_start;
     int v_superblock_start;
     unsigned char *superblock_coding;
@@ -156,14 +159,14 @@ typedef struct Vp3DecodeContext {
     int macroblock_height;
 
     int fragment_count;
-    int fragment_width;
-    int fragment_height;
+    int fragment_width[2];
+    int fragment_height[2];
 
     Vp3Fragment *all_fragments;
-    uint8_t *coeff_counts;
-    Coeff *coeffs;
-    Coeff *next_coeff;
     int fragment_start[3];
+    int data_offset[3];
+
+    int8_t (*motion_val[2])[2];
 
     ScanTable scantable;
 
@@ -175,19 +178,38 @@ typedef struct Vp3DecodeContext {
     uint8_t qr_size [2][3][64];
     uint16_t qr_base[2][3][64];
 
+    /**
+     * This is a list of all tokens in bitstream order. Reordering takes place
+     * by pulling from each level during IDCT. As a consequence, IDCT must be
+     * in Hilbert order, making the minimum slice height 64 for 4:2:0 and 32
+     * otherwise. The 32 different tokens with up to 12 bits of extradata are
+     * collapsed into 3 types, packed as follows:
+     *   (from the low to high bits)
+     *
+     * 2 bits: type (0,1,2)
+     *   0: EOB run, 14 bits for run length (12 needed)
+     *   1: zero run, 7 bits for run length
+     *                7 bits for the next coefficient (3 needed)
+     *   2: coefficient, 14 bits (11 needed)
+     *
+     * Coefficients are signed, so are packed in the highest bits for automatic
+     * sign extension.
+     */
+    int16_t *dct_tokens[3][64];
+    int16_t *dct_tokens_base;
+#define TOKEN_EOB(eob_run)              ((eob_run) << 2)
+#define TOKEN_ZERO_RUN(coeff, zero_run) (((coeff) << 9) + ((zero_run) << 2) + 1)
+#define TOKEN_COEFF(coeff)              (((coeff) << 2) + 2)
+
+    /**
+     * number of blocks that contain DCT coefficients at the given level or higher
+     */
+    int num_coded_frags[3][64];
+    int total_num_coded_frags;
+
     /* this is a list of indexes into the all_fragments array indicating
      * which of the fragments are coded */
-    int *coded_fragment_list;
-    int coded_fragment_list_index;
-    int pixel_addresses_initialized;
-
-    /* track which fragments have already been decoded; called 'fast'
-     * because this data structure avoids having to iterate through every
-     * fragment in coded_fragment_list; once a fragment has been fully
-     * decoded, it is removed from this list */
-    int *fast_fragment_list;
-    int fragment_list_y_head;
-    int fragment_list_c_head;
+    int *coded_fragment_list[3];
 
     VLC dc_vlc[16];
     VLC ac_vlc_1[16];
@@ -202,7 +224,7 @@ typedef struct Vp3DecodeContext {
 
     /* these arrays need to be on 16-byte boundaries since SSE2 operations
      * index into them */
-    DECLARE_ALIGNED_16(int16_t, qmat)[3][2][3][64];     //<qmat[qpi][is_inter][plane]
+    DECLARE_ALIGNED(16, int16_t, qmat)[3][2][3][64];     //<qmat[qpi][is_inter][plane]
 
     /* This table contains superblock_count * 16 entries. Each set of 16
      * numbers corresponds to the fragment indexes 0..15 of the superblock.
@@ -210,25 +232,10 @@ typedef struct Vp3DecodeContext {
      * index. */
     int *superblock_fragments;
 
-    /* This table contains superblock_count * 4 entries. Each set of 4
-     * numbers corresponds to the macroblock indexes 0..3 of the superblock.
-     * An entry will be -1 to indicate that no entry corresponds to that
-     * index. */
-    int *superblock_macroblocks;
-
-    /* This table contains macroblock_count * 6 entries. Each set of 6
-     * numbers corresponds to the fragment indexes 0..5 which comprise
-     * the macroblock (4 Y fragments and 2 C fragments). */
-    int *macroblock_fragments;
     /* This is an array that indicates how a particular macroblock
      * is coded. */
     unsigned char *macroblock_coding;
 
-    int first_coded_y_fragment;
-    int first_coded_c_fragment;
-    int last_coded_y_fragment;
-    int last_coded_c_fragment;
-
     uint8_t edge_emu_buffer[9*2048]; //FIXME dynamic alloc
     int8_t qscale_table[2048]; //FIXME dynamic alloc (width+15)/16
 
@@ -240,10 +247,9 @@ typedef struct Vp3DecodeContext {
     uint16_t huffman_table[80][32][2];
 
     uint8_t filter_limit_values[64];
-    DECLARE_ALIGNED_8(int, bounding_values_array)[256+2];
+    DECLARE_ALIGNED(8, int, bounding_values_array)[256+2];
 
     /* ffdshow custom stuffs (begin) */
-    int fps_numerator,fps_denumerator;
     int64_t granulepos;
     int keyframe_granule_shift,keyframe_frequency_force;
     /* ffdshow custom stuffs (end) */
@@ -262,236 +268,32 @@ typedef struct Vp3DecodeContext {
  */
 static int init_block_mapping(Vp3DecodeContext *s)
 {
-    int i, j;
-    signed int hilbert_walk_mb[4];
-
-    int current_fragment = 0;
-    int current_width = 0;
-    int current_height = 0;
-    int right_edge = 0;
-    int bottom_edge = 0;
-    int superblock_row_inc = 0;
-    int mapping_index = 0;
-
-    int current_macroblock;
-    int c_fragment;
-
-    static const signed char travel_width[16] = {
-         1,  1,  0, -1,
-         0,  0,  1,  0,
-         1,  0,  1,  0,
-         0, -1,  0,  1
-    };
-
-    static const signed char travel_height[16] = {
-         0,  0,  1,  0,
-         1,  1,  0, -1,
-         0,  1,  0, -1,
-        -1,  0, -1,  0
-    };
-
-    static const signed char travel_width_mb[4] = {
-         1,  0,  1,  0
-    };
-
-    static const signed char travel_height_mb[4] = {
-         0,  1,  0, -1
-    };
-
-    hilbert_walk_mb[0] = 1;
-    hilbert_walk_mb[1] = s->macroblock_width;
-    hilbert_walk_mb[2] = 1;
-    hilbert_walk_mb[3] = -s->macroblock_width;
-
-    /* iterate through each superblock (all planes) and map the fragments */
-    for (i = 0; i < s->superblock_count; i++) {
-        /* time to re-assign the limits? */
-        if (i == 0) {
-
-            /* start of Y superblocks */
-            right_edge = s->fragment_width;
-            bottom_edge = s->fragment_height;
-            current_width = -1;
-            current_height = 0;
-            superblock_row_inc = 3 * s->fragment_width -
-                (s->y_superblock_width * 4 - s->fragment_width);
-
-            /* the first operation for this variable is to advance by 1 */
-            current_fragment = -1;
-
-        } else if (i == s->u_superblock_start) {
-
-            /* start of U superblocks */
-            right_edge = s->fragment_width / 2;
-            bottom_edge = s->fragment_height / 2;
-            current_width = -1;
-            current_height = 0;
-            superblock_row_inc = 3 * (s->fragment_width / 2) -
-                (s->c_superblock_width * 4 - s->fragment_width / 2);
-
-            /* the first operation for this variable is to advance by 1 */
-            current_fragment = s->fragment_start[1] - 1;
-
-        } else if (i == s->v_superblock_start) {
-
-            /* start of V superblocks */
-            right_edge = s->fragment_width / 2;
-            bottom_edge = s->fragment_height / 2;
-            current_width = -1;
-            current_height = 0;
-            superblock_row_inc = 3 * (s->fragment_width / 2) -
-                (s->c_superblock_width * 4 - s->fragment_width / 2);
-
-            /* the first operation for this variable is to advance by 1 */
-            current_fragment = s->fragment_start[2] - 1;
-
-        }
-
-        if (current_width >= right_edge - 1) {
-            /* reset width and move to next superblock row */
-            current_width = -1;
-            current_height += 4;
-
-            /* fragment is now at the start of a new superblock row */
-            current_fragment += superblock_row_inc;
-        }
-
-        /* iterate through all 16 fragments in a superblock */
-        for (j = 0; j < 16; j++) {
-            current_fragment += travel_width[j] + right_edge * travel_height[j];
-            current_width += travel_width[j];
-            current_height += travel_height[j];
-
-            /* check if the fragment is in bounds */
-            if ((current_width < right_edge) &&
-                (current_height < bottom_edge)) {
-                s->superblock_fragments[mapping_index] = current_fragment;
-            } else {
-                s->superblock_fragments[mapping_index] = -1;
-            }
-
-            mapping_index++;
-        }
-    }
-
-    /* initialize the superblock <-> macroblock mapping; iterate through
-     * all of the Y plane superblocks to build this mapping */
-    right_edge = s->macroblock_width;
-    bottom_edge = s->macroblock_height;
-    current_width = -1;
-    current_height = 0;
-    superblock_row_inc = s->macroblock_width -
-        (s->y_superblock_width * 2 - s->macroblock_width);
-    mapping_index = 0;
-    current_macroblock = -1;
-    for (i = 0; i < s->u_superblock_start; i++) {
-
-        if (current_width >= right_edge - 1) {
-            /* reset width and move to next superblock row */
-            current_width = -1;
-            current_height += 2;
-
-            /* macroblock is now at the start of a new superblock row */
-            current_macroblock += superblock_row_inc;
-        }
+    int sb_x, sb_y, plane;
+    int x, y, i, j = 0;
 
-        /* iterate through each potential macroblock in the superblock */
-        for (j = 0; j < 4; j++) {
-            current_macroblock += hilbert_walk_mb[j];
-            current_width += travel_width_mb[j];
-            current_height += travel_height_mb[j];
-
-            /* check if the macroblock is in bounds */
-            if ((current_width < right_edge) &&
-                (current_height < bottom_edge)) {
-                s->superblock_macroblocks[mapping_index] = current_macroblock;
-            } else {
-                s->superblock_macroblocks[mapping_index] = -1;
-            }
-
-            mapping_index++;
-        }
-    }
-
-    /* initialize the macroblock <-> fragment mapping */
-    current_fragment = 0;
-    current_macroblock = 0;
-    mapping_index = 0;
-    for (i = 0; i < s->fragment_height; i += 2) {
-
-        for (j = 0; j < s->fragment_width; j += 2) {
-
-            s->all_fragments[current_fragment].macroblock = current_macroblock;
-            s->macroblock_fragments[mapping_index++] = current_fragment;
-
-            if (j + 1 < s->fragment_width) {
-                s->all_fragments[current_fragment + 1].macroblock = current_macroblock;
-                s->macroblock_fragments[mapping_index++] = current_fragment + 1;
-            } else
-                s->macroblock_fragments[mapping_index++] = -1;
-
-            if (i + 1 < s->fragment_height) {
-                s->all_fragments[current_fragment + s->fragment_width].macroblock =
-                    current_macroblock;
-                s->macroblock_fragments[mapping_index++] =
-                    current_fragment + s->fragment_width;
-            } else
-                s->macroblock_fragments[mapping_index++] = -1;
-
-            if ((j + 1 < s->fragment_width) && (i + 1 < s->fragment_height)) {
-                s->all_fragments[current_fragment + s->fragment_width + 1].macroblock =
-                    current_macroblock;
-                s->macroblock_fragments[mapping_index++] =
-                    current_fragment + s->fragment_width + 1;
-            } else
-                s->macroblock_fragments[mapping_index++] = -1;
-
-            /* C planes */
-            c_fragment = s->fragment_start[1] +
-                (i * s->fragment_width / 4) + (j / 2);
-            s->all_fragments[c_fragment].macroblock = s->macroblock_count;
-            s->macroblock_fragments[mapping_index++] = c_fragment;
-
-            c_fragment = s->fragment_start[2] +
-                (i * s->fragment_width / 4) + (j / 2);
-            s->all_fragments[c_fragment].macroblock = s->macroblock_count;
-            s->macroblock_fragments[mapping_index++] = c_fragment;
-
-            if (j + 2 <= s->fragment_width)
-                current_fragment += 2;
-            else
-                current_fragment++;
-            current_macroblock++;
-        }
-
-        current_fragment += s->fragment_width;
+    for (plane = 0; plane < 3; plane++) {
+        int sb_width    = plane ? s->c_superblock_width  : s->y_superblock_width;
+        int sb_height   = plane ? s->c_superblock_height : s->y_superblock_height;
+        int frag_width  = s->fragment_width[!!plane];
+        int frag_height = s->fragment_height[!!plane];
+
+        for (sb_y = 0; sb_y < sb_height; sb_y++)
+            for (sb_x = 0; sb_x < sb_width; sb_x++)
+                for (i = 0; i < 16; i++) {
+                    x = 4*sb_x + hilbert_offset[i][0];
+                    y = 4*sb_y + hilbert_offset[i][1];
+
+                    if (x < frag_width && y < frag_height)
+                        s->superblock_fragments[j++] = s->fragment_start[plane] + y*frag_width + x;
+                    else
+                        s->superblock_fragments[j++] = -1;
+                }
     }
 
     return 0;  /* successful path out */
 }
 
 /*
- * This function wipes out all of the fragment data.
- */
-static void init_frame(Vp3DecodeContext *s, GetBitContext *gb)
-{
-    int i;
-
-    /* zero out all of the fragment information */
-    s->coded_fragment_list_index = 0;
-    for (i = 0; i < s->fragment_count; i++) {
-        s->coeff_counts[i] = 0;
-        s->all_fragments[i].motion_x = 127;
-        s->all_fragments[i].motion_y = 127;
-        s->all_fragments[i].next_coeff= NULL;
-        s->all_fragments[i].qpi = 0;
-        s->coeffs[i].index=
-        s->coeffs[i].coeff=0;
-        s->coeffs[i].next= NULL;
-    }
-}
-
-/*
  * This function sets up the dequantization tables used for a particular
  * frame.
  */
@@ -567,15 +369,15 @@ static void init_loop_filter(Vp3DecodeContext *s)
  */
 static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
 {
+    int superblock_starts[3] = { 0, s->u_superblock_start, s->v_superblock_start };
     int bit = 0;
     int current_superblock = 0;
     int current_run = 0;
-    int decode_fully_flags = 0;
-    int decode_partial_blocks = 0;
-    int first_c_fragment_seen;
+    int num_partial_superblocks = 0;
 
     int i, j;
     int current_fragment;
+    int plane;
 
     if (s->keyframe) {
         memset(s->superblock_coding, SB_FULLY_CODED, s->superblock_count);
@@ -584,62 +386,67 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
 
         /* unpack the list of partially-coded superblocks */
         bit = get_bits1(gb);
-        /* toggle the bit because as soon as the first run length is
-         * fetched the bit will be toggled again */
-        bit ^= 1;
-        while (current_superblock < s->superblock_count) {
-            if (current_run-- == 0) {
-                bit ^= 1;
+        while (current_superblock < s->superblock_count && get_bits_left(gb) > 0) {
                 current_run = get_vlc2(gb,
-                    s->superblock_run_length_vlc.table, 6, 2);
-                if (current_run == 33)
+                    s->superblock_run_length_vlc.table, 6, 2) + 1;
+                if (current_run == 34)
                     current_run += get_bits(gb, 12);
 
-                /* if any of the superblocks are not partially coded, flag
-                 * a boolean to decode the list of fully-coded superblocks */
-                if (bit == 0) {
-                    decode_fully_flags = 1;
-                } else {
-
-                    /* make a note of the fact that there are partially coded
-                     * superblocks */
-                    decode_partial_blocks = 1;
-                }
+            if (current_superblock + current_run > s->superblock_count) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid partially coded superblock run length\n");
+                return -1;
             }
-            s->superblock_coding[current_superblock++] = bit;
+
+            memset(s->superblock_coding + current_superblock, bit, current_run);
+
+            current_superblock += current_run;
+            if (bit)
+                num_partial_superblocks += current_run;
+
+            if (s->theora && current_run == MAXIMUM_LONG_BIT_RUN)
+                bit = get_bits1(gb);
+            else
+                bit ^= 1;
         }
 
         /* unpack the list of fully coded superblocks if any of the blocks were
          * not marked as partially coded in the previous step */
-        if (decode_fully_flags) {
+        if (num_partial_superblocks < s->superblock_count) {
+            int superblocks_decoded = 0;
 
             current_superblock = 0;
-            current_run = 0;
             bit = get_bits1(gb);
-            /* toggle the bit because as soon as the first run length is
-             * fetched the bit will be toggled again */
-            bit ^= 1;
-            while (current_superblock < s->superblock_count) {
-
-                /* skip any superblocks already marked as partially coded */
-                if (s->superblock_coding[current_superblock] == SB_NOT_CODED) {
-
-                    if (current_run-- == 0) {
-                        bit ^= 1;
+            while (superblocks_decoded < s->superblock_count - num_partial_superblocks
+                   && get_bits_left(gb) > 0) {
                         current_run = get_vlc2(gb,
-                            s->superblock_run_length_vlc.table, 6, 2);
-                        if (current_run == 33)
+                            s->superblock_run_length_vlc.table, 6, 2) + 1;
+                        if (current_run == 34)
                             current_run += get_bits(gb, 12);
+
+                for (j = 0; j < current_run; current_superblock++) {
+                    if (current_superblock >= s->superblock_count) {
+                        av_log(s->avctx, AV_LOG_ERROR, "Invalid fully coded superblock run length\n");
+                        return -1;
                     }
+
+                /* skip any superblocks already marked as partially coded */
+                if (s->superblock_coding[current_superblock] == SB_NOT_CODED) {
                     s->superblock_coding[current_superblock] = 2*bit;
+                    j++;
+                }
                 }
-                current_superblock++;
+                superblocks_decoded += current_run;
+
+                if (s->theora && current_run == MAXIMUM_LONG_BIT_RUN)
+                    bit = get_bits1(gb);
+                else
+                    bit ^= 1;
             }
         }
 
         /* if there were partial blocks, initialize bitstream for
          * unpacking fragment codings */
-        if (decode_partial_blocks) {
+        if (num_partial_superblocks) {
 
             current_run = 0;
             bit = get_bits1(gb);
@@ -651,32 +458,25 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
 
     /* figure out which fragments are coded; iterate through each
      * superblock (all planes) */
-    s->coded_fragment_list_index = 0;
-    s->next_coeff= s->coeffs + s->fragment_count;
-    s->first_coded_y_fragment = s->first_coded_c_fragment = 0;
-    s->last_coded_y_fragment = s->last_coded_c_fragment = -1;
-    first_c_fragment_seen = 0;
+    s->total_num_coded_frags = 0;
     memset(s->macroblock_coding, MODE_COPY, s->macroblock_count);
-    for (i = 0; i < s->superblock_count; i++) {
+
+    for (plane = 0; plane < 3; plane++) {
+        int sb_start = superblock_starts[plane];
+        int sb_end = sb_start + (plane ? s->c_superblock_count : s->y_superblock_count);
+        int num_coded_frags = 0;
+
+    for (i = sb_start; i < sb_end && get_bits_left(gb) > 0; i++) {
 
         /* iterate through all 16 fragments in a superblock */
         for (j = 0; j < 16; j++) {
 
             /* if the fragment is in bounds, check its coding status */
             current_fragment = s->superblock_fragments[i * 16 + j];
-            if (current_fragment >= s->fragment_count) {
-                av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_superblocks(): bad fragment number (%d >= %d)\n",
-                    current_fragment, s->fragment_count);
-                return 1;
-            }
             if (current_fragment != -1) {
-                if (s->superblock_coding[i] == SB_NOT_CODED) {
-
-                    /* copy all the fragments from the prior frame */
-                    s->all_fragments[current_fragment].coding_method =
-                        MODE_COPY;
+                int coded = s->superblock_coding[i];
 
-                } else if (s->superblock_coding[i] == SB_PARTIALLY_CODED) {
+                if (s->superblock_coding[i] == SB_PARTIALLY_CODED) {
 
                     /* fragment may or may not be coded; this is the case
                      * that cares about the fragment coding runs */
@@ -685,79 +485,30 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
                         current_run = get_vlc2(gb,
                             s->fragment_run_length_vlc.table, 5, 2);
                     }
+                    coded = bit;
+                }
 
-                    if (bit) {
+                    if (coded) {
                         /* default mode; actual mode will be decoded in
                          * the next phase */
                         s->all_fragments[current_fragment].coding_method =
                             MODE_INTER_NO_MV;
-                        s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
-                        s->coded_fragment_list[s->coded_fragment_list_index] =
+                        s->coded_fragment_list[plane][num_coded_frags++] =
                             current_fragment;
-                        if ((current_fragment >= s->fragment_start[1]) &&
-                            (s->last_coded_y_fragment == -1) &&
-                            (!first_c_fragment_seen)) {
-                            s->first_coded_c_fragment = s->coded_fragment_list_index;
-                            s->last_coded_y_fragment = s->first_coded_c_fragment - 1;
-                            first_c_fragment_seen = 1;
-                        }
-                        s->coded_fragment_list_index++;
-                        s->macroblock_coding[s->all_fragments[current_fragment].macroblock] = MODE_INTER_NO_MV;
                     } else {
                         /* not coded; copy this fragment from the prior frame */
                         s->all_fragments[current_fragment].coding_method =
                             MODE_COPY;
                     }
-
-                } else {
-
-                    /* fragments are fully coded in this superblock; actual
-                     * coding will be determined in next step */
-                    s->all_fragments[current_fragment].coding_method =
-                        MODE_INTER_NO_MV;
-                    s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
-                    s->coded_fragment_list[s->coded_fragment_list_index] =
-                        current_fragment;
-                    if ((current_fragment >= s->fragment_start[1]) &&
-                        (s->last_coded_y_fragment == -1) &&
-                        (!first_c_fragment_seen)) {
-                        s->first_coded_c_fragment = s->coded_fragment_list_index;
-                        s->last_coded_y_fragment = s->first_coded_c_fragment - 1;
-                        first_c_fragment_seen = 1;
-                    }
-                    s->coded_fragment_list_index++;
-                    s->macroblock_coding[s->all_fragments[current_fragment].macroblock] = MODE_INTER_NO_MV;
-                }
             }
         }
     }
-
-    if (!first_c_fragment_seen)
-        /* only Y fragments coded in this frame */
-        s->last_coded_y_fragment = s->coded_fragment_list_index - 1;
-    else
-        /* end the list of coded C fragments */
-        s->last_coded_c_fragment = s->coded_fragment_list_index - 1;
-
-    for (i = 0; i < s->fragment_count - 1; i++) {
-        s->fast_fragment_list[i] = i + 1;
-    }
-    s->fast_fragment_list[s->fragment_count - 1] = -1;
-
-    if (s->last_coded_y_fragment == -1)
-        s->fragment_list_y_head = -1;
-    else {
-        s->fragment_list_y_head = s->first_coded_y_fragment;
-        s->fast_fragment_list[s->last_coded_y_fragment] = -1;
-    }
-
-    if (s->last_coded_c_fragment == -1)
-        s->fragment_list_c_head = -1;
-    else {
-        s->fragment_list_c_head = s->first_coded_c_fragment;
-        s->fast_fragment_list[s->last_coded_c_fragment] = -1;
+        s->total_num_coded_frags += num_coded_frags;
+        for (i = 0; i < 64; i++)
+            s->num_coded_frags[plane][i] = num_coded_frags;
+        if (plane < 2)
+            s->coded_fragment_list[plane+1] = s->coded_fragment_list[plane] + num_coded_frags;
     }
-
     return 0;
 }
 
@@ -767,12 +518,14 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
  */
 static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
 {
-    int i, j, k;
+    int i, j, k, sb_x, sb_y;
     int scheme;
     int current_macroblock;
     int current_fragment;
     int coding_mode;
     int custom_mode_alphabet[CODING_MODE_COUNT];
+    const int *alphabet;
+    Vp3Fragment *frag;
 
     if (s->keyframe) {
         for (i = 0; i < s->fragment_count; i++)
@@ -789,50 +542,76 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
                 custom_mode_alphabet[i] = MODE_INTER_NO_MV;
             for (i = 0; i < 8; i++)
                 custom_mode_alphabet[get_bits(gb, 3)] = i;
-        }
+            alphabet = custom_mode_alphabet;
+        } else
+            alphabet = ModeAlphabet[scheme-1];
 
         /* iterate through all of the macroblocks that contain 1 or more
          * coded fragments */
-        for (i = 0; i < s->u_superblock_start; i++) {
+        for (sb_y = 0; sb_y < s->y_superblock_height; sb_y++) {
+            for (sb_x = 0; sb_x < s->y_superblock_width; sb_x++) {
+                if (get_bits_left(gb) <= 0)
+                    return -1;
 
             for (j = 0; j < 4; j++) {
-                current_macroblock = s->superblock_macroblocks[i * 4 + j];
-                if ((current_macroblock == -1) ||
-                    (s->macroblock_coding[current_macroblock] == MODE_COPY))
+                int mb_x = 2*sb_x +   (j>>1);
+                int mb_y = 2*sb_y + (((j>>1)+j)&1);
+                current_macroblock = mb_y * s->macroblock_width + mb_x;
+
+                if (mb_x >= s->macroblock_width || mb_y >= s->macroblock_height)
+                    continue;
+
+#define BLOCK_X (2*mb_x + (k&1))
+#define BLOCK_Y (2*mb_y + (k>>1))
+                /* coding modes are only stored if the macroblock has at least one
+                 * luma block coded, otherwise it must be INTER_NO_MV */
+                for (k = 0; k < 4; k++) {
+                    current_fragment = BLOCK_Y*s->fragment_width[0] + BLOCK_X;
+                    if (s->all_fragments[current_fragment].coding_method != MODE_COPY)
+                        break;
+                }
+                if (k == 4) {
+                    s->macroblock_coding[current_macroblock] = MODE_INTER_NO_MV;
                     continue;
-                if (current_macroblock >= s->macroblock_count) {
-                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_modes(): bad macroblock number (%d >= %d)\n",
-                        current_macroblock, s->macroblock_count);
-                    return 1;
                 }
 
                 /* mode 7 means get 3 bits for each coding mode */
                 if (scheme == 7)
                     coding_mode = get_bits(gb, 3);
-                else if(scheme == 0)
-                    coding_mode = custom_mode_alphabet
-                        [get_vlc2(gb, s->mode_code_vlc.table, 3, 3)];
                 else
-                    coding_mode = ModeAlphabet[scheme-1]
+                    coding_mode = alphabet
                         [get_vlc2(gb, s->mode_code_vlc.table, 3, 3)];
 
                 s->macroblock_coding[current_macroblock] = coding_mode;
-                for (k = 0; k < 6; k++) {
-                    current_fragment =
-                        s->macroblock_fragments[current_macroblock * 6 + k];
-                    if (current_fragment == -1)
-                        continue;
-                    if (current_fragment >= s->fragment_count) {
-                        av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_modes(): bad fragment number (%d >= %d)\n",
-                            current_fragment, s->fragment_count);
-                        return 1;
+                for (k = 0; k < 4; k++) {
+                    frag = s->all_fragments + BLOCK_Y*s->fragment_width[0] + BLOCK_X;
+                    if (frag->coding_method != MODE_COPY)
+                        frag->coding_method = coding_mode;
+                }
+
+#define SET_CHROMA_MODES \
+    if (frag[s->fragment_start[1]].coding_method != MODE_COPY) \
+        frag[s->fragment_start[1]].coding_method = coding_mode;\
+    if (frag[s->fragment_start[2]].coding_method != MODE_COPY) \
+        frag[s->fragment_start[2]].coding_method = coding_mode;
+
+                if (s->chroma_y_shift) {
+                    frag = s->all_fragments + mb_y*s->fragment_width[1] + mb_x;
+                    SET_CHROMA_MODES
+                } else if (s->chroma_x_shift) {
+                    frag = s->all_fragments + 2*mb_y*s->fragment_width[1] + mb_x;
+                    for (k = 0; k < 2; k++) {
+                        SET_CHROMA_MODES
+                        frag += s->fragment_width[1];
+                    }
+                } else {
+                    for (k = 0; k < 4; k++) {
+                        frag = s->all_fragments + BLOCK_Y*s->fragment_width[1] + BLOCK_X;
+                        SET_CHROMA_MODES
                     }
-                    if (s->all_fragments[current_fragment].coding_method !=
-                        MODE_COPY)
-                        s->all_fragments[current_fragment].coding_method =
-                            coding_mode;
                 }
             }
+            }
         }
     }
 
@@ -845,47 +624,40 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
  */
 static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
 {
-    int i, j, k, l;
+    int j, k, sb_x, sb_y;
     int coding_mode;
-    int motion_x[6];
-    int motion_y[6];
+    int motion_x[4];
+    int motion_y[4];
     int last_motion_x = 0;
     int last_motion_y = 0;
     int prior_last_motion_x = 0;
     int prior_last_motion_y = 0;
     int current_macroblock;
     int current_fragment;
+    int frag;
 
     if (s->keyframe)
         return 0;
 
-    memset(motion_x, 0, 6 * sizeof(int));
-    memset(motion_y, 0, 6 * sizeof(int));
-
     /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */
     coding_mode = get_bits1(gb);
 
     /* iterate through all of the macroblocks that contain 1 or more
      * coded fragments */
-    for (i = 0; i < s->u_superblock_start; i++) {
+    for (sb_y = 0; sb_y < s->y_superblock_height; sb_y++) {
+        for (sb_x = 0; sb_x < s->y_superblock_width; sb_x++) {
+            if (get_bits_left(gb) <= 0)
+                return -1;
 
         for (j = 0; j < 4; j++) {
-            current_macroblock = s->superblock_macroblocks[i * 4 + j];
-            if ((current_macroblock == -1) ||
+            int mb_x = 2*sb_x +   (j>>1);
+            int mb_y = 2*sb_y + (((j>>1)+j)&1);
+            current_macroblock = mb_y * s->macroblock_width + mb_x;
+
+            if (mb_x >= s->macroblock_width || mb_y >= s->macroblock_height ||
                 (s->macroblock_coding[current_macroblock] == MODE_COPY))
                 continue;
-            if (current_macroblock >= s->macroblock_count) {
-                av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad macroblock number (%d >= %d)\n",
-                    current_macroblock, s->macroblock_count);
-                return 1;
-            }
 
-            current_fragment = s->macroblock_fragments[current_macroblock * 6];
-            if (current_fragment >= s->fragment_count) {
-                av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad fragment number (%d >= %d\n",
-                    current_fragment, s->fragment_count);
-                return 1;
-            }
             switch (s->macroblock_coding[current_macroblock]) {
 
             case MODE_INTER_PLUS_MV:
@@ -916,12 +688,9 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
 
                 /* fetch 4 vectors from the bitstream, one for each
                  * Y fragment, then average for the C fragment vectors */
-                motion_x[4] = motion_y[4] = 0;
                 for (k = 0; k < 4; k++) {
-                    for (l = 0; l < s->coded_fragment_list_index; l++)
-                        if (s->coded_fragment_list[l] == s->macroblock_fragments[6*current_macroblock + k])
-                            break;
-                    if (l < s->coded_fragment_list_index) {
+                    current_fragment = BLOCK_Y*s->fragment_width[0] + BLOCK_X;
+                    if (s->all_fragments[current_fragment].coding_method != MODE_COPY) {
                         if (coding_mode == 0) {
                             motion_x[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                             motion_y[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
@@ -935,14 +704,7 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
                         motion_x[k] = 0;
                         motion_y[k] = 0;
                     }
-                    motion_x[4] += motion_x[k];
-                    motion_y[4] += motion_y[k];
                 }
-
-                motion_x[5]=
-                motion_x[4]= RSHIFT(motion_x[4], 2);
-                motion_y[5]=
-                motion_y[4]= RSHIFT(motion_y[4], 2);
                 break;
 
             case MODE_INTER_LAST_MV:
@@ -977,25 +739,61 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
             }
 
             /* assign the motion vectors to the correct fragments */
-            for (k = 0; k < 6; k++) {
+            for (k = 0; k < 4; k++) {
                 current_fragment =
-                    s->macroblock_fragments[current_macroblock * 6 + k];
-                if (current_fragment == -1)
-                    continue;
-                if (current_fragment >= s->fragment_count) {
-                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad fragment number (%d >= %d)\n",
-                        current_fragment, s->fragment_count);
-                    return 1;
+                    BLOCK_Y*s->fragment_width[0] + BLOCK_X;
+                if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
+                    s->motion_val[0][current_fragment][0] = motion_x[k];
+                    s->motion_val[0][current_fragment][1] = motion_y[k];
+                } else {
+                    s->motion_val[0][current_fragment][0] = motion_x[0];
+                    s->motion_val[0][current_fragment][1] = motion_y[0];
                 }
+            }
+
+            if (s->chroma_y_shift) {
                 if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
-                    s->all_fragments[current_fragment].motion_x = motion_x[k];
-                    s->all_fragments[current_fragment].motion_y = motion_y[k];
+                    motion_x[0] = RSHIFT(motion_x[0] + motion_x[1] + motion_x[2] + motion_x[3], 2);
+                    motion_y[0] = RSHIFT(motion_y[0] + motion_y[1] + motion_y[2] + motion_y[3], 2);
+                }
+                motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
+                motion_y[0] = (motion_y[0]>>1) | (motion_y[0]&1);
+                frag = mb_y*s->fragment_width[1] + mb_x;
+                s->motion_val[1][frag][0] = motion_x[0];
+                s->motion_val[1][frag][1] = motion_y[0];
+            } else if (s->chroma_x_shift) {
+                if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
+                    motion_x[0] = RSHIFT(motion_x[0] + motion_x[1], 1);
+                    motion_y[0] = RSHIFT(motion_y[0] + motion_y[1], 1);
+                    motion_x[1] = RSHIFT(motion_x[2] + motion_x[3], 1);
+                    motion_y[1] = RSHIFT(motion_y[2] + motion_y[3], 1);
                 } else {
-                    s->all_fragments[current_fragment].motion_x = motion_x[0];
-                    s->all_fragments[current_fragment].motion_y = motion_y[0];
+                    motion_x[1] = motion_x[0];
+                    motion_y[1] = motion_y[0];
+                }
+                motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
+                motion_x[1] = (motion_x[1]>>1) | (motion_x[1]&1);
+
+                frag = 2*mb_y*s->fragment_width[1] + mb_x;
+                for (k = 0; k < 2; k++) {
+                    s->motion_val[1][frag][0] = motion_x[k];
+                    s->motion_val[1][frag][1] = motion_y[k];
+                    frag += s->fragment_width[1];
+                }
+            } else {
+                for (k = 0; k < 4; k++) {
+                    frag = BLOCK_Y*s->fragment_width[1] + BLOCK_X;
+                    if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
+                        s->motion_val[1][frag][0] = motion_x[k];
+                        s->motion_val[1][frag][1] = motion_y[k];
+                    } else {
+                        s->motion_val[1][frag][0] = motion_x[0];
+                        s->motion_val[1][frag][1] = motion_y[0];
+                    }
                 }
             }
         }
+        }
     }
 
     return 0;
@@ -1004,7 +802,7 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
 static int unpack_block_qpis(Vp3DecodeContext *s, GetBitContext *gb)
 {
     int qpi, i, j, bit, run_length, blocks_decoded, num_blocks_at_qpi;
-    int num_blocks = s->coded_fragment_list_index;
+    int num_blocks = s->total_num_coded_frags;
 
     for (qpi = 0; qpi < s->nqps-1 && num_blocks > 0; qpi++) {
         i = blocks_decoded = num_blocks_at_qpi = 0;
@@ -1021,20 +819,20 @@ static int unpack_block_qpis(Vp3DecodeContext *s, GetBitContext *gb)
                 num_blocks_at_qpi += run_length;
 
             for (j = 0; j < run_length; i++) {
-                if (i >= s->coded_fragment_list_index)
+                if (i >= s->total_num_coded_frags)
                     return -1;
 
-                if (s->all_fragments[s->coded_fragment_list[i]].qpi == qpi) {
-                    s->all_fragments[s->coded_fragment_list[i]].qpi += bit;
+                if (s->all_fragments[s->coded_fragment_list[0][i]].qpi == qpi) {
+                    s->all_fragments[s->coded_fragment_list[0][i]].qpi += bit;
                     j++;
                 }
             }
 
-            if (run_length == 4129)
+            if (run_length == MAXIMUM_LONG_BIT_RUN)
                 bit = get_bits1(gb);
             else
                 bit ^= 1;
-        } while (blocks_decoded < num_blocks);
+        } while (blocks_decoded < num_blocks && get_bits_left(gb) > 0);
 
         num_blocks -= num_blocks_at_qpi;
     }
@@ -1056,49 +854,40 @@ static int unpack_block_qpis(Vp3DecodeContext *s, GetBitContext *gb)
  */
 static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
                         VLC *table, int coeff_index,
-                        int y_plane,
+                        int plane,
                         int eob_run)
 {
-    int i;
+    int i, j = 0;
     int token;
     int zero_run = 0;
     DCTELEM coeff = 0;
-    Vp3Fragment *fragment;
     int bits_to_get;
-    int next_fragment;
-    int previous_fragment;
-    int fragment_num;
-    int *list_head;
+    int blocks_ended;
+    int coeff_i = 0;
+    int num_coeffs = s->num_coded_frags[plane][coeff_index];
+    int16_t *dct_tokens = s->dct_tokens[plane][coeff_index];
 
     /* local references to structure members to avoid repeated deferences */
-    uint8_t *perm= s->scantable.permutated;
-    int *coded_fragment_list = s->coded_fragment_list;
+    int *coded_fragment_list = s->coded_fragment_list[plane];
     Vp3Fragment *all_fragments = s->all_fragments;
-    uint8_t *coeff_counts = s->coeff_counts;
     VLC_TYPE (*vlc_table)[2] = table->table;
-    int *fast_fragment_list = s->fast_fragment_list;
 
-    if (y_plane) {
-        next_fragment = s->fragment_list_y_head;
-        list_head = &s->fragment_list_y_head;
+    if (num_coeffs < 0)
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid number of coefficents at level %d\n", coeff_index);
+
+    if (eob_run > num_coeffs) {
+        coeff_i = blocks_ended = num_coeffs;
+        eob_run -= num_coeffs;
     } else {
-        next_fragment = s->fragment_list_c_head;
-        list_head = &s->fragment_list_c_head;
+        coeff_i = blocks_ended = eob_run;
+        eob_run = 0;
     }
 
-    i = next_fragment;
-    previous_fragment = -1;  /* this indicates that the previous fragment is actually the list head */
-    while (i != -1) {
-        fragment_num = coded_fragment_list[i];
+    // insert fake EOB token to cover the split between planes or zzi
+    if (blocks_ended)
+        dct_tokens[j++] = blocks_ended << 2;
 
-        if (coeff_counts[fragment_num] > coeff_index) {
-            previous_fragment = i;
-            i = fast_fragment_list[i];
-            continue;
-        }
-        fragment = &all_fragments[fragment_num];
-
-        if (!eob_run) {
+    while (coeff_i < num_coeffs && get_bits_left(gb) > 0) {
             /* decode a VLC into a token */
             token = get_vlc2(gb, vlc_table, 5, 3);
             /* use the token to get a zero run, a coefficient, and an eob run */
@@ -1106,7 +895,20 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
                 eob_run = eob_run_base[token];
                 if (eob_run_get_bits[token])
                     eob_run += get_bits(gb, eob_run_get_bits[token]);
-                coeff = zero_run = 0;
+
+                // record only the number of blocks ended in this plane,
+                // any spill will be recorded in the next plane.
+                if (eob_run > num_coeffs - coeff_i) {
+                    dct_tokens[j++] = TOKEN_EOB(num_coeffs - coeff_i);
+                    blocks_ended   += num_coeffs - coeff_i;
+                    eob_run        -= num_coeffs - coeff_i;
+                    coeff_i         = num_coeffs;
+                } else {
+                    dct_tokens[j++] = TOKEN_EOB(eob_run);
+                    blocks_ended   += eob_run;
+                    coeff_i        += eob_run;
+                    eob_run = 0;
+                }
             } else {
                 bits_to_get = coeff_get_bits[token];
                 if (bits_to_get)
@@ -1116,34 +918,49 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
                 zero_run = zero_run_base[token];
                 if (zero_run_get_bits[token])
                     zero_run += get_bits(gb, zero_run_get_bits[token]);
-            }
-        }
 
-        if (!eob_run) {
-            coeff_counts[fragment_num] += zero_run;
-            if (coeff_counts[fragment_num] < 64){
-                fragment->next_coeff->coeff= coeff;
-                fragment->next_coeff->index= perm[coeff_counts[fragment_num]++]; //FIXME perm here already?
-                fragment->next_coeff->next= s->next_coeff;
-                s->next_coeff->next=NULL;
-                fragment->next_coeff= s->next_coeff++;
-            }
-            /* previous fragment is now this fragment */
-            previous_fragment = i;
-        } else {
-            coeff_counts[fragment_num] |= 128;
-            eob_run--;
-            /* remove this fragment from the list */
-            if (previous_fragment != -1)
-                fast_fragment_list[previous_fragment] = fast_fragment_list[i];
-            else
-                *list_head = fast_fragment_list[i];
-            /* previous fragment remains unchanged */
-        }
+                if (zero_run) {
+                    dct_tokens[j++] = TOKEN_ZERO_RUN(coeff, zero_run);
+                } else {
+                    // Save DC into the fragment structure. DC prediction is
+                    // done in raster order, so the actual DC can't be stored
+                    // with the other tokens. We still need a token in dct_tokens[],
+                    // however, or else the structure collapses on itself.
+                    if (!coeff_index)
+                        all_fragments[coded_fragment_list[coeff_i]].dc = coeff;
+
+                    dct_tokens[j++] = TOKEN_COEFF(coeff);
+                }
+
+                if (coeff_index + zero_run > 64) {
+                    av_log(s->avctx, AV_LOG_DEBUG, "Invalid zero run of %d with"
+                           " %d coeffs left\n", zero_run, 64-coeff_index);
+                    zero_run = 64 - coeff_index;
+                }
 
-        i = fast_fragment_list[i];
+                // zero runs code multiple coefficients,
+                // so don't try to decode coeffs for those higher levels
+                for (i = coeff_index+1; i <= coeff_index+zero_run; i++)
+                    s->num_coded_frags[plane][i]--;
+                coeff_i++;
+            }
     }
 
+    if (blocks_ended > s->num_coded_frags[plane][coeff_index])
+        av_log(s->avctx, AV_LOG_ERROR, "More blocks ended than coded!\n");
+
+    // decrement the number of blocks that have higher coefficients for each
+    // EOB run at this level
+    if (blocks_ended)
+        for (i = coeff_index+1; i < 64; i++)
+            s->num_coded_frags[plane][i] -= blocks_ended;
+
+    // setup the next buffer
+    if (plane < 2)
+        s->dct_tokens[plane+1][coeff_index] = dct_tokens + j;
+    else if (coeff_index < 63)
+        s->dct_tokens[0][coeff_index+1] = dct_tokens + j;
+
     return eob_run;
 }
 
@@ -1166,28 +983,32 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
     VLC *y_tables[64];
     VLC *c_tables[64];
 
+    s->dct_tokens[0][0] = s->dct_tokens_base;
+
     /* fetch the DC table indexes */
     dc_y_table = get_bits(gb, 4);
     dc_c_table = get_bits(gb, 4);
 
     /* unpack the Y plane DC coefficients */
     residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_y_table], 0,
-        1, residual_eob_run);
+        0, residual_eob_run);
 
     /* reverse prediction of the Y-plane DC coefficients */
-    reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height);
+    reverse_dc_prediction(s, 0, s->fragment_width[0], s->fragment_height[0]);
 
     /* unpack the C plane DC coefficients */
     residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0,
-        0, residual_eob_run);
+        1, residual_eob_run);
+    residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0,
+        2, residual_eob_run);
 
     /* reverse prediction of the C-plane DC coefficients */
     if (!(s->avctx->flags & CODEC_FLAG_GRAY))
     {
         reverse_dc_prediction(s, s->fragment_start[1],
-            s->fragment_width / 2, s->fragment_height / 2);
+            s->fragment_width[1], s->fragment_height[1]);
         reverse_dc_prediction(s, s->fragment_start[2],
-            s->fragment_width / 2, s->fragment_height / 2);
+            s->fragment_width[1], s->fragment_height[1]);
     }
 
     /* fetch the AC table indexes */
@@ -1214,13 +1035,13 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
 
     /* decode all AC coefficents */
     for (i = 1; i <= 63; i++) {
-        if (s->fragment_list_y_head != -1)
             residual_eob_run = unpack_vlcs(s, gb, y_tables[i], i,
-                1, residual_eob_run);
+                0, residual_eob_run);
 
-        if (s->fragment_list_c_head != -1)
             residual_eob_run = unpack_vlcs(s, gb, c_tables[i], i,
-                0, residual_eob_run);
+                1, residual_eob_run);
+            residual_eob_run = unpack_vlcs(s, gb, c_tables[i], i,
+                2, residual_eob_run);
     }
 
     return 0;
@@ -1233,7 +1054,7 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
  */
 #define COMPATIBLE_FRAME(x) \
   (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type)
-#define DC_COEFF(u) (s->coeffs[u].index ? 0 : s->coeffs[u].coeff) //FIXME do somethin to simplify this
+#define DC_COEFF(u) s->all_fragments[u].dc
 
 static void reverse_dc_prediction(Vp3DecodeContext *s,
                                   int first_fragment,
@@ -1377,83 +1198,217 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                 }
 
                 /* at long last, apply the predictor */
-                if(s->coeffs[i].index){
-                    *s->next_coeff= s->coeffs[i];
-                    s->coeffs[i].index=0;
-                    s->coeffs[i].coeff=0;
-                    s->coeffs[i].next= s->next_coeff++;
-                }
-                s->coeffs[i].coeff += predicted_dc;
+                DC_COEFF(i) += predicted_dc;
                 /* save the DC */
                 last_dc[current_frame_type] = DC_COEFF(i);
-                if(DC_COEFF(i) && !(s->coeff_counts[i]&127)){
-                    s->coeff_counts[i]= 129;
-//                    s->all_fragments[i].next_coeff= s->next_coeff;
-                    s->coeffs[i].next= s->next_coeff;
-                    (s->next_coeff++)->next=NULL;
+            }
+        }
+    }
+}
+
+static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int yend)
+{
+    int x, y;
+    int *bounding_values= s->bounding_values_array+127;
+
+    int width           = s->fragment_width[!!plane];
+    int height          = s->fragment_height[!!plane];
+    int fragment        = s->fragment_start        [plane] + ystart * width;
+    int stride          = s->current_frame.linesize[plane];
+    uint8_t *plane_data = s->current_frame.data    [plane];
+    if (!s->flipped_image) stride = -stride;
+    plane_data += s->data_offset[plane] + 8*ystart*stride;
+
+    for (y = ystart; y < yend; y++) {
+
+        for (x = 0; x < width; x++) {
+            /* This code basically just deblocks on the edges of coded blocks.
+             * However, it has to be much more complicated because of the
+             * braindamaged deblock ordering used in VP3/Theora. Order matters
+             * because some pixels get filtered twice. */
+            if( s->all_fragments[fragment].coding_method != MODE_COPY )
+            {
+                /* do not perform left edge filter for left columns frags */
+                if (x > 0) {
+                    s->dsp.vp3_h_loop_filter(
+                        plane_data + 8*x,
+                        stride, bounding_values);
+                }
+
+                /* do not perform top edge filter for top row fragments */
+                if (y > 0) {
+                    s->dsp.vp3_v_loop_filter(
+                        plane_data + 8*x,
+                        stride, bounding_values);
+                }
+
+                /* do not perform right edge filter for right column
+                 * fragments or if right fragment neighbor is also coded
+                 * in this frame (it will be filtered in next iteration) */
+                if ((x < width - 1) &&
+                    (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
+                    s->dsp.vp3_h_loop_filter(
+                        plane_data + 8*x + 8,
+                        stride, bounding_values);
+                }
+
+                /* do not perform bottom edge filter for bottom row
+                 * fragments or if bottom fragment neighbor is also coded
+                 * in this frame (it will be filtered in the next row) */
+                if ((y < height - 1) &&
+                    (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
+                    s->dsp.vp3_v_loop_filter(
+                        plane_data + 8*x + 8*stride,
+                        stride, bounding_values);
                 }
             }
+
+            fragment++;
         }
+        plane_data += 8*stride;
     }
 }
 
+/**
+ * Pulls DCT tokens from the 64 levels to decode and dequant the coefficients
+ * for the next block in coding order
+ */
+static inline int vp3_dequant(Vp3DecodeContext *s, Vp3Fragment *frag,
+                              int plane, int inter, DCTELEM block[64])
+{
+    int16_t *dequantizer = s->qmat[frag->qpi][inter][plane];
+    uint8_t *perm = s->scantable.permutated;
+    int i = 0;
+
+    do {
+        int token = *s->dct_tokens[plane][i];
+        switch (token & 3) {
+        case 0: // EOB
+            if (--token < 4) // 0-3 are token types, so the EOB run must now be 0
+                s->dct_tokens[plane][i]++;
+            else
+                *s->dct_tokens[plane][i] = token & ~3;
+            goto end;
+        case 1: // zero run
+            s->dct_tokens[plane][i]++;
+            i += (token >> 2) & 0x7f;
+            block[perm[i]] = (token >> 9) * dequantizer[perm[i]];
+            i++;
+            break;
+        case 2: // coeff
+            block[perm[i]] = (token >> 2) * dequantizer[perm[i]];
+            s->dct_tokens[plane][i++]++;
+            break;
+        default: // shouldn't happen
+            return i;
+        }
+    } while (i < 64);
+end:
+    // the actual DC+prediction is in the fragment structure
+    block[0] = frag->dc * s->qmat[0][inter][plane][0];
+    return i;
+}
+
+/**
+ * called when all pixels up to row y are complete
+ */
+static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y)
+{
+    int h, cy;
+    int offset[4];
+
+    if(s->avctx->draw_horiz_band==NULL)
+        return;
+
+    h= y - s->last_slice_end;
+    y -= h;
+
+    if (!s->flipped_image) {
+        if (y == 0)
+            h -= s->height - s->avctx->height;  // account for non-mod16
+        y = s->height - y - h;
+    }
+
+    cy = y >> 1;
+    offset[0] = s->current_frame.linesize[0]*y;
+    offset[1] = s->current_frame.linesize[1]*cy;
+    offset[2] = s->current_frame.linesize[2]*cy;
+    offset[3] = 0;
+
+    emms_c();
+    s->avctx->draw_horiz_band(s->avctx, &s->current_frame, offset, y, 3, h);
+    s->last_slice_end= y + h;
+}
+
 /*
  * Perform the final rendering for a particular slice of data.
- * The slice number ranges from 0..(macroblock_height - 1).
+ * The slice number ranges from 0..(c_superblock_height - 1).
  */
 static void render_slice(Vp3DecodeContext *s, int slice)
 {
-    int x;
-    int16_t *dequantizer;
-    DECLARE_ALIGNED_16(DCTELEM, block)[64];
+    int x, y, i, j;
+    LOCAL_ALIGNED_16(DCTELEM, block, [64]);
     int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
     int motion_halfpel_index;
     uint8_t *motion_source;
-    int plane;
-    int current_macroblock_entry = slice * s->macroblock_width * 6;
+    int plane, first_pixel;
 
-    if (slice >= s->macroblock_height)
+    if (slice >= s->c_superblock_height)
         return;
 
     for (plane = 0; plane < 3; plane++) {
-        uint8_t *output_plane = s->current_frame.data    [plane];
-        uint8_t *  last_plane = s->   last_frame.data    [plane];
-        uint8_t *golden_plane = s-> golden_frame.data    [plane];
+        uint8_t *output_plane = s->current_frame.data    [plane] + s->data_offset[plane];
+        uint8_t *  last_plane = s->   last_frame.data    [plane] + s->data_offset[plane];
+        uint8_t *golden_plane = s-> golden_frame.data    [plane] + s->data_offset[plane];
         int stride            = s->current_frame.linesize[plane];
-        int plane_width       = s->width  >> !!plane;
-        int plane_height      = s->height >> !!plane;
-        int y =        slice *  FRAGMENT_PIXELS << !plane ;
-        int slice_height = y + (FRAGMENT_PIXELS << !plane);
-        int i = s->macroblock_fragments[current_macroblock_entry + plane + 3*!!plane];
+        int plane_width       = s->width  >> (plane && s->chroma_x_shift);
+        int plane_height      = s->height >> (plane && s->chroma_y_shift);
+        int8_t (*motion_val)[2] = s->motion_val[!!plane];
+
+        int sb_x, sb_y        = slice << (!plane && s->chroma_y_shift);
+        int slice_height      = sb_y + 1 + (!plane && s->chroma_y_shift);
+        int slice_width       = plane ? s->c_superblock_width : s->y_superblock_width;
+
+        int fragment_width    = s->fragment_width[!!plane];
+        int fragment_height   = s->fragment_height[!!plane];
+        int fragment_start    = s->fragment_start[plane];
 
         if (!s->flipped_image) stride = -stride;
+        if (CONFIG_GRAY && plane && (s->avctx->flags & CODEC_FLAG_GRAY))
+            continue;
 
 
         if(FFABS(stride) > 2048)
             return; //various tables are fixed size
 
-        /* for each fragment row in the slice (both of them)... */
-        for (; y < slice_height; y += 8) {
+        /* for each superblock row in the slice (two for luma when chroma is vertically subsampled, otherwise one)... */
+        for (; sb_y < slice_height; sb_y++) {
 
-            /* for each fragment in a row... */
-            for (x = 0; x < plane_width; x += 8, i++) {
+            /* for each superblock in a row... */
+            for (sb_x = 0; sb_x < slice_width; sb_x++) {
 
-                if ((i < 0) || (i >= s->fragment_count)) {
-                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:render_slice(): bad fragment number (%d)\n", i);
-                    return;
-                }
+                /* for each block in a superblock... */
+                for (j = 0; j < 16; j++) {
+                    x = 4*sb_x + hilbert_offset[j][0];
+                    y = 4*sb_y + hilbert_offset[j][1];
 
-                /* transform if this block was coded */
-                if ((s->all_fragments[i].coding_method != MODE_COPY) &&
-                    !((s->avctx->flags & CODEC_FLAG_GRAY) && plane)) {
+                    i = fragment_start + y*fragment_width + x;
 
+                    // bounds check
+                    if (x >= fragment_width || y >= fragment_height)
+                        continue;
+
+                first_pixel = 8*y*stride + 8*x;
+
+                /* transform if this block was coded */
+                if (s->all_fragments[i].coding_method != MODE_COPY) {
                     if ((s->all_fragments[i].coding_method == MODE_USING_GOLDEN) ||
                         (s->all_fragments[i].coding_method == MODE_GOLDEN_MV))
                         motion_source= golden_plane;
                     else
                         motion_source= last_plane;
 
-                    motion_source += s->all_fragments[i].first_pixel;
+                    motion_source += first_pixel;
                     motion_halfpel_index = 0;
 
                     /* sort out the motion vector if this fragment is coded
@@ -1461,17 +1416,11 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                     if ((s->all_fragments[i].coding_method > MODE_INTRA) &&
                         (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) {
                         int src_x, src_y;
-                        motion_x = s->all_fragments[i].motion_x;
-                        motion_y = s->all_fragments[i].motion_y;
-                        if(plane){
-                            motion_x= (motion_x>>1) | (motion_x&1);
-                            motion_y= (motion_y>>1) | (motion_y&1);
-                        }
+                        motion_x = motion_val[y*fragment_width + x][0];
+                        motion_y = motion_val[y*fragment_width + x][1];
 
-                        src_x= (motion_x>>1) + x;
-                        src_y= (motion_y>>1) + y;
-                        if ((motion_x == 127) || (motion_y == 127))
-                            av_log(s->avctx, AV_LOG_ERROR, " help! got invalid motion vector! (%X, %X)\n", motion_x, motion_y);
+                        src_x= (motion_x>>1) + 8*x;
+                        src_y= (motion_y>>1) + 8*y;
 
                         motion_halfpel_index = motion_x & 0x01;
                         motion_source += (motion_x >> 1);
@@ -1499,63 +1448,54 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                            put_no_rnd_pixels_tab is better optimzed */
                         if(motion_halfpel_index != 3){
                             s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
-                                output_plane + s->all_fragments[i].first_pixel,
+                                output_plane + first_pixel,
                                 motion_source, stride, 8);
                         }else{
                             int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1
                             s->dsp.put_no_rnd_pixels_l2[1](
-                                output_plane + s->all_fragments[i].first_pixel,
+                                output_plane + first_pixel,
                                 motion_source - d,
                                 motion_source + stride + 1 + d,
                                 stride, 8);
                         }
-                        dequantizer = s->qmat[s->all_fragments[i].qpi][1][plane];
-                    }else{
-                        dequantizer = s->qmat[s->all_fragments[i].qpi][0][plane];
                     }
 
-                    /* dequantize the DCT coefficients */
-                    if(s->avctx->idct_algo==FF_IDCT_VP3){
-                        Coeff *coeff= s->coeffs + i;
                         s->dsp.clear_block(block);
-                        while(coeff->next){
-                            block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
-                            coeff= coeff->next;
-                        }
-                    }else{
-                        Coeff *coeff= s->coeffs + i;
-                        s->dsp.clear_block(block);
-                        while(coeff->next){
-                            block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
-                            coeff= coeff->next;
-                        }
-                    }
 
                     /* invert DCT and place (or add) in final output */
 
                     if (s->all_fragments[i].coding_method == MODE_INTRA) {
+                        vp3_dequant(s, s->all_fragments + i, plane, 0, block);
                         if(s->avctx->idct_algo!=FF_IDCT_VP3)
                             block[0] += 128<<3;
                         s->dsp.idct_put(
-                            output_plane + s->all_fragments[i].first_pixel,
+                            output_plane + first_pixel,
                             stride,
                             block);
                     } else {
+                        if (vp3_dequant(s, s->all_fragments + i, plane, 1, block)) {
                         s->dsp.idct_add(
-                            output_plane + s->all_fragments[i].first_pixel,
+                            output_plane + first_pixel,
                             stride,
                             block);
+                        } else {
+                            s->dsp.vp3_idct_dc_add(output_plane + first_pixel, stride, block);
+                        }
                     }
                 } else {
 
                     /* copy directly from the previous frame */
                     s->dsp.put_pixels_tab[1][0](
-                        output_plane + s->all_fragments[i].first_pixel,
-                        last_plane + s->all_fragments[i].first_pixel,
+                        output_plane + first_pixel,
+                        last_plane + first_pixel,
                         stride, 8);
 
                 }
+                }
             }
+
+            // Filter up to the last row in the superblock row
+            apply_loop_filter(s, plane, 4*sb_y - !!sb_y, FFMIN(4*sb_y+3, fragment_height-1));
         }
     }
 
@@ -1567,119 +1507,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
       *     dispatch (slice - 1);
       */
 
-    emms_c();
-}
-
-static void apply_loop_filter(Vp3DecodeContext *s)
-{
-    int plane;
-    int x, y;
-    int *bounding_values= s->bounding_values_array+127;
-
-    for (plane = 0; plane < 3; plane++) {
-        int width           = s->fragment_width  >> !!plane;
-        int height          = s->fragment_height >> !!plane;
-        int fragment        = s->fragment_start        [plane];
-        int stride          = s->current_frame.linesize[plane];
-        uint8_t *plane_data = s->current_frame.data    [plane];
-        if (!s->flipped_image) stride = -stride;
-
-        for (y = 0; y < height; y++) {
-
-            for (x = 0; x < width; x++) {
-                /* This code basically just deblocks on the edges of coded blocks.
-                 * However, it has to be much more complicated because of the
-                 * braindamaged deblock ordering used in VP3/Theora. Order matters
-                 * because some pixels get filtered twice. */
-                if( s->all_fragments[fragment].coding_method != MODE_COPY )
-                {
-                    /* do not perform left edge filter for left columns frags */
-                    if (x > 0) {
-                        s->dsp.vp3_h_loop_filter(
-                            plane_data + s->all_fragments[fragment].first_pixel,
-                            stride, bounding_values);
-                    }
-
-                    /* do not perform top edge filter for top row fragments */
-                    if (y > 0) {
-                        s->dsp.vp3_v_loop_filter(
-                            plane_data + s->all_fragments[fragment].first_pixel,
-                            stride, bounding_values);
-                    }
-
-                    /* do not perform right edge filter for right column
-                     * fragments or if right fragment neighbor is also coded
-                     * in this frame (it will be filtered in next iteration) */
-                    if ((x < width - 1) &&
-                        (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
-                        s->dsp.vp3_h_loop_filter(
-                            plane_data + s->all_fragments[fragment + 1].first_pixel,
-                            stride, bounding_values);
-                    }
-
-                    /* do not perform bottom edge filter for bottom row
-                     * fragments or if bottom fragment neighbor is also coded
-                     * in this frame (it will be filtered in the next row) */
-                    if ((y < height - 1) &&
-                        (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
-                        s->dsp.vp3_v_loop_filter(
-                            plane_data + s->all_fragments[fragment + width].first_pixel,
-                            stride, bounding_values);
-                    }
-                }
-
-                fragment++;
-            }
-        }
-    }
-}
-
-/*
- * This function computes the first pixel addresses for each fragment.
- * This function needs to be invoked after the first frame is allocated
- * so that it has access to the plane strides.
- */
-static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
-{
-#define Y_INITIAL(chroma_shift)  s->flipped_image ? 1  : s->fragment_height >> chroma_shift
-#define Y_FINISHED(chroma_shift) s->flipped_image ? y <= s->fragment_height >> chroma_shift : y > 0
-
-    int i, x, y;
-    const int y_inc = s->flipped_image ? 1 : -1;
-
-    /* figure out the first pixel addresses for each of the fragments */
-    /* Y plane */
-    i = 0;
-    for (y = Y_INITIAL(0); Y_FINISHED(0); y += y_inc) {
-        for (x = 0; x < s->fragment_width; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[0] +
-                    x * FRAGMENT_PIXELS;
-        }
-    }
-
-    /* U plane */
-    i = s->fragment_start[1];
-    for (y = Y_INITIAL(1); Y_FINISHED(1); y += y_inc) {
-        for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[1] +
-                    x * FRAGMENT_PIXELS;
-        }
-    }
-
-    /* V plane */
-    i = s->fragment_start[2];
-    for (y = Y_INITIAL(1); Y_FINISHED(1); y += y_inc) {
-        for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[2] +
-                    x * FRAGMENT_PIXELS;
-        }
-    }
+    vp3_draw_horiz_band(s, FFMIN(64*slice + 64-16, s->height-16));
 }
 
 /*
@@ -1691,8 +1519,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     int i, inter, plane;
     int c_width;
     int c_height;
-    int y_superblock_count;
-    int c_superblock_count;
+    int y_fragment_count, c_fragment_count;
 
     if (avctx->codec_tag == MKTAG('V','P','3','0'))
         s->version = 0;
@@ -1702,7 +1529,8 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     s->avctx = avctx;
     s->width = FFALIGN(avctx->width, 16);
     s->height = FFALIGN(avctx->height, 16);
-    avctx->pix_fmt = PIX_FMT_YUV420P;
+    if (avctx->pix_fmt == PIX_FMT_NONE)
+        avctx->pix_fmt = PIX_FMT_YUV420P;
     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
     //if(avctx->idct_algo==FF_IDCT_AUTO)
         avctx->idct_algo=FF_IDCT_VP3;
@@ -1715,42 +1543,48 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     for (i = 0; i < 3; i++)
         s->qps[i] = -1;
 
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+
     s->y_superblock_width = (s->width + 31) / 32;
     s->y_superblock_height = (s->height + 31) / 32;
-    y_superblock_count = s->y_superblock_width * s->y_superblock_height;
+    s->y_superblock_count = s->y_superblock_width * s->y_superblock_height;
 
     /* work out the dimensions for the C planes */
-    c_width = s->width / 2;
-    c_height = s->height / 2;
+    c_width = s->width >> s->chroma_x_shift;
+    c_height = s->height >> s->chroma_y_shift;
     s->c_superblock_width = (c_width + 31) / 32;
     s->c_superblock_height = (c_height + 31) / 32;
-    c_superblock_count = s->c_superblock_width * s->c_superblock_height;
+    s->c_superblock_count = s->c_superblock_width * s->c_superblock_height;
 
-    s->superblock_count = y_superblock_count + (c_superblock_count * 2);
-    s->u_superblock_start = y_superblock_count;
-    s->v_superblock_start = s->u_superblock_start + c_superblock_count;
+    s->superblock_count = s->y_superblock_count + (s->c_superblock_count * 2);
+    s->u_superblock_start = s->y_superblock_count;
+    s->v_superblock_start = s->u_superblock_start + s->c_superblock_count;
     s->superblock_coding = av_malloc(s->superblock_count);
 
     s->macroblock_width = (s->width + 15) / 16;
     s->macroblock_height = (s->height + 15) / 16;
     s->macroblock_count = s->macroblock_width * s->macroblock_height;
 
-    s->fragment_width = s->width / FRAGMENT_PIXELS;
-    s->fragment_height = s->height / FRAGMENT_PIXELS;
+    s->fragment_width[0] = s->width / FRAGMENT_PIXELS;
+    s->fragment_height[0] = s->height / FRAGMENT_PIXELS;
+    s->fragment_width[1]  = s->fragment_width[0]  >> s->chroma_x_shift;
+    s->fragment_height[1] = s->fragment_height[0] >> s->chroma_y_shift;
 
     /* fragment count covers all 8x8 blocks for all 3 planes */
-    s->fragment_count = s->fragment_width * s->fragment_height * 3 / 2;
-    s->fragment_start[1] = s->fragment_width * s->fragment_height;
-    s->fragment_start[2] = s->fragment_width * s->fragment_height * 5 / 4;
+    y_fragment_count     = s->fragment_width[0] * s->fragment_height[0];
+    c_fragment_count     = s->fragment_width[1] * s->fragment_height[1];
+    s->fragment_count    = y_fragment_count + 2*c_fragment_count;
+    s->fragment_start[1] = y_fragment_count;
+    s->fragment_start[2] = y_fragment_count + c_fragment_count;
 
     s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
-    s->coeff_counts = av_malloc(s->fragment_count * sizeof(*s->coeff_counts));
-    s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
-    s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int));
-    s->fast_fragment_list = av_malloc(s->fragment_count * sizeof(int));
-    s->pixel_addresses_initialized = 0;
-    if (!s->superblock_coding || !s->all_fragments || !s->coeff_counts ||
-        !s->coeffs || !s->coded_fragment_list || !s->fast_fragment_list) {
+    s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int));
+    s->dct_tokens_base = av_malloc(64*s->fragment_count * sizeof(*s->dct_tokens_base));
+    s->motion_val[0] = av_malloc(y_fragment_count * sizeof(*s->motion_val[0]));
+    s->motion_val[1] = av_malloc(c_fragment_count * sizeof(*s->motion_val[1]));
+
+    if (!s->superblock_coding || !s->all_fragments || !s->dct_tokens_base ||
+        !s->coded_fragment_list[0] || !s->motion_val[0] || !s->motion_val[1]) {
         vp3_decode_end(avctx);
         return -1;
     }
@@ -1856,11 +1690,8 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
 
     /* work out the block mapping tables */
     s->superblock_fragments = av_malloc(s->superblock_count * 16 * sizeof(int));
-    s->superblock_macroblocks = av_malloc(s->superblock_count * 4 * sizeof(int));
-    s->macroblock_fragments = av_malloc(s->macroblock_count * 6 * sizeof(int));
     s->macroblock_coding = av_malloc(s->macroblock_count + 1);
-    if (!s->superblock_fragments || !s->superblock_macroblocks ||
-        !s->macroblock_fragments || !s->macroblock_coding) {
+    if (!s->superblock_fragments || !s->macroblock_coding) {
         vp3_decode_end(avctx);
         return -1;
     }
@@ -1942,6 +1773,13 @@ static int vp3_decode_frame(AVCodecContext *avctx,
     if (avctx->skip_frame >= AVDISCARD_NONKEY && !s->keyframe)
         return buf_size;
 
+    s->current_frame.reference = 3;
+    s->current_frame.pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE;
+    if (avctx->get_buffer(avctx, &s->current_frame) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        goto error;
+    }
+
     if (s->keyframe) {
         if (!s->theora)
         {
@@ -1960,79 +1798,68 @@ static int vp3_decode_frame(AVCodecContext *avctx,
                     av_log(s->avctx, AV_LOG_ERROR, "Warning, unsupported keyframe coding type?!\n");
             skip_bits(&gb, 2); /* reserved? */
         }
-
-        if (s->last_frame.data[0] == s->golden_frame.data[0]) {
-            if (s->golden_frame.data[0])
-                avctx->release_buffer(avctx, &s->golden_frame);
-            s->last_frame= s->golden_frame; /* ensure that we catch any access to this released frame */
-        } else {
-            if (s->golden_frame.data[0])
-                avctx->release_buffer(avctx, &s->golden_frame);
-            if (s->last_frame.data[0])
-                avctx->release_buffer(avctx, &s->last_frame);
-        }
-
-        s->golden_frame.reference = 3;
-        if(avctx->get_buffer(avctx, &s->golden_frame) < 0) {
-            av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n");
-            return -1;
-        }
-
-        /* golden frame is also the current frame */
-        s->current_frame= s->golden_frame;
-
-        /* time to figure out pixel addresses? */
-        if (!s->pixel_addresses_initialized)
-        {
-            vp3_calculate_pixel_addresses(s);
-            s->pixel_addresses_initialized = 1;
-        }
     } else {
-        /* allocate a new current frame */
-        s->current_frame.reference = 3;
-        if (!s->pixel_addresses_initialized) {
-            av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n");
-            return -1;
-        }
-        if(avctx->get_buffer(avctx, &s->current_frame) < 0) {
-            av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n");
-            return -1;
+        if (!s->golden_frame.data[0]) {
+            av_log(s->avctx, AV_LOG_WARNING, "vp3: first frame not a keyframe\n");
+
+            s->golden_frame.reference = 3;
+            s->golden_frame.pict_type = FF_I_TYPE;
+            if (avctx->get_buffer(avctx, &s->golden_frame) < 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                goto error;
+            }
+            s->last_frame = s->golden_frame;
+            s->last_frame.type = FF_BUFFER_TYPE_COPY;
         }
     }
 
     s->current_frame.qscale_table= s->qscale_table; //FIXME allocate individual tables per AVFrame
     s->current_frame.qstride= 0;
 
-    init_frame(s, &gb);
+    memset(s->all_fragments, 0, s->fragment_count * sizeof(Vp3Fragment));
 
     if (unpack_superblocks(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_superblocks\n");
-        return -1;
+        goto error;
     }
     if (unpack_modes(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_modes\n");
-        return -1;
+        goto error;
     }
     if (unpack_vectors(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_vectors\n");
-        return -1;
+        goto error;
     }
     if (unpack_block_qpis(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_block_qpis\n");
-        return -1;
+        goto error;
     }
     if (unpack_dct_coeffs(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n");
-        return -1;
+        goto error;
     }
 
-    for (i = 0; i < s->macroblock_height; i++)
+    for (i = 0; i < 3; i++) {
+        int height = s->height >> (i && s->chroma_y_shift);
+        if (s->flipped_image)
+            s->data_offset[i] = 0;
+        else
+            s->data_offset[i] = (height-1) * s->current_frame.linesize[i];
+    }
+
+    s->last_slice_end = 0;
+    for (i = 0; i < s->c_superblock_height; i++)
         render_slice(s, i);
 
-    apply_loop_filter(s);
+    // filter the last row
+    for (i = 0; i < 3; i++) {
+        int row = (s->height >> (3+(i && s->chroma_y_shift))) - 1;
+        apply_loop_filter(s, i, row, row+1);
+    }
+    vp3_draw_horiz_band(s, s->height);
 
     /* ffdshow custom code (begin) */
-    if (s->theora && s->fps_numerator){
+    if (s->theora && avctx->time_base.num){
         if (avctx->granulepos>-1){
             s->granulepos=avctx->granulepos;
         }else{
@@ -2048,7 +1875,7 @@ static int vp3_decode_frame(AVCodecContext *avctx,
                     s->granulepos++;
                 }
         }
-        s->current_frame.reordered_opaque = 10000000LL * theora_granule_frame(s,s->granulepos) * s->fps_denumerator / s->fps_numerator;
+        s->current_frame.reordered_opaque = 10000000LL * theora_granule_frame(s,s->granulepos) * avctx->time_base.den / avctx->time_base.num;
         s->current_frame.pict_type=s->keyframe?FF_I_TYPE:FF_P_TYPE;
     }
     /* ffdshow custom code (end) */
@@ -2058,15 +1885,27 @@ static int vp3_decode_frame(AVCodecContext *avctx,
 
     /* release the last frame, if it is allocated and if it is not the
      * golden frame */
-    if ((s->last_frame.data[0]) &&
-        (s->last_frame.data[0] != s->golden_frame.data[0]))
+    if (s->last_frame.data[0] && s->last_frame.type != FF_BUFFER_TYPE_COPY)
         avctx->release_buffer(avctx, &s->last_frame);
 
     /* shuffle frames (last = current) */
     s->last_frame= s->current_frame;
+
+    if (s->keyframe) {
+        if (s->golden_frame.data[0])
+            avctx->release_buffer(avctx, &s->golden_frame);
+        s->golden_frame = s->current_frame;
+        s->last_frame.type = FF_BUFFER_TYPE_COPY;
+    }
+
     s->current_frame.data[0]= NULL; /* ensure that we catch any access to this released frame */
 
     return buf_size;
+
+error:
+    if (s->current_frame.data[0])
+        avctx->release_buffer(avctx, &s->current_frame);
+    return -1;
 }
 
 /*
@@ -2079,14 +1918,12 @@ static av_cold int vp3_decode_end(AVCodecContext *avctx)
 
     av_free(s->superblock_coding);
     av_free(s->all_fragments);
-    av_free(s->coeff_counts);
-    av_free(s->coeffs);
-    av_free(s->coded_fragment_list);
-    av_free(s->fast_fragment_list);
+    av_free(s->coded_fragment_list[0]);
+    av_free(s->dct_tokens_base);
     av_free(s->superblock_fragments);
-    av_free(s->superblock_macroblocks);
-    av_free(s->macroblock_fragments);
     av_free(s->macroblock_coding);
+    av_free(s->motion_val[0]);
+    av_free(s->motion_val[1]);
 
     for (i = 0; i < 16; i++) {
         free_vlc(&s->dc_vlc[i]);
@@ -2102,9 +1939,9 @@ static av_cold int vp3_decode_end(AVCodecContext *avctx)
     free_vlc(&s->motion_vector_vlc);
 
     /* release all frames */
-    if (s->golden_frame.data[0] && s->golden_frame.data[0] != s->last_frame.data[0])
+    if (s->golden_frame.data[0])
         avctx->release_buffer(avctx, &s->golden_frame);
-    if (s->last_frame.data[0])
+    if (s->last_frame.data[0] && s->last_frame.type != FF_BUFFER_TYPE_COPY)
         avctx->release_buffer(avctx, &s->last_frame);
     /* no need to release the current_frame since it will always be pointing
      * to the same frame as either the golden or last frame */
@@ -2147,10 +1984,16 @@ static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb)
 }
 
 #if CONFIG_THEORA_DECODER
+static const enum PixelFormat theora_pix_fmts[4] = { /* indexed by the 2-bit pixel format field read in theora_decode_header() */
+    PIX_FMT_YUV420P, PIX_FMT_NONE, PIX_FMT_YUV422P, PIX_FMT_YUV444P
+};
+
 static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
 {
     Vp3DecodeContext *s = avctx->priv_data;
-    int visible_width, visible_height;
+    int visible_width, visible_height, colorspace;
+    int offset_x = 0, offset_y = 0;
+    AVRational fps;
 
     s->theora = get_bits_long(gb, 24);
     av_log(avctx, AV_LOG_DEBUG, "Theora bitstream version %X\n", s->theora);
@@ -2172,35 +2015,27 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
         return -1;
     }
 
-    if (s->theora >= 0x030400)
-    {
-        skip_bits(gb, 32); /* total number of superblocks in a frame */
-        // fixme, the next field is 36bits long
-        skip_bits(gb, 32); /* total number of blocks in a frame */
-        skip_bits(gb, 4); /* total number of blocks in a frame */
-        skip_bits(gb, 32); /* total number of macroblocks in a frame */
-    }
-
     if (s->theora >= 0x030200) {
         visible_width  = get_bits_long(gb, 24);
         visible_height = get_bits_long(gb, 24);
 
-        skip_bits(gb, 8); /* offset x */
-        skip_bits(gb, 8); /* offset y */
+        offset_x = get_bits(gb, 8); /* offset x */
+        offset_y = get_bits(gb, 8); /* offset y, from bottom */
     }
 
-    /* ffdshow custom code (begin) */
-    s->fps_numerator=get_bits(gb, 32); /* fps numerator */
-    s->fps_denumerator=get_bits(gb, 32); /* fps denumerator */
-    avctx->sample_aspect_ratio.num = get_bits(gb, 24); /* aspect numerator */
-    avctx->sample_aspect_ratio.den = get_bits(gb, 24); /* aspect denumerator */
-    /* ffdshow custom code (end) */
+    fps.num = get_bits_long(gb, 32);
+    fps.den = get_bits_long(gb, 32);
+    if (fps.num && fps.den) {
+        av_reduce(&avctx->time_base.num, &avctx->time_base.den,
+                  fps.den, fps.num, 1<<30);
+    }
+
+    avctx->sample_aspect_ratio.num = get_bits_long(gb, 24);
+    avctx->sample_aspect_ratio.den = get_bits_long(gb, 24);
 
     if (s->theora < 0x030200)
         s->keyframe_frequency_force=1<<get_bits(gb, 5); /* keyframe frequency force */ /* ffdshow custom code */
-    skip_bits(gb, 8); /* colorspace */
-    if (s->theora >= 0x030400)
-        skip_bits(gb, 2); /* pixel format: 420,res,422,444 */
+    colorspace = get_bits(gb, 8);
     skip_bits(gb, 24); /* bitrate */
 
     skip_bits(gb, 6); /* quality hint */
@@ -2208,20 +2043,30 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
     if (s->theora >= 0x030200)
     {
         s->keyframe_frequency_force=1<<get_bits(gb, 5); /* keyframe frequency force */ /* ffdshow custom code */
-
-        if (s->theora < 0x030400)
-            skip_bits(gb, 5); /* spare bits */
+        avctx->pix_fmt = theora_pix_fmts[get_bits(gb, 2)];
+        skip_bits(gb, 3); /* reserved */
     }
     s->keyframe_granule_shift=_ilog(s->keyframe_frequency_force-1); // ffdshow custom code
 
 //    align_get_bits(gb);
 
     if (   visible_width  <= s->width  && visible_width  > s->width-16
-        && visible_height <= s->height && visible_height > s->height-16)
+        && visible_height <= s->height && visible_height > s->height-16
+        && !offset_x && (offset_y == s->height - visible_height))
         avcodec_set_dimensions(avctx, visible_width, visible_height);
     else
         avcodec_set_dimensions(avctx, s->width, s->height);
 
+    if (colorspace == 1) {
+        avctx->color_primaries = AVCOL_PRI_BT470M;
+    } else if (colorspace == 2) {
+        avctx->color_primaries = AVCOL_PRI_BT470BG;
+    }
+    if (colorspace == 1 || colorspace == 2) {
+        avctx->colorspace = AVCOL_SPC_BT470BG;
+        avctx->color_trc  = AVCOL_TRC_BT709;
+    }
+
     return 0;
 }
 
@@ -2409,7 +2254,7 @@ AVCodec theora_decoder = {
     /*.encode = */NULL,
     /*.close = */vp3_decode_end,
     /*.decode = */vp3_decode_frame,
-    /*.capabilities = */CODEC_CAP_DR1,
+    /*.capabilities = */CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
     /*.next = */NULL,
     /*.flush = */NULL,
     /*.supported_framerates = */NULL,
@@ -2427,7 +2272,7 @@ AVCodec vp3_decoder = {
     /*.encode = */NULL,
     /*.close = */vp3_decode_end,
     /*.decode = */vp3_decode_frame,
-    /*.capabilities = */CODEC_CAP_DR1,
+    /*.capabilities = */CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
     /*.next = */NULL,
     /*.flush = */NULL,
     /*.supported_framerates = */NULL,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3dsp.c
index 83d46e97f..09161a546 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3dsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3dsp.c
@@ -19,7 +19,7 @@
  */
 
 /**
- * @file vp3dsp.c
+ * @file libavcodec/vp3dsp.c
  * Standard C DSP-oriented functions cribbed from the original VP3
  * source code.
  */
@@ -223,6 +223,25 @@ void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*
     idct(dest, line_size, block, 2);
 }
 
+/* DC-only inverse transform: block[0] is scaled by 46341/65536, then scaled
+ * by 46341/65536 again with a rounded shift by four further bits. The result
+ * is added to each pixel of an 8x8 area of dest, and every sum is mapped
+ * through the ff_cropTbl lookup table before being stored back. */
+void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){
+    const uint8_t *crop = ff_cropTbl + MAX_NEG_CROP;
+    int row, col;
+    int dc_term = (46341*block[0])>>16;
+
+    dc_term = (46341*dc_term + (8<<16))>>20;
+
+    for(row = 0; row < 8; row++){
+        /* the same offset is applied to all eight pixels of this row */
+        for(col = 0; col < 8; col++)
+            dest[col] = crop[dest[col]+dc_term];
+        dest += line_size; /* step to the next row */
+    }
+}
+
 void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
 {
     unsigned char *end;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
index a4baf23d2..bf2e05bc9 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
@@ -33,6 +33,7 @@ void vp56_init_dequant(VP56Context *s, int quantizer)
     s->quantizer = quantizer;
     s->dequant_dc = vp56_dc_dequant[quantizer] << 2;
     s->dequant_ac = vp56_ac_dequant[quantizer] << 2;
+    memset(s->qscale_table, quantizer, s->mb_width);
 }
 
 static int vp56_get_vectors_predictors(VP56Context *s, int row, int col,
@@ -481,6 +482,7 @@ static int vp56_size_changed(AVCodecContext *avctx)
         return -1;
     }
 
+    s->qscale_table = av_realloc(s->qscale_table, s->mb_width);
     s->above_blocks = av_realloc(s->above_blocks,
                                  (4*s->mb_width+6) * sizeof(*s->above_blocks));
     s->macroblocks = av_realloc(s->macroblocks,
@@ -503,8 +505,12 @@ int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     int is_alpha, av_uninit(alpha_offset);
 
     if (s->has_alpha) {
+        if (remaining_buf_size < 3)
+            return -1;
         alpha_offset = bytestream_get_be24(&buf);
         remaining_buf_size -= 3;
+        if (remaining_buf_size < alpha_offset)
+            return -1;
     }
 
     for (is_alpha=0; is_alpha < 1+s->has_alpha; is_alpha++) {
@@ -638,6 +644,9 @@ int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     FFSWAP(AVFrame *, s->framep[VP56_FRAME_CURRENT],
                       s->framep[VP56_FRAME_PREVIOUS]);
 
+    p->qstride = 0;
+    p->qscale_table = s->qscale_table;
+    p->qscale_type = FF_QSCALE_TYPE_VP56;
     *(AVFrame*)data = *p;
     *data_size = sizeof(AVFrame);
 
@@ -653,7 +662,7 @@ av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
     avctx->pix_fmt = has_alpha ? PIX_FMT_YUVA420P : PIX_FMT_YUV420P;
 
     /* always use the VP3 IDCT */
-    	avctx->idct_algo = FF_IDCT_VP3;
+        avctx->idct_algo = FF_IDCT_VP3;
     dsputil_init(&s->dsp, avctx);
     ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct);
 
@@ -686,6 +695,7 @@ av_cold int vp56_free(AVCodecContext *avctx)
 {
     VP56Context *s = avctx->priv_data;
 
+    av_freep(&s->qscale_table);
     av_freep(&s->above_blocks);
     av_freep(&s->macroblocks);
     av_freep(&s->edge_emu_buffer_alloc);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
index 53da77812..1a0ee2a2f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
@@ -110,6 +110,7 @@ struct vp56_context {
     int quantizer;
     uint16_t dequant_dc;
     uint16_t dequant_ac;
+    int8_t *qscale_table;
 
     /* DC predictors management */
     VP56RefDc *above_blocks;
@@ -120,7 +121,7 @@ struct vp56_context {
     /* blocks / macroblock */
     VP56mb mb_type;
     VP56Macroblock *macroblocks;
-    DECLARE_ALIGNED_16(DCTELEM, block_coeff)[6][64];
+    DECLARE_ALIGNED(16, DCTELEM, block_coeff)[6][64];
 
     /* motion vectors */
     VP56mv mv[6];  /* vectors for each block in MB */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
index 519833bdc..5a2c07f45 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
@@ -234,6 +234,7 @@ static void vp6_build_huff_tree(VP56Context *s, uint8_t coeff_model[],
         nodes[map[2*i+1]].count = b + !b;
     }
 
+    free_vlc(vlc);
     /* then build the huffman tree accodring to probabilities */
     ff_huff_build_tree(s->avctx, vlc, size, nodes, vp6_huff_cmp,
                        FF_HUFFMAN_FLAG_HNODE_FIRST);
@@ -608,6 +609,23 @@ static av_cold int vp6_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+/* VP6 teardown: run the shared vp56_free() cleanup, then release each of the
+ * dccv/runv/ract VLC tables stored in the decoder context. Always returns 0. */
+static av_cold int vp6_decode_free(AVCodecContext *avctx)
+{
+    VP56Context *ctx = avctx->priv_data;
+    int i, j, k;
+    vp56_free(avctx);
+    for (i = 0; i < 2; i++) {
+        free_vlc(&ctx->dccv_vlc[i]);
+        free_vlc(&ctx->runv_vlc[i]);
+        for (j = 0; j < 3; j++)
+            for (k = 0; k < 6; k++)
+                free_vlc(&ctx->ract_vlc[i][j][k]);
+    }
+    return 0;
+}
+
 AVCodec vp6_decoder = {
     "vp6",
     CODEC_TYPE_VIDEO,
@@ -615,7 +633,7 @@ AVCodec vp6_decoder = {
     sizeof(VP56Context),
     vp6_decode_init,
     NULL,
-    vp56_free,
+    vp6_decode_free,
     vp56_decode_frame,
     /*.capabilities = */CODEC_CAP_DR1,
     /*.next = */NULL,
@@ -633,7 +651,7 @@ AVCodec vp6f_decoder = {
     sizeof(VP56Context),
     vp6_decode_init,
     NULL,
-    vp56_free,
+    vp6_decode_free,
     vp56_decode_frame,
     /*.capabilities = */CODEC_CAP_DR1,
     /*.next = */NULL,
@@ -651,7 +669,7 @@ AVCodec vp6a_decoder = {
     sizeof(VP56Context),
     vp6_decode_init,
     NULL,
-    vp56_free,
+    vp6_decode_free,
     vp56_decode_frame,
     /*.capabilities = */CODEC_CAP_DR1,
     /*.next = */NULL,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/w32thread.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/w32thread.c
index 295a87a35..88e7ef866 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/w32thread.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/w32thread.c
@@ -93,7 +93,7 @@ void avcodec_thread_free(AVCodecContext *s){
     av_freep(&s->thread_opaque);
 }
 
-int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size){
+static int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size){
     ThreadContext *c= s->thread_opaque;
     int i;
     int jobnr = 0;
@@ -116,7 +116,7 @@ int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, vo
     return 0;
 }
 
-int avcodec_thread_execute2(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count){
+static int avcodec_thread_execute2(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count){
     ThreadContext *c= s->thread_opaque;
     int i;
     for(i=0; i<s->thread_count; i++)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/wmv2.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/wmv2.h
index 8522f3a32..d407cd017 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/wmv2.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/wmv2.h
@@ -50,7 +50,7 @@ typedef struct Wmv2Context{
     int hshift;
 
     ScanTable abt_scantable[2];
-    DECLARE_ALIGNED_16(DCTELEM, abt_block2)[6][64];
+    DECLARE_ALIGNED(16, DCTELEM, abt_block2)[6][64];
 }Wmv2Context;
 
 void ff_wmv2_common_init(Wmv2Context * w);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c
index 9aa8a52aa..ff359230c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c
@@ -27,8 +27,8 @@
  */
 static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, const uint64_t *rnd_reg)
 {
-    DECLARE_ALIGNED_8(uint64_t, AA);
-    DECLARE_ALIGNED_8(uint64_t, DD);
+    DECLARE_ALIGNED(8, uint64_t, AA);
+    DECLARE_ALIGNED(8, uint64_t, DD);
     int i;
 
     if(y==0 && x==0) {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
index 95cd1fe99..efb9651bb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
@@ -24,6 +24,7 @@
 
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/dsputil.h"
+#include "libavcodec/h264dsp.h"
 #include "libavcodec/mpegvideo.h"
 #include "libavcodec/simple_idct.h"
 #include "dsputil_mmx.h"
@@ -39,38 +40,38 @@
 int mm_flags; /* multimedia extension flags */
 
 /* pixel operations */
-DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_bone) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
 
-DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] =
+DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
 {0x8000000080000000ULL, 0x8000000080000000ULL};
 
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3  ) = 0x0003000300030003ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4  ) = 0x0004000400040004ULL;
-DECLARE_ALIGNED_16(const xmm_reg,  ff_pw_5  ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
-DECLARE_ALIGNED_16(const xmm_reg,  ff_pw_8  ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
-DECLARE_ALIGNED_16(const xmm_reg,  ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
-DECLARE_ALIGNED_16(const xmm_reg,  ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
-DECLARE_ALIGNED_16(const xmm_reg,  ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
-DECLARE_ALIGNED_16(const xmm_reg,  ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
-
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1  ) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3  ) = 0x0303030303030303ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7  ) = 0x0707070707070707ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
-
-DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 };
-DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 };
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_3  ) = 0x0003000300030003ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_4  ) = 0x0004000400040004ULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_5  ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8  ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
+
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_1  ) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_3  ) = 0x0303030303030303ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_7  ) = 0x0707070707070707ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
+
+DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
+DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
 
 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
 #define MOVQ_ZERO(regd)  __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
@@ -1818,9 +1819,6 @@ PREFETCH(prefetch_3dnow, prefetch)
 #include "rv40dsp_mmx.c"
 
 /* CAVS specific */
-void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
-void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
-
 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
     put_pixels8_mmx(dst, src, stride, 8);
 }
@@ -1835,8 +1833,6 @@ void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
 }
 
 /* VC1 specific */
-void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
-
 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
     put_pixels8_mmx(dst, src, stride, 8);
 }
@@ -1844,10 +1840,6 @@ void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, in
     avg_pixels8_mmx2(dst, src, stride, 8);
 }
 
-/* external functions, from idct_mmx.c */
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
 /* XXX: those functions should be suppressed ASAP when all IDCTs are
    converted */
 #if CONFIG_GPL
@@ -2026,7 +2018,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c
     } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
         MIX5(IF1,IF0);
     } else {
-        DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4];
+        DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4];
         j = 2*in_ch*sizeof(float);
         __asm__ volatile(
             "1: \n"
@@ -2379,7 +2371,7 @@ static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int al
 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
-    DECLARE_ALIGNED_16(int16_t, tmp)[len];\
+    DECLARE_ALIGNED(16, int16_t, tmp)[len];\
     int i,j,c;\
     for(c=0; c<channels; c++){\
         float_to_int16_##cpu(tmp, src[c], len);\
@@ -2591,16 +2583,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx;
         c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx;
 
-        c->h264_idct_dc_add=
-        c->h264_idct_add= ff_h264_idct_add_mmx;
-        c->h264_idct8_dc_add=
-        c->h264_idct8_add= ff_h264_idct8_add_mmx;
-
-        c->h264_idct_add16     = ff_h264_idct_add16_mmx;
-        c->h264_idct8_add4     = ff_h264_idct8_add4_mmx;
-        c->h264_idct_add8      = ff_h264_idct_add8_mmx;
-        c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
-
         if (CONFIG_VP6_DECODER) {
             c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
         }
@@ -2622,13 +2604,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
             c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
 
-            c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
-            c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
-            c->h264_idct_add16     = ff_h264_idct_add16_mmx2;
-            c->h264_idct8_add4     = ff_h264_idct8_add4_mmx2;
-            c->h264_idct_add8      = ff_h264_idct_add8_mmx2;
-            c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
-
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
                 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
                 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
@@ -2642,6 +2617,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                     c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
                 }
             }
+            if (CONFIG_VP3_DECODER) {
+                c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
+            }
 
 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
             c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
@@ -2689,31 +2667,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
             c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
             c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
-            c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
-            c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
-            c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
-            c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
-            c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
-            c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
-            c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
-
-            c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
-            c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
-            c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
-            c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
-            c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
-            c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
-            c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
-            c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
-
-            c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
-            c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
-            c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
-            c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
-            c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
-            c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
-            c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
-            c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
 
 #if HAVE_YASM && ARCH_X86_32
             c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
@@ -2793,16 +2746,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
         if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
             // these functions are slower than mmx on AMD, but faster on Intel
-/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
             c->put_pixels_tab[0][0] = put_pixels16_sse2;
             c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
-*/
             H264_QPEL_FUNCS(0, 0, sse2);
         }
         if(mm_flags & FF_MM_SSE2){
-            c->h264_idct8_add = ff_h264_idct8_add_sse2;
-            c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
-
             H264_QPEL_FUNCS(0, 1, sse2);
             H264_QPEL_FUNCS(0, 2, sse2);
             H264_QPEL_FUNCS(0, 3, sse2);
@@ -2849,26 +2797,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         }
 #endif
 
-#if CONFIG_GPL && HAVE_YASM && ARCH_X86_32
-        if (mm_flags & FF_MM_MMX2){
-#if ARCH_X86_32
-            c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
-            c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
-#endif
-            if( mm_flags&FF_MM_SSE2 ){
-#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
-                c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
-                c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
-                c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
-                c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
-#endif
-                c->h264_idct_add16 = ff_h264_idct_add16_sse2;
-                c->h264_idct_add8  = ff_h264_idct_add8_sse2;
-                c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
-            }
-        }
-#endif
-
 /* disable audio related ASM for 64-bit builds */
 #if ARCH_X86_32
         if(mm_flags & FF_MM_3DNOW){
@@ -2916,6 +2844,84 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         dsputilenc_init_mmx(c, avctx);
 }
 
+#if CONFIG_H264DSP
+void ff_h264dsp_init_x86(H264DSPContext *c)
+{
+    mm_flags = mm_support();
+
+    if (mm_flags & FF_MM_MMX) {
+        c->h264_idct_dc_add=
+        c->h264_idct_add= ff_h264_idct_add_mmx;
+        c->h264_idct8_dc_add=
+        c->h264_idct8_add= ff_h264_idct8_add_mmx;
+
+        c->h264_idct_add16     = ff_h264_idct_add16_mmx;
+        c->h264_idct8_add4     = ff_h264_idct8_add4_mmx;
+        c->h264_idct_add8      = ff_h264_idct_add8_mmx;
+        c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
+
+        if (mm_flags & FF_MM_MMX2) {
+            c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
+            c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
+            c->h264_idct_add16     = ff_h264_idct_add16_mmx2;
+            c->h264_idct8_add4     = ff_h264_idct8_add4_mmx2;
+            c->h264_idct_add8      = ff_h264_idct_add8_mmx2;
+            c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
+
+            c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
+            c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
+            c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
+            c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
+            c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
+            c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
+            c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
+
+            c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
+            c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
+            c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
+            c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
+            c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
+            c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
+            c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
+            c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
+
+            c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
+            c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
+            c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
+            c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
+            c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
+            c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
+            c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
+            c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
+        }
+        if(mm_flags & FF_MM_SSE2){
+            c->h264_idct8_add = ff_h264_idct8_add_sse2;
+            c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
+        }
+
+#if CONFIG_GPL && HAVE_YASM && ARCH_X86_32
+        if (mm_flags & FF_MM_MMX2){
+#if ARCH_X86_32
+            c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
+            c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
+#endif
+            if( mm_flags&FF_MM_SSE2 ){
+#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
+                c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
+                c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
+                c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
+                c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
+#endif
+                c->h264_idct_add16 = ff_h264_idct_add16_sse2;
+                c->h264_idct_add8  = ff_h264_idct_add8_sse2;
+                c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
+            }
+        }
+#endif
+    }
+}
+#endif /* CONFIG_H264DSP */
+
 const char* avcodec_get_current_idct_mmx(AVCodecContext *avctx,DSPContext *c)
 {
     if (c->idct_put==ff_idct_xvid_mmx_put)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
index c0ef49b30..64d23f553 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
@@ -156,4 +156,18 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 
+void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
+void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
+void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+
+void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
+void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+
+void ff_mmx_idct(DCTELEM *block);
+void ff_mmxext_idct(DCTELEM *block);
+
 #endif /* AVCODEC_X86_DSPUTIL_MMX_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c
index 319daf28b..f790cb804 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c
@@ -33,8 +33,6 @@
 #include "libavutil/common.h"
 #include "libavcodec/dsputil.h"
 
-#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
-
 //////////////////////////////////////////////////////////////////////
 //
 // constants for the forward DCT
@@ -55,30 +53,30 @@
 #define X8(x) x,x,x,x,x,x,x,x
 
 //concatenated table, for forward DCT transformation
-static const int16_t fdct_tg_all_16[24] ATTR_ALIGN(16) = {
+DECLARE_ALIGNED(16, static const int16_t, fdct_tg_all_16)[24] = {
     X8(13036),  // tg * (2<<16) + 0.5
     X8(27146),  // tg * (2<<16) + 0.5
     X8(-21746)  // tg * (2<<16) + 0.5
 };
 
-static const int16_t ocos_4_16[8] ATTR_ALIGN(16) = {
+DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = {
     X8(23170)   //cos * (2<<15) + 0.5
 };
 
-static const int16_t fdct_one_corr[8] ATTR_ALIGN(16) = { X8(1) };
+DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
 
-static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };
+DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
 
 static struct
 {
- const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16);
-} fdct_r_row_sse2 ATTR_ALIGN(16)=
+ DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
+} fdct_r_row_sse2 =
 {{
  RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
 }};
-//static const long fdct_r_row_sse2[4] ATTR_ALIGN(16) = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
+//DECLARE_ALIGNED(16, static const long, fdct_r_row_sse2)[4] = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
 
-static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = {  // forward_dct coeff table
+DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = {  // forward_dct coeff table
   16384,   16384,   22725,   19266,
   16384,   16384,   12873,    4520,
   21407,    8867,   19266,   -4520,
@@ -154,10 +152,10 @@ static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = {  // forward_dct coeff
 
 static struct
 {
- const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN(16);
-} tab_frw_01234567_sse2 ATTR_ALIGN(16) =
+ DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
+} tab_frw_01234567_sse2 =
 {{
-//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = {  // forward_dct coeff table
+//DECLARE_ALIGNED(16, static const int16_t, tab_frw_01234567_sse2)[] = {  // forward_dct coeff table
 #define TABLE_SSE2 C4,  C4,  C1,  C3, -C6, -C2, -C1, -C5, \
                    C4,  C4,  C5,  C7,  C2,  C6,  C3, -C7, \
                   -C4,  C4,  C7,  C3,  C6, -C2,  C7, -C5, \
@@ -535,7 +533,7 @@ static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const
 
 void ff_fdct_mmx(int16_t *block)
 {
-    int64_t align_tmp[16] ATTR_ALIGN(8);
+    DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
     int16_t * block1= (int16_t*)align_tmp;
     const int16_t *table= tab_frw_01234567;
     int i;
@@ -553,7 +551,7 @@ void ff_fdct_mmx(int16_t *block)
 
 void ff_fdct_mmx2(int16_t *block)
 {
-    int64_t align_tmp[16] ATTR_ALIGN(8);
+    DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
     int16_t *block1= (int16_t*)align_tmp;
     const int16_t *table= tab_frw_01234567;
     int i;
@@ -571,7 +569,7 @@ void ff_fdct_mmx2(int16_t *block)
 
 void ff_fdct_sse2(int16_t *block)
 {
-    int64_t align_tmp[16] ATTR_ALIGN(16);
+    DECLARE_ALIGNED(16, int64_t, align_tmp)[16];
     int16_t * const block1= (int16_t*)align_tmp;
 
     fdct_col_sse2(block, block1, 0);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h
index b0fff1b12..7ef583914 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h
@@ -19,7 +19,7 @@
 #ifndef AVCODEC_X86_FFT_H
 #define AVCODEC_X86_FFT_H
 
-#include "libavcodec/dsputil.h"
+#include "libavcodec/fft.h"
 
 void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
 void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
index 160caeecf..8226ae962 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
@@ -23,7 +23,7 @@
 #include "libavcodec/dsputil.h"
 #include "fft.h"
 
-DECLARE_ALIGNED_8(static const int, m1m1)[2] = { 1<<31, 1<<31 };
+DECLARE_ALIGNED(8, static const int, m1m1)[2] = { 1<<31, 1<<31 };
 
 #ifdef EMULATE_3DNOWEXT
 #define PSWAPD(s,d)\
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
index 26bd748ae..9cb0ae1bf 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
@@ -419,18 +419,23 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
 %define pass_3dn2 pass_3dn
 %define pass_interleave_3dn2 pass_interleave_3dn
 
+%ifdef PIC
+%define SECTION_REL - $$
+%else
+%define SECTION_REL
+%endif
 
 %macro DECL_FFT 2-3 ; nbits, cpu, suffix
-%xdefine list_of_fft fft4%2, fft8%2
+%xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL
 %if %1==5
-%xdefine list_of_fft list_of_fft, fft16%2
+%xdefine list_of_fft list_of_fft, fft16%2 SECTION_REL
 %endif
 
 %assign n 1<<%1
 %rep 17-%1
 %assign n2 n/2
 %assign n4 n/4
-%xdefine list_of_fft list_of_fft, fft %+ n %+ %3%2
+%xdefine list_of_fft list_of_fft, fft %+ n %+ %3%2 SECTION_REL
 
 align 16
 fft %+ n %+ %3%2:
@@ -448,10 +453,6 @@ fft %+ n %+ %3%2:
 %endrep
 %undef n
 
-%ifidn __OUTPUT_FORMAT__,macho64
-section .rodata
-%endif
-
 align 8
 dispatch_tab%3%2: pointer list_of_fft
 
@@ -462,6 +463,10 @@ section .text
 cglobal fft_dispatch%3%2, 2,5,8, z, nbits
     lea r2, [dispatch_tab%3%2 GLOBAL]
     mov r2, [r2 + (nbitsq-2)*gprsize]
+%ifdef PIC
+    lea r3, [$$ GLOBAL]
+    add r2, r3
+%endif
     call r2
     RET
 %endmacro ; DECL_FFT
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
index cfe3429aa..a6d822c9e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
@@ -20,8 +20,8 @@
 
 #include "dsputil_mmx.h"
 
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3  ) = 0x0307030703070307ULL;
+DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
+DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3  ) = 0x0307030703070307ULL;
 
 /***********************************/
 /* IDCT */
@@ -157,12 +157,12 @@ static inline void h264_idct8_1d(int16_t *block)
 static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
 {
     int i;
-    DECLARE_ALIGNED_8(int16_t, b2)[64];
+    DECLARE_ALIGNED(8, int16_t, b2)[64];
 
     block[0] += 32;
 
     for(i=0; i<2; i++){
-        DECLARE_ALIGNED_8(uint64_t, tmp);
+        DECLARE_ALIGNED(8, uint64_t, tmp);
 
         h264_idct8_1d(block+4*i);
 
@@ -628,7 +628,7 @@ static void ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, DCTE
 
 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
 {
-    DECLARE_ALIGNED_8(uint64_t, tmp0)[2];
+    DECLARE_ALIGNED(8, uint64_t, tmp0)[2];
 
     __asm__ volatile(
         "movq    (%2,%4), %%mm0    \n\t" //p1
@@ -690,7 +690,7 @@ static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, in
 {
     //FIXME: could cut some load/stores by merging transpose with filter
     // also, it only needs to transpose 6x8
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*8];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*8];
     int i;
     for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
         if((tc0[0] & tc0[1]) < 0)
@@ -734,7 +734,7 @@ static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha,
 static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
     transpose4x4(trans, pix-2, 8, stride);
     transpose4x4(trans+4, pix-2+4*stride, 8, stride);
     h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
@@ -784,7 +784,7 @@ static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
 static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
     transpose4x4(trans, pix-2, 8, stride);
     transpose4x4(trans+4, pix-2+4*stride, 8, stride);
     h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
@@ -815,7 +815,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
     for( dir=1; dir>=0; dir-- ) {
         const x86_reg d_idx = dir ? -8 : -1;
         const int mask_mv = dir ? mask_mv1 : mask_mv0;
-        DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
+        DECLARE_ALIGNED(8, const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
         int b_idx, edge;
         for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
             __asm__ volatile(
@@ -964,8 +964,8 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uin
 \
     __asm__ volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
-        "movq %5, %%mm4             \n\t"\
-        "movq %6, %%mm5             \n\t"\
+        "movq "MANGLE(ff_pw_5) ", %%mm4\n\t"\
+        "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
         "1:                         \n\t"\
         "movd  -1(%0), %%mm1        \n\t"\
         "movd    (%0), %%mm2        \n\t"\
@@ -995,7 +995,7 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uin
         "decl %2                    \n\t"\
         " jnz 1b                    \n\t"\
         : "+a"(src), "+c"(dst), "+g"(h)\
-        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
         : "memory"\
     );\
 }\
@@ -2106,7 +2106,7 @@ H264_MC_816(H264_MC_HV, ssse3)
 #endif
 
 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
-DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = {
+DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = {
     0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
 };
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
index a39f54b3f..d1859080c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
@@ -23,10 +23,9 @@
 #include "libavutil/common.h"
 #include "libavcodec/dsputil.h"
 
+#include "dsputil_mmx.h"
 #include "mmx.h"
 
-#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
-
 #define ROW_SHIFT 11
 #define COL_SHIFT 6
 
@@ -399,10 +398,10 @@ static inline void idct_col (int16_t * const col, const int offset)
 #define T3 43790
 #define C4 23170
 
-    static const short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
-    static const short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
-    static const short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
-    static const short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
+    DECLARE_ALIGNED(8, static const short, t1_vector)[] = {T1,T1,T1,T1};
+    DECLARE_ALIGNED(8, static const short, t2_vector)[] = {T2,T2,T2,T2};
+    DECLARE_ALIGNED(8, static const short, t3_vector)[] = {T3,T3,T3,T3};
+    DECLARE_ALIGNED(8, static const short, c4_vector)[] = {C4,C4,C4,C4};
 
     /* column code adapted from Peter Gubanov */
     /* http://www.elecard.com/peter/idct.shtml */
@@ -541,20 +540,20 @@ static inline void idct_col (int16_t * const col, const int offset)
 }
 
 
-static const int32_t rounder0[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder0)[] =
     rounder ((1 << (COL_SHIFT - 1)) - 0.5);
-static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
-static const int32_t rounder1[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder4)[] = rounder (0);
+DECLARE_ALIGNED(8, static const int32_t, rounder1)[] =
     rounder (1.25683487303);        /* C1*(C1/C4+C1+C7)/2 */
-static const int32_t rounder7[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder7)[] =
     rounder (-0.25);                /* C1*(C7/C4+C7-C1)/2 */
-static const int32_t rounder2[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder2)[] =
     rounder (0.60355339059);        /* C2 * (C6+C2)/2 */
-static const int32_t rounder6[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder6)[] =
     rounder (-0.25);                /* C2 * (C6-C2)/2 */
-static const int32_t rounder3[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder3)[] =
     rounder (0.087788325588);       /* C3*(-C3/C4+C3+C5)/2 */
-static const int32_t rounder5[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder5)[] =
     rounder (-0.441341716183);      /* C3*(-C5/C4+C5-C3)/2 */
 
 #undef COL_SHIFT
@@ -563,13 +562,13 @@ static const int32_t rounder5[] ATTR_ALIGN(8) =
 #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
 void idct (int16_t * const block)                                       \
 {                                                                       \
-    static const int16_t table04[] ATTR_ALIGN(16) =                     \
+    DECLARE_ALIGNED(16, static const int16_t, table04)[] =              \
         table (22725, 21407, 19266, 16384, 12873,  8867, 4520);         \
-    static const int16_t table17[] ATTR_ALIGN(16) =                     \
+    DECLARE_ALIGNED(16, static const int16_t, table17)[] =              \
         table (31521, 29692, 26722, 22725, 17855, 12299, 6270);         \
-    static const int16_t table26[] ATTR_ALIGN(16) =                     \
+    DECLARE_ALIGNED(16, static const int16_t, table26)[] =              \
         table (29692, 27969, 25172, 21407, 16819, 11585, 5906);         \
-    static const int16_t table35[] ATTR_ALIGN(16) =                     \
+    DECLARE_ALIGNED(16, static const int16_t, table35)[] =              \
         table (26722, 25172, 22654, 19266, 15137, 10426, 5315);         \
                                                                         \
     idct_row_head (block, 0*8, table04);                                \
@@ -594,9 +593,6 @@ void idct (int16_t * const block)                                       \
     idct_col (block, 4);                                                \
 }
 
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
 declare_idct (ff_mmxext_idct, mmxext_table,
               mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c
index 22cd50019..50260ae6e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c
@@ -41,6 +41,7 @@
 
 #include <inttypes.h>
 #include "libavcodec/avcodec.h"
+#include "idct_xvid.h"
 
 //=============================================================================
 // Macros and other preprocessor constants
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h
index 691a200fd..010cfb70b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h
@@ -78,4 +78,23 @@ __asm__ volatile(\
 );
 #endif
 
+// avoid +32 for shift optimization (gcc should do that ...)
+#define NEG_SSR32 NEG_SSR32
+static inline  int32_t NEG_SSR32( int32_t a, int8_t s){ /* arithmetic (sign-propagating) right shift of a, count derived from -s */
+    __asm__ ("sarl %1, %0\n\t"      /* shifts operand %0 (a) in place by operand %1 */
+         : "+r" (a)                 /* a is both read and written, held in a general register */
+         : "ic" ((uint8_t)(-s))     /* NOTE(review): presumably relies on x86 using only the low 5 bits of the count, so -s acts like 32-s -- confirm */
+    );
+    return a;
+}
+
+#define NEG_USR32 NEG_USR32
+static inline uint32_t NEG_USR32(uint32_t a, int8_t s){ /* logical (zero-filling) right shift of a, count derived from -s */
+    __asm__ ("shrl %1, %0\n\t"      /* shifts operand %0 (a) in place by operand %1 */
+         : "+r" (a)                 /* a is both read and written, held in a general register */
+         : "ic" ((uint8_t)(-s))     /* NOTE(review): presumably relies on x86 using only the low 5 bits of the count, so -s acts like 32-s -- confirm */
+    );
+    return a;
+}
+
 #endif /* AVCODEC_X86_MATHOPS_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c
index 1e5cd2cb4..4a6840c98 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c
@@ -98,7 +98,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
     x86_reg last_non_zero_p1;
     int level=0, q; //=0 is because gcc says uninitialized ...
     const uint16_t *qmat, *bias;
-    DECLARE_ALIGNED_16(int16_t, temp_block)[64];
+    DECLARE_ALIGNED(16, int16_t, temp_block)[64];
 
     assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c
index 9a702084a..2b154c0a0 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c
@@ -24,7 +24,7 @@
 #include "dsputil_mmx.h"
 
 /* bias interleaved with bias div 8, use p+1 to access bias div 8 */
-DECLARE_ALIGNED_8(static const uint64_t, rv40_bias_reg)[4][8] = {
+DECLARE_ALIGNED(8, static const uint64_t, rv40_bias_reg)[4][8] = {
     { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0010001000100010ULL, 0x0002000200020002ULL,
       0x0020002000200020ULL, 0x0004000400040004ULL, 0x0010001000100010ULL, 0x0002000200020002ULL },
     { 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
index bf96f2470..e0b1f5b99 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
@@ -73,7 +73,7 @@
     "movq      %%mm"#R1", "#OFF"(%1)   \n\t"    \
     "add       %2, %0                  \n\t"
 
-DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL;
+DECLARE_ALIGNED(16, const uint64_t, ff_pw_9) = 0x0009000900090009ULL;
 
 /** Sacrifying mm6 allows to pipeline loads from src */
 static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
@@ -442,7 +442,7 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
             static const int shift_value[] = { 0, 5, 1, 5 };\
             int              shift = (shift_value[hmode]+shift_value[vmode])>>1;\
             int              r;\
-            DECLARE_ALIGNED_16(int16_t, tmp)[12*8];\
+            DECLARE_ALIGNED(16, int16_t, tmp)[12*8];\
 \
             r = (1<<(shift-1)) + rnd-1;\
             vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\
@@ -463,9 +463,6 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
 VC1_MSPEL_MC(put_)
 VC1_MSPEL_MC(avg_)
 
-void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-
 /** Macro to ease bicubic filter interpolation functions declarations */
 #define DECLARE_FUNCTION(a, b)                                          \
 static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c
index df0fcb9f1..89ecc7406 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c
@@ -19,13 +19,14 @@
  */
 
 /**
- * @file vp3dsp_mmx.c
+ * @file libavcodec/x86/vp3dsp_mmx.c
  * MMX-optimized functions cribbed from the original VP3 source code.
  */
 
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/dsputil.h"
 #include "dsputil_mmx.h"
+#include "vp3dsp_mmx.h"
 
 extern const uint16_t ff_vp3_idct_data[];
 
@@ -394,3 +395,44 @@ void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
     ff_vp3_idct_mmx(block);
     add_pixels_clamped_mmx(block, dest, line_size);
 }
+
+void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int linesize, const DCTELEM *block) /* add a value derived from block[0] to 8 rows of 8 bytes at dest, with saturation */
+{
+    int dc = block[0];
+    dc = (46341*dc)>>16;            /* 46341/65536 is about 0.7071 */
+    dc = (46341*dc + (8<<16))>>20;  /* same factor again, plus a rounding term, then 4 extra bits shifted out */
+
+    __asm__ volatile(
+        "movd          %3, %%mm0 \n\t" /* mm0 = dc */
+        "pshufw $0, %%mm0, %%mm0 \n\t" /* copy the low 16-bit word of dc into all four words */
+        "pxor       %%mm1, %%mm1 \n\t"
+        "psubw      %%mm0, %%mm1 \n\t" /* mm1 = -dc in every word */
+        "packuswb   %%mm0, %%mm0 \n\t" /* bytes of mm0 = dc saturated to 0..255 (0 when dc is negative) */
+        "packuswb   %%mm1, %%mm1 \n\t" /* bytes of mm1 = -dc saturated to 0..255 (0 when dc is positive) */
+
+#define DC_ADD \
+        "movq        (%0), %%mm2 \n\t" \
+        "movq     (%0,%1), %%mm3 \n\t" \
+        "paddusb    %%mm0, %%mm2 \n\t" \
+        "movq   (%0,%1,2), %%mm4 \n\t" \
+        "paddusb    %%mm0, %%mm3 \n\t" \
+        "movq     (%0,%2), %%mm5 \n\t" \
+        "paddusb    %%mm0, %%mm4 \n\t" \
+        "paddusb    %%mm0, %%mm5 \n\t" \
+        "psubusb    %%mm1, %%mm2 \n\t" \
+        "psubusb    %%mm1, %%mm3 \n\t" \
+        "movq       %%mm2, (%0)  \n\t" \
+        "psubusb    %%mm1, %%mm4 \n\t" \
+        "movq       %%mm3, (%0,%1) \n\t" \
+        "psubusb    %%mm1, %%mm5 \n\t" \
+        "movq       %%mm4, (%0,%1,2) \n\t" \
+        "movq       %%mm5, (%0,%2) \n\t"
+
+        DC_ADD                          /* rows 0..3: saturating add of mm0, then saturating subtract of mm1 */
+        "lea    (%0,%1,4), %0 \n\t"     /* advance dest by 4*linesize */
+        DC_ADD                          /* rows 4..7 */
+
+        : "+r"(dest)
+        : "r"((x86_reg)linesize), "r"((x86_reg)3*linesize), "r"(dc) /* %1 = linesize, %2 = 3*linesize, %3 = dc */
+    );
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h
index e565a3302..e0ebf0b0f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h
@@ -28,6 +28,7 @@
 void ff_vp3_idct_mmx(int16_t *data);
 void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block);
 
 void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
 void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c
index e7745b372..fb1ff4504 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c
@@ -25,8 +25,9 @@
 
 #include "libavcodec/dsputil.h"
 #include "dsputil_mmx.h"
+#include "vp3dsp_sse2.h"
 
-DECLARE_ALIGNED_16(const uint16_t, ff_vp3_idct_data)[7 * 8] =
+DECLARE_ALIGNED(16, const uint16_t, ff_vp3_idct_data)[7 * 8] =
 {
     64277,64277,64277,64277,64277,64277,64277,64277,
     60547,60547,60547,60547,60547,60547,60547,60547,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/attributes.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/attributes.h
new file mode 100644
index 000000000..e579432b5
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/attributes.h
@@ -0,0 +1,113 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/attributes.h
+ * Macro definitions for various function/variable attributes
+ */
+
+#ifndef AVUTIL_ATTRIBUTES_H
+#define AVUTIL_ATTRIBUTES_H
+
+#ifdef __GNUC__
+#    define AV_GCC_VERSION_AT_LEAST(x,y) (__GNUC__ > x || __GNUC__ == x && __GNUC_MINOR__ >= y)
+#else
+#    define AV_GCC_VERSION_AT_LEAST(x,y) 0
+#endif
+
+#ifndef av_always_inline
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_always_inline __attribute__((always_inline)) inline
+#else
+#    define av_always_inline __inline
+#endif
+#endif
+
+#ifndef av_noinline
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_noinline __attribute__((noinline))
+#else
+#    define av_noinline
+#endif
+#endif
+
+#ifndef av_pure
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_pure __attribute__((pure))
+#else
+#    define av_pure
+#endif
+#endif
+
+#ifndef av_const
+#if AV_GCC_VERSION_AT_LEAST(2,6)
+#    define av_const __attribute__((const))
+#else 
+#	 define av_const
+#endif
+#endif
+
+#ifndef av_cold
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,3)
+#    define av_cold __attribute__((cold))
+#else
+#    define av_cold
+#endif
+#endif
+
+#ifndef av_flatten
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,1)
+#    define av_flatten __attribute__((flatten))
+#else
+#    define av_flatten
+#endif
+#endif
+
+#ifndef attribute_deprecated
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define attribute_deprecated __attribute__((deprecated))
+#else
+#    define attribute_deprecated
+#endif
+#endif
+
+#ifndef av_unused
+#if defined(__GNUC__)
+#    define av_unused __attribute__((unused))
+#else
+#    define av_unused
+#endif
+#endif
+
+#ifndef av_uninit
+#if defined(__GNUC__) && !defined(__ICC)
+#    define av_uninit(x) x=x
+#else
+#    define av_uninit(x) x
+#endif
+#endif
+
+#ifdef __GNUC__
+#    define av_builtin_constant_p __builtin_constant_p
+#else
+#    define av_builtin_constant_p(x) 0
+#endif
+
+#endif /* AVUTIL_ATTRIBUTES_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avconfig.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avconfig.h
new file mode 100644
index 000000000..27e3d852c
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avconfig.h
@@ -0,0 +1,5 @@
+/* Generated by ffconf */
+#ifndef AVUTIL_AVCONFIG_H
+#define AVUTIL_AVCONFIG_H
+#define AV_HAVE_BIGENDIAN 0
+#endif /* AVUTIL_AVCONFIG_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avstring.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avstring.h
index d716c3a28..01c2391b5 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avstring.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avstring.h
@@ -47,6 +47,20 @@ int av_strstart(const char *str, const char *pfx, const char **ptr);
 int av_stristart(const char *str, const char *pfx, const char **ptr);
 
 /**
+ * Locate the first case-independent occurrence in the string haystack
+ * of the string needle.  A zero-length string needle is considered to
+ * match at the start of haystack.
+ *
+ * This function is a case-insensitive version of the standard strstr().
+ *
+ * @param haystack string to search in
+ * @param needle   string to search for
+ * @return         pointer to the located match within haystack
+ *                 or a null pointer if no match
+ */
+char *av_stristr(const char *haystack, const char *needle);
+
+/**
  * Copy the string src to dst, but no more than size - 1 bytes, and
  * null-terminate dst.
  *
@@ -83,7 +97,7 @@ size_t av_strlcat(char *dst, const char *src, size_t size);
 
 /**
  * Append output to a string, according to a format. Never write out of
- * the destination buffer, and and always put a terminating 0 within
+ * the destination buffer, and always put a terminating 0 within
  * the buffer.
  * @param dst destination buffer (string to which the output is
  *  appended)
@@ -95,4 +109,9 @@ size_t av_strlcat(char *dst, const char *src, size_t size);
  */
 size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...);
 
+/**
+ * Convert a number to an av_malloced string.
+ */
+char *av_d2str(double d);
+
 #endif /* AVUTIL_AVSTRING_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h
index 5c42416aa..982f8de06 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h
@@ -30,12 +30,17 @@
 #define AV_STRINGIFY(s)         AV_TOSTRING(s)
 #define AV_TOSTRING(s) #s
 
+#define AV_GLUE(a, b) a ## b
+#define AV_JOIN(a, b) AV_GLUE(a, b)
+
+#define AV_PRAGMA(s) _Pragma(#s)
+
 #define AV_VERSION_INT(a, b, c) (a<<16 | b<<8 | c)
 #define AV_VERSION_DOT(a, b, c) a ##.## b ##.## c
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
 #define LIBAVUTIL_VERSION_MAJOR 50
-#define LIBAVUTIL_VERSION_MINOR  7
+#define LIBAVUTIL_VERSION_MINOR 14
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
@@ -48,10 +53,22 @@
 
 #define LIBAVUTIL_IDENT         "Lavu" AV_STRINGIFY(LIBAVUTIL_VERSION)
 
+enum AVMediaType {
+    AVMEDIA_TYPE_UNKNOWN = -1,
+    AVMEDIA_TYPE_VIDEO,
+    AVMEDIA_TYPE_AUDIO,
+    AVMEDIA_TYPE_DATA,
+    AVMEDIA_TYPE_SUBTITLE,
+    AVMEDIA_TYPE_ATTACHMENT,
+    AVMEDIA_TYPE_NB
+};
 
 #include "common.h"
+#include "error.h"
 #include "mathematics.h"
 #include "rational.h"
+#include "intfloat_readwrite.h"
 #include "log.h"
+#include "pixfmt.h"
 
 #endif /* AVUTIL_AVUTIL_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/bswap.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/bswap.h
index 54c682678..fbfc6e3a2 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/bswap.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/bswap.h
@@ -28,7 +28,8 @@
 
 #include <stdint.h>
 #include "config.h"
-#include "common.h"
+#include "attributes.h"
+#include "common.h" /* for MSVC */
 
 #if ARCH_X86
 #   include "x86/bswap.h"
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
index 7b16d915d..bd0d079ea 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
@@ -34,98 +34,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include "attributes.h"
 
 #if defined(_MSC_VER) & !defined(__cplusplus)
 #    define inline __inline
 #endif
 
 #ifdef HAVE_AV_CONFIG_H
-#include "config.h"
-#endif
-
-#ifdef __GNUC__
-#    define AV_GCC_VERSION_AT_LEAST(x,y) (__GNUC__ > x || __GNUC__ == x && __GNUC_MINOR__ >= y)
-#else
-#    define AV_GCC_VERSION_AT_LEAST(x,y) 0
-#endif
-
-#ifndef av_always_inline
-#if AV_GCC_VERSION_AT_LEAST(3,1)
-#    define av_always_inline __attribute__((always_inline)) inline
-#else
-#    define av_always_inline inline
-#endif
-#endif
-
-#ifndef av_noinline
-#if AV_GCC_VERSION_AT_LEAST(3,1)
-#    define av_noinline __attribute__((noinline))
-#else
-#    define av_noinline
-#endif
-#endif
-
-#ifndef av_pure
-#if AV_GCC_VERSION_AT_LEAST(3,1)
-#    define av_pure __attribute__((pure))
-#else
-#    define av_pure
-#endif
-#endif
-
-#ifndef av_const
-#if AV_GCC_VERSION_AT_LEAST(2,6)
-#    define av_const __attribute__((const))
-#else
-#    define av_const
-#endif
-#endif
-
-#ifndef av_cold
-#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,3)
-#    define av_cold __attribute__((cold))
-#else
-#    define av_cold
-#endif
-#endif
-
-#ifndef av_flatten
-#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,1)
-#    define av_flatten __attribute__((flatten))
-#else
-#    define av_flatten
-#endif
-#endif
-
-#ifndef attribute_deprecated
-#if AV_GCC_VERSION_AT_LEAST(3,1)
-#    define attribute_deprecated __attribute__((deprecated))
-#else
-#    define attribute_deprecated
-#endif
-#endif
-
-#ifndef av_unused
-#if defined(__GNUC__)
-#    define av_unused __attribute__((unused))
-#else
-#    define av_unused
-#endif
-#endif
-
-#ifndef av_uninit
-#if defined(__GNUC__) && !defined(__ICC)
-#    define av_uninit(x) x=x
-#else
-#    define av_uninit(x) x
-#endif
-#endif
-
-#ifdef HAVE_AV_CONFIG_H
-#   include "intmath.h"
-#endif
-
-#ifdef HAVE_AV_CONFIG_H
 
 //rounded division & shift
 #define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
@@ -148,8 +63,7 @@ extern const uint8_t ff_log2_tab[256];
 
 extern const uint8_t av_reverse[256];
 
-#ifndef av_log2
-static inline av_const int av_log2(unsigned int v)
+static inline av_const int av_log2_c(unsigned int v)
 {
     int n = 0;
     if (v & 0xffff0000) {
@@ -164,10 +78,8 @@ static inline av_const int av_log2(unsigned int v)
 
     return n;
 }
-#endif
 
-#ifndef av_log2_16bit
-static inline av_const int av_log2_16bit(unsigned int v)
+static inline av_const int av_log2_16bit_c(unsigned int v)
 {
     int n = 0;
     if (v & 0xff00) {
@@ -178,6 +90,17 @@ static inline av_const int av_log2_16bit(unsigned int v)
 
     return n;
 }
+
+#ifdef HAVE_AV_CONFIG_H
+#   include "config.h"
+#   include "intmath.h"
+#endif
+
+#ifndef av_log2
+#   define av_log2       av_log2_c
+#endif
+#ifndef av_log2_16bit
+#   define av_log2_16bit av_log2_16bit_c
 #endif
 
 /**
@@ -341,6 +264,36 @@ static inline av_const int av_ceil_log2(int x)
         }\
     }
 
+/*!
+ * \def PUT_UTF16(val, tmp, PUT_16BIT)
+ * Converts a 32-bit Unicode character to its UTF-16 encoded form (2 or 4 bytes).
+ * \param val is an input-only argument and should be of type uint32_t. It holds
+ * a UCS-4 encoded Unicode character that is to be converted to UTF-16. If
+ * val is given as a function it is executed only once.
+ * \param tmp is a temporary variable and should be of type uint16_t. It
+ * represents an intermediate value during conversion that is to be
+ * output by PUT_16BIT.
+ * \param PUT_16BIT writes the converted UTF-16 data to any proper destination
+ * in desired endianness. It could be a function or a statement, and uses tmp
+ * as the input 16-bit word.  For example, PUT_16BIT could be "*output++ = tmp;"
+ * PUT_16BIT will be executed 1 or 2 times depending on input character.
+ */
+#define PUT_UTF16(val, tmp, PUT_16BIT)\
+    {\
+        uint32_t in = val;\
+        if (in < 0x10000) {\
+            tmp = in;\
+            PUT_16BIT\
+        } else {\
+            tmp = 0xD800 | ((in - 0x10000) >> 10);\
+            PUT_16BIT\
+            tmp = 0xDC00 | ((in - 0x10000) & 0x3FF);\
+            PUT_16BIT\
+        }\
+    }\
+
+
+
 #include "mem.h"
 
 #endif /* HAVE_AV_CONFIG_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/crc.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/crc.c
index 35ac73c35..d74c0cef1 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/crc.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/crc.c
@@ -19,6 +19,7 @@
  */
 
 #include "config.h"
+#include "common.h"
 #include "bswap.h"
 #include "crc.h"
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/crc.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/crc.h
index 5e5c663b1..b83ced156 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/crc.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/crc.h
@@ -23,7 +23,7 @@
 
 #include <stdint.h>
 #include <stddef.h>
-#include "common.h"
+#include "attributes.h"
 
 typedef uint32_t AVCRC;
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/error.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/error.h
new file mode 100644
index 000000000..3096baa9a
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/error.h
@@ -0,0 +1,60 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/error.h
+ * error code definitions
+ */
+
+#ifndef AVUTIL_ERROR_H
+#define AVUTIL_ERROR_H
+
+#include <errno.h>
+#include "avutil.h"
+
+/* error handling */
+#if EDOM > 0
+#define AVERROR(e) (-(e))   ///< Returns a negative error code from a POSIX error code, to return from library functions.
+#define AVUNERROR(e) (-(e)) ///< Returns a POSIX error code from a library function error return value.
+#else
+/* Some platforms have E* and errno already negated. */
+#define AVERROR(e) (e)
+#define AVUNERROR(e) (e)
+#endif
+
+#if LIBAVUTIL_VERSION_MAJOR < 51
+#define AVERROR_INVALIDDATA AVERROR(EINVAL)  ///< Invalid data found when processing input
+#define AVERROR_IO          AVERROR(EIO)     ///< I/O error
+#define AVERROR_NOENT       AVERROR(ENOENT)  ///< No such file or directory
+#define AVERROR_NOFMT       AVERROR(EILSEQ)  ///< Unknown format
+#define AVERROR_NOMEM       AVERROR(ENOMEM)  ///< Not enough memory
+#define AVERROR_NUMEXPECTED AVERROR(EDOM)    ///< Number syntax expected in filename
+#define AVERROR_UNKNOWN     AVERROR(EINVAL)  ///< Unknown error
+#endif
+
+#define AVERROR_EOF         AVERROR(EPIPE)   ///< End of file
+#define AVERROR_NOTSUPP     AVERROR(ENOSYS)  ///< Operation not supported
+
+#define AVERROR_PATCHWELCOME    (-MKTAG('P','A','W','E')) ///< Not yet implemented in FFmpeg, patches welcome
+
+#if LIBAVUTIL_VERSION_MAJOR > 50
+#define AVERROR_INVALIDDATA     (-MKTAG('I','N','D','A')) ///< Invalid data found when processing input
+#define AVERROR_NUMEXPECTED     (-MKTAG('N','U','E','X')) ///< Number syntax expected in filename
+#endif
+
+#endif /* AVUTIL_ERROR_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h
index 732ca66b4..54e39042f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h
@@ -35,8 +35,7 @@
 #include <stddef.h>
 #include <assert.h>
 #include "config.h"
-#include "common.h"
-#include "mem.h"
+#include "attributes.h"
 #include "timer.h"
 
 #ifndef attribute_align_arg
@@ -55,6 +54,14 @@
 #endif
 #endif
 
+#ifndef av_alias
+#if HAVE_ATTRIBUTE_MAY_ALIAS && (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(3,3)
+#   define av_alias __attribute__((may_alias))
+#else
+#   define av_alias
+#endif
+#endif
+
 #ifndef INT16_MIN
 #define INT16_MIN       (-0x7fff - 1)
 #endif
@@ -99,20 +106,11 @@
 #    define offsetof(T, F) ((unsigned int)((char *)&((T *)0)->F))
 #endif
 
-#ifdef _MSC_VER
-	#define snprintf _snprintf
-	#define vsnprintf _vsnprintf
-#endif
+#define snprintf _snprintf
+#define vsnprintf _vsnprintf
 
 #if defined(__MINGW32__) || defined(__CYGWIN__)
-// ==> Start patch MPC
-// Prefix is different for MSVC in x64 !
-#if defined(ARCH_X86_64)
-#    define EXTERN_PREFIX ""
-#else
-#    define EXTERN_PREFIX "_"
-#endif
-// <== End patch MPC
+#define EXTERN_PREFIX "_"
 #endif
 
 /* Use to export labels from asm. */
@@ -144,28 +142,6 @@
 
 /* math */
 
-extern const uint8_t ff_sqrt_tab[256];
-
-static inline av_const unsigned int ff_sqrt(unsigned int a)
-{
-    unsigned int b;
-
-    if (a < 255) return (ff_sqrt_tab[a + 1] - 1) >> 4;
-    else if (a < (1 << 12)) b = ff_sqrt_tab[a >> 4] >> 2;
-#if !CONFIG_SMALL
-    else if (a < (1 << 14)) b = ff_sqrt_tab[a >> 6] >> 1;
-    else if (a < (1 << 16)) b = ff_sqrt_tab[a >> 8]   ;
-#endif
-    else {
-        int s = av_log2_16bit(a >> 16) >> 1;
-        unsigned int c = a >> (s + 2);
-        b = ff_sqrt_tab[c >> (s + 8)];
-        b = FASTDIV(c,b) + (b << s);
-    }
-
-    return b - (a < b * b);
-}
-
 #if ARCH_X86
 #define MASK_ABS(mask, level)\
             __asm__ volatile(\
@@ -228,84 +204,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a)
     }\
 }
 
-/* ffdshow custom code */
-#ifndef __GNUC__
-
-#ifndef exp2
-static av_always_inline av_const double exp2(double x)
-{
-    return exp(x * 0.693147180559945);
-}
-#endif
-
-#ifndef exp2f
-static av_always_inline av_const float exp2f(float x)
-{
-    return exp2(x);
-}
-#endif
-
-#ifndef rint
-#define rint(x) (int)(x+0.5)
-#endif
-
-#ifndef llrint
-static av_always_inline av_const long long llrint(double x)
-{
-    return rint(x);
-}
-#endif
-
-#ifndef log2
-static av_always_inline av_const double log2(double x)
-{
-    return log(x) * 1.44269504088896340736;
-}
-#endif
-
-#ifndef log2f
-static av_always_inline av_const float log2f(float x)
-{
-    return log2(x);
-}
-#endif
-
-#ifndef lrint
-static av_always_inline av_const long int lrint(double x)
-{
-    return rint(x);
-}
-#endif
-
-#ifndef lrintf
-static av_always_inline av_const long int lrintf(float x)
-{
-    return (int)(rint(x));
-}
-#endif
-
-#ifndef round
-static av_always_inline av_const double round(double x)
-{
-    return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5);
-}
-#endif
-
-#ifndef roundf
-static av_always_inline av_const float roundf(float x)
-{
-    return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5);
-}
-#endif
-
-#ifndef truncf
-static av_always_inline av_const float truncf(float x)
-{
-    return (x > 0) ? floor(x) : ceil(x);
-}
-#endif
-
-#endif /* __GNUC__ */
+#include "libm.h"
 
 /**
  * Returns NULL if CONFIG_SMALL is true, otherwise the argument
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.c
new file mode 100644
index 000000000..1e60fc1db
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.c
@@ -0,0 +1,97 @@
+/*
+ * portable IEEE float/double read/write functions
+ *
+ * Copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/intfloat_readwrite.c
+ * portable IEEE float/double read/write functions
+ */
+
+#include "common.h"
+#include "intfloat_readwrite.h"
+
+double av_int2dbl(int64_t v){ /* decode an IEEE-754 binary64 bit pattern held in v into a double */
+    if((uint64_t)v+v > 0xFFEULL<<52) /* NaN: exponent all ones, mantissa nonzero; the sum is done unsigned so dropping the sign bit cannot overflow a signed type */
+        return 0.0/0.0;
+    return ldexp(((v&((1LL<<52)-1)) + (1LL<<52)) * (v>>63|1), (v>>52&0x7FF)-1075); /* (mantissa + implicit leading 1) * sign, scaled by the unbiased exponent */
+}
+
+float av_int2flt(int32_t v){ /* decode an IEEE-754 binary32 bit pattern held in v into a float */
+    if((uint32_t)v+v > 0xFF000000U) /* NaN: exponent all ones, mantissa nonzero; the sum is done unsigned so dropping the sign bit cannot overflow a signed type */
+        return 0.0/0.0;
+    return ldexp(((v&0x7FFFFF) + (1<<23)) * (v>>31|1), (v>>23&0xFF)-150); /* (mantissa + implicit leading 1) * sign, scaled by the unbiased exponent */
+}
+
+double av_ext2dbl(const AVExtFloat ext){ /* decode an 80-bit extended float (most significant byte first) into a double */
+    uint64_t m = 0;
+    int e, i;
+
+    for (i = 0; i < 8; i++)
+        m = (m<<8) + ext.mantissa[i];
+    e = (((int)ext.exponent[0]&0x7f)<<8) | ext.exponent[1];
+    if (e == 0x7fff && m)
+        return 0.0/0.0;
+    e -= 16383 + 63;        /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
+                             * mantissa bit is written as opposed to the
+                             * single and double precision formats. */
+    /* Negate the converted result, not m: m is unsigned, so -m would wrap
+     * to 2^64 - m and a negative input would decode as a positive value. */
+    return (ext.exponent[0]&0x80) ? -ldexp(m, e) : ldexp(m, e);
+}
+
+int64_t av_dbl2int(double d){ /* encode d as an IEEE-754 binary64 bit pattern */
+    int e;
+    if     ( !d) return 0; /* +0.0 and -0.0 both encode as 0 */
+    else if(d-d) return 0x7FF0000000000000LL + ((int64_t)(d<0)<<63) + (d!=d); /* d-d is nonzero (NaN) only for infinities and NaN; d!=d sets the lowest mantissa bit for NaN */
+    d= frexp(d, &e); /* d becomes the fraction with 0.5 <= |d| < 1, e the matching power of two */
+    return (int64_t)(d<0)<<63 | (e+1022LL)<<52 | (int64_t)((fabs(d)-0.5)*(1LL<<53)); /* sign | biased exponent | fraction without its leading bit */
+}
+
+int32_t av_flt2int(float d){ /* encode d as an IEEE-754 binary32 bit pattern */
+    int e;
+    if     ( !d) return 0; /* +0.0 and -0.0 both encode as 0 */
+    else if(d-d) return 0x7F800000 + ((d<0)<<31) + (d!=d); /* d-d is nonzero (NaN) only for infinities and NaN; d!=d sets the lowest mantissa bit for NaN */
+    d= frexp(d, &e); /* d becomes the fraction with 0.5 <= |d| < 1, e the matching power of two */
+    return (d<0)<<31 | (e+126)<<23 | (int64_t)((fabs(d)-0.5)*(1<<24)); /* sign | biased exponent | fraction without its leading bit */
+}
+
+AVExtFloat av_dbl2ext(double d){ /* encode d into the 10-byte AVExtFloat layout (2 exponent bytes, 8 mantissa bytes) */
+    struct AVExtFloat ext= {{0}};
+    int e, i; double f; uint64_t m;
+
+    f = fabs(frexp(d, &e)); /* magnitude of the fraction; e receives the power of two */
+    if (f >= 0.5 && f < 1) { /* ordinary nonzero finite value */
+        e += 16382;
+        ext.exponent[0] = e>>8; /* high exponent byte first */
+        ext.exponent[1] = e;
+        m = (uint64_t)ldexp(f, 64); /* fraction scaled to a 64-bit integer; its top bit is set because f >= 0.5 */
+        for (i=0; i < 8; i++)
+            ext.mantissa[i] = m>>(56-(i<<3)); /* most significant byte first */
+    } else if (f != 0.0) { /* NOTE(review): presumably reached only for infinity or NaN -- confirm frexp behaviour on the target libm */
+        ext.exponent[0] = 0x7f; ext.exponent[1] = 0xff; /* exponent field all ones */
+        if (f != 1/0.0) /* not infinity */
+            ext.mantissa[0] = ~0;
+    }
+    if (d < 0)
+        ext.exponent[0] |= 0x80; /* sign bit is the top bit of the first exponent byte */
+    return ext;
+}
+
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.h
new file mode 100644
index 000000000..da3ed0249
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.h
@@ -0,0 +1,40 @@
+/*
+ * copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_INTFLOAT_READWRITE_H
+#define AVUTIL_INTFLOAT_READWRITE_H
+
+#include <stdint.h>
+#include "common.h"
+
+/* IEEE 80 bits extended float */
+typedef struct AVExtFloat  {
+    uint8_t exponent[2]; /* top bit of [0] is the sign; the other 15 bits are the biased exponent, high byte first */
+    uint8_t mantissa[8]; /* 64-bit mantissa, most significant byte first */
+} AVExtFloat;
+
+double av_int2dbl(int64_t v) av_const;
+float av_int2flt(int32_t v) av_const;
+double av_ext2dbl(const AVExtFloat ext) av_const;
+int64_t av_dbl2int(double d) av_const;
+int32_t av_flt2int(float d) av_const;
+AVExtFloat av_dbl2ext(double d) av_const;
+
+#endif /* AVUTIL_INTFLOAT_READWRITE_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intmath.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intmath.h
index de9ef704b..052b61406 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intmath.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intmath.h
@@ -21,8 +21,9 @@
 #ifndef AVUTIL_INTMATH_H
 #define AVUTIL_INTMATH_H
 
+#include <stdint.h>
 #include "config.h"
-#include "common.h"
+#include "attributes.h"
 
 extern const uint32_t ff_inverse[257];
 
@@ -54,4 +55,42 @@ extern const uint32_t ff_inverse[257];
 
 #endif /* FASTDIV */
 
+/*
+ * Get definition of av_log2_c from common.h.  In the event we got
+ * here through common.h including this file, including it again will
+ * be a no-op due to multi-inclusion guards, so we must duplicate the
+ * fallback defines here.
+ */
+
+#include "common.h"
+
+#ifndef av_log2
+#   define av_log2       av_log2_c
+#endif
+#ifndef av_log2_16bit
+#   define av_log2_16bit av_log2_16bit_c
+#endif
+
+extern const uint8_t ff_sqrt_tab[256];
+
+static inline av_const unsigned int ff_sqrt(unsigned int a) /* table-driven integer square root; presumably floor(sqrt(a)) -- TODO confirm against ff_sqrt_tab */
+{
+    unsigned int b;
+    /* small inputs are answered from the table alone, larger ones refine a table estimate */
+    if (a < 255) return (ff_sqrt_tab[a + 1] - 1) >> 4;
+    else if (a < (1 << 12)) b = ff_sqrt_tab[a >> 4] >> 2;
+#if !CONFIG_SMALL /* extra table-only ranges; with CONFIG_SMALL these inputs take the generic branch below */
+    else if (a < (1 << 14)) b = ff_sqrt_tab[a >> 6] >> 1;
+    else if (a < (1 << 16)) b = ff_sqrt_tab[a >> 8]   ;
+#endif
+    else {
+        int s = av_log2_16bit(a >> 16) >> 1; /* half the bit position of the top set bit in a's upper 16 bits */
+        unsigned int c = a >> (s + 2);
+        b = ff_sqrt_tab[c >> (s + 8)];       /* initial estimate from the table */
+        b = FASTDIV(c,b) + (b << s);         /* refine the estimate using one division */
+    }
+    /* step down by one when the estimate overshoots (b*b > a) */
+    return b - (a < b * b);
+}
+
 #endif /* AVUTIL_INTMATH_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intreadwrite.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intreadwrite.h
index 0a36981c5..39fb9155b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intreadwrite.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intreadwrite.h
@@ -22,6 +22,33 @@
 #include <stdint.h>
 #include "config.h"
 #include "bswap.h"
+#include "common.h"
+
+/* ffdshow custom code */
+#if defined(_MSC_VER)
+#define av_alias
+#endif
+
+typedef union { /* one 64-bit cell viewed as narrower integers or floats; used by the 64-bit AV_RN*A/AV_COPY/AV_ZERO style macros in this header */
+    uint64_t u64;
+    uint32_t u32[2];
+    uint16_t u16[4];
+    uint8_t  u8 [8];
+    double   f64;
+    float    f32[2];
+} av_alias av_alias64; /* NOTE(review): av_alias is presumably a may-alias attribute defined elsewhere -- confirm */
+
+typedef union { /* one 32-bit cell viewed as narrower integers or a float; used by the 32-bit access macros in this header */
+    uint32_t u32;
+    uint16_t u16[2];
+    uint8_t  u8 [4];
+    float    f32;
+} av_alias av_alias32; /* NOTE(review): av_alias is presumably a may-alias attribute defined elsewhere -- confirm */
+
+typedef union { /* one 16-bit cell viewed as a word or two bytes; used by the 16-bit access macros in this header */
+    uint16_t u16;
+    uint8_t  u8 [2];
+} av_alias av_alias16; /* NOTE(review): av_alias is presumably a may-alias attribute defined elsewhere -- confirm */
 
 /*
  * Arch-specific headers can provide any combination of
@@ -147,12 +174,12 @@
 
 #if   HAVE_ATTRIBUTE_PACKED
 
-struct unaligned_64 { uint64_t l; } __attribute__((packed));
-struct unaligned_32 { uint32_t l; } __attribute__((packed));
-struct unaligned_16 { uint16_t l; } __attribute__((packed));
+union unaligned_64 { uint64_t l; } __attribute__((packed)) av_alias;
+union unaligned_32 { uint32_t l; } __attribute__((packed)) av_alias;
+union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias;
 
-#   define AV_RN(s, p) (((const struct unaligned_##s *) (p))->l)
-#   define AV_WN(s, p, v) ((((struct unaligned_##s *) (p))->l) = (v))
+#   define AV_RN(s, p) (((const union unaligned_##s *) (p))->l)
+#   define AV_WN(s, p, v) ((((union unaligned_##s *) (p))->l) = (v))
 
 #elif defined(__DECC)
 
@@ -161,8 +188,8 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
 
 #elif HAVE_FAST_UNALIGNED
 
-#   define AV_RN(s, p) (*((const uint##s##_t*)(p)))
-#   define AV_WN(s, p, v) (*((uint##s##_t*)(p)) = (v))
+#   define AV_RN(s, p) (((const av_alias##s*)(p))->u##s)
+#   define AV_WN(s, p, v) (((av_alias##s*)(p))->u##s = (v))
 
 #else
 
@@ -392,13 +419,54 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
     } while(0)
 #endif
 
+/*
+ * The AV_[RW]NA macros access naturally aligned data
+ * in a type-safe way.
+ */
+
+#define AV_RNA(s, p)    (((const av_alias##s*)(p))->u##s)
+#define AV_WNA(s, p, v) (((av_alias##s*)(p))->u##s = (v))
+
+#ifndef AV_RN16A
+#   define AV_RN16A(p) AV_RNA(16, p)
+#endif
+
+#ifndef AV_RN32A
+#   define AV_RN32A(p) AV_RNA(32, p)
+#endif
+
+#ifndef AV_RN64A
+#   define AV_RN64A(p) AV_RNA(64, p)
+#endif
+
+#ifndef AV_WN16A
+#   define AV_WN16A(p, v) AV_WNA(16, p, v)
+#endif
+
+#ifndef AV_WN32A
+#   define AV_WN32A(p, v) AV_WNA(32, p, v)
+#endif
+
+#ifndef AV_WN64A
+#   define AV_WN64A(p, v) AV_WNA(64, p, v)
+#endif
+
 /* Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be
  * naturally aligned. They may be implemented using MMX,
  * so emms_c() must be called before using any float code
  * afterwards.
  */
 
-#define AV_COPY(n, d, s) (*(uint##n##_t*)(d) = *(const uint##n##_t*)(s))
+#define AV_COPY(n, d, s) \
+    (((av_alias##n*)(d))->u##n = ((const av_alias##n*)(s))->u##n)
+
+#ifndef AV_COPY16
+#   define AV_COPY16(d, s) AV_COPY(16, d, s)
+#endif
+
+#ifndef AV_COPY32
+#   define AV_COPY32(d, s) AV_COPY(32, d, s)
+#endif
 
 #ifndef AV_COPY64
 #   define AV_COPY64(d, s) AV_COPY(64, d, s)
@@ -412,13 +480,21 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
     } while(0)
 #endif
 
-#define AV_SWAP(n, a, b) FFSWAP(uint##n##_t, *(uint##n##_t*)(a), *(uint##n##_t*)(b))
+#define AV_SWAP(n, a, b) FFSWAP(av_alias##n, *(av_alias##n*)(a), *(av_alias##n*)(b))
 
 #ifndef AV_SWAP64
 #   define AV_SWAP64(a, b) AV_SWAP(64, a, b)
 #endif
 
-#define AV_ZERO(n, d) (*(uint##n##_t*)(d) = 0)
+#define AV_ZERO(n, d) (((av_alias##n*)(d))->u##n = 0)
+
+#ifndef AV_ZERO16
+#   define AV_ZERO16(d) AV_ZERO(16, d)
+#endif
+
+#ifndef AV_ZERO32
+#   define AV_ZERO32(d) AV_ZERO(32, d)
+#endif
 
 #ifndef AV_ZERO64
 #   define AV_ZERO64(d) AV_ZERO(64, d)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lfg.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lfg.c
index 1382f0214..09a7104a6 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lfg.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lfg.c
@@ -1,64 +1,56 @@
-/*
- * Lagged Fibonacci PRNG
- * Copyright (c) 2008 Michael Niedermayer
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <inttypes.h>
-#include "lfg.h"
-#include "md5.h"
-#include "intreadwrite.h"
-
-void av_cold av_lfg_init(AVLFG *c, unsigned int seed){
-    uint8_t tmp[16]={0};
-    int i;
-
-    for(i=8; i<64; i+=4){
-        AV_WL32(tmp, seed); tmp[4]=i;
-        av_md5_sum(tmp, tmp,  16);
-        c->state[i  ]= AV_RL32(tmp);
-        c->state[i+1]= AV_RL32(tmp+4);
-        c->state[i+2]= AV_RL32(tmp+8);
-        c->state[i+3]= AV_RL32(tmp+12);
-    }
-    c->index=0;
-}
-
-#ifdef TEST
-#include "log.h"
-#include "common.h"
-
-int main(void)
-{
-    int x=0;
-    int i, j;
-    AVLFG state;
-
-    av_lfg_init(&state, 0xdeadbeef);
-    for (j = 0; j < 10000; j++) {
-        START_TIMER
-        for (i = 0; i < 624; i++) {
-//            av_log(NULL,AV_LOG_ERROR, "%X\n", av_lfg_get(&state));
-            x+=av_lfg_get(&state);
-        }
-        STOP_TIMER("624 calls of av_lfg_get");
-    }
-    av_log(NULL, AV_LOG_ERROR, "final value:%X\n", x);
-    return 0;
-}
-#endif
+/*
+ * Lagged Fibonacci PRNG
+ * Copyright (c) 2008 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include "lfg.h"
+#include "md5.h"
+#include "intreadwrite.h"
+#include "attributes.h"
+
+void av_cold av_lfg_init(AVLFG *c, unsigned int seed){ /* fill the generator state from chained MD5 digests of the seed */
+    uint8_t tmp[16]={0};
+    int i;
+    /* each pass hashes tmp in place and yields four state words; only state[8..63] is written here */
+    for(i=8; i<64; i+=4){
+        AV_WL32(tmp, seed); tmp[4]=i; /* bytes 0-3 = seed, byte 4 = counter; bytes 5-15 keep the previous digest */
+        av_md5_sum(tmp, tmp,  16);
+        c->state[i  ]= AV_RL32(tmp);
+        c->state[i+1]= AV_RL32(tmp+4);
+        c->state[i+2]= AV_RL32(tmp+8);
+        c->state[i+3]= AV_RL32(tmp+12);
+    }
+    c->index=0;
+}
+
+/* Box-Muller Gaussian generator: writes two samples derived from lfg into out[]. */
+void av_bmg_get(AVLFG *lfg, double out[2])
+{
+    double u, v, s;
+
+    for (;;) {                          /* draw pairs in [-1,1] until u*u + v*v < 1 */
+        u = 2.0/UINT_MAX*av_lfg_get(lfg) - 1.0;
+        v = 2.0/UINT_MAX*av_lfg_get(lfg) - 1.0;
+        s = u*u + v*v;
+        if (!(s >= 1.0)) break;
+    }
+    s = sqrt((-2.0 * log(s)) / s);      /* common scale factor for both outputs */
+    out[0] = u * s; out[1] = v * s;
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lfg.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lfg.h
index 939e160ab..b416683ea 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lfg.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lfg.h
@@ -22,9 +22,8 @@
 #ifndef AVUTIL_LFG_H
 #define AVUTIL_LFG_H
 
-#ifdef HAVE_AV_CONFIG_H
-#include "libavutil/common.h"
-#endif
+#include <stdint.h>
+#include "attributes.h"
 
 typedef struct {
     unsigned int state[64];
@@ -39,7 +38,7 @@ void av_lfg_init(AVLFG *c, unsigned int seed);
  * Please also consider a simple LCG like state= state*1664525+1013904223,
  * it may be good enough and faster for your specific use case.
  */
-static inline unsigned int av_lfg_get(AVLFG *c){
+static av_always_inline unsigned int av_lfg_get(AVLFG *c){
     c->state[c->index & 63] = c->state[(c->index-24) & 63] + c->state[(c->index-55) & 63];
     return c->state[c->index++ & 63];
 }
@@ -49,10 +48,18 @@ static inline unsigned int av_lfg_get(AVLFG *c){
  *
  * Please also consider av_lfg_get() above, it is faster.
  */
-static inline unsigned int av_mlfg_get(AVLFG *c){
+static av_always_inline unsigned int av_mlfg_get(AVLFG *c){
     unsigned int a= c->state[(c->index-55) & 63];
     unsigned int b= c->state[(c->index-24) & 63];
     return c->state[c->index++ & 63] = 2*a*b+a+b;
 }
 
+/**
+ * Gets the next two numbers generated by a Box-Muller Gaussian
+ * generator using the random numbers issued by lfg.
+ *
+ * @param out array of two doubles where the generated numbers are placed
+ */
+void av_bmg_get(AVLFG *lfg, double out[2]);
+
 #endif /* AVUTIL_LFG_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/libm.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/libm.h
new file mode 100644
index 000000000..0b85fa7a4
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/libm.h
@@ -0,0 +1,101 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/libm.h
+ * Replacements for frequently missing libm functions
+ */
+
+#ifndef AVUTIL_LIBM_H
+#define AVUTIL_LIBM_H
+
+#include <math.h>
+#include "config.h"
+#include "attributes.h"
+
+#ifndef exp2 /* fallback 2^x computed as e^(x * ln 2); #ifndef only sees macros, so this also applies when exp2 is a plain function */
+#define exp2(x) exp((x) * 0.693147180559945)
+#endif
+
+#ifndef exp2f /* single-precision result obtained by narrowing exp2() */
+#define exp2f(x) ((float)exp2(x))
+#endif
+
+#ifndef rint /* fallback: adds 0.5 and truncates to int, so inputs below -0.5 round toward zero; argument and result are parenthesised */
+#define rint(x) ((int)((x)+0.5))
+#endif
+
+#ifndef llrint /* widens whatever rint() produces; NOTE(review): if rint is an int-valued macro, the range is limited to int -- confirm */
+#define llrint(x) ((long long)rint(x))
+#endif
+
+#ifndef log2 /* fallback log2(x) computed as ln(x) * (1 / ln 2) */
+#define log2(x) (log(x) * 1.44269504088896340736)
+#endif
+
+#ifndef log2f /* single-precision result obtained by narrowing log2() */
+#define log2f(x) ((float)log2(x))
+#endif
+
+#ifndef lrint /* defined only when lrint is not already a macro */
+static av_always_inline av_const long int lrint(double x)
+{   /* forwards to rint(), which may be a macro of the same name in scope, and converts to long */
+    return rint(x);
+}
+#endif
+
+#ifndef lrintf /* defined only when lrintf is not already a macro */
+static av_always_inline av_const long int lrintf(float x)
+{   /* rounds through rint() and narrows to int before widening to the long return type */
+    return (int)(rint(x));
+}
+#endif
+
+#ifndef __GNUC__
+
+#ifndef round /* defined only when round is not already a macro */
+static av_always_inline av_const double round(double x)
+{   /* round to nearest, halfway cases away from zero */
+    return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5);
+}
+#endif
+
+#ifndef roundf /* defined only when roundf is not already a macro */
+static av_always_inline av_const float roundf(float x)
+{   /* round to nearest, halfway cases away from zero; computed in double, then narrowed to float */
+    return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5);
+}
+#endif
+
+#ifndef truncf /* defined only when truncf is not already a macro */
+static av_always_inline av_const float truncf(float x)
+{   /* drop the fractional part: floor for positive x, ceil otherwise */
+    return (x > 0) ? floor(x) : ceil(x);
+}
+#endif
+
+#ifndef cbrtf /* defined only when cbrtf is not already a macro */
+static float cbrtf(float x)
+{   /* pow() returns NaN for a negative base with a fractional exponent, so root |x| and restore the sign */
+    return (x < 0) ? -pow(-(float)x, (float)1.0/3) : pow((float)x, (float)1.0/3);
+}
+#endif
+
+#endif /* __GNUC__ */
+
+#endif /* AVUTIL_LIBM_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
index 7d9ec0908..7da2a01fa 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
@@ -30,7 +30,8 @@
 #if LIBAVUTIL_VERSION_MAJOR > 50
 static
 #endif
-int av_log_level = AV_LOG_INFO;
+int av_log_level = AV_LOG_WARNING;
+
 
 void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
 {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mathematics.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mathematics.c
index 97aa1cb4c..294e66fc8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mathematics.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mathematics.c
@@ -24,8 +24,8 @@
  */
 
 #include <assert.h>
-#include "avutil.h"
-#include "common.h"
+#include <stdint.h>
+#include <limits.h>
 #include "mathematics.h"
 
 const uint8_t ff_sqrt_tab[256]={
@@ -73,3 +73,73 @@ int64_t av_gcd(int64_t a, int64_t b){
     if(b) return av_gcd(b, a%b);
     else  return a;
 }
+
+int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd){ /* computes a*b/c with the requested rounding, without overflowing the intermediate product */
+    int64_t r=0; /* rounding addend applied before the division */
+    assert(c > 0);
+    assert(b >=0);
+    assert(rnd >=0 && rnd<=5 && rnd!=4);
+    /* negative a: rescale -a and negate; the XOR flips bit 0 of rnd when bit 1 is set, i.e. swaps modes 2 and 3 (presumably DOWN/UP -- confirm against enum AVRounding) */
+    if(a<0 && a != INT64_MIN) return -av_rescale_rnd(-a, b, c, rnd ^ ((rnd>>1)&1));
+
+    if(rnd==AV_ROUND_NEAR_INF) r= c/2; /* half the divisor: round to nearest */
+    else if(rnd&1)             r= c-1; /* other odd modes: round the quotient up */
+    /* fast paths when b and c both fit in an int */
+    if(b<=INT_MAX && c<=INT_MAX){
+        if(a<=INT_MAX)
+            return (a * b + r)/c;
+        else
+            return a/c*b + (a%c*b + r)/c; /* split a into quotient and remainder by c to keep products small */
+    }else{
+#if 1
+        uint64_t a0= a&0xFFFFFFFF; /* low and high 32-bit halves of a and b */
+        uint64_t a1= a>>32;
+        uint64_t b0= b&0xFFFFFFFF;
+        uint64_t b1= b>>32;
+        uint64_t t1= a0*b1 + a1*b0; /* sum of the two cross products */
+        uint64_t t1a= t1<<32;
+        int i;
+        /* a1:a0 becomes the 128-bit product a*b, then r is added with carry */
+        a0 = a0*b0 + t1a;
+        a1 = a1*b1 + (t1>>32) + (a0<t1a);
+        a0 += r;
+        a1 += a0<r;
+        /* bit-by-bit long division of a1:a0 by c; t1 collects the quotient (its old bits are shifted out over 64 steps) */
+        for(i=63; i>=0; i--){
+//            int o= a1 & 0x8000000000000000ULL;
+            a1+= a1 + ((a0>>i)&1);
+            t1+=t1;
+            if(/*o || */c <= a1){
+                a1 -= c;
+                t1++;
+            }
+        }
+        return t1;
+    }
+#else
+        AVInteger ai; /* disabled alternative using the AVInteger big-number helpers */
+        ai= av_mul_i(av_int2i(a), av_int2i(b));
+        ai= av_add_i(ai, av_int2i(r));
+
+        return av_i2int(av_div_i(ai, av_int2i(c)));
+    }
+#endif
+}
+
+int64_t av_rescale(int64_t a, int64_t b, int64_t c){ /* a*b/c via av_rescale_rnd() with the AV_ROUND_NEAR_INF mode */
+    return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
+}
+
+int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq){
+    /* a * bq / cq, expressed as a * (bq.num*cq.den) / (cq.num*bq.den) with 64-bit cross products */
+    const int64_t scale_num = (int64_t)cq.den * bq.num, scale_den = (int64_t)bq.den * cq.num;
+    return av_rescale_rnd(a, scale_num, scale_den, AV_ROUND_NEAR_INF);
+}
+
+int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b){
+    /* cross-multiplied time bases: ts_a*to_b/to_a is ts_a in tb_b units, and ts_b*to_a/to_b is ts_b in tb_a units */
+    const int64_t to_b = (int64_t)tb_b.den * tb_a.num;
+    const int64_t to_a = (int64_t)tb_a.den * tb_b.num;
+    if (av_rescale_rnd(ts_a, to_b, to_a, AV_ROUND_DOWN) < ts_b) return -1;
+    return (av_rescale_rnd(ts_b, to_a, to_b, AV_ROUND_DOWN) < ts_a) ? 1 : 0;
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mathematics.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mathematics.h
index b1dc6a67f..bbb7db113 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mathematics.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mathematics.h
@@ -23,7 +23,7 @@
 
 #include <stdint.h>
 #include <math.h>
-#include "common.h"
+#include "attributes.h"
 #include "rational.h"
 
 #ifndef M_E
@@ -35,12 +35,18 @@
 #ifndef M_LN10
 #define M_LN10         2.30258509299404568402  /* log_e 10 */
 #endif
+#ifndef M_LOG2_10
+#define M_LOG2_10      3.32192809488736234787  /* log_2 10 */
+#endif
 #ifndef M_PI
 #define M_PI           3.14159265358979323846  /* pi */
 #endif
 #ifndef M_SQRT1_2
 #define M_SQRT1_2      0.70710678118654752440  /* 1/sqrt(2) */
 #endif
+#ifndef M_SQRT2
+#define M_SQRT2        1.41421356237309504880  /* sqrt(2) */
+#endif
 #ifndef NAN
 	#if __STDC_VERSION__ >= 199901L
   	#define NAN 0.0/0.0
@@ -84,4 +90,13 @@ int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding) av_cons
  */
 int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq) av_const;
 
+/**
+ * Compares two timestamps, each expressed in its own timebase.
+ * The result of the function is undefined if one of the timestamps
+ * is outside the int64_t range when represented in the other's timebase.
+ * @return -1 if ts_a is before ts_b, 1 if ts_a is after ts_b, or 0 if they represent the same position
+ */
+int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b);
+
+
 #endif /* AVUTIL_MATHEMATICS_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/md5.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/md5.c
index 37236eadd..ee425f549 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/md5.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/md5.c
@@ -1,162 +1,162 @@
-/*
- * Copyright (C) 2006 Michael Niedermayer (michaelni@gmx.at)
- * Copyright (C) 2003-2005 by Christopher R. Hertel (crh@ubiqx.mn.org)
- *
- * References:
- *  IETF RFC 1321: The MD5 Message-Digest Algorithm
- *       Ron Rivest. IETF, April, 1992
- *
- * based on http://ubiqx.org/libcifs/source/Auth/MD5.c
- *          from Christopher R. Hertel (crh@ubiqx.mn.org)
- * Simplified, cleaned and IMO redundant comments removed by michael.
- *
- * If you use gcc, then version 4.1 or later and -fomit-frame-pointer is
- * strongly recommended.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <string.h>
-#include "bswap.h"
-#include "md5.h"
-
-typedef struct AVMD5{
-    uint64_t len;
-    uint8_t  block[64];
-    uint32_t ABCD[4];
-} AVMD5;
-
-const int av_md5_size= sizeof(AVMD5);
-
-static const uint8_t S[4][4] = {
-    { 7, 12, 17, 22 },  /* round 1 */
-    { 5,  9, 14, 20 },  /* round 2 */
-    { 4, 11, 16, 23 },  /* round 3 */
-    { 6, 10, 15, 21 }   /* round 4 */
-};
-
-static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
-    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,   /* round 1 */
-    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
-    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
-    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
-
-    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,   /* round 2 */
-    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
-    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
-    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
-
-    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,   /* round 3 */
-    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
-    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
-    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
-
-    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,   /* round 4 */
-    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
-    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
-    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
-};
-
-#define CORE(i, a, b, c, d) \
-        t = S[i>>4][i&3];\
-        a += T[i];\
-\
-        if(i<32){\
-            if(i<16) a += (d ^ (b&(c^d))) + X[      i &15 ];\
-            else     a += (c ^ (d&(c^b))) + X[ (1+5*i)&15 ];\
-        }else{\
-            if(i<48) a += (b^c^d)         + X[ (5+3*i)&15 ];\
-            else     a += (c^(b|~d))      + X[ (  7*i)&15 ];\
-        }\
-        a = b + (( a << t ) | ( a >> (32 - t) ));
-
-static void body(uint32_t ABCD[4], uint32_t X[16]){
-
-    int t;
-    int i av_unused;
-    unsigned int a= ABCD[3];
-    unsigned int b= ABCD[2];
-    unsigned int c= ABCD[1];
-    unsigned int d= ABCD[0];
-
-#if HAVE_BIGENDIAN
-    for(i=0; i<16; i++)
-        X[i]= bswap_32(X[i]);
-#endif
-
-#if CONFIG_SMALL
-    for( i = 0; i < 64; i++ ){
-        CORE(i,a,b,c,d)
-        t=d; d=c; c=b; b=a; a=t;
-    }
-#else
-#define CORE2(i) CORE(i,a,b,c,d) CORE((i+1),d,a,b,c) CORE((i+2),c,d,a,b) CORE((i+3),b,c,d,a)
-#define CORE4(i) CORE2(i) CORE2((i+4)) CORE2((i+8)) CORE2((i+12))
-CORE4(0) CORE4(16) CORE4(32) CORE4(48)
-#endif
-
-    ABCD[0] += d;
-    ABCD[1] += c;
-    ABCD[2] += b;
-    ABCD[3] += a;
-}
-
-void av_md5_init(AVMD5 *ctx){
-    ctx->len    = 0;
-
-    ctx->ABCD[0] = 0x10325476;
-    ctx->ABCD[1] = 0x98badcfe;
-    ctx->ABCD[2] = 0xefcdab89;
-    ctx->ABCD[3] = 0x67452301;
-}
-
-void av_md5_update(AVMD5 *ctx, const uint8_t *src, const int len){
-    int i, j;
-
-    j= ctx->len & 63;
-    ctx->len += len;
-
-    for( i = 0; i < len; i++ ){
-        ctx->block[j++] = src[i];
-        if( 64 == j ){
-            body(ctx->ABCD, (uint32_t*) ctx->block);
-            j = 0;
-        }
-    }
-}
-
-void av_md5_final(AVMD5 *ctx, uint8_t *dst){
-    int i;
-    uint64_t finalcount= le2me_64(ctx->len<<3);
-
-    av_md5_update(ctx, "\200", 1);
-    while((ctx->len & 63)!=56)
-        av_md5_update(ctx, "", 1);
-
-    av_md5_update(ctx, (uint8_t*)&finalcount, 8);
-
-    for(i=0; i<4; i++)
-        ((uint32_t*)dst)[i]= le2me_32(ctx->ABCD[3-i]);
-}
-
-void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len){
-    AVMD5 ctx[1];
-
-    av_md5_init(ctx);
-    av_md5_update(ctx, src, len);
-    av_md5_final(ctx, dst);
-}
+/*
+ * Copyright (C) 2006 Michael Niedermayer (michaelni@gmx.at)
+ * Copyright (C) 2003-2005 by Christopher R. Hertel (crh@ubiqx.mn.org)
+ *
+ * References:
+ *  IETF RFC 1321: The MD5 Message-Digest Algorithm
+ *       Ron Rivest. IETF, April, 1992
+ *
+ * based on http://ubiqx.org/libcifs/source/Auth/MD5.c
+ *          from Christopher R. Hertel (crh@ubiqx.mn.org)
+ * Simplified, cleaned and IMO redundant comments removed by michael.
+ *
+ * If you use gcc, then version 4.1 or later and -fomit-frame-pointer is
+ * strongly recommended.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+#include "bswap.h"
+#include "md5.h"
+
+typedef struct AVMD5{ /* streaming MD5 context */
+    uint64_t len;       /* total number of bytes fed in so far */
+    uint8_t  block[64]; /* bytes of the current, not yet processed 64-byte block */
+    uint32_t ABCD[4];   /* running hash state, updated by body() once per full block */
+} AVMD5;
+
+const int av_md5_size= sizeof(AVMD5);
+
+static const uint8_t S[4][4] = {
+    { 7, 12, 17, 22 },  /* round 1 */
+    { 5,  9, 14, 20 },  /* round 2 */
+    { 4, 11, 16, 23 },  /* round 3 */
+    { 6, 10, 15, 21 }   /* round 4 */
+};
+
+static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
+    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,   /* round 1 */
+    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+
+    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,   /* round 2 */
+    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+
+    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,   /* round 3 */
+    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
+    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+
+    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,   /* round 4 */
+    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+};
+
+#define CORE(i, a, b, c, d) \
+        t = S[i>>4][i&3];\
+        a += T[i];\
+\
+        if(i<32){\
+            if(i<16) a += (d ^ (b&(c^d))) + X[      i &15 ];\
+            else     a += (c ^ (d&(c^b))) + X[ (1+5*i)&15 ];\
+        }else{\
+            if(i<48) a += (b^c^d)         + X[ (5+3*i)&15 ];\
+            else     a += (c^(b|~d))      + X[ (  7*i)&15 ];\
+        }\
+        a = b + (( a << t ) | ( a >> (32 - t) ));
+
+static void body(uint32_t ABCD[4], uint32_t X[16]){ /* mix one 64-byte block X into the state ABCD */
+
+    int t;
+    int i av_unused;
+    unsigned int a= ABCD[3]; /* working copies; the state array is held in reverse order */
+    unsigned int b= ABCD[2];
+    unsigned int c= ABCD[1];
+    unsigned int d= ABCD[0];
+
+#if HAVE_BIGENDIAN /* the words are byte-swapped in place, so X is modified on big-endian hosts */
+    for(i=0; i<16; i++)
+        X[i]= bswap_32(X[i]);
+#endif
+
+#if CONFIG_SMALL /* compact build: 64 looped steps, rotating the working registers after each */
+    for( i = 0; i < 64; i++ ){
+        CORE(i,a,b,c,d)
+        t=d; d=c; c=b; b=a; a=t;
+    }
+#else /* otherwise the 64 steps are unrolled, with the rotation expressed by permuting the macro arguments */
+#define CORE2(i) CORE(i,a,b,c,d) CORE((i+1),d,a,b,c) CORE((i+2),c,d,a,b) CORE((i+3),b,c,d,a)
+#define CORE4(i) CORE2(i) CORE2((i+4)) CORE2((i+8)) CORE2((i+12))
+CORE4(0) CORE4(16) CORE4(32) CORE4(48)
+#endif
+    /* fold the block's result back into the running state */
+    ABCD[0] += d;
+    ABCD[1] += c;
+    ABCD[2] += b;
+    ABCD[3] += a;
+}
+
+void av_md5_init(AVMD5 *ctx){ /* reset ctx to the start-of-message state */
+    ctx->len    = 0;
+    /* initial hash words; body() reads ABCD[3] as its first working register and ABCD[0] as its last */
+    ctx->ABCD[0] = 0x10325476;
+    ctx->ABCD[1] = 0x98badcfe;
+    ctx->ABCD[2] = 0xefcdab89;
+    ctx->ABCD[3] = 0x67452301;
+}
+
+void av_md5_update(AVMD5 *ctx, const uint8_t *src, const int len){ /* feed len bytes from src into the hash */
+    int i, j;
+
+    j= ctx->len & 63; /* number of bytes already buffered in the current block */
+    ctx->len += len;
+    /* copy byte by byte, processing the buffer each time it reaches 64 bytes */
+    for( i = 0; i < len; i++ ){
+        ctx->block[j++] = src[i];
+        if( 64 == j ){
+            body(ctx->ABCD, (uint32_t*) ctx->block);
+            j = 0;
+        }
+    }
+}
+
+void av_md5_final(AVMD5 *ctx, uint8_t *dst){ /* pad the message, append its bit length and write the 16-byte digest to dst */
+    int i;
+    uint64_t finalcount= le2me_64(ctx->len<<3); /* length in bits, captured before padding and laid out little-endian */
+
+    av_md5_update(ctx, "\200", 1);              /* single 0x80 pad byte */
+    while((ctx->len & 63)!=56)                  /* zero bytes until 8 bytes remain in the block */
+        av_md5_update(ctx, "", 1);
+
+    av_md5_update(ctx, (uint8_t*)&finalcount, 8);
+    /* Emit each state word low byte first, one byte at a time: dst has no alignment guarantee. */
+    for(i=0; i<16; i++)
+        dst[i]= ctx->ABCD[3-(i>>2)] >> (8*(i&3));
+}
+
+void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len){ /* one-shot hash of len bytes at src into dst */
+    AVMD5 ctx[1]; /* temporary context on the stack; the array form lets it be passed as a pointer */
+
+    av_md5_init(ctx);
+    av_md5_update(ctx, src, len);
+    av_md5_final(ctx, dst);
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mem.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mem.h
index 14dbde9b4..ef8bd5fa6 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mem.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/mem.h
@@ -26,11 +26,18 @@
 #ifndef AVUTIL_MEM_H
 #define AVUTIL_MEM_H
 
-#include "common.h"
+#include "attributes.h"
 
 #if defined(__ICC) || defined(__SUNPRO_C)
     #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
+#elif defined(__TI_COMPILER_VERSION__)
+    #define DECLARE_ALIGNED(n,t,v)                      \
+        AV_PRAGMA(DATA_ALIGN(v,n))                      \
+        t __attribute__((aligned(n))) v
+    #define DECLARE_ASM_CONST(n,t,v)                    \
+        AV_PRAGMA(DATA_ALIGN(v,n))                      \
+        static const t __attribute__((aligned(n))) v
 #elif defined(__GNUC__)
     #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_CONST(n,t,v)    static const t attribute_used __attribute__ ((aligned (n))) v
@@ -42,7 +49,6 @@
     #define DECLARE_ASM_CONST(n,t,v)    static const t v
 #endif
 
-
 #if AV_GCC_VERSION_AT_LEAST(3,1)
     #define av_malloc_attrib __attribute__((__malloc__))
 #else
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.c
new file mode 100644
index 000000000..542d57aed
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.c
@@ -0,0 +1,1643 @@
+/*
+ * pixel format descriptor
+ * Copyright (c) 2009 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "pixfmt.h"
+#include "pixdesc.h"
+
+#include "intreadwrite.h"
+#include "common.h" /* for MSVC */
+
+void read_line(uint16_t *dst, const uint8_t *data[4], const int linesize[4],
+               const AVPixFmtDescriptor *desc, int x, int y, int c, int w, int read_pal_component)
+{   /* Unpack w samples of component c from row y, starting at pixel x, into dst[]. */
+    AVComponentDescriptor comp= desc->comp[c];
+    int plane= comp.plane;
+    int depth= comp.depth_minus1+1;
+    int mask = (1<<depth)-1;
+    int shift= comp.shift;
+    int step = comp.step_minus1+1;
+    int flags= desc->flags;
+    /* If read_pal_component is nonzero, each raw value v is replaced by data[1][4*v + c]. */
+    if (flags & PIX_FMT_BITSTREAM){
+        int skip = x*step + comp.offset_plus1-1;   /* bit offset of the first sample within the row */
+        const uint8_t *p = data[plane] + y*linesize[plane] + (skip>>3);
+        int shift = 8 - depth - (skip&7);          /* bit position inside *p; shadows the outer shift */
+
+        while(w--){
+            int val = (*p >> shift) & mask;
+            if(read_pal_component)
+                val= data[1][4*val + c];
+            shift -= step;                         /* step counts bits here; shift may drop below zero */
+            p -= shift>>3;                         /* a negative shift moves p forward (assumes arithmetic >>) */
+            shift &= 7;
+            *dst++= val;
+        }
+    } else {
+        const uint8_t *p = data[plane]+ y*linesize[plane] + x*step + comp.offset_plus1-1;
+        /* Samples that fit in the low byte are fetched as one byte so no neighbouring byte is touched. */
+        int is_8bit = shift + depth <= 8;
+        if (is_8bit) p += !!(flags & PIX_FMT_BE);  /* big-endian: the low byte is the second one */
+        while(w--){
+            int val = is_8bit ? *p : (flags & PIX_FMT_BE) ? AV_RB16(p) : AV_RL16(p);
+            val = (val>>shift) & mask;
+            if(read_pal_component)
+                val= data[1][4*val + c];
+            p+= step;
+            *dst++= val;
+        }
+    }
+}
+
+void write_line(const uint16_t *src, uint8_t *data[4], const int linesize[4],
+                const AVPixFmtDescriptor *desc, int x, int y, int c, int w)
+{   /* OR w samples from src[] into component c of row y, starting at pixel x. */
+    AVComponentDescriptor comp = desc->comp[c];
+    int plane = comp.plane;
+    int depth = comp.depth_minus1+1;
+    int step  = comp.step_minus1+1;
+    int flags = desc->flags;
+    /* Bits are only ever set (|=), never cleared, so bits already present in the destination are kept. */
+    if (flags & PIX_FMT_BITSTREAM) {
+        int skip = x*step + comp.offset_plus1-1;   /* bit offset of the first sample within the row */
+        uint8_t *p = data[plane] + y*linesize[plane] + (skip>>3);
+        int shift = 8 - depth - (skip&7);          /* bit position inside *p */
+
+        while (w--) {
+            *p |= *src++ << shift;
+            shift -= step;                         /* step counts bits here; shift may drop below zero */
+            p -= shift>>3;                         /* a negative shift moves p forward (assumes arithmetic >>) */
+            shift &= 7;
+        }
+    } else {
+        int shift = comp.shift;
+        uint8_t *p = data[plane]+ y*linesize[plane] + x*step + comp.offset_plus1-1;
+        int is_8bit = shift + depth <= 8;
+
+        /* Samples that fit in the low byte are stored with a one-byte OR so no neighbouring byte is rewritten. */
+        if (is_8bit) p += !!(flags & PIX_FMT_BE);  /* big-endian: the low byte is the second one */
+        while (w--) {
+            uint16_t val = *src++ << shift;        /* kept in a temporary: val is used twice per store below */
+            if (is_8bit)                   *p |= val;
+            else if (flags & PIX_FMT_BE) { val |= AV_RB16(p); AV_WB16(p, val); }
+            else                         { val |= AV_RL16(p); AV_WL16(p, val); }
+            p+= step;
+        }
+    }
+}
+
+// FFDShow modification: the table below uses C99 designated initializers, which MSVC does not support
+#ifdef __GNUC__
+const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
+    [PIX_FMT_YUV420P] = {
+        .name = "yuv420p",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_YUYV422] = {
+        .name = "yuyv422",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,7},        /* Y */
+            {0,3,2,0,7},        /* U */
+            {0,3,4,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_RGB24] = {
+        .name = "rgb24",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,2,1,0,7},        /* R */
+            {0,2,2,0,7},        /* G */
+            {0,2,3,0,7},        /* B */
+        },
+    },
+    [PIX_FMT_BGR24] = {
+        .name = "bgr24",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,2,1,0,7},        /* B */
+            {0,2,2,0,7},        /* G */
+            {0,2,3,0,7},        /* R */
+        },
+    },
+    [PIX_FMT_YUV422P] = {
+        .name = "yuv422p",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_YUV444P] = {
+        .name = "yuv444p",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_YUV410P] = {
+        .name = "yuv410p",
+        .nb_components= 3,
+        .log2_chroma_w= 2,
+        .log2_chroma_h= 2,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_YUV411P] = {
+        .name = "yuv411p",
+        .nb_components= 3,
+        .log2_chroma_w= 2,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_GRAY8] = {
+        .name = "gray",
+        .nb_components= 1,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+        },
+        .flags = PIX_FMT_PAL,
+    },
+    [PIX_FMT_MONOWHITE] = {
+        .name = "monow",
+        .nb_components= 1,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,0,0},        /* Y */
+        },
+        .flags = PIX_FMT_BITSTREAM,
+    },
+    [PIX_FMT_MONOBLACK] = {
+        .name = "monob",
+        .nb_components= 1,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,7,0},        /* Y */
+        },
+        .flags = PIX_FMT_BITSTREAM,
+    },
+    [PIX_FMT_PAL8] = {
+        .name = "pal8",
+        .nb_components= 1,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,0,7},
+        },
+        .flags = PIX_FMT_PAL,
+    },
+    [PIX_FMT_YUVJ420P] = {
+        .name = "yuvj420p",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_YUVJ422P] = {
+        .name = "yuvj422p",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_YUVJ444P] = {
+        .name = "yuvj444p",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_XVMC_MPEG2_MC] = {
+        .name = "xvmcmc",
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_XVMC_MPEG2_IDCT] = {
+        .name = "xvmcidct",
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_UYVY422] = {
+        .name = "uyvy422",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,0,7},        /* Y */
+            {0,3,1,0,7},        /* U */
+            {0,3,3,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_UYYVYY411] = {
+        .name = "uyyvyy411",
+        .nb_components= 3,
+        .log2_chroma_w= 2,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,3,2,0,7},        /* Y */
+            {0,5,1,0,7},        /* U */
+            {0,5,4,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_BGR8] = {
+        .name = "bgr8",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,6,1},        /* B */
+            {0,0,1,3,2},        /* G */
+            {0,0,1,0,2},        /* R */
+        },
+        .flags = PIX_FMT_PAL,
+    },
+    [PIX_FMT_BGR4] = {
+        .name = "bgr4",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,3,1,0,0},        /* B */
+            {0,3,2,0,1},        /* G */
+            {0,3,4,0,0},        /* R */
+        },
+        .flags = PIX_FMT_BITSTREAM,
+    },
+    [PIX_FMT_BGR4_BYTE] = {
+        .name = "bgr4_byte",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,3,0},        /* B */
+            {0,0,1,1,1},        /* G */
+            {0,0,1,0,0},        /* R */
+        },
+        .flags = PIX_FMT_PAL,
+    },
+    [PIX_FMT_RGB8] = {
+        .name = "rgb8",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,6,1},        /* R */
+            {0,0,1,3,2},        /* G */
+            {0,0,1,0,2},        /* B */
+        },
+        .flags = PIX_FMT_PAL,
+    },
+    [PIX_FMT_RGB4] = {
+        .name = "rgb4",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,3,1,0,0},       /* R */
+            {0,3,2,0,1},       /* G */
+            {0,3,4,0,0},       /* B */
+        },
+        .flags = PIX_FMT_BITSTREAM,
+    },
+    [PIX_FMT_RGB4_BYTE] = {
+        .name = "rgb4_byte",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,0,1,3,0},        /* R */
+            {0,0,1,1,1},        /* G */
+            {0,0,1,0,0},        /* B */
+        },
+        .flags = PIX_FMT_PAL,
+    },
+    [PIX_FMT_NV12] = {
+        .name = "nv12",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,1,1,0,7},        /* U */
+            {1,1,2,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_NV21] = {
+        .name = "nv21",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,1,1,0,7},        /* V */
+            {1,1,2,0,7},        /* U */
+        },
+    },
+    [PIX_FMT_ARGB] = {
+        .name = "argb",
+        .nb_components= 4,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,3,1,0,7},        /* A */
+            {0,3,2,0,7},        /* R */
+            {0,3,3,0,7},        /* G */
+            {0,3,4,0,7},        /* B */
+        },
+    },
+    [PIX_FMT_RGBA] = {
+        .name = "rgba",
+        .nb_components= 4,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,3,1,0,7},        /* R */
+            {0,3,2,0,7},        /* G */
+            {0,3,3,0,7},        /* B */
+            {0,3,4,0,7},        /* A */
+        },
+    },
+    [PIX_FMT_ABGR] = {
+        .name = "abgr",
+        .nb_components= 4,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,3,1,0,7},        /* A */
+            {0,3,2,0,7},        /* B */
+            {0,3,3,0,7},        /* G */
+            {0,3,4,0,7},        /* R */
+        },
+    },
+    [PIX_FMT_BGRA] = {
+        .name = "bgra",
+        .nb_components= 4,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,3,1,0,7},        /* B */
+            {0,3,2,0,7},        /* G */
+            {0,3,3,0,7},        /* R */
+            {0,3,4,0,7},        /* A */
+        },
+    },
+    [PIX_FMT_GRAY16BE] = {
+        .name = "gray16be",
+        .nb_components= 1,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,15},       /* Y */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_GRAY16LE] = {
+        .name = "gray16le",
+        .nb_components= 1,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,15},       /* Y */
+        },
+    },
+    [PIX_FMT_YUV440P] = {
+        .name = "yuv440p",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_YUVJ440P] = {
+        .name = "yuvj440p",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+    },
+    [PIX_FMT_YUVA420P] = {
+        .name = "yuva420p",
+        .nb_components= 4,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+            {3,0,1,0,7},        /* A */
+        },
+    },
+    [PIX_FMT_VDPAU_H264] = {
+        .name = "vdpau_h264",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_VDPAU_MPEG1] = {
+        .name = "vdpau_mpeg1",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_VDPAU_MPEG2] = {
+        .name = "vdpau_mpeg2",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_VDPAU_WMV3] = {
+        .name = "vdpau_wmv3",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_VDPAU_VC1] = {
+        .name = "vdpau_vc1",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_VDPAU_MPEG4] = {
+        .name = "vdpau_mpeg4",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_RGB48BE] = {
+        .name = "rgb48be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,5,1,0,15},       /* R */
+            {0,5,3,0,15},       /* G */
+            {0,5,5,0,15},       /* B */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_RGB48LE] = {
+        .name = "rgb48le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,5,1,0,15},       /* R */
+            {0,5,3,0,15},       /* G */
+            {0,5,5,0,15},       /* B */
+        },
+    },
+    [PIX_FMT_RGB565BE] = {
+        .name = "rgb565be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,0,3,4},        /* R */
+            {0,1,1,5,5},        /* G */
+            {0,1,1,0,4},        /* B */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_RGB565LE] = {
+        .name = "rgb565le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,3,4},        /* R */
+            {0,1,1,5,5},        /* G */
+            {0,1,1,0,4},        /* B */
+        },
+    },
+    [PIX_FMT_RGB555BE] = {
+        .name = "rgb555be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,0,2,4},        /* R */
+            {0,1,1,5,4},        /* G */
+            {0,1,1,0,4},        /* B */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_RGB555LE] = {
+        .name = "rgb555le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,2,4},        /* R */
+            {0,1,1,5,4},        /* G */
+            {0,1,1,0,4},        /* B */
+        },
+    },
+    [PIX_FMT_RGB444BE] = {
+        .name = "rgb444be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,0,0,3},        /* R */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* B */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_RGB444LE] = {
+        .name = "rgb444le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,0,3},        /* R */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* B */
+        },
+    },
+    [PIX_FMT_BGR565BE] = {
+        .name = "bgr565be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,0,3,4},        /* B */
+            {0,1,1,5,5},        /* G */
+            {0,1,1,0,4},        /* R */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_BGR565LE] = {
+        .name = "bgr565le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,3,4},        /* B */
+            {0,1,1,5,5},        /* G */
+            {0,1,1,0,4},        /* R */
+        },
+    },
+    [PIX_FMT_BGR555BE] = {
+        .name = "bgr555be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,0,2,4},       /* B */
+            {0,1,1,5,4},       /* G */
+            {0,1,1,0,4},       /* R */
+        },
+        .flags = PIX_FMT_BE,
+     },
+    [PIX_FMT_BGR555LE] = {
+        .name = "bgr555le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,2,4},        /* B */
+            {0,1,1,5,4},        /* G */
+            {0,1,1,0,4},        /* R */
+        },
+    },
+    [PIX_FMT_BGR444BE] = {
+        .name = "bgr444be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,0,0,3},       /* B */
+            {0,1,1,4,3},       /* G */
+            {0,1,1,0,3},       /* R */
+        },
+        .flags = PIX_FMT_BE,
+     },
+    [PIX_FMT_BGR444LE] = {
+        .name = "bgr444le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,2,0,3},        /* B */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* R */
+        },
+    },
+    [PIX_FMT_VAAPI_MOCO] = {
+        .name = "vaapi_moco",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_VAAPI_IDCT] = {
+        .name = "vaapi_idct",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_VAAPI_VLD] = {
+        .name = "vaapi_vld",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_YUV420P16LE] = {
+        .name = "yuv420p16le",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+    },
+    [PIX_FMT_YUV420P16BE] = {
+        .name = "yuv420p16be",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 1,
+        .comp = {
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_YUV422P16LE] = {
+        .name = "yuv422p16le",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+    },
+    [PIX_FMT_YUV422P16BE] = {
+        .name = "yuv422p16be",
+        .nb_components= 3,
+        .log2_chroma_w= 1,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_YUV444P16LE] = {
+        .name = "yuv444p16le",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+    },
+    [PIX_FMT_YUV444P16BE] = {
+        .name = "yuv444p16be",
+        .nb_components= 3,
+        .log2_chroma_w= 0,
+        .log2_chroma_h= 0,
+        .comp = {
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        .flags = PIX_FMT_BE,
+    },
+    [PIX_FMT_DXVA2_VLD] = {
+        .name = "dxva2_vld",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = PIX_FMT_HWACCEL,
+    },
+    [PIX_FMT_Y400A] = {
+        .name = "y400a",
+        .nb_components= 2,
+        .comp = {
+            {0,1,1,0,7},        /* Y */
+            {0,1,2,0,7},        /* A */
+        },
+    },
+};
+
+#else
+
+// FFDShow modification for MSVC compilation: initializers are positional, so keep the struct fields and the colorspaces in their declared order (see pixfmt.h)
+const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
+/*[PIX_FMT_YUV420P] =*/ {
+        /*.name = */"yuv420p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUYV422] =*/ {
+        /*.name = */"yuyv422",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,1,0,7},        /* Y */
+            {0,3,2,0,7},        /* U */
+            {0,3,4,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_RGB24] =*/ {
+        /*.name = */"rgb24",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,2,1,0,7},        /* R */
+            {0,2,2,0,7},        /* G */
+            {0,2,3,0,7},        /* B */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_BGR24] =*/ {
+        /*.name = */"bgr24",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,2,1,0,7},        /* B */
+            {0,2,2,0,7},        /* G */
+            {0,2,3,0,7},        /* R */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUV422P] =*/ {
+        /*.name = */"yuv422p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUV444P] =*/ {
+        /*.name = */"yuv444p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUV410P] =*/ {
+        /*.name = */"yuv410p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */2,
+        /*.log2_chroma_h= */2,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUV411P] =*/ {
+        /*.name = */"yuv411p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */2,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_GRAY8] =*/ {
+        /*.name = */"gray",
+        /*.nb_components= */1,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+        },
+        /*.flags = */PIX_FMT_PAL,
+    },
+/*[PIX_FMT_MONOWHITE] =*/ {
+        /*.name = */"monow",
+        /*.nb_components= */1,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,0,0},        /* Y */
+        },
+        /*.flags= */PIX_FMT_BITSTREAM
+    },
+/*[PIX_FMT_MONOBLACK] =*/ {
+        /*.name = */"monob",
+        /*.nb_components= */1,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,7,0},        /* Y */
+        },
+        /*.flags= */PIX_FMT_BITSTREAM
+    },
+/*[PIX_FMT_PAL8] =*/ {
+        /*.name = */"pal8",
+        /*.nb_components= */1,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,0,7},
+        },
+        /*.flags= */PIX_FMT_PAL
+    },
+/*[PIX_FMT_YUVJ420P] =*/ {
+        /*.name = */"yuvj420p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUVJ422P] =*/ {
+        /*.name = */"yuvj422p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUVJ444P] =*/ {
+        /*.name = */"yuvj444p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_XVMC_MPEG2_MC] =*/ {
+        /*.name = */"xvmcmc",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_XVMC_MPEG2_IDCT] =*/ {
+        /*.name = */"xvmcidct",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_UYVY422] =*/ {
+        /*.name = */"uyvy422",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,2,0,7},        /* Y */
+            {0,3,1,0,7},        /* U */
+            {0,3,3,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_UYYVYY411] =*/ {
+        /*.name = */"uyyvyy411",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */2,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,3,2,0,7},        /* Y */
+            {0,5,1,0,7},        /* U */
+            {0,5,4,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_BGR8] =*/ {
+        /*.name = */"bgr8",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,6,1},        /* B */
+            {0,0,1,3,2},        /* G */
+            {0,0,1,0,2},        /* R */
+        },
+        /*.flags= */PIX_FMT_PAL
+    },
+/*[PIX_FMT_BGR4] =*/ {
+        /*.name = */"bgr4",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,3,1,0,0},        /* B */
+            {0,3,2,0,1},        /* G */
+            {0,3,4,0,0},        /* R */
+        },
+        /*.flags= */PIX_FMT_BITSTREAM
+    },
+/*[PIX_FMT_BGR4_BYTE] =*/ {
+        /*.name = */"bgr4_byte",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,3,0},        /* B */
+            {0,0,1,1,1},        /* G */
+            {0,0,1,0,0},        /* R */
+        },
+        /*.flags= */PIX_FMT_PAL,
+    },
+/*[PIX_FMT_RGB8] =*/ {
+        /*.name = */"rgb8",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,6,1},        /* R */
+            {0,0,1,3,2},        /* G */
+            {0,0,1,0,2},        /* B */
+        },
+        /*.flags= */PIX_FMT_PAL
+    },
+/*[PIX_FMT_RGB4] =*/ {
+        /*.name = */"rgb4",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,3,1,0,0},       /* R */
+            {0,3,2,0,1},       /* G */
+            {0,3,4,0,0},       /* B */
+        },
+        /*.flags= */PIX_FMT_BITSTREAM
+    },
+/*[PIX_FMT_RGB4_BYTE] =*/ {
+        /*.name = */"rgb4_byte",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,0,1,3,0},        /* R */
+            {0,0,1,1,1},        /* G */
+            {0,0,1,0,0},        /* B */
+        },
+        /*.flags= */PIX_FMT_PAL
+    },
+/*[PIX_FMT_NV12] =*/ {
+        /*.name = */"nv12",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,1,1,0,7},        /* U */
+            {1,1,2,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_NV21] =*/ {
+        /*.name = */"nv21",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,1,1,0,7},        /* V */
+            {1,1,2,0,7},        /* U */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_ARGB] =*/ {
+        /*.name = */"argb",
+        /*.nb_components= */4,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,3,1,0,7},        /* A */
+            {0,3,2,0,7},        /* R */
+            {0,3,3,0,7},        /* G */
+            {0,3,4,0,7},        /* B */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_RGBA] =*/ {
+        /*.name = */"rgba",
+        /*.nb_components= */4,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,3,1,0,7},        /* R */
+            {0,3,2,0,7},        /* G */
+            {0,3,3,0,7},        /* B */
+            {0,3,4,0,7},        /* A */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_ABGR] =*/ {
+        /*.name = */"abgr",
+        /*.nb_components= */4,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,3,1,0,7},        /* A */
+            {0,3,2,0,7},        /* B */
+            {0,3,3,0,7},        /* G */
+            {0,3,4,0,7},        /* R */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_BGRA] =*/ {
+        /*.name = */"bgra",
+        /*.nb_components= */4,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,3,1,0,7},        /* B */
+            {0,3,2,0,7},        /* G */
+            {0,3,3,0,7},        /* R */
+            {0,3,4,0,7},        /* A */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_GRAY16BE] =*/ {
+        /*.name = */"gray16be",
+        /*.nb_components= */1,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,1,0,15},       /* Y */
+        },
+        /*.flags= */PIX_FMT_BE
+    },
+/*[PIX_FMT_GRAY16LE] =*/ {
+        /*.name = */"gray16le",
+        /*.nb_components= */1,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,1,0,15},       /* Y */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUV440P] =*/ {
+        /*.name = */"yuv440p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUVJ440P] =*/ {
+        /*.name = */"yuvj440p",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUVA420P] =*/ {
+        /*.name = */"yuva420p",
+        /*.nb_components= */4,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,1,0,7},        /* Y */
+            {1,0,1,0,7},        /* U */
+            {2,0,1,0,7},        /* V */
+            {3,0,1,0,7},        /* A */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_VDPAU_H264] =*/ {
+        /*.name = */"vdpau_h264",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_VDPAU_MPEG1] =*/ {
+        /*.name = */"vdpau_mpeg1",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_VDPAU_MPEG2] =*/ {
+        /*.name = */"vdpau_mpeg2",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_VDPAU_WMV3] =*/ {
+        /*.name = */"vdpau_wmv3",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_VDPAU_VC1] =*/ {
+        /*.name = */"vdpau_vc1",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_RGB48BE] =*/ {
+        /*.name = */"rgb48be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,5,1,0,15},       /* R */
+            {0,5,3,0,15},       /* G */
+            {0,5,5,0,15},       /* B */
+        },
+        /*.flags= */PIX_FMT_BE
+    },
+/*[PIX_FMT_RGB48LE] =*/ {
+        /*.name = */"rgb48le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,5,1,0,15},       /* R */
+            {0,5,3,0,15},       /* G */
+            {0,5,5,0,15},       /* B */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_RGB565BE] =*/ {
+        /*.name = */"rgb565be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,0,3,4},        /* R */
+            {0,1,1,5,5},        /* G */
+            {0,1,1,0,4},        /* B */
+        },
+        /*.flags= */PIX_FMT_BE
+    },
+/*[PIX_FMT_RGB565LE] =*/ {
+        /*.name = */"rgb565le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,2,3,4},        /* R */
+            {0,1,1,5,5},        /* G */
+            {0,1,1,0,4},        /* B */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_RGB555BE] =*/ {
+        /*.name = */"rgb555be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,0,2,4},        /* R */
+            {0,1,1,5,4},        /* G */
+            {0,1,1,0,4},        /* B */
+        },
+        /*.flags= */PIX_FMT_BE
+    },
+/*[PIX_FMT_RGB555LE] =*/ {
+        /*.name = */"rgb555le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,2,2,4},        /* R */
+            {0,1,1,5,4},        /* G */
+            {0,1,1,0,4},        /* B */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_RGB444BE] =*/ {
+        /*.name = */"rgb444be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,0,0,3},        /* R */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* B */
+        },
+        /*.flags = */PIX_FMT_BE,
+    },
+/*[PIX_FMT_RGB444LE] =*/ {
+        /*.name = */"rgb444le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,2,0,3},        /* R */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* B */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_BGR565BE] =*/ {
+        /*.name = */"bgr565be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,0,3,4},        /* B */
+            {0,1,1,5,5},        /* G */
+            {0,1,1,0,4},        /* R */
+        },
+        /*.flags= */PIX_FMT_BE
+    },
+/*[PIX_FMT_BGR565LE] =*/ {
+        /*.name = */"bgr565le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,2,3,4},        /* B */
+            {0,1,1,5,5},        /* G */
+            {0,1,1,0,4},        /* R */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_BGR555BE] =*/ {
+        /*.name = */"bgr555be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,0,2,4},       /* B */
+            {0,1,1,5,4},       /* G */
+            {0,1,1,0,4},       /* R */
+        },
+        /*.flags= */PIX_FMT_BE
+     },
+/*[PIX_FMT_BGR555LE] =*/ {
+        /*.name = */"bgr555le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,2,2,4},        /* B */
+            {0,1,1,5,4},        /* G */
+            {0,1,1,0,4},        /* R */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_BGR444BE] =*/ {
+        /*.name = */"bgr444be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,0,0,3},       /* B */
+            {0,1,1,4,3},       /* G */
+            {0,1,1,0,3},       /* R */
+        },
+        /*.flags = */PIX_FMT_BE,
+     },
+/*[PIX_FMT_BGR444LE] =*/ {
+        /*.name = */"bgr444le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,2,0,3},        /* B */
+            {0,1,1,4,3},        /* G */
+            {0,1,1,0,3},        /* R */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_VAAPI_MOCO] =*/ {
+        /*.name = */"vaapi_moco",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{ {0,0,0,0,0} }, /* positional init: without this placeholder PIX_FMT_HWACCEL would land in comp[0].plane */
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_VAAPI_IDCT] =*/ {
+        /*.name = */"vaapi_idct",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_VAAPI_VLD] =*/ {
+        /*.name = */"vaapi_vld",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_YUV420P16LE] =*/ {
+        /*.name = */"yuv420p16le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUV420P16BE] =*/ {
+        /*.name = */"yuv420p16be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        /*.flags= */PIX_FMT_BE
+    },
+/*[PIX_FMT_YUV422P16LE] =*/ {
+        /*.name = */"yuv422p16le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUV422P16BE] =*/ {
+        /*.name = */"yuv422p16be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        /*.flags= */PIX_FMT_BE
+    },
+/*[PIX_FMT_YUV444P16LE] =*/ {
+        /*.name = */"yuv444p16le",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        /*.flags= */0
+    },
+/*[PIX_FMT_YUV444P16BE] =*/ {
+        /*.name = */"yuv444p16be",
+        /*.nb_components= */3,
+        /*.log2_chroma_w= */0,
+        /*.log2_chroma_h= */0,
+        /*.comp = */{
+            {0,1,1,0,15},        /* Y */
+            {1,1,1,0,15},        /* U */
+            {2,1,1,0,15},        /* V */
+        },
+        /*.flags= */PIX_FMT_BE
+    },
+/*[PIX_FMT_VDPAU_MPEG4] =*/ {
+        /*.name = */"vdpau_mpeg4",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_DXVA2_VLD] =*/ {
+        /*.name = */"dxva2_vld",
+        /*.nb_components= */0,
+        /*.log2_chroma_w= */1,
+        /*.log2_chroma_h= */1,
+        /*.comp = */{
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+            {0,0,0,0,0},
+        },
+        /*.flags= */PIX_FMT_HWACCEL
+    },
+/*[PIX_FMT_Y400A] =*/ {
+        /*.name = */"y400a",
+        /*.nb_components= */2, /*.log2_chroma_w= */0, /*.log2_chroma_h= */0, /* positional init: both must precede comp */
+        /*.comp = */{
+            {0,1,1,0,7},        /* Y */
+            {0,1,2,0,7},        /* A */
+        },
+        /*.flags= */0
+    },
+};
+#endif
+
+
+static enum PixelFormat get_pix_fmt_internal(const char *name)
+{
+    int idx; /* table index; it is also the PixelFormat value that gets returned */
+    for (idx = 0; idx < PIX_FMT_NB; idx++) {
+        const char *candidate = av_pix_fmt_descriptors[idx].name;
+        /* entries without a name are skipped; the comparison is an exact strcmp() match */
+        if (candidate && strcmp(candidate, name) == 0)
+            return (enum PixelFormat)idx;
+    }
+    return PIX_FMT_NONE; /* no descriptor carries this name */
+}
+
+/* X_NE(be, le): expands to its first argument when HAVE_BIGENDIAN is set, to the second otherwise. */
+#if HAVE_BIGENDIAN
+#   define X_NE(be, le) be
+#else
+#   define X_NE(be, le) le
+#endif
+
+enum PixelFormat av_get_pix_fmt(const char *name)
+{
+    char suffixed[32];          /* receives name + "be"/"le" for the fallback lookup */
+    enum PixelFormat found;
+
+    /* "rgb32" and "bgr32" are endian-dependent aliases of a concrete packed format name */
+    if (strcmp(name, "rgb32") == 0)
+        name = X_NE("argb", "bgra");
+    else if (strcmp(name, "bgr32") == 0)
+        name = X_NE("abgr", "rgba");
+    found = get_pix_fmt_internal(name);
+    if (found != PIX_FMT_NONE)
+        return found;
+    /* no exact match: retry once with the native-endian suffix appended */
+    snprintf(suffixed, sizeof(suffixed), "%s%s", name, X_NE("be", "le"));
+    return get_pix_fmt_internal(suffixed);
+}
+
+int av_get_bits_per_pixel(const AVPixFmtDescriptor *pixdesc)
+{
+    const int log2_block = pixdesc->log2_chroma_w + pixdesc->log2_chroma_h;
+    int total = 0, idx;
+    /* add up the bits of a (1 << log2_block)-pixel block, then divide by the block size */
+    for (idx = 0; idx < pixdesc->nb_components; idx++) {
+        const int depth = pixdesc->comp[idx].depth_minus1 + 1;
+        /* comp[1] and comp[2] are counted once per block, every other component once per pixel */
+        total += (idx == 1 || idx == 2) ? depth : depth << log2_block;
+    }
+    return total >> log2_block;
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.h
new file mode 100644
index 000000000..940f86765
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.h
@@ -0,0 +1,154 @@
+/*
+ * pixel format descriptor
+ * Copyright (c) 2009 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PIXDESC_H
+#define AVUTIL_PIXDESC_H
+
+#include <inttypes.h>
+
+typedef struct AVComponentDescriptor{ ///< where one component of a pixel is stored, packed into five bit-fields
+    uint16_t plane        :2;            ///< which of the 4 planes contains the component (2-bit field: 0..3)
+
+    /**
+     * Number of elements between 2 horizontally consecutive pixels minus 1.
+     * Elements are bits for bitstream formats, bytes otherwise.
+     */
+    uint16_t step_minus1  :3;            ///< 3-bit field: holds 0..7, i.e. a step of 1..8 elements
+
+    /**
+     * Number of elements before the component of the first pixel plus 1.
+     * Elements are bits for bitstream formats, bytes otherwise.
+     */
+    uint16_t offset_plus1 :3;            ///< 3-bit field: holds 0..7
+    uint16_t shift        :3;            ///< number of least significant bits that must be shifted away to get the value
+    uint16_t depth_minus1 :4;            ///< number of bits in the component minus 1 (4-bit field: depths of 1..16 bits)
+}AVComponentDescriptor;
+
+/**
+ * Descriptor that unambiguously describes how the bits of a pixel are
+ * stored in the up to 4 data planes of an image. It also stores the
+ * subsampling factors and number of components.
+ *
+ * @note This is separate of the colorspace (RGB, YCbCr, YPbPr, JPEG-style YUV
+ *       and all the YUV variants) AVPixFmtDescriptor just stores how values
+ *       are stored not what these values represent.
+ */
+typedef struct AVPixFmtDescriptor{
+    const char *name;           ///< format name, e.g. "nv12"; this is the string av_get_pix_fmt() compares against
+    uint8_t nb_components;      ///< The number of components each pixel has (1-4); the hardware-accelerated entries in pixdesc.c use 0
+
+    /**
+     * Amount to shift the luma width right to find the chroma width.
+     * For YV12 this is 1 for example.
+     * chroma_width = -((-luma_width) >> log2_chroma_w)
+     * The note above is needed to ensure rounding up.
+     * This value only refers to the chroma components.
+     */
+    uint8_t log2_chroma_w;      ///< chroma_width = -((-luma_width )>>log2_chroma_w)
+
+    /**
+     * Amount to shift the luma height right to find the chroma height.
+     * For YV12 this is 1 for example.
+     * chroma_height= -((-luma_height) >> log2_chroma_h)
+     * The note above is needed to ensure rounding up.
+     * This value only refers to the chroma components.
+     */
+    uint8_t log2_chroma_h;
+
+    /**
+     * Parameters that describe how pixels are packed. If the format
+     * has chroma components, they must be stored in comp[1] and
+     * comp[2].
+     */
+    AVComponentDescriptor comp[4];
+    uint8_t flags; ///< bit mask of the PIX_FMT_BE/PAL/BITSTREAM/HWACCEL flags. ffdshow custom code: member order must match the positional (MSVC) initializers in pixdesc.c
+}AVPixFmtDescriptor;
+
+#define PIX_FMT_BE        1 ///< Pixel format is big-endian.
+#define PIX_FMT_PAL       2 ///< Pixel format has a palette in data[1], values are indexes in this palette.
+#define PIX_FMT_BITSTREAM 4 ///< All values of a component are bit-wise packed end to end.
+#define PIX_FMT_HWACCEL   8 ///< Pixel format is an HW accelerated format.
+
+/**
+ * The array of all the pixel format descriptors.
+ */
+extern const AVPixFmtDescriptor av_pix_fmt_descriptors[];
+
+/**
+ * Reads a line from an image, and writes the values of the
+ * pixel format component c to dst.
+ *
+ * @param data the array containing the pointers to the planes of the image
+ * @param linesize the array containing the linesizes of the image
+ * @param desc the pixel format descriptor for the image
+ * @param x the horizontal coordinate of the first pixel to read
+ * @param y the vertical coordinate of the first pixel to read
+ * @param w the width of the line to read, that is the number of
+ * values to write to dst
+ * @param read_pal_component if not zero and the format is a paletted
+ * format writes the values corresponding to the palette
+ * component c in data[1] to dst, rather than the palette indexes in
+ * data[0]. The behavior is undefined if the format is not paletted.
+ */
+void read_line(uint16_t *dst, const uint8_t *data[4], const int linesize[4],
+               const AVPixFmtDescriptor *desc, int x, int y, int c, int w, int read_pal_component);
+
+/**
+ * Writes the values from src to the pixel format component c of an
+ * image line.
+ *
+ * @param src array containing the values to write
+ * @param data the array containing the pointers to the planes of the
+ * image to write into. It is supposed to be zeroed.
+ * @param linesize the array containing the linesizes of the image
+ * @param desc the pixel format descriptor for the image
+ * @param x the horizontal coordinate of the first pixel to write
+ * @param y the vertical coordinate of the first pixel to write
+ * @param w the width of the line to write, that is the number of
+ * values to write to the image line
+ */
+void write_line(const uint16_t *src, uint8_t *data[4], const int linesize[4],
+                const AVPixFmtDescriptor *desc, int x, int y, int c, int w);
+
+/**
+ * Returns the pixel format corresponding to name.
+ *
+ * If there is no pixel format with name name, then looks for a
+ * pixel format with the name corresponding to the native endian
+ * format of name.
+ * For example in a little-endian system, first looks for "gray16",
+ * then for "gray16le".
+ *
+ * Finally if no pixel format has been found, returns PIX_FMT_NONE.
+ */
+enum PixelFormat av_get_pix_fmt(const char *name);
+
+/**
+ * Returns the number of bits per pixel used by the pixel format
+ * described by pixdesc.
+ *
+ * The returned number of bits refers to the number of bits actually
+ * used for storing the pixel information, that is padding bits are
+ * not counted.
+ */
+int av_get_bits_per_pixel(const AVPixFmtDescriptor *pixdesc);
+
+#endif /* AVUTIL_PIXDESC_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixfmt.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixfmt.h
new file mode 100644
index 000000000..6bb2746e9
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixfmt.h
@@ -0,0 +1,168 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PIXFMT_H
+#define AVUTIL_PIXFMT_H
+
+/**
+ * @file libavutil/pixfmt.h
+ * pixel format definitions
+ *
+ * @warning This file has to be considered an internal but installed
+ * header, so it should not be directly included in your projects.
+ */
+
+#include "../libavutil/avconfig.h"
+
+/**
+ * Pixel format. Notes:
+ *
+ * PIX_FMT_RGB32 is handled in an endian-specific manner. An RGBA
+ * color is put together as:
+ *  (A << 24) | (R << 16) | (G << 8) | B
+ * This is stored as BGRA on little-endian CPU architectures and ARGB on
+ * big-endian CPUs.
+ *
+ * When the pixel format is palettized RGB (PIX_FMT_PAL8), the palettized
+ * image data is stored in AVFrame.data[0]. The palette is transported in
+ * AVFrame.data[1], is 1024 bytes long (256 4-byte entries) and is
+ * formatted the same as in PIX_FMT_RGB32 described above (i.e., it is
+ * also endian-specific). Note also that the individual RGB palette
+ * components stored in AVFrame.data[1] should be in the range 0..255.
+ * This is important as many custom PAL8 video codecs that were designed
+ * to run on the IBM VGA graphics adapter use 6-bit palette components.
+ *
+ * For all the 8bit per pixel formats, an RGB32 palette is in data[1] like
+ * for pal8. This palette is filled in automatically by the function
+ * allocating the picture.
+ *
+ * Note, make sure that all newly added big endian formats have pix_fmt&1==1
+ *       and that all newly added little endian formats have pix_fmt&1==0
+ *       this allows simpler detection of big vs little endian.
+ */
+
+// FFDShow note: be careful when modifying this file. pixdesc.c has a descriptor table whose index designators are
+// commented out, so that table must be updated too and must list its entries in exactly the same order as this enumeration.
+enum PixelFormat {
+    PIX_FMT_NONE= -1,  ///< no pixel format; av_get_pix_fmt() returns this when no name matches
+    PIX_FMT_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
+    PIX_FMT_YUYV422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
+    PIX_FMT_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
+    PIX_FMT_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
+    PIX_FMT_YUV422P,   ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
+    PIX_FMT_YUV444P,   ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
+    PIX_FMT_YUV410P,   ///< planar YUV 4:1:0,  9bpp, (1 Cr & Cb sample per 4x4 Y samples)
+    PIX_FMT_YUV411P,   ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
+    PIX_FMT_GRAY8,     ///<        Y        ,  8bpp
+    PIX_FMT_MONOWHITE, ///<        Y        ,  1bpp, 0 is white, 1 is black
+    PIX_FMT_MONOBLACK, ///<        Y        ,  1bpp, 0 is black, 1 is white
+    PIX_FMT_PAL8,      ///< 8 bit with PIX_FMT_RGB32 palette
+    PIX_FMT_YUVJ420P,  ///< planar YUV 4:2:0, 12bpp, full scale (JPEG)
+    PIX_FMT_YUVJ422P,  ///< planar YUV 4:2:2, 16bpp, full scale (JPEG)
+    PIX_FMT_YUVJ444P,  ///< planar YUV 4:4:4, 24bpp, full scale (JPEG)
+    PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing
+    PIX_FMT_XVMC_MPEG2_IDCT, ///< NOTE(review): undocumented here; presumably the IDCT-level counterpart of the entry above - confirm
+    PIX_FMT_UYVY422,   ///< packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
+    PIX_FMT_UYYVYY411, ///< packed YUV 4:1:1, 12bpp, Cb Y0 Y1 Cr Y2 Y3
+    PIX_FMT_BGR8,      ///< packed RGB 3:3:2,  8bpp, (msb)2B 3G 3R(lsb)
+    PIX_FMT_BGR4,      ///< packed RGB 1:2:1,  4bpp, (msb)1B 2G 1R(lsb)
+    PIX_FMT_BGR4_BYTE, ///< packed RGB 1:2:1,  8bpp, (msb)1B 2G 1R(lsb)
+    PIX_FMT_RGB8,      ///< packed RGB 3:3:2,  8bpp, (msb)2R 3G 3B(lsb)
+    PIX_FMT_RGB4,      ///< packed RGB 1:2:1,  4bpp, (msb)1R 2G 1B(lsb)
+    PIX_FMT_RGB4_BYTE, ///< packed RGB 1:2:1,  8bpp, (msb)1R 2G 1B(lsb)
+    PIX_FMT_NV12,      ///< planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 for UV
+    PIX_FMT_NV21,      ///< as above, but U and V bytes are swapped
+
+    PIX_FMT_ARGB,      ///< packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
+    PIX_FMT_RGBA,      ///< packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
+    PIX_FMT_ABGR,      ///< packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
+    PIX_FMT_BGRA,      ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
+
+    PIX_FMT_GRAY16BE,  ///<        Y        , 16bpp, big-endian
+    PIX_FMT_GRAY16LE,  ///<        Y        , 16bpp, little-endian
+    PIX_FMT_YUV440P,   ///< planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
+    PIX_FMT_YUVJ440P,  ///< planar YUV 4:4:0 full scale (JPEG)
+    PIX_FMT_YUVA420P,  ///< planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
+    PIX_FMT_VDPAU_H264,///< H.264 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    PIX_FMT_VDPAU_MPEG1,///< MPEG-1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    PIX_FMT_VDPAU_MPEG2,///< MPEG-2 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    PIX_FMT_VDPAU_WMV3,///< WMV3 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    PIX_FMT_VDPAU_VC1, ///< VC-1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    PIX_FMT_RGB48BE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, big-endian
+    PIX_FMT_RGB48LE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, little-endian
+
+    PIX_FMT_RGB565BE,  ///< packed RGB 5:6:5, 16bpp, (msb)   5R 6G 5B(lsb), big-endian
+    PIX_FMT_RGB565LE,  ///< packed RGB 5:6:5, 16bpp, (msb)   5R 6G 5B(lsb), little-endian
+    PIX_FMT_RGB555BE,  ///< packed RGB 5:5:5, 16bpp, (msb)1A 5R 5G 5B(lsb), big-endian, most significant bit to 0
+    PIX_FMT_RGB555LE,  ///< packed RGB 5:5:5, 16bpp, (msb)1A 5R 5G 5B(lsb), little-endian, most significant bit to 0
+    
+    PIX_FMT_RGB444BE,  ///< packed RGB 4:4:4, 16bpp, (msb)4A 4R 4G 4B(lsb), big-endian, most significant bits to 0
+    PIX_FMT_RGB444LE,  ///< packed RGB 4:4:4, 16bpp, (msb)4A 4R 4G 4B(lsb), little-endian, most significant bits to 0
+
+    PIX_FMT_BGR565BE,  ///< packed BGR 5:6:5, 16bpp, (msb)   5B 6G 5R(lsb), big-endian
+    PIX_FMT_BGR565LE,  ///< packed BGR 5:6:5, 16bpp, (msb)   5B 6G 5R(lsb), little-endian
+    PIX_FMT_BGR555BE,  ///< packed BGR 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), big-endian, most significant bit to 1
+    PIX_FMT_BGR555LE,  ///< packed BGR 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), little-endian, most significant bit to 1
+    
+    PIX_FMT_BGR444BE,  ///< packed BGR 4:4:4, 16bpp, (msb)4A 4B 4G 4R(lsb), big-endian, most significant bits to 1
+    PIX_FMT_BGR444LE,  ///< packed BGR 4:4:4, 16bpp, (msb)4A 4B 4G 4R(lsb), little-endian, most significant bits to 1
+
+    PIX_FMT_VAAPI_MOCO, ///< HW acceleration through VA API at motion compensation entry-point, Picture.data[3] contains a vaapi_render_state struct which contains macroblocks as well as various fields extracted from headers
+    PIX_FMT_VAAPI_IDCT, ///< HW acceleration through VA API at IDCT entry-point, Picture.data[3] contains a vaapi_render_state struct which contains fields extracted from headers
+    PIX_FMT_VAAPI_VLD,  ///< HW decoding through VA API, Picture.data[3] contains a vaapi_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+
+    PIX_FMT_YUV420P16LE,  ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
+    PIX_FMT_YUV420P16BE,  ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
+    PIX_FMT_YUV422P16LE,  ///< planar YUV 4:2:2, 32bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
+    PIX_FMT_YUV422P16BE,  ///< planar YUV 4:2:2, 32bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
+    PIX_FMT_YUV444P16LE,  ///< planar YUV 4:4:4, 48bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
+    PIX_FMT_YUV444P16BE,  ///< planar YUV 4:4:4, 48bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
+    PIX_FMT_VDPAU_MPEG4,  ///< MPEG4 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    PIX_FMT_DXVA2_VLD,    ///< HW decoding through DXVA2, Picture.data[3] contains a LPDIRECT3DSURFACE9 pointer
+
+    PIX_FMT_Y400A,     ///< 8bit gray, 8bit alpha
+    PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
+};
+
+#if AV_HAVE_BIGENDIAN /* PIX_FMT_NE(be, le): PIX_FMT_<be> when AV_HAVE_BIGENDIAN is set, PIX_FMT_<le> otherwise */
+#   define PIX_FMT_NE(be, le) PIX_FMT_##be
+#else
+#   define PIX_FMT_NE(be, le) PIX_FMT_##le
+#endif
+
+#define PIX_FMT_RGB32   PIX_FMT_NE(ARGB, BGRA) /* native-endian (A << 24) | (R << 16) | (G << 8) | B */
+#define PIX_FMT_RGB32_1 PIX_FMT_NE(RGBA, ABGR)
+#define PIX_FMT_BGR32   PIX_FMT_NE(ABGR, RGBA)
+#define PIX_FMT_BGR32_1 PIX_FMT_NE(BGRA, ARGB)
+
+#define PIX_FMT_GRAY16 PIX_FMT_NE(GRAY16BE, GRAY16LE) /* from here on: native-endian alias of each BE/LE enumerator pair */
+#define PIX_FMT_RGB48  PIX_FMT_NE(RGB48BE,  RGB48LE)
+#define PIX_FMT_RGB565 PIX_FMT_NE(RGB565BE, RGB565LE)
+#define PIX_FMT_RGB555 PIX_FMT_NE(RGB555BE, RGB555LE)
+#define PIX_FMT_RGB444 PIX_FMT_NE(RGB444BE, RGB444LE)
+#define PIX_FMT_BGR565 PIX_FMT_NE(BGR565BE, BGR565LE)
+#define PIX_FMT_BGR555 PIX_FMT_NE(BGR555BE, BGR555LE)
+#define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE)
+
+#define PIX_FMT_YUV420P16 PIX_FMT_NE(YUV420P16BE, YUV420P16LE)
+#define PIX_FMT_YUV422P16 PIX_FMT_NE(YUV422P16BE, YUV422P16LE)
+#define PIX_FMT_YUV444P16 PIX_FMT_NE(YUV444P16BE, YUV444P16LE)
+
+#endif /* AVUTIL_PIXFMT_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/rational.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/rational.c
index 886e7ebaf..507444435 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/rational.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/rational.c
@@ -85,6 +85,16 @@ AVRational av_div_q(AVRational b, AVRational c){
     return av_mul_q(b, d);
 }
 
+AVRational av_add_q(AVRational b, AVRational c){ /* returns b+c; the by-value copy of b is reused as the result */
+    av_reduce(&b.num, &b.den, b.num * (int64_t)c.den + c.num * (int64_t)b.den, b.den * (int64_t)c.den, INT_MAX); /* cross-multiplied sum over the product of the denominators, computed in 64-bit and written back into b; NOTE(review): INT_MAX is presumably av_reduce's bound on num/den - confirm */
+    return b;
+}
+
+AVRational av_sub_q(AVRational b, AVRational c){ /* returns b-c, implemented as b + (-c) */
+    AVRational d = {-c.num, c.den}; /* d = -c; NOTE(review): if num is a plain int, negating INT_MIN would overflow - confirm callers never pass it */
+    return av_add_q(b, d);
+}
+
 AVRational av_d2q(double d, int max){
     AVRational a;
 #define LOG2  0.69314718055994530941723212145817656807550013436025
@@ -94,3 +104,28 @@ AVRational av_d2q(double d, int max){
 
     return a;
 }
+
+int av_nearer_q(AVRational q, AVRational q1, AVRational q2) /* tells which of q1 and q2 lies nearer to q */
+{
+    /* n/d is q, a/b is the median between q1 and q2 */
+    int64_t a = q1.num * (int64_t)q2.den + q2.num * (int64_t)q1.den; /* numerator of (q1 + q2) / 2 */
+    int64_t b = 2 * (int64_t)q1.den * q2.den; /* denominator of (q1 + q2) / 2 */
+
+    /* rnd_up(a*d/b) > n => a*d/b > n */
+    int64_t x_up = av_rescale_rnd(a, q.den, b, AV_ROUND_UP);
+
+    /* rnd_down(a*d/b) < n => a*d/b < n */
+    int64_t x_down = av_rescale_rnd(a, q.den, b, AV_ROUND_DOWN);
+
+    return ((x_up > q.num) - (x_down < q.num)) * av_cmp_q(q2, q1); /* (+1, 0 or -1 for a/b above, equal to or below n/d per the comments above) times the ordering of q2 versus q1; 0 when q1 == q2 */
+}
+
+int av_find_nearest_q_idx(AVRational q, const AVRational* q_list) /* returns the index of the q_list entry nearest to q */
+{
+    int i, nearest_q_idx = 0; /* entry 0 is the initial best; 0 is also returned when no entry beats it or the list is empty */
+    for(i=0; q_list[i].den; i++) /* the scan stops at the first entry whose den is 0 (the list terminator) */
+        if (av_nearer_q(q, q_list[i], q_list[nearest_q_idx]) > 0) /* replace only when av_nearer_q reports a positive result, so ties keep the earlier index */
+            nearest_q_idx = i;
+
+    return nearest_q_idx;
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/rational.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/rational.h
index 157c038fd..c9d4da2c8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/rational.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/rational.h
@@ -29,7 +29,7 @@
 #define AVUTIL_RATIONAL_H
 
 #include <stdint.h>
-#include "common.h"
+#include "attributes.h"
 
 /**
  * rational number numerator/denominator
@@ -45,7 +45,7 @@ typedef struct AVRational{
  * @param b second rational
  * @return 0 if a==b, 1 if a>b and -1 if a<b
  */
-static inline int av_cmp_q(AVRational a, AVRational b){
+static av_always_inline int av_cmp_q(AVRational a, AVRational b){
     const int64_t tmp= a.num * (int64_t)b.den - b.num * (int64_t)a.den;
 
     if(tmp) return (tmp>>63)|1;
@@ -57,7 +57,7 @@ static inline int av_cmp_q(AVRational a, AVRational b){
  * @param a rational to convert
  * @return (double) a
  */
-static inline double av_q2d(AVRational a){
+static av_always_inline double av_q2d(AVRational a){
     return a.num / (double) a.den;
 }
 
@@ -90,6 +90,22 @@ AVRational av_mul_q(AVRational b, AVRational c) av_const;
 AVRational av_div_q(AVRational b, AVRational c) av_const;
 
 /**
+ * Adds two rationals.
+ * @param b first rational
+ * @param c second rational
+ * @return b+c
+ */
+AVRational av_add_q(AVRational b, AVRational c) av_const;
+
+/**
+ * Subtracts one rational from another.
+ * @param b first rational
+ * @param c second rational
+ * @return b-c
+ */
+AVRational av_sub_q(AVRational b, AVRational c) av_const;
+
+/**
  * Converts a double precision floating point number to a rational.
  * @param d double to convert
  * @param max the maximum allowed numerator and denominator
@@ -97,4 +113,17 @@ AVRational av_div_q(AVRational b, AVRational c) av_const;
  */
 AVRational av_d2q(double d, int max) av_const;
 
+/**
+ * @return 1 if q1 is nearer to q than q2, -1 if q2 is nearer
+ * than q1, 0 if they have the same distance.
+ */
+int av_nearer_q(AVRational q, AVRational q1, AVRational q2);
+
+/**
+ * Finds the nearest value in q_list to q.
+ * @param q_list an array of rationals terminated by {0, 0}
+ * @return the index of the nearest value found in the array
+ */
+int av_find_nearest_q_idx(AVRational q, const AVRational* q_list);
+
 #endif /* AVUTIL_RATIONAL_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h
index 0ff8a9338..462309f41 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h
@@ -17,7 +17,7 @@
  */
 
 /**
- * @file bswap.h
+ * @file libavutil/x86/bswap.h
  * byte swapping routines
  */
 
@@ -26,7 +26,7 @@
 
 #include <stdint.h>
 #include "config.h"
-#include "libavutil/common.h"
+#include "libavutil/attributes.h"
 
 #define bswap_16 bswap_16
 static av_always_inline av_const uint16_t bswap_16(uint16_t x)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/intmath.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/intmath.h
index 7528c55dc..f3acddc0e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/intmath.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/intmath.h
@@ -21,8 +21,6 @@
 #ifndef AVUTIL_X86_INTMATH_H
 #define AVUTIL_X86_INTMATH_H
 
-#include "libavutil/common.h"
-
 #define FASTDIV(a,b) \
     ({\
         int ret, dmy;\
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/intreadwrite.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/intreadwrite.h
index 4621f064d..4061d1923 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/intreadwrite.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/intreadwrite.h
@@ -23,6 +23,7 @@
 
 #include <stdint.h>
 #include "config.h"
+#include "libavutil/attributes.h"
 
 #if HAVE_MMX
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/config.h b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/config.h
index fad24058f..1a7a45353 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/config.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/config.h
@@ -46,7 +46,7 @@
 #define USE_FASTMEMCPY 1
 #define CONFIG_SWSCALE_ALPHA 1
 
-#define ASMALIGN(ZEROBITS) ".align 1<<" #ZEROBITS "\n\t"
+#define ASMALIGN(ZEROBITS) ".align 1 << " #ZEROBITS "\n\t" /* string-literal pieces that concatenate to ".align 1 << <ZEROBITS>" plus newline and tab, for pasting into inline asm text */
 
 #define CONFIG_GPL 1
 
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocFilters.h b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocFilters.h
deleted file mode 100644
index dfa61d4fb..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocFilters.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _POSTPROCFILTERS_H_
-#define _POSTPROCFILTERS_H_
-
-#define V_DEBLOCK       0x01
-#define H_DEBLOCK       0x02
-#define DERING          0x04
-#define LEVEL_FIX	0x08 ///< Brightness & Contrast
-
-#define LUM_V_DEBLOCK   V_DEBLOCK               //   1
-#define LUM_H_DEBLOCK   H_DEBLOCK               //   2
-#define CHROM_V_DEBLOCK (V_DEBLOCK<<4)          //  16
-#define CHROM_H_DEBLOCK (H_DEBLOCK<<4)          //  32
-#define LUM_DERING      DERING                  //   4
-#define CHROM_DERING    (DERING<<4)             //  64
-#define LUM_LEVEL_FIX	LEVEL_FIX		//   8
-#define CHROM_LEVEL_FIX	(LEVEL_FIX<<4)		// 128 (not implemented yet)
-
-// Experimental vertical filters
-#define V_X1_FILTER	0x0
-#define V_A_DEBLOCK	0x2000000
-
-// Experimental horizontal filters
-#define H_X1_FILTER	0x0
-#define H_A_DEBLOCK	0x4000000
-
-/// select between full y range (255-0) or standart one (234-16)
-#define FULL_Y_RANGE	0x8000			// 32768
-
-//Deinterlacing Filters
-#define	LINEAR_IPOL_DEINT_FILTER	 0x10000	// 65536
-#define	LINEAR_BLEND_DEINT_FILTER	 0x20000	// 131072
-#define	CUBIC_BLEND_DEINT_FILTER	  0x8000	// (not implemented yet)
-#define	CUBIC_IPOL_DEINT_FILTER		 0x40000	// 262144
-#define	MEDIAN_DEINT_FILTER		     0x80000	// 524288
-#define	FFMPEG_DEINT_FILTER		    0x400000
-#define	LOWPASS5_DEINT_FILTER		0x800000
-
-#define TEMP_NOISE_FILTER		0x100000
-#define FORCE_QUANT			0x200000
-
-#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess.c b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess.c
deleted file mode 100644
index a7c7a3696..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess.c
+++ /dev/null
@@ -1,860 +0,0 @@
-/*
-    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
-
-    AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-
-/**
- * @file postprocess.c
- * postprocessing.
- */
-
-/*
-			C	MMX	MMX2	3DNow	AltiVec
-isVertDC		Ec	Ec			Ec
-isVertMinMaxOk		Ec	Ec			Ec
-doVertLowPass		E		e	e	Ec
-doVertDefFilter		Ec	Ec	e	e	Ec
-isHorizDC		Ec	Ec			Ec
-isHorizMinMaxOk		a	E			Ec
-doHorizLowPass		E		e	e	Ec
-doHorizDefFilter	Ec	Ec	e	e	Ec
-do_a_deblock		Ec	E	Ec	E
-deRing			E		e	e*	Ecp
-Vertical RKAlgo1        E               a       a
-Horizontal RKAlgo1                      a       a
-Vertical X1#            a               E       E
-Horizontal X1#          a               E       E
-LinIpolDeinterlace      e               E       E*
-CubicIpolDeinterlace    a               e       e*
-LinBlendDeinterlace     e               E       E*
-MedianDeinterlace#	E	Ec	Ec
-TempDeNoiser#		E		e	e	Ec
-
-* i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work
-# more or less selfinvented filters so the exactness isnt too meaningfull
-E = Exact implementation
-e = allmost exact implementation (slightly different rounding,...)
-a = alternative / approximate impl
-c = checked against the other implementations (-vo md5)
-p = partially optimized, still some work to do
-*/
-
-/*
-TODO:
-reduce the time wasted on the mem transfer
-unroll stuff if instructions depend too much on the prior one
-move YScale thing to the end instead of fixing QP
-write a faster and higher quality deblocking filter :)
-make the mainloop more flexible (variable number of blocks at once
-        (the if/else stuff per block is slowing things down)
-compare the quality & speed of all filters
-split this huge file
-optimize c versions
-try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
-...
-*/
-
-//Changelog: use the CVS log
-
-#include "config.h"
-#include <inttypes.h>
-#include <stdio.h>
-#include <string.h>
-//#undef HAVE_MMX2
-//#define HAVE_AMD3DNOW
-//#undef HAVE_MMX
-//#undef ARCH_X86
-//#define DEBUG_BRIGHTNESS
-#include "../libavutil/internal.h"
-#ifdef USE_FASTMEMCPY
-#include "fastmemcpy.h"
-#endif
-#include "postprocess.h"
-#include "postprocess_internal.h"
-#include "cpudetect.h"
-
-#ifdef HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-
-#ifndef HAVE_MEMALIGN
-#define memalign(a,b) av_malloc(b)
-#endif
-
-#define MIN(a,b) ((a) > (b) ? (b) : (a))
-#define MAX(a,b) ((a) < (b) ? (b) : (a))
-#define ABS(a) ((a) > 0 ? (a) : (-(a)))
-#define SIGN(a) ((a) > 0 ? 1 : -1)
-
-#define GET_MODE_BUFFER_SIZE 500
-#define OPTIONS_ARRAY_SIZE 10
-#define BLOCK_SIZE 8
-#define TEMP_STRIDE 8
-//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
-
-#if ARCH_X86_32 || ARCH_X86_64
-static uint64_t __attribute__((aligned(8))) attribute_used w05=		0x0005000500050005LL;
-static uint64_t __attribute__((aligned(8))) attribute_used w04=		0x0004000400040004LL;
-static uint64_t __attribute__((aligned(8))) attribute_used w20=		0x0020002000200020LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b00= 		0x0000000000000000LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b01= 		0x0101010101010101LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b02= 		0x0202020202020202LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b08= 		0x0808080808080808LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b80= 		0x8080808080808080LL;
-#endif
-
-static uint8_t clip_table[3*256];
-static uint8_t * const clip_tab= clip_table + 256;
-
-static const int verbose= 0;
-
-static const int attribute_used deringThreshold= 20;
-
-
-
-
-#if ARCH_X86_32 || ARCH_X86_64
-static inline void prefetchnta(void *p)
-{
-        asm volatile(   "prefetchnta (%0)\n\t"
-                : : "r" (p)
-        );
-}
-
-static inline void prefetcht0(void *p)
-{
-        asm volatile(   "prefetcht0 (%0)\n\t"
-                : : "r" (p)
-        );
-}
-
-static inline void prefetcht1(void *p)
-{
-        asm volatile(   "prefetcht1 (%0)\n\t"
-                : : "r" (p)
-        );
-}
-
-static inline void prefetcht2(void *p)
-{
-        asm volatile(   "prefetcht2 (%0)\n\t"
-                : : "r" (p)
-        );
-}
-#endif
-
-// The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing
-
-/**
- * Check if the given 8x8 Block is mostly "flat"
- */
-static inline int isHorizDC_C(uint8_t src[], stride_t stride, PPContext *c)
-{
-        int numEq= 0;
-        int y;
-	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
-	const int dcThreshold= dcOffset*2 + 1;
-
-        for(y=0; y<BLOCK_SIZE; y++)
-        {
-		if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
-                src+= stride;
-        }
-	return numEq > c->ppMode.flatnessThreshold;
-}
-
-/**
- * Check if the middle 8x8 Block in the given 8x16 block is flat
- */
-static inline int isVertDC_C(uint8_t src[], stride_t stride, PPContext *c){
-	int numEq= 0;
-	int y;
-	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
-	const int dcThreshold= dcOffset*2 + 1;
-
-	src+= stride*4; // src points to begin of the 8x8 Block
-	for(y=0; y<BLOCK_SIZE-1; y++)
-	{
-		if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
-		src+= stride;
-	}
-	return numEq > c->ppMode.flatnessThreshold;
-}
-
-static inline int isHorizMinMaxOk_C(uint8_t src[], stride_t stride, int QP)
-{
-	int i;
-#if 1
-	for(i=0; i<2; i++){
-		if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
-		src += stride;
-		if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
-		src += stride;
-		if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
-		src += stride;
-		if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
-		src += stride;
-	}
-#else
-	for(i=0; i<8; i++){
-		if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
-		src += stride;
-	}
-#endif
-	return 1;
-}
-
-static inline int isVertMinMaxOk_C(uint8_t src[], stride_t stride, int QP)
-{
-#if 1
-#if 1
-	int x;
-	src+= stride*4;
-	for(x=0; x<BLOCK_SIZE; x+=4)
-	{
-		if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
-		if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
-		if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
-		if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
-	}
-#else
-	int x;
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
-	}
-#endif
-	return 1;
-#else
-	int x;
-	src+= stride*4;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		int min=255;
-		int max=0;
-		int y;
-		for(y=0; y<8; y++){
-			int v= src[x + y*stride];
-			if(v>max) max=v;
-			if(v<min) min=v;
-		}
-		if(max-min > 2*QP) return 0;
-	}
-	return 1;
-#endif
-}
-
-static inline int horizClassify_C(uint8_t src[], stride_t stride, PPContext *c){
-	if( isHorizDC_C(src, stride, c) ){
-		if( isHorizMinMaxOk_C(src, stride, c->QP) )
-			return 1;
-		else
-			return 0;
-	}else{
-		return 2;
-	}
-}
-
-static inline int vertClassify_C(uint8_t src[], stride_t stride, PPContext *c){
-	if( isVertDC_C(src, stride, c) ){
-		if( isVertMinMaxOk_C(src, stride, c->QP) )
-        return 1;
-		else
-			return 0;
-	}else{
-		return 2;
-	}
-}
-
-static inline void doHorizDefFilter_C(uint8_t dst[], stride_t stride, PPContext *c)
-{
-        int y;
-        for(y=0; y<BLOCK_SIZE; y++)
-        {
-		const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
-
-		if(ABS(middleEnergy) < 8*c->QP)
-                {
-                        const int q=(dst[3] - dst[4])/2;
-                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
-                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
-
-                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
-                        d= MAX(d, 0);
-
-                        d= (5*d + 32) >> 6;
-                        d*= SIGN(-middleEnergy);
-
-                        if(q>0)
-                        {
-                                d= d<0 ? 0 : d;
-                                d= d>q ? q : d;
-                        }
-                        else
-                        {
-                                d= d>0 ? 0 : d;
-                                d= d<q ? q : d;
-                        }
-
-                        dst[3]-= d;
-                        dst[4]+= d;
-                }
-                dst+= stride;
-        }
-}
-
-/**
- * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
- * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
- */
-static inline void doHorizLowPass_C(uint8_t dst[], stride_t stride, PPContext *c)
-{
-        int y;
-        for(y=0; y<BLOCK_SIZE; y++)
-        {
-		const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
-		const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
-
-		int sums[10];
-		sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
-		sums[1] = sums[0] - first  + dst[3];
-		sums[2] = sums[1] - first  + dst[4];
-		sums[3] = sums[2] - first  + dst[5];
-		sums[4] = sums[3] - first  + dst[6];
-		sums[5] = sums[4] - dst[0] + dst[7];
-		sums[6] = sums[5] - dst[1] + last;
-		sums[7] = sums[6] - dst[2] + last;
-		sums[8] = sums[7] - dst[3] + last;
-		sums[9] = sums[8] - dst[4] + last;
-
-		dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
-		dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
-		dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
-		dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
-		dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
-		dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
-		dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
-		dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
-
-                dst+= stride;
-        }
-}
-
-/**
- * Experimental Filter 1 (Horizontal)
- * will not damage linear gradients
- * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
- * can only smooth blocks at the expected locations (it cant smooth them if they did move)
- * MMX2 version does correct clipping C version doesnt
- * not identical with the vertical one
- */
-static inline void horizX1Filter(uint8_t *src, stride_t stride, int QP)
-{
-        int y;
-        static uint64_t *lut= NULL;
-        if(lut==NULL)
-        {
-                int i;
-                lut= (uint64_t*)memalign(8, 256*8);
-                for(i=0; i<256; i++)
-                {
-                        int v= i < 128 ? 2*i : 2*(i-256);
-/*
-//Simulate 112242211 9-Tap filter
-                        uint64_t a= (v/16) & 0xFF;
-                        uint64_t b= (v/8) & 0xFF;
-                        uint64_t c= (v/4) & 0xFF;
-                        uint64_t d= (3*v/8) & 0xFF;
-*/
-//Simulate piecewise linear interpolation
-                        uint64_t a= (v/16) & 0xFF;
-                        uint64_t b= (v*3/16) & 0xFF;
-                        uint64_t c= (v*5/16) & 0xFF;
-                        uint64_t d= (7*v/16) & 0xFF;
-                        uint64_t A= (0x100 - a)&0xFF;
-                        uint64_t B= (0x100 - b)&0xFF;
-                        uint64_t C= (0x100 - c)&0xFF;
-                        uint64_t D= (0x100 - c)&0xFF;
-
-                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
-                                (D<<24) | (C<<16) | (B<<8) | (A);
-                        //lut[i] = (v<<32) | (v<<24);
-                }
-        }
-
-        for(y=0; y<BLOCK_SIZE; y++)
-        {
-                int a= src[1] - src[2];
-                int b= src[3] - src[4];
-                int c= src[5] - src[6];
-
-                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
-
-                if(d < QP)
-                {
-                        int v = d * SIGN(-b);
-
-                        src[1] +=v/8;
-                        src[2] +=v/4;
-                        src[3] +=3*v/8;
-                        src[4] -=3*v/8;
-                        src[5] -=v/4;
-                        src[6] -=v/8;
-
-                }
-                src+=stride;
-        }
-}
-
-/**
- * accurate deblock filter
- */
-static av_always_inline void do_a_deblock_C(uint8_t *src, stride_t step, stride_t stride, PPContext *c){
-	int y;
-	const int QP= c->QP;
-	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
-	const int dcThreshold= dcOffset*2 + 1;
-//START_TIMER
-	src+= step*4; // src points to begin of the 8x8 Block
-	for(y=0; y<8; y++){
-		int numEq= 0;
-
-		if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
-		if(numEq > c->ppMode.flatnessThreshold){
-			int min, max, x;
-
-			if(src[0] > src[step]){
-			    max= src[0];
-			    min= src[step];
-			}else{
-			    max= src[step];
-			    min= src[0];
-			}
-			for(x=2; x<8; x+=2){
-				if(src[x*step] > src[(x+1)*step]){
-					if(src[x    *step] > max) max= src[ x   *step];
-					if(src[(x+1)*step] < min) min= src[(x+1)*step];
-				}else{
-					if(src[(x+1)*step] > max) max= src[(x+1)*step];
-					if(src[ x   *step] < min) min= src[ x   *step];
-				}
-			}
-			if(max-min < 2*QP){
-				const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
-				const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
-
-				int sums[10];
-				sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
-				sums[1] = sums[0] - first       + src[3*step];
-				sums[2] = sums[1] - first       + src[4*step];
-				sums[3] = sums[2] - first       + src[5*step];
-				sums[4] = sums[3] - first       + src[6*step];
-				sums[5] = sums[4] - src[0*step] + src[7*step];
-				sums[6] = sums[5] - src[1*step] + last;
-				sums[7] = sums[6] - src[2*step] + last;
-				sums[8] = sums[7] - src[3*step] + last;
-				sums[9] = sums[8] - src[4*step] + last;
-
-				src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
-				src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
-				src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
-				src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
-				src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
-				src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
-				src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
-				src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
-			}
-		}else{
-			const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
-
-			if(ABS(middleEnergy) < 8*QP)
-			{
-				const int q=(src[3*step] - src[4*step])/2;
-				const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
-				const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
-
-				int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
-				d= MAX(d, 0);
-
-				d= (5*d + 32) >> 6;
-				d*= SIGN(-middleEnergy);
-
-				if(q>0)
-				{
-					d= d<0 ? 0 : d;
-					d= d>q ? q : d;
-				}
-				else
-				{
-					d= d>0 ? 0 : d;
-					d= d<q ? q : d;
-				}
-
-				src[3*step]-= d;
-				src[4*step]+= d;
-			}
-		}
-
-		src += stride;
-	}
-/*if(step==16){
-    STOP_TIMER("step16")
-}else{
-    STOP_TIMER("stepX")
-}*/
-}
-
-//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
-//Plain C versions
-#if !HAVE_MMX || defined (RUNTIME_CPUDETECT)
-#define COMPILE_C
-#endif
-
-#ifdef ARCH_POWERPC
-#ifdef HAVE_ALTIVEC
-#define COMPILE_ALTIVEC
-#endif //HAVE_ALTIVEC
-#endif //ARCH_POWERPC
-
-#if ARCH_X86_32 || ARCH_X86_64
-
-#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
-#define COMPILE_MMX
-#endif
-
-#if HAVE_MMX2 || defined (RUNTIME_CPUDETECT)
-#define COMPILE_MMX2
-#endif
-
-#if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
-#define COMPILE_3DNOW
-#endif
-#endif //ARCH_X86
-
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#undef HAVE_ALTIVEC
-
-#ifdef COMPILE_C
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define RENAME(a) a ## _C
-#include "postprocess_template.c"
-#endif
-
-#ifdef ARCH_POWERPC
-#ifdef COMPILE_ALTIVEC
-#undef RENAME
-#define HAVE_ALTIVEC
-#define RENAME(a) a ## _altivec
-#include "postprocess_altivec_template.c"
-#include "postprocess_template.c"
-#endif
-#endif //ARCH_POWERPC
-
-//MMX versions
-#ifdef COMPILE_MMX
-#undef RENAME
-#define HAVE_MMX 1
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define RENAME(a) a ## _MMX
-#include "postprocess_template.c"
-#endif
-
-//MMX2 versions
-#ifdef COMPILE_MMX2
-#undef RENAME
-#define HAVE_MMX 1
-#define HAVE_MMX2 1
-#undef HAVE_AMD3DNOW
-#define RENAME(a) a ## _MMX2
-#include "postprocess_template.c"
-#endif
-
-//3DNOW versions
-#ifdef COMPILE_3DNOW
-#undef RENAME
-#define HAVE_MMX 1
-#undef HAVE_MMX2
-#define HAVE_AMD3DNOW 1
-#define RENAME(a) a ## _3DNow
-#include "postprocess_template.c"
-#endif
-
-// minor note: the HAVE_xyz is messed up after that line so dont use it
-
-static inline void postProcess(uint8_t src[], stride_t srcStride, uint8_t dst[], stride_t dstStride, int width, int height,
-	QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
-{
-	PPContext *c= (PPContext *)vc;
-	PPMode *ppMode= (PPMode *)vm;
-	c->ppMode= *ppMode; //FIXME
-
-        // useing ifs here as they are faster than function pointers allthough the
-        // difference wouldnt be messureable here but its much better because
-        // someone might exchange the cpu whithout restarting mplayer ;)
-#ifdef RUNTIME_CPUDETECT
-#if ARCH_X86_32 || ARCH_X86_64
-        // ordered per speed fasterst first
-	if(c->cpuCaps & PP_CPU_CAPS_MMX2)
-		postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-	else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
-		postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-	else if(c->cpuCaps & PP_CPU_CAPS_MMX)
-		postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-        else
-		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#else
-#ifdef ARCH_POWERPC
-#ifdef HAVE_ALTIVEC
-        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
-		postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-        else
-#endif
-#endif
-		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#endif
-#else //RUNTIME_CPUDETECT
-#if HAVE_MMX2
-		postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif HAVE_AMD3DNOW
-		postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif HAVE_MMX
-		postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif defined (HAVE_ALTIVEC)
-		postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#else
-		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#endif
-#endif //!RUNTIME_CPUDETECT
-}
-
-static void reallocAlign(void **p, int alignment, stride_t size){
-	av_free(*p);
-	*p= memalign(alignment, size);
-	memset(*p, 0, size);
-}
-
-static void reallocBuffers(PPContext *c, int width, int height, stride_t stride, int qpStride){
-	int mbWidth = (width+15)>>4;
-	int mbHeight= (height+15)>>4;
-	int i;
-
-	c->stride= stride;
-	c->qpStride= qpStride;
-
-	reallocAlign((void **)&c->tempDst, 8, stride*24*2);
-	reallocAlign((void **)&c->tempSrc, 8, stride*24*2);
-	reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
-	reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
-	for(i=0; i<256; i++)
-		c->yHistogram[i]= width*height/64*15/256;
-
-	for(i=0; i<3; i++)
-	{
-		//Note:the +17*1024 is just there so i dont have to worry about r/w over te end
-		reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024+256);
-		reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024+256);//FIXME size
-	}
-
-	reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
-	reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
-	reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
-	reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
-}
-
-static void global_init(void){
-	int i;
-	memset(clip_table, 0, 256);
-	for(i=256; i<512; i++)
-		clip_table[i]= i;
-	memset(clip_table+512, 0, 256);
-}
-
-pp_context_t *pp_get_context(int width, int height, int cpuCaps){
-	PPContext *c= memalign(32, sizeof(PPContext));
-	int stride= (width+15)&(~15); //assumed / will realloc if needed
-	int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
-
-	global_init();
-
-	memset(c, 0, sizeof(PPContext));
-	c->cpuCaps= cpuCaps;
-	if(cpuCaps&PP_FORMAT){
-		c->hChromaSubSample= cpuCaps&0x3;
-		c->vChromaSubSample= (cpuCaps>>4)&0x3;
-	}else{
-		c->hChromaSubSample= 1;
-		c->vChromaSubSample= 1;
-	}
-
-	reallocBuffers(c, width, height, stride, qpStride);
-
-	c->frameNum=-1;
-
-	return c;
-}
-
-void pp_free_context(void *vc){
-	PPContext *c = (PPContext*)vc;
-	int i;
-
-	for(i=0; i<3; i++) av_free(c->tempBlured[i]);
-	for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
-
-	av_free(c->tempBlocks);
-	av_free(c->yHistogram);
-	av_free(c->tempDst);
-	av_free(c->tempSrc);
-	av_free(c->deintTemp);
-	av_free(c->stdQPTable);
-	av_free(c->nonBQPTable);
-	av_free(c->forcedQPTable);
-
-	memset(c, 0, sizeof(PPContext));
-
-	av_free(c);
-}
-
-void  pp_postprocess(uint8_t * src[3], stride_t srcStride[3],
-                 uint8_t * dst[3], stride_t dstStride[3],
-                 int width, int height,
-                 QP_STORE_T *QP_store,  int QPStride,
-		 pp_mode_t *vm,  void *vc, int pict_type)
-{
-	int mbWidth = (width+15)>>4;
-	int mbHeight= (height+15)>>4;
-	PPMode *mode = (PPMode*)vm;
-	PPContext *c = (PPContext*)vc;
-        stride_t minStride= MAX(srcStride[0], dstStride[0]);
-
-	if(c->stride < minStride || c->qpStride < QPStride)
-		reallocBuffers(c, width, height,
-				MAX(minStride, c->stride),
-				MAX(c->qpStride, QPStride));
-
-        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
-        {
-                int i;
-		QP_store= c->forcedQPTable;
-		QPStride= 0;
-                if(mode->lumMode & FORCE_QUANT)
-			for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
-                else
-			for(i=0; i<mbWidth; i++) QP_store[i]= 1;
-        }
-//printf("pict_type:%d\n", pict_type);
-
-	if(pict_type & PP_PICT_TYPE_QP2){
-		int i;
-		const int count= mbHeight * QPStride;
-		for(i=0; i<(count>>2); i++){
-			((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
-		}
-		for(i<<=2; i<count; i++){
-			c->stdQPTable[i] = QP_store[i]>>1;
-		}
-                QP_store= c->stdQPTable;
-	}
-
-if(0){
-int x,y;
-for(y=0; y<mbHeight; y++){
-	for(x=0; x<mbWidth; x++){
-		;//printf("%2d ", QP_store[x + y*QPStride]);
-	}
-	//printf("\n");
-}
-	//printf("\n");
-}
-
-	if((pict_type&7)!=3)
-        {
-		int i;
-		const int count= mbHeight * QPStride;
-		for(i=0; i<(count>>2); i++){
-			((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
-			}
-		for(i<<=2; i<count; i++){
-			c->nonBQPTable[i] = QP_store[i] & 0x3F;
-		}
-		}
-
-	if(verbose>2)
-	{
-		//printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
-        }
-
-	postProcess(src[0], srcStride[0], dst[0], dstStride[0],
-		width, height, QP_store, QPStride, 0, mode, c);
-
-	width  = (width )>>c->hChromaSubSample;
-	height = (height)>>c->vChromaSubSample;
-
-        if(mode->chromMode)
-        {
-		postProcess(src[1], srcStride[1], dst[1], dstStride[1],
-			width, height, QP_store, QPStride, 1, mode, c);
-		postProcess(src[2], srcStride[2], dst[2], dstStride[2],
-			width, height, QP_store, QPStride, 2, mode, c);
-        }/*
-	else if(srcStride[1] == dstStride[1] && srcStride[1]>0 && srcStride[2] == dstStride[2] && srcStride[2]>0)
-        {
-		memcpy(dst[1], src[1], srcStride[1]*height);
-		memcpy(dst[2], src[2], srcStride[2]*height);
-        }*/
-        else
-        {
-                int y;
-		for(y=0; y<height; y++)
-                {
-			memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
-			memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
-                }
-        }
-}
-
-
-
-
-
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess.h b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess.h
deleted file mode 100644
index 277522cf1..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
-    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-
-#ifndef NEWPOSTPROCESS_H
-#define NEWPOSTPROCESS_H
-
-/**
- * @file postprocess.h
- * @brief
- *     external api for the pp stuff
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define PP_QUALITY_MAX 6
-
-#define QP_STORE_T int8_t
-
-#include "postprocFilters.h"
-#define FF_CSP_ONLY
-#include "ffImgfmt.h"
-
-/**
- * Postprocessing mode.
- */
-typedef struct PPMode{
-	int lumMode; 			///< activates filters for luminance
-	int chromMode; 			///< activates filters for chrominance
-	int error; 			///< non zero on error
-
-	int minAllowedY; 		///< for brightness correction
-	int maxAllowedY; 		///< for brightness correction
-        float maxClippedThreshold;      ///< amount of "black" you are willing to lose to get a brightness corrected picture
-
-	int maxTmpNoise[3]; 		///< for Temporal Noise Reducing filter (Maximal sum of abs differences)
-
- int baseDcDiff;
- int flatnessThreshold; ///< vertClassify reports a block as flat only when its count of matching vertical neighbour pairs exceeds this
-
-	int forcedQuant; 		///< quantizer if FORCE_QUANT is used
-} PPMode;
-
-typedef void pp_context_t; ///< opaque to API users: a context is only ever handled as a pp_context_t* (i.e. void*)
-typedef PPMode pp_mode_t;
-
-void  pp_postprocess(uint8_t * src[3], stride_t srcStride[3], // NOTE(review): presumably one entry per plane ([0]=luma, [1],[2]=chroma) - confirm against the implementation
-                     uint8_t * dst[3], stride_t dstStride[3],
-                     int horizontalSize, int verticalSize,
-                     QP_STORE_T *QP_store,  int QP_stride,
-                     pp_mode_t *mode, pp_context_t *ppContext, int pict_type);
-
-
-pp_context_t *pp_get_context(int width, int height, int flags);
-void pp_free_context(pp_context_t *ppContext);
-
-#define PP_CPU_CAPS_MMX   0x80000000
-#define PP_CPU_CAPS_MMX2  0x20000000
-#define PP_CPU_CAPS_3DNOW 0x40000000
-#define PP_CPU_CAPS_ALTIVEC 0x10000000
-
-#define PP_FORMAT         0x00000008 ///< bit included in every PP_FORMAT_* value below
-#define PP_FORMAT_420    (0x00000011|PP_FORMAT)
-#define PP_FORMAT_422    (0x00000001|PP_FORMAT)
-#define PP_FORMAT_411    (0x00000002|PP_FORMAT)
-#define PP_FORMAT_444    (0x00000000|PP_FORMAT)
-#define PP_FORMAT_410    (0x00000022|PP_FORMAT)
-
-#define PP_PICT_TYPE_QP2  0x00000010 ///< MPEG2 style QScale
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess_internal.h b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess_internal.h
deleted file mode 100644
index 625e35855..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess_internal.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
-    Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-
-#include "postprocFilters.h"
-
-//use if u want a faster postprocessing code
-//cant differentiate between chroma & luma filters (both on or both off)
-//obviosly the -pp option at the commandline has no effect except turning the here selected
-//filters on
-//#define COMPILE_TIME_MODE 0x77
-
-#if 1 // selects the branch-based function; the two macro alternatives below are compiled out
-static inline int CLIP(int a){ // clamp helper for byte results; only bit 8 is tested, so it is only meaningful for a in [-256, 511]
-	if(a&256) return ((a)>>31)^(-1); // out of range: 0 for negative a, -1 (0xFF once stored in a byte) otherwise; relies on arithmetic >> of a 32-bit int
-	else      return a; // bit 8 clear: value returned unchanged
-}
-//#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a))
-#elif 0
-#define CLIP(a) clip_tab[a]
-#else
-#define CLIP(a) (a)
-#endif
-/**
- * Postprocessing filter.
- */
-struct PPFilter{
-	char *shortName;
-	char *longName;
-	int chromDefault; 	///< is chrominance filtering on by default if this filter is manually activated
-	int minLumQuality; 	///< minimum quality to turn luminance filtering on
-	int minChromQuality;	///< minimum quality to turn chrominance filtering on
-	int mask; 		///< Bitmask to turn this filter on
-};
-
-/**
- * postprocess context.
- */
-typedef struct PPContext{
-	uint8_t *tempBlocks; ///<used for the horizontal code
-
-	/**
-	 * luma histogram.
-	 * we need 64bit here otherwise we are going to have a problem
-	 * after watching a black picture for 5 hours
-	 */
-	uint64_t *yHistogram;
-
-	uint64_t __attribute__((aligned(8))) packedYOffset;
-	uint64_t __attribute__((aligned(8))) packedYScale;
-
-	/** Temporal noise reducing buffers */
-	uint8_t *tempBlured[3];
-	int32_t *tempBluredPast[3];
-
-	/** Temporary buffers for handling the last row(s) */
-	uint8_t *tempDst;
-	uint8_t *tempSrc;
-
-	uint8_t *deintTemp;
-
-	uint64_t __attribute__((aligned(8))) pQPb; ///< loaded by the MMX filters as eight packed bytes "QP,..., QP" (per their asm comments)
-	uint64_t __attribute__((aligned(8))) pQPb2;
-
-	uint64_t __attribute__((aligned(8))) mmxDcOffset[64]; ///< vertClassify loads the entry indexed by nonBQP
-	uint64_t __attribute__((aligned(8))) mmxDcThreshold[64]; ///< vertClassify loads the entry indexed by nonBQP
-
-	QP_STORE_T *stdQPTable;       ///< used to fix MPEG2 style qscale
-	QP_STORE_T *nonBQPTable;
-	QP_STORE_T *forcedQPTable;
-
-	int QP;
-	int nonBQP;
-
-	int frameNum;
-
-	int cpuCaps;
-
-	int qpStride; ///<size of qp buffers (needed to realloc them if needed)
-	stride_t stride; ///<size of some buffers (needed to realloc them if needed)
-
-	int hChromaSubSample; ///< used as a right-shift count on the luma width to get the chroma width
-	int vChromaSubSample; ///< used as a right-shift count on the luma height to get the chroma height
-
-	PPMode ppMode;
-} PPContext;
-
-
-
-
-
-
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess_template.c b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess_template.c
deleted file mode 100644
index 1a5a5e0e7..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/postprocess_template.c
+++ /dev/null
@@ -1,3844 +0,0 @@
-/*
-    Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-
-/**
- * @file postprocess_template.c
- * mmx/mmx2/3dnow postprocess code.
- */
-
-
-#if ARCH_X86_64
-#  define REGa  rax // bare token: for use inside macro arguments that get stringified, e.g. PAVGB((%%REGa), ...)
-#  define REGc  rcx
-#  define REGd  rdx
-#  define REG_a  "rax" // string form: for direct concatenation into asm templates, e.g. "%%"REG_a
-#  define REG_c  "rcx"
-#  define REG_d  "rdx"
-#  define REG_SP "rsp"
-#  define ALIGN_MASK "$0xFFFFFFFFFFFFFFF8"
-#else
-#  define REGa  eax
-#  define REGc  ecx
-#  define REGd  edx
-#  define REG_a  "eax"
-#  define REG_c  "ecx"
-#  define REG_d  "edx"
-#  define REG_SP "esp"
-#  define ALIGN_MASK "$0xFFFFFFF8"
-#endif
-
-
-#undef PAVGB
-#undef PMINUB
-#undef PMAXUB
-
-#if HAVE_MMX2
-#define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif HAVE_AMD3DNOW
-#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
-#endif
-#define PAVGB(a,b)  REAL_PAVGB(a,b) // extra level so arguments such as REGa are macro-expanded before # stringifies them; REAL_PAVGB only exists with MMX2 or 3DNow
-
-#if HAVE_MMX2
-#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
-#elif HAVE_MMX // plain-MMX emulation using scratch register t; parameters are named (b,a,t) so the minimum still ends up in the second argument
-#define PMINUB(b,a,t) \
- 	"movq " #a ", " #t " \n\t"\
-	"psubusb " #b ", " #t " \n\t"\
-	"psubb " #t ", " #a " \n\t"
-#endif
-
-#if HAVE_MMX2
-#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
-#elif HAVE_MMX // plain-MMX emulation: b = saturate(b - a) + a, i.e. the unsigned byte maximum, left in b
-#define PMAXUB(a,b) \
-	"psubusb " #a ", " #b " \n\t"\
-	"paddb " #a ", " #b " \n\t"
-#endif
-
-//FIXME? |255-0| = 1 (shouldn't be a problem ...)
-#if HAVE_MMX
-/**
- * Check if the middle 8x8 Block in the given 8x16 block is flat. Returns 2 if it is not flat; otherwise 0 if some column's max-min exceeds 2*QP, else 1.
- */
-static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
-	int numEq= 0, dcOk; // dcOk: despite the name, a nonzero value means the 2*QP range check FAILED (see the return logic below)
-	src+= stride*4; // src points to begin of the 8x8 Block
-asm volatile(
-		"movq %0, %%mm7					\n\t" // mm7 = c->mmxDcOffset[c->nonBQP]
-		"movq %1, %%mm6					\n\t" // mm6 = c->mmxDcThreshold[c->nonBQP]
-                : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
-                ); // NOTE(review): mm6/mm7 are expected to survive into the next asm statement; no constraint tells the compiler so
-
-asm volatile(
-		"lea (%2, %3), %%"REG_a"			\n\t"
-//	0	1	2	3	4	5	6	7
-//	%2	eax	eax+%3	eax+2%3	%2+4%3	eax'	eax'+%3	eax'+2%3	(%2=src, %3=stride; eax' = eax after the second lea, i.e. row 5; only rows 0-7 are read)
-
-		"movq (%2), %%mm0				\n\t"
-		"movq (%%"REG_a"), %%mm1			\n\t"
-                "movq %%mm0, %%mm3				\n\t" // mm3 = running per-column minimum
-                "movq %%mm0, %%mm4				\n\t" // mm4 = running per-column maximum
-                PMAXUB(%%mm1, %%mm4)
-                PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm0				\n\t" // mm0 = difference
-		"paddb %%mm7, %%mm0				\n\t"
-		"pcmpgtb %%mm6, %%mm0				\n\t" // -1 in each byte where difference + offset > threshold (signed compare)
-
-		"movq (%%"REG_a",%3), %%mm2			\n\t"
-                PMAXUB(%%mm2, %%mm4)
-                PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t" // accumulate: each byte of mm0 holds minus the hit count of its column
-
-		"movq (%%"REG_a", %3, 2), %%mm1			\n\t"
-                PMAXUB(%%mm1, %%mm4)
-                PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-
-		"lea (%%"REG_a", %3, 4), %%"REG_a"		\n\t"
-
-		"movq (%2, %3, 4), %%mm2			\n\t"
-                PMAXUB(%%mm2, %%mm4)
-                PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-
-		"movq (%%"REG_a"), %%mm1			\n\t"
-                PMAXUB(%%mm1, %%mm4)
-                PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-
-		"movq (%%"REG_a", %3), %%mm2			\n\t"
-                PMAXUB(%%mm2, %%mm4)
-                PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-
-		"movq (%%"REG_a", %3, 2), %%mm1			\n\t"
-                PMAXUB(%%mm1, %%mm4)
-                PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-		"psubusb %%mm3, %%mm4				\n\t" // mm4 = per-column max - min over the 8 rows
-
-		"						\n\t"
-#if HAVE_MMX2
-		"pxor %%mm7, %%mm7				\n\t"
-		"psadbw %%mm7, %%mm0				\n\t" // horizontal sum of the 8 count bytes
-#else
-		"movq %%mm0, %%mm1				\n\t" // same horizontal byte sum, folded by shift-and-add
-		"psrlw $8, %%mm0				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"psrlq $16, %%mm0				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"psrlq $32, %%mm0				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-#endif
-                "movq %4, %%mm7					\n\t" // QP,..., QP
-		"paddusb %%mm7, %%mm7				\n\t" // 2QP ... 2QP
-		"psubusb %%mm7, %%mm4				\n\t" // Diff <= 2QP -> 0
-		"packssdw %%mm4, %%mm4				\n\t" // a nonzero dword stays nonzero after signed saturation
-		"movd %%mm0, %0					\n\t"
-		"movd %%mm4, %1					\n\t" // nonzero iff some column had max - min > 2QP
-
-		: "=r" (numEq), "=r" (dcOk)
-		: "r" (src), "r" ((stride_t)stride), "m" (c->pQPb)
-		: "%"REG_a
-		);
-
-	numEq= (-numEq) &0xFF; // the bytes summed above are negated counts; recover the total (at most 7 row pairs * 8 columns = 56)
-	if(numEq > c->ppMode.flatnessThreshold){
-            if(dcOk) return 0;
-            else     return 1;
-        }else{
-            return 2;
-		}
-	}
-#endif
-
-/**
- * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
- * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
- */
-#ifndef HAVE_ALTIVEC // NOTE(review): other guards in this file test the value (#if HAVE_MMX2); confirm HAVE_ALTIVEC is left undefined rather than defined to 0 on non-AltiVec builds
-static inline void RENAME(doVertLowPass)(uint8_t *src, stride_t stride, PPContext *c)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	src+= stride*3;
-	asm volatile(	//"movv %0 %1 %2\n\t"
-		"movq %2, %%mm0			\n\t"  // QP,..., QP
-		"pxor %%mm4, %%mm4				\n\t"
-
-		"movq (%0), %%mm6				\n\t"
-		"movq (%0, %1), %%mm5				\n\t"
-		"movq %%mm5, %%mm1				\n\t"
-		"movq %%mm6, %%mm2				\n\t"
-		"psubusb %%mm6, %%mm5				\n\t"
-		"psubusb %%mm1, %%mm2				\n\t"
-		"por %%mm5, %%mm2				\n\t" // ABS Diff of lines
-		"psubusb %%mm0, %%mm2				\n\t" // diff <= QP -> 0
-		"pcmpeqb %%mm4, %%mm2			\n\t" // diff <= QP -> FF
-
-		"pand %%mm2, %%mm6				\n\t"
-		"pandn %%mm1, %%mm2				\n\t"
-		"por %%mm2, %%mm6				\n\t"// First Line to Filter
-
-		"movq (%0, %1, 8), %%mm5			\n\t"
-		"lea (%0, %1, 4), %%"REG_a"			\n\t"
-		"lea (%0, %1, 8), %%"REG_c"			\n\t"
-		"sub %1, %%"REG_c"				\n\t"
-		"add %1, %0					\n\t" // %0 points to line 1 not 0
-		"movq (%0, %1, 8), %%mm7			\n\t"
-		"movq %%mm5, %%mm1				\n\t"
-		"movq %%mm7, %%mm2				\n\t"
-		"psubusb %%mm7, %%mm5				\n\t"
-		"psubusb %%mm1, %%mm2				\n\t"
-		"por %%mm5, %%mm2				\n\t" // ABS Diff of lines
-		"psubusb %%mm0, %%mm2				\n\t" // diff <= QP -> 0
-		"pcmpeqb %%mm4, %%mm2			\n\t" // diff <= QP -> FF
-
-		"pand %%mm2, %%mm7				\n\t"
-		"pandn %%mm1, %%mm2				\n\t"
-		"por %%mm2, %%mm7				\n\t" // Last Line to Filter
-
-
-		// 	1	2	3	4	5	6	7	8
-		//	%0	%0+%1	%0+2%1	eax	%0+4%1	eax+2%1	ecx	eax+4%1
-		// 6 4 2 2 1 1
-		// 6 4 4 2
-		// 6 8 2
-
-		"movq (%0, %1), %%mm0				\n\t" //  1
-		"movq %%mm0, %%mm1				\n\t" //  1
-		PAVGB(%%mm6, %%mm0)				      //1 1	/2
-		PAVGB(%%mm6, %%mm0)				      //3 1	/4
-
-		"movq (%0, %1, 4), %%mm2			\n\t" //     1
-		"movq %%mm2, %%mm5				\n\t" //     1
-		PAVGB((%%REGa), %%mm2)				      //    11	/2
-		PAVGB((%0, %1, 2), %%mm2)			      //   211	/4
-		"movq %%mm2, %%mm3				\n\t" //   211	/4
-		"movq (%0), %%mm4				\n\t" // 1
-		PAVGB(%%mm4, %%mm3)				      // 4 211	/8
-		PAVGB(%%mm0, %%mm3)				      //642211	/16
-		"movq %%mm3, (%0)				\n\t" // X
-		// mm1=2 mm2=3(211) mm4=1 mm5=5 mm6=0 mm7=9
-		"movq %%mm1, %%mm0				\n\t" //  1
-		PAVGB(%%mm6, %%mm0)				      //1 1	/2
-		"movq %%mm4, %%mm3				\n\t" // 1
-		PAVGB((%0,%1,2), %%mm3)				      // 1 1	/2
-		PAVGB((%%REGa,%1,2), %%mm5)			      //     11	/2
-		PAVGB((%%REGa), %%mm5)				      //    211 /4
-		PAVGB(%%mm5, %%mm3)				      // 2 2211 /8
-		PAVGB(%%mm0, %%mm3)				      //4242211 /16
-		"movq %%mm3, (%0,%1)				\n\t" //  X
-		// mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
-		PAVGB(%%mm4, %%mm6)				      //11	/2
-		"movq (%%"REG_c"), %%mm0			\n\t" //       1
-		PAVGB((%%REGa, %1, 2), %%mm0)			      //      11/2
-		"movq %%mm0, %%mm3				\n\t" //      11/2
-		PAVGB(%%mm1, %%mm0)				      //  2   11/4
-		PAVGB(%%mm6, %%mm0)				      //222   11/8
-		PAVGB(%%mm2, %%mm0)				      //22242211/16
-		"movq (%0, %1, 2), %%mm2			\n\t" //   1
-		"movq %%mm0, (%0, %1, 2)			\n\t" //   X
-		// mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
-		"movq (%%"REG_a", %1, 4), %%mm0			\n\t" //        1
-		PAVGB((%%REGc), %%mm0)				      //       11	/2
-		PAVGB(%%mm0, %%mm6)				      //11     11	/4
-		PAVGB(%%mm1, %%mm4)				      // 11		/2
-		PAVGB(%%mm2, %%mm1)				      //  11		/2
-		PAVGB(%%mm1, %%mm6)				      //1122   11	/8
-		PAVGB(%%mm5, %%mm6)				      //112242211	/16
-		"movq (%%"REG_a"), %%mm5			\n\t" //    1
-		"movq %%mm6, (%%"REG_a")			\n\t" //    X
-		// mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t" //        1
-		PAVGB(%%mm7, %%mm6)				      //        11	/2
-		PAVGB(%%mm4, %%mm6)				      // 11     11	/4
-		PAVGB(%%mm3, %%mm6)				      // 11   2211	/8
-		PAVGB(%%mm5, %%mm2)				      //   11		/2
-		"movq (%0, %1, 4), %%mm4			\n\t" //     1
-		PAVGB(%%mm4, %%mm2)				      //   112		/4
-		PAVGB(%%mm2, %%mm6)				      // 112242211	/16
-		"movq %%mm6, (%0, %1, 4)			\n\t" //     X
-		// mm0=7(11) mm1=2(11) mm2=3(112) mm3=6(11) mm4=5 mm5=4 mm7=9
-		PAVGB(%%mm7, %%mm1)				      //  11     2	/4
-		PAVGB(%%mm4, %%mm5)				      //    11		/2
-		PAVGB(%%mm5, %%mm0)				      //    11 11	/4
-		"movq (%%"REG_a", %1, 2), %%mm6			\n\t" //      1
-		PAVGB(%%mm6, %%mm1)				      //  11  4  2	/8
-		PAVGB(%%mm0, %%mm1)				      //  11224222	/16
-		"movq %%mm1, (%%"REG_a", %1, 2)			\n\t" //      X
-		// mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
-		PAVGB((%%REGc), %%mm2)				      //   112 4	/8
-		"movq (%%"REG_a", %1, 4), %%mm0			\n\t" //        1
-		PAVGB(%%mm0, %%mm6)				      //      1 1	/2
-		PAVGB(%%mm7, %%mm6)				      //      1 12	/4
-		PAVGB(%%mm2, %%mm6)				      //   1122424	/16
-		"movq %%mm6, (%%"REG_c")			\n\t" //       X
-		// mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
-		PAVGB(%%mm7, %%mm5)				      //    11   2	/4
-		PAVGB(%%mm7, %%mm5)				      //    11   6	/8
-
-		PAVGB(%%mm3, %%mm0)				      //      112	/4
-		PAVGB(%%mm0, %%mm5)				      //    112246	/16
-		"movq %%mm5, (%%"REG_a", %1, 4)			\n\t" //        X
-		"sub %1, %0					\n\t" // undo the earlier "add %1, %0": %0 is declared input-only, so it is restored before leaving
-
-		:
-		: "r" (src), "r" ((stride_t)stride), "m" (c->pQPb)
-		: "%"REG_a, "%"REG_c
-	);
-#else
-	const stride_t l1= stride;
-	const stride_t l2= stride + l1;
-	const stride_t l3= stride + l2;
-	const stride_t l4= stride + l3;
-	const stride_t l5= stride + l4;
-	const stride_t l6= stride + l5;
-	const stride_t l7= stride + l6;
-	const stride_t l8= stride + l7;
-	const stride_t l9= stride + l8;
-	int x;
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		const int first= ABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1]; // top padding sample: row 0 if it is within QP of row 1, otherwise row 1
-		const int last= ABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8]; // bottom padding sample: row 9 if it is within QP of row 8, otherwise row 8
-
-		int sums[10]; // sliding window sums padded with first/last; each carries +4, so adding two of them supplies the +8 rounding for >>4
-		sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
-		sums[1] = sums[0] - first  + src[l4];
-		sums[2] = sums[1] - first  + src[l5];
-		sums[3] = sums[2] - first  + src[l6];
-		sums[4] = sums[3] - first  + src[l7];
-		sums[5] = sums[4] - src[l1] + src[l8];
-		sums[6] = sums[5] - src[l2] + last;
-		sums[7] = sums[6] - src[l3] + last;
-		sums[8] = sums[7] - src[l4] + last;
-		sums[9] = sums[8] - src[l5] + last;
-
-		src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
-		src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
-		src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
-		src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
-		src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
-		src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
-		src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
-		src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
-
-		src++;
-	}
-#endif
-}
-#endif //HAVE_ALTIVEC
-
-#if 0 // disabled: everything down to the matching #endif is compiled out
-/**
- * Experimental implementation of the filter (Algorithm 1) described in a paper from Ramkishor & Karandikar
- * values are correctly clipped (MMX2)
- * values are wraparound (C)
- * conclusion: it's fast, but introduces ugly horizontal patterns if there is a continuous gradient
-	0 8 16 24
-	x = 8
-	x/2 = 4
-	x/8 = 1
-	1 12 12 23
- */
-static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	src+= stride*3;
-// FIXME rounding
-	asm volatile(
-		"pxor %%mm7, %%mm7				\n\t" // 0
-		"movq "MANGLE(b80)", %%mm6			\n\t" // MIN_SIGNED_BYTE
-		"leal (%0, %1), %%"REG_a"			\n\t"
-		"leal (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1	%0+8%1	ecx+4%1
-		"movq "MANGLE(pQPb)", %%mm0			\n\t" // QP,..., QP
-		"movq %%mm0, %%mm1				\n\t" // QP,..., QP
-		"paddusb "MANGLE(b02)", %%mm0			\n\t"
-		"psrlw $2, %%mm0				\n\t"
-		"pand "MANGLE(b3F)", %%mm0			\n\t" // QP/4,..., QP/4
-		"paddusb %%mm1, %%mm0				\n\t" // QP*1.25 ...
-		"movq (%0, %1, 4), %%mm2			\n\t" // line 4
-		"movq (%%"REG_c"), %%mm3				\n\t" // line 5
-		"movq %%mm2, %%mm4				\n\t" // line 4
-		"pcmpeqb %%mm5, %%mm5				\n\t" // -1
-		"pxor %%mm2, %%mm5				\n\t" // -line 4 - 1
-		PAVGB(%%mm3, %%mm5)
-		"paddb %%mm6, %%mm5				\n\t" // (l5-l4)/2
-		"psubusb %%mm3, %%mm4				\n\t"
-		"psubusb %%mm2, %%mm3				\n\t"
-		"por %%mm3, %%mm4				\n\t" // |l4 - l5|
-		"psubusb %%mm0, %%mm4				\n\t"
-		"pcmpeqb %%mm7, %%mm4				\n\t"
-		"pand %%mm4, %%mm5				\n\t" // d/2
-
-//		"paddb %%mm6, %%mm2				\n\t" // line 4 + 0x80
-		"paddb %%mm5, %%mm2				\n\t"
-//		"psubb %%mm6, %%mm2				\n\t"
-		"movq %%mm2, (%0,%1, 4)				\n\t"
-
-		"movq (%%"REG_c"), %%mm2				\n\t"
-//		"paddb %%mm6, %%mm2				\n\t" // line 5 + 0x80
-		"psubb %%mm5, %%mm2				\n\t"
-//		"psubb %%mm6, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_c")				\n\t"
-
-		"paddb %%mm6, %%mm5				\n\t"
-		"psrlw $2, %%mm5				\n\t"
-		"pand "MANGLE(b3F)", %%mm5			\n\t"
-		"psubb "MANGLE(b20)", %%mm5			\n\t" // (l5-l4)/8
-
-		"movq (%%"REG_a", %1, 2), %%mm2			\n\t"
-		"paddb %%mm6, %%mm2				\n\t" // line 3 + 0x80
-		"paddsb %%mm5, %%mm2				\n\t"
-		"psubb %%mm6, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_a", %1, 2)			\n\t"
-
-		"movq (%%"REG_c", %1), %%mm2			\n\t"
-		"paddb %%mm6, %%mm2				\n\t" // line 6 + 0x80
-		"psubsb %%mm5, %%mm2				\n\t"
-		"psubb %%mm6, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_c", %1)			\n\t"
-
-		:
-		: "r" (src), "r" ((stride_t)stride)
-		: "%"REG_a, "%"REG_c
-	);
-#else
- 	const int l1= stride;
-	const int l2= stride + l1;
-	const int l3= stride + l2;
-	const int l4= stride + l3;
-	const int l5= stride + l4;
-	const int l6= stride + l5;
-//	const int l7= stride + l6;
-//	const int l8= stride + l7;
-//	const int l9= stride + l8;
-	int x;
-	const int QP15= QP + (QP>>2); // QP * 1.25 (integer), despite the "15" in the name
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		const int v = (src[x+l5] - src[x+l4]);
-		if(ABS(v) < QP15)
-		{
-			src[x+l3] +=v>>3;
-			src[x+l4] +=v>>1;
-			src[x+l5] -=v>>1;
-			src[x+l6] -=v>>3;
-
-		}
-	}
-
-#endif
-}
-#endif
-
-/**
- * Experimental Filter 1
- * will not damage linear gradients
- * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
- * can only smooth blocks at the expected locations (it can't smooth them if they did move)
- * MMX2 version does correct clipping, C version doesn't
- */
-static inline void RENAME(vertX1Filter)(uint8_t *src, stride_t stride, PPContext *co)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	src+= stride*3;
-
-	asm volatile(
-		"pxor %%mm7, %%mm7				\n\t" // 0
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1	%0+8%1	ecx+4%1
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t" // line 3
-		"movq (%0, %1, 4), %%mm1			\n\t" // line 4
-		"movq %%mm1, %%mm2				\n\t" // line 4
-		"psubusb %%mm0, %%mm1				\n\t"
-		"psubusb %%mm2, %%mm0				\n\t"
-		"por %%mm1, %%mm0				\n\t" // |l3 - l4|
-		"movq (%%"REG_c"), %%mm3				\n\t" // line 5
-		"movq (%%"REG_c", %1), %%mm4			\n\t" // line 6
-		"movq %%mm3, %%mm5				\n\t" // line 5
-		"psubusb %%mm4, %%mm3				\n\t"
-		"psubusb %%mm5, %%mm4				\n\t"
-		"por %%mm4, %%mm3				\n\t" // |l5 - l6|
-		PAVGB(%%mm3, %%mm0)				      // (|l3 - l4| + |l5 - l6|)/2
-		"movq %%mm2, %%mm1				\n\t" // line 4
-		"psubusb %%mm5, %%mm2				\n\t"
-		"movq %%mm2, %%mm4				\n\t"
-		"pcmpeqb %%mm7, %%mm2				\n\t" // (l4 - l5) <= 0 ? -1 : 0
-		"psubusb %%mm1, %%mm5				\n\t"
-		"por %%mm5, %%mm4				\n\t" // |l4 - l5|
-		"psubusb %%mm0, %%mm4		\n\t" //d = MAX(0, |l4-l5| - (|l3-l4| + |l5-l6|)/2)
-		"movq %%mm4, %%mm3				\n\t" // d
-		"movq %2, %%mm0			\n\t"
-                "paddusb %%mm0, %%mm0				\n\t"
-		"psubusb %%mm0, %%mm4				\n\t"
-		"pcmpeqb %%mm7, %%mm4				\n\t" // d <= 2*QP ? -1 : 0
-		"psubusb "MANGLE(b01)", %%mm3			\n\t"
-		"pand %%mm4, %%mm3				\n\t" // d <= 2*QP ? (d reduced by the b01 constant above, saturating) : 0
-
-		PAVGB(%%mm7, %%mm3)				      // d/2
-		"movq %%mm3, %%mm1				\n\t" // d/2
-		PAVGB(%%mm7, %%mm3)				      // d/4
-		PAVGB(%%mm1, %%mm3)				      // 3*d/8
-
-		"movq (%0, %1, 4), %%mm0			\n\t" // line 4
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
-		"psubusb %%mm3, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%0, %1, 4)			\n\t" // line 4
-
-		"movq (%%"REG_c"), %%mm0			\n\t" // line 5
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
-		"paddusb %%mm3, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_c")			\n\t" // line 5
-
-		PAVGB(%%mm7, %%mm1)				      // d/4
-
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t" // line 3
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l3-1 : l3
-		"psubusb %%mm1, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t" // line 3
-
-		"movq (%%"REG_c", %1), %%mm0			\n\t" // line 6
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l6-1 : l6
-		"paddusb %%mm1, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_c", %1)			\n\t" // line 6
-
-		PAVGB(%%mm7, %%mm1)				      // d/8
-
-		"movq (%%"REG_a", %1), %%mm0			\n\t" // line 2
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l2-1 : l2
-		"psubusb %%mm1, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_a", %1)			\n\t" // line 2
-
-		"movq (%%"REG_c", %1, 2), %%mm0			\n\t" // line 7
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l7-1 : l7
-		"paddusb %%mm1, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_c", %1, 2)			\n\t" // line 7
-
-		:
-		: "r" (src), "r" ((stride_t)stride), "m" (co->pQPb)
-		: "%"REG_a, "%"REG_c
-	);
-#else
-
- 	const stride_t l1= stride;
-	const stride_t l2= stride + l1;
-	const stride_t l3= stride + l2;
-	const stride_t l4= stride + l3;
-	const stride_t l5= stride + l4;
-	const stride_t l6= stride + l5;
-	const stride_t l7= stride + l6;
-//	const int l8= stride + l7;
-//	const int l9= stride + l8;
-	int x;
-
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		int a= src[l3] - src[l4];
-		int b= src[l4] - src[l5];
-		int c= src[l5] - src[l6];
-
-		int d= ABS(b) - ((ABS(a) + ABS(c))>>1);
-		d= MAX(d, 0);
-
-		if(d < co->QP*2) // strict <, whereas the asm path's mask keeps d <= 2*QP
-		{
-			int v = d * SIGN(-b);
-
-			src[l2] +=v>>3;
-			src[l3] +=v>>2;
-			src[l4] +=(3*v)>>3;
-			src[l5] -=(3*v)>>3;
-			src[l6] -=v>>2;
-			src[l7] -=v>>3;
-
-		}
-		src++;
-	}
-#endif
-}
-
-#ifndef HAVE_ALTIVEC
-static inline void RENAME(doVertDefFilter)(uint8_t src[], stride_t stride, PPContext *c)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-/*
-	uint8_t tmp[16];
-	const int l1= stride;
-	const int l2= stride + l1;
-	const int l3= stride + l2;
-	const int l4= (int)tmp - (int)src - stride*3;
-	const int l5= (int)tmp - (int)src - stride*3 + 8;
-	const int l6= stride*3 + l3;
-	const int l7= stride + l6;
-	const int l8= stride + l7;
-
-	memcpy(tmp, src+stride*7, 8);
-	memcpy(tmp+8, src+stride*8, 8);
-*/
-	src+= stride*4;
-	asm volatile(
-
-#if 0 //sligtly more accurate and slightly slower
-		"pxor %%mm7, %%mm7				\n\t" // 0
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-//	0	1	2	3	4	5	6	7
-//	%0	%0+%1	%0+2%1	eax+2%1	%0+4%1	eax+4%1	ecx+%1	ecx+2%1
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1
-
-
-		"movq (%0, %1, 2), %%mm0			\n\t" // l2
-		"movq (%0), %%mm1				\n\t" // l0
-		"movq %%mm0, %%mm2				\n\t" // l2
-		PAVGB(%%mm7, %%mm0)				      // ~l2/2
-		PAVGB(%%mm1, %%mm0)				      // ~(l2 + 2l0)/4
-		PAVGB(%%mm2, %%mm0)				      // ~(5l2 + 2l0)/8
-
-		"movq (%%"REG_a"), %%mm1			\n\t" // l1
-		"movq (%%"REG_a", %1, 2), %%mm3			\n\t" // l3
-		"movq %%mm1, %%mm4				\n\t" // l1
-		PAVGB(%%mm7, %%mm1)				      // ~l1/2
-		PAVGB(%%mm3, %%mm1)				      // ~(l1 + 2l3)/4
-		PAVGB(%%mm4, %%mm1)				      // ~(5l1 + 2l3)/8
-
-		"movq %%mm0, %%mm4				\n\t" // ~(5l2 + 2l0)/8
-		"psubusb %%mm1, %%mm0				\n\t"
-		"psubusb %%mm4, %%mm1				\n\t"
-		"por %%mm0, %%mm1				\n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8
-// mm1= |lenergy|, mm2= l2, mm3= l3, mm7=0
-
-		"movq (%0, %1, 4), %%mm0			\n\t" // l4
-		"movq %%mm0, %%mm4				\n\t" // l4
-		PAVGB(%%mm7, %%mm0)				      // ~l4/2
-		PAVGB(%%mm2, %%mm0)				      // ~(l4 + 2l2)/4
-		PAVGB(%%mm4, %%mm0)				      // ~(5l4 + 2l2)/8
-
-		"movq (%%"REG_c"), %%mm2			\n\t" // l5
-		"movq %%mm3, %%mm5				\n\t" // l3
-		PAVGB(%%mm7, %%mm3)				      // ~l3/2
-		PAVGB(%%mm2, %%mm3)				      // ~(l3 + 2l5)/4
-		PAVGB(%%mm5, %%mm3)				      // ~(5l3 + 2l5)/8
-
-		"movq %%mm0, %%mm6				\n\t" // ~(5l4 + 2l2)/8
-		"psubusb %%mm3, %%mm0				\n\t"
-		"psubusb %%mm6, %%mm3				\n\t"
-		"por %%mm0, %%mm3				\n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8
-		"pcmpeqb %%mm7, %%mm0				\n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)
-// mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0
-
-		"movq (%%"REG_c", %1), %%mm6			\n\t" // l6
-		"movq %%mm6, %%mm5				\n\t" // l6
-		PAVGB(%%mm7, %%mm6)				      // ~l6/2
-		PAVGB(%%mm4, %%mm6)				      // ~(l6 + 2l4)/4
-		PAVGB(%%mm5, %%mm6)				      // ~(5l6 + 2l4)/8
-
-		"movq (%%"REG_c", %1, 2), %%mm5			\n\t" // l7
-		"movq %%mm2, %%mm4				\n\t" // l5
-		PAVGB(%%mm7, %%mm2)				      // ~l5/2
-		PAVGB(%%mm5, %%mm2)				      // ~(l5 + 2l7)/4
-		PAVGB(%%mm4, %%mm2)				      // ~(5l5 + 2l7)/8
-
-		"movq %%mm6, %%mm4				\n\t" // ~(5l6 + 2l4)/8
-		"psubusb %%mm2, %%mm6				\n\t"
-		"psubusb %%mm4, %%mm2				\n\t"
-		"por %%mm6, %%mm2				\n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8
-// mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0
-
-
-		PMINUB(%%mm2, %%mm1, %%mm4)			      // MIN(|lenergy|,|renergy|)/8
-		"movq %2, %%mm4					\n\t" // QP //FIXME QP+1 ?
-		"paddusb "MANGLE(b01)", %%mm4			\n\t"
-		"pcmpgtb %%mm3, %%mm4				\n\t" // |menergy|/8 < QP
-		"psubusb %%mm1, %%mm3				\n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
-		"pand %%mm4, %%mm3				\n\t"
-
-		"movq %%mm3, %%mm1				\n\t"
-//		"psubusb "MANGLE(b01)", %%mm3			\n\t"
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm7, %%mm3)
-		"paddusb %%mm1, %%mm3				\n\t"
-//		"paddusb "MANGLE(b01)", %%mm3			\n\t"
-
-		"movq (%%"REG_a", %1, 2), %%mm6			\n\t" //l3
-		"movq (%0, %1, 4), %%mm5			\n\t" //l4
-		"movq (%0, %1, 4), %%mm4			\n\t" //l4
-		"psubusb %%mm6, %%mm5				\n\t"
-		"psubusb %%mm4, %%mm6				\n\t"
-		"por %%mm6, %%mm5				\n\t" // |l3-l4|
-		"pcmpeqb %%mm7, %%mm6				\n\t" // SIGN(l3-l4)
-		"pxor %%mm6, %%mm0				\n\t"
-		"pand %%mm0, %%mm3				\n\t"
-		PMINUB(%%mm5, %%mm3, %%mm0)
-
-		"psubusb "MANGLE(b01)", %%mm3			\n\t"
-		PAVGB(%%mm7, %%mm3)
-
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
-		"movq (%0, %1, 4), %%mm2			\n\t"
-		"pxor %%mm6, %%mm0				\n\t"
-		"pxor %%mm6, %%mm2				\n\t"
-		"psubb %%mm3, %%mm0				\n\t"
-		"paddb %%mm3, %%mm2				\n\t"
-		"pxor %%mm6, %%mm0				\n\t"
-		"pxor %%mm6, %%mm2				\n\t"
-		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t"
-		"movq %%mm2, (%0, %1, 4)			\n\t"
-#endif
-
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"pcmpeqb %%mm6, %%mm6				\n\t" // -1
-//	0	1	2	3	4	5	6	7
-//	%0	%0+%1	%0+2%1	eax+2%1	%0+4%1	eax+4%1	ecx+%1	ecx+2%1
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1
-
-
-		"movq (%%"REG_a", %1, 2), %%mm1			\n\t" // l3
-		"movq (%0, %1, 4), %%mm0			\n\t" // l4
-		"pxor %%mm6, %%mm1				\n\t" // -l3-1
-		PAVGB(%%mm1, %%mm0)				      // -q+128 = (l4-l3+256)/2
-// mm1=-l3-1, mm0=128-q
-
-		"movq (%%"REG_a", %1, 4), %%mm2			\n\t" // l5
-		"movq (%%"REG_a", %1), %%mm3			\n\t" // l2
-		"pxor %%mm6, %%mm2				\n\t" // -l5-1
-		"movq %%mm2, %%mm5				\n\t" // -l5-1
-		"movq "MANGLE(b80)", %%mm4			\n\t" // 128
-		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-		PAVGB(%%mm3, %%mm2)				      // (l2-l5+256)/2
-		PAVGB(%%mm0, %%mm4)				      // ~(l4-l3)/4 + 128
-		PAVGB(%%mm2, %%mm4)				      // ~(l2-l5)/4 +(l4-l3)/8 + 128
-		PAVGB(%%mm0, %%mm4)				      // ~(l2-l5)/8 +5(l4-l3)/16 + 128
-// mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1
-
-		"movq (%%"REG_a"), %%mm2			\n\t" // l1
-		"pxor %%mm6, %%mm2				\n\t" // -l1-1
-		PAVGB(%%mm3, %%mm2)				      // (l2-l1+256)/2
-		PAVGB((%0), %%mm1)				      // (l0-l3+256)/2
-		"movq "MANGLE(b80)", %%mm3			\n\t" // 128
-		PAVGB(%%mm2, %%mm3)				      // ~(l2-l1)/4 + 128
-		PAVGB(%%mm1, %%mm3)				      // ~(l0-l3)/4 +(l2-l1)/8 + 128
-		PAVGB(%%mm2, %%mm3)				      // ~(l0-l3)/8 +5(l2-l1)/16 + 128
-// mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1
-
-		PAVGB((%%REGc, %1), %%mm5)			      // (l6-l5+256)/2
-		"movq (%%"REG_c", %1, 2), %%mm1			\n\t" // l7
-		"pxor %%mm6, %%mm1				\n\t" // -l7-1
-		PAVGB((%0, %1, 4), %%mm1)			      // (l4-l7+256)/2
-		"movq "MANGLE(b80)", %%mm2			\n\t" // 128
-		PAVGB(%%mm5, %%mm2)				      // ~(l6-l5)/4 + 128
-		PAVGB(%%mm1, %%mm2)				      // ~(l4-l7)/4 +(l6-l5)/8 + 128
-		PAVGB(%%mm5, %%mm2)				      // ~(l4-l7)/8 +5(l6-l5)/16 + 128
-// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
-
-		"movq "MANGLE(b00)", %%mm1			\n\t" // 0
-		"movq "MANGLE(b00)", %%mm5			\n\t" // 0
-		"psubb %%mm2, %%mm1				\n\t" // 128 - renergy/16
-		"psubb %%mm3, %%mm5				\n\t" // 128 - lenergy/16
-		PMAXUB(%%mm1, %%mm2)				      // 128 + |renergy/16|
-		PMAXUB(%%mm5, %%mm3)				      // 128 + |lenergy/16|
-		PMINUB(%%mm2, %%mm3, %%mm1)			      // 128 + MIN(|lenergy|,|renergy|)/16
-
-// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
-
-		"movq "MANGLE(b00)", %%mm7			\n\t" // 0
-		"movq %2, %%mm2					\n\t" // QP
-		PAVGB(%%mm6, %%mm2)				      // 128 + QP/2
-		"psubb %%mm6, %%mm2				\n\t"
-
-		"movq %%mm4, %%mm1				\n\t"
-		"pcmpgtb %%mm7, %%mm1				\n\t" // SIGN(menergy)
-		"pxor %%mm1, %%mm4				\n\t"
-		"psubb %%mm1, %%mm4				\n\t" // 128 + |menergy|/16
-		"pcmpgtb %%mm4, %%mm2				\n\t" // |menergy|/16 < QP/2
-		"psubusb %%mm3, %%mm4				\n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16
-// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
-
-		"movq %%mm4, %%mm3				\n\t" // d
-		"psubusb "MANGLE(b01)", %%mm4			\n\t"
-		PAVGB(%%mm7, %%mm4)				      // d/32
-		PAVGB(%%mm7, %%mm4)				      // (d + 32)/64
-		"paddb %%mm3, %%mm4				\n\t" // 5d/64
-		"pand %%mm2, %%mm4				\n\t"
-
-		"movq "MANGLE(b80)", %%mm5			\n\t" // 128
-		"psubb %%mm0, %%mm5				\n\t" // q
-		"paddsb %%mm6, %%mm5				\n\t" // fix bad rounding
-		"pcmpgtb %%mm5, %%mm7				\n\t" // SIGN(q)
-		"pxor %%mm7, %%mm5				\n\t"
-
-		PMINUB(%%mm5, %%mm4, %%mm3)			      // MIN(|q|, 5d/64)
-		"pxor %%mm1, %%mm7				\n\t" // SIGN(d*q)
-
-		"pand %%mm7, %%mm4				\n\t"
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
-		"movq (%0, %1, 4), %%mm2			\n\t"
-		"pxor %%mm1, %%mm0				\n\t"
-		"pxor %%mm1, %%mm2				\n\t"
-		"paddb %%mm4, %%mm0				\n\t"
-		"psubb %%mm4, %%mm2				\n\t"
-		"pxor %%mm1, %%mm0				\n\t"
-		"pxor %%mm1, %%mm2				\n\t"
-		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t"
-		"movq %%mm2, (%0, %1, 4)			\n\t"
-
-		:
-		: "r" (src), "r" ((stride_t)stride), "m" (c->pQPb)
-		: "%"REG_a, "%"REG_c
-	);
-
-/*
-	{
-	int x;
-	src-= stride;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
-		if(ABS(middleEnergy)< 8*QP)
-		{
-			const int q=(src[l4] - src[l5])/2;
-			const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
-			const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
-
-			int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
-			d= MAX(d, 0);
-
-			d= (5*d + 32) >> 6;
-			d*= SIGN(-middleEnergy);
-
-			if(q>0)
-			{
-				d= d<0 ? 0 : d;
-				d= d>q ? q : d;
-			}
-			else
-			{
-				d= d>0 ? 0 : d;
-				d= d<q ? q : d;
-			}
-
-        		src[l4]-= d;
-	        	src[l5]+= d;
-		}
-		src++;
-	}
-src-=8;
-	for(x=0; x<8; x++)
-	{
-		int y;
-		for(y=4; y<6; y++)
-		{
-			int d= src[x+y*stride] - tmp[x+(y-4)*8];
-			int ad= ABS(d);
-			static int max=0;
-			static int sum=0;
-			static int num=0;
-			static int bias=0;
-
-			if(max<ad) max=ad;
-			sum+= ad>3 ? 1 : 0;
-			if(ad>3)
-			{
-				src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255;
-			}
-			if(y==4) bias+=d;
-			num++;
-			if(num%1000000 == 0)
-			{
-				printf(" %d %d %d %d\n", num, sum, max, bias);
-			}
-		}
-	}
-}
-*/
-#elif HAVE_MMX
-	src+= stride*4;
-	asm volatile(
-		"pxor %%mm7, %%mm7				\n\t"
-		"lea -40(%%"REG_SP"), %%"REG_c"			\n\t" // make space for 4 8-byte vars
-		"and "ALIGN_MASK", %%"REG_c"			\n\t" // align
-//	0	1	2	3	4	5	6	7
-//	%0	%0+%1	%0+2%1	eax+2%1	%0+4%1	eax+4%1	edx+%1	edx+2%1
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1
-
-		"movq (%0), %%mm0				\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"punpcklbw %%mm7, %%mm0				\n\t" // low part of line 0
-		"punpckhbw %%mm7, %%mm1				\n\t" // high part of line 0
-
-		"movq (%0, %1), %%mm2				\n\t"
-		"lea (%0, %1, 2), %%"REG_a"			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // low part of line 1
-		"punpckhbw %%mm7, %%mm3				\n\t" // high part of line 1
-
-		"movq (%%"REG_a"), %%mm4			\n\t"
-		"movq %%mm4, %%mm5				\n\t"
-		"punpcklbw %%mm7, %%mm4				\n\t" // low part of line 2
-		"punpckhbw %%mm7, %%mm5				\n\t" // high part of line 2
-
-		"paddw %%mm0, %%mm0				\n\t" // 2L0
-		"paddw %%mm1, %%mm1				\n\t" // 2H0
-		"psubw %%mm4, %%mm2				\n\t" // L1 - L2
-		"psubw %%mm5, %%mm3				\n\t" // H1 - H2
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - L1 + L2
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - H1 + H2
-
-		"psllw $2, %%mm2				\n\t" // 4L1 - 4L2
-		"psllw $2, %%mm3				\n\t" // 4H1 - 4H2
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2
-
-		"movq (%%"REG_a", %1), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L3
-		"punpckhbw %%mm7, %%mm3				\n\t" // H3
-
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2 - L3
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2 - H3
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-		"movq %%mm0, (%%"REG_c")			\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"movq %%mm1, 8(%%"REG_c")			\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"punpcklbw %%mm7, %%mm0				\n\t" // L4
-		"punpckhbw %%mm7, %%mm1				\n\t" // H4
-
-		"psubw %%mm0, %%mm2				\n\t" // L3 - L4
-		"psubw %%mm1, %%mm3				\n\t" // H3 - H4
-		"movq %%mm2, 16(%%"REG_c")			\n\t" // L3 - L4
-		"movq %%mm3, 24(%%"REG_c")			\n\t" // H3 - H4
-		"paddw %%mm4, %%mm4				\n\t" // 2L2
-		"paddw %%mm5, %%mm5				\n\t" // 2H2
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - L3 + L4
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - H3 + H4
-
-		"lea (%%"REG_a", %1), %0			\n\t"
-		"psllw $2, %%mm2				\n\t" // 4L3 - 4L4
-		"psllw $2, %%mm3				\n\t" // 4H3 - 4H4
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4
-//50 opcodes so far
-		"movq (%0, %1, 2), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L5
-		"punpckhbw %%mm7, %%mm3				\n\t" // H5
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4 - L5
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4 - H5
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4 - 2L5
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4 - 2H5
-
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t"
-		"punpcklbw %%mm7, %%mm6				\n\t" // L6
-		"psubw %%mm6, %%mm2				\n\t" // L5 - L6
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t"
-		"punpckhbw %%mm7, %%mm6				\n\t" // H6
-		"psubw %%mm6, %%mm3				\n\t" // H5 - H6
-
-		"paddw %%mm0, %%mm0				\n\t" // 2L4
-		"paddw %%mm1, %%mm1				\n\t" // 2H4
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - L5 + L6
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - H5 + H6
-
-		"psllw $2, %%mm2				\n\t" // 4L5 - 4L6
-		"psllw $2, %%mm3				\n\t" // 4H5 - 4H6
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - 5L5 + 5L6
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - 5H5 + 5H6
-
-		"movq (%0, %1, 4), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L7
-		"punpckhbw %%mm7, %%mm3				\n\t" // H7
-
-		"paddw %%mm2, %%mm2				\n\t" // 2L7
-		"paddw %%mm3, %%mm3				\n\t" // 2H7
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - 5L5 + 5L6 - 2L7
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - 5H5 + 5H6 - 2H7
-
-		"movq (%%"REG_c"), %%mm2			\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"movq 8(%%"REG_c"), %%mm3			\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-
-#if HAVE_MMX2
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm0, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm1, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm2, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm3, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
-#else
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm0, %%mm6				\n\t"
-		"pxor %%mm6, %%mm0				\n\t"
-		"psubw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm1, %%mm6				\n\t"
-		"pxor %%mm6, %%mm1				\n\t"
-		"psubw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm2, %%mm6				\n\t"
-		"pxor %%mm6, %%mm2				\n\t"
-		"psubw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm3, %%mm6				\n\t"
-		"pxor %%mm6, %%mm3				\n\t"
-		"psubw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
-#endif
-
-#if HAVE_MMX2
-		"pminsw %%mm2, %%mm0				\n\t"
-		"pminsw %%mm3, %%mm1				\n\t"
-#else
-		"movq %%mm0, %%mm6				\n\t"
-		"psubusw %%mm2, %%mm6				\n\t"
-		"psubw %%mm6, %%mm0				\n\t"
-		"movq %%mm1, %%mm6				\n\t"
-		"psubusw %%mm3, %%mm6				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-#endif
-
-		"movd %2, %%mm2					\n\t" // QP
-		"punpcklbw %%mm7, %%mm2				\n\t"
-
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm4, %%mm6				\n\t" // sign(2L2 - 5L3 + 5L4 - 2L5)
-		"pxor %%mm6, %%mm4				\n\t"
-		"psubw %%mm6, %%mm4				\n\t" // |2L2 - 5L3 + 5L4 - 2L5|
-		"pcmpgtw %%mm5, %%mm7				\n\t" // sign(2H2 - 5H3 + 5H4 - 2H5)
-		"pxor %%mm7, %%mm5				\n\t"
-		"psubw %%mm7, %%mm5				\n\t" // |2H2 - 5H3 + 5H4 - 2H5|
-// 100 opcodes
-		"psllw $3, %%mm2				\n\t" // 8QP
-		"movq %%mm2, %%mm3				\n\t" // 8QP
-		"pcmpgtw %%mm4, %%mm2				\n\t"
-		"pcmpgtw %%mm5, %%mm3				\n\t"
-		"pand %%mm2, %%mm4				\n\t"
-		"pand %%mm3, %%mm5				\n\t"
-
-
-		"psubusw %%mm0, %%mm4				\n\t" // hd
-		"psubusw %%mm1, %%mm5				\n\t" // ld
-
-
-		"movq "MANGLE(w05)", %%mm2			\n\t" // 5
-		"pmullw %%mm2, %%mm4				\n\t"
-		"pmullw %%mm2, %%mm5				\n\t"
-		"movq "MANGLE(w20)", %%mm2			\n\t" // 32
-		"paddw %%mm2, %%mm4				\n\t"
-		"paddw %%mm2, %%mm5				\n\t"
-		"psrlw $6, %%mm4				\n\t"
-		"psrlw $6, %%mm5				\n\t"
-
-		"movq 16(%%"REG_c"), %%mm0			\n\t" // L3 - L4
-		"movq 24(%%"REG_c"), %%mm1			\n\t" // H3 - H4
-
-		"pxor %%mm2, %%mm2				\n\t"
-		"pxor %%mm3, %%mm3				\n\t"
-
-		"pcmpgtw %%mm0, %%mm2				\n\t" // sign (L3-L4)
-		"pcmpgtw %%mm1, %%mm3				\n\t" // sign (H3-H4)
-		"pxor %%mm2, %%mm0				\n\t"
-		"pxor %%mm3, %%mm1				\n\t"
-		"psubw %%mm2, %%mm0				\n\t" // |L3-L4|
-		"psubw %%mm3, %%mm1				\n\t" // |H3-H4|
-		"psrlw $1, %%mm0				\n\t" // |L3 - L4|/2
-		"psrlw $1, %%mm1				\n\t" // |H3 - H4|/2
-
-		"pxor %%mm6, %%mm2				\n\t"
-		"pxor %%mm7, %%mm3				\n\t"
-		"pand %%mm2, %%mm4				\n\t"
-		"pand %%mm3, %%mm5				\n\t"
-
-#if HAVE_MMX2
-		"pminsw %%mm0, %%mm4				\n\t"
-		"pminsw %%mm1, %%mm5				\n\t"
-#else
-		"movq %%mm4, %%mm2				\n\t"
-		"psubusw %%mm0, %%mm2				\n\t"
-		"psubw %%mm2, %%mm4				\n\t"
-		"movq %%mm5, %%mm2				\n\t"
-		"psubusw %%mm1, %%mm2				\n\t"
-		"psubw %%mm2, %%mm5				\n\t"
-#endif
-		"pxor %%mm6, %%mm4				\n\t"
-		"pxor %%mm7, %%mm5				\n\t"
-		"psubw %%mm6, %%mm4				\n\t"
-		"psubw %%mm7, %%mm5				\n\t"
-		"packsswb %%mm5, %%mm4				\n\t"
-		"movq (%0), %%mm0				\n\t"
-		"paddb   %%mm4, %%mm0				\n\t"
-		"movq %%mm0, (%0)				\n\t"
-		"movq (%0, %1), %%mm0				\n\t"
-		"psubb %%mm4, %%mm0				\n\t"
-		"movq %%mm0, (%0, %1)				\n\t"
-
-		: "+r" (src)
-		: "r" ((stride_t)stride), "m" (c->pQPb)
-		: "%"REG_a, "%"REG_c
-	);
-#else
-	const stride_t l1= stride;
-	const stride_t l2= stride + l1;
-	const stride_t l3= stride + l2;
-	const stride_t l4= stride + l3;
-	const stride_t l5= stride + l4;
-	const stride_t l6= stride + l5;
-	const stride_t l7= stride + l6;
-	const stride_t l8= stride + l7;
-//	const int l9= stride + l8;
-	int x;
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
-		if(ABS(middleEnergy) < 8*c->QP)
-		{
-			const int q=(src[l4] - src[l5])/2;
-			const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
-			const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
-
-			int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
-			d= MAX(d, 0);
-
-			d= (5*d + 32) >> 6;
-			d*= SIGN(-middleEnergy);
-
-			if(q>0)
-			{
-				d= d<0 ? 0 : d;
-				d= d>q ? q : d;
-			}
-			else
-			{
-				d= d>0 ? 0 : d;
-				d= d<q ? q : d;
-			}
-
-        		src[l4]-= d;
-	        	src[l5]+= d;
-		}
-		src++;
-	}
-#endif
-}
-#endif //HAVE_ALTIVEC
-
-#ifndef HAVE_ALTIVEC
-static inline void RENAME(dering)(uint8_t src[], stride_t stride, PPContext *c)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	asm volatile(
-		"pxor %%mm6, %%mm6				\n\t"
-		"pcmpeqb %%mm7, %%mm7				\n\t"
-		"movq %2, %%mm0					\n\t"
-		"punpcklbw %%mm6, %%mm0				\n\t"
-		"psrlw $1, %%mm0				\n\t"
-		"psubw %%mm7, %%mm0				\n\t"
-		"packuswb %%mm0, %%mm0				\n\t"
-		"movq %%mm0, %3					\n\t"
-
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-
-#undef FIND_MIN_MAX
-#if HAVE_MMX2
-#define REAL_FIND_MIN_MAX(addr)\
-		"movq " #addr ", %%mm0				\n\t"\
-		"pminub %%mm0, %%mm7				\n\t"\
-		"pmaxub %%mm0, %%mm6				\n\t"
-#else
-#define REAL_FIND_MIN_MAX(addr)\
-		"movq " #addr ", %%mm0				\n\t"\
-		"movq %%mm7, %%mm1				\n\t"\
-		"psubusb %%mm0, %%mm6				\n\t"\
-		"paddb %%mm0, %%mm6				\n\t"\
-		"psubusb %%mm0, %%mm1				\n\t"\
-		"psubb %%mm1, %%mm7				\n\t"
-#endif
-#define FIND_MIN_MAX(addr)  REAL_FIND_MIN_MAX(addr)
-
-FIND_MIN_MAX((%%REGa))
-FIND_MIN_MAX((%%REGa, %1))
-FIND_MIN_MAX((%%REGa, %1, 2))
-FIND_MIN_MAX((%0, %1, 4))
-FIND_MIN_MAX((%%REGd))
-FIND_MIN_MAX((%%REGd, %1))
-FIND_MIN_MAX((%%REGd, %1, 2))
-FIND_MIN_MAX((%0, %1, 8))
-
-		"movq %%mm7, %%mm4				\n\t"
-		"psrlq $8, %%mm7				\n\t"
-#if HAVE_MMX2
-		"pminub %%mm4, %%mm7				\n\t" // min of pixels
-		"pshufw $0xF9, %%mm7, %%mm4			\n\t"
-		"pminub %%mm4, %%mm7				\n\t" // min of pixels
-		"pshufw $0xFE, %%mm7, %%mm4			\n\t"
-		"pminub %%mm4, %%mm7				\n\t"
-#else
-		"movq %%mm7, %%mm1				\n\t"
-		"psubusb %%mm4, %%mm1				\n\t"
-		"psubb %%mm1, %%mm7				\n\t"
-		"movq %%mm7, %%mm4				\n\t"
-		"psrlq $16, %%mm7				\n\t"
-		"movq %%mm7, %%mm1				\n\t"
-		"psubusb %%mm4, %%mm1				\n\t"
-		"psubb %%mm1, %%mm7				\n\t"
-		"movq %%mm7, %%mm4				\n\t"
-		"psrlq $32, %%mm7				\n\t"
-		"movq %%mm7, %%mm1				\n\t"
-		"psubusb %%mm4, %%mm1				\n\t"
-		"psubb %%mm1, %%mm7				\n\t"
-#endif
-
-
-		"movq %%mm6, %%mm4				\n\t"
-		"psrlq $8, %%mm6				\n\t"
-#if HAVE_MMX2
-		"pmaxub %%mm4, %%mm6				\n\t" // max of pixels
-		"pshufw $0xF9, %%mm6, %%mm4			\n\t"
-		"pmaxub %%mm4, %%mm6				\n\t"
-		"pshufw $0xFE, %%mm6, %%mm4			\n\t"
-		"pmaxub %%mm4, %%mm6				\n\t"
-#else
-		"psubusb %%mm4, %%mm6				\n\t"
-		"paddb %%mm4, %%mm6				\n\t"
-		"movq %%mm6, %%mm4				\n\t"
-		"psrlq $16, %%mm6				\n\t"
-		"psubusb %%mm4, %%mm6				\n\t"
-		"paddb %%mm4, %%mm6				\n\t"
-		"movq %%mm6, %%mm4				\n\t"
-		"psrlq $32, %%mm6				\n\t"
-		"psubusb %%mm4, %%mm6				\n\t"
-		"paddb %%mm4, %%mm6				\n\t"
-#endif
-		"movq %%mm6, %%mm0				\n\t" // max
-		"psubb %%mm7, %%mm6				\n\t" // max - min
-		"movd %%mm6, %%ecx				\n\t"
-		"cmpb "MANGLE(deringThreshold)", %%cl		\n\t"
-		" jb 1f						\n\t"
-		"lea -24(%%"REG_SP"), %%"REG_c"			\n\t"
-		"and "ALIGN_MASK", %%"REG_c"			\n\t"
-		PAVGB(%%mm0, %%mm7)				      // a=(max + min)/2
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"movq %%mm7, (%%"REG_c")			\n\t"
-
-		"movq (%0), %%mm0				\n\t" // L10
-		"movq %%mm0, %%mm1				\n\t" // L10
-		"movq %%mm0, %%mm2				\n\t" // L10
-		"psllq $8, %%mm1				\n\t"
-		"psrlq $8, %%mm2				\n\t"
-		"movd -4(%0), %%mm3				\n\t"
-		"movd 8(%0), %%mm4				\n\t"
-		"psrlq $24, %%mm3				\n\t"
-		"psllq $56, %%mm4				\n\t"
-		"por %%mm3, %%mm1				\n\t" // L00
-		"por %%mm4, %%mm2				\n\t" // L20
-		"movq %%mm1, %%mm3				\n\t" // L00
-		PAVGB(%%mm2, %%mm1)				      // (L20 + L00)/2
-		PAVGB(%%mm0, %%mm1)				      // (L20 + L00 + 2L10)/4
-		"psubusb %%mm7, %%mm0				\n\t"
-		"psubusb %%mm7, %%mm2				\n\t"
-		"psubusb %%mm7, %%mm3				\n\t"
-		"pcmpeqb "MANGLE(b00)", %%mm0			\n\t" // L10 > a ? 0 : -1
-		"pcmpeqb "MANGLE(b00)", %%mm2			\n\t" // L20 > a ? 0 : -1
-		"pcmpeqb "MANGLE(b00)", %%mm3			\n\t" // L00 > a ? 0 : -1
-		"paddb %%mm2, %%mm0				\n\t"
-		"paddb %%mm3, %%mm0				\n\t"
-
-		"movq (%%"REG_a"), %%mm2			\n\t" // L11
-		"movq %%mm2, %%mm3				\n\t" // L11
-		"movq %%mm2, %%mm4				\n\t" // L11
-		"psllq $8, %%mm3				\n\t"
-		"psrlq $8, %%mm4				\n\t"
-		"movd -4(%%"REG_a"), %%mm5			\n\t"
-		"movd 8(%%"REG_a"), %%mm6			\n\t"
-		"psrlq $24, %%mm5				\n\t"
-		"psllq $56, %%mm6				\n\t"
-		"por %%mm5, %%mm3				\n\t" // L01
-		"por %%mm6, %%mm4				\n\t" // L21
-		"movq %%mm3, %%mm5				\n\t" // L01
-		PAVGB(%%mm4, %%mm3)				      // (L21 + L01)/2
-		PAVGB(%%mm2, %%mm3)				      // (L21 + L01 + 2L11)/4
-		"psubusb %%mm7, %%mm2				\n\t"
-		"psubusb %%mm7, %%mm4				\n\t"
-		"psubusb %%mm7, %%mm5				\n\t"
-		"pcmpeqb "MANGLE(b00)", %%mm2			\n\t" // L11 > a ? 0 : -1
-		"pcmpeqb "MANGLE(b00)", %%mm4			\n\t" // L21 > a ? 0 : -1
-		"pcmpeqb "MANGLE(b00)", %%mm5			\n\t" // L01 > a ? 0 : -1
-		"paddb %%mm4, %%mm2				\n\t"
-		"paddb %%mm5, %%mm2				\n\t"
-// 0, 2, 3, 1
-#define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
-		"movq " #src ", " #sx "				\n\t" /* src[0] */\
-		"movq " #sx ", " #lx "				\n\t" /* src[0] */\
-		"movq " #sx ", " #t0 "				\n\t" /* src[0] */\
-		"psllq $8, " #lx "				\n\t"\
-		"psrlq $8, " #t0 "				\n\t"\
-		"movd -4" #src ", " #t1 "			\n\t"\
-		"psrlq $24, " #t1 "				\n\t"\
-		"por " #t1 ", " #lx "				\n\t" /* src[-1] */\
-		"movd 8" #src ", " #t1 "			\n\t"\
-		"psllq $56, " #t1 "				\n\t"\
-		"por " #t1 ", " #t0 "				\n\t" /* src[+1] */\
-		"movq " #lx ", " #t1 "				\n\t" /* src[-1] */\
-		PAVGB(t0, lx)				              /* (src[-1] + src[+1])/2 */\
-		PAVGB(sx, lx)				      /* (src[-1] + 2src[0] + src[+1])/4 */\
-		PAVGB(lx, pplx)					     \
-		"movq " #lx ", 8(%%"REG_c")			\n\t"\
-		"movq (%%"REG_c"), " #lx "			\n\t"\
-		"psubusb " #lx ", " #t1 "			\n\t"\
-		"psubusb " #lx ", " #t0 "			\n\t"\
-		"psubusb " #lx ", " #sx "			\n\t"\
-		"movq "MANGLE(b00)", " #lx "			\n\t"\
-		"pcmpeqb " #lx ", " #t1 "			\n\t" /* src[-1] > a ? 0 : -1*/\
-		"pcmpeqb " #lx ", " #t0 "			\n\t" /* src[+1] > a ? 0 : -1*/\
-		"pcmpeqb " #lx ", " #sx "			\n\t" /* src[0]  > a ? 0 : -1*/\
-		"paddb " #t1 ", " #t0 "				\n\t"\
-		"paddb " #t0 ", " #sx "				\n\t"\
-\
-		PAVGB(plx, pplx)				      /* filtered */\
-		"movq " #dst ", " #t0 "				\n\t" /* dst */\
-		"movq " #t0 ", " #t1 "				\n\t" /* dst */\
-		"psubusb %3, " #t0 "				\n\t"\
-		"paddusb %3, " #t1 "				\n\t"\
-		PMAXUB(t0, pplx)\
-		PMINUB(t1, pplx, t0)\
-		"paddb " #sx ", " #ppsx "			\n\t"\
-		"paddb " #psx ", " #ppsx "			\n\t"\
-		"#paddb "MANGLE(b02)", " #ppsx "		\n\t"\
-		"pand "MANGLE(b08)", " #ppsx "			\n\t"\
-		"pcmpeqb " #lx ", " #ppsx "			\n\t"\
-		"pand " #ppsx ", " #pplx "			\n\t"\
-		"pandn " #dst ", " #ppsx "			\n\t"\
-		"por " #pplx ", " #ppsx "			\n\t"\
-		"movq " #ppsx ", " #dst "			\n\t"\
-		"movq 8(%%"REG_c"), " #lx "			\n\t"
-
-#define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
-   REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
-/*
-0000000
-1111111
-
-1111110
-1111101
-1111100
-1111011
-1111010
-1111001
-
-1111000
-1110111
-
-*/
-//DERING_CORE(dst,src                  ,ppsx ,psx  ,sx   ,pplx ,plx  ,lx   ,t0   ,t1)
-DERING_CORE((%%REGa),(%%REGa, %1)        ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%REGa, %1),(%%REGa, %1, 2) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 4),(%%REGd)        ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%REGd),(%%REGd, %1)        ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%REGd, %1), (%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 8),(%%REGd, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-
-		"1:			\n\t"
-		: : "r" (src), "r" ((stride_t)stride), "m" (c->pQPb), "m"(c->pQPb2)
-		: "%"REG_a, "%"REG_d, "%"REG_c
-	);
-#else
-	int y;
-	int min=255;
-	int max=0;
-	int avg;
-	uint8_t *p;
-	int s[10];
-	const int QP2= c->QP/2 + 1;
-
-	for(y=1; y<9; y++)
-	{
-		int x;
-		p= src + stride*y;
-		for(x=1; x<9; x++)
-		{
-			p++;
-			if(*p > max) max= *p;
-			if(*p < min) min= *p;
-		}
-	}
-	avg= (min + max + 1)>>1;
-
-	if(max - min <deringThreshold) return;
-
-	for(y=0; y<10; y++)
-	{
-		int t = 0;
-
-		if(src[stride*y + 0] > avg) t+= 1;
-		if(src[stride*y + 1] > avg) t+= 2;
-		if(src[stride*y + 2] > avg) t+= 4;
-		if(src[stride*y + 3] > avg) t+= 8;
-		if(src[stride*y + 4] > avg) t+= 16;
-		if(src[stride*y + 5] > avg) t+= 32;
-		if(src[stride*y + 6] > avg) t+= 64;
-		if(src[stride*y + 7] > avg) t+= 128;
-		if(src[stride*y + 8] > avg) t+= 256;
-		if(src[stride*y + 9] > avg) t+= 512;
-
-		t |= (~t)<<16;
-		t &= (t<<1) & (t>>1);
-		s[y] = t;
-	}
-
-	for(y=1; y<9; y++)
-	{
-		int t = s[y-1] & s[y] & s[y+1];
-		t|= t>>16;
-		s[y-1]= t;
-	}
-
-	for(y=1; y<9; y++)
-	{
-		int x;
-		int t = s[y-1];
-
-		p= src + stride*y;
-		for(x=1; x<9; x++)
-		{
-			p++;
-			if(t & (1<<x))
-			{
-				int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
-				      +2*(*(p     -1)) + 4*(*p         ) + 2*(*(p     +1))
-				      +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
-				f= (f + 8)>>4;
-
-#ifdef DEBUG_DERING_THRESHOLD
-				asm volatile("emms\n\t":);
-				{
-				static long long numPixels=0;
-				if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
-//				if((max-min)<20 || (max-min)*QP<200)
-//				if((max-min)*QP < 500)
-//				if(max-min<QP/2)
-				if(max-min < 20)
-				{
-					static int numSkiped=0;
-					static int errorSum=0;
-					static int worstQP=0;
-					static int worstRange=0;
-					static int worstDiff=0;
-					int diff= (f - *p);
-					int absDiff= ABS(diff);
-					int error= diff*diff;
-
-					if(x==1 || x==8 || y==1 || y==8) continue;
-
-					numSkiped++;
-					if(absDiff > worstDiff)
-					{
-						worstDiff= absDiff;
-						worstQP= QP;
-						worstRange= max-min;
-					}
-					errorSum+= error;
-
-					if(1024LL*1024LL*1024LL % numSkiped == 0)
-					{
-					}
-				}
-				}
-#endif
-				if     (*p + QP2 < f) *p= *p + QP2;
-				else if(*p - QP2 > f) *p= *p - QP2;
-				else *p=f;
-			}
-		}
-	}
-#ifdef DEBUG_DERING_THRESHOLD
-	if(max-min < 20)
-	{
-		for(y=1; y<9; y++)
-		{
-			int x;
-			int t = 0;
-			p= src + stride*y;
-			for(x=1; x<9; x++)
-			{
-				p++;
-				*p = MIN(*p + 20, 255);
-			}
-		}
-//		src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
-	}
-#endif
-#endif
-}
-#endif //HAVE_ALTIVEC
-
-/**
- * Deinterlaces the given block by linearly interpolating every second line.
- * will be called for every 8x8 block and can read & write from line 4-15
- * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
- * lines 4-12 will be read into the deblocking filter and should be deinterlaced
- */
-static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], stride_t stride)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	src+= 4*stride;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1	%0+8%1	ecx+4%1
-
-		"movq (%0), %%mm0				\n\t"
-		"movq (%%"REG_a", %1), %%mm1			\n\t"
-		PAVGB(%%mm1, %%mm0)
-		"movq %%mm0, (%%"REG_a")			\n\t"
-		"movq (%0, %1, 4), %%mm0			\n\t"
-		PAVGB(%%mm0, %%mm1)
-		"movq %%mm1, (%%"REG_a", %1, 2)			\n\t"
-		"movq (%%"REG_c", %1), %%mm1			\n\t"
-		PAVGB(%%mm1, %%mm0)
-		"movq %%mm0, (%%"REG_c")			\n\t"
-		"movq (%0, %1, 8), %%mm0			\n\t"
-		PAVGB(%%mm0, %%mm1)
-		"movq %%mm1, (%%"REG_c", %1, 2)			\n\t"
-
-		: : "r" (src), "r" ((stride_t)stride)
-		: "%"REG_a, "%"REG_c
-	);
-#else
-	int a, b, x;
-	src+= 4*stride;
-
-	for(x=0; x<2; x++){
-		a= *(uint32_t*)&src[stride*0];
-		b= *(uint32_t*)&src[stride*2];
-		*(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-		a= *(uint32_t*)&src[stride*4];
-		*(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-		b= *(uint32_t*)&src[stride*6];
-		*(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-		a= *(uint32_t*)&src[stride*8];
-		*(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-		src += 4;
-	}
-#endif
-}
-
-/**
- * Deinterlaces the given block by cubic interpolating every second line.
- * will be called for every 8x8 block and can read & write from line 4-15
- * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
- * lines 4-12 will be read into the deblocking filter and should be deinterlaced
- * this filter will read lines 3-15 and write 7-13
- */
-static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], stride_t stride)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	src+= stride*3;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-		"lea (%%"REG_d", %1, 4), %%"REG_c"		\n\t"
-		"add %1, %%"REG_c"				\n\t"
-		"pxor %%mm7, %%mm7				\n\t"
-//	0	1	2	3	4	5	6	7	8	9	10
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx
-
-#define REAL_DEINT_CUBIC(a,b,c,d,e)\
-		"movq " #a ", %%mm0				\n\t"\
-		"movq " #b ", %%mm1				\n\t"\
-		"movq " #d ", %%mm2				\n\t"\
-		"movq " #e ", %%mm3				\n\t"\
-		PAVGB(%%mm2, %%mm1)					/* (b+d) /2 */\
-		PAVGB(%%mm3, %%mm0)					/* a(a+e) /2 */\
-		"movq %%mm0, %%mm2				\n\t"\
-		"punpcklbw %%mm7, %%mm0				\n\t"\
-		"punpckhbw %%mm7, %%mm2				\n\t"\
-		"movq %%mm1, %%mm3				\n\t"\
-		"punpcklbw %%mm7, %%mm1				\n\t"\
-		"punpckhbw %%mm7, %%mm3				\n\t"\
-		"psubw %%mm1, %%mm0				\n\t"	/* L(a+e - (b+d))/2 */\
-		"psubw %%mm3, %%mm2				\n\t"	/* H(a+e - (b+d))/2 */\
-		"psraw $3, %%mm0				\n\t"	/* L(a+e - (b+d))/16 */\
-		"psraw $3, %%mm2				\n\t"	/* H(a+e - (b+d))/16 */\
-		"psubw %%mm0, %%mm1				\n\t"	/* L(9b + 9d - a - e)/16 */\
-		"psubw %%mm2, %%mm3				\n\t"	/* H(9b + 9d - a - e)/16 */\
-		"packuswb %%mm3, %%mm1				\n\t"\
-		"movq %%mm1, " #c "				\n\t"
-#define DEINT_CUBIC(a,b,c,d,e)  REAL_DEINT_CUBIC(a,b,c,d,e)
-
-DEINT_CUBIC((%0), (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4), (%%REGd, %1))
-DEINT_CUBIC((%%REGa, %1), (%0, %1, 4), (%%REGd), (%%REGd, %1), (%0, %1, 8))
-DEINT_CUBIC((%0, %1, 4), (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8), (%%REGc))
-DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2))
-
-		: : "r" (src), "r" ((stride_t)stride)
-		: "%"REG_a, "%"REG_d, "%"REG_c
-	);
-#else
-	int x;
-	src+= stride*3;
-	for(x=0; x<8; x++)
-	{
-		src[stride*3] = CLIP((-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
-		src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
-		src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
-		src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
-		src++;
-	}
-#endif
-}
-
-/**
- * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter.
- * will be called for every 8x8 block and can read & write from line 4-15
- * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
- * lines 4-12 will be read into the deblocking filter and should be deinterlaced
- * this filter will read lines 4-13 and write 5-11
- */
-static inline void RENAME(deInterlaceFF)(uint8_t src[], stride_t stride, uint8_t *tmp)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	src+= stride*4;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-		"pxor %%mm7, %%mm7				\n\t"
-		"movq (%2), %%mm0				\n\t"
-//	0	1	2	3	4	5	6	7	8	9	10
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx
-
-#define REAL_DEINT_FF(a,b,c,d)\
-		"movq " #a ", %%mm1				\n\t"\
-		"movq " #b ", %%mm2				\n\t"\
-		"movq " #c ", %%mm3				\n\t"\
-		"movq " #d ", %%mm4				\n\t"\
-		PAVGB(%%mm3, %%mm1)					\
-		PAVGB(%%mm4, %%mm0)					\
-		"movq %%mm0, %%mm3				\n\t"\
-		"punpcklbw %%mm7, %%mm0				\n\t"\
-		"punpckhbw %%mm7, %%mm3				\n\t"\
-		"movq %%mm1, %%mm4				\n\t"\
-		"punpcklbw %%mm7, %%mm1				\n\t"\
-		"punpckhbw %%mm7, %%mm4				\n\t"\
-		"psllw $2, %%mm1				\n\t"\
-		"psllw $2, %%mm4				\n\t"\
-		"psubw %%mm0, %%mm1				\n\t"\
-		"psubw %%mm3, %%mm4				\n\t"\
-		"movq %%mm2, %%mm5				\n\t"\
-		"movq %%mm2, %%mm0				\n\t"\
-		"punpcklbw %%mm7, %%mm2				\n\t"\
-		"punpckhbw %%mm7, %%mm5				\n\t"\
-		"paddw %%mm2, %%mm1				\n\t"\
-		"paddw %%mm5, %%mm4				\n\t"\
-		"psraw $2, %%mm1				\n\t"\
-		"psraw $2, %%mm4				\n\t"\
-		"packuswb %%mm4, %%mm1				\n\t"\
-		"movq %%mm1, " #b "				\n\t"\
-
-#define DEINT_FF(a,b,c,d)  REAL_DEINT_FF(a,b,c,d)
-
-DEINT_FF((%0)       ,  (%%REGa)       , (%%REGa, %1), (%%REGa, %1, 2))
-DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4),  (%%REGd)       )
-DEINT_FF((%0, %1, 4),  (%%REGd)       , (%%REGd, %1), (%%REGd, %1, 2))
-DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8),  (%%REGd, %1, 4))
-
-		"movq %%mm0, (%2)				\n\t"
-		: : "r" (src), "r" ((stride_t)stride), "r"(tmp)
-		: "%"REG_a, "%"REG_d
-	);
-#else
-	int x;
-	src+= stride*4;
-	for(x=0; x<8; x++)
-	{
-		int t1= tmp[x];
-		int t2= src[stride*1];
-
-		src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
-		t1= src[stride*4];
-		src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
-		t2= src[stride*6];
-		src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
-		t1= src[stride*8];
-		src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
-		tmp[x]= t1;
-
-		src++;
-	}
-#endif
-}
-
-/**
- * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter.
- * will be called for every 8x8 block and can read & write from line 4-15
- * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
- * lines 4-12 will be read into the deblocking filter and should be deinterlaced
- * this filter will read lines 4-13 and write 4-11
- *
- * The two taps above line 4 are not read from src: they come from tmp (used as
- * the line two above) and tmp2 (used as the line directly above). Each line is
- * filtered against the pre-filter values of the lines above it, because those
- * values are kept in temporaries before the line is overwritten. Before
- * returning, the pre-filter contents of lines 10 and 11 are stored to tmp and
- * tmp2 respectively.
- *
- * C fallback, per pixel:
- *   CLIP((-(above2 + below2) + 2*(above1 + below1) + 6*cur + 4) >> 3)
- * MMX2/3DNow path, per pixel:
- *   (3*cur + 2*PAVGB(above1, below1) - PAVGB(above2, below2)) >> 2, packed with
- *   unsigned saturation; no rounding constant is added, so its results are not
- *   guaranteed to match the C fallback bit for bit.
- *   (PAVGB is defined outside this block - presumably a byte-wise average.)
- *
- * @param src    start of the block; the function advances it by 4 lines itself
- * @param stride offset between two vertically adjacent pixels
- * @param tmp    8 pixels; in: values used as the line two above line 4,
- *               out: pre-filter contents of line 10
- * @param tmp2   8 pixels; in: values used as the line directly above line 4,
- *               out: pre-filter contents of line 11
- */
-static inline void RENAME(deInterlaceL5)(uint8_t src[], stride_t stride, uint8_t *tmp, uint8_t *tmp2)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	src+= stride*4;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-		"pxor %%mm7, %%mm7				\n\t"
-		"movq (%2), %%mm0				\n\t"
-		"movq (%3), %%mm1				\n\t"
-//	0	1	2	3	4	5	6	7	8	9	10
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx
-
-#define REAL_DEINT_L5(t1,t2,a,b,c)\
-		"movq " #a ", %%mm2				\n\t"\
-		"movq " #b ", %%mm3				\n\t"\
-		"movq " #c ", %%mm4				\n\t"\
-		PAVGB(t2, %%mm3)					\
-		PAVGB(t1, %%mm4)					\
-		"movq %%mm2, %%mm5				\n\t"\
-		"movq %%mm2, " #t1 "				\n\t"\
-		"punpcklbw %%mm7, %%mm2				\n\t"\
-		"punpckhbw %%mm7, %%mm5				\n\t"\
-		"movq %%mm2, %%mm6				\n\t"\
-		"paddw %%mm2, %%mm2				\n\t"\
-		"paddw %%mm6, %%mm2				\n\t"\
-		"movq %%mm5, %%mm6				\n\t"\
-		"paddw %%mm5, %%mm5				\n\t"\
-		"paddw %%mm6, %%mm5				\n\t"\
-		"movq %%mm3, %%mm6				\n\t"\
-		"punpcklbw %%mm7, %%mm3				\n\t"\
-		"punpckhbw %%mm7, %%mm6				\n\t"\
-		"paddw %%mm3, %%mm3				\n\t"\
-		"paddw %%mm6, %%mm6				\n\t"\
-		"paddw %%mm3, %%mm2				\n\t"\
-		"paddw %%mm6, %%mm5				\n\t"\
-		"movq %%mm4, %%mm6				\n\t"\
-		"punpcklbw %%mm7, %%mm4				\n\t"\
-		"punpckhbw %%mm7, %%mm6				\n\t"\
-		"psubw %%mm4, %%mm2				\n\t"\
-		"psubw %%mm6, %%mm5				\n\t"\
-		"psraw $2, %%mm2				\n\t"\
-		"psraw $2, %%mm5				\n\t"\
-		"packuswb %%mm5, %%mm2				\n\t"\
-		"movq %%mm2, " #a "				\n\t"\
-
-#define DEINT_L5(t1,t2,a,b,c)  REAL_DEINT_L5(t1,t2,a,b,c)
-
-DEINT_L5(%%mm0, %%mm1, (%0)           , (%%REGa)       , (%%REGa, %1)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGa)       , (%%REGa, %1)   , (%%REGa, %1, 2))
-DEINT_L5(%%mm0, %%mm1, (%%REGa, %1)   , (%%REGa, %1, 2), (%0, %1, 4)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4)    , (%%REGd)       )
-DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)    , (%%REGd)       , (%%REGd, %1)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGd)       , (%%REGd, %1)   , (%%REGd, %1, 2))
-DEINT_L5(%%mm0, %%mm1, (%%REGd, %1)   , (%%REGd, %1, 2), (%0, %1, 8)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
-
-		"movq %%mm0, (%2)				\n\t"
-		"movq %%mm1, (%3)				\n\t"
-		: : "r" (src), "r" ((stride_t)stride), "r"(tmp), "r"(tmp2)
-		: "%"REG_a, "%"REG_d
-	);
-#else
-	int x;
-	src+= stride*4;
-	for(x=0; x<8; x++)
-	{
-		int t1= tmp[x];
-		int t2= tmp2[x];
-		int t3= src[0];
-
-		src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
-		t1= src[stride*1];
-		src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
-		t2= src[stride*2];
-		src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
-		t3= src[stride*3];
-		src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
-		t1= src[stride*4];
-		src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
-		t2= src[stride*5];
-		src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
-		t3= src[stride*6];
-		src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
-		t1= src[stride*7];
-		src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
-
-		tmp[x]= t3;
-		tmp2[x]= t1;
-
-		src++;
-	}
-#endif
-}
-
-/**
- * Deinterlaces the given block by filtering all lines with a (1 2 1) filter.
- * will be called for every 8x8 block and can read & write from line 4-15
- * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
- * lines 4-12 will be read into the deblocking filter and should be deinterlaced
- * this filter will read lines 4-13 and write 4-11
- * NOTE(review): both code paths below read src only up to offset 8*stride after
- * the 4-line advance (i.e. line 12), not line 13 - confirm the range stated above.
- *
- * The line above line 4 is taken from tmp instead of src. Each output line is
- * built from two chained byte-wise averages: first the lines above and below
- * are averaged, then that result is averaged with the current line. Before
- * returning, the pre-filter contents of line 11 are stored to tmp.
- *
- * The C fallback handles 4 pixels at a time packed in a 32-bit word (two loop
- * iterations cover the 8 columns):
- *   (p&q) + (((p^q)&0xFEFEFEFE)>>1)  is the byte-wise average rounded down,
- *   (p|q) - (((p^q)&0xFEFEFEFE)>>1)  is the byte-wise average rounded up.
- * The MMX2/3DNow path uses PAVGB for both steps (PAVGB is defined outside this
- * block), so its rounding may differ from the C fallback.
- *
- * @param src    start of the block; the function advances it by 4 lines itself
- * @param stride offset between two vertically adjacent pixels
- * @param tmp    8 pixels; in: values used as the line above line 4,
- *               out: pre-filter contents of line 11
- */
-static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], stride_t stride, uint8_t *tmp)
-{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	src+= 4*stride;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-
-		"movq (%2), %%mm0				\n\t" // L0
-		"movq (%%"REG_a"), %%mm1			\n\t" // L2
-		PAVGB(%%mm1, %%mm0)				      // L0+L2
-		"movq (%0), %%mm2				\n\t" // L1
-		PAVGB(%%mm2, %%mm0)
-		"movq %%mm0, (%0)				\n\t"
-		"movq (%%"REG_a", %1), %%mm0			\n\t" // L3
-		PAVGB(%%mm0, %%mm2)				      // L1+L3
-		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
-		"movq %%mm2, (%%"REG_a")			\n\t"
-		"movq (%%"REG_a", %1, 2), %%mm2			\n\t" // L4
-		PAVGB(%%mm2, %%mm1)				      // L2+L4
-		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
-		"movq %%mm1, (%%"REG_a", %1)			\n\t"
-		"movq (%0, %1, 4), %%mm1			\n\t" // L5
-		PAVGB(%%mm1, %%mm0)				      // L3+L5
-		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
-		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t"
-		"movq (%%"REG_d"), %%mm0			\n\t" // L6
-		PAVGB(%%mm0, %%mm2)				      // L4+L6
-		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
-		"movq %%mm2, (%0, %1, 4)			\n\t"
-		"movq (%%"REG_d", %1), %%mm2			\n\t" // L7
-		PAVGB(%%mm2, %%mm1)				      // L5+L7
-		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
-		"movq %%mm1, (%%"REG_d")			\n\t"
-		"movq (%%"REG_d", %1, 2), %%mm1			\n\t" // L8
-		PAVGB(%%mm1, %%mm0)				      // L6+L8
-		PAVGB(%%mm2, %%mm0)				      // 2L7 + L6 + L8
-		"movq %%mm0, (%%"REG_d", %1)			\n\t"
-		"movq (%0, %1, 8), %%mm0			\n\t" // L9
-		PAVGB(%%mm0, %%mm2)				      // L7+L9
-		PAVGB(%%mm1, %%mm2)				      // 2L8 + L7 + L9
-		"movq %%mm2, (%%"REG_d", %1, 2)			\n\t"
-		"movq %%mm1, (%2)				\n\t"
-
-		: : "r" (src), "r" ((stride_t)stride), "r" (tmp)
-		: "%"REG_a, "%"REG_d
-	);
-#else
-	int a, b, c, x;
-	src+= 4*stride;
-
-	for(x=0; x<2; x++){
-		a= *(uint32_t*)&tmp[stride*0];
-		b= *(uint32_t*)&src[stride*0];
-		c= *(uint32_t*)&src[stride*1];
-		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-
-		a= *(uint32_t*)&src[stride*2];
-		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
-
-		b= *(uint32_t*)&src[stride*3];
-		c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
-
-		c= *(uint32_t*)&src[stride*4];
-		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-
-		a= *(uint32_t*)&src[stride*5];
-		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
-
-		b= *(uint32_t*)&src[stride*6];
-		c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
-
-		c= *(uint32_t*)&src[stride*7];
-		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-
-		a= *(uint32_t*)&src[stride*8];
-		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
-
-		*(uint32_t*)&tmp[stride*0]= c;
-		src += 4;
-		tmp += 4;
-	}
-#endif
-}
-
-/**
- * Deinterlaces the given block by applying a median filter to every second line.
- * will be called for every 8x8 block and can read & write from line 4-15,
- * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
- * lines 4-12 will be read into the deblocking filter and should be deinterlaced
- *
- * After src has been advanced by 4 lines, lines 5, 7, 9 and 11 are each
- * replaced by the per-pixel median of the line itself and the lines directly
- * above and below it. Lines 4, 6, 8, 10 and 12 are only read.
- *
- * Three implementations are selected by the preprocessor:
- *  - HAVE_MMX2: median built from pmaxub/pminub as
- *    min(max(a,b), max(min(a,b),c)).
- *  - HAVE_MMX without MMX2: the REAL_MEDIAN macro derives comparison masks
- *    with psubusb/pcmpeqb and combines the three inputs with por/pand.
- *  - otherwise: plain C, one column at a time; d, e and f are intended as
- *    0 / all-ones masks taken from the sign of the pairwise differences
- *    (this presumably relies on a 32-bit int and an arithmetic right shift -
- *    confirm for the target compilers).
- *
- * @param src    start of the block; the function advances it by 4 lines itself
- * @param stride offset between two vertically adjacent pixels
- */
-static inline void RENAME(deInterlaceMedian)(uint8_t src[], stride_t stride)
-{
-#if HAVE_MMX
-	src+= 4*stride;
-#if HAVE_MMX2
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-
-		"movq (%0), %%mm0				\n\t" //
-		"movq (%%"REG_a", %1), %%mm2			\n\t" //
-		"movq (%%"REG_a"), %%mm1			\n\t" //
-		"movq %%mm0, %%mm3				\n\t"
-		"pmaxub %%mm1, %%mm0				\n\t" //
-		"pminub %%mm3, %%mm1				\n\t" //
-		"pmaxub %%mm2, %%mm1				\n\t" //
-		"pminub %%mm1, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_a")			\n\t"
-
-		"movq (%0, %1, 4), %%mm0			\n\t" //
-		"movq (%%"REG_a", %1, 2), %%mm1			\n\t" //
-		"movq %%mm2, %%mm3				\n\t"
-		"pmaxub %%mm1, %%mm2				\n\t" //
-		"pminub %%mm3, %%mm1				\n\t" //
-		"pmaxub %%mm0, %%mm1				\n\t" //
-		"pminub %%mm1, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_a", %1, 2)			\n\t"
-
-		"movq (%%"REG_d"), %%mm2			\n\t" //
-		"movq (%%"REG_d", %1), %%mm1			\n\t" //
-		"movq %%mm2, %%mm3				\n\t"
-		"pmaxub %%mm0, %%mm2				\n\t" //
-		"pminub %%mm3, %%mm0				\n\t" //
-		"pmaxub %%mm1, %%mm0				\n\t" //
-		"pminub %%mm0, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_d")			\n\t"
-
-		"movq (%%"REG_d", %1, 2), %%mm2			\n\t" //
-		"movq (%0, %1, 8), %%mm0			\n\t" //
-		"movq %%mm2, %%mm3				\n\t"
-		"pmaxub %%mm0, %%mm2				\n\t" //
-		"pminub %%mm3, %%mm0				\n\t" //
-		"pmaxub %%mm1, %%mm0				\n\t" //
-		"pminub %%mm0, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_d", %1, 2)			\n\t"
-
-
-		: : "r" (src), "r" ((stride_t)stride)
-		: "%"REG_a, "%"REG_d
-	);
-
-#else // MMX without MMX2
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-		"pxor %%mm7, %%mm7				\n\t"
-
-#define REAL_MEDIAN(a,b,c)\
-		"movq " #a ", %%mm0				\n\t"\
-		"movq " #b ", %%mm2				\n\t"\
-		"movq " #c ", %%mm1				\n\t"\
-		"movq %%mm0, %%mm3				\n\t"\
-		"movq %%mm1, %%mm4				\n\t"\
-		"movq %%mm2, %%mm5				\n\t"\
-		"psubusb %%mm1, %%mm3				\n\t"\
-		"psubusb %%mm2, %%mm4				\n\t"\
-		"psubusb %%mm0, %%mm5				\n\t"\
-		"pcmpeqb %%mm7, %%mm3				\n\t"\
-		"pcmpeqb %%mm7, %%mm4				\n\t"\
-		"pcmpeqb %%mm7, %%mm5				\n\t"\
-		"movq %%mm3, %%mm6				\n\t"\
-		"pxor %%mm4, %%mm3				\n\t"\
-		"pxor %%mm5, %%mm4				\n\t"\
-		"pxor %%mm6, %%mm5				\n\t"\
-		"por %%mm3, %%mm1				\n\t"\
-		"por %%mm4, %%mm2				\n\t"\
-		"por %%mm5, %%mm0				\n\t"\
-		"pand %%mm2, %%mm0				\n\t"\
-		"pand %%mm1, %%mm0				\n\t"\
-		"movq %%mm0, " #b "				\n\t"
-#define MEDIAN(a,b,c)  REAL_MEDIAN(a,b,c)
-
-MEDIAN((%0), (%%REGa), (%%REGa, %1))
-MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
-MEDIAN((%0, %1, 4), (%%REGd), (%%REGd, %1))
-MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
-
-		: : "r" (src), "r" ((stride_t)stride)
-		: "%"REG_a, "%"REG_d
-	);
-#endif // MMX
-#else
-	int x, y;
-	src+= 4*stride;
-	// FIXME - there should be a way to do a few columns in parallel like w/mmx
-	for(x=0; x<8; x++)
-	{
-		uint8_t *colsrc = src;
-		for (y=0; y<4; y++)
-		{
-			int a, b, c, d, e, f;
-			a = colsrc[0       ];
-			b = colsrc[stride  ];
-			c = colsrc[stride*2];
-			d = (a-b)>>31;
-			e = (b-c)>>31;
-			f = (c-a)>>31;
-			colsrc[stride  ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
-			colsrc += stride*2;
-		}
-		src++;
-	}
-#endif
-}
-
-#if HAVE_MMX
-/**
- * transposes and shift the given 8x8 Block into dst1 and dst2
- *
- * Reads 8 lines of 8 bytes from src (lines srcStride apart) in two passes of
- * 4 lines each and writes every source column as 4-byte pieces into the
- * destinations, whose lines are 16 bytes apart:
- *  - dst1 receives source columns 0..4 at byte offsets 128, 144, 160, 176, 192
- *  - dst2 receives source columns 3..7 at byte offsets  48,  64,  80,  96, 112
- * Source lines 0..3 fill bytes 0..3 of each destination line, source lines
- * 4..7 fill bytes 4..7 (the same offsets plus 4). Columns 3 and 4 are thus
- * written to both buffers.
- *
- * @param dst1      first destination buffer (written at offsets 128..199)
- * @param dst2      second destination buffer (written at offsets 48..119)
- * @param src       top-left pixel of the 8x8 source block
- * @param srcStride offset between two source lines
- */
-static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
-{
-	asm(
-		"lea (%0, %1), %%"REG_a"	\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-		"movq (%0), %%mm0		\n\t" // 12345678
-		"movq (%%"REG_a"), %%mm1	\n\t" // abcdefgh
-		"movq %%mm0, %%mm2		\n\t" // 12345678
-		"punpcklbw %%mm1, %%mm0		\n\t" // 1a2b3c4d
-		"punpckhbw %%mm1, %%mm2		\n\t" // 5e6f7g8h
-
-		"movq (%%"REG_a", %1), %%mm1	\n\t"
-		"movq (%%"REG_a", %1, 2), %%mm3	\n\t"
-		"movq %%mm1, %%mm4		\n\t"
-		"punpcklbw %%mm3, %%mm1		\n\t"
-		"punpckhbw %%mm3, %%mm4		\n\t"
-
-		"movq %%mm0, %%mm3		\n\t"
-		"punpcklwd %%mm1, %%mm0		\n\t"
-		"punpckhwd %%mm1, %%mm3		\n\t"
-		"movq %%mm2, %%mm1		\n\t"
-		"punpcklwd %%mm4, %%mm2		\n\t"
-		"punpckhwd %%mm4, %%mm1		\n\t"
-
-		"movd %%mm0, 128(%2)		\n\t"
-		"psrlq $32, %%mm0		\n\t"
-		"movd %%mm0, 144(%2)		\n\t"
-		"movd %%mm3, 160(%2)		\n\t"
-		"psrlq $32, %%mm3		\n\t"
-		"movd %%mm3, 176(%2)		\n\t"
-		"movd %%mm3, 48(%3)		\n\t"
-		"movd %%mm2, 192(%2)		\n\t"
-		"movd %%mm2, 64(%3)		\n\t"
-		"psrlq $32, %%mm2		\n\t"
-		"movd %%mm2, 80(%3)		\n\t"
-		"movd %%mm1, 96(%3)		\n\t"
-		"psrlq $32, %%mm1		\n\t"
-		"movd %%mm1, 112(%3)		\n\t"
-
-		"lea (%%"REG_a", %1, 4), %%"REG_a"	\n\t"
-
-		"movq (%0, %1, 4), %%mm0	\n\t" // 12345678
-		"movq (%%"REG_a"), %%mm1	\n\t" // abcdefgh
-		"movq %%mm0, %%mm2		\n\t" // 12345678
-		"punpcklbw %%mm1, %%mm0		\n\t" // 1a2b3c4d
-		"punpckhbw %%mm1, %%mm2		\n\t" // 5e6f7g8h
-
-		"movq (%%"REG_a", %1), %%mm1	\n\t"
-		"movq (%%"REG_a", %1, 2), %%mm3	\n\t"
-		"movq %%mm1, %%mm4		\n\t"
-		"punpcklbw %%mm3, %%mm1		\n\t"
-		"punpckhbw %%mm3, %%mm4		\n\t"
-
-		"movq %%mm0, %%mm3		\n\t"
-		"punpcklwd %%mm1, %%mm0		\n\t"
-		"punpckhwd %%mm1, %%mm3		\n\t"
-		"movq %%mm2, %%mm1		\n\t"
-		"punpcklwd %%mm4, %%mm2		\n\t"
-		"punpckhwd %%mm4, %%mm1		\n\t"
-
-		"movd %%mm0, 132(%2)		\n\t"
-		"psrlq $32, %%mm0		\n\t"
-		"movd %%mm0, 148(%2)		\n\t"
-		"movd %%mm3, 164(%2)		\n\t"
-		"psrlq $32, %%mm3		\n\t"
-		"movd %%mm3, 180(%2)		\n\t"
-		"movd %%mm3, 52(%3)		\n\t"
-		"movd %%mm2, 196(%2)		\n\t"
-		"movd %%mm2, 68(%3)		\n\t"
-		"psrlq $32, %%mm2		\n\t"
-		"movd %%mm2, 84(%3)		\n\t"
-		"movd %%mm1, 100(%3)		\n\t"
-		"psrlq $32, %%mm1		\n\t"
-		"movd %%mm1, 116(%3)		\n\t"
-
-
-	:: "r" (src), "r" ((stride_t)srcStride), "r" (dst1), "r" (dst2)
-	: "%"REG_a
-	);
-}
-
-/**
- * transposes the given 8x8 block
- *
- * Reads 8 lines of 8 bytes from src, where consecutive source lines are 16
- * bytes apart (offsets 0, 16, ..., 112), and writes the transposed block to
- * dst, whose lines are dstStride apart. Source lines 0..3 produce bytes 0..3
- * of the 8 destination lines, source lines 4..7 produce bytes 4..7.
- *
- * @param dst       top-left pixel of the 8x8 destination block
- * @param dstStride offset between two destination lines
- * @param src       source buffer with a fixed 16-byte line pitch
- */
-static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
-{
-	asm(
-		"lea (%0, %1), %%"REG_a"	\n\t"
-		"lea (%%"REG_a",%1,4), %%"REG_d"\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-		"movq (%2), %%mm0		\n\t" // 12345678
-		"movq 16(%2), %%mm1		\n\t" // abcdefgh
-		"movq %%mm0, %%mm2		\n\t" // 12345678
-		"punpcklbw %%mm1, %%mm0		\n\t" // 1a2b3c4d
-		"punpckhbw %%mm1, %%mm2		\n\t" // 5e6f7g8h
-
-		"movq 32(%2), %%mm1		\n\t"
-		"movq 48(%2), %%mm3		\n\t"
-		"movq %%mm1, %%mm4		\n\t"
-		"punpcklbw %%mm3, %%mm1		\n\t"
-		"punpckhbw %%mm3, %%mm4		\n\t"
-
-		"movq %%mm0, %%mm3		\n\t"
-		"punpcklwd %%mm1, %%mm0		\n\t"
-		"punpckhwd %%mm1, %%mm3		\n\t"
-		"movq %%mm2, %%mm1		\n\t"
-		"punpcklwd %%mm4, %%mm2		\n\t"
-		"punpckhwd %%mm4, %%mm1		\n\t"
-
-		"movd %%mm0, (%0)		\n\t"
-		"psrlq $32, %%mm0		\n\t"
-		"movd %%mm0, (%%"REG_a")	\n\t"
-		"movd %%mm3, (%%"REG_a", %1)	\n\t"
-		"psrlq $32, %%mm3		\n\t"
-		"movd %%mm3, (%%"REG_a", %1, 2)	\n\t"
-		"movd %%mm2, (%0, %1, 4)	\n\t"
-		"psrlq $32, %%mm2		\n\t"
-		"movd %%mm2, (%%"REG_d")	\n\t"
-		"movd %%mm1, (%%"REG_d", %1)	\n\t"
-		"psrlq $32, %%mm1		\n\t"
-		"movd %%mm1, (%%"REG_d", %1, 2)	\n\t"
-
-
-		"movq 64(%2), %%mm0		\n\t" // 12345678
-		"movq 80(%2), %%mm1		\n\t" // abcdefgh
-		"movq %%mm0, %%mm2		\n\t" // 12345678
-		"punpcklbw %%mm1, %%mm0		\n\t" // 1a2b3c4d
-		"punpckhbw %%mm1, %%mm2		\n\t" // 5e6f7g8h
-
-		"movq 96(%2), %%mm1		\n\t"
-		"movq 112(%2), %%mm3		\n\t"
-		"movq %%mm1, %%mm4		\n\t"
-		"punpcklbw %%mm3, %%mm1		\n\t"
-		"punpckhbw %%mm3, %%mm4		\n\t"
-
-		"movq %%mm0, %%mm3		\n\t"
-		"punpcklwd %%mm1, %%mm0		\n\t"
-		"punpckhwd %%mm1, %%mm3		\n\t"
-		"movq %%mm2, %%mm1		\n\t"
-		"punpcklwd %%mm4, %%mm2		\n\t"
-		"punpckhwd %%mm4, %%mm1		\n\t"
-
-		"movd %%mm0, 4(%0)		\n\t"
-		"psrlq $32, %%mm0		\n\t"
-		"movd %%mm0, 4(%%"REG_a")		\n\t"
-		"movd %%mm3, 4(%%"REG_a", %1)	\n\t"
-		"psrlq $32, %%mm3		\n\t"
-		"movd %%mm3, 4(%%"REG_a", %1, 2)	\n\t"
-		"movd %%mm2, 4(%0, %1, 4)	\n\t"
-		"psrlq $32, %%mm2		\n\t"
-		"movd %%mm2, 4(%%"REG_d")		\n\t"
-		"movd %%mm1, 4(%%"REG_d", %1)	\n\t"
-		"psrlq $32, %%mm1		\n\t"
-		"movd %%mm1, 4(%%"REG_d", %1, 2)	\n\t"
-
-	:: "r" (dst), "r" ((stride_t)dstStride), "r" (src)
-	: "%"REG_a, "%"REG_d
-	);
-}
-#endif
-//static long test=0;
-
-#ifndef HAVE_ALTIVEC
-static inline void RENAME(tempNoiseReducer)(uint8_t *src, stride_t stride,
-				    uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
-{
-	/* Temporal noise reduction for one 8x8 block.
-	 *
-	 * src            current 8x8 block, lines are stride apart; overwritten
-	 *                with the blended result in every case except the plain
-	 *                copy case
-	 * tempBlured     reference block at the same x + y*stride offsets; always
-	 *                updated (blended result, or a copy of src)
-	 * tempBluredPast points at this block's entry in an array of difference
-	 *                values; the entries at -256, -1, +1 and +256 elements are
-	 *                read and the entry itself is written
-	 * maxNoise       three thresholds, maxNoise[0] < maxNoise[1] < maxNoise[2]
-	 *                presumably - confirm against the caller
-	 *
-	 * A difference measure d between src and tempBlured is computed over the
-	 * block, combined as (4*d + the four neighbour entries + 4) >> 3, and then
-	 * compared against the thresholds (C fallback):
-	 *   d >  maxNoise[1] and d >= maxNoise[2]: tempBlured = src (src unchanged)
-	 *   d >  maxNoise[1] and d <  maxNoise[2]: both = (ref + cur + 1) >> 1
-	 *   d <= maxNoise[1] and d >= maxNoise[0]: both = (3*ref + cur + 2) >> 2
-	 *   d <= maxNoise[1] and d <  maxNoise[0]: both = (7*ref + cur + 4) >> 3
-	 * The asm path takes the same four routes using one, two or three chained
-	 * PAVGB steps against the reference instead of the exact weighted sums.
-	 *
-	 * NOTE(review): the C fallback writes the raw sum of squared differences
-	 * to *tempBluredPast, while the asm path writes the combined, shifted
-	 * value - confirm which is intended.
-	 *
-	 * The three stores below: to save a register (FIXME do this outside of the
-	 * loops). They let the asm reach the thresholds through the tempBluredPast
-	 * pointer at byte offsets 508, 512 and 516.
-	 */
-	tempBluredPast[127]= maxNoise[0];
-	tempBluredPast[128]= maxNoise[1];
-	tempBluredPast[129]= maxNoise[2];
-
-#define FAST_L2_DIFF
-//#define L1_DIFF //you should change the thresholds too if you try that one
-#if HAVE_MMX2 || HAVE_AMD3DNOW
-	asm volatile(
-		"lea (%2, %2, 2), %%"REG_a"			\n\t" // 3*stride
-		"lea (%2, %2, 4), %%"REG_d"			\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-//	0	1	2	3	4	5	6	7	8	9
-//	%x	%x+%2	%x+2%2	%x+eax	%x+4%2	%x+edx	%x+2eax	%x+ecx	%x+8%2
-//FIXME reorder?
-#ifdef L1_DIFF //needs mmx2
-		"movq (%0), %%mm0				\n\t" // L0
-		"psadbw (%1), %%mm0				\n\t" // |L0-R0|
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		"psadbw (%1, %2), %%mm1				\n\t" // |L1-R1|
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		"psadbw (%1, %2, 2), %%mm2			\n\t" // |L2-R2|
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		"psadbw (%1, %%"REG_a"), %%mm3			\n\t" // |L3-R3|
-
-		"movq (%0, %2, 4), %%mm4			\n\t" // L4
-		"paddw %%mm1, %%mm0				\n\t"
-		"psadbw (%1, %2, 4), %%mm4			\n\t" // |L4-R4|
-		"movq (%0, %%"REG_d"), %%mm5			\n\t" // L5
-		"paddw %%mm2, %%mm0				\n\t"
-		"psadbw (%1, %%"REG_d"), %%mm5			\n\t" // |L5-R5|
-		"movq (%0, %%"REG_a", 2), %%mm6			\n\t" // L6
-		"paddw %%mm3, %%mm0				\n\t"
-		"psadbw (%1, %%"REG_a", 2), %%mm6		\n\t" // |L6-R6|
-		"movq (%0, %%"REG_c"), %%mm7			\n\t" // L7
-		"paddw %%mm4, %%mm0				\n\t"
-		"psadbw (%1, %%"REG_c"), %%mm7			\n\t" // |L7-R7|
-		"paddw %%mm5, %%mm6				\n\t"
-		"paddw %%mm7, %%mm6				\n\t"
-		"paddw %%mm6, %%mm0				\n\t"
-#else
-#if defined (FAST_L2_DIFF)
-		"pcmpeqb %%mm7, %%mm7				\n\t"
-		"movq "MANGLE(b80)", %%mm6			\n\t"
-		"pxor %%mm0, %%mm0				\n\t"
-#define REAL_L2_DIFF_CORE(a, b)\
-		"movq " #a ", %%mm5				\n\t"\
-		"movq " #b ", %%mm2				\n\t"\
-		"pxor %%mm7, %%mm2				\n\t"\
-		PAVGB(%%mm2, %%mm5)\
-		"paddb %%mm6, %%mm5				\n\t"\
-		"movq %%mm5, %%mm2				\n\t"\
-		"psllw $8, %%mm5				\n\t"\
-		"pmaddwd %%mm5, %%mm5				\n\t"\
-		"pmaddwd %%mm2, %%mm2				\n\t"\
-		"paddd %%mm2, %%mm5				\n\t"\
-		"psrld $14, %%mm5				\n\t"\
-		"paddd %%mm5, %%mm0				\n\t"
-
-#else
-		"pxor %%mm7, %%mm7				\n\t"
-		"pxor %%mm0, %%mm0				\n\t"
-#define REAL_L2_DIFF_CORE(a, b)\
-		"movq " #a ", %%mm5				\n\t"\
-		"movq " #b ", %%mm2				\n\t"\
-		"movq %%mm5, %%mm1				\n\t"\
-		"movq %%mm2, %%mm3				\n\t"\
-		"punpcklbw %%mm7, %%mm5				\n\t"\
-		"punpckhbw %%mm7, %%mm1				\n\t"\
-		"punpcklbw %%mm7, %%mm2				\n\t"\
-		"punpckhbw %%mm7, %%mm3				\n\t"\
-		"psubw %%mm2, %%mm5				\n\t"\
-		"psubw %%mm3, %%mm1				\n\t"\
-		"pmaddwd %%mm5, %%mm5				\n\t"\
-		"pmaddwd %%mm1, %%mm1				\n\t"\
-		"paddd %%mm1, %%mm5				\n\t"\
-		"paddd %%mm5, %%mm0				\n\t"
-
-#endif
-
-#define L2_DIFF_CORE(a, b)  REAL_L2_DIFF_CORE(a, b)
-
-L2_DIFF_CORE((%0), (%1))
-L2_DIFF_CORE((%0, %2), (%1, %2))
-L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2))
-L2_DIFF_CORE((%0, %%REGa), (%1, %%REGa))
-L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4))
-L2_DIFF_CORE((%0, %%REGd), (%1, %%REGd))
-L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
-L2_DIFF_CORE((%0, %%REGc), (%1, %%REGc))
-
-#endif
-
-		"movq %%mm0, %%mm4				\n\t"
-		"psrlq $32, %%mm0				\n\t"
-		"paddd %%mm0, %%mm4				\n\t"
-		"movd %%mm4, %%ecx				\n\t"
-		"shll $2, %%ecx					\n\t"
-		"mov %3, %%"REG_d"				\n\t"
-		"addl -4(%%"REG_d"), %%ecx			\n\t"
-		"addl 4(%%"REG_d"), %%ecx			\n\t"
-		"addl -1024(%%"REG_d"), %%ecx			\n\t"
-		"addl $4, %%ecx					\n\t"
-		"addl 1024(%%"REG_d"), %%ecx			\n\t"
-		"shrl $3, %%ecx					\n\t"
-		"movl %%ecx, (%%"REG_d")			\n\t"
-
-//		"mov %3, %%"REG_c"				\n\t"
-//		"mov %%"REG_c", test				\n\t"
-//		"jmp 4f \n\t"
-		"cmpl 512(%%"REG_d"), %%ecx			\n\t"
-		" jb 2f						\n\t"
-		"cmpl 516(%%"REG_d"), %%ecx			\n\t"
-		" jb 1f						\n\t"
-
-		"lea (%%"REG_a", %2, 2), %%"REG_d"		\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-		"movq (%0), %%mm0				\n\t" // L0
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		"movq (%0, %2, 4), %%mm4			\n\t" // L4
-		"movq (%0, %%"REG_d"), %%mm5			\n\t" // L5
-		"movq (%0, %%"REG_a", 2), %%mm6			\n\t" // L6
-		"movq (%0, %%"REG_c"), %%mm7			\n\t" // L7
-		"movq %%mm0, (%1)				\n\t" // L0
-		"movq %%mm1, (%1, %2)				\n\t" // L1
-		"movq %%mm2, (%1, %2, 2)			\n\t" // L2
-		"movq %%mm3, (%1, %%"REG_a")			\n\t" // L3
-		"movq %%mm4, (%1, %2, 4)			\n\t" // L4
-		"movq %%mm5, (%1, %%"REG_d")			\n\t" // L5
-		"movq %%mm6, (%1, %%"REG_a", 2)			\n\t" // L6
-		"movq %%mm7, (%1, %%"REG_c")			\n\t" // L7
-		"jmp 4f						\n\t"
-
-		"1:						\n\t"
-		"lea (%%"REG_a", %2, 2), %%"REG_d"		\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-		"movq (%0), %%mm0				\n\t" // L0
-		PAVGB((%1), %%mm0)				      // L0
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		PAVGB((%1, %2), %%mm1)				      // L1
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		PAVGB((%1, %2, 2), %%mm2)			      // L2
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		PAVGB((%1, %%REGa), %%mm3)			      // L3
-		"movq (%0, %2, 4), %%mm4			\n\t" // L4
-		PAVGB((%1, %2, 4), %%mm4)			      // L4
-		"movq (%0, %%"REG_d"), %%mm5			\n\t" // L5
-		PAVGB((%1, %%REGd), %%mm5)			      // L5
-		"movq (%0, %%"REG_a", 2), %%mm6			\n\t" // L6
-		PAVGB((%1, %%REGa, 2), %%mm6)			      // L6
-		"movq (%0, %%"REG_c"), %%mm7			\n\t" // L7
-		PAVGB((%1, %%REGc), %%mm7)			      // L7
-		"movq %%mm0, (%1)				\n\t" // R0
-		"movq %%mm1, (%1, %2)				\n\t" // R1
-		"movq %%mm2, (%1, %2, 2)			\n\t" // R2
-		"movq %%mm3, (%1, %%"REG_a")			\n\t" // R3
-		"movq %%mm4, (%1, %2, 4)			\n\t" // R4
-		"movq %%mm5, (%1, %%"REG_d")			\n\t" // R5
-		"movq %%mm6, (%1, %%"REG_a", 2)			\n\t" // R6
-		"movq %%mm7, (%1, %%"REG_c")			\n\t" // R7
-		"movq %%mm0, (%0)				\n\t" // L0
-		"movq %%mm1, (%0, %2)				\n\t" // L1
-		"movq %%mm2, (%0, %2, 2)			\n\t" // L2
-		"movq %%mm3, (%0, %%"REG_a")			\n\t" // L3
-		"movq %%mm4, (%0, %2, 4)			\n\t" // L4
-		"movq %%mm5, (%0, %%"REG_d")			\n\t" // L5
-		"movq %%mm6, (%0, %%"REG_a", 2)			\n\t" // L6
-		"movq %%mm7, (%0, %%"REG_c")			\n\t" // L7
-		"jmp 4f						\n\t"
-
-		"2:						\n\t"
-		"cmpl 508(%%"REG_d"), %%ecx			\n\t"
-		" jb 3f						\n\t"
-
-		"lea (%%"REG_a", %2, 2), %%"REG_d"		\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-		"movq (%0), %%mm0				\n\t" // L0
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		"movq (%1), %%mm4				\n\t" // R0
-		"movq (%1, %2), %%mm5				\n\t" // R1
-		"movq (%1, %2, 2), %%mm6			\n\t" // R2
-		"movq (%1, %%"REG_a"), %%mm7			\n\t" // R3
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		"movq %%mm0, (%1)				\n\t" // R0
-		"movq %%mm1, (%1, %2)				\n\t" // R1
-		"movq %%mm2, (%1, %2, 2)			\n\t" // R2
-		"movq %%mm3, (%1, %%"REG_a")			\n\t" // R3
-		"movq %%mm0, (%0)				\n\t" // L0
-		"movq %%mm1, (%0, %2)				\n\t" // L1
-		"movq %%mm2, (%0, %2, 2)			\n\t" // L2
-		"movq %%mm3, (%0, %%"REG_a")			\n\t" // L3
-
-		"movq (%0, %2, 4), %%mm0			\n\t" // L4
-		"movq (%0, %%"REG_d"), %%mm1			\n\t" // L5
-		"movq (%0, %%"REG_a", 2), %%mm2			\n\t" // L6
-		"movq (%0, %%"REG_c"), %%mm3			\n\t" // L7
-		"movq (%1, %2, 4), %%mm4			\n\t" // R4
-		"movq (%1, %%"REG_d"), %%mm5			\n\t" // R5
-		"movq (%1, %%"REG_a", 2), %%mm6			\n\t" // R6
-		"movq (%1, %%"REG_c"), %%mm7			\n\t" // R7
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		"movq %%mm0, (%1, %2, 4)			\n\t" // R4
-		"movq %%mm1, (%1, %%"REG_d")			\n\t" // R5
-		"movq %%mm2, (%1, %%"REG_a", 2)			\n\t" // R6
-		"movq %%mm3, (%1, %%"REG_c")			\n\t" // R7
-		"movq %%mm0, (%0, %2, 4)			\n\t" // L4
-		"movq %%mm1, (%0, %%"REG_d")			\n\t" // L5
-		"movq %%mm2, (%0, %%"REG_a", 2)			\n\t" // L6
-		"movq %%mm3, (%0, %%"REG_c")			\n\t" // L7
-		"jmp 4f						\n\t"
-
-		"3:						\n\t"
-		"lea (%%"REG_a", %2, 2), %%"REG_d"		\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-		"movq (%0), %%mm0				\n\t" // L0
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		"movq (%1), %%mm4				\n\t" // R0
-		"movq (%1, %2), %%mm5				\n\t" // R1
-		"movq (%1, %2, 2), %%mm6			\n\t" // R2
-		"movq (%1, %%"REG_a"), %%mm7			\n\t" // R3
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		"movq %%mm0, (%1)				\n\t" // R0
-		"movq %%mm1, (%1, %2)				\n\t" // R1
-		"movq %%mm2, (%1, %2, 2)			\n\t" // R2
-		"movq %%mm3, (%1, %%"REG_a")			\n\t" // R3
-		"movq %%mm0, (%0)				\n\t" // L0
-		"movq %%mm1, (%0, %2)				\n\t" // L1
-		"movq %%mm2, (%0, %2, 2)			\n\t" // L2
-		"movq %%mm3, (%0, %%"REG_a")			\n\t" // L3
-
-		"movq (%0, %2, 4), %%mm0			\n\t" // L4
-		"movq (%0, %%"REG_d"), %%mm1			\n\t" // L5
-		"movq (%0, %%"REG_a", 2), %%mm2			\n\t" // L6
-		"movq (%0, %%"REG_c"), %%mm3			\n\t" // L7
-		"movq (%1, %2, 4), %%mm4			\n\t" // R4
-		"movq (%1, %%"REG_d"), %%mm5			\n\t" // R5
-		"movq (%1, %%"REG_a", 2), %%mm6			\n\t" // R6
-		"movq (%1, %%"REG_c"), %%mm7			\n\t" // R7
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		"movq %%mm0, (%1, %2, 4)			\n\t" // R4
-		"movq %%mm1, (%1, %%"REG_d")			\n\t" // R5
-		"movq %%mm2, (%1, %%"REG_a", 2)			\n\t" // R6
-		"movq %%mm3, (%1, %%"REG_c")			\n\t" // R7
-		"movq %%mm0, (%0, %2, 4)			\n\t" // L4
-		"movq %%mm1, (%0, %%"REG_d")			\n\t" // L5
-		"movq %%mm2, (%0, %%"REG_a", 2)			\n\t" // L6
-		"movq %%mm3, (%0, %%"REG_c")			\n\t" // L7
-
-		"4:						\n\t"
-
-		:: "r" (src), "r" (tempBlured), "r"((stride_t)stride), "m" (tempBluredPast)
-		: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
-		);
-//printf("%d\n", test);
-#else
-{
-	int y;
-	int d=0;
-//	int sysd=0;
-	int i;
-
-	for(y=0; y<8; y++)
-	{
-		int x;
-		for(x=0; x<8; x++)
-		{
-			int ref= tempBlured[ x + y*stride ];
-			int cur= src[ x + y*stride ];
-			int d1=ref - cur;
-//			if(x==0 || x==7) d1+= d1>>1;
-//			if(y==0 || y==7) d1+= d1>>1;
-//			d+= ABS(d1);
-			d+= d1*d1;
-//			sysd+= d1;
-		}
-	}
-	i=d;
-	d= 	(
-		4*d
-		+(*(tempBluredPast-256))
-		+(*(tempBluredPast-1))+ (*(tempBluredPast+1))
-		+(*(tempBluredPast+256))
-		+4)>>3;
-	*tempBluredPast=i;
-//	((*tempBluredPast)*3 + d + 2)>>2;
-
-//printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]);
-/*
-Switch between
- 1  0  0  0  0  0  0  (0)
-64 32 16  8  4  2  1  (1)
-64 48 36 27 20 15 11 (33) (approx)
-64 56 49 43 37 33 29 (200) (approx)
-Each row presumably lists the relative weight of successive past frames for
-one of the four cases below, in the order: copy, 1:1 blend, 3:1 blend,
-7:1 blend (reference:current) - TODO confirm.
-*/
-	if(d > maxNoise[1])
-	{
-		if(d < maxNoise[2])
-		{
-			for(y=0; y<8; y++)
-			{
-				int x;
-				for(x=0; x<8; x++)
-				{
-					int ref= tempBlured[ x + y*stride ];
-					int cur= src[ x + y*stride ];
-					tempBlured[ x + y*stride ]=
-					src[ x + y*stride ]=
-						(ref + cur + 1)>>1;
-				}
-			}
-		}
-		else
-		{
-			for(y=0; y<8; y++)
-			{
-				int x;
-				for(x=0; x<8; x++)
-				{
-					tempBlured[ x + y*stride ]= src[ x + y*stride ];
-				}
-			}
-		}
-	}
-	else
-	{
-		if(d < maxNoise[0])
-		{
-			for(y=0; y<8; y++)
-			{
-				int x;
-				for(x=0; x<8; x++)
-				{
-					int ref= tempBlured[ x + y*stride ];
-					int cur= src[ x + y*stride ];
-					tempBlured[ x + y*stride ]=
-					src[ x + y*stride ]=
-						(ref*7 + cur + 4)>>3;
-				}
-			}
-		}
-		else
-		{
-			for(y=0; y<8; y++)
-			{
-				int x;
-				for(x=0; x<8; x++)
-				{
-					int ref= tempBlured[ x + y*stride ];
-					int cur= src[ x + y*stride ];
-					tempBlured[ x + y*stride ]=
-					src[ x + y*stride ]=
-						(ref*3 + cur + 2)>>2;
-				}
-			}
-		}
-	}
-}
-#endif
-}
-#endif //HAVE_ALTIVEC
-
-#if HAVE_MMX
-/**
- * accurate deblock filter
- */
-static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
-	int64_t dc_mask, eq_mask, both_masks;
-	int64_t sums[10*8*2];
-	src+= step*3; // src points to begin of the 8x8 Block
-//START_TIMER
-asm volatile(
-		"movq %0, %%mm7					\n\t"
-		"movq %1, %%mm6					\n\t"
-                : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
-                );
-
-asm volatile(
-		"lea (%2, %3), %%"REG_a"			\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%1	eax	eax+%2	eax+2%2	%1+4%2	ecx	ecx+%2	ecx+2%2	%1+8%2	ecx+4%2
-
-		"movq (%2), %%mm0				\n\t"
-		"movq (%%"REG_a"), %%mm1			\n\t"
-                "movq %%mm1, %%mm3				\n\t"
-                "movq %%mm1, %%mm4				\n\t"
-		"psubb %%mm1, %%mm0				\n\t" // mm0 = differnece
-		"paddb %%mm7, %%mm0				\n\t"
-		"pcmpgtb %%mm6, %%mm0				\n\t"
-
-		"movq (%%"REG_a",%3), %%mm2			\n\t"
-                PMAXUB(%%mm2, %%mm4)
-                PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-
-		"movq (%%"REG_a", %3, 2), %%mm1			\n\t"
-                PMAXUB(%%mm1, %%mm4)
-                PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-
-		"lea (%%"REG_a", %3, 4), %%"REG_a"		\n\t"
-
-		"movq (%2, %3, 4), %%mm2			\n\t"
-                PMAXUB(%%mm2, %%mm4)
-                PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-
-		"movq (%%"REG_a"), %%mm1			\n\t"
-                PMAXUB(%%mm1, %%mm4)
-                PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-
-		"movq (%%"REG_a", %3), %%mm2			\n\t"
-                PMAXUB(%%mm2, %%mm4)
-                PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-
-		"movq (%%"REG_a", %3, 2), %%mm1			\n\t"
-                PMAXUB(%%mm1, %%mm4)
-                PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-
-		"movq (%2, %3, 8), %%mm2			\n\t"
-                PMAXUB(%%mm2, %%mm4)
-                PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-
-		"movq (%%"REG_a", %3, 4), %%mm1			\n\t"
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-		"psubusb %%mm3, %%mm4				\n\t"
-
-		"pxor %%mm6, %%mm6				\n\t"
-                "movq %4, %%mm7					\n\t" // QP,..., QP
-		"paddusb %%mm7, %%mm7				\n\t" // 2QP ... 2QP
-		"psubusb %%mm4, %%mm7				\n\t" // Diff >=2QP -> 0
-		"pcmpeqb %%mm6, %%mm7				\n\t" // Diff < 2QP -> 0
-		"pcmpeqb %%mm6, %%mm7				\n\t" // Diff < 2QP -> 0
-		"movq %%mm7, %1					\n\t"
-
-		"movq %5, %%mm7					\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"psubb %%mm0, %%mm6				\n\t"
-		"pcmpgtb %%mm7, %%mm6				\n\t"
-		"movq %%mm6, %0					\n\t"
-
-		: "=m" (eq_mask), "=m" (dc_mask)
-		: "r" (src), "r" ((stride_t)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
-		: "%"REG_a
-		);
-
-	both_masks = dc_mask & eq_mask;
-
-	if(both_masks){
-		stride_t offset= -8*step;
-		int64_t *temp_sums= sums;
-
-		asm volatile(
-		"movq %2, %%mm0					\n\t"  // QP,..., QP
-		"pxor %%mm4, %%mm4				\n\t"
-
-		"movq (%0), %%mm6				\n\t"
-		"movq (%0, %1), %%mm5				\n\t"
-		"movq %%mm5, %%mm1				\n\t"
-		"movq %%mm6, %%mm2				\n\t"
-		"psubusb %%mm6, %%mm5				\n\t"
-		"psubusb %%mm1, %%mm2				\n\t"
-		"por %%mm5, %%mm2				\n\t" // ABS Diff of lines
-		"psubusb %%mm2, %%mm0				\n\t" // diff >= QP -> 0
-		"pcmpeqb %%mm4, %%mm0				\n\t" // diff >= QP -> FF
-
-		"pxor %%mm6, %%mm1				\n\t"
-		"pand %%mm0, %%mm1				\n\t"
-		"pxor %%mm1, %%mm6				\n\t"
-		// 0:QP  6:First
-
-		"movq (%0, %1, 8), %%mm5			\n\t"
-		"add %1, %0					\n\t" // %0 points to line 1 not 0
-		"movq (%0, %1, 8), %%mm7			\n\t"
-		"movq %%mm5, %%mm1				\n\t"
-		"movq %%mm7, %%mm2				\n\t"
-		"psubusb %%mm7, %%mm5				\n\t"
-		"psubusb %%mm1, %%mm2				\n\t"
-		"por %%mm5, %%mm2				\n\t" // ABS Diff of lines
-		"movq %2, %%mm0					\n\t"  // QP,..., QP
-		"psubusb %%mm2, %%mm0				\n\t" // diff >= QP -> 0
-		"pcmpeqb %%mm4, %%mm0				\n\t" // diff >= QP -> FF
-
-		"pxor %%mm7, %%mm1				\n\t"
-		"pand %%mm0, %%mm1				\n\t"
-		"pxor %%mm1, %%mm7				\n\t"
-
-		"movq %%mm6, %%mm5				\n\t"
-		"punpckhbw %%mm4, %%mm6				\n\t"
-		"punpcklbw %%mm4, %%mm5				\n\t"
-		// 4:0 5/6:First 7:Last
-
-		"movq %%mm5, %%mm0				\n\t"
-		"movq %%mm6, %%mm1				\n\t"
-		"psllw $2, %%mm0				\n\t"
-		"psllw $2, %%mm1				\n\t"
-		"paddw "MANGLE(w04)", %%mm0			\n\t"
-		"paddw "MANGLE(w04)", %%mm1			\n\t"
-
-#define NEXT\
-		"movq (%0), %%mm2				\n\t"\
-		"movq (%0), %%mm3				\n\t"\
-		"add %1, %0					\n\t"\
-		"punpcklbw %%mm4, %%mm2				\n\t"\
-		"punpckhbw %%mm4, %%mm3				\n\t"\
-		"paddw %%mm2, %%mm0				\n\t"\
-		"paddw %%mm3, %%mm1				\n\t"
-
-#define PREV\
-		"movq (%0), %%mm2				\n\t"\
-		"movq (%0), %%mm3				\n\t"\
-		"add %1, %0					\n\t"\
-		"punpcklbw %%mm4, %%mm2				\n\t"\
-		"punpckhbw %%mm4, %%mm3				\n\t"\
-		"psubw %%mm2, %%mm0				\n\t"\
-		"psubw %%mm3, %%mm1				\n\t"
-
-
-		NEXT //0
-		NEXT //1
-		NEXT //2
-		"movq %%mm0, (%3)				\n\t"
-		"movq %%mm1, 8(%3)				\n\t"
-
-		NEXT //3
-		"psubw %%mm5, %%mm0				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-		"movq %%mm0, 16(%3)				\n\t"
-		"movq %%mm1, 24(%3)				\n\t"
-
-		NEXT //4
-		"psubw %%mm5, %%mm0				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-		"movq %%mm0, 32(%3)				\n\t"
-		"movq %%mm1, 40(%3)				\n\t"
-
-		NEXT //5
-		"psubw %%mm5, %%mm0				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-		"movq %%mm0, 48(%3)				\n\t"
-		"movq %%mm1, 56(%3)				\n\t"
-
-		NEXT //6
-		"psubw %%mm5, %%mm0				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-		"movq %%mm0, 64(%3)				\n\t"
-		"movq %%mm1, 72(%3)				\n\t"
-
-		"movq %%mm7, %%mm6				\n\t"
-		"punpckhbw %%mm4, %%mm7				\n\t"
-		"punpcklbw %%mm4, %%mm6				\n\t"
-
-		NEXT //7
-		"mov %4, %0					\n\t"
-		"add %1, %0					\n\t"
-		PREV //0
-		"movq %%mm0, 80(%3)				\n\t"
-		"movq %%mm1, 88(%3)				\n\t"
-
-		PREV //1
-		"paddw %%mm6, %%mm0				\n\t"
-		"paddw %%mm7, %%mm1				\n\t"
-		"movq %%mm0, 96(%3)				\n\t"
-		"movq %%mm1, 104(%3)				\n\t"
-
-		PREV //2
-		"paddw %%mm6, %%mm0				\n\t"
-		"paddw %%mm7, %%mm1				\n\t"
-		"movq %%mm0, 112(%3)				\n\t"
-		"movq %%mm1, 120(%3)				\n\t"
-
-		PREV //3
-		"paddw %%mm6, %%mm0				\n\t"
-		"paddw %%mm7, %%mm1				\n\t"
-		"movq %%mm0, 128(%3)				\n\t"
-		"movq %%mm1, 136(%3)				\n\t"
-
-		PREV //4
-		"paddw %%mm6, %%mm0				\n\t"
-		"paddw %%mm7, %%mm1				\n\t"
-		"movq %%mm0, 144(%3)				\n\t"
-		"movq %%mm1, 152(%3)				\n\t"
-
-		"mov %4, %0					\n\t" //FIXME
-
-		: "+&r"(src)
-		: "r" ((stride_t)step), "m" (c->pQPb), "r"(sums), "g"(src)
-		);
-
-		src+= step; // src points to begin of the 8x8 Block
-
-		asm volatile(
-		"movq %4, %%mm6					\n\t"
-		"pcmpeqb %%mm5, %%mm5				\n\t"
-		"pxor %%mm6, %%mm5				\n\t"
-		"pxor %%mm7, %%mm7				\n\t"
-
-		"1:						\n\t"
-		"movq (%1), %%mm0				\n\t"
-		"movq 8(%1), %%mm1				\n\t"
-		"paddw 32(%1), %%mm0				\n\t"
-		"paddw 40(%1), %%mm1				\n\t"
-		"movq (%0, %3), %%mm2				\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"movq %%mm2, %%mm4				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t"
-		"punpckhbw %%mm7, %%mm3				\n\t"
-		"paddw %%mm2, %%mm0				\n\t"
-		"paddw %%mm3, %%mm1				\n\t"
-		"paddw %%mm2, %%mm0				\n\t"
-		"paddw %%mm3, %%mm1				\n\t"
-		"psrlw $4, %%mm0				\n\t"
-		"psrlw $4, %%mm1				\n\t"
-		"packuswb %%mm1, %%mm0				\n\t"
-		"pand %%mm6, %%mm0				\n\t"
-		"pand %%mm5, %%mm4				\n\t"
-		"por %%mm4, %%mm0				\n\t"
-		"movq %%mm0, (%0, %3)				\n\t"
-		"add $16, %1					\n\t"
-		"add %2, %0					\n\t"
-		" js 1b						\n\t"
-
-		: "+r"(offset), "+r"(temp_sums)
-		: "r" ((stride_t)step), "r"(src - offset), "m"(both_masks)
-		);
-	}else
-		src+= step; // src points to begin of the 8x8 Block
-
-	if(eq_mask != -1LL){
-		uint8_t *temp_src= src;
-		asm volatile(
-		"pxor %%mm7, %%mm7				\n\t"
-		"lea -40(%%"REG_SP"), %%"REG_c"			\n\t" // make space for 4 8-byte vars
-		"and "ALIGN_MASK", %%"REG_c"			\n\t" // align
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1	%1+8%1	ecx+4%1
-
-		"movq (%0), %%mm0				\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"punpcklbw %%mm7, %%mm0				\n\t" // low part of line 0
-		"punpckhbw %%mm7, %%mm1				\n\t" // high part of line 0
-
-		"movq (%0, %1), %%mm2				\n\t"
-		"lea (%0, %1, 2), %%"REG_a"			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // low part of line 1
-		"punpckhbw %%mm7, %%mm3				\n\t" // high part of line 1
-
-		"movq (%%"REG_a"), %%mm4			\n\t"
-		"movq %%mm4, %%mm5				\n\t"
-		"punpcklbw %%mm7, %%mm4				\n\t" // low part of line 2
-		"punpckhbw %%mm7, %%mm5				\n\t" // high part of line 2
-
-		"paddw %%mm0, %%mm0				\n\t" // 2L0
-		"paddw %%mm1, %%mm1				\n\t" // 2H0
-		"psubw %%mm4, %%mm2				\n\t" // L1 - L2
-		"psubw %%mm5, %%mm3				\n\t" // H1 - H2
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - L1 + L2
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - H1 + H2
-
-		"psllw $2, %%mm2				\n\t" // 4L1 - 4L2
-		"psllw $2, %%mm3				\n\t" // 4H1 - 4H2
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2
-
-		"movq (%%"REG_a", %1), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L3
-		"punpckhbw %%mm7, %%mm3				\n\t" // H3
-
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2 - L3
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2 - H3
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-		"movq %%mm0, (%%"REG_c")			\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"movq %%mm1, 8(%%"REG_c")			\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"punpcklbw %%mm7, %%mm0				\n\t" // L4
-		"punpckhbw %%mm7, %%mm1				\n\t" // H4
-
-		"psubw %%mm0, %%mm2				\n\t" // L3 - L4
-		"psubw %%mm1, %%mm3				\n\t" // H3 - H4
-		"movq %%mm2, 16(%%"REG_c")			\n\t" // L3 - L4
-		"movq %%mm3, 24(%%"REG_c")			\n\t" // H3 - H4
-		"paddw %%mm4, %%mm4				\n\t" // 2L2
-		"paddw %%mm5, %%mm5				\n\t" // 2H2
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - L3 + L4
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - H3 + H4
-
-		"lea (%%"REG_a", %1), %0			\n\t"
-		"psllw $2, %%mm2				\n\t" // 4L3 - 4L4
-		"psllw $2, %%mm3				\n\t" // 4H3 - 4H4
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4
-//50 opcodes so far
-		"movq (%0, %1, 2), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L5
-		"punpckhbw %%mm7, %%mm3				\n\t" // H5
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4 - L5
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4 - H5
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4 - 2L5
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4 - 2H5
-
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t"
-		"punpcklbw %%mm7, %%mm6				\n\t" // L6
-		"psubw %%mm6, %%mm2				\n\t" // L5 - L6
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t"
-		"punpckhbw %%mm7, %%mm6				\n\t" // H6
-		"psubw %%mm6, %%mm3				\n\t" // H5 - H6
-
-		"paddw %%mm0, %%mm0				\n\t" // 2L4
-		"paddw %%mm1, %%mm1				\n\t" // 2H4
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - L5 + L6
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - H5 + H6
-
-		"psllw $2, %%mm2				\n\t" // 4L5 - 4L6
-		"psllw $2, %%mm3				\n\t" // 4H5 - 4H6
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - 5L5 + 5L6
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - 5H5 + 5H6
-
-		"movq (%0, %1, 4), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L7
-		"punpckhbw %%mm7, %%mm3				\n\t" // H7
-
-		"paddw %%mm2, %%mm2				\n\t" // 2L7
-		"paddw %%mm3, %%mm3				\n\t" // 2H7
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - 5L5 + 5L6 - 2L7
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - 5H5 + 5H6 - 2H7
-
-		"movq (%%"REG_c"), %%mm2			\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"movq 8(%%"REG_c"), %%mm3			\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-
-#if HAVE_MMX2
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm0, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm1, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm2, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm3, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
-#else
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm0, %%mm6				\n\t"
-		"pxor %%mm6, %%mm0				\n\t"
-		"psubw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm1, %%mm6				\n\t"
-		"pxor %%mm6, %%mm1				\n\t"
-		"psubw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm2, %%mm6				\n\t"
-		"pxor %%mm6, %%mm2				\n\t"
-		"psubw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm3, %%mm6				\n\t"
-		"pxor %%mm6, %%mm3				\n\t"
-		"psubw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
-#endif
-
-#if HAVE_MMX2
-		"pminsw %%mm2, %%mm0				\n\t"
-		"pminsw %%mm3, %%mm1				\n\t"
-#else
-		"movq %%mm0, %%mm6				\n\t"
-		"psubusw %%mm2, %%mm6				\n\t"
-		"psubw %%mm6, %%mm0				\n\t"
-		"movq %%mm1, %%mm6				\n\t"
-		"psubusw %%mm3, %%mm6				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-#endif
-
-		"movd %2, %%mm2					\n\t" // QP
-		"punpcklbw %%mm7, %%mm2				\n\t"
-
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm4, %%mm6				\n\t" // sign(2L2 - 5L3 + 5L4 - 2L5)
-		"pxor %%mm6, %%mm4				\n\t"
-		"psubw %%mm6, %%mm4				\n\t" // |2L2 - 5L3 + 5L4 - 2L5|
-		"pcmpgtw %%mm5, %%mm7				\n\t" // sign(2H2 - 5H3 + 5H4 - 2H5)
-		"pxor %%mm7, %%mm5				\n\t"
-		"psubw %%mm7, %%mm5				\n\t" // |2H2 - 5H3 + 5H4 - 2H5|
-// 100 opcodes
-		"psllw $3, %%mm2				\n\t" // 8QP
-		"movq %%mm2, %%mm3				\n\t" // 8QP
-		"pcmpgtw %%mm4, %%mm2				\n\t"
-		"pcmpgtw %%mm5, %%mm3				\n\t"
-		"pand %%mm2, %%mm4				\n\t"
-		"pand %%mm3, %%mm5				\n\t"
-
-
-		"psubusw %%mm0, %%mm4				\n\t" // hd
-		"psubusw %%mm1, %%mm5				\n\t" // ld
-
-
-		"movq "MANGLE(w05)", %%mm2			\n\t" // 5
-		"pmullw %%mm2, %%mm4				\n\t"
-		"pmullw %%mm2, %%mm5				\n\t"
-		"movq "MANGLE(w20)", %%mm2			\n\t" // 32
-		"paddw %%mm2, %%mm4				\n\t"
-		"paddw %%mm2, %%mm5				\n\t"
-		"psrlw $6, %%mm4				\n\t"
-		"psrlw $6, %%mm5				\n\t"
-
-		"movq 16(%%"REG_c"), %%mm0			\n\t" // L3 - L4
-		"movq 24(%%"REG_c"), %%mm1			\n\t" // H3 - H4
-
-		"pxor %%mm2, %%mm2				\n\t"
-		"pxor %%mm3, %%mm3				\n\t"
-
-		"pcmpgtw %%mm0, %%mm2				\n\t" // sign (L3-L4)
-		"pcmpgtw %%mm1, %%mm3				\n\t" // sign (H3-H4)
-		"pxor %%mm2, %%mm0				\n\t"
-		"pxor %%mm3, %%mm1				\n\t"
-		"psubw %%mm2, %%mm0				\n\t" // |L3-L4|
-		"psubw %%mm3, %%mm1				\n\t" // |H3-H4|
-		"psrlw $1, %%mm0				\n\t" // |L3 - L4|/2
-		"psrlw $1, %%mm1				\n\t" // |H3 - H4|/2
-
-		"pxor %%mm6, %%mm2				\n\t"
-		"pxor %%mm7, %%mm3				\n\t"
-		"pand %%mm2, %%mm4				\n\t"
-		"pand %%mm3, %%mm5				\n\t"
-
-#if HAVE_MMX2
-		"pminsw %%mm0, %%mm4				\n\t"
-		"pminsw %%mm1, %%mm5				\n\t"
-#else
-		"movq %%mm4, %%mm2				\n\t"
-		"psubusw %%mm0, %%mm2				\n\t"
-		"psubw %%mm2, %%mm4				\n\t"
-		"movq %%mm5, %%mm2				\n\t"
-		"psubusw %%mm1, %%mm2				\n\t"
-		"psubw %%mm2, %%mm5				\n\t"
-#endif
-		"pxor %%mm6, %%mm4				\n\t"
-		"pxor %%mm7, %%mm5				\n\t"
-		"psubw %%mm6, %%mm4				\n\t"
-		"psubw %%mm7, %%mm5				\n\t"
-		"packsswb %%mm5, %%mm4				\n\t"
-		"movq %3, %%mm1					\n\t"
-		"pandn %%mm4, %%mm1				\n\t"
-		"movq (%0), %%mm0				\n\t"
-		"paddb   %%mm1, %%mm0				\n\t"
-		"movq %%mm0, (%0)				\n\t"
-		"movq (%0, %1), %%mm0				\n\t"
-		"psubb %%mm1, %%mm0				\n\t"
-		"movq %%mm0, (%0, %1)				\n\t"
-
-		: "+r" (temp_src)
-		: "r" ((stride_t)step), "m" (c->pQPb), "m"(eq_mask)
-		: "%"REG_a, "%"REG_c
-		);
-	}
-/*if(step==16){
-    STOP_TIMER("step16")
-}else{
-    STOP_TIMER("stepX")
-}*/
-}
-#endif //HAVE_MMX
-
-static void RENAME(postProcess)(uint8_t src[], stride_t srcStride, uint8_t dst[], stride_t dstStride, int width, int height,
-	QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
-
-/**
- * Copies a block from src to dst and fixes the blacklevel
- * levelFix == 0 -> dont touch the brighness & contrast
- */
-#undef SCALED_CPY
-
-static inline void RENAME(blockCopy)(uint8_t dst[], stride_t dstStride, uint8_t src[], stride_t srcStride,
-	int levelFix, int64_t *packedOffsetAndScale)
-{
-#if !HAVE_MMX
-	int i;
-#endif
-	if(levelFix)
-	{
-#if HAVE_MMX
-					asm volatile(
-						"movq (%%"REG_a"), %%mm2	\n\t" // packedYOffset
-						"movq 8(%%"REG_a"), %%mm3	\n\t" // packedYScale
-						"lea (%2,%4), %%"REG_a"	\n\t"
-						"lea (%3,%5), %%"REG_d"	\n\t"
-						"pxor %%mm4, %%mm4	\n\t"
-#if HAVE_MMX2
-#define REAL_SCALED_CPY(src1, src2, dst1, dst2)					\
-						"movq " #src1 ", %%mm0	\n\t"\
-						"movq " #src1 ", %%mm5	\n\t"\
-						"movq " #src2 ", %%mm1	\n\t"\
-						"movq " #src2 ", %%mm6	\n\t"\
-						"punpcklbw %%mm0, %%mm0 \n\t"\
-						"punpckhbw %%mm5, %%mm5 \n\t"\
-						"punpcklbw %%mm1, %%mm1 \n\t"\
-						"punpckhbw %%mm6, %%mm6 \n\t"\
-						"pmulhuw %%mm3, %%mm0	\n\t"\
-						"pmulhuw %%mm3, %%mm5	\n\t"\
-						"pmulhuw %%mm3, %%mm1	\n\t"\
-						"pmulhuw %%mm3, %%mm6	\n\t"\
-						"psubw %%mm2, %%mm0	\n\t"\
-						"psubw %%mm2, %%mm5	\n\t"\
-						"psubw %%mm2, %%mm1	\n\t"\
-						"psubw %%mm2, %%mm6	\n\t"\
-						"packuswb %%mm5, %%mm0	\n\t"\
-						"packuswb %%mm6, %%mm1	\n\t"\
-						"movq %%mm0, " #dst1 "	\n\t"\
-						"movq %%mm1, " #dst2 "	\n\t"\
-
-#else //HAVE_MMX2
-#define REAL_SCALED_CPY(src1, src2, dst1, dst2)					\
-						"movq " #src1 ", %%mm0	\n\t"\
-						"movq " #src1 ", %%mm5	\n\t"\
-						"punpcklbw %%mm4, %%mm0 \n\t"\
-						"punpckhbw %%mm4, %%mm5 \n\t"\
-						"psubw %%mm2, %%mm0	\n\t"\
-						"psubw %%mm2, %%mm5	\n\t"\
-						"movq " #src2 ", %%mm1	\n\t"\
-						"psllw $6, %%mm0	\n\t"\
-						"psllw $6, %%mm5	\n\t"\
-						"pmulhw %%mm3, %%mm0	\n\t"\
-						"movq " #src2 ", %%mm6	\n\t"\
-						"pmulhw %%mm3, %%mm5	\n\t"\
-						"punpcklbw %%mm4, %%mm1 \n\t"\
-						"punpckhbw %%mm4, %%mm6 \n\t"\
-						"psubw %%mm2, %%mm1	\n\t"\
-						"psubw %%mm2, %%mm6	\n\t"\
-						"psllw $6, %%mm1	\n\t"\
-						"psllw $6, %%mm6	\n\t"\
-						"pmulhw %%mm3, %%mm1	\n\t"\
-						"pmulhw %%mm3, %%mm6	\n\t"\
-						"packuswb %%mm5, %%mm0	\n\t"\
-						"packuswb %%mm6, %%mm1	\n\t"\
-						"movq %%mm0, " #dst1 "	\n\t"\
-						"movq %%mm1, " #dst2 "	\n\t"\
-
-#endif //!HAVE_MMX2
-#define SCALED_CPY(src1, src2, dst1, dst2)\
-   REAL_SCALED_CPY(src1, src2, dst1, dst2)
-
-SCALED_CPY((%2)       , (%2, %4)      , (%3)       , (%3, %5))
-SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
-SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
-						"lea (%%"REG_a",%4,4), %%"REG_a"	\n\t"
-						"lea (%%"REG_d",%5,4), %%"REG_d"	\n\t"
-SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
-
-
-						: "=&a" (packedOffsetAndScale)
-						: "0" (packedOffsetAndScale),
-						"r"(src),
-						"r"(dst),
-						"r" ((stride_t)srcStride),
-						"r" ((stride_t)dstStride)
-						: "%"REG_d
-					);
-#else
-				for(i=0; i<8; i++)
-					memcpy(	&(dst[dstStride*i]),
-						&(src[srcStride*i]), BLOCK_SIZE);
-#endif
-	}
-	else
-	{
-#if HAVE_MMX
-					asm volatile(
-						"lea (%0,%2), %%"REG_a"	\n\t"
-						"lea (%1,%3), %%"REG_d"	\n\t"
-
-#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)				\
-						"movq " #src1 ", %%mm0	\n\t"\
-						"movq " #src2 ", %%mm1	\n\t"\
-						"movq %%mm0, " #dst1 "	\n\t"\
-						"movq %%mm1, " #dst2 "	\n\t"\
-
-#define SIMPLE_CPY(src1, src2, dst1, dst2)\
-   REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
-
-SIMPLE_CPY((%0)       , (%0, %2)      , (%1)       , (%1, %3))
-SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
-SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
-						"lea (%%"REG_a",%2,4), %%"REG_a"	\n\t"
-						"lea (%%"REG_d",%3,4), %%"REG_d"	\n\t"
-SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
-
-						: : "r" (src),
-						"r" (dst),
-						"r" ((stride_t)srcStride),
-						"r" ((stride_t)dstStride)
-						: "%"REG_a, "%"REG_d
-					);
-#else
-				for(i=0; i<8; i++)
-					memcpy(	&(dst[dstStride*i]),
-						&(src[srcStride*i]), BLOCK_SIZE);
-#endif
-	}
-}
-
-/**
- * Duplicates the given 8 src pixels ? times upward
- */
-static inline void RENAME(duplicate)(uint8_t src[], stride_t stride)
-{
-#if HAVE_MMX
-	asm volatile(
-		"movq (%0), %%mm0		\n\t"
-		"add %1, %0			\n\t"
-		"movq %%mm0, (%0)		\n\t"
-		"movq %%mm0, (%0, %1)		\n\t"
-		"movq %%mm0, (%0, %1, 2)	\n\t"
-		: "+r" (src)
-		: "r" ((stride_t)-stride)
-	);
-#else
-	int i;
-	uint8_t *p=src;
-	for(i=0; i<3; i++)
-	{
-		p-= stride;
-		memcpy(p, src, 8);
-	}
-#endif
-}
-
-/**
- * Filters array of bytes (Y or U or V values)
- */
-static void RENAME(postProcess)(uint8_t src[], stride_t srcStride, uint8_t dst[], stride_t dstStride, int width, int height,
-	QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
-{
-	PPContext __attribute__((aligned(8))) c= *c2; //copy to stack for faster access
-	int x,y;
-#ifdef COMPILE_TIME_MODE
-	const int mode= COMPILE_TIME_MODE;
-#else
-	const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
-#endif
-	int black=0, white=255; // blackest black and whitest white in the picture
-	int QPCorrecture= 256*256;
-
-	stride_t copyAhead;
-#if HAVE_MMX
-	int i;
-#endif
-
-	const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
-	const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
-
-	//FIXME remove
-	uint64_t * const yHistogram= c.yHistogram;
-	uint8_t * const tempSrc= c.tempSrc+24*width;
-	uint8_t * const tempDst= c.tempDst+24*width;
-	//const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
-
-#if HAVE_MMX
-	for(i=0; i<57; i++){
-		int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
-		int threshold= offset*2 + 1;
-		c.mmxDcOffset[i]= 0x7F - offset;
-		c.mmxDcThreshold[i]= 0x7F - threshold;
-		c.mmxDcOffset[i]*= 0x0101010101010101LL;
-		c.mmxDcThreshold[i]*= 0x0101010101010101LL;
-	}
-#endif
-
-	if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
-	else if(   (mode & LINEAR_BLEND_DEINT_FILTER)
-		|| (mode & FFMPEG_DEINT_FILTER)
-		|| (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
-	else if(   (mode & V_DEBLOCK)
-		|| (mode & LINEAR_IPOL_DEINT_FILTER)
-		|| (mode & MEDIAN_DEINT_FILTER)
-		|| (mode & V_A_DEBLOCK)) copyAhead=13;
-	else if(mode & V_X1_FILTER) copyAhead=11;
-//	else if(mode & V_RK1_FILTER) copyAhead=10;
-	else if(mode & DERING) copyAhead=9;
-	else copyAhead=8;
-
-	copyAhead-= 8;
-
-	if(!isColor)
-	{
-		uint64_t sum= 0;
-		int i;
-		uint64_t maxClipped;
-		uint64_t clipped;
-		double scale;
-
-		c.frameNum++;
-		// first frame is fscked so we ignore it
-		if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
-
-		for(i=0; i<256; i++)
-		{
-			sum+= yHistogram[i];
-//			printf("%d ", yHistogram[i]);
-		}
-//		printf("\n\n");
-
-		/* we allways get a completly black picture first */
-		maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
-
-		clipped= sum;
-		for(black=255; black>0; black--)
-		{
-			if(clipped < maxClipped) break;
-			clipped-= yHistogram[black];
-		}
-
-		clipped= sum;
-		for(white=0; white<256; white++)
-		{
-			if(clipped < maxClipped) break;
-			clipped-= yHistogram[white];
-		}
-
-		scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
-
-#if HAVE_MMX2
-		c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
-		c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
-#else
-		c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
-		c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
-#endif
-
-		c.packedYOffset|= c.packedYOffset<<32;
-		c.packedYOffset|= c.packedYOffset<<16;
-
-		c.packedYScale|= c.packedYScale<<32;
-		c.packedYScale|= c.packedYScale<<16;
-
-		if(mode & LEVEL_FIX)	QPCorrecture= (int)(scale*256*256 + 0.5);
-		else			QPCorrecture= 256*256;
-	}
-	else
-	{
-		c.packedYScale= 0x0100010001000100LL;
-		c.packedYOffset= 0;
-		QPCorrecture= 256*256;
-	}
-
-	/* copy & deinterlace first row of blocks */
-	y=-BLOCK_SIZE;
-	{
-		uint8_t *srcBlock= &(src[y*srcStride]);
-		uint8_t *dstBlock= tempDst + dstStride;
-
-		// From this point on it is guranteed that we can read and write 16 lines downward
-		// finish 1 block before the next otherwise we might have a problem
-		// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
-		for(x=0; x<width; x+=BLOCK_SIZE)
-		{
-
-#if HAVE_MMX2
-/*
-			prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-			prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-			prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-			prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
-*/
-
-			asm(
-				"mov %4, %%"REG_a"		\n\t"
-				"shr $2, %%"REG_a"		\n\t"
-				"and $6, %%"REG_a"		\n\t"
-				"add %5, %%"REG_a"		\n\t"
-				"mov %%"REG_a", %%"REG_d"	\n\t"
-				"imul %1, %%"REG_a"		\n\t"
-				"imul %3, %%"REG_d"		\n\t"
-				"prefetchnta 32(%%"REG_a", %0)	\n\t"
-				"prefetcht0 32(%%"REG_d", %2)	\n\t"
-				"add %1, %%"REG_a"		\n\t"
-				"add %3, %%"REG_d"		\n\t"
-				"prefetchnta 32(%%"REG_a", %0)	\n\t"
-				"prefetcht0 32(%%"REG_d", %2)	\n\t"
-			:: "r" (srcBlock), "r" ((stride_t)srcStride), "r" (dstBlock), "r" ((stride_t)dstStride),
-			"g" ((stride_t)x), "g" (copyAhead)
-			: "%"REG_a, "%"REG_d
-			);
-
-#elif HAVE_AMD3DNOW
-//FIXME check if this is faster on an 3dnow chip or if its faster without the prefetch or ...
-/*			prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
-			prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
-			prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
-			prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
-*/
-#endif
-
-			RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
-				srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
-
-			RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
-
-			if(mode & LINEAR_IPOL_DEINT_FILTER)
-				RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
-			else if(mode & LINEAR_BLEND_DEINT_FILTER)
-				RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
-			else if(mode & MEDIAN_DEINT_FILTER)
-				RENAME(deInterlaceMedian)(dstBlock, dstStride);
-			else if(mode & CUBIC_IPOL_DEINT_FILTER)
-				RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
-			else if(mode & FFMPEG_DEINT_FILTER)
-				RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
-			else if(mode & LOWPASS5_DEINT_FILTER)
-				RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
-/*			else if(mode & CUBIC_BLEND_DEINT_FILTER)
-				RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
-*/
-			dstBlock+=8;
-			srcBlock+=8;
-		}
-		if(width==dstStride)
-		memcpy(dst, tempDst + 9*dstStride, copyAhead*dstStride );
-		else
-		{
-			int i;
-			for(i=0; i<copyAhead; i++)
-			{
-				memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
-			}
-		}
-	}
-
-//printf("\n");
-	for(y=0; y<height; y+=BLOCK_SIZE)
-	{
-		//1% speedup if these are here instead of the inner loop
-		uint8_t *srcBlock= &(src[y*srcStride]);
-		uint8_t *dstBlock= &(dst[y*dstStride]);
-#if HAVE_MMX
-		uint8_t *tempBlock1= c.tempBlocks;
-		uint8_t *tempBlock2= c.tempBlocks + 8;
-#endif
-		int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
-		int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*QPStride];
-		int QP=0;
-		/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
-		   if not than use a temporary buffer */
-		if(y+15 >= height)
-		{
-			int i;
-			/* copy from line (copyAhead) to (copyAhead+7) of src, these will be copied with
-			   blockcopy to dst later */
-			for(i=0;i<MAX(height-y-copyAhead, 0);i++)
-			 memcpy(tempSrc + srcStride*copyAhead+i*srcStride, srcBlock + srcStride*copyAhead+i*srcStride,width);
-
-			/* duplicate last line of src to fill the void upto line (copyAhead+7) */
-			for(i=MAX(height-y, 8); i<copyAhead+8; i++)
-				memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), width);
-
-			/* copy up to (copyAhead+1) lines of dst (line -1 to (copyAhead-1))*/
-			for(i=0;i<MIN(height-y+1, copyAhead+1);i++)
-			 memcpy(tempDst+i*dstStride, (dstBlock - dstStride)+i*dstStride, width);
-
-			/* duplicate last line of dst to fill the void upto line (copyAhead) */
-			for(i=height-y+1; i<=copyAhead; i++)
-				memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), width);
-
-			dstBlock= tempDst + dstStride;
-			srcBlock= tempSrc;
-		}
-//printf("\n");
-
-		// From this point on it is guranteed that we can read and write 16 lines downward
-		// finish 1 block before the next otherwise we might have a problem
-		// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
-		for(x=0; x<width; x+=BLOCK_SIZE)
-		{
-			const stride_t stride= dstStride;
-#if HAVE_MMX
-			uint8_t *tmpXchg;
-#endif
-			if(isColor)
-			{
-				QP= QPptr[x>>qpHShift];
-				c.nonBQP= nonBQPptr[x>>qpHShift];
-			}
-			else
-			{
-				QP= QPptr[x>>4];
-				QP= (QP* QPCorrecture + 256*128)>>16;
-				c.nonBQP= nonBQPptr[x>>4];
-				c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
-				yHistogram[ srcBlock[srcStride*12 + 4] ]++;
-			}
-			c.QP= QP;
-#if HAVE_MMX
-			asm volatile(
-				"movd %1, %%mm7					\n\t"
-				"packuswb %%mm7, %%mm7				\n\t" // 0, 0, 0, QP, 0, 0, 0, QP
-				"packuswb %%mm7, %%mm7				\n\t" // 0,QP, 0, QP, 0,QP, 0, QP
-				"packuswb %%mm7, %%mm7				\n\t" // QP,..., QP
-				"movq %%mm7, %0			\n\t"
-				: "=m" (c.pQPb)
-				: "r" (QP)
-			);
-#endif
-
-
-#if HAVE_MMX2
-/*
-			prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-			prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-			prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-			prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
-*/
-
-			asm(
-				"mov %4, %%"REG_a"		\n\t"
-				"shr $2, %%"REG_a"		\n\t"
-				"and $6, %%"REG_a"		\n\t"
-				"add %5, %%"REG_a"		\n\t"
-				"mov %%"REG_a", %%"REG_d"	\n\t"
-				"imul %1, %%"REG_a"		\n\t"
-				"imul %3, %%"REG_d"		\n\t"
-				"prefetchnta 32(%%"REG_a", %0)	\n\t"
-				"prefetcht0 32(%%"REG_d", %2)	\n\t"
-				"add %1, %%"REG_a"		\n\t"
-				"add %3, %%"REG_d"		\n\t"
-				"prefetchnta 32(%%"REG_a", %0)	\n\t"
-				"prefetcht0 32(%%"REG_d", %2)	\n\t"
-			:: "r" (srcBlock), "r" ((stride_t)srcStride), "r" (dstBlock), "r" ((stride_t)dstStride),
-			 "g" ((stride_t)x), "g" ((long)copyAhead)
-			: "%"REG_a, "%"REG_d
-			);
-
-#elif HAVE_AMD3DNOW
-//FIXME check if this is faster on an 3dnow chip or if its faster without the prefetch or ...
-/*			prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
-			prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
-			prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
-			prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
-*/
-#endif
-
-			RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
-				srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
-
-			if(mode & LINEAR_IPOL_DEINT_FILTER)
-				RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
-			else if(mode & LINEAR_BLEND_DEINT_FILTER)
-				RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
-			else if(mode & MEDIAN_DEINT_FILTER)
-				RENAME(deInterlaceMedian)(dstBlock, dstStride);
-			else if(mode & CUBIC_IPOL_DEINT_FILTER)
-				RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
-			else if(mode & FFMPEG_DEINT_FILTER)
-				RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
-			else if(mode & LOWPASS5_DEINT_FILTER)
-				RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
-/*			else if(mode & CUBIC_BLEND_DEINT_FILTER)
-				RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
-*/
-
-			/* only deblock if we have 2 blocks */
-			if(y + 8 < height)
-			{
-				if(mode & V_X1_FILTER)
-					RENAME(vertX1Filter)(dstBlock, stride, &c);
-				else if(mode & V_DEBLOCK)
-				{
-					const int t= RENAME(vertClassify)(dstBlock, stride, &c);
-
-					if(t==1)
-							RENAME(doVertLowPass)(dstBlock, stride, &c);
-					else if(t==2)
-						RENAME(doVertDefFilter)(dstBlock, stride, &c);
-				}else if(mode & V_A_DEBLOCK){
-					RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
-				}
-			}
-
-#if HAVE_MMX
-			RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
-#endif
-			/* check if we have a previous block to deblock it with dstBlock */
-			if(x - 8 >= 0)
-			{
-#if HAVE_MMX
-				if(mode & H_X1_FILTER)
-					RENAME(vertX1Filter)(tempBlock1, 16, &c);
-				else if(mode & H_DEBLOCK)
-				{
-//START_TIMER
-					const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
-//STOP_TIMER("dc & minmax")
-                                        if(t==1)
-							RENAME(doVertLowPass)(tempBlock1, 16, &c);
-					else if(t==2)
-						RENAME(doVertDefFilter)(tempBlock1, 16, &c);
-				}else if(mode & H_A_DEBLOCK){
-					RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
-				}
-
-				RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
-
-#else
-				if(mode & H_X1_FILTER)
-					horizX1Filter(dstBlock-4, stride, QP);
-				else if(mode & H_DEBLOCK)
-				{
-#ifdef HAVE_ALTIVEC
-					unsigned char __attribute__ ((aligned(16))) tempBlock[272];
-					transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
-
-					const int t=vertClassify_altivec(tempBlock-48, 16, &c);
-					if(t==1) {
-						doVertLowPass_altivec(tempBlock-48, 16, &c);
-                                                transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
-                                        }
-					else if(t==2) {
-						doVertDefFilter_altivec(tempBlock-48, 16, &c);
-                                                transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
-					}
-#else
-					const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
-
-					if(t==1)
-						RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
-					else if(t==2)
-						RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
-#endif
-				}else if(mode & H_A_DEBLOCK){
-					RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
-				}
-#endif
-				if(mode & DERING)
-				{
-				//FIXME filter first line
-					if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
-				}
-
-				if(mode & TEMP_NOISE_FILTER)
-				{
-					RENAME(tempNoiseReducer)(dstBlock-8, stride,
-						c.tempBlured[isColor] + y*dstStride + x+256,
-						c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3)+256,
-						c.ppMode.maxTmpNoise);
-				}
-			}
-
-			dstBlock+=8;
-			srcBlock+=8;
-
-#if HAVE_MMX
-			tmpXchg= tempBlock1;
-			tempBlock1= tempBlock2;
-			tempBlock2 = tmpXchg;
-#endif
-		}
-
-		if(mode & DERING)
-		{
-				if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
-		}
-
-		if((mode & TEMP_NOISE_FILTER))
-		{
-			RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
-				c.tempBlured[isColor] + y*dstStride + x+256,
-				c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3)+256,
-				c.ppMode.maxTmpNoise);
-		}
-
-		/* did we use a tmp buffer for the last lines*/
-		if(y+15 >= height)
-		{
-			uint8_t *dstBlock= &(dst[y*dstStride]);
-			if(width==dstStride)
-			memcpy(dstBlock, tempDst + dstStride, dstStride*(height-y) );
-			else
-			{
-				int i;
-				for(i=0; i<height-y; i++)
-				{
-					memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
-				}
-			}
-		}
-/*
-		for(x=0; x<width; x+=32)
-		{
-			volatile int i;
-			i+=	+ dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride]
-				+ dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride]
-				+ dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride];
-//				+ dstBlock[x +13*dstStride]
-//				+ dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
-		}*/
-	}
-#if HAVE_AMD3DNOW
-	asm volatile("femms");
-#elif HAVE_MMX
-	asm volatile("emms");
-#endif
-
-#ifdef DEBUG_BRIGHTNESS
-	if(!isColor)
-	{
-		int max=1;
-		int i;
-		for(i=0; i<256; i++)
-			if(yHistogram[i] > max) max=yHistogram[i];
-
-		for(i=1; i<256; i++)
-		{
-			int x;
-			int start=yHistogram[i-1]/(max/256+1);
-			int end=yHistogram[i]/(max/256+1);
-			int inc= end > start ? 1 : -1;
-			for(x=start; x!=end+inc; x+=inc)
-				dst[ i*dstStride + x]+=128;
-		}
-
-		for(i=0; i<100; i+=2)
-		{
-			dst[ (white)*dstStride + i]+=128;
-			dst[ (black)*dstStride + i]+=128;
-		}
-
-	}
-#endif
-
-	*c2= c; //copy local context back
-
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/rgb2rgb.c b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/rgb2rgb.c
index dff5dfbf6..f1b88d6d8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/rgb2rgb.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/rgb2rgb.c
@@ -30,7 +30,7 @@
 #include "../libavutil/internal.h"
 #include "../libavutil/x86_cpu.h"
 #include "../libavutil/bswap.h"
-#include "../libvo/fastmemcpy.h"
+//#include "../libvo/fastmemcpy.h"
 #include "../cpudetect.h"
 #include "rgb2rgb.h"
 #include "swscale.h"
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/swscale.c b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/swscale.c
index e2c3d5bf0..0870091be 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/swscale.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/swscale.c
@@ -74,7 +74,7 @@ untested special converters
 #include "swscale_internal.h"
 #include "ffImgfmt.h"
 #include "rgb2rgb.h"
-#include "libvo/fastmemcpy.h"
+//#include "libvo/fastmemcpy.h"
 
 #undef MOVNTQ
 #undef PAVGB
@@ -907,7 +907,7 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l
 #endif
 
 #ifdef ARCH_POWERPC
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
 #define COMPILE_ALTIVEC
 #endif //HAVE_ALTIVEC
 #endif //ARCH_POWERPC
@@ -2420,7 +2420,7 @@ SwsContext *sws_getContextEx(int srcW, int srcH, int origSrcFormat, int dstW, in
 			/*(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags*/&params->methodChroma,params->subsampling,params->cpu,params->debug,
 				srcFilter->chrV, dstFilter->chrV);
 
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
 		c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
 		c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
 
@@ -3027,7 +3027,7 @@ void sws_freeContext(SwsContext *c){
 	c->hLumFilter = NULL;
 	av_free(c->hChrFilter);
 	c->hChrFilter = NULL;
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
 	av_free(c->vYCoeffsBank);
 	c->vYCoeffsBank = NULL;
 	av_free(c->vCCoeffsBank);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/swscale_template.c b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/swscale_template.c
index ca8bcc475..44a40e3fe 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/swscale_template.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/swscale_template.c
@@ -73,7 +73,7 @@
 #endif
 #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
 
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
 #include "swscale_altivec_template.c"
 #endif
 
@@ -903,7 +903,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t *
                 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
         }
 #else
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
 		      chrFilter, chrSrc, chrFilterSize,
 		      dest, uDest, vDest, dstW, chrDstW);
@@ -1119,7 +1119,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_
         }
     }
 #endif
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
 		/* The following list of supported dstFormat values should
 		   match what's found in the body of altivec_yuv2packedX() */
 		if(c->dstFormat==IMGFMT_ABGR  || c->dstFormat==IMGFMT_BGRA  ||
@@ -2441,7 +2441,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
 		);
 	}
 #else
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
 	hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
 #else
 	int i;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/libswscalew32thread.c b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/sww32thread.c
index 48223cc75..0025ef6b8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/libswscalew32thread.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/sww32thread.c
@@ -23,6 +23,10 @@
  */
 
 //#define DEBUG
+#include <stdio.h>
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
 
 #include <inttypes.h>
 #include "config.h"
@@ -30,9 +34,6 @@
 #include "swscale.h"
 #include "swscale_internal.h"
 
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <process.h>
 
 typedef struct SwsThreadContext{
     SwsContext *swsctx;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/yuv2rgb.c b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/yuv2rgb.c
index 742fd6aad..63fc98205 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/yuv2rgb.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libswscale/yuv2rgb.c
@@ -35,6 +35,7 @@
 #include "swscale_internal.h"
 #include "../libavutil/x86_cpu.h"
 #include "../libavutil/internal.h"
+#include "../libavutil/mem.h"
 #include "ffImgfmt.h"
 
 #define DITHER1XBPP // only for MMX
@@ -481,13 +482,13 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c)
 	}
     }
 #endif
-#ifdef HAVE_MLIB
+#if HAVE_MLIB
     {
 	SwsFunc t= yuv2rgb_init_mlib(c);
 	if(t) return t;
 }
 #endif
-#ifdef HAVE_ALTIVEC
+#if HAVE_ALTIVEC
     if (c->params.cpu & SWS_CPU_CAPS_ALTIVEC)
     {
 	SwsFunc t = yuv2rgb_init_altivec(c);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libvo/aclib.c b/src/filters/transform/MPCVideoDec/ffmpeg/libvo/aclib.c
deleted file mode 100644
index 8ea7b670b..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libvo/aclib.c
+++ /dev/null
@@ -1,151 +0,0 @@
-#include "../libswscale/config.h"
-#ifdef USE_FASTMEMCPY
-
-/*
-  aclib - advanced C library ;)
-  This file contains functions which improve and expand standard C-library
-  see aclib_template.c ... this file only contains runtime cpu detection and config options stuff
-  runtime cpu detection by michael niedermayer (michaelni@gmx.at) is under GPL
-*/
-#include <string.h>
-#include <stddef.h>
-#include "../libavutil/x86_cpu.h"
-#include "../cpudetect.h"
-#include "fastmemcpy.h"
-#undef memcpy
-#include "ffImgfmt.h"
-
-#define BLOCK_SIZE 4096
-#define CONFUSION_FACTOR 0
-//Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)
-
-//#define STATISTICS
-#if ARCH_X86_32 || ARCH_X86_64
-#define CAN_COMPILE_X86_ASM
-#endif
-
-//Note: we have MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
-//Plain C versions
-//#if !HAVE_MMX || defined (RUNTIME_CPUDETECT)
-//#define COMPILE_C
-//#endif
-
-#ifdef CAN_COMPILE_X86_ASM
-
-#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
-#define COMPILE_MMX
-#endif
-
-#if (HAVE_MMX2 && !HAVE_SSE2) || defined (RUNTIME_CPUDETECT)
-#define COMPILE_MMX2
-#endif
-
-#if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
-#define COMPILE_3DNOW
-#endif
-
-#if HAVE_SSE2 || defined (RUNTIME_CPUDETECT)
-#define COMPILE_SSE
-#endif
-
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#undef HAVE_SSE
-#undef HAVE_SSE2
-/*
-#ifdef COMPILE_C
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#undef ARCH_X86
-#define RENAME(a) a ## _C
-#include "aclib_template.c"
-#endif
-*/
-//MMX versions
-#ifdef COMPILE_MMX
-#undef RENAME
-#define HAVE_MMX 1
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#undef HAVE_SSE
-#undef HAVE_SSE2
-#define RENAME(a) a ## _MMX
-#include "aclib_template.c"
-#endif
-
-//MMX2 versions
-#ifdef COMPILE_MMX2
-#undef RENAME
-#define HAVE_MMX 1
-#define HAVE_MMX2 1
-#undef HAVE_AMD3DNOW
-#undef HAVE_SSE
-#undef HAVE_SSE2
-#define RENAME(a) a ## _MMX2
-#include "aclib_template.c"
-#endif
-
-//3DNOW versions
-#ifdef COMPILE_3DNOW
-#undef RENAME
-#define HAVE_MMX 1
-#undef HAVE_MMX2
-#define HAVE_AMD3DNOW 1
-#undef HAVE_SSE
-#undef HAVE_SSE2
-#define RENAME(a) a ## _3DNow
-#include "aclib_template.c"
-#endif
-
-//SSE versions (only used on SSE2 cpus)
-#ifdef COMPILE_SSE
-#undef RENAME
-#define HAVE_MMX 1
-#define HAVE_MMX2 1
-#undef HAVE_AMD3DNOW
-#define HAVE_SSE
-#define HAVE_SSE2
-#define RENAME(a) a ## _SSE
-#include "aclib_template.c"
-#endif
-
-#endif // CAN_COMPILE_X86_ASM
-
-void* (*fast_memcpy)(void * to, const void * from, size_t len)=NULL;
-
-void init_fast_memcpy(void)
-{
-#ifdef RUNTIME_CPUDETECT
-#ifdef CAN_COMPILE_X86_ASM
-	// ordered per speed fasterst first
-	if(gCpuCaps.hasSSE2)
-		fast_memcpy=fast_memcpy_SSE;
-	else if(gCpuCaps.hasMMX2)
-		fast_memcpy=fast_memcpy_MMX2;
-	else if(gCpuCaps.has3DNow)
-		fast_memcpy=fast_memcpy_3DNow;
-	else if(gCpuCaps.hasMMX)
-		fast_memcpy=fast_memcpy_MMX;
-	else
-#endif //CAN_COMPILE_X86_ASM
-		fast_memcpy=memcpy; // prior to mmx we use the standart memcpy
-#else
-#if HAVE_SSE2
-		fast_memcpy=fast_memcpy_SSE;
-#elif HAVE_MMX2
-		fast_memcpy=fast_memcpy_MMX2;
-#elif HAVE_AMD3DNOW
-		fast_memcpy=fast_memcpy_3DNow;
-#elif HAVE_MMX
-		fast_memcpy=fast_memcpy_MMX;
-#else
-		fast_memcpy=memcpy; // prior to mmx we use the standart memcpy
-#endif
-
-#endif //!RUNTIME_CPUDETECT
-}
-
-#endif /* use fastmemcpy */
-
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libvo/aclib_template.c b/src/filters/transform/MPCVideoDec/ffmpeg/libvo/aclib_template.c
deleted file mode 100644
index 27826f328..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libvo/aclib_template.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
-  aclib - advanced C library ;)
-  This file contains functions which improve and expand standard C-library
-*/
-
-#ifndef HAVE_SSE2
-/*
-   P3 processor has only one SSE decoder so can execute only 1 sse insn per
-   cpu clock, but it has 3 mmx decoders (include load/store unit)
-   and executes 3 mmx insns per cpu clock.
-   P4 processor has some chances, but after reading:
-   http://www.emulators.com/pentium4.htm
-   I have doubts. Anyway SSE2 version of this code can be written better.
-*/
-#undef HAVE_SSE
-#endif
-
-
-/*
- This part of code was taken by me from Linux-2.4.3 and slightly modified
-for MMX, MMX2, SSE instruction set. I have done it since linux uses page aligned
-blocks but mplayer uses weakly ordered data and original sources can not
-speedup them. Only using PREFETCHNTA and MOVNTQ together have effect!
-
->From IA-32 Intel Architecture Software Developer's Manual Volume 1,
-
-Order Number 245470:
-"10.4.6. Cacheability Control, Prefetch, and Memory Ordering Instructions"
-
-Data referenced by a program can be temporal (data will be used again) or
-non-temporal (data will be referenced once and not reused in the immediate
-future). To make efficient use of the processor's caches, it is generally
-desirable to cache temporal data and not cache non-temporal data. Overloading
-the processor's caches with non-temporal data is sometimes referred to as
-"polluting the caches".
-The non-temporal data is written to memory with Write-Combining semantics.
-
-The PREFETCHh instructions permits a program to load data into the processor
-at a suggested cache level, so that it is closer to the processors load and
-store unit when it is needed. If the data is already present in a level of
-the cache hierarchy that is closer to the processor, the PREFETCHh instruction
-will not result in any data movement.
-But we should you PREFETCHNTA: Non-temporal data fetch data into location
-close to the processor, minimizing cache pollution.
-
-The MOVNTQ (store quadword using non-temporal hint) instruction stores
-packed integer data from an MMX register to memory, using a non-temporal hint.
-The MOVNTPS (store packed single-precision floating-point values using
-non-temporal hint) instruction stores packed floating-point data from an
-XMM register to memory, using a non-temporal hint.
-
-The SFENCE (Store Fence) instruction controls write ordering by creating a
-fence for memory store operations. This instruction guarantees that the results
-of every store instruction that precedes the store fence in program order is
-globally visible before any store instruction that follows the fence. The
-SFENCE instruction provides an efficient way of ensuring ordering between
-procedures that produce weakly-ordered data and procedures that consume that
-data.
-
-If you have questions please contact with me: Nick Kurshev: nickols_k@mail.ru.
-*/
-
-// 3dnow memcpy support from kernel 2.4.2
-//  by Pontscho/fresh!mindworkz
-
-
-#undef HAVE_MMX1
-#if defined(HAVE_MMX) && !defined(HAVE_MMX2) && !defined(HAVE_AMD3DNOW) && !defined(HAVE_SSE)
-/*  means: mmx v.1. Note: Since we added alignment of destinition it speedups
-    of memory copying on PentMMX, Celeron-1 and P2 upto 12% versus
-    standard (non MMX-optimized) version.
-    Note: on K6-2+ it speedups memory copying upto 25% and
-          on K7 and P3 about 500% (5 times). */
-#define HAVE_MMX1
-#endif
-
-
-#undef HAVE_K6_2PLUS
-#if !defined( HAVE_MMX2) && defined( HAVE_AMD3DNOW)
-#define HAVE_K6_2PLUS
-#endif
-
-/* for small memory blocks (<256 bytes) this version is faster */
-#define small_memcpy(to,from,n)\
-{\
-register unsigned long int dummy;\
-__asm__ __volatile__(\
-	"rep; movsb"\
-	:"=&D"(to), "=&S"(from), "=&c"(dummy)\
-/* It's most portable way to notify compiler */\
-/* that edi, esi and ecx are clobbered in asm block. */\
-/* Thanks to A'rpi for hint!!! */\
-        :"0" (to), "1" (from),"2" (n)\
-	: "memory");\
-}
-
-#undef MMREG_SIZE
-#ifdef HAVE_SSE
-#define MMREG_SIZE 16
-#else
-#define MMREG_SIZE 64 //8
-#endif
-
-#undef PREFETCH
-#undef EMMS
-
-#ifdef HAVE_MMX2
-#define PREFETCH "prefetchnta"
-#elif defined ( HAVE_AMD3DNOW )
-#define PREFETCH  "prefetch"
-#else
-#define PREFETCH "/nop"
-#endif
-
-/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
-#ifdef HAVE_AMD3DNOW
-#define EMMS     "femms"
-#else
-#define EMMS     "emms"
-#endif
-
-#undef MOVNTQ
-#ifdef HAVE_MMX2
-#define MOVNTQ "movntq"
-#else
-#define MOVNTQ "movq"
-#endif
-
-#undef MIN_LEN
-#ifdef HAVE_MMX1
-#define MIN_LEN 0x800  /* 2K blocks */
-#else
-#define MIN_LEN 0x40  /* 64-byte blocks */
-#endif
-
-static inline void * RENAME(fast_memcpy)(void * to, const void * from, size_t len)
-{
-	void *retval;
-	size_t i;
-	retval = to;
-#ifdef STATISTICS
-	{
-		static int freq[33];
-		static int t=0;
-		int i;
-		for(i=0; len>(1<<i); i++);
-		freq[i]++;
-		t++;
-		if(1024*1024*1024 % t == 0)
-			for(i=0; i<32; i++)
-				;//printf("freq < %8d %4d\n", 1<<i, freq[i]);
-	}
-#endif
-#ifndef HAVE_MMX1
-        /* PREFETCH has effect even for MOVSB instruction ;) */
-	__asm__ __volatile__ (
-	        PREFETCH" (%0)\n"
-	        PREFETCH" 64(%0)\n"
-	        PREFETCH" 128(%0)\n"
-        	PREFETCH" 192(%0)\n"
-        	PREFETCH" 256(%0)\n"
-		: : "r" (from) );
-#endif
-        if(len >= MIN_LEN)
-	{
-	  register unsigned long int delta;
-          /* Align destinition to MMREG_SIZE -boundary */
-          delta = ((unsigned long int)to)&(MMREG_SIZE-1);
-          if(delta)
-	  {
-	    delta=MMREG_SIZE-delta;
-	    len -= delta;
-	    small_memcpy(to, from, delta);
-	  }
-	  i = len >> 6; /* len/64 */
-	  len&=63;
-        /*
-           This algorithm is top effective when the code consequently
-           reads and writes blocks which have size of cache line.
-           Size of cache line is processor-dependent.
-           It will, however, be a minimum of 32 bytes on any processors.
-           It would be better to have a number of instructions which
-           perform reading and writing to be multiple to a number of
-           processor's decoders, but it's not always possible.
-        */
-#ifdef HAVE_SSE /* Only P3 (may be Cyrix3) */
-	if(((unsigned long)from) & 15)
-	/* if SRC is misaligned */
-	for(; i>0; i--)
-	{
-		__asm__ __volatile__ (
-		PREFETCH" 320(%0)\n"
-		"movups (%0), %%xmm0\n"
-		"movups 16(%0), %%xmm1\n"
-		"movups 32(%0), %%xmm2\n"
-		"movups 48(%0), %%xmm3\n"
-		"movntps %%xmm0, (%1)\n"
-		"movntps %%xmm1, 16(%1)\n"
-		"movntps %%xmm2, 32(%1)\n"
-		"movntps %%xmm3, 48(%1)\n"
-		:: "r" (from), "r" (to) : "memory");
-		from=((const unsigned char *) from)+64;
-		to=((unsigned char *)to)+64;
-	}
-	else
-	/*
-	   Only if SRC is aligned on 16-byte boundary.
-	   It allows to use movaps instead of movups, which required data
-	   to be aligned or a general-protection exception (#GP) is generated.
-	*/
-	for(; i>0; i--)
-	{
-		__asm__ __volatile__ (
-		PREFETCH" 320(%0)\n"
-		"movaps (%0), %%xmm0\n"
-		"movaps 16(%0), %%xmm1\n"
-		"movaps 32(%0), %%xmm2\n"
-		"movaps 48(%0), %%xmm3\n"
-		"movntps %%xmm0, (%1)\n"
-		"movntps %%xmm1, 16(%1)\n"
-		"movntps %%xmm2, 32(%1)\n"
-		"movntps %%xmm3, 48(%1)\n"
-		:: "r" (from), "r" (to) : "memory");
-		from=((const unsigned char *)from)+64;
-		to=((unsigned char *)to)+64;
-	}
-#else
-	// Align destination at BLOCK_SIZE boundary
-	for(; ((int)to & (BLOCK_SIZE-1)) && i>0; i--)
-	{
-		__asm__ __volatile__ (
-#ifndef HAVE_MMX1
-        	PREFETCH" 320(%0)\n"
-#endif
-		"movq (%0), %%mm0\n"
-		"movq 8(%0), %%mm1\n"
-		"movq 16(%0), %%mm2\n"
-		"movq 24(%0), %%mm3\n"
-		"movq 32(%0), %%mm4\n"
-		"movq 40(%0), %%mm5\n"
-		"movq 48(%0), %%mm6\n"
-		"movq 56(%0), %%mm7\n"
-		MOVNTQ" %%mm0, (%1)\n"
-		MOVNTQ" %%mm1, 8(%1)\n"
-		MOVNTQ" %%mm2, 16(%1)\n"
-		MOVNTQ" %%mm3, 24(%1)\n"
-		MOVNTQ" %%mm4, 32(%1)\n"
-		MOVNTQ" %%mm5, 40(%1)\n"
-		MOVNTQ" %%mm6, 48(%1)\n"
-		MOVNTQ" %%mm7, 56(%1)\n"
-		:: "r" (from), "r" (to) : "memory");
-		from=((const unsigned char *)from)+64;
-		to=((unsigned char *)to)+64;
-	}
-
-//	printf(" %d %d\n", (int)from&1023, (int)to&1023);
-	// Pure Assembly cuz gcc is a bit unpredictable ;)
-	if(i>=BLOCK_SIZE/64)
-		asm volatile(
-			"xor %%"REG_a", %%"REG_a"	\n\t"
-			".balign 16		\n\t"
-			"1:			\n\t"
-				"movl (%0, %%"REG_a"), %%ebx 	\n\t"
-				"movl 32(%0, %%"REG_a"), %%ebx 	\n\t"
-				"movl 64(%0, %%"REG_a"), %%ebx 	\n\t"
-				"movl 96(%0, %%"REG_a"), %%ebx 	\n\t"
-				"add $128, %%"REG_a"		\n\t"
-				"cmp %3, %%"REG_a"		\n\t"
-				" jb 1b				\n\t"
-
-			"xor %%"REG_a", %%"REG_a"	\n\t"
-
-				".balign 16		\n\t"
-				"2:			\n\t"
-				"movq (%0, %%"REG_a"), %%mm0\n"
-				"movq 8(%0, %%"REG_a"), %%mm1\n"
-				"movq 16(%0, %%"REG_a"), %%mm2\n"
-				"movq 24(%0, %%"REG_a"), %%mm3\n"
-				"movq 32(%0, %%"REG_a"), %%mm4\n"
-				"movq 40(%0, %%"REG_a"), %%mm5\n"
-				"movq 48(%0, %%"REG_a"), %%mm6\n"
-				"movq 56(%0, %%"REG_a"), %%mm7\n"
-				MOVNTQ" %%mm0, (%1, %%"REG_a")\n"
-				MOVNTQ" %%mm1, 8(%1, %%"REG_a")\n"
-				MOVNTQ" %%mm2, 16(%1, %%"REG_a")\n"
-				MOVNTQ" %%mm3, 24(%1, %%"REG_a")\n"
-				MOVNTQ" %%mm4, 32(%1, %%"REG_a")\n"
-				MOVNTQ" %%mm5, 40(%1, %%"REG_a")\n"
-				MOVNTQ" %%mm6, 48(%1, %%"REG_a")\n"
-				MOVNTQ" %%mm7, 56(%1, %%"REG_a")\n"
-				"add $64, %%"REG_a"		\n\t"
-				"cmp %3, %%"REG_a"		\n\t"
-				"jb 2b				\n\t"
-
-#if CONFUSION_FACTOR > 0
-	// a few percent speedup on out of order executing CPUs
-			"mov %5, %%"REG_a"		\n\t"
-				"2:			\n\t"
-				"movl (%0), %%ebx	\n\t"
-				"movl (%0), %%ebx	\n\t"
-				"movl (%0), %%ebx	\n\t"
-				"movl (%0), %%ebx	\n\t"
-				"dec %%"REG_a"		\n\t"
-				" jnz 2b		\n\t"
-#endif
-
-			"xor %%"REG_a", %%"REG_a"	\n\t"
-			"add %3, %0		\n\t"
-			"add %3, %1		\n\t"
-			"sub %4, %2		\n\t"
-			"cmp %4, %2		\n\t"
-			" jae 1b		\n\t"
-				: "+r" (from), "+r" (to), "+r" (i)
-				: "r" ((stride_t)BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" ((long)CONFUSION_FACTOR)
-				: "%"REG_a, "%ebx"
-		);
-
-	for(; i>0; i--)
-	{
-		__asm__ __volatile__ (
-#ifndef HAVE_MMX1
-        	PREFETCH" 320(%0)\n"
-#endif
-		"movq (%0), %%mm0\n"
-		"movq 8(%0), %%mm1\n"
-		"movq 16(%0), %%mm2\n"
-		"movq 24(%0), %%mm3\n"
-		"movq 32(%0), %%mm4\n"
-		"movq 40(%0), %%mm5\n"
-		"movq 48(%0), %%mm6\n"
-		"movq 56(%0), %%mm7\n"
-		MOVNTQ" %%mm0, (%1)\n"
-		MOVNTQ" %%mm1, 8(%1)\n"
-		MOVNTQ" %%mm2, 16(%1)\n"
-		MOVNTQ" %%mm3, 24(%1)\n"
-		MOVNTQ" %%mm4, 32(%1)\n"
-		MOVNTQ" %%mm5, 40(%1)\n"
-		MOVNTQ" %%mm6, 48(%1)\n"
-		MOVNTQ" %%mm7, 56(%1)\n"
-		:: "r" (from), "r" (to) : "memory");
-		from=((const unsigned char *)from)+64;
-		to=((unsigned char *)to)+64;
-	}
-
-#endif /* Have SSE */
-#ifdef HAVE_MMX2
-                /* since movntq is weakly-ordered, a "sfence"
-		 * is needed to become ordered again. */
-		__asm__ __volatile__ ("sfence":::"memory");
-#endif
-#ifndef HAVE_SSE
-		/* enables to use FPU */
-		__asm__ __volatile__ (EMMS:::"memory");
-#endif
-	}
-	/*
-	 *	Now do the tail of the block
-	 */
-	if(len) small_memcpy(to, from, len);
-	return retval;
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libvo/fastmemcpy.h b/src/filters/transform/MPCVideoDec/ffmpeg/libvo/fastmemcpy.h
deleted file mode 100644
index 0dd2ff62c..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libvo/fastmemcpy.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef __MPLAYER_MEMCPY
-#define __MPLAYER_MEMCPY
-
-#include <stddef.h>
-#include "../libswscale/config.h"
-#include "../libavutil/internal.h"
-
-#if defined(USE_FASTMEMCPY) && (HAVE_MMX || HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_SSE || HAVE_SSE2)
-extern void* (*fast_memcpy)(void * to, const void * from, size_t len);
-#define memcpy(a,b,c) fast_memcpy(a,b,c)
-#endif
-void init_fast_memcpy(void);
-
-static inline void * memcpy_pic(unsigned char * dst, unsigned char * src, int bytesPerLine, int height, int dstStride, int srcStride)
-{
-	int i;
-	void *retval=dst;
-
-	if(dstStride == srcStride)
-	{
-		if (srcStride < 0) {
-	    		src += (height-1)*srcStride;
-	    		dst += (height-1)*dstStride;
-	    		srcStride = -srcStride;
-		}
-
-		memcpy(dst, src, srcStride*height);
-	}
-	else
-	{
-		for(i=0; i<height; i++)
-		{
-			memcpy(dst, src, bytesPerLine);
-			src+= srcStride;
-			dst+= dstStride;
-		}
-	}
-
-	return retval;
-}
-
-#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libvo/libvoinit.c b/src/filters/transform/MPCVideoDec/ffmpeg/libvo/libvoinit.c
deleted file mode 100644
index 31d8d3a4e..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libvo/libvoinit.c
+++ /dev/null
@@ -1,22 +0,0 @@
-#include "cpudetect.h"
-#include "../libswscale/config.h"
-#include <string.h>
-#include "fastmemcpy.h"
-#include "../compiler.h"
-
-CpuCaps gCpuCaps;
-
-void init_libvo()
-{
- gCpuCaps.hasMMX=1;
- gCpuCaps.hasMMX2=1;
- gCpuCaps.has3DNow=1;
- gCpuCaps.has3DNowExt=1;
- gCpuCaps.hasSSE=1;
- gCpuCaps.hasSSE2=1;
- gCpuCaps.hasSSSE3=1;
- init_fast_memcpy();
- // Avoid using multithread if the CPU is Pentium4-HT
- // because it is not faster at all and uses more CPU.
- // (Swscaler depends much on MMX and P4HT have only one MMX unit.)
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc b/src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc
index f42a8ea41..ecbe62e50 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc
@@ -21,19 +21,12 @@ ifeq ($(64BIT),yes)
 else
     TARGET_OS=i686-pc-mingw32
     CFLAGS+=-DWIN32 -D_WIN32
-    OPTFLAGS+=-O3 -march=i686 -msse -mfpmath=sse
+    OPTFLAGS+=-O3 -march=i686 -mmmx
 endif
 
 CFLAGS+=-mno-cygwin -mdll -mthreads -pipe
 OPTFLAGS+=-fno-common -fno-tree-vectorize
 
-ifeq ($(SSE),yes)
-    OPTFLAGS+=-msse -mfpmath=sse
-endif
-ifeq ($(SSE2),yes)
-    OPTFLAGS+=-msse2
-endif
-
 ifeq ($(DEBUG),yes)
     CFLAGS+=-DDEBUG -D_DEBUG -g 
     YASMFLAGS+=-g cv8
@@ -66,16 +59,17 @@ GCCDEF=$(DEF)
 OBJS+=$(SRCS:.cpp=.o) $(SRCS_C:.c=.o) $(SRCS_YASM:.asm=.o) $(RESSRC:.rc=.res.o)
 
 .c.o:
-	$(FFMPEG_PREFIX)$(CC) -c $(CFLAGS) $(OPTFLAGS) $(OUTPUTFLAG)$@ $<
+	@echo $<
+	@$(FFMPEG_PREFIX)$(CC) -c $(CFLAGS) $(OPTFLAGS) $(OUTPUTFLAG)$@ $<
 
 .cpp.o:
 	$(FFMPEG_PREFIX)$(CC) -c $(CFLAGS) $(CPPFLAGS) $(OPTFLAGS) $(OUTPUTFLAG)$@ $<
 
 %.o: %.asm
 ifeq ($(64BIT),yes)
-	$(YASM) -f win64 -m amd64 -DWIN64 -DARCH_X86_64 -DPREFIX $(YASMFLAGS) -I$(<D)/ -o $@  $<
+	$(YASM) -f win64 -m amd64 -DWIN64 -DARCH_X86_64 -DPREFIX $(YASMFLAGS) -I$(<D)/ -o $@ $<
 else
-	$(YASM) -f win32 -m x86 -DWIN32 -DARCH_X86_32 -DPREFIX $(YASMFLAGS) -I$(<D)/ -o $@  $<
+	$(YASM) -f win32 -m x86 -DWIN32 -DARCH_X86_32 -DPREFIX $(YASMFLAGS) -I$(<D)/ -o $@ $<
 endif
 
 %.res.o: %.rc
author	clsid2 <clsid2@users.sourceforge.net>	2010-04-19 23:29:00 +0400
committer	clsid2 <clsid2@users.sourceforge.net>	2010-04-19 23:29:00 +0400
commit	e220e2c9e47b5c1f98eec2c8deead4074fa07c76 (patch)
tree	cf616685c536ce5089d76c46dfe3df14b17a3924 /src/filters/transform/MPCVideoDec
parent	0b7b1d06b32b43517f7b584207533ecfe5ce20fa (diff)