Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: XhmikosR <xhmikosr@users.sourceforge.net> 2010-08-26 03:17:34 +0400
committer: XhmikosR <xhmikosr@users.sourceforge.net> 2010-08-26 03:17:34 +0400
commit: 49b4ac58249746b3564052782f372056d90e3511 (patch)
tree: 946f9579bb3f69f3a881116d8dfa4500eb9dffa8
parent: 829422598bc67f6f63436da9d504efbb2ffd014b (diff)
updated ffmpeg
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@2349 10f7b99b-c216-0410-bff0-8a66a9350fd8
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/Makefile | 4
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 | 4
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/config.h | 34
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c | 3
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h | 12
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c | 2
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c | 4
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c | 9
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c | 3
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c | 8
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h | 8
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c | 2
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c | 2
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c | 2
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c | 16
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c | 8
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c | 4
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm | 202
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c | 103
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c | 2
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c | 2
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c | 2
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm | 173
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c (renamed from src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h) | 31
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c | 108
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c | 98
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h | 30
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c | 8
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm | 50
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c | 26
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h | 35
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h | 7
32 files changed, 544 insertions, 458 deletions
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
index 7c1807620..666048770 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
@@ -155,8 +155,7 @@ SRCS_C=\
$(LAVC_DIR)/x86/vc1dsp_mmx.c \
$(LAVC_DIR)/x86/vp3dsp_mmx.c \
$(LAVC_DIR)/x86/vp3dsp_sse2.c \
- $(LAVC_DIR)/x86/vp6dsp_mmx.c \
- $(LAVC_DIR)/x86/vp6dsp_sse2.c \
+ $(LAVC_DIR)/x86/vp56dsp_init.c \
$(LAVC_DIR)/x86/vp8dsp-init.c \
\
$(LAVCORE_DIR)/avcore_utils.c \
@@ -187,6 +186,7 @@ SRCS_YASM=\
$(LAVC_DIR)/x86/h264_intrapred.asm \
$(LAVC_DIR)/x86/h264_weight_sse2.asm \
$(LAVC_DIR)/x86/vc1dsp_yasm.asm \
+ $(LAVC_DIR)/x86/vp56dsp.asm \
$(LAVC_DIR)/x86/vp8dsp.asm \
$(LAVC_DIR)/x86/x86util.asm
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
index b722d5960..27b4f57f1 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
@@ -155,8 +155,7 @@ SRCS_C=\
$(LAVC_DIR)/x86/vc1dsp_mmx.c \
$(LAVC_DIR)/x86/vp3dsp_mmx.c \
$(LAVC_DIR)/x86/vp3dsp_sse2.c \
- $(LAVC_DIR)/x86/vp6dsp_mmx.c \
- $(LAVC_DIR)/x86/vp6dsp_sse2.c \
+ $(LAVC_DIR)/x86/vp56dsp_init.c \
$(LAVC_DIR)/x86/vp8dsp-init.c \
\
$(LAVCORE_DIR)/avcore_utils.c \
@@ -187,6 +186,7 @@ SRCS_YASM=\
$(LAVC_DIR)/x86/h264_intrapred.asm \
$(LAVC_DIR)/x86/h264_weight_sse2.asm \
$(LAVC_DIR)/x86/vc1dsp_yasm.asm \
+ $(LAVC_DIR)/x86/vp56dsp.asm \
$(LAVC_DIR)/x86/vp8dsp.asm \
$(LAVC_DIR)/x86/x86util.asm
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/config.h b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
index d3b203c61..507cca37f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/config.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
@@ -48,11 +48,12 @@
// registry switch is not read)
//#define USE_DPRINTF 1
-#define FFMPEG_LICENSE "GPL version 2.1 or later"
+#define FFMPEG_CONFIGURATION "ffdshow custom"
+#define FFMPEG_LICENSE "GPL version 2 or later"
#define CC_TYPE "gcc"
#define CC_VERSION __VERSION__
-#define ASMALIGN(ZEROBITS) ".align 1 << " #ZEROBITS "\n\t"
+#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t"
// MPC custom code for linking with MSVC
#if defined(__GNUC__) && ARCH_X86_64
@@ -62,7 +63,36 @@
#endif
#define EXTERN_ASM _
+#define ARCH_ALPHA 0
+#define ARCH_ARM 0
+#define ARCH_AVR32 0
+#define ARCH_AVR32_AP 0
+#define ARCH_AVR32_UC 0
+#define ARCH_BFIN 0
+#define ARCH_IA64 0
+#define ARCH_M68K 0
+#define ARCH_MIPS 0
+#define ARCH_MIPS64 0
+#define ARCH_PARISC 0
+#define ARCH_PPC 0
+#define ARCH_PPC64 0
+#define ARCH_S390 0
+#define ARCH_SH4 0
+#define ARCH_SPARC 0
+#define ARCH_SPARC64 0
+#define ARCH_TOMI 0
+
#define HAVE_ALTIVEC 0
+#define HAVE_ARMV5TE 0
+#define HAVE_ARMV6 0
+#define HAVE_ARMV6T2 0
+#define HAVE_ARMVFP 0
+#define HAVE_IWMMXT 0
+#define HAVE_MMI 0
+#define HAVE_NEON 0
+#define HAVE_PPC4XX 0
+#define HAVE_VIS 0
+
#define HAVE_ALTIVEC_H 0
#define HAVE_BIGENDIAN 0
#define HAVE_BSWAP 1
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
index eff067ad6..e4a4a7ad6 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
@@ -4417,9 +4417,6 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
}
- if (CONFIG_VP6_DECODER) {
- c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
- }
c->h261_loop_filter= h261_loop_filter_c;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
index 778d3dfc1..cfd1b7f33 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
@@ -357,9 +357,6 @@ typedef struct DSPContext {
void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
- void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
- const int16_t *h_weights,const int16_t *v_weights);
-
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
@@ -604,7 +601,6 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
/* should be defined by architectures supporting
one or more MultiMedia extension */
int mm_support(void);
-extern int mm_flags;
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
@@ -629,16 +625,10 @@ static inline void emms(void)
#endif
}
-
-#define emms_c() \
-{\
- if (mm_flags & FF_MM_MMX)\
- emms();\
-}
+#define emms_c() emms()
#else
-#define mm_flags 0
#define mm_support() 0
#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
index 5793dd1af..503c3b6ba 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
@@ -542,7 +542,7 @@ retry:
#endif
#if HAVE_MMX
- if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & FF_MM_MMX)){
+ if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (mm_support() & FF_MM_MMX)){
avctx->idct_algo= FF_IDCT_XVIDMMX;
avctx->coded_width= 0; // force reinit
// dsputil_init(&s->dsp, avctx);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
index 05999713b..fcb4f2011 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
@@ -1024,7 +1024,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
return -1;
- id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
+ id = get_bits_long(&s->gb, 32);
id = av_be2ne32(id);
len -= 6;
@@ -1114,7 +1114,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
/* Apple MJPEG-A */
if ((s->start_code == APP1) && (len > (0x28 - 8)))
{
- id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
+ id = get_bits_long(&s->gb, 32);
id = av_be2ne32(id);
len -= 4;
if (id == AV_RL32("mjpg")) /* Apple MJPEG-A */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
index 4171490ae..489c66f1d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
@@ -937,7 +937,14 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
return -1;
s->current_picture_ptr= pic;
- s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
+ //FIXME use only the vars from current_pic
+ if(s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO) {
+ if(s->picture_structure == PICT_FRAME)
+ s->current_picture_ptr->top_field_first= s->top_field_first;
+ else
+ s->current_picture_ptr->top_field_first= (s->picture_structure == PICT_TOP_FIELD) == s->first_field;
+ } else
+ s->current_picture_ptr->top_field_first= s->top_field_first;
s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
index 4983fa7fa..f71ddf2d5 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c
@@ -1395,8 +1395,7 @@ return -1;
#endif
if(s->msmpeg4_version==1){
- int start_code;
- start_code = (get_bits(&s->gb, 16)<<16) | get_bits(&s->gb, 16);
+ int start_code = get_bits_long(&s->gb, 32);
if(start_code!=0x00000100){
av_log(s->avctx, AV_LOG_ERROR, "invalid startcode\n");
return -1;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
index f9da3d78d..d67604b01 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
@@ -82,5 +82,13 @@ void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec)
} else {
s->edge_filter_hor = vp6_edge_filter_hor;
s->edge_filter_ver = vp6_edge_filter_ver;
+
+ if (CONFIG_VP6_DECODER) {
+ s->vp6_filter_diag4= ff_vp6_filter_diag4_c;
+ }
}
+
+ #if HAVE_MMX
+ ff_vp56dsp_init_x86(s, codec);
+ #endif
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h
index 2d6941fa2..74a9cb530 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h
@@ -22,13 +22,21 @@
#define AVCODEC_VP56DSP_H
#include <stdint.h>
+#include "avcodec.h"
typedef struct VP56DSPContext {
void (*edge_filter_hor)(uint8_t *yuv, int stride, int t);
void (*edge_filter_ver)(uint8_t *yuv, int stride, int t);
+
+ void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
+ const int16_t *h_weights,const int16_t *v_weights);
} VP56DSPContext;
+void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
+ const int16_t *h_weights, const int16_t *v_weights);
+
void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec);
void ff_vp56dsp_init_arm(VP56DSPContext *s, enum CodecID codec);
+void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec);
#endif /* AVCODEC_VP56DSP_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
index de3be4084..57d357023 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
@@ -559,7 +559,7 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
vp6_filter_hv4(dst, src+offset1, stride, stride,
vp6_block_copy_filter[select][y8]);
} else {
- s->dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride,
+ s->vp56dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride,
vp6_block_copy_filter[select][x8],
vp6_block_copy_filter[select][y8]);
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c
index 69a11ee18..1119b5670 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c
@@ -22,7 +22,7 @@
*/
#include "libavutil/common.h"
-#include "dsputil.h"
+#include "vp56dsp.h"
void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c
index 663be3c57..e96e3a93c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c
@@ -137,4 +137,6 @@ int mm_support(void)
(rval&FF_MM_3DNOWEXT) ? "3DNowExt ":"");
#endif
return rval;
+
+ /* TODO: allow overriding with ffdshow settings for disabling extensions */
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
index 4add01fe9..c4939ec65 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
@@ -30,15 +30,11 @@
#include "dsputil_mmx.h"
#include "vp3dsp_mmx.h"
#include "vp3dsp_sse2.h"
-#include "vp6dsp_mmx.h"
-#include "vp6dsp_sse2.h"
#include "idct_xvid.h"
//#undef NDEBUG
//#include <assert.h>
-int mm_flags; /* multimedia extension flags */
-
/* pixel operations */
DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
@@ -2504,7 +2500,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
- mm_flags = mm_support();
+ int mm_flags = mm_support();
if (avctx->dsp_mask) {
if (avctx->dsp_mask & FF_MM_FORCE)
@@ -2626,10 +2622,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx;
c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx;
- if (CONFIG_VP6_DECODER) {
- c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
- }
-
if (mm_flags & FF_MM_MMX2) {
c->prefetch = prefetch_mmx2;
@@ -2812,10 +2804,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 1, sse2);
H264_QPEL_FUNCS(3, 2, sse2);
H264_QPEL_FUNCS(3, 3, sse2);
-
- if (CONFIG_VP6_DECODER) {
- c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
- }
}
#if HAVE_SSSE3
if(mm_flags & FF_MM_SSSE3){
@@ -2898,7 +2886,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if CONFIG_H264DSP
void ff_h264dsp_init_x86(H264DSPContext *c)
{
- mm_flags = mm_support();
+ int mm_flags = mm_support();
if (mm_flags & FF_MM_MMX) {
c->h264_idct_dc_add=
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
index dba8c3faf..eb5c65ecb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
@@ -21,16 +21,12 @@
av_cold void ff_fft_init_mmx(FFTContext *s)
{
-/* Crashes on 64-bit?
- * ToDo: verify if that is still the case with the current code and with GCC 4.4.x and above
- */
-#if HAVE_YASM && ARCH_X86_32
+#if HAVE_YASM
int has_vectors = mm_support();
if (has_vectors & FF_MM_SSE && HAVE_SSE) {
/* SSE for P3/P4/K8 */
s->imdct_calc = ff_imdct_calc_sse;
- /* crashes DTS decoder */
- //s->imdct_half = ff_imdct_half_sse;
+ s->imdct_half = ff_imdct_half_sse;
s->fft_permute = ff_fft_permute_sse;
s->fft_calc = ff_fft_calc_sse;
} else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
index 8226ae962..9a8108bdd 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
@@ -56,7 +56,7 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z)
void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input)
{
x86_reg j, k;
- long n = 1 << s->mdct_bits;
+ long n = s->mdct_size;
long n2 = n >> 1;
long n4 = n >> 2;
long n8 = n >> 3;
@@ -147,7 +147,7 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
void ff_imdct_calc_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input)
{
x86_reg j, k;
- long n = 1 << s->mdct_bits;
+ long n = s->mdct_size;
long n4 = n >> 2;
ff_imdct_half_3dn2(s, output+n4, input);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
index 23a360fa6..31176d6c9 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
@@ -29,6 +29,23 @@
%include "x86inc.asm"
+%ifdef ARCH_X86_64
+%define pointer resq
+%else
+%define pointer resd
+%endif
+
+struc FFTContext
+ .nbits: resd 1
+ .reverse: resd 1
+ .revtab: pointer 1
+ .tmpbuf: pointer 1
+ .mdctsize: resd 1
+ .mdctbits: resd 1
+ .tcos: pointer 1
+ .tsin: pointer 1
+endstruc
+
SECTION_RODATA
%define M_SQRT1_2 0.70710678118654752440
@@ -428,6 +445,16 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
%define SECTION_REL
%endif
+%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs
+ lea r2, [dispatch_tab%1]
+ mov r2, [r2 + (%2q-2)*gprsize]
+%ifdef PIC
+ lea r3, [$$]
+ add r2, r3
+%endif
+ call r2
+%endmacro ; FFT_DISPATCH
+
%macro DECL_FFT 2-3 ; nbits, cpu, suffix
%xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL
%if %1==5
@@ -464,13 +491,7 @@ section .text
; On x86_32, this function does the register saving and restoring for all of fft.
; The others pass args in registers and don't spill anything.
cglobal fft_dispatch%3%2, 2,5,8, z, nbits
- lea r2, [dispatch_tab%3%2]
- mov r2, [r2 + (nbitsq-2)*gprsize]
-%ifdef PIC
- lea r3, [$$]
- add r2, r3
-%endif
- call r2
+ FFT_DISPATCH %3%2, nbits
RET
%endmacro ; DECL_FFT
@@ -481,3 +502,170 @@ DECL_FFT 4, _3dn, _interleave
DECL_FFT 4, _3dn2
DECL_FFT 4, _3dn2, _interleave
+INIT_XMM
+%undef mulps
+%undef addps
+%undef subps
+%undef unpcklps
+%undef unpckhps
+
+%macro PREROTATER 5 ;-2*k, 2*k, input+n4, tcos+n8, tsin+n8
+ movaps xmm0, [%3+%2*4]
+ movaps xmm1, [%3+%1*4-0x10]
+ movaps xmm2, xmm0
+ shufps xmm0, xmm1, 0x88
+ shufps xmm1, xmm2, 0x77
+ movlps xmm4, [%4+%2*2]
+ movlps xmm5, [%5+%2*2+0x0]
+ movhps xmm4, [%4+%1*2-0x8]
+ movhps xmm5, [%5+%1*2-0x8]
+ movaps xmm2, xmm0
+ movaps xmm3, xmm1
+ mulps xmm0, xmm5
+ mulps xmm1, xmm4
+ mulps xmm2, xmm4
+ mulps xmm3, xmm5
+ subps xmm1, xmm0
+ addps xmm2, xmm3
+ movaps xmm0, xmm1
+ unpcklps xmm1, xmm2
+ unpckhps xmm0, xmm2
+%endmacro
+
+%macro PREROTATEW 3 ;addr1, addr2, xmm
+ movlps %1, %3
+ movhps %2, %3
+%endmacro
+
+%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
+ movaps xmm6, [%4+%1*2]
+ movaps %2, [%4+%1*2+0x10]
+ movaps %3, xmm6
+ movaps xmm7, %2
+ mulps xmm6, [%5+%1*1]
+ mulps %2, [%6+%1*1]
+ mulps %3, [%6+%1*1]
+ mulps xmm7, [%5+%1*1]
+ subps %2, xmm6
+ addps %3, xmm7
+%endmacro
+
+%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
+.post:
+ CMUL %1, xmm0, xmm1, %3, %4, %5
+ CMUL %2, xmm4, xmm5, %3, %4, %5
+ shufps xmm1, xmm1, 0x1b
+ shufps xmm5, xmm5, 0x1b
+ movaps xmm6, xmm4
+ unpckhps xmm4, xmm1
+ unpcklps xmm6, xmm1
+ movaps xmm2, xmm0
+ unpcklps xmm0, xmm5
+ unpckhps xmm2, xmm5
+ movaps [%3+%2*2], xmm6
+ movaps [%3+%2*2+0x10], xmm4
+ movaps [%3+%1*2], xmm0
+ movaps [%3+%1*2+0x10], xmm2
+ sub %2, 0x10
+ add %1, 0x10
+ jl .post
+%endmacro
+
+cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input
+%ifdef ARCH_X86_64
+%define rrevtab r10
+%define rtcos r11
+%define rtsin r12
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+%else
+%define rrevtab r6
+%define rtsin r6
+%define rtcos r5
+%endif
+ mov r3d, [r0+FFTContext.mdctsize]
+ add r2, r3
+ shr r3, 1
+ mov rtcos, [r0+FFTContext.tcos]
+ mov rtsin, [r0+FFTContext.tsin]
+ add rtcos, r3
+ add rtsin, r3
+%ifndef ARCH_X86_64
+ push rtcos
+ push rtsin
+%endif
+ shr r3, 1
+ mov rrevtab, [r0+FFTContext.revtab]
+ add rrevtab, r3
+%ifndef ARCH_X86_64
+ push rrevtab
+%endif
+
+ sub r3, 4
+%ifdef ARCH_X86_64
+ xor r4, r4
+ sub r4, r3
+%endif
+.pre:
+%ifndef ARCH_X86_64
+;unspill
+ xor r4, r4
+ sub r4, r3
+ mov rtsin, [esp+4]
+ mov rtcos, [esp+8]
+%endif
+
+ PREROTATER r4, r3, r2, rtcos, rtsin
+%ifdef ARCH_X86_64
+ movzx r5, word [rrevtab+r4*1-4]
+ movzx r6, word [rrevtab+r4*1-2]
+ movzx r13, word [rrevtab+r3*1]
+ movzx r14, word [rrevtab+r3*1+2]
+ PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0
+ PREROTATEW [r1+r13*8], [r1+r14*8], xmm1
+ add r4, 4
+%else
+ mov r6, [esp]
+ movzx r5, word [r6+r4*1-4]
+ movzx r4, word [r6+r4*1-2]
+ PREROTATEW [r1+r5*8], [r1+r4*8], xmm0
+ movzx r5, word [r6+r3*1]
+ movzx r4, word [r6+r3*1+2]
+ PREROTATEW [r1+r5*8], [r1+r4*8], xmm1
+%endif
+ sub r3, 4
+ jns .pre
+
+ mov r5, r0
+ mov r6, r1
+ mov r0, r1
+ mov r1d, [r5+FFTContext.nbits]
+
+ FFT_DISPATCH _sse, r1
+
+ mov r0d, [r5+FFTContext.mdctsize]
+ add r6, r0
+ shr r0, 1
+%ifndef ARCH_X86_64
+%define rtcos r2
+%define rtsin r3
+ mov rtcos, [esp+8]
+ mov rtsin, [esp+4]
+%endif
+ neg r0
+ mov r1, -16
+ sub r1, r0
+ POSROTATESHUF r0, r1, r6, rtcos, rtsin
+%ifdef ARCH_X86_64
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+%else
+ add esp, 12
+%endif
+ RET
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c
index 726e186b5..c4082b15b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c
@@ -71,111 +71,10 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z)
memcpy(z, s->tmp_buf, n*sizeof(FFTComplex));
}
-void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
- av_unused x86_reg i, j, k, l;
- long n = 1 << s->mdct_bits;
- long n2 = n >> 1;
- long n4 = n >> 2;
- long n8 = n >> 3;
- const uint16_t *revtab = s->revtab + n8;
- const FFTSample *tcos = s->tcos;
- const FFTSample *tsin = s->tsin;
- FFTComplex *z = (FFTComplex *)output;
-
- /* pre rotation */
- for(k=n8-2; k>=0; k-=2) {
- __asm__ volatile(
- "movaps (%2,%1,2), %%xmm0 \n" // { z[k].re, z[k].im, z[k+1].re, z[k+1].im }
- "movaps -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im }
- "movaps %%xmm0, %%xmm2 \n"
- "shufps $0x88, %%xmm1, %%xmm0 \n" // { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re }
- "shufps $0x77, %%xmm2, %%xmm1 \n" // { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im }
- "movlps (%3,%1), %%xmm4 \n"
- "movlps (%4,%1), %%xmm5 \n"
- "movhps -8(%3,%0), %%xmm4 \n" // { cos[k], cos[k+1], cos[-k-2], cos[-k-1] }
- "movhps -8(%4,%0), %%xmm5 \n" // { sin[k], sin[k+1], sin[-k-2], sin[-k-1] }
- "movaps %%xmm0, %%xmm2 \n"
- "movaps %%xmm1, %%xmm3 \n"
- "mulps %%xmm5, %%xmm0 \n" // re*sin
- "mulps %%xmm4, %%xmm1 \n" // im*cos
- "mulps %%xmm4, %%xmm2 \n" // re*cos
- "mulps %%xmm5, %%xmm3 \n" // im*sin
- "subps %%xmm0, %%xmm1 \n" // -> re
- "addps %%xmm3, %%xmm2 \n" // -> im
- "movaps %%xmm1, %%xmm0 \n"
- "unpcklps %%xmm2, %%xmm1 \n" // { z[k], z[k+1] }
- "unpckhps %%xmm2, %%xmm0 \n" // { z[-k-2], z[-k-1] }
- ::"r"(-4*k), "r"(4*k),
- "r"(input+n4), "r"(tcos+n8), "r"(tsin+n8)
- );
-#if ARCH_X86_64
- // if we have enough regs, don't let gcc make the luts latency-bound
- // but if not, latency is faster than spilling
- __asm__("movlps %%xmm0, %0 \n"
- "movhps %%xmm0, %1 \n"
- "movlps %%xmm1, %2 \n"
- "movhps %%xmm1, %3 \n"
- :"=m"(z[revtab[-k-2]]),
- "=m"(z[revtab[-k-1]]),
- "=m"(z[revtab[ k ]]),
- "=m"(z[revtab[ k+1]])
- );
-#else
- __asm__("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]]));
- __asm__("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]]));
- __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]]));
- __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]]));
-#endif
- }
-
- ff_fft_dispatch_sse(z, s->nbits);
-
- /* post rotation + reinterleave + reorder */
-
-#define CMUL(j,xmm0,xmm1)\
- "movaps (%2,"#j",2), %%xmm6 \n"\
- "movaps 16(%2,"#j",2), "#xmm0"\n"\
- "movaps %%xmm6, "#xmm1"\n"\
- "movaps "#xmm0",%%xmm7 \n"\
- "mulps (%3,"#j"), %%xmm6 \n"\
- "mulps (%4,"#j"), "#xmm0"\n"\
- "mulps (%4,"#j"), "#xmm1"\n"\
- "mulps (%3,"#j"), %%xmm7 \n"\
- "subps %%xmm6, "#xmm0"\n"\
- "addps %%xmm7, "#xmm1"\n"
-
- j = -n2;
- k = n2-16;
- __asm__ volatile(
- "1: \n"
- CMUL(%0, %%xmm0, %%xmm1)
- CMUL(%1, %%xmm4, %%xmm5)
- "shufps $0x1b, %%xmm1, %%xmm1 \n"
- "shufps $0x1b, %%xmm5, %%xmm5 \n"
- "movaps %%xmm4, %%xmm6 \n"
- "unpckhps %%xmm1, %%xmm4 \n"
- "unpcklps %%xmm1, %%xmm6 \n"
- "movaps %%xmm0, %%xmm2 \n"
- "unpcklps %%xmm5, %%xmm0 \n"
- "unpckhps %%xmm5, %%xmm2 \n"
- "movaps %%xmm6, (%2,%1,2) \n"
- "movaps %%xmm4, 16(%2,%1,2) \n"
- "movaps %%xmm0, (%2,%0,2) \n"
- "movaps %%xmm2, 16(%2,%0,2) \n"
- "sub $16, %1 \n"
- "add $16, %0 \n"
- "jl 1b \n"
- :"+&r"(j), "+&r"(k)
- :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8)
- :"memory"
- );
-}
-
void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
{
x86_reg j, k;
- long n = 1 << s->mdct_bits;
+ long n = s->mdct_size;
long n4 = n >> 2;
ff_imdct_half_sse(s, output+n4, input);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
index 35a016b2f..4b2e54603 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
@@ -2368,7 +2368,7 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s
#if CONFIG_H264PRED
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
{
- mm_flags = mm_support();
+ int mm_flags = mm_support();
#if HAVE_YASM
if (mm_flags & FF_MM_MMX) {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
index f9a8847de..75ec4b2cf 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
@@ -625,6 +625,8 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
void MPV_common_init_mmx(MpegEncContext *s)
{
+ int mm_flags = mm_support();
+
if (mm_flags & FF_MM_MMX) {
const int dct_algo = s->avctx->dct_algo;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
index 3ce097894..eb3ad2c32 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
@@ -714,7 +714,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
#endif
void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
- mm_flags = mm_support();
+ int mm_flags = mm_support();
dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm
new file mode 100644
index 000000000..1b3165e54
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm
@@ -0,0 +1,173 @@
+;******************************************************************************
+;* MMX/SSE2-optimized functions for the VP6 decoder
+;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
+;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+cextern pw_64
+
+SECTION .text
+
+%macro DIAG4_MMX 6
+ movq m0, [%1+%2] ; tap 0: 8 source bytes at %1+%2
+ movq m1, [%1+%3] ; tap 1: 8 source bytes at %1+%3
+ movq m3, m0 ; keep copies so the high 4 bytes can be processed in m3/m4
+ movq m4, m1
+ punpcklbw m0, m7 ; low 4 bytes -> words (m7 is zeroed by the caller)
+ punpcklbw m1, m7
+ punpckhbw m3, m7 ; high 4 bytes -> words
+ punpckhbw m4, m7
+ pmullw m0, [rsp+8*11] ; src[x-8 ] * biweight [0]
+ pmullw m1, [rsp+8*12] ; src[x ] * biweight [1]
+ pmullw m3, [rsp+8*11] ; src[x-8 ] * biweight [0]
+ pmullw m4, [rsp+8*12] ; src[x ] * biweight [1]
+ paddw m0, m1
+ paddw m3, m4
+ movq m1, [%1+%4] ; tap 2: 8 source bytes at %1+%4
+ movq m2, [%1+%5] ; tap 3: 8 source bytes at %1+%5
+ movq m4, m1
+ movq m5, m2
+ punpcklbw m1, m7 ; low 4 bytes -> words
+ punpcklbw m2, m7
+ punpckhbw m4, m7 ; high 4 bytes -> words; must be the high half to pair with m3
+ punpckhbw m5, m7
+ pmullw m1, [rsp+8*13] ; src[x+8 ] * biweight [2]
+ pmullw m2, [rsp+8*14] ; src[x+16] * biweight [3]
+ pmullw m4, [rsp+8*13] ; src[x+8 ] * biweight [2]
+ pmullw m5, [rsp+8*14] ; src[x+16] * biweight [3]
+ paddw m1, m2
+ paddw m4, m5
+ paddsw m0, m1 ; low half: sum of all four taps, signed saturation
+ paddsw m3, m4 ; high half: sum of all four taps, signed saturation
+ paddsw m0, m6 ; Add 64
+ paddsw m3, m6 ; Add 64
+ psraw m0, 7
+ psraw m3, 7
+ packuswb m0, m3 ; pack low/high word results back into 8 unsigned bytes
+ movq [%6], m0 ; store 8 filtered bytes at %6
+%endmacro
+
+%macro DIAG4_SSE2 6
+ movq m0, [%1+%2] ; tap 0: 8 source bytes at %1+%2
+ movq m1, [%1+%3] ; tap 1: 8 source bytes at %1+%3
+ punpcklbw m0, m7 ; widen 8 bytes to words (m7 is zeroed by the caller)
+ punpcklbw m1, m7
+ pmullw m0, m4 ; src[x-8 ] * biweight [0]
+ pmullw m1, m5 ; src[x ] * biweight [1]
+ paddw m0, m1
+ movq m1, [%1+%4] ; tap 2: 8 source bytes at %1+%4
+ movq m2, [%1+%5] ; tap 3: 8 source bytes at %1+%5
+ punpcklbw m1, m7
+ punpcklbw m2, m7
+ pmullw m1, m6 ; src[x+8 ] * biweight [2]
+ pmullw m2, m3 ; src[x+16] * biweight [3]
+ paddw m1, m2
+ paddsw m0, m1 ; sum of all four taps, signed saturation
+ paddsw m0, [pw_64] ; Add 64
+ psraw m0, 7 ; (sum + 64) >> 7
+ packuswb m0, m0 ; saturate the 8 words to unsigned bytes
+ movq [%6], m0 ; store 8 filtered bytes at %6
+%endmacro
+
+%macro SPLAT4REGS_MMX 0
+ movq m5, m3 ; m3 holds the four 16-bit weights w0..w3 on entry
+ punpcklwd m3, m3 ; m3 = w0 w0 w1 w1
+ movq m4, m3
+ punpckldq m3, m3 ; m3 = w0 x4
+ punpckhdq m4, m4 ; m4 = w1 x4
+ punpckhwd m5, m5 ; m5 = w2 w2 w3 w3
+ movq m2, m5 ; scratch must not be m6: m6 carries pw_64 for DIAG4_MMX
+ punpckhdq m2, m2 ; m2 = w3 x4
+ punpckldq m5, m5 ; m5 = w2 x4
+ movq [rsp+8*11], m3 ; spill the splatted weights where DIAG4_MMX reads them
+ movq [rsp+8*12], m4
+ movq [rsp+8*13], m5
+ movq [rsp+8*14], m2
+%endmacro
+
+%macro SPLAT4REGS_SSE2 0
+ pshuflw m4, m3, 0x0 ; low qword of m4 = word 0 of m3, repeated 4 times
+ pshuflw m5, m3, 0x55 ; low qword of m5 = word 1 of m3, repeated 4 times
+ pshuflw m6, m3, 0xAA ; low qword of m6 = word 2 of m3, repeated 4 times
+ pshuflw m3, m3, 0xFF ; low qword of m3 = word 3 of m3, repeated 4 times
+ punpcklqdq m4, m4 ; copy the low qword into the high qword: 8 identical words
+ punpcklqdq m5, m5
+ punpcklqdq m6, m6
+ punpcklqdq m3, m3
+%endmacro
+
+%macro vp6_filter_diag4 2
+; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
+; const int16_t h_weights[4], const int16_t v_weights[4])
+cglobal vp6_filter_diag4_%1, 5, 7, %2
+ mov r5, rsp ; backup stack pointer
+ and rsp, ~(mmsize-1) ; align stack
+%ifidn %1, sse2
+ sub rsp, 8*11 ; temp buffer: 11 rows of 8 bytes
+%else
+ sub rsp, 8*15 ; 11 temp rows + 4 qwords of splatted weights at rsp+8*11..8*14
+ movq m6, [pw_64] ; rounding constant used by DIAG4_MMX
+%endif
+%ifdef ARCH_X86_64
+ movsxd r2, r2d ; sign-extend the int stride to pointer width
+%endif
+
+ sub r1, r2 ; start one row above src
+
+ pxor m7, m7 ; m7 = 0, used by DIAG4 to widen bytes to words
+ movq m3, [r3] ; load the four weights pointed to by r3 (4th argument)
+ SPLAT4REGS
+
+ mov r3, rsp ; r3 = write pointer into the temp buffer
+ mov r6, 11 ; first pass: 11 rows, taps at byte offsets -1, 0, 1, 2
+.nextrow
+ DIAG4 r1, -1, 0, 1, 2, r3
+ add r3, 8 ; next temp row
+ add r1, r2 ; next source row
+ dec r6
+ jnz .nextrow
+
+ movq m3, [r4] ; load the four weights pointed to by r4 (5th argument)
+ SPLAT4REGS
+
+ lea r3, [rsp+8] ; start at temp row 1 so the -8 tap reads temp row 0
+ mov r6, 8 ; second pass: 8 output rows, taps at byte offsets -8, 0, 8, 16
+.nextcol
+ DIAG4 r3, -8, 0, 8, 16, r0
+ add r3, 8 ; next temp row
+ add r0, r2 ; next destination row
+ dec r6
+ jnz .nextcol
+
+ mov rsp, r5 ; restore stack pointer
+ RET
+%endmacro
+
+INIT_MMX
+%define DIAG4 DIAG4_MMX
+%define SPLAT4REGS SPLAT4REGS_MMX
+vp6_filter_diag4 mmx, 0
+
+INIT_XMM
+%define DIAG4 DIAG4_SSE2
+%define SPLAT4REGS SPLAT4REGS_SSE2
+vp6_filter_diag4 sse2, 8
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c
index 743bc4361..5120ed231 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c
@@ -1,6 +1,7 @@
/*
- * vp6dsp MMX function declarations
- * Copyright (c) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
+ * VP6 MMX/SSE2 optimizations
+ * Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
+ * Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
*
* This file is part of FFmpeg.
*
@@ -19,12 +20,28 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifndef AVCODEC_X86_VP6DSP_MMX_H
-#define AVCODEC_X86_VP6DSP_MMX_H
-
-#include <stdint.h>
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/vp56dsp.h"
void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride,
const int16_t *h_weights,const int16_t *v_weights);
+void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
+ const int16_t *h_weights,const int16_t *v_weights);
+
+av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec)
+{
+#if HAVE_YASM
+ int mm_flags = mm_support(); /* CPU capability flags, tested against FF_MM_* below */
+
+ if (CONFIG_VP6_DECODER && codec == CODEC_ID_VP6) { /* only VP6 gets an override here */
+ if (mm_flags & FF_MM_MMX) {
+ c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
+ }
-#endif /* AVCODEC_X86_VP6DSP_MMX_H */
+ if (mm_flags & FF_MM_SSE2) { /* tested after MMX, so SSE2 wins when both flags are set */
+ c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
+ }
+ }
+#endif
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c
deleted file mode 100644
index 905b3a7f0..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * @file
- * MMX-optimized functions for the VP6 decoder
- *
- * Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
-#include "vp6dsp_mmx.h"
-
-
-#define DIAG4_MMX(in1,in2,in3,in4) \
- "movq "#in1"(%0), %%mm0 \n\t" \
- "movq "#in2"(%0), %%mm1 \n\t" \
- "movq %%mm0, %%mm3 \n\t" \
- "movq %%mm1, %%mm4 \n\t" \
- "punpcklbw %%mm7, %%mm0 \n\t" \
- "punpcklbw %%mm7, %%mm1 \n\t" \
- "punpckhbw %%mm7, %%mm3 \n\t" \
- "punpckhbw %%mm7, %%mm4 \n\t" \
- "pmullw 0(%2), %%mm0 \n\t" /* src[x-8 ] * biweight [0] */ \
- "pmullw 8(%2), %%mm1 \n\t" /* src[x ] * biweight [1] */ \
- "pmullw 0(%2), %%mm3 \n\t" /* src[x-8 ] * biweight [0] */ \
- "pmullw 8(%2), %%mm4 \n\t" /* src[x ] * biweight [1] */ \
- "paddw %%mm1, %%mm0 \n\t" \
- "paddw %%mm4, %%mm3 \n\t" \
- "movq "#in3"(%0), %%mm1 \n\t" \
- "movq "#in4"(%0), %%mm2 \n\t" \
- "movq %%mm1, %%mm4 \n\t" \
- "movq %%mm2, %%mm5 \n\t" \
- "punpcklbw %%mm7, %%mm1 \n\t" \
- "punpcklbw %%mm7, %%mm2 \n\t" \
- "punpckhbw %%mm7, %%mm4 \n\t" \
- "punpckhbw %%mm7, %%mm5 \n\t" \
- "pmullw 16(%2), %%mm1 \n\t" /* src[x+8 ] * biweight [2] */ \
- "pmullw 24(%2), %%mm2 \n\t" /* src[x+16] * biweight [3] */ \
- "pmullw 16(%2), %%mm4 \n\t" /* src[x+8 ] * biweight [2] */ \
- "pmullw 24(%2), %%mm5 \n\t" /* src[x+16] * biweight [3] */ \
- "paddw %%mm2, %%mm1 \n\t" \
- "paddw %%mm5, %%mm4 \n\t" \
- "paddsw %%mm1, %%mm0 \n\t" \
- "paddsw %%mm4, %%mm3 \n\t" \
- "paddsw %%mm6, %%mm0 \n\t" /* Add 64 */ \
- "paddsw %%mm6, %%mm3 \n\t" /* Add 64 */ \
- "psraw $7, %%mm0 \n\t" \
- "psraw $7, %%mm3 \n\t" \
- "packuswb %%mm3, %%mm0 \n\t" \
- "movq %%mm0, (%1) \n\t"
-
-void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride,
- const int16_t *h_weights, const int16_t *v_weights)
-{
- uint8_t tmp[8*11], *t = tmp;
- int16_t weights[4*4];
- int i;
- src -= stride;
-
- for (i=0; i<4*4; i++)
- weights[i] = h_weights[i>>2];
-
- __asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "movq "MANGLE(ff_pw_64)", %%mm6 \n\t"
- "1: \n\t"
- DIAG4_MMX(-1,0,1,2)
- "add $8, %1 \n\t"
- "add %3, %0 \n\t"
- "decl %4 \n\t"
- "jnz 1b \n\t"
- : "+r"(src), "+r"(t)
- : "r"(weights), "r"((x86_reg)stride), "r"(11)
- : "memory");
-
- t = tmp + 8;
- for (i=0; i<4*4; i++)
- weights[i] = v_weights[i>>2];
-
- __asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "movq "MANGLE(ff_pw_64)", %%mm6 \n\t"
- "1: \n\t"
- DIAG4_MMX(-8,0,8,16)
- "add $8, %0 \n\t"
- "add %3, %1 \n\t"
- "decl %4 \n\t"
- "jnz 1b \n\t"
- : "+r"(t), "+r"(dst)
- : "r"(weights), "r"((x86_reg)stride), "r"(8)
- : "memory");
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c
deleted file mode 100644
index bfd733aa7..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * @file
- * SSE2-optimized functions for the VP6 decoder
- *
- * Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
-#include "vp6dsp_sse2.h"
-
-#define DIAG4_SSE2(in1,in2,in3,in4) \
- "movq "#in1"(%0), %%xmm0 \n\t" \
- "movq "#in2"(%0), %%xmm1 \n\t" \
- "punpcklbw %%xmm7, %%xmm0 \n\t" \
- "punpcklbw %%xmm7, %%xmm1 \n\t" \
- "pmullw %%xmm4, %%xmm0 \n\t" /* src[x-8 ] * biweight [0] */ \
- "pmullw %%xmm5, %%xmm1 \n\t" /* src[x ] * biweight [1] */ \
- "paddw %%xmm1, %%xmm0 \n\t" \
- "movq "#in3"(%0), %%xmm1 \n\t" \
- "movq "#in4"(%0), %%xmm2 \n\t" \
- "punpcklbw %%xmm7, %%xmm1 \n\t" \
- "punpcklbw %%xmm7, %%xmm2 \n\t" \
- "pmullw %%xmm6, %%xmm1 \n\t" /* src[x+8 ] * biweight [2] */ \
- "pmullw %%xmm3, %%xmm2 \n\t" /* src[x+16] * biweight [3] */ \
- "paddw %%xmm2, %%xmm1 \n\t" \
- "paddsw %%xmm1, %%xmm0 \n\t" \
- "paddsw "MANGLE(ff_pw_64)", %%xmm0 \n\t" /* Add 64 */ \
- "psraw $7, %%xmm0 \n\t" \
- "packuswb %%xmm0, %%xmm0 \n\t" \
- "movq %%xmm0, (%1) \n\t" \
-
-void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
- const int16_t *h_weights,const int16_t *v_weights)
-{
- uint8_t tmp[8*11], *t = tmp;
- src -= stride;
-
- __asm__ volatile(
- "pxor %%xmm7, %%xmm7 \n\t"
- "movq %4, %%xmm3 \n\t"
- "pshuflw $0, %%xmm3, %%xmm4 \n\t"
- "punpcklqdq %%xmm4, %%xmm4 \n\t"
- "pshuflw $85, %%xmm3, %%xmm5 \n\t"
- "punpcklqdq %%xmm5, %%xmm5 \n\t"
- "pshuflw $170, %%xmm3, %%xmm6 \n\t"
- "punpcklqdq %%xmm6, %%xmm6 \n\t"
- "pshuflw $255, %%xmm3, %%xmm3 \n\t"
- "punpcklqdq %%xmm3, %%xmm3 \n\t"
- "1: \n\t"
- DIAG4_SSE2(-1,0,1,2)
- "add $8, %1 \n\t"
- "add %2, %0 \n\t"
- "decl %3 \n\t"
- "jnz 1b \n\t"
- : "+r"(src), "+r"(t)
- : "g"((x86_reg)stride), "r"(11), "m"(*(const int64_t*)h_weights)
- : "memory");
-
- t = tmp + 8;
-
- __asm__ volatile(
- "movq %4, %%xmm3 \n\t"
- "pshuflw $0, %%xmm3, %%xmm4 \n\t"
- "punpcklqdq %%xmm4, %%xmm4 \n\t"
- "pshuflw $85, %%xmm3, %%xmm5 \n\t"
- "punpcklqdq %%xmm5, %%xmm5 \n\t"
- "pshuflw $170, %%xmm3, %%xmm6 \n\t"
- "punpcklqdq %%xmm6, %%xmm6 \n\t"
- "pshuflw $255, %%xmm3, %%xmm3 \n\t"
- "punpcklqdq %%xmm3, %%xmm3 \n\t"
- "1: \n\t"
- DIAG4_SSE2(-8,0,8,16)
- "add $8, %0 \n\t"
- "add %2, %1 \n\t"
- "decl %3 \n\t"
- "jnz 1b \n\t"
- : "+r"(t), "+r"(dst)
- : "g"((x86_reg)stride), "r"(8), "m"(*(const int64_t*)v_weights)
- : "memory");
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h
deleted file mode 100644
index a30089a3e..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * vp6dsp SSE2 function declarations
- * Copyright (c) 2009 Zuxy Meng <zuxy.meng@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_X86_VP6DSP_SSE2_H
-#define AVCODEC_X86_VP6DSP_SSE2_H
-
-#include <stdint.h>
-
-void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
- const int16_t *h_weights,const int16_t *v_weights);
-
-#endif /* AVCODEC_X86_VP6DSP_SSE2_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
index 40fd0e4e3..ed5cf4602 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
@@ -282,7 +282,7 @@ DECLARE_LOOP_FILTER(sse4)
av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
{
- mm_flags = mm_support();
+ int mm_flags = mm_support();
#if HAVE_YASM
if (mm_flags & FF_MM_MMX) {
@@ -313,14 +313,12 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
/* note that 4-tap width=16 functions are missing because w=16
* is only used for luma, and luma is always a copy or sixtap. */
if (mm_flags & FF_MM_MMX2) {
-#if ARCH_X86_32
VP8_LUMA_MC_FUNC(0, 16, mmxext);
VP8_MC_FUNC(1, 8, mmxext);
VP8_MC_FUNC(2, 4, mmxext);
VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
-#endif
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
@@ -344,12 +342,10 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
}
if (mm_flags & (FF_MM_SSE2|FF_MM_SSE2SLOW)) {
-#if ARCH_X86_32
VP8_LUMA_MC_FUNC(0, 16, sse2);
VP8_MC_FUNC(1, 8, sse2);
VP8_BILINEAR_MC_FUNC(0, 16, sse2);
VP8_BILINEAR_MC_FUNC(1, 8, sse2);
-#endif
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
@@ -373,14 +369,12 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
}
if (mm_flags & FF_MM_SSSE3) {
-#if ARCH_X86_32
VP8_LUMA_MC_FUNC(0, 16, ssse3);
VP8_MC_FUNC(1, 8, ssse3);
VP8_MC_FUNC(2, 4, ssse3);
VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
-#endif
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
index 6999e87b6..8cdbb3c7a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
@@ -211,7 +211,7 @@ cglobal put_vp8_epel%1_h6_ssse3, 6, 6, %2
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
@@ -242,7 +242,7 @@ cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
@@ -281,7 +281,7 @@ cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
@@ -328,7 +328,7 @@ cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
%endmacro
@@ -381,7 +381,7 @@ cglobal put_vp8_epel4_h4_mmxext, 6, 6
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
@@ -438,7 +438,7 @@ cglobal put_vp8_epel4_h6_mmxext, 6, 6
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
@@ -486,7 +486,7 @@ cglobal put_vp8_epel8_h4_sse2, 6, 6, 10
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
@@ -548,7 +548,7 @@ cglobal put_vp8_epel8_h6_sse2, 6, 6, 14
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
@@ -601,7 +601,7 @@ cglobal put_vp8_epel%2_v4_%1, 7, 7, %3
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
@@ -666,7 +666,7 @@ cglobal put_vp8_epel%2_v6_%1, 7, 7, %3
; go to next line
add r0, r1
add r2, r3
- dec r4 ; next row
+ dec r4d ; next row
jg .nextrow
REP_RET
%endmacro
@@ -718,7 +718,7 @@ cglobal put_vp8_bilinear%2_v_%1, 7,7,%3
lea r0, [r0+r1*2]
lea r2, [r2+r3*2]
- sub r4, 2
+ sub r4d, 2
jg .nextrow
REP_RET
@@ -764,7 +764,7 @@ cglobal put_vp8_bilinear%2_h_%1, 7,7,%3
lea r0, [r0+r1*2]
lea r2, [r2+r3*2]
- sub r4, 2
+ sub r4d, 2
jg .nextrow
REP_RET
%endmacro
@@ -807,7 +807,7 @@ cglobal put_vp8_bilinear%1_v_ssse3, 7,7
lea r0, [r0+r1*2]
lea r2, [r2+r3*2]
- sub r4, 2
+ sub r4d, 2
jg .nextrow
REP_RET
@@ -843,7 +843,7 @@ cglobal put_vp8_bilinear%1_h_ssse3, 7,7
lea r0, [r0+r1*2]
lea r2, [r2+r3*2]
- sub r4, 2
+ sub r4d, 2
jg .nextrow
REP_RET
%endmacro
@@ -1470,8 +1470,8 @@ VP8_DC_WHT sse
pshufb %1, %3
%endmacro
-%macro SIMPLE_LOOPFILTER 3
-cglobal vp8_%2_loop_filter_simple_%1, 3, %3
+%macro SIMPLE_LOOPFILTER 4
+cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4
%if mmsize == 8 ; mmx/mmxext
mov r3, 2
%endif
@@ -1612,21 +1612,21 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3
INIT_MMX
%define SPLATB_REG SPLATB_REG_MMX
-SIMPLE_LOOPFILTER mmx, v, 4
-SIMPLE_LOOPFILTER mmx, h, 5
+SIMPLE_LOOPFILTER mmx, v, 4, 0
+SIMPLE_LOOPFILTER mmx, h, 5, 0
%define SPLATB_REG SPLATB_REG_MMXEXT
-SIMPLE_LOOPFILTER mmxext, v, 4
-SIMPLE_LOOPFILTER mmxext, h, 5
+SIMPLE_LOOPFILTER mmxext, v, 4, 0
+SIMPLE_LOOPFILTER mmxext, h, 5, 0
INIT_XMM
%define SPLATB_REG SPLATB_REG_SSE2
%define WRITE_8W WRITE_8W_SSE2
-SIMPLE_LOOPFILTER sse2, v, 3
-SIMPLE_LOOPFILTER sse2, h, 5
+SIMPLE_LOOPFILTER sse2, v, 3, 8
+SIMPLE_LOOPFILTER sse2, h, 5, 8
%define SPLATB_REG SPLATB_REG_SSSE3
-SIMPLE_LOOPFILTER ssse3, v, 3
-SIMPLE_LOOPFILTER ssse3, h, 5
+SIMPLE_LOOPFILTER ssse3, v, 3, 8
+SIMPLE_LOOPFILTER ssse3, h, 5, 8
%define WRITE_8W WRITE_8W_SSE4
-SIMPLE_LOOPFILTER sse4, h, 5
+SIMPLE_LOOPFILTER sse4, h, 5, 8
;-----------------------------------------------------------------------------
; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
index 84db01ae7..ebaeff16c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
@@ -29,21 +29,12 @@ int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane)
const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
int max_step [4]; /* max pixel step for each plane */
int max_step_comp[4]; /* the component for each plane which has the max pixel step */
- int s, i;
+ int s;
if (desc->flags & PIX_FMT_BITSTREAM)
return (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
- memset(max_step , 0, sizeof(max_step ));
- memset(max_step_comp, 0, sizeof(max_step_comp));
- for (i = 0; i < 4; i++) {
- const AVComponentDescriptor *comp = &(desc->comp[i]);
- if ((comp->step_minus1+1) > max_step[comp->plane]) {
- max_step [comp->plane] = comp->step_minus1+1;
- max_step_comp[comp->plane] = i;
- }
- }
-
+ av_fill_image_max_pixsteps(max_step, max_step_comp, desc);
s = (max_step_comp[plane] == 1 || max_step_comp[plane] == 2) ? desc->log2_chroma_w : 0;
return max_step[plane] * (((width + (1 << s) - 1)) >> s);
}
@@ -65,16 +56,7 @@ int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
return 0;
}
- memset(max_step , 0, sizeof(max_step ));
- memset(max_step_comp, 0, sizeof(max_step_comp));
- for (i = 0; i < 4; i++) {
- const AVComponentDescriptor *comp = &(desc->comp[i]);
- if ((comp->step_minus1+1) > max_step[comp->plane]) {
- max_step [comp->plane] = comp->step_minus1+1;
- max_step_comp[comp->plane] = i;
- }
- }
-
+ av_fill_image_max_pixsteps(max_step, max_step_comp, desc);
for (i = 0; i < 4; i++) {
int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
linesizes[i] = max_step[i] * (((width + (1 << s) - 1)) >> s);
@@ -132,7 +114,7 @@ int av_check_image_size(unsigned int w, unsigned int h, int log_offset, void *lo
{
ImgUtils imgutils = { &imgutils_class, log_offset, log_ctx };
- if((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8)
+ if ((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8)
return 0;
av_log(&imgutils, AV_LOG_ERROR, "Picture size %ux%u is invalid\n", w, h);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
index c2cf6eb53..8e08d4738 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
@@ -24,10 +24,45 @@
* misc image utilities
*/
+#include "libavutil/pixdesc.h"
#include "libavutil/pixfmt.h"
#include "avcore.h"
/**
+ * Compute the max pixel step for each plane of an image with a
+ * format described by pixdesc.
+ *
+ * The pixel step is the distance in bytes between the first byte of
+ * the group of bytes which describe a pixel component and the first
+ * byte of the successive group in the same plane for the same
+ * component.
+ *
+ * @param max_pixsteps an array which is filled with the max pixel step
+ * for each plane. Since a plane may contain different pixel
+ * components, the computed max_pixsteps[plane] is relative to the
+ * component in the plane with the max pixel step.
+ * @param max_pixstep_comps an array which is filled with the component
+ * for each plane which has the max pixel step. May be NULL.
+ */
+static inline void av_fill_image_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
+ const AVPixFmtDescriptor *pixdesc)
+{
+ int i;
+ memset(max_pixsteps, 0, 4*sizeof(max_pixsteps[0])); /* all four planes start with step 0 */
+ if (max_pixstep_comps) /* optional output, may be NULL */
+ memset(max_pixstep_comps, 0, 4*sizeof(max_pixstep_comps[0]));
+
+ for (i = 0; i < 4; i++) { /* visit all four component descriptors */
+ const AVComponentDescriptor *comp = &(pixdesc->comp[i]);
+ if ((comp->step_minus1+1) > max_pixsteps[comp->plane]) { /* strictly larger: the first component with the max step is kept */
+ max_pixsteps[comp->plane] = comp->step_minus1+1;
+ if (max_pixstep_comps)
+ max_pixstep_comps[comp->plane] = i; /* record which component produced the max */
+ }
+ }
+}
+
+/**
* Compute the size of an image line with format pix_fmt and width
* width for the plane plane.
*
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
index 13fc7cdc2..5d5e0f2c4 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
@@ -35,6 +35,7 @@
#include <stdlib.h>
#include <string.h>
#include "attributes.h"
+#include "libavutil/avconfig.h"
#if defined(_MSC_VER) & !defined(__cplusplus)
# define inline __inline
@@ -42,6 +43,12 @@
#ifdef HAVE_AV_CONFIG_H
+#if AV_HAVE_BIGENDIAN /* AV_NE(be, le): pick one of two expressions based on AV_HAVE_BIGENDIAN */
+# define AV_NE(be, le) (be) /* expands to the first argument */
+#else
+# define AV_NE(be, le) (le) /* expands to the second argument */
+#endif
+
//rounded division & shift
#define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
/* assume b>0 */