From 49b4ac58249746b3564052782f372056d90e3511 Mon Sep 17 00:00:00 2001 From: XhmikosR Date: Wed, 25 Aug 2010 23:17:34 +0000 Subject: updated ffmpeg git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@2349 10f7b99b-c216-0410-bff0-8a66a9350fd8 --- src/filters/transform/MPCVideoDec/ffmpeg/Makefile | 4 +- .../transform/MPCVideoDec/ffmpeg/Makefile_2010 | 4 +- src/filters/transform/MPCVideoDec/ffmpeg/config.h | 34 +++- .../MPCVideoDec/ffmpeg/libavcodec/dsputil.c | 3 - .../MPCVideoDec/ffmpeg/libavcodec/dsputil.h | 12 +- .../MPCVideoDec/ffmpeg/libavcodec/h263dec.c | 2 +- .../MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c | 4 +- .../MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c | 9 +- .../MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c | 3 +- .../MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c | 8 + .../MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h | 8 + .../transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c | 2 +- .../MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c | 2 +- .../MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c | 2 + .../ffmpeg/libavcodec/x86/dsputil_mmx.c | 16 +- .../MPCVideoDec/ffmpeg/libavcodec/x86/fft.c | 8 +- .../MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c | 4 +- .../MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm | 202 ++++++++++++++++++++- .../MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c | 103 +---------- .../ffmpeg/libavcodec/x86/h264dsp_mmx.c | 2 +- .../ffmpeg/libavcodec/x86/mpegvideo_mmx.c | 2 + .../MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c | 2 +- .../MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm | 173 ++++++++++++++++++ .../ffmpeg/libavcodec/x86/vp56dsp_init.c | 47 +++++ .../MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c | 108 ----------- .../MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h | 30 --- .../ffmpeg/libavcodec/x86/vp6dsp_sse2.c | 98 ---------- .../ffmpeg/libavcodec/x86/vp6dsp_sse2.h | 30 --- .../ffmpeg/libavcodec/x86/vp8dsp-init.c | 8 +- .../MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm | 50 ++--- .../MPCVideoDec/ffmpeg/libavcore/imgutils.c | 26 +-- 
.../MPCVideoDec/ffmpeg/libavcore/imgutils.h | 35 ++++ .../MPCVideoDec/ffmpeg/libavutil/common.h | 7 + 33 files changed, 567 insertions(+), 481 deletions(-) create mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm create mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c delete mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c delete mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h delete mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c delete mode 100644 src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile index 7c1807620..666048770 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile +++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile @@ -155,8 +155,7 @@ SRCS_C=\ $(LAVC_DIR)/x86/vc1dsp_mmx.c \ $(LAVC_DIR)/x86/vp3dsp_mmx.c \ $(LAVC_DIR)/x86/vp3dsp_sse2.c \ - $(LAVC_DIR)/x86/vp6dsp_mmx.c \ - $(LAVC_DIR)/x86/vp6dsp_sse2.c \ + $(LAVC_DIR)/x86/vp56dsp_init.c \ $(LAVC_DIR)/x86/vp8dsp-init.c \ \ $(LAVCORE_DIR)/avcore_utils.c \ @@ -187,6 +186,7 @@ SRCS_YASM=\ $(LAVC_DIR)/x86/h264_intrapred.asm \ $(LAVC_DIR)/x86/h264_weight_sse2.asm \ $(LAVC_DIR)/x86/vc1dsp_yasm.asm \ + $(LAVC_DIR)/x86/vp56dsp.asm \ $(LAVC_DIR)/x86/vp8dsp.asm \ $(LAVC_DIR)/x86/x86util.asm diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 index b722d5960..27b4f57f1 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 +++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 @@ -155,8 +155,7 @@ SRCS_C=\ $(LAVC_DIR)/x86/vc1dsp_mmx.c \ $(LAVC_DIR)/x86/vp3dsp_mmx.c \ $(LAVC_DIR)/x86/vp3dsp_sse2.c \ - $(LAVC_DIR)/x86/vp6dsp_mmx.c \ - $(LAVC_DIR)/x86/vp6dsp_sse2.c \ + $(LAVC_DIR)/x86/vp56dsp_init.c \ 
$(LAVC_DIR)/x86/vp8dsp-init.c \ \ $(LAVCORE_DIR)/avcore_utils.c \ @@ -187,6 +186,7 @@ SRCS_YASM=\ $(LAVC_DIR)/x86/h264_intrapred.asm \ $(LAVC_DIR)/x86/h264_weight_sse2.asm \ $(LAVC_DIR)/x86/vc1dsp_yasm.asm \ + $(LAVC_DIR)/x86/vp56dsp.asm \ $(LAVC_DIR)/x86/vp8dsp.asm \ $(LAVC_DIR)/x86/x86util.asm diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/config.h b/src/filters/transform/MPCVideoDec/ffmpeg/config.h index d3b203c61..507cca37f 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/config.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/config.h @@ -48,11 +48,12 @@ // registry switch is not read) //#define USE_DPRINTF 1 -#define FFMPEG_LICENSE "GPL version 2.1 or later" +#define FFMPEG_CONFIGURATION "ffdshow custom" +#define FFMPEG_LICENSE "GPL version 2 or later" #define CC_TYPE "gcc" #define CC_VERSION __VERSION__ -#define ASMALIGN(ZEROBITS) ".align 1 << " #ZEROBITS "\n\t" +#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t" // MPC custom code for linking with MSVC #if defined(__GNUC__) && ARCH_X86_64 @@ -62,7 +63,36 @@ #endif #define EXTERN_ASM _ +#define ARCH_ALPHA 0 +#define ARCH_ARM 0 +#define ARCH_AVR32 0 +#define ARCH_AVR32_AP 0 +#define ARCH_AVR32_UC 0 +#define ARCH_BFIN 0 +#define ARCH_IA64 0 +#define ARCH_M68K 0 +#define ARCH_MIPS 0 +#define ARCH_MIPS64 0 +#define ARCH_PARISC 0 +#define ARCH_PPC 0 +#define ARCH_PPC64 0 +#define ARCH_S390 0 +#define ARCH_SH4 0 +#define ARCH_SPARC 0 +#define ARCH_SPARC64 0 +#define ARCH_TOMI 0 + #define HAVE_ALTIVEC 0 +#define HAVE_ARMV5TE 0 +#define HAVE_ARMV6 0 +#define HAVE_ARMV6T2 0 +#define HAVE_ARMVFP 0 +#define HAVE_IWMMXT 0 +#define HAVE_MMI 0 +#define HAVE_NEON 0 +#define HAVE_PPC4XX 0 +#define HAVE_VIS 0 + #define HAVE_ALTIVEC_H 0 #define HAVE_BIGENDIAN 0 #define HAVE_BSWAP 1 diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c index eff067ad6..e4a4a7ad6 100644 --- 
a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c @@ -4417,9 +4417,6 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c; } - if (CONFIG_VP6_DECODER) { - c->vp6_filter_diag4= ff_vp6_filter_diag4_c; - } c->h261_loop_filter= h261_loop_filter_c; diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h index 778d3dfc1..cfd1b7f33 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h @@ -357,9 +357,6 @@ typedef struct DSPContext { void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); - void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride, - const int16_t *h_weights,const int16_t *v_weights); - /* assume len is a multiple of 4, and arrays are 16-byte aligned */ void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); @@ -604,7 +601,6 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){ /* should be defined by architectures supporting one or more MultiMedia extension */ int mm_support(void); -extern int mm_flags; void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); @@ -629,16 +625,10 @@ static inline void emms(void) #endif } - -#define emms_c() \ -{\ - if (mm_flags & FF_MM_MMX)\ - emms();\ -} +#define emms_c() emms() #else -#define mm_flags 0 #define mm_support() 0 #endif diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c index 5793dd1af..503c3b6ba 100644 --- 
a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c @@ -542,7 +542,7 @@ retry: #endif #if HAVE_MMX - if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & FF_MM_MMX)){ + if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (mm_support() & FF_MM_MMX)){ avctx->idct_algo= FF_IDCT_XVIDMMX; avctx->coded_width= 0; // force reinit // dsputil_init(&s->dsp, avctx); diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c index 05999713b..fcb4f2011 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c @@ -1024,7 +1024,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits) return -1; - id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16); + id = get_bits_long(&s->gb, 32); id = av_be2ne32(id); len -= 6; @@ -1114,7 +1114,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) /* Apple MJPEG-A */ if ((s->start_code == APP1) && (len > (0x28 - 8))) { - id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16); + id = get_bits_long(&s->gb, 32); id = av_be2ne32(id); len -= 4; if (id == AV_RL32("mjpg")) /* Apple MJPEG-A */ diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c index 4171490ae..489c66f1d 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c @@ -937,7 +937,14 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) return -1; s->current_picture_ptr= pic; - s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic + //FIXME use only the vars 
from current_pic + if(s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO) { + if(s->picture_structure == PICT_FRAME) + s->current_picture_ptr->top_field_first= s->top_field_first; + else + s->current_picture_ptr->top_field_first= (s->picture_structure == PICT_TOP_FIELD) == s->first_field; + } else + s->current_picture_ptr->top_field_first= s->top_field_first; s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence; } diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c index 4983fa7fa..f71ddf2d5 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/msmpeg4.c @@ -1395,8 +1395,7 @@ return -1; #endif if(s->msmpeg4_version==1){ - int start_code; - start_code = (get_bits(&s->gb, 16)<<16) | get_bits(&s->gb, 16); + int start_code = get_bits_long(&s->gb, 32); if(start_code!=0x00000100){ av_log(s->avctx, AV_LOG_ERROR, "invalid startcode\n"); return -1; diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c index f9da3d78d..d67604b01 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c @@ -82,5 +82,13 @@ void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec) } else { s->edge_filter_hor = vp6_edge_filter_hor; s->edge_filter_ver = vp6_edge_filter_ver; + + if (CONFIG_VP6_DECODER) { + s->vp6_filter_diag4= ff_vp6_filter_diag4_c; + } } + + #if HAVE_MMX + ff_vp56dsp_init_x86(s, codec); + #endif } diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h index 2d6941fa2..74a9cb530 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h +++ 
b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.h @@ -22,13 +22,21 @@ #define AVCODEC_VP56DSP_H #include +#include "avcodec.h" typedef struct VP56DSPContext { void (*edge_filter_hor)(uint8_t *yuv, int stride, int t); void (*edge_filter_ver)(uint8_t *yuv, int stride, int t); + + void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride, + const int16_t *h_weights,const int16_t *v_weights); } VP56DSPContext; +void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride, + const int16_t *h_weights, const int16_t *v_weights); + void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec); void ff_vp56dsp_init_arm(VP56DSPContext *s, enum CodecID codec); +void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec); #endif /* AVCODEC_VP56DSP_H */ diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c index de3be4084..57d357023 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c @@ -559,7 +559,7 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src, vp6_filter_hv4(dst, src+offset1, stride, stride, vp6_block_copy_filter[select][y8]); } else { - s->dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride, + s->vp56dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride, vp6_block_copy_filter[select][x8], vp6_block_copy_filter[select][y8]); } diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c index 69a11ee18..1119b5670 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6dsp.c @@ -22,7 +22,7 @@ */ #include "libavutil/common.h" -#include "dsputil.h" +#include "vp56dsp.h" void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride, diff --git 
a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c index 663be3c57..e96e3a93c 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c @@ -137,4 +137,6 @@ int mm_support(void) (rval&FF_MM_3DNOWEXT) ? "3DNowExt ":""); #endif return rval; + + /* TODO: allow overriding with ffdshow settings for disabling extensions */ } diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c index 4add01fe9..c4939ec65 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c @@ -30,15 +30,11 @@ #include "dsputil_mmx.h" #include "vp3dsp_mmx.h" #include "vp3dsp_sse2.h" -#include "vp6dsp_mmx.h" -#include "vp6dsp_sse2.h" #include "idct_xvid.h" //#undef NDEBUG //#include -int mm_flags; /* multimedia extension flags */ - /* pixel operations */ DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; @@ -2504,7 +2500,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { - mm_flags = mm_support(); + int mm_flags = mm_support(); if (avctx->dsp_mask) { if (avctx->dsp_mask & FF_MM_FORCE) @@ -2626,10 +2622,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; - if (CONFIG_VP6_DECODER) { - c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; - } - if (mm_flags & FF_MM_MMX2) { c->prefetch = prefetch_mmx2; @@ -2812,10 +2804,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 1, sse2); H264_QPEL_FUNCS(3, 2, sse2); 
H264_QPEL_FUNCS(3, 3, sse2); - - if (CONFIG_VP6_DECODER) { - c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; - } } #if HAVE_SSSE3 if(mm_flags & FF_MM_SSSE3){ @@ -2898,7 +2886,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if CONFIG_H264DSP void ff_h264dsp_init_x86(H264DSPContext *c) { - mm_flags = mm_support(); + int mm_flags = mm_support(); if (mm_flags & FF_MM_MMX) { c->h264_idct_dc_add= diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c index dba8c3faf..eb5c65ecb 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c @@ -21,16 +21,12 @@ av_cold void ff_fft_init_mmx(FFTContext *s) { -/* Crashes on 64-bit? - * ToDo: verify if that is still the case with the current code and with GCC 4.4.x and above - */ -#if HAVE_YASM && ARCH_X86_32 +#if HAVE_YASM int has_vectors = mm_support(); if (has_vectors & FF_MM_SSE && HAVE_SSE) { /* SSE for P3/P4/K8 */ s->imdct_calc = ff_imdct_calc_sse; - /* crashes DTS decoder */ - //s->imdct_half = ff_imdct_half_sse; + s->imdct_half = ff_imdct_half_sse; s->fft_permute = ff_fft_permute_sse; s->fft_calc = ff_fft_calc_sse; } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) { diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c index 8226ae962..9a8108bdd 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c @@ -56,7 +56,7 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input) { x86_reg j, k; - long n = 1 << s->mdct_bits; + long n = s->mdct_size; long n2 = n >> 1; long n4 = n >> 2; long n8 = n >> 3; @@ -147,7 +147,7 @@ void ff_imdct_half_3dn2(FFTContext *s, FFTSample 
*output, const FFTSample *input void ff_imdct_calc_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input) { x86_reg j, k; - long n = 1 << s->mdct_bits; + long n = s->mdct_size; long n4 = n >> 2; ff_imdct_half_3dn2(s, output+n4, input); diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm index 23a360fa6..31176d6c9 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm @@ -29,6 +29,23 @@ %include "x86inc.asm" +%ifdef ARCH_X86_64 +%define pointer resq +%else +%define pointer resd +%endif + +struc FFTContext + .nbits: resd 1 + .reverse: resd 1 + .revtab: pointer 1 + .tmpbuf: pointer 1 + .mdctsize: resd 1 + .mdctbits: resd 1 + .tcos: pointer 1 + .tsin: pointer 1 +endstruc + SECTION_RODATA %define M_SQRT1_2 0.70710678118654752440 @@ -428,6 +445,16 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0 %define SECTION_REL %endif +%macro FFT_DISPATCH 2; clobbers 5 GPRs, 8 XMMs + lea r2, [dispatch_tab%1] + mov r2, [r2 + (%2q-2)*gprsize] +%ifdef PIC + lea r3, [$$] + add r2, r3 +%endif + call r2 +%endmacro ; FFT_DISPATCH + %macro DECL_FFT 2-3 ; nbits, cpu, suffix %xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL %if %1==5 @@ -464,13 +491,7 @@ section .text ; On x86_32, this function does the register saving and restoring for all of fft. ; The others pass args in registers and don't spill anything. 
cglobal fft_dispatch%3%2, 2,5,8, z, nbits - lea r2, [dispatch_tab%3%2] - mov r2, [r2 + (nbitsq-2)*gprsize] -%ifdef PIC - lea r3, [$$] - add r2, r3 -%endif - call r2 + FFT_DISPATCH %3%2, nbits RET %endmacro ; DECL_FFT @@ -481,3 +502,170 @@ DECL_FFT 4, _3dn, _interleave DECL_FFT 4, _3dn2 DECL_FFT 4, _3dn2, _interleave +INIT_XMM +%undef mulps +%undef addps +%undef subps +%undef unpcklps +%undef unpckhps + +%macro PREROTATER 5 ;-2*k, 2*k, input+n4, tcos+n8, tsin+n8 + movaps xmm0, [%3+%2*4] + movaps xmm1, [%3+%1*4-0x10] + movaps xmm2, xmm0 + shufps xmm0, xmm1, 0x88 + shufps xmm1, xmm2, 0x77 + movlps xmm4, [%4+%2*2] + movlps xmm5, [%5+%2*2+0x0] + movhps xmm4, [%4+%1*2-0x8] + movhps xmm5, [%5+%1*2-0x8] + movaps xmm2, xmm0 + movaps xmm3, xmm1 + mulps xmm0, xmm5 + mulps xmm1, xmm4 + mulps xmm2, xmm4 + mulps xmm3, xmm5 + subps xmm1, xmm0 + addps xmm2, xmm3 + movaps xmm0, xmm1 + unpcklps xmm1, xmm2 + unpckhps xmm0, xmm2 +%endmacro + +%macro PREROTATEW 3 ;addr1, addr2, xmm + movlps %1, %3 + movhps %2, %3 +%endmacro + +%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 + movaps xmm6, [%4+%1*2] + movaps %2, [%4+%1*2+0x10] + movaps %3, xmm6 + movaps xmm7, %2 + mulps xmm6, [%5+%1*1] + mulps %2, [%6+%1*1] + mulps %3, [%6+%1*1] + mulps xmm7, [%5+%1*1] + subps %2, xmm6 + addps %3, xmm7 +%endmacro + +%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 +.post: + CMUL %1, xmm0, xmm1, %3, %4, %5 + CMUL %2, xmm4, xmm5, %3, %4, %5 + shufps xmm1, xmm1, 0x1b + shufps xmm5, xmm5, 0x1b + movaps xmm6, xmm4 + unpckhps xmm4, xmm1 + unpcklps xmm6, xmm1 + movaps xmm2, xmm0 + unpcklps xmm0, xmm5 + unpckhps xmm2, xmm5 + movaps [%3+%2*2], xmm6 + movaps [%3+%2*2+0x10], xmm4 + movaps [%3+%1*2], xmm0 + movaps [%3+%1*2+0x10], xmm2 + sub %2, 0x10 + add %1, 0x10 + jl .post +%endmacro + +cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input +%ifdef ARCH_X86_64 +%define rrevtab r10 +%define rtcos r11 +%define rtsin r12 + push r10 + push r11 + push r12 + push r13 + push r14 +%else 
+%define rrevtab r6 +%define rtsin r6 +%define rtcos r5 +%endif + mov r3d, [r0+FFTContext.mdctsize] + add r2, r3 + shr r3, 1 + mov rtcos, [r0+FFTContext.tcos] + mov rtsin, [r0+FFTContext.tsin] + add rtcos, r3 + add rtsin, r3 +%ifndef ARCH_X86_64 + push rtcos + push rtsin +%endif + shr r3, 1 + mov rrevtab, [r0+FFTContext.revtab] + add rrevtab, r3 +%ifndef ARCH_X86_64 + push rrevtab +%endif + + sub r3, 4 +%ifdef ARCH_X86_64 + xor r4, r4 + sub r4, r3 +%endif +.pre: +%ifndef ARCH_X86_64 +;unspill + xor r4, r4 + sub r4, r3 + mov rtsin, [esp+4] + mov rtcos, [esp+8] +%endif + + PREROTATER r4, r3, r2, rtcos, rtsin +%ifdef ARCH_X86_64 + movzx r5, word [rrevtab+r4*1-4] + movzx r6, word [rrevtab+r4*1-2] + movzx r13, word [rrevtab+r3*1] + movzx r14, word [rrevtab+r3*1+2] + PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0 + PREROTATEW [r1+r13*8], [r1+r14*8], xmm1 + add r4, 4 +%else + mov r6, [esp] + movzx r5, word [r6+r4*1-4] + movzx r4, word [r6+r4*1-2] + PREROTATEW [r1+r5*8], [r1+r4*8], xmm0 + movzx r5, word [r6+r3*1] + movzx r4, word [r6+r3*1+2] + PREROTATEW [r1+r5*8], [r1+r4*8], xmm1 +%endif + sub r3, 4 + jns .pre + + mov r5, r0 + mov r6, r1 + mov r0, r1 + mov r1d, [r5+FFTContext.nbits] + + FFT_DISPATCH _sse, r1 + + mov r0d, [r5+FFTContext.mdctsize] + add r6, r0 + shr r0, 1 +%ifndef ARCH_X86_64 +%define rtcos r2 +%define rtsin r3 + mov rtcos, [esp+8] + mov rtsin, [esp+4] +%endif + neg r0 + mov r1, -16 + sub r1, r0 + POSROTATESHUF r0, r1, r6, rtcos, rtsin +%ifdef ARCH_X86_64 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 +%else + add esp, 12 +%endif + RET diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c index 726e186b5..c4082b15b 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_sse.c @@ -71,111 +71,10 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z) memcpy(z, s->tmp_buf, 
n*sizeof(FFTComplex)); } -void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input) -{ - av_unused x86_reg i, j, k, l; - long n = 1 << s->mdct_bits; - long n2 = n >> 1; - long n4 = n >> 2; - long n8 = n >> 3; - const uint16_t *revtab = s->revtab + n8; - const FFTSample *tcos = s->tcos; - const FFTSample *tsin = s->tsin; - FFTComplex *z = (FFTComplex *)output; - - /* pre rotation */ - for(k=n8-2; k>=0; k-=2) { - __asm__ volatile( - "movaps (%2,%1,2), %%xmm0 \n" // { z[k].re, z[k].im, z[k+1].re, z[k+1].im } - "movaps -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } - "movaps %%xmm0, %%xmm2 \n" - "shufps $0x88, %%xmm1, %%xmm0 \n" // { z[k].re, z[k+1].re, z[-k-2].re, z[-k-1].re } - "shufps $0x77, %%xmm2, %%xmm1 \n" // { z[-k-1].im, z[-k-2].im, z[k+1].im, z[k].im } - "movlps (%3,%1), %%xmm4 \n" - "movlps (%4,%1), %%xmm5 \n" - "movhps -8(%3,%0), %%xmm4 \n" // { cos[k], cos[k+1], cos[-k-2], cos[-k-1] } - "movhps -8(%4,%0), %%xmm5 \n" // { sin[k], sin[k+1], sin[-k-2], sin[-k-1] } - "movaps %%xmm0, %%xmm2 \n" - "movaps %%xmm1, %%xmm3 \n" - "mulps %%xmm5, %%xmm0 \n" // re*sin - "mulps %%xmm4, %%xmm1 \n" // im*cos - "mulps %%xmm4, %%xmm2 \n" // re*cos - "mulps %%xmm5, %%xmm3 \n" // im*sin - "subps %%xmm0, %%xmm1 \n" // -> re - "addps %%xmm3, %%xmm2 \n" // -> im - "movaps %%xmm1, %%xmm0 \n" - "unpcklps %%xmm2, %%xmm1 \n" // { z[k], z[k+1] } - "unpckhps %%xmm2, %%xmm0 \n" // { z[-k-2], z[-k-1] } - ::"r"(-4*k), "r"(4*k), - "r"(input+n4), "r"(tcos+n8), "r"(tsin+n8) - ); -#if ARCH_X86_64 - // if we have enough regs, don't let gcc make the luts latency-bound - // but if not, latency is faster than spilling - __asm__("movlps %%xmm0, %0 \n" - "movhps %%xmm0, %1 \n" - "movlps %%xmm1, %2 \n" - "movhps %%xmm1, %3 \n" - :"=m"(z[revtab[-k-2]]), - "=m"(z[revtab[-k-1]]), - "=m"(z[revtab[ k ]]), - "=m"(z[revtab[ k+1]]) - ); -#else - __asm__("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]])); - __asm__("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]])); 
- __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]])); - __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); -#endif - } - - ff_fft_dispatch_sse(z, s->nbits); - - /* post rotation + reinterleave + reorder */ - -#define CMUL(j,xmm0,xmm1)\ - "movaps (%2,"#j",2), %%xmm6 \n"\ - "movaps 16(%2,"#j",2), "#xmm0"\n"\ - "movaps %%xmm6, "#xmm1"\n"\ - "movaps "#xmm0",%%xmm7 \n"\ - "mulps (%3,"#j"), %%xmm6 \n"\ - "mulps (%4,"#j"), "#xmm0"\n"\ - "mulps (%4,"#j"), "#xmm1"\n"\ - "mulps (%3,"#j"), %%xmm7 \n"\ - "subps %%xmm6, "#xmm0"\n"\ - "addps %%xmm7, "#xmm1"\n" - - j = -n2; - k = n2-16; - __asm__ volatile( - "1: \n" - CMUL(%0, %%xmm0, %%xmm1) - CMUL(%1, %%xmm4, %%xmm5) - "shufps $0x1b, %%xmm1, %%xmm1 \n" - "shufps $0x1b, %%xmm5, %%xmm5 \n" - "movaps %%xmm4, %%xmm6 \n" - "unpckhps %%xmm1, %%xmm4 \n" - "unpcklps %%xmm1, %%xmm6 \n" - "movaps %%xmm0, %%xmm2 \n" - "unpcklps %%xmm5, %%xmm0 \n" - "unpckhps %%xmm5, %%xmm2 \n" - "movaps %%xmm6, (%2,%1,2) \n" - "movaps %%xmm4, 16(%2,%1,2) \n" - "movaps %%xmm0, (%2,%0,2) \n" - "movaps %%xmm2, 16(%2,%0,2) \n" - "sub $16, %1 \n" - "add $16, %0 \n" - "jl 1b \n" - :"+&r"(j), "+&r"(k) - :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) - :"memory" - ); -} - void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input) { x86_reg j, k; - long n = 1 << s->mdct_bits; + long n = s->mdct_size; long n4 = n >> 2; ff_imdct_half_sse(s, output+n4, input); diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c index 35a016b2f..4b2e54603 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c @@ -2368,7 +2368,7 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s #if CONFIG_H264PRED void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) { - mm_flags = mm_support(); + int mm_flags = mm_support(); #if HAVE_YASM if 
(mm_flags & FF_MM_MMX) { diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c index f9a8847de..75ec4b2cf 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c @@ -625,6 +625,8 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ void MPV_common_init_mmx(MpegEncContext *s) { + int mm_flags = mm_support(); + if (mm_flags & FF_MM_MMX) { const int dct_algo = s->avctx->dct_algo; diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c index 3ce097894..eb3ad2c32 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c @@ -714,7 +714,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq) #endif void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) { - mm_flags = mm_support(); + int mm_flags = mm_support(); dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx; dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx; diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm new file mode 100644 index 000000000..1b3165e54 --- /dev/null +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm @@ -0,0 +1,173 @@ +;****************************************************************************** +;* MMX/SSE2-optimized functions for the VP6 decoder +;* Copyright (C) 2009 Sebastien Lucas +;* Copyright (C) 2009 Zuxy Meng +;* +;* This file is part of FFmpeg. 
+;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +cextern pw_64 + +SECTION .text + +%macro DIAG4_MMX 6 ; src base, 4 tap offsets, dst: 4-tap filter of 8 pixels, (sum+64)>>7 + movq m0, [%1+%2] + movq m1, [%1+%3] + movq m3, m0 + movq m4, m1 + punpcklbw m0, m7 + punpcklbw m1, m7 + punpckhbw m3, m7 + punpckhbw m4, m7 + pmullw m0, [rsp+8*11] ; src[x-8 ] * biweight [0] + pmullw m1, [rsp+8*12] ; src[x ] * biweight [1] + pmullw m3, [rsp+8*11] ; src[x-8 ] * biweight [0] + pmullw m4, [rsp+8*12] ; src[x ] * biweight [1] + paddw m0, m1 + paddw m3, m4 + movq m1, [%1+%4] + movq m2, [%1+%5] + movq m4, m1 + movq m5, m2 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpckhbw m4, m7 ; high 4 bytes, accumulated into m3 like the first tap pair + punpckhbw m5, m7 ; high 4 bytes, accumulated into m3 like the first tap pair + pmullw m1, [rsp+8*13] ; src[x+8 ] * biweight [2] + pmullw m2, [rsp+8*14] ; src[x+16] * biweight [3] + pmullw m4, [rsp+8*13] ; src[x+8 ] * biweight [2] + pmullw m5, [rsp+8*14] ; src[x+16] * biweight [3] + paddw m1, m2 + paddw m4, m5 + paddsw m0, m1 + paddsw m3, m4 + paddsw m0, m6 ; Add 64 + paddsw m3, m6 ; Add 64 + psraw m0, 7 + psraw m3, 7 + packuswb m0, m3 + movq [%6], m0 +%endmacro + +%macro DIAG4_SSE2 6 + movq m0, [%1+%2] + movq m1, [%1+%3] + punpcklbw m0, m7 + punpcklbw m1, m7 + pmullw m0, m4 ; src[x-8 ] * biweight [0] + pmullw m1, m5 ; src[x ] * 
biweight [1] + paddw m0, m1 + movq m1, [%1+%4] + movq m2, [%1+%5] + punpcklbw m1, m7 + punpcklbw m2, m7 + pmullw m1, m6 ; src[x+8 ] * biweight [2] + pmullw m2, m3 ; src[x+16] * biweight [3] + paddw m1, m2 + paddsw m0, m1 + paddsw m0, [pw_64] ; Add 64 + psraw m0, 7 + packuswb m0, m0 + movq [%6], m0 +%endmacro + +%macro SPLAT4REGS_MMX 0 ; splat the 4 words of m3 into [rsp+8*11..14]; must leave m6 (pw_64) intact + movq m5, m3 + punpcklwd m3, m3 + movq m4, m3 + punpckldq m3, m3 + punpckhdq m4, m4 + punpckhwd m5, m5 + movq m2, m5 ; m2 as scratch: m6 holds pw_64 for DIAG4_MMX + punpckhdq m2, m2 + punpckldq m5, m5 + movq [rsp+8*11], m3 + movq [rsp+8*12], m4 + movq [rsp+8*13], m5 + movq [rsp+8*14], m2 +%endmacro + +%macro SPLAT4REGS_SSE2 0 + pshuflw m4, m3, 0x0 + pshuflw m5, m3, 0x55 + pshuflw m6, m3, 0xAA + pshuflw m3, m3, 0xFF + punpcklqdq m4, m4 + punpcklqdq m5, m5 + punpcklqdq m6, m6 + punpcklqdq m3, m3 +%endmacro + +%macro vp6_filter_diag4 2 +; void ff_vp6_filter_diag4_{mmx,sse2}(uint8_t *dst, uint8_t *src, int stride, +; const int16_t h_weight[4], const int16_t v_weights[4]) +cglobal vp6_filter_diag4_%1, 5, 7, %2 + mov r5, rsp ; backup stack pointer + and rsp, ~(mmsize-1) ; align stack +%ifidn %1, sse2 + sub rsp, 8*11 +%else + sub rsp, 8*15 + movq m6, [pw_64] +%endif +%ifdef ARCH_X86_64 + movsxd r2, r2d +%endif + + sub r1, r2 + + pxor m7, m7 + movq m3, [r3] + SPLAT4REGS + + mov r3, rsp + mov r6, 11 +.nextrow + DIAG4 r1, -1, 0, 1, 2, r3 + add r3, 8 + add r1, r2 + dec r6 + jnz .nextrow + + movq m3, [r4] + SPLAT4REGS + + lea r3, [rsp+8] + mov r6, 8 +.nextcol + DIAG4 r3, -8, 0, 8, 16, r0 + add r3, 8 + add r0, r2 + dec r6 + jnz .nextcol + + mov rsp, r5 ; restore stack pointer + RET +%endmacro + +INIT_MMX +%define DIAG4 DIAG4_MMX +%define SPLAT4REGS SPLAT4REGS_MMX +vp6_filter_diag4 mmx, 0 + +INIT_XMM +%define DIAG4 DIAG4_SSE2 +%define SPLAT4REGS SPLAT4REGS_SSE2 +vp6_filter_diag4 sse2, 8 diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c new file mode 100644 index 000000000..5120ed231 --- /dev/null 
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c @@ -0,0 +1,47 @@ +/* + * VP6 MMX/SSE2 optimizations + * Copyright (C) 2009 Sebastien Lucas + * Copyright (C) 2009 Zuxy Meng + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/x86_cpu.h" +#include "libavcodec/dsputil.h" +#include "libavcodec/vp56dsp.h" + +void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride, + const int16_t *h_weights,const int16_t *v_weights); +void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride, + const int16_t *h_weights,const int16_t *v_weights); + +av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec) +{ +#if HAVE_YASM + int mm_flags = mm_support(); + + if (CONFIG_VP6_DECODER && codec == CODEC_ID_VP6) { + if (mm_flags & FF_MM_MMX) { + c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; + } + + if (mm_flags & FF_MM_SSE2) { + c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; + } + } +#endif +} diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c deleted file mode 100644 index 905b3a7f0..000000000 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.c +++ /dev/null @@ -1,108 
+0,0 @@ -/** - * @file - * MMX-optimized functions for the VP6 decoder - * - * Copyright (C) 2009 Sebastien Lucas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/x86_cpu.h" -#include "libavcodec/dsputil.h" -#include "dsputil_mmx.h" -#include "vp6dsp_mmx.h" - - -#define DIAG4_MMX(in1,in2,in3,in4) \ - "movq "#in1"(%0), %%mm0 \n\t" \ - "movq "#in2"(%0), %%mm1 \n\t" \ - "movq %%mm0, %%mm3 \n\t" \ - "movq %%mm1, %%mm4 \n\t" \ - "punpcklbw %%mm7, %%mm0 \n\t" \ - "punpcklbw %%mm7, %%mm1 \n\t" \ - "punpckhbw %%mm7, %%mm3 \n\t" \ - "punpckhbw %%mm7, %%mm4 \n\t" \ - "pmullw 0(%2), %%mm0 \n\t" /* src[x-8 ] * biweight [0] */ \ - "pmullw 8(%2), %%mm1 \n\t" /* src[x ] * biweight [1] */ \ - "pmullw 0(%2), %%mm3 \n\t" /* src[x-8 ] * biweight [0] */ \ - "pmullw 8(%2), %%mm4 \n\t" /* src[x ] * biweight [1] */ \ - "paddw %%mm1, %%mm0 \n\t" \ - "paddw %%mm4, %%mm3 \n\t" \ - "movq "#in3"(%0), %%mm1 \n\t" \ - "movq "#in4"(%0), %%mm2 \n\t" \ - "movq %%mm1, %%mm4 \n\t" \ - "movq %%mm2, %%mm5 \n\t" \ - "punpcklbw %%mm7, %%mm1 \n\t" \ - "punpcklbw %%mm7, %%mm2 \n\t" \ - "punpckhbw %%mm7, %%mm4 \n\t" \ - "punpckhbw %%mm7, %%mm5 \n\t" \ - "pmullw 16(%2), %%mm1 \n\t" /* src[x+8 ] * biweight [2] */ \ - "pmullw 24(%2), %%mm2 \n\t" /* src[x+16] * biweight 
[3] */ \ - "pmullw 16(%2), %%mm4 \n\t" /* src[x+8 ] * biweight [2] */ \ - "pmullw 24(%2), %%mm5 \n\t" /* src[x+16] * biweight [3] */ \ - "paddw %%mm2, %%mm1 \n\t" \ - "paddw %%mm5, %%mm4 \n\t" \ - "paddsw %%mm1, %%mm0 \n\t" \ - "paddsw %%mm4, %%mm3 \n\t" \ - "paddsw %%mm6, %%mm0 \n\t" /* Add 64 */ \ - "paddsw %%mm6, %%mm3 \n\t" /* Add 64 */ \ - "psraw $7, %%mm0 \n\t" \ - "psraw $7, %%mm3 \n\t" \ - "packuswb %%mm3, %%mm0 \n\t" \ - "movq %%mm0, (%1) \n\t" - -void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride, - const int16_t *h_weights, const int16_t *v_weights) -{ - uint8_t tmp[8*11], *t = tmp; - int16_t weights[4*4]; - int i; - src -= stride; - - for (i=0; i<4*4; i++) - weights[i] = h_weights[i>>2]; - - __asm__ volatile( - "pxor %%mm7, %%mm7 \n\t" - "movq "MANGLE(ff_pw_64)", %%mm6 \n\t" - "1: \n\t" - DIAG4_MMX(-1,0,1,2) - "add $8, %1 \n\t" - "add %3, %0 \n\t" - "decl %4 \n\t" - "jnz 1b \n\t" - : "+r"(src), "+r"(t) - : "r"(weights), "r"((x86_reg)stride), "r"(11) - : "memory"); - - t = tmp + 8; - for (i=0; i<4*4; i++) - weights[i] = v_weights[i>>2]; - - __asm__ volatile( - "pxor %%mm7, %%mm7 \n\t" - "movq "MANGLE(ff_pw_64)", %%mm6 \n\t" - "1: \n\t" - DIAG4_MMX(-8,0,8,16) - "add $8, %0 \n\t" - "add %3, %1 \n\t" - "decl %4 \n\t" - "jnz 1b \n\t" - : "+r"(t), "+r"(dst) - : "r"(weights), "r"((x86_reg)stride), "r"(8) - : "memory"); -} diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h deleted file mode 100644 index 743bc4361..000000000 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_mmx.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * vp6dsp MMX function declarations - * Copyright (c) 2009 Sebastien Lucas - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_X86_VP6DSP_MMX_H -#define AVCODEC_X86_VP6DSP_MMX_H - -#include <stdint.h> - -void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride, - const int16_t *h_weights,const int16_t *v_weights); - -#endif /* AVCODEC_X86_VP6DSP_MMX_H */ diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c deleted file mode 100644 index bfd733aa7..000000000 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.c +++ /dev/null @@ -1,98 +0,0 @@ -/** - * @file - * SSE2-optimized functions for the VP6 decoder - * - * Copyright (C) 2009 Zuxy Meng - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details.
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/x86_cpu.h" -#include "libavcodec/dsputil.h" -#include "dsputil_mmx.h" -#include "vp6dsp_sse2.h" - -#define DIAG4_SSE2(in1,in2,in3,in4) \ - "movq "#in1"(%0), %%xmm0 \n\t" \ - "movq "#in2"(%0), %%xmm1 \n\t" \ - "punpcklbw %%xmm7, %%xmm0 \n\t" \ - "punpcklbw %%xmm7, %%xmm1 \n\t" \ - "pmullw %%xmm4, %%xmm0 \n\t" /* src[x-8 ] * biweight [0] */ \ - "pmullw %%xmm5, %%xmm1 \n\t" /* src[x ] * biweight [1] */ \ - "paddw %%xmm1, %%xmm0 \n\t" \ - "movq "#in3"(%0), %%xmm1 \n\t" \ - "movq "#in4"(%0), %%xmm2 \n\t" \ - "punpcklbw %%xmm7, %%xmm1 \n\t" \ - "punpcklbw %%xmm7, %%xmm2 \n\t" \ - "pmullw %%xmm6, %%xmm1 \n\t" /* src[x+8 ] * biweight [2] */ \ - "pmullw %%xmm3, %%xmm2 \n\t" /* src[x+16] * biweight [3] */ \ - "paddw %%xmm2, %%xmm1 \n\t" \ - "paddsw %%xmm1, %%xmm0 \n\t" \ - "paddsw "MANGLE(ff_pw_64)", %%xmm0 \n\t" /* Add 64 */ \ - "psraw $7, %%xmm0 \n\t" \ - "packuswb %%xmm0, %%xmm0 \n\t" \ - "movq %%xmm0, (%1) \n\t" \ - -void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride, - const int16_t *h_weights,const int16_t *v_weights) -{ - uint8_t tmp[8*11], *t = tmp; - src -= stride; - - __asm__ volatile( - "pxor %%xmm7, %%xmm7 \n\t" - "movq %4, %%xmm3 \n\t" - "pshuflw $0, %%xmm3, %%xmm4 \n\t" - "punpcklqdq %%xmm4, %%xmm4 \n\t" - "pshuflw $85, %%xmm3, %%xmm5 \n\t" - "punpcklqdq %%xmm5, %%xmm5 \n\t" - "pshuflw $170, %%xmm3, %%xmm6 \n\t" - "punpcklqdq %%xmm6, %%xmm6 \n\t" - "pshuflw $255, %%xmm3, %%xmm3 \n\t" - "punpcklqdq %%xmm3, %%xmm3 \n\t" - "1: \n\t" - DIAG4_SSE2(-1,0,1,2) - "add $8, %1 \n\t" - "add %2, %0 \n\t" - "decl %3 \n\t" - "jnz 1b \n\t" - : "+r"(src), "+r"(t) - : "g"((x86_reg)stride), "r"(11), "m"(*(const int64_t*)h_weights) - : "memory"); - - t = tmp + 8; - - __asm__ volatile( - "movq %4, %%xmm3 \n\t" - "pshuflw 
$0, %%xmm3, %%xmm4 \n\t" - "punpcklqdq %%xmm4, %%xmm4 \n\t" - "pshuflw $85, %%xmm3, %%xmm5 \n\t" - "punpcklqdq %%xmm5, %%xmm5 \n\t" - "pshuflw $170, %%xmm3, %%xmm6 \n\t" - "punpcklqdq %%xmm6, %%xmm6 \n\t" - "pshuflw $255, %%xmm3, %%xmm3 \n\t" - "punpcklqdq %%xmm3, %%xmm3 \n\t" - "1: \n\t" - DIAG4_SSE2(-8,0,8,16) - "add $8, %0 \n\t" - "add %2, %1 \n\t" - "decl %3 \n\t" - "jnz 1b \n\t" - : "+r"(t), "+r"(dst) - : "g"((x86_reg)stride), "r"(8), "m"(*(const int64_t*)v_weights) - : "memory"); -} diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h deleted file mode 100644 index a30089a3e..000000000 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp6dsp_sse2.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * vp6dsp SSE2 function declarations - * Copyright (c) 2009 Zuxy Meng - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_X86_VP6DSP_SSE2_H -#define AVCODEC_X86_VP6DSP_SSE2_H - -#include <stdint.h> - -void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride, - const int16_t *h_weights,const int16_t *v_weights); - -#endif /* AVCODEC_X86_VP6DSP_SSE2_H */ diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c index 40fd0e4e3..ed5cf4602 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c @@ -282,7 +282,7 @@ DECLARE_LOOP_FILTER(sse4) av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) { - mm_flags = mm_support(); + int mm_flags = mm_support(); #if HAVE_YASM if (mm_flags & FF_MM_MMX) { @@ -313,14 +313,12 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) /* note that 4-tap width=16 functions are missing because w=16 * is only used for luma, and luma is always a copy or sixtap.
*/ if (mm_flags & FF_MM_MMX2) { -#if ARCH_X86_32 VP8_LUMA_MC_FUNC(0, 16, mmxext); VP8_MC_FUNC(1, 8, mmxext); VP8_MC_FUNC(2, 4, mmxext); VP8_BILINEAR_MC_FUNC(0, 16, mmxext); VP8_BILINEAR_MC_FUNC(1, 8, mmxext); VP8_BILINEAR_MC_FUNC(2, 4, mmxext); -#endif c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; @@ -344,12 +342,10 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) } if (mm_flags & (FF_MM_SSE2|FF_MM_SSE2SLOW)) { -#if ARCH_X86_32 VP8_LUMA_MC_FUNC(0, 16, sse2); VP8_MC_FUNC(1, 8, sse2); VP8_BILINEAR_MC_FUNC(0, 16, sse2); VP8_BILINEAR_MC_FUNC(1, 8, sse2); -#endif c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; @@ -373,14 +369,12 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) } if (mm_flags & FF_MM_SSSE3) { -#if ARCH_X86_32 VP8_LUMA_MC_FUNC(0, 16, ssse3); VP8_MC_FUNC(1, 8, ssse3); VP8_MC_FUNC(2, 4, ssse3); VP8_BILINEAR_MC_FUNC(0, 16, ssse3); VP8_BILINEAR_MC_FUNC(1, 8, ssse3); VP8_BILINEAR_MC_FUNC(2, 4, ssse3); -#endif c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3; diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm index 6999e87b6..8cdbb3c7a 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm @@ -211,7 +211,7 @@ cglobal put_vp8_epel%1_h6_ssse3, 6, 6, %2 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET @@ -242,7 +242,7 @@ cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET @@ -281,7 +281,7 @@ cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET @@ -328,7 
+328,7 @@ cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET %endmacro @@ -381,7 +381,7 @@ cglobal put_vp8_epel4_h4_mmxext, 6, 6 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET @@ -438,7 +438,7 @@ cglobal put_vp8_epel4_h6_mmxext, 6, 6 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET @@ -486,7 +486,7 @@ cglobal put_vp8_epel8_h4_sse2, 6, 6, 10 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET @@ -548,7 +548,7 @@ cglobal put_vp8_epel8_h6_sse2, 6, 6, 14 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET @@ -601,7 +601,7 @@ cglobal put_vp8_epel%2_v4_%1, 7, 7, %3 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET @@ -666,7 +666,7 @@ cglobal put_vp8_epel%2_v6_%1, 7, 7, %3 ; go to next line add r0, r1 add r2, r3 - dec r4 ; next row + dec r4d ; next row jg .nextrow REP_RET %endmacro @@ -718,7 +718,7 @@ cglobal put_vp8_bilinear%2_v_%1, 7,7,%3 lea r0, [r0+r1*2] lea r2, [r2+r3*2] - sub r4, 2 + sub r4d, 2 jg .nextrow REP_RET @@ -764,7 +764,7 @@ cglobal put_vp8_bilinear%2_h_%1, 7,7,%3 lea r0, [r0+r1*2] lea r2, [r2+r3*2] - sub r4, 2 + sub r4d, 2 jg .nextrow REP_RET %endmacro @@ -807,7 +807,7 @@ cglobal put_vp8_bilinear%1_v_ssse3, 7,7 lea r0, [r0+r1*2] lea r2, [r2+r3*2] - sub r4, 2 + sub r4d, 2 jg .nextrow REP_RET @@ -843,7 +843,7 @@ cglobal put_vp8_bilinear%1_h_ssse3, 7,7 lea r0, [r0+r1*2] lea r2, [r2+r3*2] - sub r4, 2 + sub r4d, 2 jg .nextrow REP_RET %endmacro @@ -1470,8 +1470,8 @@ VP8_DC_WHT sse pshufb %1, %3 %endmacro -%macro SIMPLE_LOOPFILTER 3 -cglobal vp8_%2_loop_filter_simple_%1, 3, %3 +%macro SIMPLE_LOOPFILTER 4 +cglobal vp8_%2_loop_filter_simple_%1, 3, %3, %4 %if mmsize == 8 ; mmx/mmxext mov r3, 2 %endif @@ -1612,21 +1612,21 @@ 
cglobal vp8_%2_loop_filter_simple_%1, 3, %3 INIT_MMX %define SPLATB_REG SPLATB_REG_MMX -SIMPLE_LOOPFILTER mmx, v, 4 -SIMPLE_LOOPFILTER mmx, h, 5 +SIMPLE_LOOPFILTER mmx, v, 4, 0 +SIMPLE_LOOPFILTER mmx, h, 5, 0 %define SPLATB_REG SPLATB_REG_MMXEXT -SIMPLE_LOOPFILTER mmxext, v, 4 -SIMPLE_LOOPFILTER mmxext, h, 5 +SIMPLE_LOOPFILTER mmxext, v, 4, 0 +SIMPLE_LOOPFILTER mmxext, h, 5, 0 INIT_XMM %define SPLATB_REG SPLATB_REG_SSE2 %define WRITE_8W WRITE_8W_SSE2 -SIMPLE_LOOPFILTER sse2, v, 3 -SIMPLE_LOOPFILTER sse2, h, 5 +SIMPLE_LOOPFILTER sse2, v, 3, 8 +SIMPLE_LOOPFILTER sse2, h, 5, 8 %define SPLATB_REG SPLATB_REG_SSSE3 -SIMPLE_LOOPFILTER ssse3, v, 3 -SIMPLE_LOOPFILTER ssse3, h, 5 +SIMPLE_LOOPFILTER ssse3, v, 3, 8 +SIMPLE_LOOPFILTER ssse3, h, 5, 8 %define WRITE_8W WRITE_8W_SSE4 -SIMPLE_LOOPFILTER sse4, h, 5 +SIMPLE_LOOPFILTER sse4, h, 5, 8 ;----------------------------------------------------------------------------- ; void vp8_h/v_loop_filter_inner_(uint8_t *dst, [uint8_t *v,] int stride, diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c index 84db01ae7..ebaeff16c 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c @@ -29,21 +29,12 @@ int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane) const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; int max_step [4]; /* max pixel step for each plane */ int max_step_comp[4]; /* the component for each plane which has the max pixel step */ - int s, i; + int s; if (desc->flags & PIX_FMT_BITSTREAM) return (width * (desc->comp[0].step_minus1+1) + 7) >> 3; - memset(max_step , 0, sizeof(max_step )); - memset(max_step_comp, 0, sizeof(max_step_comp)); - for (i = 0; i < 4; i++) { - const AVComponentDescriptor *comp = &(desc->comp[i]); - if ((comp->step_minus1+1) > max_step[comp->plane]) { - max_step [comp->plane] = comp->step_minus1+1; 
- max_step_comp[comp->plane] = i; - } - } - + av_fill_image_max_pixsteps(max_step, max_step_comp, desc); s = (max_step_comp[plane] == 1 || max_step_comp[plane] == 2) ? desc->log2_chroma_w : 0; return max_step[plane] * (((width + (1 << s) - 1)) >> s); } @@ -65,16 +56,7 @@ int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt return 0; } - memset(max_step , 0, sizeof(max_step )); - memset(max_step_comp, 0, sizeof(max_step_comp)); - for (i = 0; i < 4; i++) { - const AVComponentDescriptor *comp = &(desc->comp[i]); - if ((comp->step_minus1+1) > max_step[comp->plane]) { - max_step [comp->plane] = comp->step_minus1+1; - max_step_comp[comp->plane] = i; - } - } - + av_fill_image_max_pixsteps(max_step, max_step_comp, desc); for (i = 0; i < 4; i++) { int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0; linesizes[i] = max_step[i] * (((width + (1 << s) - 1)) >> s); @@ -132,7 +114,7 @@ int av_check_image_size(unsigned int w, unsigned int h, int log_offset, void *lo { ImgUtils imgutils = { &imgutils_class, log_offset, log_ctx }; - if((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8) + if ((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8) return 0; av_log(&imgutils, AV_LOG_ERROR, "Picture size %ux%u is invalid\n", w, h); diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h index c2cf6eb53..8e08d4738 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h @@ -24,9 +24,44 @@ * misc image utilities */ +#include "libavutil/pixdesc.h" #include "libavutil/pixfmt.h" #include "avcore.h" +/** + * Compute the max pixel step for each plane of an image with a + * format described by pixdesc. 
+ * + * The pixel step is the distance in bytes between the first byte of + * the group of bytes which describe a pixel component and the first + * byte of the successive group in the same plane for the same + * component. + * + * @param max_pixsteps an array which is filled with the max pixel step + * for each plane. Since a plane may contain different pixel + * components, the computed max_pixsteps[plane] is relative to the + * component in the plane with the max pixel step. + * @param max_pixstep_comps an array which is filled with the component + * for each plane which has the max pixel step. May be NULL. + */ +static inline void av_fill_image_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4], + const AVPixFmtDescriptor *pixdesc) +{ + int i; + memset(max_pixsteps, 0, 4*sizeof(max_pixsteps[0])); + if (max_pixstep_comps) + memset(max_pixstep_comps, 0, 4*sizeof(max_pixstep_comps[0])); + + for (i = 0; i < 4; i++) { + const AVComponentDescriptor *comp = &(pixdesc->comp[i]); + if ((comp->step_minus1+1) > max_pixsteps[comp->plane]) { + max_pixsteps[comp->plane] = comp->step_minus1+1; + if (max_pixstep_comps) + max_pixstep_comps[comp->plane] = i; + } + } +} + /** * Compute the size of an image line with format pix_fmt and width * width for the plane plane. diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h index 13fc7cdc2..5d5e0f2c4 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h @@ -35,6 +35,7 @@ #include <stdlib.h> #include <string.h> #include "attributes.h" +#include "libavutil/avconfig.h" #if defined(_MSC_VER) & !defined(__cplusplus) # define inline __inline @@ -42,6 +43,12 @@ #ifdef HAVE_AV_CONFIG_H +#if AV_HAVE_BIGENDIAN +# define AV_NE(be, le) (be) +#else +# define AV_NE(be, le) (le) +#endif + //rounded division & shift #define RSHIFT(a,b) ((a) > 0 ?
((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b)) /* assume b>0 */ -- cgit v1.2.3