diff options
author | XhmikosR <xhmikosr@users.sourceforge.net> | 2010-10-01 14:29:33 +0400 |
---|---|---|
committer | XhmikosR <xhmikosr@users.sourceforge.net> | 2010-10-01 14:29:33 +0400 |
commit | 5067b0b48d4d15b24669c8032b28e90e45202801 (patch) | |
tree | d39e51c7ac00e7b42701baf82fedb7dc12856923 /src/filters/transform | |
parent | 907cde437174371aac6741c1d435a0088d66f459 (diff) |
legacy branch: merge changes from trunk 2635-2642,2645,2658-2660, updated apps project files
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/branches/legacy@2661 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/filters/transform')
15 files changed, 288 insertions, 186 deletions
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/config.h b/src/filters/transform/MPCVideoDec/ffmpeg/config.h index 5ca6a5486..65a777263 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/config.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/config.h @@ -79,6 +79,7 @@ #define ARCH_SPARC64 0
#define ARCH_TOMI 0
+#define HAVE_ALIGNED_STACK 0
#define HAVE_ALTIVEC 0
#define HAVE_ARMV5TE 0
#define HAVE_ARMV6 0
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt index 564cc184f..cd0b76ee0 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt +++ b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt @@ -7,7 +7,6 @@ The following files have MPC-specific custom code (compared to ffdshow): * libavcodec/allcodecs.c
* libavcodec/bitstream.c
* libavcodec/CompilatorVersion.c
-* libavcodec/dsputil.c
* libavcodec/dxva.h
* libavcodec/h264.c
* libavcodec/mpc_helper.c
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h index c5f35eda2..61e6c5620 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h @@ -45,8 +45,8 @@ #include "libavutil/cpu.h"
#define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 87
-#define LIBAVCODEC_VERSION_MICRO 5
+#define LIBAVCODEC_VERSION_MINOR 91
+#define LIBAVCODEC_VERSION_MICRO 1
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \
@@ -68,6 +68,9 @@ #ifndef FF_API_MM_FLAGS
#define FF_API_MM_FLAGS (LIBAVCODEC_VERSION_MAJOR < 53)
#endif
+#ifndef FF_API_OPT_SHOW
+#define FF_API_OPT_SHOW (LIBAVCODEC_VERSION_MAJOR < 53)
+#endif
#define AV_NOPTS_VALUE INT64_C(0x8000000000000000)
#define AV_TIME_BASE 1000000
@@ -3099,6 +3102,8 @@ typedef struct AVCodecParserContext { int flags;
#define PARSER_FLAG_COMPLETE_FRAMES 0x0001
#define PARSER_FLAG_ONCE 0x0002
+/// Set if the parser has a valid file offset
+#define PARSER_FLAG_FETCHED_OFFSET 0x0004
int64_t offset; ///< byte offset from starting packet start
int64_t cur_frame_end[AV_PARSER_PTS_NB];
@@ -3293,15 +3298,15 @@ void av_fast_malloc(void *ptr, unsigned int *size, unsigned int min_size); */
attribute_deprecated void av_free_static(void);
+#if LIBAVCODEC_VERSION_MAJOR < 53
/**
- * Copy image data in src_data to dst_data.
- *
- * @param dst_linesize linesizes for the image in dst_data
- * @param src_linesize linesizes for the image in src_data
+ * @deprecated Deprecated in favor of av_image_copy().
*/
+attribute_deprecated
void av_picture_data_copy(uint8_t *dst_data[4], int dst_linesize[4],
uint8_t *src_data[4], int src_linesize[4],
enum PixelFormat pix_fmt, int width, int height);
+#endif
/**
* Copy image src to dst. Wraps av_picture_data_copy() above.
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h index 3899af4cf..ed4e7511c 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h @@ -65,14 +65,14 @@ static inline int get_ue_golomb(GetBitContext *gb){ OPEN_READER(re, gb);
/* ffdshow custom code */
- #if defined(__INTEL_COMPILER) || defined(DEBUG)
+ #if defined (__INTEL_COMPILER) && __INTEL_COMPILER < 1100 || defined (DEBUG)
#ifdef ALT_BITSTREAM_READER_LE
re_cache= AV_RL32( ((const uint8_t *)(gb)->buffer)+(re_index>>3) ) >> (re_index&0x07);
#else
re_cache= AV_RB32( ((const uint8_t *)(gb)->buffer)+(re_index>>3) ) >> (re_index&0x07);
#endif
#else
- // ICL9.1-Release and MSVC8-DEBUG build can't process this macro properly.
+ // ICL9.1-Release, ICL10.1 and MSVC8-DEBUG build can't process this macro properly.
UPDATE_CACHE(re, gb);
#endif
buf=GET_CACHE(re, gb);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c index 7ab3e6311..260d9460c 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c @@ -25,6 +25,7 @@ * @author Michael Niedermayer <michaelni@gmx.at>
*/
+#include "libavcore/imgutils.h"
#include "internal.h"
#include "dsputil.h"
#include "avcodec.h"
@@ -1836,6 +1837,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ if(h0->current_slice == 0){
while(h->frame_num != h->prev_frame_num &&
h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
+ Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
if (ff_h264_frame_start(h) < 0)
return -1;
@@ -1844,6 +1846,21 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ s->current_picture_ptr->frame_num= h->prev_frame_num;
ff_generate_sliding_window_mmcos(h);
ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+ /* Error concealment: if a ref is missing, copy the previous ref in its place.
+ * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
+ * about there being no actual duplicates.
+ * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're
+ * concealing a lost frame, this probably isn't noticeable by comparison, but it should
+ * be fixed. */
+ if (h->short_ref_count) {
+ if (prev) {
+ av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
+ (const uint8_t**)prev->data, prev->linesize,
+ PIX_FMT_YUV420P, s->mb_width*16, s->mb_height*16);
+ h->short_ref[0]->poc = prev->poc+2;
+ }
+ h->short_ref[0]->frame_num = h->prev_frame_num;
+ }
}
/* See if we have a decoded first field looking for a pair... */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c index 489c66f1d..c4d0b085f 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c @@ -164,6 +164,18 @@ av_cold int ff_dct_common_init(MpegEncContext *s) #if HAVE_MMX
MPV_common_init_mmx(s);
+#elif ARCH_ALPHA
+ MPV_common_init_axp(s);
+#elif CONFIG_MLIB
+ MPV_common_init_mlib(s);
+#elif HAVE_MMI
+ MPV_common_init_mmi(s);
+#elif ARCH_ARM
+ MPV_common_init_arm(s);
+#elif HAVE_ALTIVEC
+ MPV_common_init_altivec(s);
+#elif ARCH_BFIN
+ MPV_common_init_bfin(s);
#endif
/* load & permutate scantables
@@ -485,7 +497,7 @@ av_cold int MPV_common_init(MpegEncContext *s) return -1;
}
- if((s->width || s->height) && av_check_image_size(s->width, s->height, 0, s->avctx))
+ if((s->width || s->height) && av_image_check_size(s->width, s->height, 0, s->avctx))
return -1;
dsputil_init(&s->dsp, s->avctx);
@@ -1081,6 +1093,7 @@ void MPV_frame_end(MpegEncContext *s) */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
+ /* ffdshow custom code */
if(!pict || !pict->mb_type) return;
if (s->avctx->debug_mv && pict->motion_val) {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c index ed26dd603..04084dd07 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c @@ -150,6 +150,12 @@ int av_parser_parse2(AVCodecParserContext *s, int index, i;
uint8_t dummy_buf[FF_INPUT_BUFFER_PADDING_SIZE];
+ if(!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
+ s->next_frame_offset =
+ s->cur_offset = pos;
+ s->flags |= PARSER_FLAG_FETCHED_OFFSET;
+ }
+
if (buf_size == 0) {
/* padding is always necessary even if EOF, so we add it here */
memset(dummy_buf, 0, sizeof(dummy_buf));
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c index bf624cdc9..038542eba 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c @@ -740,6 +740,17 @@ unsigned avcodec_version( void ) return LIBAVCODEC_VERSION_INT;
}
+const char *avcodec_configuration(void)
+{
+ return FFMPEG_CONFIGURATION;
+}
+
+const char *avcodec_license(void)
+{
+#define LICENSE_PREFIX "libavcodec license: "
+ return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+}
+
void avcodec_init(void)
{
static int initialized = 0;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c index c9ea233ed..4e7298f61 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c @@ -306,13 +306,17 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte {
v->zz_8x4 = wmv2_scantableA;
v->zz_4x8 = wmv2_scantableB;
- v->res_sm = get_bits(gb, 2); //reserved
- if (v->res_sm)
+ v->res_y411 = get_bits1(gb);
+ v->res_sprite = get_bits1(gb);
+ if (v->res_y411)
{
av_log(avctx, AV_LOG_ERROR,
- "Reserved RES_SM=%i is forbidden\n", v->res_sm);
+ "Old interlaced mode is not supported\n");
return -1;
}
+ if (v->res_sprite) {
+ av_log(avctx, AV_LOG_ERROR, "WMVP is not fully supported\n");
+ }
}
// (fps-2)/4 (->30)
@@ -382,7 +386,21 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte v->quantizer_mode = get_bits(gb, 2); //common
v->finterpflag = get_bits1(gb); //common
- v->res_rtm_flag = get_bits1(gb); //reserved
+
+ if (v->res_sprite) {
+ v->s.avctx->width = v->s.avctx->coded_width = get_bits(gb, 11);
+ v->s.avctx->height = v->s.avctx->coded_height = get_bits(gb, 11);
+ skip_bits(gb, 5); //frame rate
+ v->res_x8 = get_bits1(gb);
+ if (get_bits1(gb)) { // something to do with DC VLC selection
+ av_log(avctx, AV_LOG_ERROR, "Unsupported sprite feature\n");
+ return -1;
+ }
+ skip_bits(gb, 3); //slice code
+ v->res_rtm_flag = 0;
+ } else {
+ v->res_rtm_flag = get_bits1(gb); //reserved
+ }
if (!v->res_rtm_flag)
{
// av_log(avctx, AV_LOG_ERROR,
@@ -561,6 +579,9 @@ int vc1_parse_frame_header(VC1Context *v, GetBitContext* gb) {
int pqindex, lowquant, status;
+ if(v->res_sprite) {
+ skip_bits(gb, 2); //not yet deciphered
+ }
if(v->finterpflag) v->interpfrm = get_bits1(gb);
skip_bits(gb, 2); //framecnt unused
v->rangeredfrm = 0;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h index 24921dd5e..104c26e53 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h @@ -160,7 +160,8 @@ typedef struct VC1Context{ /** Simple/Main Profile sequence header */
//@{
- int res_sm; ///< reserved, 2b
+ int res_sprite; ///< reserved, sprite mode
+ int res_y411; ///< reserved, old interlaced mode
int res_x8; ///< reserved
int multires; ///< frame-level RESPIC syntax element present
int res_fasttx; ///< reserved, always 1
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c index fe960473f..7f1f0884e 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c @@ -3227,6 +3227,11 @@ static int vc1_decode_frame(AVCodecContext *avctx, }
}
+ if(v->res_sprite && (s->pict_type!=FF_I_TYPE)){
+ av_free(buf2);
+ return -1;
+ }
+
// for hurry_up==5
s->current_picture.pict_type= s->pict_type;
s->current_picture.key_frame= s->pict_type == FF_I_TYPE;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c index 995df0564..ac68a6836 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c @@ -2818,6 +2818,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
c->put_pixels_tab[0][0] = put_pixels16_sse2;
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
H264_QPEL_FUNCS(0, 0, sse2);
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm index b75ec0cc5..1dcd62918 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm @@ -154,9 +154,9 @@ IF%1 mova m5, Z(5) mova m1, %3 ; wim mova m3, m5 mulps m2, m0 ; r2*wre -IF%1 mova m6, Z(6) +IF%1 mova m6, Z2(6) mulps m3, m1 ; i2*wim -IF%1 mova m7, Z(7) +IF%1 mova m7, Z2(7) mulps m4, m1 ; r2*wim mulps m5, m0 ; i2*wre addps m2, m3 ; r2*wre + i2*wim @@ -183,14 +183,14 @@ IF%1 mova m7, Z(7) mova m4, m6 subps m6, m5 ; r3 addps m5, m4 ; r1 - mova Z(6), m6 + mova Z2(6), m6 mova Z(2), m5 mova m2, Z(3) addps m3, m0 ; t6 subps m2, m1 ; i3 mova m7, Z(1) addps m1, Z(3) ; i1 - mova Z(7), m2 + mova Z2(7), m2 mova Z(3), m1 mova m4, m7 subps m7, m3 ; i2 @@ -208,9 +208,9 @@ IF%1 mova m7, Z(7) mova m3, m5 mova m1, [wq+o1q] ; wim mulps m2, m0 ; r2*wre - mova m6, Z(6) ; r3 + mova m6, Z2(6) ; r3 mulps m3, m1 ; i2*wim - mova m7, Z(7) ; i3 + mova m7, Z2(7) ; i3 mulps m4, m1 ; r2*wim mulps m5, m0 ; i2*wre addps m2, m3 ; r2*wre + i2*wim @@ -237,14 +237,14 @@ IF%1 mova m7, Z(7) mova m4, m6 subps m6, m5 ; r3 addps m5, m4 ; r1 -IF%1 mova Z(6), m6 +IF%1 mova Z2(6), m6 IF%1 mova Z(2), m5 mova m2, Z(3) addps m3, m0 ; t6 subps m2, m1 ; i3 mova m7, Z(1) addps m1, Z(3) ; i1 -IF%1 mova Z(7), m2 +IF%1 mova Z2(7), m2 IF%1 mova Z(3), m1 mova m4, m7 subps m7, m3 ; i2 @@ -262,8 +262,8 @@ IF%1 mova Z(1), m3 mova m2, Z(4) mova Z(2), m5 mova Z(3), m4 - mova Z(6), m6 - mova Z(7), m0 + mova Z2(6), m6 + mova Z2(7), m0 mova m5, m1 ; r0 mova m4, m2 ; r2 unpcklps m1, m3 @@ -287,6 +287,7 @@ INIT_XMM %define mova movaps %define Z(x) [r0+mmsize*x] +%define Z2(x) [r0+mmsize*x] align 16 fft4_sse: @@ -326,8 +327,8 @@ fft16_sse: mova Z(2), m2 mova Z(3), m3 T4_SSE m4, m5, m6 - mova m6, Z(6) - mova m7, Z(7) + mova m6, Z2(6) + mova m7, Z2(7) T4_SSE m6, m7, m0 PASS_SMALL 0, 
[cos_16], [cos_16+16] ret @@ -358,8 +359,8 @@ fft8%1: T4_3DN m0, m1, m2, m3, m4, m5 mova Z(0), m0 mova Z(2), m2 - T2_3DN m4, m5, Z(4), Z(5) - T2_3DN m6, m7, Z(6), Z(7) + T2_3DN m4, m5, Z(4), Z(5) + T2_3DN m6, m7, Z2(6), Z2(7) pswapd m0, m5 pswapd m2, m7 pxor m0, [ps_m1p1] @@ -370,7 +371,7 @@ fft8%1: pfmul m7, [ps_root2] T4_3DN m1, m3, m5, m7, m0, m2 mova Z(5), m5 - mova Z(7), m7 + mova Z2(7), m7 mova m0, Z(0) mova m2, Z(2) T4_3DN m0, m2, m4, m6, m5, m7 @@ -380,12 +381,12 @@ fft8%1: mova Z(1), m5 mova Z(2), m2 mova Z(3), m7 - PUNPCK m4, Z(5), m5 - PUNPCK m6, Z(7), m7 + PUNPCK m4, Z(5), m5 + PUNPCK m6, Z2(7), m7 mova Z(4), m4 mova Z(5), m5 - mova Z(6), m6 - mova Z(7), m7 + mova Z2(6), m6 + mova Z2(7), m7 ret %endmacro @@ -405,7 +406,8 @@ FFT48_3DN _3dn2 FFT48_3DN _3dn -%define Z(x) [zq + o1q*(x&6)*((x/6)^1) + o3q*(x/6) + mmsize*(x&1)] +%define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)] +%define Z2(x) [zq + o3q + mmsize*(x&1)] %macro DECL_PASS 2+ ; name, payload align 16 diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm index 3311ab559..9c154f80b 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm @@ -245,12 +245,12 @@ cglobal h264_idct8_add_sse2, 3, 4, 10 movsx %1, word [%1] add %1, 32 sar %1, 6 - movd m0, %1 + movd m0, %1d lea %1, [%2*3] %else add %3, 32 sar %3, 6 - movd m0, %3 + movd m0, %3d lea %3, [%2*3] %endif pshufw m0, m0, 0 @@ -759,107 +759,98 @@ cglobal h264_idct_add16_sse2, 5, 5, 8 add16_sse2_cycle 7, 0x26 RET -; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, -; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) -cglobal h264_idct_add16intra_sse2, 5, 7, 8 - xor r5, r5 -%ifdef ARCH_X86_64 - mov r10, r0 -%endif -%ifdef PIC - lea r11, [scan8_mem] -%endif -.next2blocks - movzx r0, byte [scan8+r5] - movzx r0, word [r4+r0] +%macro 
add16intra_sse2_cycle 2 + movzx r0, word [r4+%2] test r0, r0 - jz .try_dc - mov r0d, dword [r1+r5*4] + jz .try%1dc + mov r0d, dword [r1+%1*8] %ifdef ARCH_X86_64 add r0, r10 %else add r0, r0m %endif call x264_add8x4_idct_sse2 - add r5, 2 - add r2, 64 - cmp r5, 16 - jl .next2blocks - REP_RET -.try_dc + jmp .cycle%1end +.try%1dc movsx r0, word [r2 ] or r0w, word [r2+32] - jz .skip2blocks - mov r0d, dword [r1+r5*4] + jz .cycle%1end + mov r0d, dword [r1+%1*8] %ifdef ARCH_X86_64 add r0, r10 %else add r0, r0m %endif call h264_idct_dc_add8_mmx2 -.skip2blocks - add r5, 2 +.cycle%1end +%if %1 < 7 add r2, 64 - cmp r5, 16 - jl .next2blocks - REP_RET +%endif +%endmacro -h264_idct_add8_sse2_plane: -.next2blocks - movzx r0, byte [scan8+r5] - movzx r0, word [r4+r0] +; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, +; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) +cglobal h264_idct_add16intra_sse2, 5, 7, 8 +%ifdef ARCH_X86_64 + mov r10, r0 +%endif + add16intra_sse2_cycle 0, 0xc + add16intra_sse2_cycle 1, 0x14 + add16intra_sse2_cycle 2, 0xe + add16intra_sse2_cycle 3, 0x16 + add16intra_sse2_cycle 4, 0x1c + add16intra_sse2_cycle 5, 0x24 + add16intra_sse2_cycle 6, 0x1e + add16intra_sse2_cycle 7, 0x26 + RET + +%macro add8_sse2_cycle 2 + movzx r0, word [r4+%2] test r0, r0 - jz .try_dc + jz .try%1dc %ifdef ARCH_X86_64 - mov r0d, dword [r1+r5*4] + mov r0d, dword [r1+%1*8+64] add r0, [r10] %else - mov r0, r1m ; XXX r1m here is actually r0m of the calling func + mov r0, r0m mov r0, [r0] - add r0, dword [r1+r5*4] + add r0, dword [r1+%1*8+64] %endif call x264_add8x4_idct_sse2 - add r5, 2 - add r2, 64 - test r5, 3 - jnz .next2blocks - rep ret -.try_dc + jmp .cycle%1end +.try%1dc movsx r0, word [r2 ] or r0w, word [r2+32] - jz .skip2blocks + jz .cycle%1end %ifdef ARCH_X86_64 - mov r0d, dword [r1+r5*4] + mov r0d, dword [r1+%1*8+64] add r0, [r10] %else - mov r0, r1m ; XXX r1m here is actually r0m of the calling func + mov r0, r0m mov r0, [r0] - add r0, dword [r1+r5*4] + 
add r0, dword [r1+%1*8+64] %endif call h264_idct_dc_add8_mmx2 -.skip2blocks - add r5, 2 +.cycle%1end +%if %1 < 3 add r2, 64 - test r5, 3 - jnz .next2blocks - rep ret +%endif +%endmacro ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) cglobal h264_idct_add8_sse2, 5, 7, 8 - mov r5, 16 add r2, 512 -%ifdef PIC - lea r11, [scan8_mem] -%endif %ifdef ARCH_X86_64 mov r10, r0 %endif - call h264_idct_add8_sse2_plane + add8_sse2_cycle 0, 0x09 + add8_sse2_cycle 1, 0x11 %ifdef ARCH_X86_64 add r10, gprsize %else add r0mp, gprsize %endif - call h264_idct_add8_sse2_plane + add8_sse2_cycle 2, 0x21 + add8_sse2_cycle 3, 0x29 RET diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c index efd8b78f1..c3c962ad9 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c @@ -24,7 +24,6 @@ #include "dsputil_mmx.h"
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
-DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
/***********************************/
/* IDCT */
@@ -64,9 +63,122 @@ void ff_h264_idct_add8_sse2 (uint8_t **dest, const int *block_offset, DCTEL /***********************************/
/* deblocking */
+#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
+ do { \
+ x86_reg b_idx; \
+ mask_mv <<= 3; \
+ for( b_idx=0; b_idx<edges; b_idx+=step ) { \
+ if (!mask_dir) \
+ __asm__ volatile( \
+ "pxor %%mm0, %%mm0 \n\t" \
+ :: \
+ ); \
+ if(!(mask_mv & b_idx)) { \
+ if(bidir) { \
+ __asm__ volatile( \
+ "movd %a3(%0,%2), %%mm2 \n" \
+ "punpckldq %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
+ "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
+ "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
+ "pshufw $0x4E, %%mm2, %%mm3 \n" \
+ "psubb %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
+ "psubb %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
+ \
+ "por %%mm1, %%mm0 \n" \
+ "movq %a5(%1,%2,4), %%mm1 \n" \
+ "movq %a6(%1,%2,4), %%mm2 \n" \
+ "movq %%mm1, %%mm3 \n" \
+ "movq %%mm2, %%mm4 \n" \
+ "psubw 48(%1,%2,4), %%mm1 \n" \
+ "psubw 56(%1,%2,4), %%mm2 \n" \
+ "psubw 208(%1,%2,4), %%mm3 \n" \
+ "psubw 216(%1,%2,4), %%mm4 \n" \
+ "packsswb %%mm2, %%mm1 \n" \
+ "packsswb %%mm4, %%mm3 \n" \
+ "paddb %%mm6, %%mm1 \n" \
+ "paddb %%mm6, %%mm3 \n" \
+ "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+ "psubusb %%mm5, %%mm3 \n" \
+ "packsswb %%mm3, %%mm1 \n" \
+ \
+ "por %%mm1, %%mm0 \n" \
+ "movq %a7(%1,%2,4), %%mm1 \n" \
+ "movq %a8(%1,%2,4), %%mm2 \n" \
+ "movq %%mm1, %%mm3 \n" \
+ "movq %%mm2, %%mm4 \n" \
+ "psubw 48(%1,%2,4), %%mm1 \n" \
+ "psubw 56(%1,%2,4), %%mm2 \n" \
+ "psubw 208(%1,%2,4), %%mm3 \n" \
+ "psubw 216(%1,%2,4), %%mm4 \n" \
+ "packsswb %%mm2, %%mm1 \n" \
+ "packsswb %%mm4, %%mm3 \n" \
+ "paddb %%mm6, %%mm1 \n" \
+ "paddb %%mm6, %%mm3 \n" \
+ "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+ "psubusb %%mm5, %%mm3 \n" \
+ "packsswb %%mm3, %%mm1 \n" \
+ \
+ "pshufw $0x4E, %%mm1, %%mm1 \n" \
+ "por %%mm1, %%mm0 \n" \
+ "pshufw $0x4E, %%mm0, %%mm1 \n" \
+ "pminub %%mm1, %%mm0 \n" \
+ ::"r"(ref), \
+ "r"(mv), \
+ "r"(b_idx), \
+ "i"(d_idx+12), \
+ "i"(d_idx+52), \
+ "i"(d_idx*4+48), \
+ "i"(d_idx*4+56), \
+ "i"(d_idx*4+208), \
+ "i"(d_idx*4+216) \
+ ); \
+ } else { \
+ __asm__ volatile( \
+ "movd 12(%0,%2), %%mm0 \n" \
+ "psubb %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
+ "movq 48(%1,%2,4), %%mm1 \n" \
+ "movq 56(%1,%2,4), %%mm2 \n" \
+ "psubw %a4(%1,%2,4), %%mm1 \n" \
+ "psubw %a5(%1,%2,4), %%mm2 \n" \
+ "packsswb %%mm2, %%mm1 \n" \
+ "paddb %%mm6, %%mm1 \n" \
+ "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+ "packsswb %%mm1, %%mm1 \n" \
+ "por %%mm1, %%mm0 \n" \
+ ::"r"(ref), \
+ "r"(mv), \
+ "r"(b_idx), \
+ "i"(d_idx+12), \
+ "i"(d_idx*4+48), \
+ "i"(d_idx*4+56) \
+ ); \
+ } \
+ } \
+ __asm__ volatile( \
+ "movd 12(%0,%1), %%mm1 \n" \
+ "por %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
+ ::"r"(nnz), \
+ "r"(b_idx), \
+ "i"(d_idx+12) \
+ ); \
+ __asm__ volatile( \
+ "pminub %%mm7, %%mm1 \n" \
+ "pminub %%mm7, %%mm0 \n" \
+ "psllw $1, %%mm1 \n" \
+ "pxor %%mm2, %%mm2 \n" \
+ "pmaxub %%mm0, %%mm1 \n" \
+ "punpcklbw %%mm2, %%mm1 \n" \
+ "movq %%mm1, %a1(%0,%2) \n" \
+ ::"r"(bS), \
+ "i"(32*dir), \
+ "r"(b_idx) \
+ :"memory" \
+ ); \
+ } \
+ } while (0)
+
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
- int dir;
__asm__ volatile(
"movq %0, %%mm7 \n"
"movq %1, %%mm6 \n"
@@ -84,95 +196,11 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] // could do a special case for dir==0 && edges==1, but it only reduces the
// average filter time by 1.2%
- for( dir=1; dir>=0; dir-- ) {
- const x86_reg d_idx = dir ? -8 : -1;
- const int mask_mv = dir ? mask_mv1 : mask_mv0;
- DECLARE_ALIGNED(8, const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
- int b_idx, edge;
- for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
- __asm__ volatile(
- "pand %0, %%mm0 \n\t"
- ::"m"(mask_dir)
- );
- if(!(mask_mv & edge)) {
- if(bidir) {
- __asm__ volatile(
- "movd (%1,%0), %%mm2 \n"
- "punpckldq 40(%1,%0), %%mm2 \n" // { ref0[bn], ref1[bn] }
- "pshufw $0x44, (%1), %%mm0 \n" // { ref0[b], ref0[b] }
- "pshufw $0x44, 40(%1), %%mm1 \n" // { ref1[b], ref1[b] }
- "pshufw $0x4E, %%mm2, %%mm3 \n"
- "psubb %%mm2, %%mm0 \n" // { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] }
- "psubb %%mm3, %%mm1 \n" // { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] }
- "1: \n"
- "por %%mm1, %%mm0 \n"
- "movq (%2,%0,4), %%mm1 \n"
- "movq 8(%2,%0,4), %%mm2 \n"
- "movq %%mm1, %%mm3 \n"
- "movq %%mm2, %%mm4 \n"
- "psubw (%2), %%mm1 \n"
- "psubw 8(%2), %%mm2 \n"
- "psubw 160(%2), %%mm3 \n"
- "psubw 168(%2), %%mm4 \n"
- "packsswb %%mm2, %%mm1 \n"
- "packsswb %%mm4, %%mm3 \n"
- "paddb %%mm6, %%mm1 \n"
- "paddb %%mm6, %%mm3 \n"
- "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
- "psubusb %%mm5, %%mm3 \n"
- "packsswb %%mm3, %%mm1 \n"
- "add $40, %0 \n"
- "cmp $40, %0 \n"
- "jl 1b \n"
- "sub $80, %0 \n"
- "pshufw $0x4E, %%mm1, %%mm1 \n"
- "por %%mm1, %%mm0 \n"
- "pshufw $0x4E, %%mm0, %%mm1 \n"
- "pminub %%mm1, %%mm0 \n"
- ::"r"(d_idx),
- "r"(ref[0]+b_idx),
- "r"(mv[0]+b_idx)
- );
- } else {
- __asm__ volatile(
- "movd (%1), %%mm0 \n"
- "psubb (%1,%0), %%mm0 \n" // ref[b] != ref[bn]
- "movq (%2), %%mm1 \n"
- "movq 8(%2), %%mm2 \n"
- "psubw (%2,%0,4), %%mm1 \n"
- "psubw 8(%2,%0,4), %%mm2 \n"
- "packsswb %%mm2, %%mm1 \n"
- "paddb %%mm6, %%mm1 \n"
- "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
- "packsswb %%mm1, %%mm1 \n"
- "por %%mm1, %%mm0 \n"
- ::"r"(d_idx),
- "r"(ref[0]+b_idx),
- "r"(mv[0]+b_idx)
- );
- }
- }
- __asm__ volatile(
- "movd %0, %%mm1 \n"
- "por %1, %%mm1 \n" // nnz[b] || nnz[bn]
- ::"m"(nnz[b_idx]),
- "m"(nnz[b_idx+d_idx])
- );
- __asm__ volatile(
- "pminub %%mm7, %%mm1 \n"
- "pminub %%mm7, %%mm0 \n"
- "psllw $1, %%mm1 \n"
- "pxor %%mm2, %%mm2 \n"
- "pmaxub %%mm0, %%mm1 \n"
- "punpcklbw %%mm2, %%mm1 \n"
- "movq %%mm1, %0 \n"
- :"=m"(*bS[dir][edge])
- ::"memory"
- );
- }
- edges = 4;
- step = 1;
- }
+ step <<= 3;
+ edges <<= 3;
+ h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, edges, step, mask_mv1, 1, -8, 0);
+ h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, 32, 8, mask_mv0, 0, -1, -1);
+
__asm__ volatile(
"movq (%0), %%mm0 \n\t"
"movq 8(%0), %%mm1 \n\t"
@@ -326,12 +354,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c) c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
-#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
+#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
#endif
+
c->h264_idct_add16 = ff_h264_idct_add16_sse2;
c->h264_idct_add8 = ff_h264_idct_add8_sse2;
c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
|