Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: XhmikosR <xhmikosr@users.sourceforge.net> 2010-10-01 14:29:33 +0400
committer: XhmikosR <xhmikosr@users.sourceforge.net> 2010-10-01 14:29:33 +0400
commit: 5067b0b48d4d15b24669c8032b28e90e45202801 (patch)
tree: d39e51c7ac00e7b42701baf82fedb7dc12856923 /src/filters/transform
parent: 907cde437174371aac6741c1d435a0088d66f459 (diff)
legacy branch: merge changes from trunk 2635-2642,2645,2658-2660, updated apps project files
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/branches/legacy@2661 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/filters/transform')
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/config.h | 1
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt | 1
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h | 17
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h | 4
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c | 17
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c | 15
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c | 6
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c | 11
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c | 29
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h | 3
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c | 5
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c | 1
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm | 42
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm | 109
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c | 213
15 files changed, 288 insertions, 186 deletions
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/config.h b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
index 5ca6a5486..65a777263 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/config.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
@@ -79,6 +79,7 @@
#define ARCH_SPARC64 0
#define ARCH_TOMI 0
+#define HAVE_ALIGNED_STACK 0
#define HAVE_ALTIVEC 0
#define HAVE_ARMV5TE 0
#define HAVE_ARMV6 0
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
index 564cc184f..cd0b76ee0 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
@@ -7,7 +7,6 @@ The following files have MPC-specific custom code (compared to ffdshow):
* libavcodec/allcodecs.c
* libavcodec/bitstream.c
* libavcodec/CompilatorVersion.c
-* libavcodec/dsputil.c
* libavcodec/dxva.h
* libavcodec/h264.c
* libavcodec/mpc_helper.c
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
index c5f35eda2..61e6c5620 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
@@ -45,8 +45,8 @@
#include "libavutil/cpu.h"
#define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 87
-#define LIBAVCODEC_VERSION_MICRO 5
+#define LIBAVCODEC_VERSION_MINOR 91
+#define LIBAVCODEC_VERSION_MICRO 1
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \
@@ -68,6 +68,9 @@
#ifndef FF_API_MM_FLAGS
#define FF_API_MM_FLAGS (LIBAVCODEC_VERSION_MAJOR < 53)
#endif
+#ifndef FF_API_OPT_SHOW
+#define FF_API_OPT_SHOW (LIBAVCODEC_VERSION_MAJOR < 53)
+#endif
#define AV_NOPTS_VALUE INT64_C(0x8000000000000000)
#define AV_TIME_BASE 1000000
@@ -3099,6 +3102,8 @@ typedef struct AVCodecParserContext {
int flags;
#define PARSER_FLAG_COMPLETE_FRAMES 0x0001
#define PARSER_FLAG_ONCE 0x0002
+/// Set if the parser has a valid file offset
+#define PARSER_FLAG_FETCHED_OFFSET 0x0004
int64_t offset; ///< byte offset from starting packet start
int64_t cur_frame_end[AV_PARSER_PTS_NB];
@@ -3293,15 +3298,15 @@ void av_fast_malloc(void *ptr, unsigned int *size, unsigned int min_size);
*/
attribute_deprecated void av_free_static(void);
+#if LIBAVCODEC_VERSION_MAJOR < 53
/**
- * Copy image data in src_data to dst_data.
- *
- * @param dst_linesize linesizes for the image in dst_data
- * @param src_linesize linesizes for the image in src_data
+ * @deprecated Deprecated in favor of av_image_copy().
*/
+attribute_deprecated
void av_picture_data_copy(uint8_t *dst_data[4], int dst_linesize[4],
uint8_t *src_data[4], int src_linesize[4],
enum PixelFormat pix_fmt, int width, int height);
+#endif
/**
* Copy image src to dst. Wraps av_picture_data_copy() above.
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h
index 3899af4cf..ed4e7511c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/golomb.h
@@ -65,14 +65,14 @@ static inline int get_ue_golomb(GetBitContext *gb){
OPEN_READER(re, gb);
/* ffdshow custom code */
- #if defined(__INTEL_COMPILER) || defined(DEBUG)
+ #if defined (__INTEL_COMPILER) && __INTEL_COMPILER < 1100 || defined (DEBUG)
#ifdef ALT_BITSTREAM_READER_LE
re_cache= AV_RL32( ((const uint8_t *)(gb)->buffer)+(re_index>>3) ) >> (re_index&0x07);
#else
re_cache= AV_RB32( ((const uint8_t *)(gb)->buffer)+(re_index>>3) ) >> (re_index&0x07);
#endif
#else
- // ICL9.1-Release and MSVC8-DEBUG build can't process this macro properly.
+ // ICL9.1-Release, ICL10.1 and MSVC8-DEBUG build can't process this macro properly.
UPDATE_CACHE(re, gb);
#endif
buf=GET_CACHE(re, gb);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
index 7ab3e6311..260d9460c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
@@ -25,6 +25,7 @@
* @author Michael Niedermayer <michaelni@gmx.at>
*/
+#include "libavcore/imgutils.h"
#include "internal.h"
#include "dsputil.h"
#include "avcodec.h"
@@ -1836,6 +1837,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
if(h0->current_slice == 0){
while(h->frame_num != h->prev_frame_num &&
h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
+ Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
if (ff_h264_frame_start(h) < 0)
return -1;
@@ -1844,6 +1846,21 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
s->current_picture_ptr->frame_num= h->prev_frame_num;
ff_generate_sliding_window_mmcos(h);
ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+ /* Error concealment: if a ref is missing, copy the previous ref in its place.
+ * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
+ * about there being no actual duplicates.
+ * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're
+ * concealing a lost frame, this probably isn't noticable by comparison, but it should
+ * be fixed. */
+ if (h->short_ref_count) {
+ if (prev) {
+ av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
+ (const uint8_t**)prev->data, prev->linesize,
+ PIX_FMT_YUV420P, s->mb_width*16, s->mb_height*16);
+ h->short_ref[0]->poc = prev->poc+2;
+ }
+ h->short_ref[0]->frame_num = h->prev_frame_num;
+ }
}
/* See if we have a decoded first field looking for a pair... */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
index 489c66f1d..c4d0b085f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpegvideo.c
@@ -164,6 +164,18 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
#if HAVE_MMX
MPV_common_init_mmx(s);
+#elif ARCH_ALPHA
+ MPV_common_init_axp(s);
+#elif CONFIG_MLIB
+ MPV_common_init_mlib(s);
+#elif HAVE_MMI
+ MPV_common_init_mmi(s);
+#elif ARCH_ARM
+ MPV_common_init_arm(s);
+#elif HAVE_ALTIVEC
+ MPV_common_init_altivec(s);
+#elif ARCH_BFIN
+ MPV_common_init_bfin(s);
#endif
/* load & permutate scantables
@@ -485,7 +497,7 @@ av_cold int MPV_common_init(MpegEncContext *s)
return -1;
}
- if((s->width || s->height) && av_check_image_size(s->width, s->height, 0, s->avctx))
+ if((s->width || s->height) && av_image_check_size(s->width, s->height, 0, s->avctx))
return -1;
dsputil_init(&s->dsp, s->avctx);
@@ -1081,6 +1093,7 @@ void MPV_frame_end(MpegEncContext *s)
*/
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
+ /* ffdshow custom code */
if(!pict || !pict->mb_type) return;
if (s->avctx->debug_mv && pict->motion_val) {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c
index ed26dd603..04084dd07 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/parser.c
@@ -150,6 +150,12 @@ int av_parser_parse2(AVCodecParserContext *s,
int index, i;
uint8_t dummy_buf[FF_INPUT_BUFFER_PADDING_SIZE];
+ if(!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
+ s->next_frame_offset =
+ s->cur_offset = pos;
+ s->flags |= PARSER_FLAG_FETCHED_OFFSET;
+ }
+
if (buf_size == 0) {
/* padding is always necessary even if EOF, so we add it here */
memset(dummy_buf, 0, sizeof(dummy_buf));
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
index bf624cdc9..038542eba 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
@@ -740,6 +740,17 @@ unsigned avcodec_version( void )
return LIBAVCODEC_VERSION_INT;
}
+const char *avcodec_configuration(void)
+{
+ return FFMPEG_CONFIGURATION;
+}
+
+const char *avcodec_license(void)
+{
+#define LICENSE_PREFIX "libavcodec license: "
+ return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+}
+
void avcodec_init(void)
{
static int initialized = 0;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c
index c9ea233ed..4e7298f61 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.c
@@ -306,13 +306,17 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte
{
v->zz_8x4 = wmv2_scantableA;
v->zz_4x8 = wmv2_scantableB;
- v->res_sm = get_bits(gb, 2); //reserved
- if (v->res_sm)
+ v->res_y411 = get_bits1(gb);
+ v->res_sprite = get_bits1(gb);
+ if (v->res_y411)
{
av_log(avctx, AV_LOG_ERROR,
- "Reserved RES_SM=%i is forbidden\n", v->res_sm);
+ "Old interlaced mode is not supported\n");
return -1;
}
+ if (v->res_sprite) {
+ av_log(avctx, AV_LOG_ERROR, "WMVP is not fully supported\n");
+ }
}
// (fps-2)/4 (->30)
@@ -382,7 +386,21 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte
v->quantizer_mode = get_bits(gb, 2); //common
v->finterpflag = get_bits1(gb); //common
- v->res_rtm_flag = get_bits1(gb); //reserved
+
+ if (v->res_sprite) {
+ v->s.avctx->width = v->s.avctx->coded_width = get_bits(gb, 11);
+ v->s.avctx->height = v->s.avctx->coded_height = get_bits(gb, 11);
+ skip_bits(gb, 5); //frame rate
+ v->res_x8 = get_bits1(gb);
+ if (get_bits1(gb)) { // something to do with DC VLC selection
+ av_log(avctx, AV_LOG_ERROR, "Unsupported sprite feature\n");
+ return -1;
+ }
+ skip_bits(gb, 3); //slice code
+ v->res_rtm_flag = 0;
+ } else {
+ v->res_rtm_flag = get_bits1(gb); //reserved
+ }
if (!v->res_rtm_flag)
{
// av_log(avctx, AV_LOG_ERROR,
@@ -561,6 +579,9 @@ int vc1_parse_frame_header(VC1Context *v, GetBitContext* gb)
{
int pqindex, lowquant, status;
+ if(v->res_sprite) {
+ skip_bits(gb, 2); //not yet deciphered
+ }
if(v->finterpflag) v->interpfrm = get_bits1(gb);
skip_bits(gb, 2); //framecnt unused
v->rangeredfrm = 0;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h
index 24921dd5e..104c26e53 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1.h
@@ -160,7 +160,8 @@ typedef struct VC1Context{
/** Simple/Main Profile sequence header */
//@{
- int res_sm; ///< reserved, 2b
+ int res_sprite; ///< reserved, sprite mode
+ int res_y411; ///< reserved, old interlaced mode
int res_x8; ///< reserved
int multires; ///< frame-level RESPIC syntax element present
int res_fasttx; ///< reserved, always 1
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
index fe960473f..7f1f0884e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
@@ -3227,6 +3227,11 @@ static int vc1_decode_frame(AVCodecContext *avctx,
}
}
+ if(v->res_sprite && (s->pict_type!=FF_I_TYPE)){
+ av_free(buf2);
+ return -1;
+ }
+
// for hurry_up==5
s->current_picture.pict_type= s->pict_type;
s->current_picture.key_frame= s->pict_type == FF_I_TYPE;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
index 995df0564..ac68a6836 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
@@ -2818,6 +2818,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
c->put_pixels_tab[0][0] = put_pixels16_sse2;
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
H264_QPEL_FUNCS(0, 0, sse2);
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
index b75ec0cc5..1dcd62918 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
@@ -154,9 +154,9 @@ IF%1 mova m5, Z(5)
mova m1, %3 ; wim
mova m3, m5
mulps m2, m0 ; r2*wre
-IF%1 mova m6, Z(6)
+IF%1 mova m6, Z2(6)
mulps m3, m1 ; i2*wim
-IF%1 mova m7, Z(7)
+IF%1 mova m7, Z2(7)
mulps m4, m1 ; r2*wim
mulps m5, m0 ; i2*wre
addps m2, m3 ; r2*wre + i2*wim
@@ -183,14 +183,14 @@ IF%1 mova m7, Z(7)
mova m4, m6
subps m6, m5 ; r3
addps m5, m4 ; r1
- mova Z(6), m6
+ mova Z2(6), m6
mova Z(2), m5
mova m2, Z(3)
addps m3, m0 ; t6
subps m2, m1 ; i3
mova m7, Z(1)
addps m1, Z(3) ; i1
- mova Z(7), m2
+ mova Z2(7), m2
mova Z(3), m1
mova m4, m7
subps m7, m3 ; i2
@@ -208,9 +208,9 @@ IF%1 mova m7, Z(7)
mova m3, m5
mova m1, [wq+o1q] ; wim
mulps m2, m0 ; r2*wre
- mova m6, Z(6) ; r3
+ mova m6, Z2(6) ; r3
mulps m3, m1 ; i2*wim
- mova m7, Z(7) ; i3
+ mova m7, Z2(7) ; i3
mulps m4, m1 ; r2*wim
mulps m5, m0 ; i2*wre
addps m2, m3 ; r2*wre + i2*wim
@@ -237,14 +237,14 @@ IF%1 mova m7, Z(7)
mova m4, m6
subps m6, m5 ; r3
addps m5, m4 ; r1
-IF%1 mova Z(6), m6
+IF%1 mova Z2(6), m6
IF%1 mova Z(2), m5
mova m2, Z(3)
addps m3, m0 ; t6
subps m2, m1 ; i3
mova m7, Z(1)
addps m1, Z(3) ; i1
-IF%1 mova Z(7), m2
+IF%1 mova Z2(7), m2
IF%1 mova Z(3), m1
mova m4, m7
subps m7, m3 ; i2
@@ -262,8 +262,8 @@ IF%1 mova Z(1), m3
mova m2, Z(4)
mova Z(2), m5
mova Z(3), m4
- mova Z(6), m6
- mova Z(7), m0
+ mova Z2(6), m6
+ mova Z2(7), m0
mova m5, m1 ; r0
mova m4, m2 ; r2
unpcklps m1, m3
@@ -287,6 +287,7 @@ INIT_XMM
%define mova movaps
%define Z(x) [r0+mmsize*x]
+%define Z2(x) [r0+mmsize*x]
align 16
fft4_sse:
@@ -326,8 +327,8 @@ fft16_sse:
mova Z(2), m2
mova Z(3), m3
T4_SSE m4, m5, m6
- mova m6, Z(6)
- mova m7, Z(7)
+ mova m6, Z2(6)
+ mova m7, Z2(7)
T4_SSE m6, m7, m0
PASS_SMALL 0, [cos_16], [cos_16+16]
ret
@@ -358,8 +359,8 @@ fft8%1:
T4_3DN m0, m1, m2, m3, m4, m5
mova Z(0), m0
mova Z(2), m2
- T2_3DN m4, m5, Z(4), Z(5)
- T2_3DN m6, m7, Z(6), Z(7)
+ T2_3DN m4, m5, Z(4), Z(5)
+ T2_3DN m6, m7, Z2(6), Z2(7)
pswapd m0, m5
pswapd m2, m7
pxor m0, [ps_m1p1]
@@ -370,7 +371,7 @@ fft8%1:
pfmul m7, [ps_root2]
T4_3DN m1, m3, m5, m7, m0, m2
mova Z(5), m5
- mova Z(7), m7
+ mova Z2(7), m7
mova m0, Z(0)
mova m2, Z(2)
T4_3DN m0, m2, m4, m6, m5, m7
@@ -380,12 +381,12 @@ fft8%1:
mova Z(1), m5
mova Z(2), m2
mova Z(3), m7
- PUNPCK m4, Z(5), m5
- PUNPCK m6, Z(7), m7
+ PUNPCK m4, Z(5), m5
+ PUNPCK m6, Z2(7), m7
mova Z(4), m4
mova Z(5), m5
- mova Z(6), m6
- mova Z(7), m7
+ mova Z2(6), m6
+ mova Z2(7), m7
ret
%endmacro
@@ -405,7 +406,8 @@ FFT48_3DN _3dn2
FFT48_3DN _3dn
-%define Z(x) [zq + o1q*(x&6)*((x/6)^1) + o3q*(x/6) + mmsize*(x&1)]
+%define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)]
+%define Z2(x) [zq + o3q + mmsize*(x&1)]
%macro DECL_PASS 2+ ; name, payload
align 16
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm
index 3311ab559..9c154f80b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm
@@ -245,12 +245,12 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
movsx %1, word [%1]
add %1, 32
sar %1, 6
- movd m0, %1
+ movd m0, %1d
lea %1, [%2*3]
%else
add %3, 32
sar %3, 6
- movd m0, %3
+ movd m0, %3d
lea %3, [%2*3]
%endif
pshufw m0, m0, 0
@@ -759,107 +759,98 @@ cglobal h264_idct_add16_sse2, 5, 5, 8
add16_sse2_cycle 7, 0x26
RET
-; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
-; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
-cglobal h264_idct_add16intra_sse2, 5, 7, 8
- xor r5, r5
-%ifdef ARCH_X86_64
- mov r10, r0
-%endif
-%ifdef PIC
- lea r11, [scan8_mem]
-%endif
-.next2blocks
- movzx r0, byte [scan8+r5]
- movzx r0, word [r4+r0]
+%macro add16intra_sse2_cycle 2
+ movzx r0, word [r4+%2]
test r0, r0
- jz .try_dc
- mov r0d, dword [r1+r5*4]
+ jz .try%1dc
+ mov r0d, dword [r1+%1*8]
%ifdef ARCH_X86_64
add r0, r10
%else
add r0, r0m
%endif
call x264_add8x4_idct_sse2
- add r5, 2
- add r2, 64
- cmp r5, 16
- jl .next2blocks
- REP_RET
-.try_dc
+ jmp .cycle%1end
+.try%1dc
movsx r0, word [r2 ]
or r0w, word [r2+32]
- jz .skip2blocks
- mov r0d, dword [r1+r5*4]
+ jz .cycle%1end
+ mov r0d, dword [r1+%1*8]
%ifdef ARCH_X86_64
add r0, r10
%else
add r0, r0m
%endif
call h264_idct_dc_add8_mmx2
-.skip2blocks
- add r5, 2
+.cycle%1end
+%if %1 < 7
add r2, 64
- cmp r5, 16
- jl .next2blocks
- REP_RET
+%endif
+%endmacro
-h264_idct_add8_sse2_plane:
-.next2blocks
- movzx r0, byte [scan8+r5]
- movzx r0, word [r4+r0]
+; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add16intra_sse2, 5, 7, 8
+%ifdef ARCH_X86_64
+ mov r10, r0
+%endif
+ add16intra_sse2_cycle 0, 0xc
+ add16intra_sse2_cycle 1, 0x14
+ add16intra_sse2_cycle 2, 0xe
+ add16intra_sse2_cycle 3, 0x16
+ add16intra_sse2_cycle 4, 0x1c
+ add16intra_sse2_cycle 5, 0x24
+ add16intra_sse2_cycle 6, 0x1e
+ add16intra_sse2_cycle 7, 0x26
+ RET
+
+%macro add8_sse2_cycle 2
+ movzx r0, word [r4+%2]
test r0, r0
- jz .try_dc
+ jz .try%1dc
%ifdef ARCH_X86_64
- mov r0d, dword [r1+r5*4]
+ mov r0d, dword [r1+%1*8+64]
add r0, [r10]
%else
- mov r0, r1m ; XXX r1m here is actually r0m of the calling func
+ mov r0, r0m
mov r0, [r0]
- add r0, dword [r1+r5*4]
+ add r0, dword [r1+%1*8+64]
%endif
call x264_add8x4_idct_sse2
- add r5, 2
- add r2, 64
- test r5, 3
- jnz .next2blocks
- rep ret
-.try_dc
+ jmp .cycle%1end
+.try%1dc
movsx r0, word [r2 ]
or r0w, word [r2+32]
- jz .skip2blocks
+ jz .cycle%1end
%ifdef ARCH_X86_64
- mov r0d, dword [r1+r5*4]
+ mov r0d, dword [r1+%1*8+64]
add r0, [r10]
%else
- mov r0, r1m ; XXX r1m here is actually r0m of the calling func
+ mov r0, r0m
mov r0, [r0]
- add r0, dword [r1+r5*4]
+ add r0, dword [r1+%1*8+64]
%endif
call h264_idct_dc_add8_mmx2
-.skip2blocks
- add r5, 2
+.cycle%1end
+%if %1 < 3
add r2, 64
- test r5, 3
- jnz .next2blocks
- rep ret
+%endif
+%endmacro
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add8_sse2, 5, 7, 8
- mov r5, 16
add r2, 512
-%ifdef PIC
- lea r11, [scan8_mem]
-%endif
%ifdef ARCH_X86_64
mov r10, r0
%endif
- call h264_idct_add8_sse2_plane
+ add8_sse2_cycle 0, 0x09
+ add8_sse2_cycle 1, 0x11
%ifdef ARCH_X86_64
add r10, gprsize
%else
add r0mp, gprsize
%endif
- call h264_idct_add8_sse2_plane
+ add8_sse2_cycle 2, 0x21
+ add8_sse2_cycle 3, 0x29
RET
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
index efd8b78f1..c3c962ad9 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
@@ -24,7 +24,6 @@
#include "dsputil_mmx.h"
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
-DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
/***********************************/
/* IDCT */
@@ -64,9 +63,122 @@ void ff_h264_idct_add8_sse2 (uint8_t **dest, const int *block_offset, DCTEL
/***********************************/
/* deblocking */
+#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
+ do { \
+ x86_reg b_idx; \
+ mask_mv <<= 3; \
+ for( b_idx=0; b_idx<edges; b_idx+=step ) { \
+ if (!mask_dir) \
+ __asm__ volatile( \
+ "pxor %%mm0, %%mm0 \n\t" \
+ :: \
+ ); \
+ if(!(mask_mv & b_idx)) { \
+ if(bidir) { \
+ __asm__ volatile( \
+ "movd %a3(%0,%2), %%mm2 \n" \
+ "punpckldq %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
+ "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
+ "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
+ "pshufw $0x4E, %%mm2, %%mm3 \n" \
+ "psubb %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
+ "psubb %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
+ \
+ "por %%mm1, %%mm0 \n" \
+ "movq %a5(%1,%2,4), %%mm1 \n" \
+ "movq %a6(%1,%2,4), %%mm2 \n" \
+ "movq %%mm1, %%mm3 \n" \
+ "movq %%mm2, %%mm4 \n" \
+ "psubw 48(%1,%2,4), %%mm1 \n" \
+ "psubw 56(%1,%2,4), %%mm2 \n" \
+ "psubw 208(%1,%2,4), %%mm3 \n" \
+ "psubw 216(%1,%2,4), %%mm4 \n" \
+ "packsswb %%mm2, %%mm1 \n" \
+ "packsswb %%mm4, %%mm3 \n" \
+ "paddb %%mm6, %%mm1 \n" \
+ "paddb %%mm6, %%mm3 \n" \
+ "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+ "psubusb %%mm5, %%mm3 \n" \
+ "packsswb %%mm3, %%mm1 \n" \
+ \
+ "por %%mm1, %%mm0 \n" \
+ "movq %a7(%1,%2,4), %%mm1 \n" \
+ "movq %a8(%1,%2,4), %%mm2 \n" \
+ "movq %%mm1, %%mm3 \n" \
+ "movq %%mm2, %%mm4 \n" \
+ "psubw 48(%1,%2,4), %%mm1 \n" \
+ "psubw 56(%1,%2,4), %%mm2 \n" \
+ "psubw 208(%1,%2,4), %%mm3 \n" \
+ "psubw 216(%1,%2,4), %%mm4 \n" \
+ "packsswb %%mm2, %%mm1 \n" \
+ "packsswb %%mm4, %%mm3 \n" \
+ "paddb %%mm6, %%mm1 \n" \
+ "paddb %%mm6, %%mm3 \n" \
+ "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+ "psubusb %%mm5, %%mm3 \n" \
+ "packsswb %%mm3, %%mm1 \n" \
+ \
+ "pshufw $0x4E, %%mm1, %%mm1 \n" \
+ "por %%mm1, %%mm0 \n" \
+ "pshufw $0x4E, %%mm0, %%mm1 \n" \
+ "pminub %%mm1, %%mm0 \n" \
+ ::"r"(ref), \
+ "r"(mv), \
+ "r"(b_idx), \
+ "i"(d_idx+12), \
+ "i"(d_idx+52), \
+ "i"(d_idx*4+48), \
+ "i"(d_idx*4+56), \
+ "i"(d_idx*4+208), \
+ "i"(d_idx*4+216) \
+ ); \
+ } else { \
+ __asm__ volatile( \
+ "movd 12(%0,%2), %%mm0 \n" \
+ "psubb %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
+ "movq 48(%1,%2,4), %%mm1 \n" \
+ "movq 56(%1,%2,4), %%mm2 \n" \
+ "psubw %a4(%1,%2,4), %%mm1 \n" \
+ "psubw %a5(%1,%2,4), %%mm2 \n" \
+ "packsswb %%mm2, %%mm1 \n" \
+ "paddb %%mm6, %%mm1 \n" \
+ "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+ "packsswb %%mm1, %%mm1 \n" \
+ "por %%mm1, %%mm0 \n" \
+ ::"r"(ref), \
+ "r"(mv), \
+ "r"(b_idx), \
+ "i"(d_idx+12), \
+ "i"(d_idx*4+48), \
+ "i"(d_idx*4+56) \
+ ); \
+ } \
+ } \
+ __asm__ volatile( \
+ "movd 12(%0,%1), %%mm1 \n" \
+ "por %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
+ ::"r"(nnz), \
+ "r"(b_idx), \
+ "i"(d_idx+12) \
+ ); \
+ __asm__ volatile( \
+ "pminub %%mm7, %%mm1 \n" \
+ "pminub %%mm7, %%mm0 \n" \
+ "psllw $1, %%mm1 \n" \
+ "pxor %%mm2, %%mm2 \n" \
+ "pmaxub %%mm0, %%mm1 \n" \
+ "punpcklbw %%mm2, %%mm1 \n" \
+ "movq %%mm1, %a1(%0,%2) \n" \
+ ::"r"(bS), \
+ "i"(32*dir), \
+ "r"(b_idx) \
+ :"memory" \
+ ); \
+ } \
+ } while (0)
+
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
- int dir;
__asm__ volatile(
"movq %0, %%mm7 \n"
"movq %1, %%mm6 \n"
@@ -84,95 +196,11 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
// could do a special case for dir==0 && edges==1, but it only reduces the
// average filter time by 1.2%
- for( dir=1; dir>=0; dir-- ) {
- const x86_reg d_idx = dir ? -8 : -1;
- const int mask_mv = dir ? mask_mv1 : mask_mv0;
- DECLARE_ALIGNED(8, const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
- int b_idx, edge;
- for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
- __asm__ volatile(
- "pand %0, %%mm0 \n\t"
- ::"m"(mask_dir)
- );
- if(!(mask_mv & edge)) {
- if(bidir) {
- __asm__ volatile(
- "movd (%1,%0), %%mm2 \n"
- "punpckldq 40(%1,%0), %%mm2 \n" // { ref0[bn], ref1[bn] }
- "pshufw $0x44, (%1), %%mm0 \n" // { ref0[b], ref0[b] }
- "pshufw $0x44, 40(%1), %%mm1 \n" // { ref1[b], ref1[b] }
- "pshufw $0x4E, %%mm2, %%mm3 \n"
- "psubb %%mm2, %%mm0 \n" // { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] }
- "psubb %%mm3, %%mm1 \n" // { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] }
- "1: \n"
- "por %%mm1, %%mm0 \n"
- "movq (%2,%0,4), %%mm1 \n"
- "movq 8(%2,%0,4), %%mm2 \n"
- "movq %%mm1, %%mm3 \n"
- "movq %%mm2, %%mm4 \n"
- "psubw (%2), %%mm1 \n"
- "psubw 8(%2), %%mm2 \n"
- "psubw 160(%2), %%mm3 \n"
- "psubw 168(%2), %%mm4 \n"
- "packsswb %%mm2, %%mm1 \n"
- "packsswb %%mm4, %%mm3 \n"
- "paddb %%mm6, %%mm1 \n"
- "paddb %%mm6, %%mm3 \n"
- "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
- "psubusb %%mm5, %%mm3 \n"
- "packsswb %%mm3, %%mm1 \n"
- "add $40, %0 \n"
- "cmp $40, %0 \n"
- "jl 1b \n"
- "sub $80, %0 \n"
- "pshufw $0x4E, %%mm1, %%mm1 \n"
- "por %%mm1, %%mm0 \n"
- "pshufw $0x4E, %%mm0, %%mm1 \n"
- "pminub %%mm1, %%mm0 \n"
- ::"r"(d_idx),
- "r"(ref[0]+b_idx),
- "r"(mv[0]+b_idx)
- );
- } else {
- __asm__ volatile(
- "movd (%1), %%mm0 \n"
- "psubb (%1,%0), %%mm0 \n" // ref[b] != ref[bn]
- "movq (%2), %%mm1 \n"
- "movq 8(%2), %%mm2 \n"
- "psubw (%2,%0,4), %%mm1 \n"
- "psubw 8(%2,%0,4), %%mm2 \n"
- "packsswb %%mm2, %%mm1 \n"
- "paddb %%mm6, %%mm1 \n"
- "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
- "packsswb %%mm1, %%mm1 \n"
- "por %%mm1, %%mm0 \n"
- ::"r"(d_idx),
- "r"(ref[0]+b_idx),
- "r"(mv[0]+b_idx)
- );
- }
- }
- __asm__ volatile(
- "movd %0, %%mm1 \n"
- "por %1, %%mm1 \n" // nnz[b] || nnz[bn]
- ::"m"(nnz[b_idx]),
- "m"(nnz[b_idx+d_idx])
- );
- __asm__ volatile(
- "pminub %%mm7, %%mm1 \n"
- "pminub %%mm7, %%mm0 \n"
- "psllw $1, %%mm1 \n"
- "pxor %%mm2, %%mm2 \n"
- "pmaxub %%mm0, %%mm1 \n"
- "punpcklbw %%mm2, %%mm1 \n"
- "movq %%mm1, %0 \n"
- :"=m"(*bS[dir][edge])
- ::"memory"
- );
- }
- edges = 4;
- step = 1;
- }
+ step <<= 3;
+ edges <<= 3;
+ h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, edges, step, mask_mv1, 1, -8, 0);
+ h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, 32, 8, mask_mv0, 0, -1, -1);
+
__asm__ volatile(
"movq (%0), %%mm0 \n\t"
"movq 8(%0), %%mm1 \n\t"
@@ -326,12 +354,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c)
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
-#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
+#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
#endif
+
c->h264_idct_add16 = ff_h264_idct_add16_sse2;
c->h264_idct_add8 = ff_h264_idct_add8_sse2;
c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;