Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorclsid2 <clsid2@users.sourceforge.net>2010-04-19 23:29:00 +0400
committerclsid2 <clsid2@users.sourceforge.net>2010-04-19 23:29:00 +0400
commite220e2c9e47b5c1f98eec2c8deead4074fa07c76 (patch)
treecf616685c536ce5089d76c46dfe3df14b17a3924 /src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86
parent0b7b1d06b32b43517f7b584207533ecfe5ce20fa (diff)
Updated FFmpeg
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1792 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86')
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c4
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c220
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h14
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c30
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h2
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c2
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm19
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c26
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c38
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c1
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h19
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c2
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c2
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c7
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c44
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h1
-rw-r--r--src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c3
17 files changed, 257 insertions, 177 deletions
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c
index 9aa8a52aa..ff359230c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c
@@ -27,8 +27,8 @@
*/
static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, const uint64_t *rnd_reg)
{
- DECLARE_ALIGNED_8(uint64_t, AA);
- DECLARE_ALIGNED_8(uint64_t, DD);
+ DECLARE_ALIGNED(8, uint64_t, AA);
+ DECLARE_ALIGNED(8, uint64_t, DD);
int i;
if(y==0 && x==0) {
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
index 95cd1fe99..efb9651bb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
@@ -24,6 +24,7 @@
#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
+#include "libavcodec/h264dsp.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/simple_idct.h"
#include "dsputil_mmx.h"
@@ -39,38 +40,38 @@
int mm_flags; /* multimedia extension flags */
/* pixel operations */
-DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
-DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] =
+DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
{0x8000000080000000ULL, 0x8000000080000000ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
-
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
-
-DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 };
-DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 };
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
+
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
+
+DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
+DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
@@ -1818,9 +1819,6 @@ PREFETCH(prefetch_3dnow, prefetch)
#include "rv40dsp_mmx.c"
/* CAVS specific */
-void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
-void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
-
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
put_pixels8_mmx(dst, src, stride, 8);
}
@@ -1835,8 +1833,6 @@ void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
}
/* VC1 specific */
-void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
-
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
put_pixels8_mmx(dst, src, stride, 8);
}
@@ -1844,10 +1840,6 @@ void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, in
avg_pixels8_mmx2(dst, src, stride, 8);
}
-/* external functions, from idct_mmx.c */
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
#if CONFIG_GPL
@@ -2026,7 +2018,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c
} else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
MIX5(IF1,IF0);
} else {
- DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4];
+ DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4];
j = 2*in_ch*sizeof(float);
__asm__ volatile(
"1: \n"
@@ -2379,7 +2371,7 @@ static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int al
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
- DECLARE_ALIGNED_16(int16_t, tmp)[len];\
+ DECLARE_ALIGNED(16, int16_t, tmp)[len];\
int i,j,c;\
for(c=0; c<channels; c++){\
float_to_int16_##cpu(tmp, src[c], len);\
@@ -2591,16 +2583,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx;
c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx;
- c->h264_idct_dc_add=
- c->h264_idct_add= ff_h264_idct_add_mmx;
- c->h264_idct8_dc_add=
- c->h264_idct8_add= ff_h264_idct8_add_mmx;
-
- c->h264_idct_add16 = ff_h264_idct_add16_mmx;
- c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
- c->h264_idct_add8 = ff_h264_idct_add8_mmx;
- c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
-
if (CONFIG_VP6_DECODER) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
}
@@ -2622,13 +2604,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
- c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
- c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
- c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
- c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
- c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
- c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
-
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
@@ -2642,6 +2617,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
}
}
+ if (CONFIG_VP3_DECODER) {
+ c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
+ }
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
@@ -2689,31 +2667,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
- c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
- c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
- c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
- c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
- c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
- c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
- c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
-
- c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
- c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
- c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
- c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
- c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
- c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
- c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
- c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
-
- c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
- c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
- c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
- c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
- c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
- c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
- c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
- c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
#if HAVE_YASM && ARCH_X86_32
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
@@ -2793,16 +2746,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
-/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
-*/
H264_QPEL_FUNCS(0, 0, sse2);
}
if(mm_flags & FF_MM_SSE2){
- c->h264_idct8_add = ff_h264_idct8_add_sse2;
- c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
-
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
@@ -2849,26 +2797,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#endif
-#if CONFIG_GPL && HAVE_YASM && ARCH_X86_32
- if (mm_flags & FF_MM_MMX2){
-#if ARCH_X86_32
- c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
- c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
-#endif
- if( mm_flags&FF_MM_SSE2 ){
-#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
- c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
- c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
- c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
- c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
-#endif
- c->h264_idct_add16 = ff_h264_idct_add16_sse2;
- c->h264_idct_add8 = ff_h264_idct_add8_sse2;
- c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
- }
- }
-#endif
-
/* disable audio related ASM for 64-bit builds */
#if ARCH_X86_32
if(mm_flags & FF_MM_3DNOW){
@@ -2916,6 +2844,84 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
dsputilenc_init_mmx(c, avctx);
}
+#if CONFIG_H264DSP
+void ff_h264dsp_init_x86(H264DSPContext *c)
+{
+ mm_flags = mm_support();
+
+ if (mm_flags & FF_MM_MMX) {
+ c->h264_idct_dc_add=
+ c->h264_idct_add= ff_h264_idct_add_mmx;
+ c->h264_idct8_dc_add=
+ c->h264_idct8_add= ff_h264_idct8_add_mmx;
+
+ c->h264_idct_add16 = ff_h264_idct_add16_mmx;
+ c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
+ c->h264_idct_add8 = ff_h264_idct_add8_mmx;
+ c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
+
+ if (mm_flags & FF_MM_MMX2) {
+ c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
+ c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
+ c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
+ c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
+ c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
+ c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
+
+ c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
+ c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
+ c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
+ c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
+ c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
+ c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
+ c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
+
+ c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
+ c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
+ c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
+ c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
+ c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
+ c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
+ c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
+ c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
+
+ c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
+ c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
+ c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
+ c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
+ c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
+ c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
+ c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
+ c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
+ }
+ if(mm_flags & FF_MM_SSE2){
+ c->h264_idct8_add = ff_h264_idct8_add_sse2;
+ c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
+ }
+
+#if CONFIG_GPL && HAVE_YASM && ARCH_X86_32
+ if (mm_flags & FF_MM_MMX2){
+#if ARCH_X86_32
+ c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
+ c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
+#endif
+ if( mm_flags&FF_MM_SSE2 ){
+#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
+ c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
+ c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
+ c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
+ c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
+#endif
+ c->h264_idct_add16 = ff_h264_idct_add16_sse2;
+ c->h264_idct_add8 = ff_h264_idct_add8_sse2;
+ c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
+ }
+ }
+#endif
+ }
+}
+#endif /* CONFIG_H264DSP */
+
const char* avcodec_get_current_idct_mmx(AVCodecContext *avctx,DSPContext *c)
{
if (c->idct_put==ff_idct_xvid_mmx_put)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
index c0ef49b30..64d23f553 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
@@ -156,4 +156,18 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
+void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
+void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+
+void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
+void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+
+void ff_mmx_idct(DCTELEM *block);
+void ff_mmxext_idct(DCTELEM *block);
+
#endif /* AVCODEC_X86_DSPUTIL_MMX_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c
index 319daf28b..f790cb804 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fdct_mmx.c
@@ -33,8 +33,6 @@
#include "libavutil/common.h"
#include "libavcodec/dsputil.h"
-#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
-
//////////////////////////////////////////////////////////////////////
//
// constants for the forward DCT
@@ -55,30 +53,30 @@
#define X8(x) x,x,x,x,x,x,x,x
//concatenated table, for forward DCT transformation
-static const int16_t fdct_tg_all_16[24] ATTR_ALIGN(16) = {
+DECLARE_ALIGNED(16, static const int16_t, fdct_tg_all_16)[24] = {
X8(13036), // tg * (2<<16) + 0.5
X8(27146), // tg * (2<<16) + 0.5
X8(-21746) // tg * (2<<16) + 0.5
};
-static const int16_t ocos_4_16[8] ATTR_ALIGN(16) = {
+DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = {
X8(23170) //cos * (2<<15) + 0.5
};
-static const int16_t fdct_one_corr[8] ATTR_ALIGN(16) = { X8(1) };
+DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
-static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };
+DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
static struct
{
- const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16);
-} fdct_r_row_sse2 ATTR_ALIGN(16)=
+ DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
+} fdct_r_row_sse2 =
{{
RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
}};
-//static const long fdct_r_row_sse2[4] ATTR_ALIGN(16) = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
+//DECLARE_ALIGNED(16, static const long, fdct_r_row_sse2)[4] = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
-static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff table
+DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = { // forward_dct coeff table
16384, 16384, 22725, 19266,
16384, 16384, 12873, 4520,
21407, 8867, 19266, -4520,
@@ -154,10 +152,10 @@ static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff
static struct
{
- const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN(16);
-} tab_frw_01234567_sse2 ATTR_ALIGN(16) =
+ DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
+} tab_frw_01234567_sse2 =
{{
-//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = { // forward_dct coeff table
+//DECLARE_ALIGNED(16, static const int16_t, tab_frw_01234567_sse2)[] = { // forward_dct coeff table
#define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
C4, C4, C5, C7, C2, C6, C3, -C7, \
-C4, C4, C7, C3, C6, -C2, C7, -C5, \
@@ -535,7 +533,7 @@ static av_always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const
void ff_fdct_mmx(int16_t *block)
{
- int64_t align_tmp[16] ATTR_ALIGN(8);
+ DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
int16_t * block1= (int16_t*)align_tmp;
const int16_t *table= tab_frw_01234567;
int i;
@@ -553,7 +551,7 @@ void ff_fdct_mmx(int16_t *block)
void ff_fdct_mmx2(int16_t *block)
{
- int64_t align_tmp[16] ATTR_ALIGN(8);
+ DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
int16_t *block1= (int16_t*)align_tmp;
const int16_t *table= tab_frw_01234567;
int i;
@@ -571,7 +569,7 @@ void ff_fdct_mmx2(int16_t *block)
void ff_fdct_sse2(int16_t *block)
{
- int64_t align_tmp[16] ATTR_ALIGN(16);
+ DECLARE_ALIGNED(16, int64_t, align_tmp)[16];
int16_t * const block1= (int16_t*)align_tmp;
fdct_col_sse2(block, block1, 0);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h
index b0fff1b12..7ef583914 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.h
@@ -19,7 +19,7 @@
#ifndef AVCODEC_X86_FFT_H
#define AVCODEC_X86_FFT_H
-#include "libavcodec/dsputil.h"
+#include "libavcodec/fft.h"
void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
index 160caeecf..8226ae962 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_3dn2.c
@@ -23,7 +23,7 @@
#include "libavcodec/dsputil.h"
#include "fft.h"
-DECLARE_ALIGNED_8(static const int, m1m1)[2] = { 1<<31, 1<<31 };
+DECLARE_ALIGNED(8, static const int, m1m1)[2] = { 1<<31, 1<<31 };
#ifdef EMULATE_3DNOWEXT
#define PSWAPD(s,d)\
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
index 26bd748ae..9cb0ae1bf 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
@@ -419,18 +419,23 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
%define pass_3dn2 pass_3dn
%define pass_interleave_3dn2 pass_interleave_3dn
+%ifdef PIC
+%define SECTION_REL - $$
+%else
+%define SECTION_REL
+%endif
%macro DECL_FFT 2-3 ; nbits, cpu, suffix
-%xdefine list_of_fft fft4%2, fft8%2
+%xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL
%if %1==5
-%xdefine list_of_fft list_of_fft, fft16%2
+%xdefine list_of_fft list_of_fft, fft16%2 SECTION_REL
%endif
%assign n 1<<%1
%rep 17-%1
%assign n2 n/2
%assign n4 n/4
-%xdefine list_of_fft list_of_fft, fft %+ n %+ %3%2
+%xdefine list_of_fft list_of_fft, fft %+ n %+ %3%2 SECTION_REL
align 16
fft %+ n %+ %3%2:
@@ -448,10 +453,6 @@ fft %+ n %+ %3%2:
%endrep
%undef n
-%ifidn __OUTPUT_FORMAT__,macho64
-section .rodata
-%endif
-
align 8
dispatch_tab%3%2: pointer list_of_fft
@@ -462,6 +463,10 @@ section .text
cglobal fft_dispatch%3%2, 2,5,8, z, nbits
lea r2, [dispatch_tab%3%2 GLOBAL]
mov r2, [r2 + (nbitsq-2)*gprsize]
+%ifdef PIC
+ lea r3, [$$ GLOBAL]
+ add r2, r3
+%endif
call r2
RET
%endmacro ; DECL_FFT
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
index cfe3429aa..a6d822c9e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
@@ -20,8 +20,8 @@
#include "dsputil_mmx.h"
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
+DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
+DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
/***********************************/
/* IDCT */
@@ -157,12 +157,12 @@ static inline void h264_idct8_1d(int16_t *block)
static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
{
int i;
- DECLARE_ALIGNED_8(int16_t, b2)[64];
+ DECLARE_ALIGNED(8, int16_t, b2)[64];
block[0] += 32;
for(i=0; i<2; i++){
- DECLARE_ALIGNED_8(uint64_t, tmp);
+ DECLARE_ALIGNED(8, uint64_t, tmp);
h264_idct8_1d(block+4*i);
@@ -628,7 +628,7 @@ static void ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, DCTE
static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
{
- DECLARE_ALIGNED_8(uint64_t, tmp0)[2];
+ DECLARE_ALIGNED(8, uint64_t, tmp0)[2];
__asm__ volatile(
"movq (%2,%4), %%mm0 \n\t" //p1
@@ -690,7 +690,7 @@ static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, in
{
//FIXME: could cut some load/stores by merging transpose with filter
// also, it only needs to transpose 6x8
- DECLARE_ALIGNED_8(uint8_t, trans)[8*8];
+ DECLARE_ALIGNED(8, uint8_t, trans)[8*8];
int i;
for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
if((tc0[0] & tc0[1]) < 0)
@@ -734,7 +734,7 @@ static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha,
static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
//FIXME: could cut some load/stores by merging transpose with filter
- DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
+ DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
transpose4x4(trans, pix-2, 8, stride);
transpose4x4(trans+4, pix-2+4*stride, 8, stride);
h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
@@ -784,7 +784,7 @@ static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
{
//FIXME: could cut some load/stores by merging transpose with filter
- DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
+ DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
transpose4x4(trans, pix-2, 8, stride);
transpose4x4(trans+4, pix-2+4*stride, 8, stride);
h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
@@ -815,7 +815,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
for( dir=1; dir>=0; dir-- ) {
const x86_reg d_idx = dir ? -8 : -1;
const int mask_mv = dir ? mask_mv1 : mask_mv0;
- DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
+ DECLARE_ALIGNED(8, const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
int b_idx, edge;
for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
__asm__ volatile(
@@ -964,8 +964,8 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uin
\
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
- "movq %5, %%mm4 \n\t"\
- "movq %6, %%mm5 \n\t"\
+ "movq "MANGLE(ff_pw_5) ", %%mm4\n\t"\
+ "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
"1: \n\t"\
"movd -1(%0), %%mm1 \n\t"\
"movd (%0), %%mm2 \n\t"\
@@ -995,7 +995,7 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uin
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(src), "+c"(dst), "+g"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
: "memory"\
);\
}\
@@ -2106,7 +2106,7 @@ H264_MC_816(H264_MC_HV, ssse3)
#endif
/* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
-DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = {
+DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = {
0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
};
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
index a39f54b3f..d1859080c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
@@ -23,10 +23,9 @@
#include "libavutil/common.h"
#include "libavcodec/dsputil.h"
+#include "dsputil_mmx.h"
#include "mmx.h"
-#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align)))
-
#define ROW_SHIFT 11
#define COL_SHIFT 6
@@ -399,10 +398,10 @@ static inline void idct_col (int16_t * const col, const int offset)
#define T3 43790
#define C4 23170
- static const short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
- static const short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
- static const short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
- static const short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
+ DECLARE_ALIGNED(8, static const short, t1_vector)[] = {T1,T1,T1,T1};
+ DECLARE_ALIGNED(8, static const short, t2_vector)[] = {T2,T2,T2,T2};
+ DECLARE_ALIGNED(8, static const short, t3_vector)[] = {T3,T3,T3,T3};
+ DECLARE_ALIGNED(8, static const short, c4_vector)[] = {C4,C4,C4,C4};
/* column code adapted from Peter Gubanov */
/* http://www.elecard.com/peter/idct.shtml */
@@ -541,20 +540,20 @@ static inline void idct_col (int16_t * const col, const int offset)
}
-static const int32_t rounder0[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder0)[] =
rounder ((1 << (COL_SHIFT - 1)) - 0.5);
-static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
-static const int32_t rounder1[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder4)[] = rounder (0);
+DECLARE_ALIGNED(8, static const int32_t, rounder1)[] =
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
-static const int32_t rounder7[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder7)[] =
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
-static const int32_t rounder2[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder2)[] =
rounder (0.60355339059); /* C2 * (C6+C2)/2 */
-static const int32_t rounder6[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder6)[] =
rounder (-0.25); /* C2 * (C6-C2)/2 */
-static const int32_t rounder3[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder3)[] =
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
-static const int32_t rounder5[] ATTR_ALIGN(8) =
+DECLARE_ALIGNED(8, static const int32_t, rounder5)[] =
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
#undef COL_SHIFT
@@ -563,13 +562,13 @@ static const int32_t rounder5[] ATTR_ALIGN(8) =
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
void idct (int16_t * const block) \
{ \
- static const int16_t table04[] ATTR_ALIGN(16) = \
+ DECLARE_ALIGNED(16, static const int16_t, table04)[] = \
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
- static const int16_t table17[] ATTR_ALIGN(16) = \
+ DECLARE_ALIGNED(16, static const int16_t, table17)[] = \
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
- static const int16_t table26[] ATTR_ALIGN(16) = \
+ DECLARE_ALIGNED(16, static const int16_t, table26)[] = \
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
- static const int16_t table35[] ATTR_ALIGN(16) = \
+ DECLARE_ALIGNED(16, static const int16_t, table35)[] = \
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
\
idct_row_head (block, 0*8, table04); \
@@ -594,9 +593,6 @@ void idct (int16_t * const block) \
idct_col (block, 4); \
}
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
declare_idct (ff_mmxext_idct, mmxext_table,
mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c
index 22cd50019..50260ae6e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx_xvid.c
@@ -41,6 +41,7 @@
#include <inttypes.h>
#include "libavcodec/avcodec.h"
+#include "idct_xvid.h"
//=============================================================================
// Macros and other preprocessor constants
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h
index 691a200fd..010cfb70b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mathops.h
@@ -78,4 +78,23 @@ __asm__ volatile(\
);
#endif
+// avoid +32 for shift optimization (gcc should do that ...)
+#define NEG_SSR32 NEG_SSR32
+static inline int32_t NEG_SSR32( int32_t a, int8_t s){
+ __asm__ ("sarl %1, %0\n\t"
+ : "+r" (a)
+ : "ic" ((uint8_t)(-s))
+ );
+ return a;
+}
+
+#define NEG_USR32 NEG_USR32
+static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+ __asm__ ("shrl %1, %0\n\t"
+ : "+r" (a)
+ : "ic" ((uint8_t)(-s))
+ );
+ return a;
+}
+
#endif /* AVCODEC_X86_MATHOPS_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c
index 1e5cd2cb4..4a6840c98 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx_template.c
@@ -98,7 +98,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
x86_reg last_non_zero_p1;
int level=0, q; //=0 is because gcc says uninitialized ...
const uint16_t *qmat, *bias;
- DECLARE_ALIGNED_16(int16_t, temp_block)[64];
+ DECLARE_ALIGNED(16, int16_t, temp_block)[64];
assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c
index 9a702084a..2b154c0a0 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/rv40dsp_mmx.c
@@ -24,7 +24,7 @@
#include "dsputil_mmx.h"
/* bias interleaved with bias div 8, use p+1 to access bias div 8 */
-DECLARE_ALIGNED_8(static const uint64_t, rv40_bias_reg)[4][8] = {
+DECLARE_ALIGNED(8, static const uint64_t, rv40_bias_reg)[4][8] = {
{ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0010001000100010ULL, 0x0002000200020002ULL,
0x0020002000200020ULL, 0x0004000400040004ULL, 0x0010001000100010ULL, 0x0002000200020002ULL },
{ 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
index bf96f2470..e0b1f5b99 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
@@ -73,7 +73,7 @@
"movq %%mm"#R1", "#OFF"(%1) \n\t" \
"add %2, %0 \n\t"
-DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL;
+DECLARE_ALIGNED(16, const uint64_t, ff_pw_9) = 0x0009000900090009ULL;
/** Sacrifying mm6 allows to pipeline loads from src */
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
@@ -442,7 +442,7 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
static const int shift_value[] = { 0, 5, 1, 5 };\
int shift = (shift_value[hmode]+shift_value[vmode])>>1;\
int r;\
- DECLARE_ALIGNED_16(int16_t, tmp)[12*8];\
+ DECLARE_ALIGNED(16, int16_t, tmp)[12*8];\
\
r = (1<<(shift-1)) + rnd-1;\
vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\
@@ -463,9 +463,6 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
VC1_MSPEL_MC(put_)
VC1_MSPEL_MC(avg_)
-void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-
/** Macro to ease bicubic filter interpolation functions declarations */
#define DECLARE_FUNCTION(a, b) \
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c
index df0fcb9f1..89ecc7406 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c
@@ -19,13 +19,14 @@
*/
/**
- * @file vp3dsp_mmx.c
+ * @file libavcodec/x86/vp3dsp_mmx.c
* MMX-optimized functions cribbed from the original VP3 source code.
*/
#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
+#include "vp3dsp_mmx.h"
extern const uint16_t ff_vp3_idct_data[];
@@ -394,3 +395,44 @@ void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
ff_vp3_idct_mmx(block);
add_pixels_clamped_mmx(block, dest, line_size);
}
+
+void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int linesize, const DCTELEM *block)
+{
+ int dc = block[0];
+ dc = (46341*dc)>>16;
+ dc = (46341*dc + (8<<16))>>20;
+
+ __asm__ volatile(
+ "movd %3, %%mm0 \n\t"
+ "pshufw $0, %%mm0, %%mm0 \n\t"
+ "pxor %%mm1, %%mm1 \n\t"
+ "psubw %%mm0, %%mm1 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+
+#define DC_ADD \
+ "movq (%0), %%mm2 \n\t" \
+ "movq (%0,%1), %%mm3 \n\t" \
+ "paddusb %%mm0, %%mm2 \n\t" \
+ "movq (%0,%1,2), %%mm4 \n\t" \
+ "paddusb %%mm0, %%mm3 \n\t" \
+ "movq (%0,%2), %%mm5 \n\t" \
+ "paddusb %%mm0, %%mm4 \n\t" \
+ "paddusb %%mm0, %%mm5 \n\t" \
+ "psubusb %%mm1, %%mm2 \n\t" \
+ "psubusb %%mm1, %%mm3 \n\t" \
+ "movq %%mm2, (%0) \n\t" \
+ "psubusb %%mm1, %%mm4 \n\t" \
+ "movq %%mm3, (%0,%1) \n\t" \
+ "psubusb %%mm1, %%mm5 \n\t" \
+ "movq %%mm4, (%0,%1,2) \n\t" \
+ "movq %%mm5, (%0,%2) \n\t"
+
+ DC_ADD
+ "lea (%0,%1,4), %0 \n\t"
+ DC_ADD
+
+ : "+r"(dest)
+ : "r"((x86_reg)linesize), "r"((x86_reg)3*linesize), "r"(dc)
+ );
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h
index e565a3302..e0ebf0b0f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h
@@ -28,6 +28,7 @@
void ff_vp3_idct_mmx(int16_t *data);
void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block);
void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c
index e7745b372..fb1ff4504 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c
@@ -25,8 +25,9 @@
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
+#include "vp3dsp_sse2.h"
-DECLARE_ALIGNED_16(const uint16_t, ff_vp3_idct_data)[7 * 8] =
+DECLARE_ALIGNED(16, const uint16_t, ff_vp3_idct_data)[7 * 8] =
{
64277,64277,64277,64277,64277,64277,64277,64277,
60547,60547,60547,60547,60547,60547,60547,60547,