diff options
author | XhmikosR <xhmikosr@users.sourceforge.net> | 2010-09-15 19:46:24 +0400 |
---|---|---|
committer | XhmikosR <xhmikosr@users.sourceforge.net> | 2010-09-15 19:46:24 +0400 |
commit | 88174d2d1720fc8341ee21f0967e7aa726a2c3e8 (patch) | |
tree | 970b020eac57e2f6b7ec9c1bba66bbedc74b5d4c /src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h | |
parent | 08fabf0cb807f2cb1d5b2fc9dca3c180827e07c3 (diff) |
merge changes from trunk r2365-r2561
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/branches/legacy@2562 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h')
-rw-r--r-- | src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h | 37 |
1 files changed, 33 insertions, 4 deletions
```diff
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
index 33dafed1f..58256fd40 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
@@ -57,7 +57,7 @@ extern const uint64_t ff_pb_7;
 extern const uint64_t ff_pb_1F;
 extern const uint64_t ff_pb_3F;
 extern const uint64_t ff_pb_81;
-extern const uint64_t ff_pb_A1;
+extern const xmm_reg ff_pb_A1;
 extern const xmm_reg ff_pb_F8;
 extern const uint64_t ff_pb_FC;
 extern const xmm_reg ff_pb_FE;
@@ -94,6 +94,35 @@ extern const double ff_pd_2[2];
     SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
     SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
 
+static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
+    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
+        "movd %4, %%mm0 \n\t"
+        "movd %5, %%mm1 \n\t"
+        "movd %6, %%mm2 \n\t"
+        "movd %7, %%mm3 \n\t"
+        "punpcklbw %%mm1, %%mm0 \n\t"
+        "punpcklbw %%mm3, %%mm2 \n\t"
+        "movq %%mm0, %%mm1 \n\t"
+        "punpcklwd %%mm2, %%mm0 \n\t"
+        "punpckhwd %%mm2, %%mm1 \n\t"
+        "movd %%mm0, %0 \n\t"
+        "punpckhdq %%mm0, %%mm0 \n\t"
+        "movd %%mm0, %1 \n\t"
+        "movd %%mm1, %2 \n\t"
+        "punpckhdq %%mm1, %%mm1 \n\t"
+        "movd %%mm1, %3 \n\t"
+
+        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 3*dst_stride))
+        : "m" (*(uint32_t*)(src + 0*src_stride)),
+          "m" (*(uint32_t*)(src + 1*src_stride)),
+          "m" (*(uint32_t*)(src + 2*src_stride)),
+          "m" (*(uint32_t*)(src + 3*src_stride))
+    );
+}
+
 // e,f,g,h can be memory
 // out: a,d,t,c
 #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
@@ -158,9 +187,9 @@ extern const double ff_pd_2[2];
 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
 
-void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
-void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
-void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 
 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
```