diff options
Diffstat (limited to 'Libraries/libtheora-1.1.1/lib/x86_vc/mmxfrag.c')
-rw-r--r-- | Libraries/libtheora-1.1.1/lib/x86_vc/mmxfrag.c | 337 |
1 files changed, 0 insertions, 337 deletions
diff --git a/Libraries/libtheora-1.1.1/lib/x86_vc/mmxfrag.c b/Libraries/libtheora-1.1.1/lib/x86_vc/mmxfrag.c deleted file mode 100644 index 4eb2084d..00000000 --- a/Libraries/libtheora-1.1.1/lib/x86_vc/mmxfrag.c +++ /dev/null @@ -1,337 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mmxfrag.c 16578 2009-09-25 19:50:48Z cristianadam $ - - ********************************************************************/ - -/*MMX acceleration of fragment reconstruction for motion compensation. - Originally written by Rudolf Marek. - Additional optimization by Nils Pipenbrinck. - Note: Loops are unrolled for best performance. 
- The iteration each instruction belongs to is marked in the comments as #i.*/ -#include <stddef.h> -#include "x86int.h" -#include "mmxfrag.h" - -#if defined(OC_X86_ASM) - -/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes - between rows.*/ -void oc_frag_copy_mmx(unsigned char *_dst, - const unsigned char *_src,int _ystride){ /*NOTE(review): the register macros below are presumably consumed by OC_FRAG_COPY_MMX, which is defined outside this file; confirm in mmxfrag.h.*/ -#define SRC edx -#define DST eax -#define YSTRIDE ecx -#define YSTRIDE3 esi - OC_FRAG_COPY_MMX(_dst,_src,_ystride); -#undef SRC -#undef DST -#undef YSTRIDE -#undef YSTRIDE3 -} - /*Writes an 8x8 block of pixels to _dst, with rows _ystride bytes apart, from the 64 consecutive 16-bit values at _residue: each value has 128 added to it and is then saturated to an unsigned byte. The low residue of a row is its first four values and the high residue is its last four.*/ -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, - const ogg_int16_t *_residue){ - __asm{ -#define DST edx -#define DST4 esi -#define YSTRIDE eax -#define YSTRIDE3 edi -#define RESIDUE ecx - mov DST,_dst - mov YSTRIDE,_ystride - mov RESIDUE,_residue - lea DST4,[DST+YSTRIDE*4] - lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] - /*DST4 addresses row 4 and YSTRIDE3 is three strides, so every row is DST or DST4 plus 0 to 3 strides and no pointer has to be advanced.*/ /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/ - pcmpeqw mm0,mm0 - /*#0 Load low residue.*/ - movq mm1,[0*8+RESIDUE] - /*#0 Load high residue.*/ - movq mm2,[1*8+RESIDUE] - /*Set mm0 to 0x8000800080008000.*/ - psllw mm0,15 - /*#1 Load low residue.*/ - movq mm3,[2*8+RESIDUE] - /*#1 Load high residue.*/ - movq mm4,[3*8+RESIDUE] - /*Set mm0 to 0x0080008000800080.*/ - psrlw mm0,8 - /*#2 Load low residue.*/ - movq mm5,[4*8+RESIDUE] - /*#2 Load high residue.*/ - movq mm6,[5*8+RESIDUE] - /*#0 Bias low residue.*/ - paddsw mm1,mm0 - /*#0 Bias high residue.*/ - paddsw mm2,mm0 - /*#0 Pack to byte.*/ - packuswb mm1,mm2 - /*#1 Bias low residue.*/ - paddsw mm3,mm0 - /*#1 Bias high residue.*/ - paddsw mm4,mm0 - /*#1 Pack to byte.*/ - packuswb mm3,mm4 - /*#2 Bias low residue.*/ - paddsw mm5,mm0 - /*#2 Bias high residue.*/ - paddsw mm6,mm0 - /*#2 Pack to byte.*/ - packuswb mm5,mm6 - /*#0 Write row.*/ - movq [DST],mm1 - /*#1 Write row.*/ - movq [DST+YSTRIDE],mm3 - /*#2 Write row.*/ - movq [DST+YSTRIDE*2],mm5 - /*#3 Load low residue.*/ - movq mm1,[6*8+RESIDUE] - /*#3 Load high residue.*/ - movq mm2,[7*8+RESIDUE] - /*#4 Load low residue.*/ - movq 
mm3,[8*8+RESIDUE] - /*#4 Load high residue.*/ - movq mm4,[9*8+RESIDUE] - /*#5 Load low residue.*/ - movq mm5,[10*8+RESIDUE] - /*#5 Load high residue.*/ - movq mm6,[11*8+RESIDUE] - /*#3 Bias low residue.*/ - paddsw mm1,mm0 - /*#3 Bias high residue.*/ - paddsw mm2,mm0 - /*#3 Pack to byte.*/ - packuswb mm1,mm2 - /*#4 Bias low residue.*/ - paddsw mm3,mm0 - /*#4 Bias high residue.*/ - paddsw mm4,mm0 - /*#4 Pack to byte.*/ - packuswb mm3,mm4 - /*#5 Bias low residue.*/ - paddsw mm5,mm0 - /*#5 Bias high residue.*/ - paddsw mm6,mm0 - /*#5 Pack to byte.*/ - packuswb mm5,mm6 - /*#3 Write row.*/ - movq [DST+YSTRIDE3],mm1 - /*#4 Write row.*/ - movq [DST4],mm3 - /*#5 Write row.*/ - movq [DST4+YSTRIDE],mm5 - /*#6 Load low residue.*/ - movq mm1,[12*8+RESIDUE] - /*#6 Load high residue.*/ - movq mm2,[13*8+RESIDUE] - /*#7 Load low residue.*/ - movq mm3,[14*8+RESIDUE] - /*#7 Load high residue.*/ - movq mm4,[15*8+RESIDUE] - /*#6 Bias low residue.*/ - paddsw mm1,mm0 - /*#6 Bias high residue.*/ - paddsw mm2,mm0 - /*#6 Pack to byte.*/ - packuswb mm1,mm2 - /*#7 Bias low residue.*/ - paddsw mm3,mm0 - /*#7 Bias high residue.*/ - paddsw mm4,mm0 - /*#7 Pack to byte.*/ - packuswb mm3,mm4 - /*#6 Write row.*/ - movq [DST4+YSTRIDE*2],mm1 - /*#7 Write row.*/ - movq [DST4+YSTRIDE3],mm3 -#undef DST -#undef DST4 -#undef YSTRIDE -#undef YSTRIDE3 -#undef RESIDUE - } -} - /*Writes an 8x8 block of pixels to _dst: each pixel is the matching _src pixel plus the matching 16-bit _residue value, saturated to an unsigned byte. Rows of _dst and _src are _ystride bytes apart.*/ -void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src, - int _ystride,const ogg_int16_t *_residue){ - int i; - /*Zero mm0.*/ - __asm pxor mm0,mm0; /*Four passes of two rows each (#0 and #1). Every pass reloads the pointers from the parameters and stores the advanced pointers back into them before it ends.*/ - for(i=4;i-->0;){ - __asm{ -#define DST edx -#define SRC ecx -#define YSTRIDE edi -#define RESIDUE eax - mov DST,_dst - mov SRC,_src - mov YSTRIDE,_ystride - mov RESIDUE,_residue - /*#0 Load source.*/ - movq mm3,[SRC] - /*#1 Load source.*/ - movq mm7,[SRC+YSTRIDE] - /*#0 Get copy of src.*/ - movq mm4,mm3 - /*#0 Expand high source.*/ - punpckhbw mm4,mm0 - /*#0 Expand low source.*/ - punpcklbw mm3,mm0 - /*#0 Add residue high.*/ - paddsw mm4,[8+RESIDUE] - /*#1 Get copy of 
src.*/ - movq mm2,mm7 - /*#0 Add residue low.*/ - paddsw mm3,[RESIDUE] - /*#1 Expand high source.*/ - punpckhbw mm2,mm0 - /*#0 Pack final row pixels.*/ - packuswb mm3,mm4 - /*#1 Expand low source.*/ - punpcklbw mm7,mm0 - /*#1 Add residue low.*/ - paddsw mm7,[16+RESIDUE] - /*#1 Add residue high.*/ - paddsw mm2,[24+RESIDUE] - /*Advance residue.*/ - lea RESIDUE,[32+RESIDUE] - /*#1 Pack final row pixels.*/ - packuswb mm7,mm2 - /*Advance src.*/ - lea SRC,[SRC+YSTRIDE*2] - /*#0 Write row.*/ - movq [DST],mm3 - /*#1 Write row.*/ - movq [DST+YSTRIDE],mm7 - /*Advance dst.*/ - lea DST,[DST+YSTRIDE*2] - mov _residue,RESIDUE - mov _dst,DST - mov _src,SRC -#undef DST -#undef SRC -#undef YSTRIDE -#undef RESIDUE - } - } -} - /*Writes an 8x8 block of pixels to _dst: each pixel is the sum of the matching _src1 and _src2 pixels shifted right by one, plus the matching 16-bit _residue value, saturated to an unsigned byte. Rows of _dst, _src1 and _src2 are _ystride bytes apart.*/ -void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, - const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){ - int i; - /*Zero mm7.*/ - __asm pxor mm7,mm7; /*Four passes of two rows each (#0 and #1). Every pass reloads the pointers from the parameters and stores the advanced pointers back into them before it ends.*/ - for(i=4;i-->0;){ - __asm{ -#define SRC1 ecx -#define SRC2 edi -#define YSTRIDE esi -#define RESIDUE edx -#define DST eax - mov YSTRIDE,_ystride - mov DST,_dst - mov RESIDUE,_residue - mov SRC1,_src1 - mov SRC2,_src2 - /*#0 Load src1.*/ - movq mm0,[SRC1] - /*#0 Load src2.*/ - movq mm2,[SRC2] - /*#0 Copy src1.*/ - movq mm1,mm0 - /*#0 Copy src2.*/ - movq mm3,mm2 - /*#1 Load src1.*/ - movq mm4,[SRC1+YSTRIDE] - /*#0 Unpack lower src1.*/ - punpcklbw mm0,mm7 - /*#1 Load src2.*/ - movq mm5,[SRC2+YSTRIDE] - /*#0 Unpack higher src1.*/ - punpckhbw mm1,mm7 - /*#0 Unpack lower src2.*/ - punpcklbw mm2,mm7 - /*#0 Unpack higher src2.*/ - punpckhbw mm3,mm7 - /*Advance src1 ptr.*/ - lea SRC1,[SRC1+YSTRIDE*2] - /*Advance src2 ptr.*/ - lea SRC2,[SRC2+YSTRIDE*2] - /*#0 Lower src1+src2.*/ - paddsw mm0,mm2 - /*#0 Higher src1+src2.*/ - paddsw mm1,mm3 - /*#1 Copy src1.*/ - movq mm2,mm4 - /*#0 Build lo average.*/ - psraw mm0,1 - /*#1 Copy src2.*/ - movq mm3,mm5 - /*#1 Unpack lower src1.*/ - punpcklbw mm4,mm7 - /*#0 Build hi average.*/ - psraw mm1,1 - /*#1 Unpack higher src1.*/ - punpckhbw 
mm2,mm7 - /*#0 low+=residue.*/ - paddsw mm0,[RESIDUE] - /*#1 Unpack lower src2.*/ - punpcklbw mm5,mm7 - /*#0 high+=residue.*/ - paddsw mm1,[8+RESIDUE] - /*#1 Unpack higher src2.*/ - punpckhbw mm3,mm7 - /*#1 Lower src1+src2.*/ - paddsw mm5,mm4 - /*#0 Pack and saturate.*/ - packuswb mm0,mm1 - /*#1 Higher src1+src2.*/ - paddsw mm3,mm2 - /*#0 Write row.*/ - movq [DST],mm0 - /*#1 Build lo average.*/ - psraw mm5,1 - /*#1 Build hi average.*/ - psraw mm3,1 - /*#1 low+=residue.*/ - paddsw mm5,[16+RESIDUE] - /*#1 high+=residue.*/ - paddsw mm3,[24+RESIDUE] - /*#1 Pack and saturate.*/ - packuswb mm5,mm3 - /*#1 Write row.*/ - movq [DST+YSTRIDE],mm5 - /*Advance residue ptr.*/ - add RESIDUE,32 - /*Advance dest ptr.*/ - lea DST,[DST+YSTRIDE*2] - mov _dst,DST - mov _residue,RESIDUE - mov _src1,SRC1 - mov _src2,SRC2 -#undef SRC1 -#undef SRC2 -#undef YSTRIDE -#undef RESIDUE -#undef DST - } - } -} - /*Executes the emms instruction. NOTE(review): presumably meant to be called after the MMX routines above and before any x87 floating point code runs; confirm with the callers.*/ -void oc_restore_fpu_mmx(void){ - __asm emms; -} - -#endif |