From 3ffc8d1ea618fd835d8bf040629d2e2b85d79415 Mon Sep 17 00:00:00 2001 From: kinddragon Date: Fri, 21 May 2010 21:15:15 +0000 Subject: Removed redundant files BitBltFromI420ToYUY2Interlaced fixed with x64 build git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1922 10f7b99b-c216-0410-bff0-8a66a9350fd8 --- src/DSUtil/a_yuv2rgb.asm | 2472 -------------------------------------------- src/DSUtil/a_yuvtable.asm | 610 ----------- src/DSUtil/convert_a.asm | 296 ------ src/DSUtil/cpuid_32_64.cpp | 0 src/DSUtil/vd.cpp | 13 +- 5 files changed, 6 insertions(+), 3385 deletions(-) delete mode 100644 src/DSUtil/a_yuv2rgb.asm delete mode 100644 src/DSUtil/a_yuvtable.asm delete mode 100644 src/DSUtil/convert_a.asm delete mode 100644 src/DSUtil/cpuid_32_64.cpp (limited to 'src/DSUtil') diff --git a/src/DSUtil/a_yuv2rgb.asm b/src/DSUtil/a_yuv2rgb.asm deleted file mode 100644 index 70db5008b..000000000 --- a/src/DSUtil/a_yuv2rgb.asm +++ /dev/null @@ -1,2472 +0,0 @@ -; VirtualDub - Video processing and capture application -; Copyright (C) 1998-2001 Avery Lee -; -; This program is free software; you can redistribute it and/or modify -; it under the terms of the GNU General Public License as published by -; the Free Software Foundation; either version 2 of the License, or -; (at your option) any later version. -; -; This program is distributed in the hope that it will be useful, -; but WITHOUT ANY WARRANTY; without even the implied warranty of -; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; GNU General Public License for more details. -; -; You should have received a copy of the GNU General Public License -; along with this program; if not, write to the Free Software -; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - extern _YUV_Y_table: dword - extern _YUV_U_table: dword - extern _YUV_V_table: dword - extern _YUV_clip_table: byte - extern _YUV_clip_table16: byte - - segment .rdata, align=16 - - align 16 - - global _asm_YUVtoRGB_row_constants_SSE2 -_asm_YUVtoRGB_row_constants_SSE2: -SSE2_80w dq 00080008000800080h, 00080008000800080h -SSE2_Ublucoeff dq 00081008100810081h, 00081008100810081h -SSE2_Vredcoeff dq 00066006600660066h, 00066006600660066h -SSE2_Ugrncoeff dq 0FFE7FFE7FFE7FFE7h, 0FFE7FFE7FFE7FFE7h -SSE2_Vgrncoeff dq 0FFCCFFCCFFCCFFCCh, 0FFCCFFCCFFCCFFCCh -SSE2_Ylow dq 000FF00FF00FF00FFh, 000FF00FF00FF00FFh -SSE2_Ybias dq 00010001000100010h, 00010001000100010h -SSE2_Ycoeff dq 0004A004A004A004Ah, 0004A004A004A004Ah - -SSE2_Ucoeff0 dq 000810000FFE70081h, 0FFE700810000FFE7h -SSE2_Ucoeff1 dq 00000FFE700810000h, 000810000FFE70081h -SSE2_Ucoeff2 dq 0FFE700810000FFE7h, 00000FFE700810000h -SSE2_Vcoeff0 dq 000000066FFCC0000h, 0FFCC00000066FFCCh -SSE2_Vcoeff1 dq 00066FFCC00000066h, 000000066FFCC0000h -SSE2_Vcoeff2 dq 0FFCC00000066FFCCh, 00066FFCC00000066h - -%assign offs_var_begin 0 -%assign offs_rgb_pitch offs_var_begin + 0 -%assign offs_y_pitch offs_var_begin + 4 -%assign offs_uv_pitch offs_var_begin + 8 -%assign offs_width offs_var_begin + 12 -%assign offs_height offs_var_begin + 16 - -%assign offs_const_begin 32 - -%assign offs_SSE2_80w offs_const_begin + 0 -%assign offs_SSE2_Ublucoeff offs_const_begin + 16 -%assign offs_SSE2_Vredcoeff offs_const_begin + 32 -%assign offs_SSE2_Ugrncoeff offs_const_begin + 48 -%assign offs_SSE2_Vgrncoeff offs_const_begin + 64 -%assign offs_SSE2_Ylow offs_const_begin + 80 -%assign offs_SSE2_Ybias offs_const_begin + 96 -%assign offs_SSE2_Ycoeff offs_const_begin + 112 - -%assign offs_SSE2_Ucoeff0 offs_const_begin + 128 -%assign offs_SSE2_Ucoeff1 offs_const_begin + 144 -%assign offs_SSE2_Ucoeff2 offs_const_begin + 160 -%assign offs_SSE2_Vcoeff0 offs_const_begin + 176 -%assign offs_SSE2_Vcoeff1 offs_const_begin + 192 -%assign offs_SSE2_Vcoeff2 offs_const_begin + 208 - - -MMX_10w dq 00010001000100010h -MMX_80w dq 00080008000800080h -MMX_00FFw dq 000FF00FF00FF00FFh -MMX_FF00w dq 0FF00FF00FF00FF00h -MMX_Ublucoeff dq 00081008100810081h -MMX_Vredcoeff dq 00066006600660066h -MMX_Ugrncoeff dq 0FFE7FFE7FFE7FFE7h -MMX_Vgrncoeff dq 0FFCCFFCCFFCCFFCCh -MMX_Ycoeff dq 0004A004A004A004Ah -MMX_rbmask dq 07c1f7c1f7c1f7c1fh -MMX_grnmask dq 003e003e003e003e0h -MMX_grnmask2 dq 000f800f800f800f8h -MMX_clip dq 07c007c007c007c00h - -MMX_Ucoeff0 dq 000810000FFE70081h -MMX_Ucoeff1 dq 0FFE700810000FFE7h -MMX_Ucoeff2 dq 00000FFE700810000h -MMX_Vcoeff0 dq 000000066FFCC0000h -MMX_Vcoeff1 dq 0FFCC00000066FFCCh -MMX_Vcoeff2 dq 00066FFCC00000066h - - segment .text - - global _asm_YUVtoRGB32_row - global _asm_YUVtoRGB32_row_MMX - global _asm_YUVtoRGB32_row_ISSE - global _asm_YUVtoRGB32_row_SSE2 - global _asm_YUVtoRGB24_row - global _asm_YUVtoRGB24_row_MMX - global _asm_YUVtoRGB24_SSE2 - global _asm_YUVtoRGB16_row - global _asm_YUVtoRGB16_row_MMX - global _asm_YUVtoRGB16_row_ISSE - -; asm_YUVtoRGB_row( -; Pixel *ARGB1_pointer, -; Pixel *ARGB2_pointer, -; YUVPixel *Y1_pointer, -; YUVPixel *Y2_pointer, -; YUVPixel *U_pointer, -; YUVPixel *V_pointer, -; long width -; ); - -%define ARGB1_pointer [esp+ 4+16] -%define ARGB2_pointer [esp+ 8+16] -%define Y1_pointer [esp+12+16] -%define Y2_pointer [esp+16+16] -%define U_pointer [esp+20+16] -%define V_pointer [esp+24+16] -%define count [esp+28+16] -%define context_pointer [esp+32+16+8] - -_asm_YUVtoRGB32_row: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - mov ebx,eax - shl ebx,3 - add eax,eax - add ARGB1_pointer,ebx - add ARGB2_pointer,ebx - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer ;[C] - mov edi,V_pointer ;[C] - xor edx,edx ;[C] - xor ecx,ecx ;[C] - jmp short col_loop_start - -col_loop: - mov ch,[_YUV_clip_table+ebx-3f00h] ;[4] edx = [0][0][red][green] - mov esi,U_pointer ;[C] - shl ecx,8 ;[4] edx = [0][red][green][0] - mov edi,V_pointer ;[C] - mov cl,[_YUV_clip_table+edx-3f00h] ;[4] edx = [0][r][g][b] !! - xor edx,edx ;[C] - mov [eax+ebp*8-4],ecx ;[4] - xor ecx,ecx ;[C] -col_loop_start: - mov cl,[esi + ebp] ;[C] eax = U - mov dl,[edi + ebp] ;[C] ebx = V - mov eax,Y1_pointer ;[1] - xor ebx,ebx ;[1] - mov esi,[_YUV_U_table + ecx*4] ;[C] eax = [b impact][u-g impact] - mov ecx,[_YUV_V_table + edx*4] ;[C] ebx = [r impact][v-g impact] - mov edi,esi ;[C] - mov bl,[eax + ebp*2] ;[1] ebx = Y1 value - shr esi,16 ;[C] eax = blue impact - add edi,ecx ;[C] edi = [junk][g impact] - mov ebx,[_YUV_Y_table + ebx*4] ;[1] ebx = Y impact - and ecx,0ffff0000h ;[C] - mov edx,ebx ;[1] edx = Y impact - add esi,ecx ;[C] eax = [r impact][b impact] - and edi,0000ffffh ;[C] - add ebx,esi ;[1] ebx = [red][blue] - mov ecx,ebx ;[1] edi = [red][blue] - and edx,0000ffffh ;[1] ecx = green - shr ebx,16 ;[1] ebx = red - and ecx,0000ffffh ;[1] edi = blue - mov dl,[_YUV_clip_table+edx+edi-3f00h] ;[1] edx = [0][0][junk][green] - mov eax,Y1_pointer ;[2] - mov dh,[_YUV_clip_table+ebx-3f00h] ;[1] edx = [0][0][red][green] - xor ebx,ebx ;[2] - shl edx,8 ;[1] edx = [0][red][green][0] - mov bl,[eax + ebp*2 + 1] ;[2] ebx = Y1 value - mov eax,ARGB1_pointer ;[1] - mov dl,[_YUV_clip_table+ecx-3f00h] ;[1] edx = [0][r][g][b] !! - mov ebx,[_YUV_Y_table + ebx*4] ;[2] ebx = Y impact - mov ecx,0000ffffh ;[2] - - and ecx,ebx ;[2] - add ebx,esi ;[2] ebx = [red][blue] - - mov [eax+ebp*8],edx ;[1] - mov edx,ebx ;[2] - - shr ebx,16 ;[2] ebx = red - mov eax,Y2_pointer ;[3] - - and edx,0000ffffh ;[2] - mov cl,[_YUV_clip_table+ecx+edi-3f00h] ;[2] edx = [0][0][junk][green] - - mov al,[eax + ebp*2] ;[3] ebx = Y1 value - mov ch,[_YUV_clip_table+ebx-3f00h] ;[2] edx = [0][0][red][green] - - shl ecx,8 ;[2] edx = [0][red][green][0] - and eax,000000ffh ;[3] - - mov cl,[_YUV_clip_table+edx-3f00h] ;[2] edx = [0][r][g][b] !! - mov edx,ARGB1_pointer ;[2] - - mov ebx,[_YUV_Y_table + eax*4] ;[3] ebx = Y impact - mov eax,0000ffffh - - and eax,ebx ;[3] edi = [red][blue] - add ebx,esi ;[3] ebx = [red][blue] - - mov [edx+ebp*8+4],ecx ;[2] - mov edx,ebx ;[3] - - shr ebx,16 ;[3] ebx = red - mov ecx,Y2_pointer ;[4] - - and edx,0000ffffh ;[3] ecx = green - mov al,[_YUV_clip_table+eax+edi-3f00h] ;[3] edx = [0][0][junk][green] - - mov cl,[ecx + ebp*2+1] ;[4] ebx = Y1 value - mov ah,[_YUV_clip_table+ebx-3f00h] ;[3] edx = [0][0][red][green] - - shl eax,8 ;[3] edx = [0][red][green][0] - and ecx,000000ffh ;[4] - - mov al,[_YUV_clip_table+edx-3f00h] ;[3] edx = [0][r][g][b] !! - mov edx,ARGB2_pointer ;[3] - - mov ebx,[_YUV_Y_table + ecx*4] ;[4] ebx = Y impact - mov ecx,0000ffffh ;[4] - - and ecx,ebx ;[4] ecx = [0][Y-impact] - add ebx,esi ;[4] ebx = [red][blue] - - mov [edx+ebp*8],eax ;[3] - mov edx,ebx ;[4] edx = [red][blue] - - shr ebx,16 ;[4] ebx = red - mov cl,[_YUV_clip_table+ecx+edi-3f00h] ;[4] edx = [0][0][junk][green] - - and edx,0000ffffh ;[4] edx = blue - mov eax,ARGB2_pointer ;[4] - - inc ebp - - jnz col_loop - - mov ch,[_YUV_clip_table+ebx-3f00h] ;[4] edx = [0][0][red][green] - shl ecx,8 ;[4] edx = [0][red][green][0] - mov cl,[_YUV_clip_table+edx-3f00h] ;[4] edx = [0][r][g][b] !! - mov [eax+ebp*8-4],ecx ;[4] - - pop ebp - pop edi - pop esi - pop ebx - ret - -;MMX_test dq 7060504030201000h - -_asm_YUVtoRGB32_row_MMX: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - mov ebx,eax - shl ebx,3 - add eax,eax - add ARGB1_pointer,ebx - add ARGB2_pointer,ebx - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer - mov edi,V_pointer - mov ecx,Y1_pointer - mov edx,Y2_pointer - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - -col_loop_MMX: - movd mm0,dword [esi+ebp] ;U (byte) - pxor mm7,mm7 - - movd mm1,dword [edi+ebp] ;V (byte) - punpcklbw mm0,mm7 ;U (word) - - psubw mm0,[MMX_80w] - punpcklbw mm1,mm7 ;V (word) - - psubw mm1,[MMX_80w] - movq mm2,mm0 - - pmullw mm2,[MMX_Ugrncoeff] - movq mm3,mm1 - - pmullw mm3,[MMX_Vgrncoeff] - pmullw mm0,[MMX_Ublucoeff] - pmullw mm1,[MMX_Vredcoeff] - paddw mm2,mm3 - - ;mm0: blue - ;mm1: red - ;mm2: green - - movq mm6,[ecx+ebp*2] ;Y - pand mm6,[MMX_00FFw] - psubw mm6,[MMX_10w] - pmullw mm6,[MMX_Ycoeff] - movq mm4,mm6 - paddw mm6,mm0 ;mm6: - movq mm5,mm4 - paddw mm4,mm1 ;mm4: - paddw mm5,mm2 ;mm5: - psraw mm6,6 - psraw mm4,6 - packuswb mm6,mm6 ;mm6: B3B2B1B0B3B2B1B0 - psraw mm5,6 - packuswb mm4,mm4 ;mm4: R3R2R1R0R3R2R1R0 - punpcklbw mm6,mm4 ;mm6: R3B3R2B2R1B1R0B0 - packuswb mm5,mm5 ;mm5: G3G2G1G0G3G2G1G0 - punpcklbw mm5,mm5 ;mm5: G3G3G2G2G1G1G0G0 - movq mm4,mm6 - punpcklbw mm6,mm5 ;mm6: G1R1G1B2G0R0G0B0 - punpckhbw mm4,mm5 ;mm4: G3R3G3B3G2R2G2B2 - - movq mm7,[ecx+ebp*2] ;Y - psrlw mm7,8 - psubw mm7,[MMX_10w] - pmullw mm7,[MMX_Ycoeff] - movq mm3,mm7 - paddw mm7,mm0 ;mm7: final blue - movq mm5,mm3 - paddw mm3,mm1 ;mm3: final red - paddw mm5,mm2 ;mm5: final green - psraw mm7,6 - psraw mm3,6 - packuswb mm7,mm7 ;mm7: B3B2B1B0B3B2B1B0 - psraw mm5,6 - packuswb mm3,mm3 ;mm3: R3R2R1R0R3R2R1R0 - punpcklbw mm7,mm3 ;mm7: R3B3R2B2R1B1R0B0 - packuswb mm5,mm5 ;mm5: G3G2G1G0G3G2G1G0 - punpcklbw mm5,mm5 ;mm5: G3G3G2G2G1G1G0G0 - movq mm3,mm7 - punpcklbw mm7,mm5 ;mm7: G1R1G1B2G0R0G0B0 - punpckhbw mm3,mm5 ;mm3: G3R3G3B3G2R2G2B2 - - ;mm3 P7:P5 - ;mm4 P6:P4 - ;mm6 P2:P0 - ;mm7 P3:P1 - - movq mm5,mm6 - punpckldq mm5,mm7 ;P1:P0 - punpckhdq mm6,mm7 ;P3:P2 - movq mm7,mm4 - punpckldq mm4,mm3 ;P5:P4 - punpckhdq mm7,mm3 ;P7:P6 - - movq [eax+ebp*8],mm5 - movq [eax+ebp*8+8],mm6 - movq [eax+ebp*8+16],mm4 - movq [eax+ebp*8+24],mm7 - - movq mm6,[edx+ebp*2] ;Y - pand mm6,[MMX_00FFw] - psubw mm6,[MMX_10w] - pmullw mm6,[MMX_Ycoeff] - movq mm4,mm6 - paddw mm6,mm0 ;mm6: - movq mm5,mm4 - paddw mm4,mm1 ;mm4: - paddw mm5,mm2 ;mm5: - psraw mm6,6 - psraw mm4,6 - packuswb mm6,mm6 ;mm6: B3B2B1B0B3B2B1B0 - psraw mm5,6 - packuswb mm4,mm4 ;mm4: R3R2R1R0R3R2R1R0 - punpcklbw mm6,mm4 ;mm6: R3B3R2B2R1B1R0B0 - packuswb mm5,mm5 ;mm5: G3G2G1G0G3G2G1G0 - punpcklbw mm5,mm5 ;mm5: G3G3G2G2G1G1G0G0 - movq mm4,mm6 - punpcklbw mm6,mm5 ;mm6: G1R1G1B2G0R0G0B0 - punpckhbw mm4,mm5 ;mm4: G3R3G3B3G2R2G2B2 - - movq mm7,[edx+ebp*2] ;Y - psrlw mm7,8 - psubw mm7,[MMX_10w] - pmullw mm7,[MMX_Ycoeff] - movq mm3,mm7 - paddw mm7,mm0 ;mm7: final blue - movq mm5,mm3 - paddw mm3,mm1 ;mm3: final red - paddw mm5,mm2 ;mm5: final green - psraw mm7,6 - psraw mm3,6 - packuswb mm7,mm7 ;mm7: B3B2B1B0B3B2B1B0 - psraw mm5,6 - packuswb mm3,mm3 ;mm3: R3R2R1R0R3R2R1R0 - punpcklbw mm7,mm3 ;mm7: R3B3R2B2R1B1R0B0 - packuswb mm5,mm5 ;mm5: G3G2G1G0G3G2G1G0 - punpcklbw mm5,mm5 ;mm5: G3G3G2G2G1G1G0G0 - movq mm3,mm7 - punpcklbw mm7,mm5 ;mm7: G1R1G1B2G0R0G0B0 - punpckhbw mm3,mm5 ;mm3: G3R3G3B3G2R2G2B2 - - ;mm3 P7:P5 - ;mm4 P6:P4 - ;mm6 P2:P0 - ;mm7 P3:P1 - - movq mm5,mm6 - punpckldq mm5,mm7 ;P1:P0 - punpckhdq mm6,mm7 ;P3:P2 - movq mm7,mm4 - punpckldq mm4,mm3 ;P5:P4 - punpckhdq mm7,mm3 ;P7:P6 - - movq [ebx+ebp*8 ],mm5 - movq [ebx+ebp*8+ 8],mm6 - - movq [ebx+ebp*8+16],mm4 - movq [ebx+ebp*8+24],mm7 - - add ebp,4 - - jnz col_loop_MMX - - pop ebp - pop edi - pop esi - pop ebx - ret - -;************************************************************************** -; -; asm_YUVtoRGB24_row( -; Pixel *ARGB1_pointer, -; Pixel *ARGB2_pointer, -; YUVPixel *Y1_pointer, -; YUVPixel *Y2_pointer, -; YUVPixel *U_pointer, -; YUVPixel *V_pointer, -; long width -; ); - -%define ARGB1_pointer [esp+ 4+16] -%define ARGB2_pointer [esp+ 8+16] -%define Y1_pointer [esp+12+16] -%define Y2_pointer [esp+16+16] -%define U_pointer [esp+20+16] -%define V_pointer [esp+24+16] -%define count [esp+28+16] - -_asm_YUVtoRGB24_row: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - add eax,eax - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer ;[C] - mov edi,V_pointer ;[C] - xor edx,edx ;[C] - xor ecx,ecx ;[C] - -col_loop24: - mov esi,U_pointer - mov edi,V_pointer - xor eax,eax - xor ebx,ebx - mov al,[esi + ebp] ;eax = U - mov bl,[edi + ebp] ;ebx = V - mov eax,[_YUV_U_table + eax*4] ;eax = [b impact][u-g impact] - mov edi,[_YUV_V_table + ebx*4] ;edi = [r impact][v-g impact] - - mov ecx,eax ;[C] - mov esi,Y1_pointer ;[1] - - mov edx,edi ;[C] - xor ebx,ebx ;[1] - - shr eax,16 ;[C] eax = blue impact - mov bl,[esi + ebp*2] ;[1] ebx = Y1 value - - and edi,0ffff0000h ;[C] edi = [r impact][0] - add ecx,edx ;[C] ecx = [junk][g impact] - - add eax,edi ;[C] eax = [r impact][b impact] - mov ebx,[_YUV_Y_table + ebx*4] ;[1] ebx = Y impact - - ;eax = [r][b] - ;ecx = [g] - - mov esi,ebx ;[1] - add ebx,eax ;[1] ebx = [red][blue] - - add esi,ecx ;[1] edx = [junk][green] - mov edi,ebx ;[1] edi = [red][blue] - - shr ebx,16 ;[1] ebx = red - and esi,0000ffffh ;[1] ecx = green - - and edi,0000ffffh ;edi = blue - xor edx,edx - - mov bh,[_YUV_clip_table+ebx-3f00h] ;bh = red - mov dl,[_YUV_clip_table+esi-3f00h] ;dl = green - - mov esi,Y1_pointer ;[2] - mov bl,[_YUV_clip_table+edi-3f00h] ;bl = blue - - mov edi,ARGB1_pointer ;[1] - mov [edi+2],bh ;[1] - - mov [edi+0],bl ;[1] - xor ebx,ebx ;[2] - - mov [edi+1],dl ;[1] - - mov bl,[esi + ebp*2 + 1] ;[2] ebx = Y1 value - mov esi,ecx ;[2] - - mov ebx,[_YUV_Y_table + ebx*4] ;[2] ebx = Y impact - mov edi,0000ffffh ;[2] - - add esi,ebx ;[2] edx = [junk][green] - add ebx,eax ;[2] ebx = [red][blue] - - and edi,ebx ;[2] edi = blue - and esi,0000ffffh ;[2] ecx = green - - shr ebx,16 ;ebx = red - xor edx,edx - - mov bh,[_YUV_clip_table+ebx-3f00h] ;bh = red - mov dl,[_YUV_clip_table+esi-3f00h] ;dl = green - - mov esi,Y2_pointer ;[3] - mov bl,[_YUV_clip_table+edi-3f00h] ;bl = blue - - mov edi,ARGB1_pointer ;[2] - mov [edi+5],bh ;[2] - - mov [edi+4],dl ;[2] - mov [edi+3],bl ;[2] - - xor ebx,ebx ;[3] - - mov bl,[esi + ebp*2] ;[3] ebx = Y1 value - mov edi,ecx ;[2] - - mov ebx,[_YUV_Y_table + ebx*4] ;[3] ebx = Y impact - mov esi,0000ffffh ;[3] - - add edi,ebx ;[3] edx = [junk][green] - add ebx,eax ;[3] ebx = [red][blue] - - and esi,ebx ;[3] edi = blue - and edi,0000ffffh ;ecx = green - - shr ebx,16 ;ebx = red - xor edx,edx - - mov dl,[_YUV_clip_table+edi-3f00h] ;dl = green - mov edi,ARGB2_pointer ;[3] - - mov bh,[_YUV_clip_table+ebx-3f00h] ;bh = red - mov bl,[_YUV_clip_table+esi-3f00h] ;bl = blue - - mov esi,Y2_pointer ;[4] - mov [edi+2],bh - - mov [edi+0],bl - xor ebx,ebx ;[4] - - mov [edi+1],dl - mov bl,[esi + ebp*2 + 1] ;[4] ebx = Y1 value - - mov edi,0000ffffh ;[4] - - mov ebx,[_YUV_Y_table + ebx*4] ;[4] ebx = Y impact - xor edx,edx - - add ecx,ebx ;[4] ecx = [junk][green] - add ebx,eax ;ebx = [red][blue] - - and edi,ebx ;edi = blue - and ecx,0000ffffh ;ecx = green - - shr ebx,16 ;ebx = red - mov esi,ARGB2_pointer - - mov bl,[_YUV_clip_table+ebx-3f00h] ;bh = red - mov dl,[_YUV_clip_table+ecx-3f00h] ;dl = green - - mov al,[_YUV_clip_table+edi-3f00h] ;bl = blue - mov [esi+5],bl - - mov [esi+4],dl - mov ecx,ARGB1_pointer - - mov [esi+3],al - add esi,6 - - mov ARGB2_pointer,esi - add ecx,6 - - mov ARGB1_pointer,ecx - - inc ebp - jnz col_loop24 - - pop ebp - pop edi - pop esi - pop ebx - ret - -_asm_YUVtoRGB24_row_MMX: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - add eax,eax - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer - mov edi,V_pointer - mov ecx,Y1_pointer - mov edx,Y2_pointer - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - -col_loop_MMX24: - movd mm0,dword [esi+ebp] ;U (byte) - pxor mm7,mm7 - - movd mm1,dword [edi+ebp] ;V (byte) - punpcklbw mm0,mm7 ;U (word) - - movd mm2,dword [ecx+ebp*2] ;Y low - punpcklbw mm1,mm7 ;V (word) - - movd mm3,dword [edx+ebp*2] ;Y high - punpcklbw mm2,mm7 ;Y1 (word) - - psubw mm2,[MMX_10w] - punpcklbw mm3,mm7 ;Y2 (word) - - psubw mm3,[MMX_10w] - - psubw mm0,[MMX_80w] - psubw mm1,[MMX_80w] - - ;group 1 - - pmullw mm2,[MMX_Ycoeff] ;[lazy] - movq mm6,mm0 - pmullw mm3,[MMX_Ycoeff] ;[lazy] - movq mm7,mm1 - punpcklwd mm6,mm6 ;mm6 = U1U1U0U0 - movq mm4,mm2 ;mm4 = Y3Y2Y1Y0 [high] - punpckldq mm6,mm6 ;mm6 = U0U0U0U0 - movq mm5,mm3 ;mm3 = Y3Y2Y1Y0 [low] - punpcklwd mm7,mm7 ;mm7 = V1V1V0V0 - punpckldq mm7,mm7 ;mm7 = V0V0V0V0 - - pmullw mm6,[MMX_Ucoeff0] - punpcklwd mm4,mm4 ;mm4 = Y1Y1Y0Y0 [high] - pmullw mm7,[MMX_Vcoeff0] - punpcklwd mm5,mm5 ;mm5 = Y1Y1Y0Y0 [low] - - punpcklwd mm4,mm2 ;mm4 = Y1Y0Y0Y0 - punpcklwd mm5,mm3 ;mm5 = Y1Y0Y0Y0 - - paddw mm4,mm6 - paddw mm5,mm6 - paddw mm4,mm7 - paddw mm5,mm7 - - psraw mm4,6 - psraw mm5,6 - - packuswb mm4,mm4 - packuswb mm5,mm5 - - ;group 2 - - movd dword [eax],mm4 ;[lazy write] - movq mm4,mm0 - movd dword [ebx],mm5 ;[lazy write] - movq mm5,mm1 - - punpcklwd mm4,mm4 ;mm6 = U1U1U0U0 - movq mm6,mm2 ;mm4 = Y3Y2Y1Y0 [high] - punpcklwd mm5,mm5 ;mm6 = V1V1V0V0 - movq mm7,mm3 ;mm3 = Y3Y2Y1Y0 [low] - - pmullw mm4,[MMX_Ucoeff1] - psrlq mm6,16 ;mm4 = 00Y3Y2Y1 [high] - pmullw mm5,[MMX_Vcoeff1] - psrlq mm7,16 ;mm4 = 00Y3Y2Y1 [low] - - punpcklwd mm6,mm6 ;mm4 = Y2Y2Y1Y1 [high] - punpcklwd mm7,mm7 ;mm5 = Y2Y2Y1Y1 [high] - - paddw mm6,mm4 - paddw mm7,mm4 - paddw mm6,mm5 - paddw mm7,mm5 - - psraw mm6,6 - psraw mm7,6 - - packuswb mm6,mm6 - packuswb mm7,mm7 - - ;group 3 - - movd dword [eax+4],mm6 ;[lazy write] - movq mm6,mm0 - movd dword [ebx+4],mm7 ;[lazy write] - movq mm7,mm1 - - movq mm4,mm2 ;mm4 = Y3Y2Y1Y0 [high] - punpcklwd mm6,mm6 ;mm6 = U1U1U0U0 - movq mm5,mm3 ;mm3 = Y3Y2Y1Y0 [low] - punpckhdq mm6,mm6 ;mm6 = U1U1U1U1 - punpcklwd mm7,mm7 ;mm7 = V1V1V0V0 - punpckhdq mm7,mm7 ;mm7 = V1V1V1V1 - - pmullw mm6,[MMX_Ucoeff2] - punpckhwd mm2,mm2 ;mm2 = Y3Y3Y2Y2 [high] - pmullw mm7,[MMX_Vcoeff2] - punpckhwd mm3,mm3 ;mm3 = Y3Y3Y2Y2 [low] - - punpckhdq mm4,mm2 ;mm4 = Y3Y3Y3Y2 [high] - punpckhdq mm5,mm3 ;mm5 = Y3Y3Y3Y2 [low] - - paddw mm4,mm6 - paddw mm5,mm6 - paddw mm4,mm7 - paddw mm5,mm7 - - psraw mm4,6 - psraw mm5,6 - - ;next 3 groups - - movd mm2,dword [ecx+ebp*2+4] ;Y low - packuswb mm4,mm4 ;[lazy] - - movd mm3,dword [edx+ebp*2+4] ;Y high - packuswb mm5,mm5 ;[lazy] - - movd dword [eax+8],mm4 ;[lazy write] - pxor mm7,mm7 - - movd dword [ebx+8],mm5 ;[lazy write] - punpcklbw mm2,mm7 ;U (word) - - - psubw mm2,[MMX_10w] - punpcklbw mm3,mm7 ;V (word) - - psubw mm3,[MMX_10w] - - - ;group 1 - - pmullw mm2,[MMX_Ycoeff] ;[init] - movq mm6,mm0 - - pmullw mm3,[MMX_Ycoeff] ;[init] - punpckhwd mm6,mm6 ;mm6 = U3U3U2U2 - - movq mm7,mm1 - punpckldq mm6,mm6 ;mm6 = U2U2U2U2 - movq mm4,mm2 ;mm4 = Y3Y2Y1Y0 [high] - punpckhwd mm7,mm7 ;mm7 = V3V3V2V2 - movq mm5,mm3 ;mm3 = Y3Y2Y1Y0 [low] - punpckldq mm7,mm7 ;mm7 = V2V2V2V2 - - pmullw mm6,[MMX_Ucoeff0] - punpcklwd mm4,mm4 ;mm4 = Y1Y1Y0Y0 [high] - pmullw mm7,[MMX_Vcoeff0] - punpcklwd mm5,mm5 ;mm5 = Y1Y1Y0Y0 [low] - - punpcklwd mm4,mm2 ;mm4 = Y1Y0Y0Y0 - punpcklwd mm5,mm3 ;mm5 = Y1Y0Y0Y0 - - paddw mm4,mm6 - paddw mm5,mm6 - paddw mm4,mm7 - paddw mm5,mm7 - - psraw mm4,6 - psraw mm5,6 - - packuswb mm4,mm4 - packuswb mm5,mm5 - - ;group 2 - - movd dword [eax+12],mm4 - movq mm6,mm0 - movd dword [ebx+12],mm5 - movq mm7,mm1 - - punpckhwd mm6,mm6 ;mm6 = U3U3U2U2 - movq mm4,mm2 ;mm4 = Y3Y2Y1Y0 [high] - punpckhwd mm7,mm7 ;mm6 = V3V3V2V2 - movq mm5,mm3 ;mm3 = Y3Y2Y1Y0 [low] - - pmullw mm6,[MMX_Ucoeff1] - psrlq mm4,16 ;mm4 = 00Y3Y2Y1 [high] - pmullw mm7,[MMX_Vcoeff1] - psrlq mm5,16 ;mm4 = 00Y3Y2Y1 [low] - - punpcklwd mm4,mm4 ;mm4 = Y2Y2Y1Y1 [high] - punpcklwd mm5,mm5 ;mm5 = Y2Y2Y1Y1 [high] - - paddw mm4,mm6 - paddw mm5,mm6 - paddw mm4,mm7 - paddw mm5,mm7 - - psraw mm4,6 - psraw mm5,6 - - packuswb mm4,mm4 - packuswb mm5,mm5 - - ;group 3 - - movq mm6,mm2 ;mm4 = Y3Y2Y1Y0 [high] - punpckhwd mm0,mm0 ;mm6 = U3U3U2U2 - - movq mm7,mm3 ;mm3 = Y3Y2Y1Y0 [low] - punpckhdq mm0,mm0 ;mm6 = U3U3U3U3 - - movd dword [eax+16],mm4 ;[lazy write] - punpckhwd mm1,mm1 ;mm7 = V3V3V2V2 - - movd dword [ebx+16],mm5 ;[lazy write] - punpckhdq mm1,mm1 ;mm7 = V3V3V3V3 - - pmullw mm0,[MMX_Ucoeff2] - punpckhwd mm2,mm2 ;mm2 = Y3Y3Y2Y2 [high] - pmullw mm1,[MMX_Vcoeff2] - punpckhwd mm3,mm3 ;mm3 = Y3Y3Y2Y2 [low] - - punpckhdq mm6,mm2 ;mm4 = Y3Y3Y3Y2 [high] - punpckhdq mm7,mm3 ;mm5 = Y3Y3Y3Y2 [low] - - paddw mm6,mm0 - paddw mm7,mm0 - paddw mm6,mm1 - paddw mm7,mm1 - - psraw mm6,6 - psraw mm7,6 - - packuswb mm6,mm6 - packuswb mm7,mm7 - - movd dword [eax+20],mm6 - add eax,24 - movd dword [ebx+20],mm7 - add ebx,24 - - ;done - - add ebp,4 - jnz col_loop_MMX24 - - pop ebp - pop edi - pop esi - pop ebx - ret - -;************************************************************************** - -_asm_YUVtoRGB16_row: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - mov ebx,eax - shl ebx,2 - add ARGB1_pointer,ebx - add ARGB2_pointer,ebx - add eax,eax - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer ;[C] - mov edi,V_pointer ;[C] - xor edx,edx ;[C] - xor ecx,ecx ;[C] - -col_loop16: - mov esi,U_pointer - mov edi,V_pointer - xor eax,eax - xor ebx,ebx - mov al,[esi + ebp] ;eax = U - mov bl,[edi + ebp] ;ebx = V - mov eax,[_YUV_U_table + eax*4] ;eax = [b impact][u-g impact] - mov edi,[_YUV_V_table + ebx*4] ;edi = [r impact][v-g impact] - - mov ecx,eax ;[C] - mov esi,Y1_pointer ;[1] - - mov edx,edi ;[C] - xor ebx,ebx ;[1] - - shr eax,16 ;[C] eax = blue impact - mov bl,[esi + ebp*2] ;[1] ebx = Y1 value - - and edi,0ffff0000h ;[C] edi = [r impact][0] - add ecx,edx ;[C] ecx = [junk][g impact] - - add eax,edi ;[C] eax = [r impact][b impact] - mov ebx,[_YUV_Y_table + ebx*4] ;[1] ebx = Y impact - - ;eax = [r][b] - ;ecx = [g] - - mov esi,ebx ;[1] - add ebx,eax ;[1] ebx = [red][blue] - - add esi,ecx ;[1] edx = [junk][green] - mov edi,ebx ;[1] edi = [red][blue] - - shr ebx,16 ;[1] ebx = red - and esi,0000ffffh ;[1] ecx = green - - and edi,0000ffffh ;edi = blue - xor edx,edx - - mov bh,[_YUV_clip_table16+ebx-3f00h] ;bh = red - mov dl,[_YUV_clip_table16+esi-3f00h] ;dl = green - - mov bl,[_YUV_clip_table16+edi-3f00h] ;bl = blue - xor dh,dh ;[1] - - shl bh,2 ;[1] - mov edi,ARGB1_pointer ;[1] - - shl edx,5 ;[1] - mov esi,Y1_pointer ;[2] - - add edx,ebx ;[1] - xor ebx,ebx ;[2] - - mov [edi+ebp*4+0],dl ;[1] - mov bl,[esi + ebp*2 + 1] ;[2] ebx = Y1 value - - mov [edi+ebp*4+1],dh ;[1] - mov esi,ecx ;[2] - - mov ebx,[_YUV_Y_table + ebx*4] ;[2] ebx = Y impact - mov edi,0000ffffh ;[2] - - add esi,ebx ;[2] edx = [junk][green] - add ebx,eax ;[2] ebx = [red][blue] - - and edi,ebx ;[2] edi = blue - and esi,0000ffffh ;[2] ecx = green - - shr ebx,16 ;ebx = red - xor edx,edx - - mov bh,[_YUV_clip_table16+ebx-3f00h] ;bh = red - - mov dl,[_YUV_clip_table16+esi-3f00h] ;dl = green - mov bl,[_YUV_clip_table16+edi-3f00h] ;bl = blue - - shl edx,5 ;[2] - mov edi,ARGB1_pointer ;[2] - - shl bh,2 ;[2] - mov esi,Y2_pointer ;[3] - - add edx,ebx ;[2] - xor ebx,ebx ;[3] - - mov [edi+ebp*4+2],dl ;[2] - mov bl,[esi + ebp*2] ;[3] ebx = Y1 value - - mov [edi+ebp*4+3],dh ;[2] - mov edi,ecx ;[2] - - mov ebx,[_YUV_Y_table + ebx*4] ;[3] ebx = Y impact - mov esi,0000ffffh ;[3] - - add edi,ebx ;[3] edx = [junk][green] - add ebx,eax ;[3] ebx = [red][blue] - - and esi,ebx ;[3] edi = blue - and edi,0000ffffh ;ecx = green - - shr ebx,16 ;ebx = red - xor edx,edx - - mov dl,[_YUV_clip_table16+edi-3f00h] ;dl = green - mov edi,ARGB2_pointer ;[3] - - shl edx,5 - mov bh,[_YUV_clip_table16+ebx-3f00h] ;bh = red - - mov bl,[_YUV_clip_table16+esi-3f00h] ;bl = blue - mov esi,Y2_pointer ;[4] - - shl bh,2 ;[3] - nop - - add edx,ebx ;[3] - xor ebx,ebx ;[4] - - mov [edi+ebp*4+0],dl ;[3] - mov bl,[esi + ebp*2 + 1] ;[4] ebx = Y1 value - - mov [edi+ebp*4+1],dh ;[3] - mov edi,0000ffffh ;[4] - - mov ebx,[_YUV_Y_table + ebx*4] ;[4] ebx = Y impact - xor edx,edx - - add ecx,ebx ;[4] ecx = [junk][green] - add ebx,eax ;ebx = [red][blue] - - and edi,ebx ;edi = blue - and ecx,0000ffffh ;ecx = green - - shr ebx,16 ;ebx = red - mov esi,ARGB2_pointer - - mov dl,[_YUV_clip_table16+ecx-3f00h] ;dl = green - mov al,[_YUV_clip_table16+edi-3f00h] ;bl = blue - - shl edx,5 - mov ah,[_YUV_clip_table16+ebx-3f00h] ;bh = red - - shl ah,2 - - add eax,edx - - mov [esi+ebp*4+2],al - mov [esi+ebp*4+3],ah - - inc ebp - jnz col_loop16 - - pop ebp - pop edi - pop esi - pop ebx - ret - - - -_asm_YUVtoRGB16_row_MMX: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - mov ebx,eax - shl ebx,2 - add eax,eax - add ARGB1_pointer,ebx - add ARGB2_pointer,ebx - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer - mov edi,V_pointer - mov ecx,Y1_pointer - mov edx,Y2_pointer - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - -col_loop_MMX16: - movd mm0,dword [esi+ebp] ;[0 ] U (byte) - pxor mm7,mm7 ;[0 7] - - movd mm1,dword [edi+ebp] ;[01 7] V (byte) - punpcklbw mm0,mm7 ;[01 7] U (word) - - psubw mm0,[MMX_80w] ;[01 7] - punpcklbw mm1,mm7 ;[01 7] V (word) - - psubw mm1,[MMX_80w] ;[01 ] - movq mm2,mm0 ;[012 ] - - pmullw mm2,[MMX_Ugrncoeff] ;[012 ] - movq mm3,mm1 ;[0123 ] - - ;mm0: blue - ;mm1: red - ;mm2: green - - movq mm6,[ecx+ebp*2] ;[0123 6 ] [1] Y - ;<--> - - pmullw mm3,[MMX_Vgrncoeff] ;[0123 ] - movq mm7,mm6 ;[012 67] [2] Y - - pmullw mm0,[MMX_Ublucoeff] ;[0123 ] - psrlw mm7,8 ;[012 67] [2] - - pmullw mm1,[MMX_Vredcoeff] ;[0123 ] - ;<--> - - pand mm6,[MMX_00FFw] ;[012 67] [1] - paddw mm2,mm3 ;[012 6 ] [C] - - psubw mm6,[MMX_10w] ;[012 67] [1] - - pmullw mm6,[MMX_Ycoeff] ;[012 67] [1] - - psubw mm7,[MMX_10w] ;[012 67] [2] - movq mm4,mm6 ;[012 4 67] [1] - - pmullw mm7,[MMX_Ycoeff] ;[012 67] [2] - movq mm5,mm6 ;[012 4567] [1] - - paddw mm6,mm0 ;[012 4 67] [1] mm6: - paddw mm4,mm1 ;[012 4567] [1] mm4: - - paddw mm5,mm2 ;[012 4567] [1] mm5: - psraw mm4,6 ;[012 4567] [1] - - movq mm3,mm7 ;[01234567] [2] - psraw mm5,4 ;[01234567] [1] - - paddw mm7,mm0 ;[01234567] [2] mm6: - psraw mm6,6 ;[01234567] [1] - - paddsw mm5,[MMX_clip] - packuswb mm6,mm6 ;[01234567] [1] mm6: B3B2B1B0B3B2B1B0 - - psubusw mm5,[MMX_clip] - packuswb mm4,mm4 ;[01234567] [1] mm4: R3R2R1R0R3R2R1R0 - - pand mm5,[MMX_grnmask] ;[01234567] [1] mm7: - psrlq mm6,2 ;[01234567] [1] - - punpcklbw mm6,mm4 ;[0123 567] [1] mm4: R3B3R2B2R1B1R0B0 - - movq mm4,[edx+ebp*2] ;[01234567] [3] Y - psrlw mm6,1 ;[01234567] [1] - - pand mm6,[MMX_rbmask] ;[01234567] [1] mm6: - - por mm6,mm5 ;[01234 67] [1] mm6: P6P4P2P0 - movq mm5,mm3 ;[01234567] [2] - - paddw mm3,mm1 ;[01234567] [2] mm4: - paddw mm5,mm2 ;[01234567] [2] mm5: - - pand mm4,[MMX_00FFw] ;[01234567] [3] - psraw mm3,6 ;[01234567] [2] - - psubw mm4,[MMX_10w] ;[01234567] [3] - psraw mm5,4 ;[01234567] [2] - - pmullw mm4,[MMX_Ycoeff] ;[01234567] [3] - psraw mm7,6 ;[01234567] [2] - - paddsw mm5,[MMX_clip] - packuswb mm3,mm3 ;[01234567] [2] mm4: R3R2R1R0R3R2R1R0 - - psubusw mm5,[MMX_clip] - packuswb mm7,mm7 ;[01234567] [2] mm6: B3B2B1B0B3B2B1B0 - - pand mm5,[MMX_grnmask] ;[012 4567] [2] mm7: - psrlq mm7,2 ;[01234567] [2] - - punpcklbw mm7,mm3 ;[012 4567] [2] mm6: R3B3R2B2R1B1R0B0 - - movq mm3,[edx+ebp*2] ;[01234567] [4] Y - psrlw mm7,1 ;[01234567] [2] - - pand mm7,[MMX_rbmask] ;[01234567] [2] mm6: - psrlw mm3,8 ;[01234567] [4] - - por mm7,mm5 ;[01234567] [2] mm7: P7P5P3P1 - movq mm5,mm6 ;[01234567] [A] - - psubw mm3,[MMX_10w] ;[01234567] [4] - punpcklwd mm6,mm7 ;[01234567] [A] mm4: P3P2P1P0 - - pmullw mm3,[MMX_Ycoeff] ;[0123456 ] [4] - punpckhwd mm5,mm7 ;[0123456 ] [A} mm5: P7P6P5P4 - - movq [eax+ebp*4 ],mm6 ;[012345 ] [A] - movq mm6,mm4 ;[0123456 ] [3] - - movq [eax+ebp*4+ 8],mm5 ;[0123456 ] [A] - paddw mm6,mm0 ;[01234 6 ] [3] mm6: - - movq mm5,mm4 ;[0123456 ] [3] - paddw mm4,mm1 ;[0123456 ] [3] mm4: - - paddw mm5,mm2 ;[0123456 ] [3] mm5: - psraw mm4,6 ;[0123456 ] [3] - - movq mm7,mm3 ;[01234567] [4] - psraw mm5,4 ;[01234567] [3] - - paddw mm7,mm0 ;[01234567] [4] mm6: - psraw mm6,6 ;[01234567] [3] - - movq mm0,mm3 ;[01234567] [4] - packuswb mm4,mm4 ;[01234567] [3] mm4: R3R2R1R0R3R2R1R0 - - - packuswb mm6,mm6 ;[01 34567] [3] mm6: B3B2B1B0B3B2B1B0 - paddw mm3,mm1 ;[01234567] [4] mm4: - - psrlq mm6,2 - paddw mm0,mm2 ;[01 34567] [4] mm5: - - paddsw mm5,[MMX_clip] - punpcklbw mm6,mm4 ;[01 3 567] [3] mm6: B3B3B2B2B1B1B0B0 - - psubusw mm5,[MMX_clip] - psrlw mm6,1 ;[01 3 567] [3] - - pand mm6,[MMX_rbmask] ;[01 3 567] [3] mm6: - psraw mm3,6 ;[01 3 567] [4] - - pand mm5,[MMX_grnmask] ;[01 3 567] [3] mm7: - psraw mm0,4 ;[01 3 567] [4] - - por mm6,mm5 ;[01 3 67] [3] mm4: P6P4P2P0 - psraw mm7,6 ;[01 3 67] [4] - - paddsw mm0,[MMX_clip] - packuswb mm3,mm3 ;[01 3 67] [4] mm4: R3R2R1R0R3R2R1R0 - - psubusw mm0,[MMX_clip] - packuswb mm7,mm7 ;[01 3 67] mm6: B3B2B1B0B3B2B1B0 - - pand mm0,[MMX_grnmask] ;[01 67] mm7: - psrlq mm7,2 - - punpcklbw mm7,mm3 ;[01 67] mm6: R3B3R2B2R1B1R0B0 - movq mm1,mm6 - - psrlw mm7,1 - add ebp,4 - - pand mm7,[MMX_rbmask] ;[01 67] mm6: - - por mm0,mm7 ;[01 67] mm0: P7P5P3P1 - - punpcklwd mm6,mm0 ;[01 6 ] mm4: P3P2P1P0 - - punpckhwd mm1,mm0 ;[ 1 6 ] mm5: P7P6P5P4 - movq [ebx+ebp*4-16],mm6 - - movq [ebx+ebp*4- 8],mm1 - jnz col_loop_MMX16 - - pop ebp - pop edi - pop esi - pop ebx - ret - -;-------------------------------------------------------------------------- - -_asm_YUVtoRGB32_row_ISSE: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - mov ebx,eax - shl ebx,3 - add eax,eax - add ARGB1_pointer,ebx - add ARGB2_pointer,ebx - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer - mov edi,V_pointer - mov ecx,Y1_pointer - mov edx,Y2_pointer - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - -col_loop_SSE: - prefetchnta [esi+ebp+32] - prefetchnta [edi+ebp+32] - prefetchnta [ecx+ebp*2+32] - prefetchnta [edx+ebp*2+32] - - movd mm0,dword [esi+ebp] ;U (byte) - pxor mm7,mm7 - - movd mm1,dword [edi+ebp] ;V (byte) - punpcklbw mm0,mm7 ;U (word) - - psubw mm0,[MMX_80w] - punpcklbw mm1,mm7 ;V (word) - - psubw mm1,[MMX_80w] - movq mm2,mm0 - - pmullw mm2,[MMX_Ugrncoeff] - movq mm3,mm1 - - pmullw mm3,[MMX_Vgrncoeff] - pmullw mm0,[MMX_Ublucoeff] - pmullw mm1,[MMX_Vredcoeff] - paddw mm2,mm3 - - ;mm0: blue - ;mm1: red - ;mm2: green - - movq mm6,[ecx+ebp*2] ;Y - pand mm6,[MMX_00FFw] - psubw mm6,[MMX_10w] - pmullw mm6,[MMX_Ycoeff] - movq mm4,mm6 - paddw mm6,mm0 ;mm6: - movq mm5,mm4 - paddw mm4,mm1 ;mm4: - paddw mm5,mm2 ;mm5: - psraw mm6,6 - psraw mm4,6 - packuswb mm6,mm6 ;mm6: B3B2B1B0B3B2B1B0 - psraw mm5,6 - packuswb mm4,mm4 ;mm4: R3R2R1R0R3R2R1R0 - punpcklbw mm6,mm4 ;mm6: R3B3R2B2R1B1R0B0 - packuswb mm5,mm5 ;mm5: G3G2G1G0G3G2G1G0 - punpcklbw mm5,mm5 ;mm5: G3G3G2G2G1G1G0G0 - movq mm4,mm6 - punpcklbw mm6,mm5 ;mm6: G1R1G1B2G0R0G0B0 - punpckhbw mm4,mm5 ;mm4: G3R3G3B3G2R2G2B2 - - movq mm7,[ecx+ebp*2] ;Y - psrlw mm7,8 - psubw mm7,[MMX_10w] - pmullw mm7,[MMX_Ycoeff] - movq mm3,mm7 - paddw mm7,mm0 ;mm7: final blue - movq mm5,mm3 - paddw mm3,mm1 ;mm3: final red - paddw mm5,mm2 ;mm5: final green - psraw mm7,6 - psraw mm3,6 - packuswb mm7,mm7 ;mm7: B3B2B1B0B3B2B1B0 - psraw mm5,6 - packuswb mm3,mm3 ;mm3: R3R2R1R0R3R2R1R0 - punpcklbw mm7,mm3 ;mm7: R3B3R2B2R1B1R0B0 - packuswb mm5,mm5 ;mm5: G3G2G1G0G3G2G1G0 - punpcklbw mm5,mm5 ;mm5: G3G3G2G2G1G1G0G0 - movq mm3,mm7 - punpcklbw mm7,mm5 ;mm7: G1R1G1B2G0R0G0B0 - punpckhbw mm3,mm5 ;mm3: G3R3G3B3G2R2G2B2 - - ;mm3 P7:P5 - ;mm4 P6:P4 - ;mm6 P2:P0 - ;mm7 P3:P1 - - movq mm5,mm6 - punpckldq mm5,mm7 ;P1:P0 - punpckhdq mm6,mm7 ;P3:P2 - movq mm7,mm4 - punpckldq mm4,mm3 ;P5:P4 - punpckhdq mm7,mm3 ;P7:P6 - - movntq [eax+ebp*8],mm5 - movntq [eax+ebp*8+8],mm6 - movntq [eax+ebp*8+16],mm4 - movntq [eax+ebp*8+24],mm7 - - movq mm6,[edx+ebp*2] ;Y - pand mm6,[MMX_00FFw] - psubw mm6,[MMX_10w] - pmullw mm6,[MMX_Ycoeff] - movq mm4,mm6 - paddw mm6,mm0 ;mm6: - movq mm5,mm4 - paddw mm4,mm1 ;mm4: - paddw mm5,mm2 ;mm5: - psraw mm6,6 - psraw mm4,6 - packuswb mm6,mm6 ;mm6: B3B2B1B0B3B2B1B0 - psraw mm5,6 - packuswb mm4,mm4 ;mm4: R3R2R1R0R3R2R1R0 - punpcklbw mm6,mm4 ;mm6: R3B3R2B2R1B1R0B0 - packuswb mm5,mm5 ;mm5: G3G2G1G0G3G2G1G0 - punpcklbw mm5,mm5 ;mm5: G3G3G2G2G1G1G0G0 - movq mm4,mm6 - punpcklbw mm6,mm5 ;mm6: G1R1G1B2G0R0G0B0 - punpckhbw mm4,mm5 ;mm4: G3R3G3B3G2R2G2B2 - - movq mm7,[edx+ebp*2] ;Y - psrlw mm7,8 - psubw mm7,[MMX_10w] - pmullw mm7,[MMX_Ycoeff] - movq mm3,mm7 - paddw mm7,mm0 ;mm7: final blue - movq mm5,mm3 - paddw mm3,mm1 ;mm3: final red - paddw mm5,mm2 ;mm5: final green - psraw mm7,6 - psraw mm3,6 - packuswb mm7,mm7 ;mm7: B3B2B1B0B3B2B1B0 - psraw mm5,6 - packuswb mm3,mm3 ;mm3: R3R2R1R0R3R2R1R0 - punpcklbw mm7,mm3 ;mm7: R3B3R2B2R1B1R0B0 - packuswb mm5,mm5 ;mm5: G3G2G1G0G3G2G1G0 - punpcklbw mm5,mm5 ;mm5: G3G3G2G2G1G1G0G0 - movq mm3,mm7 - punpcklbw mm7,mm5 ;mm7: G1R1G1B2G0R0G0B0 - punpckhbw mm3,mm5 ;mm3: G3R3G3B3G2R2G2B2 - - ;mm3 P7:P5 - ;mm4 P6:P4 - ;mm6 P2:P0 - ;mm7 P3:P1 - - movq mm5,mm6 - punpckldq mm5,mm7 ;P1:P0 - punpckhdq mm6,mm7 ;P3:P2 - movq mm7,mm4 - punpckldq mm4,mm3 ;P5:P4 - punpckhdq mm7,mm3 ;P7:P6 - - movntq [ebx+ebp*8 ],mm5 - movntq [ebx+ebp*8+ 8],mm6 - - movntq [ebx+ebp*8+16],mm4 - movntq [ebx+ebp*8+24],mm7 - - add ebp,4 - - jnz col_loop_SSE - - pop ebp - pop edi - pop esi - pop ebx - ret - - global _asm_YUVtoRGB24_row_ISSE -_asm_YUVtoRGB24_row_ISSE: - ;.FPO (7, 9, 0, 0, 0, 0) - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - add eax,eax - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer - mov edi,V_pointer - mov ecx,Y1_pointer - mov edx,Y2_pointer - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - - movd mm0,esp - sub esp,20 - and esp,-8 - movd dword [esp+16],mm0 - -col_loop_ISSE24: - prefetchnta [esi+ebp+32] - prefetchnta [edi+ebp+32] - prefetchnta [ecx+ebp*2+32] - prefetchnta [edx+ebp*2+32] - - movd mm0,dword [esi+ebp] ;U (byte) - pxor mm7,mm7 - - movd mm1,dword [edi+ebp] ;V (byte) - punpcklbw mm0,mm7 ;U (word) - - movd mm2,dword [ecx+ebp*2] ;Y low - punpcklbw mm1,mm7 ;V (word) - - movd mm3,dword [edx+ebp*2] ;Y high - punpcklbw mm2,mm7 ;Y1 (word) - - psubw mm2,[MMX_10w] - punpcklbw mm3,mm7 ;Y2 (word) - - psubw mm3,[MMX_10w] - - psubw mm0,[MMX_80w] - psubw mm1,[MMX_80w] - - movq [esp+0],mm0 - movq [esp+8],mm1 - - ;group 1 - - pmullw mm2,[MMX_Ycoeff] ;[lazy] - pmullw mm3,[MMX_Ycoeff] ;[lazy] - - pshufw mm6,mm0,00000000b ;mm6 = U0U0U0U0 - pshufw mm7,mm1,00000000b ;mm7 = V0V0V0V0 - - pmullw mm6,[MMX_Ucoeff0] - pshufw mm4,mm2,01000000b ;mm4 = Y1Y0Y0Y0 [high] - pmullw mm7,[MMX_Vcoeff0] - pshufw mm5,mm3,01000000b ;mm4 = Y1Y0Y0Y0 [low] - - paddw mm4,mm6 - paddw mm5,mm6 - paddw mm4,mm7 - paddw mm5,mm7 - - psraw mm4,6 - psraw mm5,6 - - ;group 2 - - pshufw mm6,[esp+0],01010000b ;mm6 = U1U1U0U0 - pshufw mm7,[esp+8],01010000b ;mm7 = V1V1V0V0 - - pmullw mm6,[MMX_Ucoeff1] - pshufw mm0,mm2,10100101b ;mm0 = Y2Y2Y1Y1 [high] - pmullw mm7,[MMX_Vcoeff1] - pshufw mm1,mm3,10100101b ;mm1 = Y2Y2Y1Y1 [low] - - paddw mm0,mm6 - paddw mm1,mm6 - paddw mm0,mm7 - paddw mm1,mm7 - - psraw mm0,6 - psraw mm1,6 - - packuswb mm4,mm0 - packuswb mm5,mm1 - - ;group 3 - - pshufw mm6,[esp+0],01010101b ;mm6 = U1U1U1U1 - pshufw mm7,[esp+8],01010101b ;mm7 = V1V1V1V1 - - movntq [eax],mm4 ;[lazy write] - movntq [ebx],mm5 ;[lazy write] - - pmullw mm6,[MMX_Ucoeff2] - pshufw mm4,mm2,11111110b ;mm4 = Y3Y3Y3Y2 [high] - pmullw mm7,[MMX_Vcoeff2] - pshufw mm5,mm3,11111110b ;mm5 = Y3Y3Y3Y2 [low] - - paddw mm4,mm6 - paddw mm5,mm6 - paddw mm4,mm7 - paddw mm5,mm7 - - psraw mm4,6 - psraw mm5,6 - - ;next 3 groups - - movd mm2,dword [ecx+ebp*2+4] ;Y low - pxor mm7,mm7 - - movd mm3,dword [edx+ebp*2+4] ;Y high - punpcklbw mm2,mm7 ;U (word) - - psubw mm2,[MMX_10w] - punpcklbw mm3,mm7 ;V (word) - - psubw mm3,[MMX_10w] - - - ;group 1 - - pmullw mm2,[MMX_Ycoeff] ;[init] - pmullw mm3,[MMX_Ycoeff] ;[init] - - pshufw mm6,[esp+0],10101010b ;mm6 = U2U2U2U2 - pshufw mm7,[esp+8],10101010b ;mm7 = V2V2V2V2 - - pmullw mm6,[MMX_Ucoeff0] - pshufw mm0,mm2,01000000b ;mm0 = Y1Y0Y0Y0 [high] - pmullw mm7,[MMX_Vcoeff0] - pshufw mm1,mm3,01000000b ;mm1 = Y1Y0Y0Y0 [low] - - paddw mm0,mm6 - paddw mm1,mm6 - paddw mm0,mm7 - paddw mm1,mm7 - - psraw mm0,6 - psraw mm1,6 - - packuswb mm4,mm0 - packuswb mm5,mm1 - - ;group 2 - - pshufw mm6,[esp+0],11111010b ;mm6 = U3U3U2U2 - pshufw mm7,[esp+8],11111010b ;mm7 = V3V3V2V2 - - movntq [eax+8],mm4 - movntq [ebx+8],mm5 - - pmullw mm6,[MMX_Ucoeff1] - pshufw mm4,mm2,10100101b ;mm4 = Y2Y2Y1Y1 [high] - pmullw mm7,[MMX_Vcoeff1] - pshufw mm5,mm3,10100101b ;mm5 = Y2Y2Y1Y1 [low] - - paddw mm4,mm6 - paddw mm5,mm6 - paddw mm4,mm7 - paddw mm5,mm7 - - psraw mm4,6 - psraw mm5,6 - - ;group 3 - - pshufw mm0,[esp+0],11111111b ;mm6 = U3U3U3U3 - pshufw mm1,[esp+8],11111111b ;mm7 = V3V3V3V3 - - pmullw mm0,[MMX_Ucoeff2] - pshufw mm2,mm2,11111110b ;mm6 = Y3Y3Y3Y2 [high] - pmullw mm1,[MMX_Vcoeff2] - pshufw mm3,mm3,11111110b ;mm7 = Y3Y3Y3Y2 [low] - - paddw mm2,mm0 - paddw mm3,mm0 - paddw mm2,mm1 - paddw mm3,mm1 - - psraw mm2,6 - psraw mm3,6 - - packuswb mm4,mm2 - packuswb mm5,mm3 - - movntq [eax+16],mm4 - add eax,24 - movntq [ebx+16],mm5 - add ebx,24 - - ;done - - add ebp,4 - jnz col_loop_ISSE24 - - mov esp,[esp+16] - - pop ebp - pop edi - pop esi - pop ebx - ret - -_asm_YUVtoRGB16_row_ISSE: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - mov ebx,eax - shl ebx,2 - add eax,eax - add ARGB1_pointer,ebx - add ARGB2_pointer,ebx - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer - mov edi,V_pointer - mov ecx,Y1_pointer - mov edx,Y2_pointer - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - -col_loop_ISSE16: - prefetchnta [esi+ebp+32] - prefetchnta [edi+ebp+32] - - movd mm0,dword [esi+ebp] ;[0 ] U (byte) - pxor mm7,mm7 ;[0 7] - - movd mm1,dword [edi+ebp] ;[01 7] V (byte) - punpcklbw mm0,mm7 ;[01 7] U (word) - - psubw mm0,[MMX_80w] ;[01 7] - punpcklbw mm1,mm7 ;[01 7] V (word) - - psubw mm1,[MMX_80w] ;[01 ] - movq mm2,mm0 ;[012 ] - - pmullw mm2,[MMX_Ugrncoeff] ;[012 ] - movq mm3,mm1 ;[0123 ] - - ;mm0: blue - ;mm1: red - ;mm2: green - - prefetchnta [ecx+ebp*2+32] - prefetchnta [edx+ebp*2+32] - - movq mm6,[ecx+ebp*2] ;[0123 6 ] [1] Y - ;<--> - - pmullw mm3,[MMX_Vgrncoeff] ;[0123 ] - movq mm7,mm6 ;[012 67] [2] Y - - pmullw mm0,[MMX_Ublucoeff] ;[0123 ] - psrlw mm7,8 ;[012 67] [2] - - pmullw mm1,[MMX_Vredcoeff] ;[0123 ] - ;<--> - - pand mm6,[MMX_00FFw] ;[012 67] [1] - paddw mm2,mm3 ;[012 6 ] [C] - - psubw mm6,[MMX_10w] ;[012 67] [1] - - pmullw mm6,[MMX_Ycoeff] ;[012 67] [1] - - psubw mm7,[MMX_10w] ;[012 67] [2] - movq mm4,mm6 ;[012 4 67] [1] - - pmullw mm7,[MMX_Ycoeff] ;[012 67] [2] - movq mm5,mm6 ;[012 4567] [1] - - paddw mm6,mm0 ;[012 4 67] [1] mm6: - paddw mm4,mm1 ;[012 4567] [1] mm4: - - paddw mm5,mm2 ;[012 4567] [1] mm5: - psraw mm4,6 ;[012 4567] [1] - - movq mm3,mm7 ;[01234567] [2] - psraw mm5,4 ;[01234567] [1] - - paddw mm7,mm0 ;[01234567] [2] mm6: - psraw mm6,6 ;[01234567] [1] - - paddsw mm5,[MMX_clip] - packuswb mm6,mm6 ;[01234567] [1] mm6: B3B2B1B0B3B2B1B0 - - psubusw mm5,[MMX_clip] - packuswb mm4,mm4 ;[01234567] [1] mm4: R3R2R1R0R3R2R1R0 - - pand mm5,[MMX_grnmask] ;[01234567] [1] mm7: - psrlq mm6,2 ;[01234567] [1] - - punpcklbw mm6,mm4 ;[0123 567] [1] mm4: R3B3R2B2R1B1R0B0 - - movq mm4,[edx+ebp*2] ;[01234567] [3] Y - psrlw mm6,1 ;[01234567] [1] - - pand mm6,[MMX_rbmask] ;[01234567] [1] mm6: - - por mm6,mm5 ;[01234 67] [1] mm6: P6P4P2P0 - movq mm5,mm3 ;[01234567] [2] - - paddw mm3,mm1 ;[01234567] [2] mm4: - paddw mm5,mm2 ;[01234567] [2] mm5: - - pand mm4,[MMX_00FFw] ;[01234567] [3] - psraw mm3,6 ;[01234567] [2] - - psubw mm4,[MMX_10w] ;[01234567] [3] - psraw mm5,4 ;[01234567] [2] - - pmullw mm4,[MMX_Ycoeff] ;[01234567] [3] - psraw mm7,6 ;[01234567] [2] - - paddsw mm5,[MMX_clip] - packuswb mm3,mm3 ;[01234567] [2] mm4: R3R2R1R0R3R2R1R0 - - psubusw mm5,[MMX_clip] - packuswb mm7,mm7 ;[01234567] [2] mm6: B3B2B1B0B3B2B1B0 - - pand mm5,[MMX_grnmask] ;[012 4567] [2] mm7: - psrlq mm7,2 ;[01234567] [2] - - punpcklbw mm7,mm3 ;[012 4567] [2] mm6: R3B3R2B2R1B1R0B0 - - movq mm3,[edx+ebp*2] ;[01234567] [4] Y - psrlw mm7,1 ;[01234567] [2] - - pand mm7,[MMX_rbmask] ;[01234567] [2] mm6: - psrlw mm3,8 ;[01234567] [4] - - por mm7,mm5 ;[01234567] [2] mm7: P7P5P3P1 - movq mm5,mm6 ;[01234567] [A] - - psubw mm3,[MMX_10w] ;[01234567] [4] - punpcklwd mm6,mm7 ;[01234567] [A] mm4: P3P2P1P0 - - pmullw mm3,[MMX_Ycoeff] ;[0123456 ] [4] - punpckhwd mm5,mm7 ;[0123456 ] [A} mm5: P7P6P5P4 - - movntq [eax+ebp*4 ],mm6 ;[012345 ] [A] - movq mm6,mm4 ;[0123456 ] [3] - - movntq [eax+ebp*4+ 8],mm5 ;[0123456 ] [A] - paddw mm6,mm0 ;[01234 6 ] [3] mm6: - - movq mm5,mm4 ;[0123456 ] [3] - paddw mm4,mm1 ;[0123456 ] [3] mm4: - - paddw mm5,mm2 ;[0123456 ] [3] mm5: - psraw mm4,6 ;[0123456 ] [3] - - movq mm7,mm3 ;[01234567] [4] - psraw mm5,4 ;[01234567] [3] - - paddw mm7,mm0 ;[01234567] [4] mm6: - psraw mm6,6 ;[01234567] [3] - - movq mm0,mm3 ;[01234567] [4] - packuswb mm4,mm4 ;[01234567] [3] mm4: R3R2R1R0R3R2R1R0 - - - packuswb mm6,mm6 ;[01 34567] [3] mm6: B3B2B1B0B3B2B1B0 - paddw mm3,mm1 ;[01234567] [4] mm4: - - psrlq mm6,2 - paddw mm0,mm2 ;[01 34567] [4] mm5: - - paddsw mm5,[MMX_clip] - punpcklbw mm6,mm4 ;[01 3 567] [3] mm6: B3B3B2B2B1B1B0B0 - - psubusw mm5,[MMX_clip] - psrlw mm6,1 ;[01 3 567] [3] - - pand mm6,[MMX_rbmask] ;[01 3 567] [3] mm6: - psraw mm3,6 ;[01 3 567] [4] - - pand mm5,[MMX_grnmask] ;[01 3 567] [3] mm7: - psraw mm0,4 ;[01 3 567] [4] - - por mm6,mm5 ;[01 3 67] [3] mm4: P6P4P2P0 - psraw mm7,6 ;[01 3 67] [4] - - paddsw mm0,[MMX_clip] - packuswb mm3,mm3 ;[01 3 67] [4] mm4: R3R2R1R0R3R2R1R0 - - psubusw mm0,[MMX_clip] - packuswb mm7,mm7 ;[01 3 67] mm6: B3B2B1B0B3B2B1B0 - - pand mm0,[MMX_grnmask] ;[01 67] mm7: - psrlq mm7,2 - - punpcklbw mm7,mm3 ;[01 67] mm6: R3B3R2B2R1B1R0B0 - movq mm1,mm6 - - psrlw mm7,1 - add ebp,4 - - pand mm7,[MMX_rbmask] ;[01 67] mm6: - - por mm0,mm7 ;[01 67] mm0: P7P5P3P1 - - punpcklwd mm6,mm0 ;[01 6 ] mm4: P3P2P1P0 - - punpckhwd mm1,mm0 ;[ 1 6 ] mm5: P7P6P5P4 - movntq [ebx+ebp*4-16],mm6 - - movntq [ebx+ebp*4- 8],mm1 - jnz col_loop_ISSE16 - - pop ebp - pop edi - pop esi - pop ebx - ret - -;========================================================================== -; -; SSE2 (Pentium 4) implementation -; -;========================================================================== - -_asm_YUVtoRGB32_row_SSE2: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - mov ebx,eax - shl ebx,3 - add eax,eax - add ARGB1_pointer,ebx - add ARGB2_pointer,ebx - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer - mov edi,V_pointer - mov ecx,Y1_pointer - mov edx,Y2_pointer - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - -col_loop_SSE2: - prefetchnta [esi+ebp+32] - prefetchnta [edi+ebp+32] - prefetchnta [ecx+ebp*2+32] - prefetchnta [edx+ebp*2+32] - - movq xmm0,qword [esi+ebp];xmm0 = U7|U6|U5|U4|U3|U2|U1|U0 - pxor xmm7,xmm7 - - movq xmm1,qword [edi+ebp];xmm1 = V7|V6|V5|V4|V3|V2|V1|V0 - - punpcklbw xmm0,xmm7 - punpcklbw xmm1,xmm7 - - psubw xmm0, [SSE2_80w] ;xmm0 = U3|U2|U1|U0 - psubw xmm1, [SSE2_80w] ;xmm1 = V3|V2|V1|V0 - - movdqa xmm2,xmm0 - pmullw xmm0, [SSE2_Ugrncoeff] - pmullw xmm2, [SSE2_Ublucoeff] - - movdqa xmm3,xmm1 - pmullw xmm1, [SSE2_Vredcoeff] - pmullw xmm3, [SSE2_Vgrncoeff] - - paddw xmm0,xmm1 ;xmm0 = cG7|cG6|cG5|cG4|cG3|cG2|cG1|cG0 - - movdqu xmm3,[ecx+ebp*2] ;xmm4 = YF|YE|YD|YC|YB|YA|Y9|Y8|Y7|Y6|Y5|Y4|Y3|Y2|Y1|Y0 - movq xmm4,xmm4 ;xmm5 = YF|YE|YD|YC|YB|YA|Y9|Y8|Y7|Y6|Y5|Y4|Y3|Y2|Y1|Y0 - pand xmm3, [SSE2_Ylow] ;xmm4 = YE|YC|YA|Y8|Y6|Y4|Y2|Y0 - psrlw xmm4,8 ;xmm5 = YF|YD|YB|Y9|Y7|Y5|Y3|Y1 - - psubw xmm3, [SSE2_Ybias] - pmullw xmm3, [SSE2_Ycoeff] - psubw xmm4, [SSE2_Ybias] - pmullw xmm4, [SSE2_Ycoeff] - - ;register layout at this point: - ;xmm0: chroma green - ;xmm1: chroma red - ;xmm2: chroma blue - ;xmm3: Y low - ;xmm4: Y high - - movdqa xmm5,xmm4 - movdqa xmm6,xmm4 - paddw xmm4,xmm0 ;xmm4 = green high - paddw xmm5,xmm1 ;xmm5 = red high - paddw xmm6,xmm2 ;xmm6 = blue high - paddw xmm0,xmm3 ;xmm0 = green low - paddw xmm1,xmm3 ;xmm1 = red low - paddw xmm2,xmm3 ;xmm2 = blue low - - psraw xmm0,6 - psraw xmm1,6 - psraw xmm2,6 - psraw xmm4,6 - psraw xmm5,6 - psraw xmm6,6 - - packuswb xmm0,xmm0 - packuswb xmm1,xmm1 - packuswb xmm2,xmm2 - packuswb xmm4,xmm4 - packuswb xmm5,xmm5 - packuswb xmm6,xmm6 - - punpcklbw xmm0,xmm0 ;xmm3 = GE|GE|GC|GC|GA|GA|G8|G8|G6|G6|G4|G4|G2|G2|G0|G0 - punpcklbw xmm4,xmm4 ;xmm4 = GF|GF|GD|GD|GB|GB|G9|G9|G7|G7|G5|G5|G3|G3|G1|G1 - punpcklbw xmm2,xmm1 ;xmm2 = RE|BE|RC|BC|RA|BA|R8|B8|R6|B6|R4|B4|R2|B2|R0|B0 - punpcklbw xmm6,xmm5 ;xmm6 = RF|BF|RD|BD|RB|BB|R9|B9|R7|B7|R5|B5|R3|B3|B1|B1 - - movdqa xmm1,xmm2 - movdqa xmm5,xmm6 - - punpcklbw xmm1,xmm0 ;xmm1 = p6|p4|p2|p0 - punpckhbw xmm2,xmm0 ;xmm2 = pE|pC|pA|p8 - punpcklbw xmm5,xmm4 ;xmm5 = p7|p5|p3|p1 - punpckhbw xmm6,xmm4 ;xmm6 = pF|pD|pB|p9 - - movdqa xmm0,xmm1 - punpckldq xmm0,xmm5 ;xmm0 = p3|p2|p1|p0 - punpckhdq xmm1,xmm5 ;xmm1 = p7|p6|p5|p4 - movdqa xmm3,xmm2 - punpckldq xmm2,xmm6 ;xmm2 = pB|pA|p9|p8 - punpckhdq xmm3,xmm6 ;xmm3 = pF|pE|pD|pC - - movdqu [ebx+ebp*8 ],xmm0 - movdqu [ebx+ebp*8+ 8],xmm1 - - movdqu [ebx+ebp*8+16],xmm2 - movdqu [ebx+ebp*8+24],xmm3 - - add ebp,4 - - jnz col_loop_SSE2 - - pop ebp - pop edi - pop esi - pop ebx - ret - -_asm_YUVtoRGB24_SSE2: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - add eax,eax - mov esi,U_pointer - mov edi,V_pointer - add esi,ebp - add edi,ebp - mov ecx,Y1_pointer - mov edx,Y2_pointer - add ecx,eax - add edx,eax - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - neg ebp - - ;store esp in the SEH chain and set esp=constant_struct - push 0 - push dword [fs:0] - mov dword [fs:0],esp - mov esp, context_pointer - - ;---- we have no stack at this point! - - mov [esp+offs_width], ebp - -row_loop_SSE2_24: - mov ebp, [esp+offs_width] - -col_loop_SSE2_24: - prefetchnta [esi+ebp+128] - prefetchnta [edi+ebp+128] - prefetchnta [ecx+ebp*2+128] - prefetchnta [edx+ebp*2+128] - - ;U1|U1|U0|U0|U0|U0|U0|U0 - ;U2|U2|U2|U2|U1|U1|U1|U1 - ;U3|U3|U3|U3|U3|U3|U2|U2 - - movd xmm0,dword [esi+ebp];xmm0 = U3|U2|U1|U0 - pxor xmm7,xmm7 - punpcklbw xmm0,xmm7 ;xmm0 = U3|U2|U1|U0 - psubw xmm0,[esp+offs_SSE2_80w] - punpcklwd xmm0,xmm0 ;xmm0 = U3|U3|U2|U2|U1|U1|U0|U0 - pshufd xmm2,xmm0,11111110b ;xmm2 = U3|U3|U3|U3|U3|U3|U2|U2 - pshufd xmm1,xmm0,10100101b ;xmm1 = U2|U2|U2|U2|U1|U1|U1|U1 - pshufd xmm0,xmm0,01000000b ;xmm0 = U1|U1|U0|U0|U0|U0|U0|U0 - - pmullw xmm0,[esp+offs_SSE2_Ucoeff0] - pmullw xmm1,[esp+offs_SSE2_Ucoeff1] - pmullw xmm2,[esp+offs_SSE2_Ucoeff2] - - movd xmm3,dword [edi+ebp];xmm3 = V3|V2|V1|V0 - punpcklbw xmm3,xmm7 ;xmm3 = V3|V2|V1|V0 - psubw xmm3,[esp+offs_SSE2_80w] - punpcklwd xmm3,xmm3 - pshufd xmm5,xmm3,11111110b ;xmm5 = V7|V6|V7|V6|V7|V6|V5|V4 - pshufd xmm4,xmm3,10100101b ;xmm4 = V5|V4|V5|V4|V3|V2|V3|V2 - pshufd xmm3,xmm3,01000000b ;xmm3 = V3|V2|V1|V0|V1|V0|V1|V0 - - pmullw xmm3,[esp+offs_SSE2_Vcoeff0] - pmullw xmm4,[esp+offs_SSE2_Vcoeff1] - pmullw xmm5,[esp+offs_SSE2_Vcoeff2] - - paddw xmm0,xmm3 - paddw xmm1,xmm4 - paddw xmm2,xmm5 - - movq xmm3,qword [ecx+ebp*2];xmm3 = Y7 | Y6 | Y5 | Y4 | Y3 | Y2 | Y1 | Y0 - punpcklbw xmm3,xmm7 - psubw xmm3,[esp+offs_SSE2_Ybias] - pmullw xmm3,[esp+offs_SSE2_Ycoeff] - pshufd xmm5,xmm3,11111110b ;xmm5 = Y7|Y6|Y7|Y6|Y7|Y6|Y5|Y4 - pshufd xmm4,xmm3,10100101b ;xmm4 = Y5|Y4|Y5|Y4|Y3|Y2|Y3|Y2 - pshufd xmm3,xmm3,01000000b ;xmm3 = Y3|Y2|Y1|Y0|Y1|Y0|Y1|Y0 - pshufhw xmm5,xmm5,11111110b ;xmm5 = Y7|Y7|Y7|Y6|Y7|Y6|Y5|Y4 - pshuflw xmm5,xmm5,10100101b ;xmm5 = Y7|Y7|Y7|Y6|Y6|Y6|Y5|Y5 - pshufhw xmm4,xmm4,01000000b ;xmm4 = Y5|Y4|Y4|Y4|Y3|Y2|Y3|Y2 - pshuflw xmm4,xmm4,11111110b ;xmm4 = Y5|Y4|Y4|Y4|Y3|Y3|Y3|Y2 - pshufhw xmm3,xmm3,10100101b ;xmm3 = Y2|Y2|Y1|Y1|Y1|Y0|Y1|Y0 - pshuflw xmm3,xmm3,01000000b ;xmm3 = Y2|Y2|Y1|Y1|Y1|Y0|Y0|Y0 - - paddw xmm3,xmm0 - paddw xmm4,xmm1 - paddw xmm5,xmm2 - - psraw xmm3,6 - psraw xmm4,6 - psraw xmm5,6 - - packuswb xmm3,xmm3 - packuswb xmm4,xmm4 - packuswb xmm5,xmm5 - - movdq2q mm0,xmm3 - movdq2q mm1,xmm4 - movdq2q mm2,xmm5 - - movq xmm3,qword [edx+ebp*2] ;xmm3 = Y7 | Y6 | Y5 | Y4 | Y3 | Y2 | Y1 | Y0 - punpcklbw xmm3,xmm7 - psubw xmm3,[esp+offs_SSE2_Ybias] - pmullw xmm3,[esp+offs_SSE2_Ycoeff] - pshufd xmm5,xmm3,11111110b ;xmm5 = Y7|Y6|Y7|Y6|Y7|Y6|Y5|Y4 - pshufd xmm4,xmm3,10100101b ;xmm4 = Y5|Y4|Y5|Y4|Y3|Y2|Y3|Y2 - pshufd xmm3,xmm3,01000000b ;xmm3 = Y3|Y2|Y1|Y0|Y1|Y0|Y1|Y0 - pshufhw xmm5,xmm5,11111110b ;xmm5 = Y7|Y7|Y7|Y6|Y7|Y6|Y5|Y4 - pshuflw xmm5,xmm5,10100101b ;xmm5 = Y7|Y7|Y7|Y6|Y6|Y6|Y5|Y5 - pshufhw xmm4,xmm4,01000000b ;xmm4 = Y5|Y4|Y4|Y4|Y3|Y2|Y3|Y2 - pshuflw xmm4,xmm4,11111110b ;xmm4 = Y5|Y4|Y4|Y4|Y3|Y3|Y3|Y2 - pshufhw xmm3,xmm3,10100101b ;xmm3 = Y2|Y2|Y1|Y1|Y1|Y0|Y1|Y0 - pshuflw xmm3,xmm3,01000000b ;xmm3 = Y2|Y2|Y1|Y1|Y1|Y0|Y0|Y0 - - paddw xmm3,xmm0 - paddw xmm4,xmm1 - paddw xmm5,xmm2 - - psraw xmm3,6 - psraw xmm4,6 - psraw xmm5,6 - - packuswb xmm3,xmm3 - packuswb xmm4,xmm4 - packuswb xmm5,xmm5 - - movdq2q mm3,xmm3 - movdq2q mm4,xmm4 - movdq2q mm5,xmm5 - - movntq [eax],mm0 - movntq [eax+8],mm1 - movntq [eax+16],mm2 - movntq [ebx],mm3 - movntq [ebx+8],mm4 - movntq [ebx+16],mm5 - add eax,24 - add ebx,24 - - ;done - - add ebp,4 - jnz col_loop_SSE2_24 - - mov ebp, [esp+offs_rgb_pitch] - add eax, ebp - add ebx, ebp - mov ebp, [esp+offs_y_pitch] - add ecx, ebp - add edx, ebp - mov ebp, [esp+offs_uv_pitch] - add esi, ebp - add edi, ebp - - dec dword [esp+offs_height] - jnz row_loop_SSE2_24 - - ;restore esp from SEH chain - mov esp, dword [fs:0] - pop dword [fs:0] - pop eax - - pop ebp - pop edi - pop esi - pop ebx - ret - -_asm_YUVtoRGB16_row_SSE2: - push ebx - push esi - push edi - push ebp - - mov eax,count - mov ebp,eax - mov ebx,eax - shl ebx,2 - add eax,eax - add ARGB1_pointer,ebx - add ARGB2_pointer,ebx - add Y1_pointer,eax - add Y2_pointer,eax - add U_pointer,ebp - add V_pointer,ebp - neg ebp - - mov esi,U_pointer - mov edi,V_pointer - mov ecx,Y1_pointer - mov edx,Y2_pointer - mov eax,ARGB1_pointer - mov ebx,ARGB2_pointer - -col_loop_SSE2_16: - prefetchnta [esi+ebp+32] - prefetchnta [edi+ebp+32] - - movd mm0,dword [esi+ebp] ;[0 ] U (byte) - pxor mm7,mm7 ;[0 7] - - movd mm1,dword [edi+ebp] ;[01 7] V (byte) - punpcklbw mm0,mm7 ;[01 7] U (word) - - psubw mm0,[MMX_80w] ;[01 7] - punpcklbw mm1,mm7 ;[01 7] V (word) - - psubw mm1,[MMX_80w] ;[01 ] - movq mm2,mm0 ;[012 ] - - pmullw mm2,[MMX_Ugrncoeff] ;[012 ] - movq mm3,mm1 ;[0123 ] - - ;mm0: blue - ;mm1: red - ;mm2: green - - prefetchnta [ecx+ebp*2+32] - prefetchnta [edx+ebp*2+32] - - movq mm6,[ecx+ebp*2] ;[0123 6 ] [1] Y - ;<--> - - pmullw mm3,[MMX_Vgrncoeff] ;[0123 ] - movq mm7,mm6 ;[012 67] [2] Y - - pmullw mm0,[MMX_Ublucoeff] ;[0123 ] - psrlw mm7,8 ;[012 67] [2] - - pmullw mm1,[MMX_Vredcoeff] ;[0123 ] - ;<--> - - pand mm6,[MMX_00FFw] ;[012 67] [1] - paddw mm2,mm3 ;[012 6 ] [C] - - psubw mm6,[MMX_10w] ;[012 67] [1] - - pmullw mm6,[MMX_Ycoeff] ;[012 67] [1] - - psubw mm7,[MMX_10w] ;[012 67] [2] - movq mm4,mm6 ;[012 4 67] [1] - - pmullw mm7,[MMX_Ycoeff] ;[012 67] [2] - movq mm5,mm6 ;[012 4567] [1] - - paddw mm6,mm0 ;[012 4 67] [1] mm6: - paddw mm4,mm1 ;[012 4567] [1] mm4: - - paddw mm5,mm2 ;[012 4567] [1] mm5: - psraw mm4,6 ;[012 4567] [1] - - movq mm3,mm7 ;[01234567] [2] - psraw mm5,4 ;[01234567] [1] - - paddw mm7,mm0 ;[01234567] [2] mm6: - psraw mm6,6 ;[01234567] [1] - - paddsw mm5,[MMX_clip] - packuswb mm6,mm6 ;[01234567] [1] mm6: B3B2B1B0B3B2B1B0 - - psubusw mm5,[MMX_clip] - packuswb mm4,mm4 ;[01234567] [1] mm4: R3R2R1R0R3R2R1R0 - - pand mm5,[MMX_grnmask] ;[01234567] [1] mm7: - psrlq mm6,2 ;[01234567] [1] - - punpcklbw mm6,mm4 ;[0123 567] [1] mm4: R3B3R2B2R1B1R0B0 - - movq mm4,[edx+ebp*2] ;[01234567] [3] Y - psrlw mm6,1 ;[01234567] [1] - - pand mm6,[MMX_rbmask] ;[01234567] [1] mm6: - - por mm6,mm5 ;[01234 67] [1] mm6: P6P4P2P0 - movq mm5,mm3 ;[01234567] [2] - - paddw mm3,mm1 ;[01234567] [2] mm4: - paddw mm5,mm2 ;[01234567] [2] mm5: - - pand mm4,[MMX_00FFw] ;[01234567] [3] - psraw mm3,6 ;[01234567] [2] - - psubw mm4,[MMX_10w] ;[01234567] [3] - psraw mm5,4 ;[01234567] [2] - - pmullw mm4,[MMX_Ycoeff] ;[01234567] [3] - psraw mm7,6 ;[01234567] [2] - - paddsw mm5,[MMX_clip] - packuswb mm3,mm3 ;[01234567] [2] mm4: R3R2R1R0R3R2R1R0 - - psubusw mm5,[MMX_clip] - packuswb mm7,mm7 ;[01234567] [2] mm6: B3B2B1B0B3B2B1B0 - - pand mm5,[MMX_grnmask] ;[012 4567] [2] mm7: - psrlq mm7,2 ;[01234567] [2] - - punpcklbw mm7,mm3 ;[012 4567] [2] mm6: R3B3R2B2R1B1R0B0 - - movq mm3,[edx+ebp*2] ;[01234567] [4] Y - psrlw mm7,1 ;[01234567] [2] - - pand mm7,[MMX_rbmask] ;[01234567] [2] mm6: - psrlw mm3,8 ;[01234567] [4] - - por mm7,mm5 ;[01234567] [2] mm7: P7P5P3P1 - movq mm5,mm6 ;[01234567] [A] - - psubw mm3,[MMX_10w] ;[01234567] [4] - punpcklwd mm6,mm7 ;[01234567] [A] mm4: P3P2P1P0 - - pmullw mm3,[MMX_Ycoeff] ;[0123456 ] [4] - punpckhwd mm5,mm7 ;[0123456 ] [A} mm5: P7P6P5P4 - - movntq [eax+ebp*4 ],mm6 ;[012345 ] [A] - movq mm6,mm4 ;[0123456 ] [3] - - movntq [eax+ebp*4+ 8],mm5 ;[0123456 ] [A] - paddw mm6,mm0 ;[01234 6 ] [3] mm6: - - movq mm5,mm4 ;[0123456 ] [3] - paddw mm4,mm1 ;[0123456 ] [3] mm4: - - paddw mm5,mm2 ;[0123456 ] [3] mm5: - psraw mm4,6 ;[0123456 ] [3] - - movq mm7,mm3 ;[01234567] [4] - psraw mm5,4 ;[01234567] [3] - - paddw mm7,mm0 ;[01234567] [4] mm6: - psraw mm6,6 ;[01234567] [3] - - movq mm0,mm3 ;[01234567] [4] - packuswb mm4,mm4 ;[01234567] [3] mm4: R3R2R1R0R3R2R1R0 - - - packuswb mm6,mm6 ;[01 34567] [3] mm6: B3B2B1B0B3B2B1B0 - paddw mm3,mm1 ;[01234567] [4] mm4: - - psrlq mm6,2 - paddw mm0,mm2 ;[01 34567] [4] mm5: - - paddsw mm5,[MMX_clip] - punpcklbw mm6,mm4 ;[01 3 567] [3] mm6: B3B3B2B2B1B1B0B0 - - psubusw mm5,[MMX_clip] - psrlw mm6,1 ;[01 3 567] [3] - - pand mm6,[MMX_rbmask] ;[01 3 567] [3] mm6: - psraw mm3,6 ;[01 3 567] [4] - - pand mm5,[MMX_grnmask] ;[01 3 567] [3] mm7: - psraw mm0,4 ;[01 3 567] [4] - - por mm6,mm5 ;[01 3 67] [3] mm4: P6P4P2P0 - psraw mm7,6 ;[01 3 67] [4] - - paddsw mm0,[MMX_clip] - packuswb mm3,mm3 ;[01 3 67] [4] mm4: R3R2R1R0R3R2R1R0 - - psubusw mm0,[MMX_clip] - packuswb mm7,mm7 ;[01 3 67] mm6: B3B2B1B0B3B2B1B0 - - pand mm0,[MMX_grnmask] ;[01 67] mm7: - psrlq mm7,2 - - punpcklbw mm7,mm3 ;[01 67] mm6: R3B3R2B2R1B1R0B0 - movq mm1,mm6 - - psrlw mm7,1 - add ebp,4 - - pand mm7,[MMX_rbmask] ;[01 67] mm6: - - por mm0,mm7 ;[01 67] mm0: P7P5P3P1 - - punpcklwd mm6,mm0 ;[01 6 ] mm4: P3P2P1P0 - - punpckhwd mm1,mm0 ;[ 1 6 ] mm5: P7P6P5P4 - movntq [ebx+ebp*4-16],mm6 - - movntq [ebx+ebp*4- 8],mm1 - jnz col_loop_SSE2_16 - - pop ebp - pop edi - pop esi - pop ebx - ret - - end diff --git a/src/DSUtil/a_yuvtable.asm b/src/DSUtil/a_yuvtable.asm deleted file mode 100644 index ae1f58017..000000000 --- a/src/DSUtil/a_yuvtable.asm +++ /dev/null @@ -1,610 +0,0 @@ -; VirtualDub - Video processing and capture application -; Copyright (C) 1998-2001 Avery Lee -; -; This program is free software; you can redistribute it and/or modify -; it under the terms of the GNU General Public License as published by -; the Free Software Foundation; either version 2 of the License, or -; (at your option) any later version. -; -; This program is distributed in the hope that it will be useful, -; but WITHOUT ANY WARRANTY; without even the implied warranty of -; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; GNU General Public License for more details. -; -; You should have received a copy of the GNU General Public License -; along with this program; if not, write to the Free Software -; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - segment .rdata, align=16 - - global _YUV_Y_table - global _YUV_U_table - global _YUV_V_table - global _YUV_clip_table - global _YUV_clip_table16 - -_YUV_Y_table dd 1FED1FEDh, 1FEF1FEFh, 1FF01FF0h, 1FF11FF1h - dd 1FF21FF2h, 1FF31FF3h, 1FF41FF4h, 1FF61FF6h - dd 1FF71FF7h, 1FF81FF8h, 1FF91FF9h, 1FFA1FFAh - dd 1FFB1FFBh, 1FFD1FFDh, 1FFE1FFEh, 1FFF1FFFh - dd 20002000h, 20012001h, 20022002h, 20032003h - dd 20052005h, 20062006h, 20072007h, 20082008h - dd 20092009h, 200A200Ah, 200C200Ch, 200D200Dh - dd 200E200Eh, 200F200Fh, 20102010h, 20112011h - dd 20132013h, 20142014h, 20152015h, 20162016h - dd 20172017h, 20182018h, 201A201Ah, 201B201Bh - dd 201C201Ch, 201D201Dh, 201E201Eh, 201F201Fh - dd 20212021h, 20222022h, 20232023h, 20242024h - dd 20252025h, 20262026h, 20282028h, 20292029h - dd 202A202Ah, 202B202Bh, 202C202Ch, 202D202Dh - dd 202F202Fh, 20302030h, 20312031h, 20322032h - dd 20332033h, 20342034h, 20362036h, 20372037h - dd 20382038h, 20392039h, 203A203Ah, 203B203Bh - dd 203D203Dh, 203E203Eh, 203F203Fh, 20402040h - dd 20412041h, 20422042h, 20442044h, 20452045h - dd 20462046h, 20472047h, 20482048h, 20492049h - dd 204A204Ah, 204C204Ch, 204D204Dh, 204E204Eh - dd 204F204Fh, 20502050h, 20512051h, 20532053h - dd 20542054h, 20552055h, 20562056h, 20572057h - dd 20582058h, 205A205Ah, 205B205Bh, 205C205Ch - dd 205D205Dh, 205E205Eh, 205F205Fh, 20612061h - dd 20622062h, 20632063h, 20642064h, 20652065h - dd 20662066h, 20682068h, 20692069h, 206A206Ah - dd 206B206Bh, 206C206Ch, 206D206Dh, 206F206Fh - dd 20702070h, 20712071h, 20722072h, 20732073h - dd 20742074h, 20762076h, 20772077h, 20782078h - dd 20792079h, 207A207Ah, 207B207Bh, 207D207Dh - dd 207E207Eh, 207F207Fh, 20802080h, 20812081h - dd 20822082h, 20842084h, 20852085h, 20862086h - dd 20872087h, 20882088h, 20892089h, 208B208Bh - dd 208C208Ch, 208D208Dh, 208E208Eh, 208F208Fh - dd 20902090h, 20922092h, 20932093h, 20942094h - dd 20952095h, 20962096h, 20972097h, 20982098h - dd 209A209Ah, 209B209Bh, 209C209Ch, 209D209Dh - dd 209E209Eh, 209F209Fh, 20A120A1h, 20A220A2h - dd 20A320A3h, 20A420A4h, 20A520A5h, 20A620A6h - dd 20A820A8h, 20A920A9h, 20AA20AAh, 20AB20ABh - dd 20AC20ACh, 20AD20ADh, 20AF20AFh, 20B020B0h - dd 20B120B1h, 20B220B2h, 20B320B3h, 20B420B4h - dd 20B620B6h, 20B720B7h, 20B820B8h, 20B920B9h - dd 20BA20BAh, 20BB20BBh, 20BD20BDh, 20BE20BEh - dd 20BF20BFh, 20C020C0h, 20C120C1h, 20C220C2h - dd 20C420C4h, 20C520C5h, 20C620C6h, 20C720C7h - dd 20C820C8h, 20C920C9h, 20CB20CBh, 20CC20CCh - dd 20CD20CDh, 20CE20CEh, 20CF20CFh, 20D020D0h - dd 20D220D2h, 20D320D3h, 20D420D4h, 20D520D5h - dd 20D620D6h, 20D720D7h, 20D920D9h, 20DA20DAh - dd 20DB20DBh, 20DC20DCh, 20DD20DDh, 20DE20DEh - dd 20DF20DFh, 20E120E1h, 20E220E2h, 20E320E3h - dd 20E420E4h, 20E520E5h, 20E620E6h, 20E820E8h - dd 20E920E9h, 20EA20EAh, 20EB20EBh, 20EC20ECh - dd 20ED20EDh, 20EF20EFh, 20F020F0h, 20F120F1h - dd 20F220F2h, 20F320F3h, 20F420F4h, 20F620F6h - dd 20F720F7h, 20F820F8h, 20F920F9h, 20FA20FAh - dd 20FB20FBh, 20FD20FDh, 20FE20FEh, 20FF20FFh - dd 21002100h, 21012101h, 21022102h, 21042104h - dd 21052105h, 21062106h, 21072107h, 21082108h - dd 21092109h, 210B210Bh, 210C210Ch, 210D210Dh - dd 210E210Eh, 210F210Fh, 21102110h, 21122112h - dd 21132113h, 21142114h, 21152115h, 21162116h - -_YUV_U_table dd 1EFE1032h, 1F001032h, 1F021031h, 1F041031h - dd 1F061030h, 1F081030h, 1F0A1030h, 1F0C102Fh - dd 1F0E102Fh, 1F10102Fh, 1F12102Eh, 1F14102Eh - dd 1F16102Dh, 1F18102Dh, 1F1A102Dh, 1F1C102Ch - dd 1F1E102Ch, 1F20102Bh, 1F22102Bh, 1F24102Bh - dd 1F26102Ah, 1F28102Ah, 1F2A1029h, 1F2C1029h - dd 1F2E1029h, 1F301028h, 1F321028h, 1F341027h - dd 1F361027h, 1F381027h, 1F3A1026h, 1F3C1026h - dd 1F3E1026h, 1F401025h, 1F421025h, 1F441024h - dd 1F461024h, 1F481024h, 1F4A1023h, 1F4C1023h - dd 1F4E1022h, 1F501022h, 1F521022h, 1F541021h - dd 1F561021h, 1F591020h, 1F5B1020h, 1F5D1020h - dd 1F5F101Fh, 1F61101Fh, 1F63101Eh, 1F65101Eh - dd 1F67101Eh, 1F69101Dh, 1F6B101Dh, 1F6D101Dh - dd 1F6F101Ch, 1F71101Ch, 1F73101Bh, 1F75101Bh - dd 1F77101Bh, 1F79101Ah, 1F7B101Ah, 1F7D1019h - dd 1F7F1019h, 1F811019h, 1F831018h, 1F851018h - dd 1F871017h, 1F891017h, 1F8B1017h, 1F8D1016h - dd 1F8F1016h, 1F911016h, 1F931015h, 1F951015h - dd 1F971014h, 1F991014h, 1F9B1014h, 1F9D1013h - dd 1F9F1013h, 1FA11012h, 1FA31012h, 1FA51012h - dd 1FA71011h, 1FA91011h, 1FAB1010h, 1FAD1010h - dd 1FAF1010h, 1FB1100Fh, 1FB3100Fh, 1FB5100Eh - dd 1FB7100Eh, 1FB9100Eh, 1FBB100Dh, 1FBD100Dh - dd 1FBF100Dh, 1FC1100Ch, 1FC3100Ch, 1FC5100Bh - dd 1FC7100Bh, 1FCA100Bh, 1FCC100Ah, 1FCE100Ah - dd 1FD01009h, 1FD21009h, 1FD41009h, 1FD61008h - dd 1FD81008h, 1FDA1007h, 1FDC1007h, 1FDE1007h - dd 1FE01006h, 1FE21006h, 1FE41005h, 1FE61005h - dd 1FE81005h, 1FEA1004h, 1FEC1004h, 1FEE1004h - dd 1FF01003h, 1FF21003h, 1FF41002h, 1FF61002h - dd 1FF81002h, 1FFA1001h, 1FFC1001h, 1FFE1000h - dd 20001000h, 20021000h, 20040FFFh, 20060FFFh - dd 20080FFEh, 200A0FFEh, 200C0FFEh, 200E0FFDh - dd 20100FFDh, 20120FFCh, 20140FFCh, 20160FFCh - dd 20180FFBh, 201A0FFBh, 201C0FFBh, 201E0FFAh - dd 20200FFAh, 20220FF9h, 20240FF9h, 20260FF9h - dd 20280FF8h, 202A0FF8h, 202C0FF7h, 202E0FF7h - dd 20300FF7h, 20320FF6h, 20340FF6h, 20360FF5h - dd 20390FF5h, 203B0FF5h, 203D0FF4h, 203F0FF4h - dd 20410FF3h, 20430FF3h, 20450FF3h, 20470FF2h - dd 20490FF2h, 204B0FF2h, 204D0FF1h, 204F0FF1h - dd 20510FF0h, 20530FF0h, 20550FF0h, 20570FEFh - dd 20590FEFh, 205B0FEEh, 205D0FEEh, 205F0FEEh - dd 20610FEDh, 20630FEDh, 20650FECh, 20670FECh - dd 20690FECh, 206B0FEBh, 206D0FEBh, 206F0FEAh - dd 20710FEAh, 20730FEAh, 20750FE9h, 20770FE9h - dd 20790FE9h, 207B0FE8h, 207D0FE8h, 207F0FE7h - dd 20810FE7h, 20830FE7h, 20850FE6h, 20870FE6h - dd 20890FE5h, 208B0FE5h, 208D0FE5h, 208F0FE4h - dd 20910FE4h, 20930FE3h, 20950FE3h, 20970FE3h - dd 20990FE2h, 209B0FE2h, 209D0FE2h, 209F0FE1h - dd 20A10FE1h, 20A30FE0h, 20A50FE0h, 20A70FE0h - dd 20AA0FDFh, 20AC0FDFh, 20AE0FDEh, 20B00FDEh - dd 20B20FDEh, 20B40FDDh, 20B60FDDh, 20B80FDCh - dd 20BA0FDCh, 20BC0FDCh, 20BE0FDBh, 20C00FDBh - dd 20C20FDAh, 20C40FDAh, 20C60FDAh, 20C80FD9h - dd 20CA0FD9h, 20CC0FD9h, 20CE0FD8h, 20D00FD8h - dd 20D20FD7h, 20D40FD7h, 20D60FD7h, 20D80FD6h - dd 20DA0FD6h, 20DC0FD5h, 20DE0FD5h, 20E00FD5h - dd 20E20FD4h, 20E40FD4h, 20E60FD3h, 20E80FD3h - dd 20EA0FD3h, 20EC0FD2h, 20EE0FD2h, 20F00FD1h - dd 20F20FD1h, 20F40FD1h, 20F60FD0h, 20F80FD0h - dd 20FA0FD0h, 20FC0FCFh, 20FE0FCFh, 21000FCEh - -_YUV_V_table dd 1F341068h, 1F351067h, 1F371066h, 1F391066h - dd 1F3A1065h, 1F3C1064h, 1F3D1063h, 1F3F1062h - dd 1F401062h, 1F421061h, 1F441060h, 1F45105Fh - dd 1F47105Eh, 1F48105Dh, 1F4A105Dh, 1F4C105Ch - dd 1F4D105Bh, 1F4F105Ah, 1F501059h, 1F521059h - dd 1F541058h, 1F551057h, 1F571056h, 1F581055h - dd 1F5A1055h, 1F5C1054h, 1F5D1053h, 1F5F1052h - dd 1F601051h, 1F621050h, 1F641050h, 1F65104Fh - dd 1F67104Eh, 1F68104Dh, 1F6A104Ch, 1F6C104Ch - dd 1F6D104Bh, 1F6F104Ah, 1F701049h, 1F721048h - dd 1F741048h, 1F751047h, 1F771046h, 1F781045h - dd 1F7A1044h, 1F7C1043h, 1F7D1043h, 1F7F1042h - dd 1F801041h, 1F821040h, 1F84103Fh, 1F85103Fh - dd 1F87103Eh, 1F88103Dh, 1F8A103Ch, 1F8B103Bh - dd 1F8D103Bh, 1F8F103Ah, 1F901039h, 1F921038h - dd 1F931037h, 1F951036h, 1F971036h, 1F981035h - dd 1F9A1034h, 1F9B1033h, 1F9D1032h, 1F9F1032h - dd 1FA01031h, 1FA21030h, 1FA3102Fh, 1FA5102Eh - dd 1FA7102Eh, 1FA8102Dh, 1FAA102Ch, 1FAB102Bh - dd 1FAD102Ah, 1FAF1029h, 1FB01029h, 1FB21028h - dd 1FB31027h, 1FB51026h, 1FB71025h, 1FB81025h - dd 1FBA1024h, 1FBB1023h, 1FBD1022h, 1FBF1021h - dd 1FC01021h, 1FC21020h, 1FC3101Fh, 1FC5101Eh - dd 1FC7101Dh, 1FC8101Ch, 1FCA101Ch, 1FCB101Bh - dd 1FCD101Ah, 1FCF1019h, 1FD01018h, 1FD21018h - dd 1FD31017h, 1FD51016h, 1FD71015h, 1FD81014h - dd 1FDA1014h, 1FDB1013h, 1FDD1012h, 1FDE1011h - dd 1FE01010h, 1FE2100Fh, 1FE3100Fh, 1FE5100Eh - dd 1FE6100Dh, 1FE8100Ch, 1FEA100Bh, 1FEB100Bh - dd 1FED100Ah, 1FEE1009h, 1FF01008h, 1FF21007h - dd 1FF31007h, 1FF51006h, 1FF61005h, 1FF81004h - dd 1FFA1003h, 1FFB1002h, 1FFD1002h, 1FFE1001h - dd 20001000h, 20020FFFh, 20030FFEh, 20050FFEh - dd 20060FFDh, 20080FFCh, 200A0FFBh, 200B0FFAh - dd 200D0FF9h, 200E0FF9h, 20100FF8h, 20120FF7h - dd 20130FF6h, 20150FF5h, 20160FF5h, 20180FF4h - dd 201A0FF3h, 201B0FF2h, 201D0FF1h, 201E0FF1h - dd 20200FF0h, 20220FEFh, 20230FEEh, 20250FEDh - dd 20260FECh, 20280FECh, 20290FEBh, 202B0FEAh - dd 202D0FE9h, 202E0FE8h, 20300FE8h, 20310FE7h - dd 20330FE6h, 20350FE5h, 20360FE4h, 20380FE4h - dd 20390FE3h, 203B0FE2h, 203D0FE1h, 203E0FE0h - dd 20400FDFh, 20410FDFh, 20430FDEh, 20450FDDh - dd 20460FDCh, 20480FDBh, 20490FDBh, 204B0FDAh - dd 204D0FD9h, 204E0FD8h, 20500FD7h, 20510FD7h - dd 20530FD6h, 20550FD5h, 20560FD4h, 20580FD3h - dd 20590FD2h, 205B0FD2h, 205D0FD1h, 205E0FD0h - dd 20600FCFh, 20610FCEh, 20630FCEh, 20650FCDh - dd 20660FCCh, 20680FCBh, 20690FCAh, 206B0FCAh - dd 206D0FC9h, 206E0FC8h, 20700FC7h, 20710FC6h - dd 20730FC5h, 20750FC5h, 20760FC4h, 20780FC3h - dd 20790FC2h, 207B0FC1h, 207C0FC1h, 207E0FC0h - dd 20800FBFh, 20810FBEh, 20830FBDh, 20840FBDh - dd 20860FBCh, 20880FBBh, 20890FBAh, 208B0FB9h - dd 208C0FB8h, 208E0FB8h, 20900FB7h, 20910FB6h - dd 20930FB5h, 20940FB4h, 20960FB4h, 20980FB3h - dd 20990FB2h, 209B0FB1h, 209C0FB0h, 209E0FB0h - dd 20A00FAFh, 20A10FAEh, 20A30FADh, 20A40FACh - dd 20A60FABh, 20A80FABh, 20A90FAAh, 20AB0FA9h - dd 20AC0FA8h, 20AE0FA7h, 20B00FA7h, 20B10FA6h - dd 20B30FA5h, 20B40FA4h, 20B60FA3h, 20B80FA3h - dd 20B90FA2h, 20BB0FA1h, 20BC0FA0h, 20BE0F9Fh - dd 20C00F9Eh, 20C10F9Eh, 20C30F9Dh, 20C40F9Ch - dd 20C60F9Bh, 20C70F9Ah, 20C90F9Ah, 20CB0F99h - -_YUV_clip_table db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 1, 2, 3 - db 4, 5, 6, 7 - db 8, 9, 10, 11 - db 12, 13, 14, 15 - db 16, 17, 18, 19 - db 20, 21, 22, 23 - db 24, 25, 26, 27 - db 28, 29, 30, 31 - db 32, 33, 34, 35 - db 36, 37, 38, 39 - db 40, 41, 42, 43 - db 44, 45, 46, 47 - db 48, 49, 50, 51 - db 52, 53, 54, 55 - db 56, 57, 58, 59 - db 60, 61, 62, 63 - db 64, 65, 66, 67 - db 68, 69, 70, 71 - db 72, 73, 74, 75 - db 76, 77, 78, 79 - db 80, 81, 82, 83 - db 84, 85, 86, 87 - db 88, 89, 90, 91 - db 92, 93, 94, 95 - db 96, 97, 98, 99 - db 100, 101, 102, 103 - db 104, 105, 106, 107 - db 108, 109, 110, 111 - db 112, 113, 114, 115 - db 116, 117, 118, 119 - db 120, 121, 122, 123 - db 124, 125, 126, 127 - db 128, 129, 130, 131 - db 132, 133, 134, 135 - db 136, 137, 138, 139 - db 140, 141, 142, 143 - db 144, 145, 146, 147 - db 148, 149, 150, 151 - db 152, 153, 154, 155 - db 156, 157, 158, 159 - db 160, 161, 162, 163 - db 164, 165, 166, 167 - db 168, 169, 170, 171 - db 172, 173, 174, 175 - db 176, 177, 178, 179 - db 180, 181, 182, 183 - db 184, 185, 186, 187 - db 188, 189, 190, 191 - db 192, 193, 194, 195 - db 196, 197, 198, 199 - db 200, 201, 202, 203 - db 204, 205, 206, 207 - db 208, 209, 210, 211 - db 212, 213, 214, 215 - db 216, 217, 218, 219 - db 220, 221, 222, 223 - db 224, 225, 226, 227 - db 228, 229, 230, 231 - db 232, 233, 234, 235 - db 236, 237, 238, 239 - db 240, 241, 242, 243 - db 244, 245, 246, 247 - db 248, 249, 250, 251 - db 252, 253, 254, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - db 255, 255, 255, 255 - - -_YUV_clip_table16 db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 0, 0, 0, 0 - db 1, 1, 1, 1 - db 1, 1, 1, 1 - db 2, 2, 2, 2 - db 2, 2, 2, 2 - db 3, 3, 3, 3 - db 3, 3, 3, 3 - db 4, 4, 4, 4 - db 4, 4, 4, 4 - db 5, 5, 5, 5 - db 5, 5, 5, 5 - db 6, 6, 6, 6 - db 6, 6, 6, 6 - db 7, 7, 7, 7 - db 7, 7, 7, 7 - db 8, 8, 8, 8 - db 8, 8, 8, 8 - db 9, 9, 9, 9 - db 9, 9, 9, 9 - db 10, 10, 10, 10 - db 10, 10, 10, 10 - db 11, 11, 11, 11 - db 11, 11, 11, 11 - db 12, 12, 12, 12 - db 12, 12, 12, 12 - db 13, 13, 13, 13 - db 13, 13, 13, 13 - db 14, 14, 14, 14 - db 14, 14, 14, 14 - db 15, 15, 15, 15 - db 15, 15, 15, 15 - db 16, 16, 16, 16 - db 16, 16, 16, 16 - db 17, 17, 17, 17 - db 17, 17, 17, 17 - db 18, 18, 18, 18 - db 18, 18, 18, 18 - db 19, 19, 19, 19 - db 19, 19, 19, 19 - db 20, 20, 20, 20 - db 20, 20, 20, 20 - db 21, 21, 21, 21 - db 21, 21, 21, 21 - db 22, 22, 22, 22 - db 22, 22, 22, 22 - db 23, 23, 23, 23 - db 23, 23, 23, 23 - db 24, 24, 24, 24 - db 24, 24, 24, 24 - db 25, 25, 25, 25 - db 25, 25, 25, 25 - db 26, 26, 26, 26 - db 26, 26, 26, 26 - db 27, 27, 27, 27 - db 27, 27, 27, 27 - db 28, 28, 28, 28 - db 28, 28, 28, 28 - db 29, 29, 29, 29 - db 29, 29, 29, 29 - db 30, 30, 30, 30 - db 30, 30, 30, 30 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - db 31, 31, 31, 31 - - - end - diff --git a/src/DSUtil/convert_a.asm b/src/DSUtil/convert_a.asm deleted file mode 100644 index 3cc9c7308..000000000 --- a/src/DSUtil/convert_a.asm +++ /dev/null @@ -1,296 +0,0 @@ -; Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. -; http://www.avisynth.org -; -; This program is free software; you can redistribute it and/or modify -; it under the terms of the GNU General Public License as published by -; the Free Software Foundation; either version 2 of the License, or -; (at your option) any later version. -; -; This program is distributed in the hope that it will be useful, -; but WITHOUT ANY WARRANTY; without even the implied warranty of -; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; GNU General Public License for more details. -; -; You should have received a copy of the GNU General Public License -; along with this program; if not, write to the Free Software -; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit -; http://www.gnu.org/copyleft/gpl.html . -; -; Linking Avisynth statically or dynamically with other modules is making a -; combined work based on Avisynth. Thus, the terms and conditions of the GNU -; General Public License cover the whole combination. -; -; As a special exception, the copyright holders of Avisynth give you -; permission to link Avisynth with independent modules that communicate with -; Avisynth solely through the interfaces defined in avisynth.h, regardless of the license -; terms of these independent modules, and to copy and distribute the -; resulting combined work under terms of your choice, provided that -; every copy of the combined work is accompanied by a complete copy of -; the source code of Avisynth (the version of Avisynth used to produce the -; combined work), being distributed under the terms of the GNU General -; Public License plus this exception. An independent module is a module -; which is not derived from or based on Avisynth, such as 3rd-party filters, -; import and export plugins, or graphical user interfaces. - - .586 - .mmx - .model flat - -; alignment has to be 'page' so that I can use 'align 32' below - -_TEXT64 segment page public use32 'CODE' - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - align 8 - -yuv2rgb_constants_rec601: - -x0000_0000_0010_0010 dq 00000000000100010h ; 16 -x0080_0080_0080_0080 dq 00080008000800080h ; 128 -x00FF_00FF_00FF_00FF dq 000FF00FF00FF00FFh -x00002000_00002000 dq 00000200000002000h ; 8192 = (0.5)<<14 -xFF000000_FF000000 dq 0FF000000FF000000h -cy dq 000004A8500004A85h ; 19077 = (255./219.)<<14+0.5 -crv dq 03313000033130000h ; 13075 = ((1-0.299)*255./112.)<<13+0.5 -cgu_cgv dq 0E5FCF377E5FCF377h ; -6660, -3209 = ((K-1)*K/0.587*255./112.)<<13-0.5, K=(0.299, 0.114) -cbu dq 00000408D0000408Dh ; 16525 = ((1-0.114)*255./112.)<<13+0.5 - -yuv2rgb_constants_PC_601: - - dq 00000000000000000h ; 0 - dq 00080008000800080h ; 128 - dq 000FF00FF00FF00FFh - dq 00000200000002000h ; 8192 = (0.5)<<14 - dq 0FF000000FF000000h - dq 00000400000004000h ; 16384 = (1.)<<14+0.5 - dq 02D0B00002D0B0000h ; 11531 = ((1-0.299)*255./127.)<<13+0.5 - dq 0E90FF4F2E90FF4F2h ; -5873, -2830 = (((K-1)*K/0.587)*255./127.)<<13-0.5, K=(0.299, 0.114) - dq 0000038ED000038EDh ; 14573 = ((1-0.114)*255./127.)<<13+0.5 - -yuv2rgb_constants_rec709: - - dq 00000000000100010h ; 16 - dq 00080008000800080h ; 128 - dq 000FF00FF00FF00FFh - dq 00000200000002000h ; 8192 = (0.5)<<14 - dq 0FF000000FF000000h - dq 000004A8500004A85h ; 19077 = (255./219.)<<14+0.5 - dq 0395E0000395E0000h ; 14686 = ((1-0.2126)*255./112.)<<13+0.5 - dq 0EEF2F92DEEF2F92Dh ; -4366, -1747 = ((K-1)*K/0.7152*255./112.)<<13-0.5, K=(0.2126, 0.0722) - dq 00000439900004399h ; 17305 = ((1-0.0722)*255./112.)<<13+0.5 - -yuv2rgb_constants_PC_709: - - dq 00000000000000000h ; 0 - dq 00080008000800080h ; 128 - dq 000FF00FF00FF00FFh - dq 00000200000002000h ; 8192 = (0.5)<<14 - dq 0FF000000FF000000h - dq 00000400000004000h ; 16384 = (1.)<<14+0.5 - dq 03298000032980000h ; 12952 = ((1-0.2126)*255./127.)<<13+0.5 - dq 0F0F6F9FBF0F6F9FBh ; -3850, -1541 = (((K-1)*K/0.7152)*255./127.)<<13-0.5, K=(0.2126, 0.0722) - dq 000003B9D00003B9Dh ; 15261 = ((1-0.0722)*255./127.)<<13+0.5 - -ofs_x0000_0000_0010_0010 = 0 -ofs_x0080_0080_0080_0080 = 8 -ofs_x00FF_00FF_00FF_00FF = 16 -ofs_x00002000_00002000 = 24 -ofs_xFF000000_FF000000 = 32 -ofs_cy = 40 -ofs_crv = 48 -ofs_cgu_cgv = 56 -ofs_cbu = 64 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -GET_Y MACRO mma,uyvy -IF &uyvy - psrlw mma,8 -ELSE - pand mma,[edx+ofs_x00FF_00FF_00FF_00FF] -ENDIF - ENDM - -GET_UV MACRO mma,uyvy - GET_Y mma,1-uyvy - ENDM - -YUV2RGB_INNER_LOOP MACRO uyvy,rgb32,no_next_pixel - -;; This YUV422->RGB conversion code uses only four MMX registers per -;; source dword, so I convert two dwords in parallel. Lines corresponding -;; to the "second pipe" are indented an extra space. There's almost no -;; overlap, except at the end and in the three lines marked ***. -;; revised 4july,2002 to properly set alpha in rgb32 to default "on" & other small memory optimizations - - movd mm0, DWORD PTR [esi] ; DWORD PTR for compatibility woth masm8 - movd mm5, DWORD PTR [esi+4] - movq mm1,mm0 - GET_Y mm0,&uyvy ; mm0 = __________Y1__Y0 - movq mm4,mm5 - GET_UV mm1,&uyvy ; mm1 = __________V0__U0 - GET_Y mm4,&uyvy ; mm4 = __________Y3__Y2 - movq mm2,mm5 ; *** avoid reload from [esi+4] - GET_UV mm5,&uyvy ; mm5 = __________V2__U2 - psubw mm0,[edx+ofs_x0000_0000_0010_0010] ; (Y-16) - movd mm6, DWORD PTR [esi+8-4*(no_next_pixel)] - GET_UV mm2,&uyvy ; mm2 = __________V2__U2 - psubw mm4,[edx+ofs_x0000_0000_0010_0010] ; (Y-16) - paddw mm2,mm1 ; 2*UV1=UV0+UV2 - GET_UV mm6,&uyvy ; mm6 = __________V4__U4 - psubw mm1,[edx+ofs_x0080_0080_0080_0080] ; (UV-128) - paddw mm6,mm5 ; 2*UV3=UV2+UV4 - psllq mm2,32 - psubw mm5,[edx+ofs_x0080_0080_0080_0080] ; (UV-128) - punpcklwd mm0,mm2 ; mm0 = ______Y1______Y0 - psllq mm6,32 - pmaddwd mm0,[edx+ofs_cy] ; (Y-16)*(255./219.)<<14 - punpcklwd mm4,mm6 - paddw mm1,mm1 ; 2*UV0=UV0+UV0 - pmaddwd mm4,[edx+ofs_cy] - paddw mm5,mm5 ; 2*UV2=UV2+UV2 - paddw mm1,mm2 ; mm1 = __V1__U1__V0__U0 * 2 - paddd mm0,[edx+ofs_x00002000_00002000] ; +=0.5<<14 - paddw mm5,mm6 ; mm5 = __V3__U3__V2__U2 * 2 - movq mm2,mm1 - paddd mm4,[edx+ofs_x00002000_00002000] ; +=0.5<<14 - movq mm3,mm1 - movq mm6,mm5 - pmaddwd mm1,[edx+ofs_crv] - movq mm7,mm5 - paddd mm1,mm0 - pmaddwd mm5,[edx+ofs_crv] - psrad mm1,14 ; mm1 = RRRRRRRRrrrrrrrr - paddd mm5,mm4 - pmaddwd mm2,[edx+ofs_cgu_cgv] - psrad mm5,14 - paddd mm2,mm0 - pmaddwd mm6,[edx+ofs_cgu_cgv] - psrad mm2,14 ; mm2 = GGGGGGGGgggggggg - paddd mm6,mm4 - pmaddwd mm3,[edx+ofs_cbu] - psrad mm6,14 - paddd mm3,mm0 - pmaddwd mm7,[edx+ofs_cbu] - add esi,8 - add edi,12+4*rgb32 -IFE &no_next_pixel - cmp esi,ecx -ENDIF - psrad mm3,14 ; mm3 = BBBBBBBBbbbbbbbb - paddd mm7,mm4 - pxor mm0,mm0 - psrad mm7,14 - packssdw mm3,mm2 ; mm3 = GGGGggggBBBBbbbb - packssdw mm7,mm6 - packssdw mm1,mm0 ; mm1 = ________RRRRrrrr - packssdw mm5,mm0 ; *** avoid pxor mm4,mm4 - movq mm2,mm3 - movq mm6,mm7 - punpcklwd mm2,mm1 ; mm2 = RRRRBBBBrrrrbbbb - punpcklwd mm6,mm5 - punpckhwd mm3,mm1 ; mm3 = ____GGGG____gggg - punpckhwd mm7,mm5 - movq mm0,mm2 - movq mm4,mm6 - punpcklwd mm0,mm3 ; mm0 = ____rrrrggggbbbb - punpcklwd mm4,mm7 -IFE &rgb32 - psllq mm0,16 - psllq mm4,16 -ENDIF - punpckhwd mm2,mm3 ; mm2 = ____RRRRGGGGBBBB - punpckhwd mm6,mm7 - packuswb mm0,mm2 ; mm0 = __RRGGBB__rrggbb <- ta dah! - packuswb mm4,mm6 - -IF &rgb32 - por mm0, [edx+ofs_xFF000000_FF000000] ; set alpha channels "on" - por mm4, [edx+ofs_xFF000000_FF000000] - movq [edi-16],mm0 ; store the quadwords independently - movq [edi-8],mm4 -ELSE - psrlq mm0,8 ; pack the two quadwords into 12 bytes - psllq mm4,8 ; (note: the two shifts above leave - movd DWORD PTR [edi-12],mm0 ; mm0,4 = __RRGGBBrrggbb__) - psrlq mm0,32 - por mm4,mm0 - movd DWORD PTR [edi-8],mm4 - psrlq mm4,32 - movd DWORD PTR [edi-4],mm4 -ENDIF - - ENDM - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -YUV2RGB_PROC MACRO procname,uyvy,rgb32 - - PUBLIC C _&procname - -;;void __cdecl procname( -;; [esp+ 4] const BYTE* src, -;; [esp+ 8] BYTE* dst, -;; [esp+12] const BYTE* src_end, -;; [esp+16] int src_pitch, -;; [esp+20] int row_size, -;; [esp+24] rec709 matrix); 0=rec601, 1=rec709, 3=PC_601, 7=PC_709 - -_&procname PROC - - push esi - push edi - push ebx - - mov eax,[esp+16+12] ; src_pitch - mov esi,[esp+12+12] ; src_end - read source bottom-up - mov edi,[esp+8+12] ; dstp - mov ebx,[esp+20+12] ; row_size - mov edx,offset yuv2rgb_constants_rec601 - test byte ptr [esp+24+12],1 - jz loop0 - mov edx,offset yuv2rgb_constants_rec709 - - test byte ptr [esp+24+12],2 - jz loop0 - mov edx,offset yuv2rgb_constants_PC_601 - - test byte ptr [esp+24+12],4 - jz loop0 - mov edx,offset yuv2rgb_constants_PC_709 - -loop0: - sub esi,eax - lea ecx,[esi+ebx-8] - - align 32 -loop1: - YUV2RGB_INNER_LOOP uyvy,rgb32,0 - jb loop1 - - YUV2RGB_INNER_LOOP uyvy,rgb32,1 - - sub esi,ebx - cmp esi,[esp+4+12] ; src - ja loop0 - - emms - pop ebx - pop edi - pop esi - retn - -_&procname ENDP - - ENDM - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -YUV2RGB_PROC mmx_YUY2toRGB24,0,0 -YUV2RGB_PROC mmx_YUY2toRGB32,0,1 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - END - diff --git a/src/DSUtil/cpuid_32_64.cpp b/src/DSUtil/cpuid_32_64.cpp deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/DSUtil/vd.cpp b/src/DSUtil/vd.cpp index 2d6c11c98..6c0c10bdb 100644 --- a/src/DSUtil/vd.cpp +++ b/src/DSUtil/vd.cpp @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -148,15 +149,13 @@ bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* }; switch(dbpp) { - case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break; + case 16: dstpxm.format = nsVDPixmap::kPixFormat_RGB565; break; case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break; case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break; default: VDASSERT(false); } - // TODO: check correct conversion work (555->565) when dpp == 16 - return VDPixmapBlt(dstpxm, srcbm); } @@ -214,7 +213,7 @@ bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* s switch(dbpp) { case 8: srcbm.format = nsVDPixmap::kPixFormat_Pal8; break; - case 16: srcbm.format = nsVDPixmap::kPixFormat_XRGB1555; break; + case 16: srcbm.format = nsVDPixmap::kPixFormat_RGB565; break; case 24: srcbm.format = nsVDPixmap::kPixFormat_RGB888; break; case 32: srcbm.format = nsVDPixmap::kPixFormat_XRGB8888; break; default: @@ -231,7 +230,7 @@ bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* s switch(dbpp) { case 8: dstpxm.format = nsVDPixmap::kPixFormat_Pal8; break; - case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break; + case 16: dstpxm.format = nsVDPixmap::kPixFormat_RGB565; break; case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break; case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break; default: @@ -262,7 +261,7 @@ bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* }; switch(dbpp) { - case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break; + case 16: dstpxm.format = nsVDPixmap::kPixFormat_RGB565; break; case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break; case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break; default: @@ -333,7 +332,7 @@ bool BitBltFromI420ToYUY2Interlaced(int w, int h, BYTE* dst, int dstpitch, BYTE* yuvtoyuy2row_avg(dst + dstpitch, srcy + srcpitch, srcu, srcv, w, halfsrcpitch); dst += 2*dstpitch; - srcy += halfsrcpitch; + srcy += 2*srcpitch; srcu += halfsrcpitch; srcv += halfsrcpitch; } -- cgit v1.2.3