Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authorXhmikosR <xhmikosr@users.sourceforge.net>2012-06-13 13:33:54 +0400
committerXhmikosR <xhmikosr@users.sourceforge.net>2012-06-13 13:33:54 +0400
commitd72c77c61e57e8e3fb10ddda9557ed2094a2e359 (patch)
tree8fd1026d540ec1b0c08f4094d7e69525660fcce4 /src/DSUtil/vd_asm.cpp
parent0b9ec9a361f62ce15fc58763ca8b533afe405973 (diff)
converted tabs to spaces for most files in our repo
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@5111 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/DSUtil/vd_asm.cpp')
-rw-r--r--src/DSUtil/vd_asm.cpp626
1 file changed, 313 insertions, 313 deletions
diff --git a/src/DSUtil/vd_asm.cpp b/src/DSUtil/vd_asm.cpp
index 2b836465a..040dd5005 100644
--- a/src/DSUtil/vd_asm.cpp
+++ b/src/DSUtil/vd_asm.cpp
@@ -1,25 +1,25 @@
-// VirtualDub - Video processing and capture application
-// Graphics support library
-// Copyright (C) 1998-2007 Avery Lee
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
// - VDPixmapBlt is from VirtualDub
// - sse2 yv12 to yuy2 conversion by Haali
-// (- vd.cpp/h should be renamed to something more sensible already :)
+// (- vd.cpp/h should be renamed to something more sensible already :)
#include "stdafx.h"
@@ -30,403 +30,403 @@
#ifndef _WIN64
void __declspec(naked) yuvtoyuy2row_MMX(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width)
{
- __asm {
- push ebp
- push edi
- push esi
- push ebx
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
- mov edi, [esp+20] // dst
- mov ebp, [esp+24] // srcy
- mov ebx, [esp+28] // srcu
- mov esi, [esp+32] // srcv
- mov ecx, [esp+36] // width
+ mov edi, [esp+20] // dst
+ mov ebp, [esp+24] // srcy
+ mov ebx, [esp+28] // srcu
+ mov esi, [esp+32] // srcv
+ mov ecx, [esp+36] // width
- shr ecx, 3
+ shr ecx, 3
yuvtoyuy2row_loop:
- movd mm0, [ebx]
- punpcklbw mm0, [esi]
+ movd mm0, [ebx]
+ punpcklbw mm0, [esi]
- movq mm1, [ebp]
- movq mm2, mm1
- punpcklbw mm1, mm0
- punpckhbw mm2, mm0
+ movq mm1, [ebp]
+ movq mm2, mm1
+ punpcklbw mm1, mm0
+ punpckhbw mm2, mm0
- movq [edi], mm1
- movq [edi+8], mm2
+ movq [edi], mm1
+ movq [edi+8], mm2
- add ebp, 8
- add ebx, 4
- add esi, 4
- add edi, 16
+ add ebp, 8
+ add ebx, 4
+ add esi, 4
+ add edi, 16
- dec ecx
- jnz yuvtoyuy2row_loop
+ dec ecx
+ jnz yuvtoyuy2row_loop
- pop ebx
- pop esi
- pop edi
- pop ebp
- ret
- };
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
}
void __declspec(naked) yuvtoyuy2row_avg_MMX(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width, DWORD pitchuv)
{
- static const __int64 mask = 0x7f7f7f7f7f7f7f7fi64;
+ static const __int64 mask = 0x7f7f7f7f7f7f7f7fi64;
- __asm {
- push ebp
- push edi
- push esi
- push ebx
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
- movq mm7, mask
+ movq mm7, mask
- mov edi, [esp+20] // dst
- mov ebp, [esp+24] // srcy
- mov ebx, [esp+28] // srcu
- mov esi, [esp+32] // srcv
- mov ecx, [esp+36] // width
- mov eax, [esp+40] // pitchuv
+ mov edi, [esp+20] // dst
+ mov ebp, [esp+24] // srcy
+ mov ebx, [esp+28] // srcu
+ mov esi, [esp+32] // srcv
+ mov ecx, [esp+36] // width
+ mov eax, [esp+40] // pitchuv
- shr ecx, 3
+ shr ecx, 3
yuvtoyuy2row_avg_loop:
- movd mm0, [ebx]
- punpcklbw mm0, [esi]
- movq mm1, mm0
+ movd mm0, [ebx]
+ punpcklbw mm0, [esi]
+ movq mm1, mm0
- movd mm2, [ebx + eax]
- punpcklbw mm2, [esi + eax]
- movq mm3, mm2
+ movd mm2, [ebx + eax]
+ punpcklbw mm2, [esi + eax]
+ movq mm3, mm2
- // (x+y)>>1 == (x&y)+((x^y)>>1)
+ // (x+y)>>1 == (x&y)+((x^y)>>1)
- pand mm0, mm2
- pxor mm1, mm3
- psrlq mm1, 1
- pand mm1, mm7
- paddb mm0, mm1
+ pand mm0, mm2
+ pxor mm1, mm3
+ psrlq mm1, 1
+ pand mm1, mm7
+ paddb mm0, mm1
- movq mm1, [ebp]
- movq mm2, mm1
- punpcklbw mm1, mm0
- punpckhbw mm2, mm0
+ movq mm1, [ebp]
+ movq mm2, mm1
+ punpcklbw mm1, mm0
+ punpckhbw mm2, mm0
- movq [edi], mm1
- movq [edi+8], mm2
+ movq [edi], mm1
+ movq [edi+8], mm2
- add ebp, 8
- add ebx, 4
- add esi, 4
- add edi, 16
+ add ebp, 8
+ add ebx, 4
+ add esi, 4
+ add edi, 16
- dec ecx
- jnz yuvtoyuy2row_avg_loop
+ dec ecx
+ jnz yuvtoyuy2row_avg_loop
- pop ebx
- pop esi
- pop edi
- pop ebp
- ret
- };
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
}
void __declspec(naked) yv12_yuy2_row_sse2() {
- __asm {
- // ebx - Y
- // edx - U
- // esi - V
- // edi - dest
- // ecx - halfwidth
- xor eax, eax
+ __asm {
+ // ebx - Y
+ // edx - U
+ // esi - V
+ // edi - dest
+ // ecx - halfwidth
+ xor eax, eax
one:
- movdqa xmm0, [ebx + eax*2] // YYYYYYYY
- movdqa xmm1, [ebx + eax*2 + 16] // YYYYYYYY
+ movdqa xmm0, [ebx + eax*2] // YYYYYYYY
+ movdqa xmm1, [ebx + eax*2 + 16] // YYYYYYYY
- movdqa xmm2, [edx + eax] // UUUUUUUU
- movdqa xmm3, [esi + eax] // VVVVVVVV
+ movdqa xmm2, [edx + eax] // UUUUUUUU
+ movdqa xmm3, [esi + eax] // VVVVVVVV
- movdqa xmm4, xmm2
- movdqa xmm5, xmm0
- movdqa xmm6, xmm1
- punpcklbw xmm2, xmm3 // VUVUVUVU
- punpckhbw xmm4, xmm3 // VUVUVUVU
+ movdqa xmm4, xmm2
+ movdqa xmm5, xmm0
+ movdqa xmm6, xmm1
+ punpcklbw xmm2, xmm3 // VUVUVUVU
+ punpckhbw xmm4, xmm3 // VUVUVUVU
- punpcklbw xmm0, xmm2 // VYUYVYUY
- punpcklbw xmm1, xmm4
- punpckhbw xmm5, xmm2
- punpckhbw xmm6, xmm4
+ punpcklbw xmm0, xmm2 // VYUYVYUY
+ punpcklbw xmm1, xmm4
+ punpckhbw xmm5, xmm2
+ punpckhbw xmm6, xmm4
- movntdq [edi + eax*4], xmm0
- movntdq [edi + eax*4 + 16], xmm5
- movntdq [edi + eax*4 + 32], xmm1
- movntdq [edi + eax*4 + 48], xmm6
+ movntdq [edi + eax*4], xmm0
+ movntdq [edi + eax*4 + 16], xmm5
+ movntdq [edi + eax*4 + 32], xmm1
+ movntdq [edi + eax*4 + 48], xmm6
- add eax, 16
- cmp eax, ecx
+ add eax, 16
+ cmp eax, ecx
- jb one
+ jb one
- ret
- };
+ ret
+ };
}
void __declspec(naked) yv12_yuy2_row_sse2_linear() {
- __asm {
- // ebx - Y
- // edx - U
- // esi - V
- // edi - dest
- // ecx - width
- // ebp - uv_stride
- xor eax, eax
+ __asm {
+ // ebx - Y
+ // edx - U
+ // esi - V
+ // edi - dest
+ // ecx - width
+ // ebp - uv_stride
+ xor eax, eax
one:
- movdqa xmm0, [ebx + eax*2] // YYYYYYYY
- movdqa xmm1, [ebx + eax*2 + 16] // YYYYYYYY
-
- movdqa xmm2, [edx]
- movdqa xmm3, [esi]
- pavgb xmm2, [edx + ebp] // UUUUUUUU
- pavgb xmm3, [esi + ebp] // VVVVVVVV
-
- movdqa xmm4, xmm2
- movdqa xmm5, xmm0
- movdqa xmm6, xmm1
- punpcklbw xmm2, xmm3 // VUVUVUVU
- punpckhbw xmm4, xmm3 // VUVUVUVU
-
- punpcklbw xmm0, xmm2 // VYUYVYUY
- punpcklbw xmm1, xmm4
- punpckhbw xmm5, xmm2
- punpckhbw xmm6, xmm4
-
- movntdq [edi + eax*4], xmm0
- movntdq [edi + eax*4 + 16], xmm5
- movntdq [edi + eax*4 + 32], xmm1
- movntdq [edi + eax*4 + 48], xmm6
-
- add eax, 16
- add edx, 16
- add esi, 16
- cmp eax, ecx
-
- jb one
-
- ret
- };
+ movdqa xmm0, [ebx + eax*2] // YYYYYYYY
+ movdqa xmm1, [ebx + eax*2 + 16] // YYYYYYYY
+
+ movdqa xmm2, [edx]
+ movdqa xmm3, [esi]
+ pavgb xmm2, [edx + ebp] // UUUUUUUU
+ pavgb xmm3, [esi + ebp] // VVVVVVVV
+
+ movdqa xmm4, xmm2
+ movdqa xmm5, xmm0
+ movdqa xmm6, xmm1
+ punpcklbw xmm2, xmm3 // VUVUVUVU
+ punpckhbw xmm4, xmm3 // VUVUVUVU
+
+ punpcklbw xmm0, xmm2 // VYUYVYUY
+ punpcklbw xmm1, xmm4
+ punpckhbw xmm5, xmm2
+ punpckhbw xmm6, xmm4
+
+ movntdq [edi + eax*4], xmm0
+ movntdq [edi + eax*4 + 16], xmm5
+ movntdq [edi + eax*4 + 32], xmm1
+ movntdq [edi + eax*4 + 48], xmm6
+
+ add eax, 16
+ add edx, 16
+ add esi, 16
+ cmp eax, ecx
+
+ jb one
+
+ ret
+ };
}
void __declspec(naked) yv12_yuy2_row_sse2_linear_interlaced() {
- __asm {
- // ebx - Y
- // edx - U
- // esi - V
- // edi - dest
- // ecx - width
- // ebp - uv_stride
- xor eax, eax
+ __asm {
+ // ebx - Y
+ // edx - U
+ // esi - V
+ // edi - dest
+ // ecx - width
+ // ebp - uv_stride
+ xor eax, eax
one:
- movdqa xmm0, [ebx + eax*2] // YYYYYYYY
- movdqa xmm1, [ebx + eax*2 + 16] // YYYYYYYY
-
- movdqa xmm2, [edx]
- movdqa xmm3, [esi]
- pavgb xmm2, [edx + ebp*2] // UUUUUUUU
- pavgb xmm3, [esi + ebp*2] // VVVVVVVV
-
- movdqa xmm4, xmm2
- movdqa xmm5, xmm0
- movdqa xmm6, xmm1
- punpcklbw xmm2, xmm3 // VUVUVUVU
- punpckhbw xmm4, xmm3 // VUVUVUVU
-
- punpcklbw xmm0, xmm2 // VYUYVYUY
- punpcklbw xmm1, xmm4
- punpckhbw xmm5, xmm2
- punpckhbw xmm6, xmm4
-
- movntdq [edi + eax*4], xmm0
- movntdq [edi + eax*4 + 16], xmm5
- movntdq [edi + eax*4 + 32], xmm1
- movntdq [edi + eax*4 + 48], xmm6
-
- add eax, 16
- add edx, 16
- add esi, 16
- cmp eax, ecx
-
- jb one
-
- ret
- };
+ movdqa xmm0, [ebx + eax*2] // YYYYYYYY
+ movdqa xmm1, [ebx + eax*2 + 16] // YYYYYYYY
+
+ movdqa xmm2, [edx]
+ movdqa xmm3, [esi]
+ pavgb xmm2, [edx + ebp*2] // UUUUUUUU
+ pavgb xmm3, [esi + ebp*2] // VVVVVVVV
+
+ movdqa xmm4, xmm2
+ movdqa xmm5, xmm0
+ movdqa xmm6, xmm1
+ punpcklbw xmm2, xmm3 // VUVUVUVU
+ punpckhbw xmm4, xmm3 // VUVUVUVU
+
+ punpcklbw xmm0, xmm2 // VYUYVYUY
+ punpcklbw xmm1, xmm4
+ punpckhbw xmm5, xmm2
+ punpckhbw xmm6, xmm4
+
+ movntdq [edi + eax*4], xmm0
+ movntdq [edi + eax*4 + 16], xmm5
+ movntdq [edi + eax*4 + 32], xmm1
+ movntdq [edi + eax*4 + 48], xmm6
+
+ add eax, 16
+ add edx, 16
+ add esi, 16
+ cmp eax, ecx
+
+ jb one
+
+ ret
+ };
}
void __declspec(naked) yv12_yuy2_sse2(const BYTE *Y, const BYTE *U, const BYTE *V,
- int halfstride, unsigned halfwidth, unsigned height,
- BYTE *YUY2, int d_stride)
+ int halfstride, unsigned halfwidth, unsigned height,
+ BYTE *YUY2, int d_stride)
{
- __asm {
- push ebx
- push esi
- push edi
- push ebp
-
- mov ebx, [esp + 20] // Y
- mov edx, [esp + 24] // U
- mov esi, [esp + 28] // V
- mov edi, [esp + 44] // D
- mov ebp, [esp + 32] // uv_stride
- mov ecx, [esp + 36] // uv_width
-
- mov eax, ecx
- add eax, 15
- and eax, 0xfffffff0
- sub [esp + 32], eax
-
- cmp dword ptr [esp + 40], 2
- jbe last2
+ __asm {
+ push ebx
+ push esi
+ push edi
+ push ebp
+
+ mov ebx, [esp + 20] // Y
+ mov edx, [esp + 24] // U
+ mov esi, [esp + 28] // V
+ mov edi, [esp + 44] // D
+ mov ebp, [esp + 32] // uv_stride
+ mov ecx, [esp + 36] // uv_width
+
+ mov eax, ecx
+ add eax, 15
+ and eax, 0xfffffff0
+ sub [esp + 32], eax
+
+ cmp dword ptr [esp + 40], 2
+ jbe last2
row:
- sub dword ptr [esp + 40], 2
- call yv12_yuy2_row_sse2
+ sub dword ptr [esp + 40], 2
+ call yv12_yuy2_row_sse2
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- call yv12_yuy2_row_sse2_linear
+ call yv12_yuy2_row_sse2_linear
- add edx, [esp + 32]
- add esi, [esp + 32]
+ add edx, [esp + 32]
+ add esi, [esp + 32]
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- cmp dword ptr [esp + 40], 2
- ja row
+ cmp dword ptr [esp + 40], 2
+ ja row
last2:
- call yv12_yuy2_row_sse2
+ call yv12_yuy2_row_sse2
- dec dword ptr [esp + 40]
- jz done
+ dec dword ptr [esp + 40]
+ jz done
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
- call yv12_yuy2_row_sse2
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
+ call yv12_yuy2_row_sse2
done:
- pop ebp
- pop edi
- pop esi
- pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
- ret
- };
+ ret
+ };
}
void __declspec(naked) yv12_yuy2_sse2_interlaced(const BYTE *Y, const BYTE *U, const BYTE *V,
- int halfstride, unsigned halfwidth, unsigned height,
- BYTE *YUY2, int d_stride)
+ int halfstride, unsigned halfwidth, unsigned height,
+ BYTE *YUY2, int d_stride)
{
- __asm {
- push ebx
- push esi
- push edi
- push ebp
-
- mov ebx, [esp + 20] // Y
- mov edx, [esp + 24] // U
- mov esi, [esp + 28] // V
- mov edi, [esp + 44] // D
- mov ebp, [esp + 32] // uv_stride
- mov ecx, [esp + 36] // uv_width
-
- mov eax, ecx
- add eax, 15
- and eax, 0xfffffff0
- sub [esp + 32], eax
-
- cmp dword ptr [esp + 40], 4
- jbe last4
+ __asm {
+ push ebx
+ push esi
+ push edi
+ push ebp
+
+ mov ebx, [esp + 20] // Y
+ mov edx, [esp + 24] // U
+ mov esi, [esp + 28] // V
+ mov edi, [esp + 44] // D
+ mov ebp, [esp + 32] // uv_stride
+ mov ecx, [esp + 36] // uv_width
+
+ mov eax, ecx
+ add eax, 15
+ and eax, 0xfffffff0
+ sub [esp + 32], eax
+
+ cmp dword ptr [esp + 40], 4
+ jbe last4
row:
- sub dword ptr [esp + 40], 4
- call yv12_yuy2_row_sse2 // first row, first field
+ sub dword ptr [esp + 40], 4
+ call yv12_yuy2_row_sse2 // first row, first field
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- add edx, ebp
- add esi, ebp
+ add edx, ebp
+ add esi, ebp
- call yv12_yuy2_row_sse2 // first row, second field
+ call yv12_yuy2_row_sse2 // first row, second field
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- sub edx, ebp
- sub esi, ebp
+ sub edx, ebp
+ sub esi, ebp
- call yv12_yuy2_row_sse2_linear_interlaced // second row, first field
+ call yv12_yuy2_row_sse2_linear_interlaced // second row, first field
- add edx, [esp + 32]
- add esi, [esp + 32]
+ add edx, [esp + 32]
+ add esi, [esp + 32]
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- call yv12_yuy2_row_sse2_linear_interlaced // second row, second field
+ call yv12_yuy2_row_sse2_linear_interlaced // second row, second field
- add edx, [esp + 32]
- add esi, [esp + 32]
+ add edx, [esp + 32]
+ add esi, [esp + 32]
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- cmp dword ptr [esp + 40], 4
- ja row
+ cmp dword ptr [esp + 40], 4
+ ja row
last4:
- call yv12_yuy2_row_sse2
+ call yv12_yuy2_row_sse2
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- add edx, ebp
- add esi, ebp
+ add edx, ebp
+ add esi, ebp
- call yv12_yuy2_row_sse2
+ call yv12_yuy2_row_sse2
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- sub edx, ebp
- sub esi, ebp
+ sub edx, ebp
+ sub esi, ebp
- call yv12_yuy2_row_sse2
+ call yv12_yuy2_row_sse2
- lea ebx, [ebx + ebp*2]
- add edi, [esp + 48]
+ lea ebx, [ebx + ebp*2]
+ add edi, [esp + 48]
- add edx, ebp
- add esi, ebp
+ add edx, ebp
+ add esi, ebp
- call yv12_yuy2_row_sse2
+ call yv12_yuy2_row_sse2
- pop ebp
- pop edi
- pop esi
- pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
- ret
- };
+ ret
+ };
}
#endif