Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author kinddragon <kinddragon@users.sourceforge.net> 2010-05-21 04:53:52 +0400
committer kinddragon <kinddragon@users.sourceforge.net> 2010-05-21 04:53:52 +0400
commit 37f62abd654047d060c86d6c76cd2f6862f89b94 (patch)
tree 83eb125bd86f8a685928e290e2ec929ce633bc53 /src/DSUtil
parent dae6425e0c23576dac77c3afae1dc6de22f983d5 (diff)
DSUtil now uses the new VirtualDub libraries (SSE2 deinterlacing for the MPEG2 decoder)
AudioSwitcher: rare memory corruption fixed
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1907 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/DSUtil')
-rw-r--r-- src/DSUtil/deinterlace.cpp 526
-rw-r--r-- src/DSUtil/dsutil.vcproj 132
-rw-r--r-- src/DSUtil/vd.cpp 970
-rw-r--r-- src/DSUtil/vd.h 18
-rw-r--r-- src/DSUtil/vd_asm.cpp 290
-rw-r--r-- src/DSUtil/vd_asm.h 11
6 files changed, 777 insertions, 1170 deletions
diff --git a/src/DSUtil/deinterlace.cpp b/src/DSUtil/deinterlace.cpp
new file mode 100644
index 000000000..a66915dfd
--- /dev/null
+++ b/src/DSUtil/deinterlace.cpp
@@ -0,0 +1,526 @@
+// VirtualDub - Video processing and capture application
+// Copyright (C) 1998-2001 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include "stdafx.h"
+#include <emmintrin.h>
+#include <vd2/system/cpuaccel.h>
+
+#define uint8 unsigned char
+#define uint32 unsigned int
+#define uint64 unsigned __int64
+
+#ifdef _M_IX86
+#define VD_CPU_X86
+#endif
+
+#ifdef _M_X64
+#define VD_CPU_AMD64
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+
+#pragma warning(disable: 4799) // warning C4799: function has no EMMS instruction
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _M_IX86
+// Averages one row with the row srcpitch bytes after it, one 32-bit word per
+// iteration: dst[i] = ((src[i] & 0xfefefefe) >> 1) + ((src2[i] & 0xfefefefe) >> 1).
+// Clearing each byte's low bit before the shift keeps bits from leaking into
+// the neighbouring byte, so the four bytes are averaged independently (each
+// byte's low bit is discarded before the add).
+// Naked function: the arguments are read straight from the stack after the
+// four register pushes (dst, src, w, srcpitch at esp+20/24/28/32).
+// w is a count of 32-bit words; it is only tested after the first iteration,
+// so the loop does not check for w == 0.
+static void __declspec(naked) asm_blend_row_clipped(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi // edi = dst - src, so [edi+esi] addresses the destination
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+xloop:
+ mov ecx,[esi]
+ mov eax,0fefefefeh
+
+ mov ebx,[esi+edx] // same column in the second source row
+ and eax,ecx
+
+ shr eax,1
+ and ebx,0fefefefeh
+
+ shr ebx,1
+ add esi,4
+
+ add eax,ebx
+ dec ebp
+
+ mov [edi+esi-4],eax // mov leaves the flags from dec ebp intact for jnz
+ jnz xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+// Blends three vertically adjacent rows with weights 1:2:1, one 32-bit word
+// per iteration. With a = src[i], b = the word srcpitch bytes below and
+// c = the word 2*srcpitch bytes below:
+//   dst[i] = ((a & 0xfcfcfcfc) >> 2) + ((b >> 1) & 0x7f7f7f7f) + ((c >> 2) & 0x3f3f3f3f)
+// The masks stop shifted bits from crossing byte boundaries; the low bits
+// shifted out of each byte are simply dropped (no rounding term is added).
+// Naked function: arguments are read from the stack after the four pushes
+// (dst, src, w, srcpitch at esp+20/24/28/32). w is a count of 32-bit words
+// and is only tested after the first iteration.
+static void __declspec(naked) asm_blend_row(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi // edi = dst - src, so [edi+esi] addresses the destination
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+xloop:
+ mov ecx,[esi]
+ mov eax,0fcfcfcfch
+
+ mov ebx,[esi+edx] // middle row
+ and eax,ecx
+
+ shr ebx,1
+ mov ecx,[esi+edx*2] // bottom row
+
+ shr ecx,2
+ and ebx,07f7f7f7fh
+
+ shr eax,2
+ and ecx,03f3f3f3fh
+
+ add eax,ebx
+ add esi,4
+
+ add eax,ecx
+ dec ebp
+
+ mov [edi+esi-4],eax // mov leaves the flags from dec ebp intact for jnz
+ jnz xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+// MMX form of the 1:2:1 three-row blend: the same mask-and-shift arithmetic
+// as the scalar version, applied to 8 bytes per iteration
+// (((a & 0xfc..) >> 2) + ((b >> 1) & 0x7f..) + ((c >> 2) & 0x3f..), where
+// a, b, c are the qwords at src, src+srcpitch and src+2*srcpitch).
+// w is a count of 32-bit words; it is converted to (w + 1) >> 1 qword
+// iterations, so an odd w processes one extra dword.
+// Naked function: arguments are read from the stack after the four pushes.
+// No emms is executed here.
+static void __declspec(naked) asm_blend_row_MMX(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ static const __declspec(align(8)) __int64 mask0 = 0xfcfcfcfcfcfcfcfci64;
+ static const __declspec(align(8)) __int64 mask1 = 0x7f7f7f7f7f7f7f7fi64;
+ static const __declspec(align(8)) __int64 mask2 = 0x3f3f3f3f3f3f3f3fi64;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi // edi = dst - src, so [edi+esi] addresses the destination
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+ movq mm5,mask0
+ movq mm6,mask1
+ movq mm7,mask2
+ inc ebp
+ shr ebp,1 // ebp = (w + 1) / 2 qword iterations
+xloop:
+ movq mm2,[esi]
+ movq mm0,mm5
+
+ movq mm1,[esi+edx]
+ pand mm0,mm2
+
+ psrlq mm1,1
+ movq mm2,[esi+edx*2]
+
+ psrlq mm2,2
+ pand mm1,mm6
+
+ psrlq mm0,2
+ pand mm2,mm7
+
+ paddb mm0,mm1
+ add esi,8
+
+ paddb mm0,mm2
+ dec ebp
+
+ movq [edi+esi-8],mm0 // movq leaves the flags from dec ebp intact for jne
+ jne xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+// Integer-SSE form of the three-row blend, 8 bytes per iteration, built from
+// pavgb: with a, b, c the qwords at src, src+srcpitch and src+2*srcpitch,
+//   dst = pavgb(~pavgb(~a, ~c), b)
+// mm7 holds all ones (pcmpeqb mm7,mm7) and is used to complement values.
+// w is a count of 32-bit words; it is converted to (w + 1) >> 1 qword
+// iterations, so an odd w processes one extra dword.
+// Naked function: arguments are read from the stack after the four pushes.
+// No emms is executed here.
+static void __declspec(naked) asm_blend_row_ISSE(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi // edi = dst - src, so [edi+esi] addresses the destination
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+ inc ebp
+ shr ebp,1 // ebp = (w + 1) / 2 qword iterations
+ pcmpeqb mm7, mm7 // mm7 = all ones
+
+ align 16
+xloop:
+ movq mm0, [esi]
+ movq mm2, mm7
+ pxor mm0, mm7 // mm0 = ~a
+
+ pxor mm2, [esi+edx*2] // mm2 = ~c
+ pavgb mm0, mm2
+ pxor mm0, mm7 // mm0 = ~pavgb(~a, ~c)
+
+ pavgb mm0, [esi+edx]
+ add esi,8
+
+ movq [edi+esi-8],mm0
+ dec ebp
+ jne xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+#else
+// Portable version of the two-row average: for each of the w 32-bit words,
+// combines the word from src0 with the word srcpitch bytes after it and
+// stores the per-byte average, rounded up, to dst0.
+// (p | q) - (((p ^ q) & 0xfefefefe) >> 1) evaluates ceil((p + q) / 2) in all
+// four bytes at once without carries crossing byte boundaries.
+// w is a count of 32-bit words and must be non-zero: the count is only
+// tested after a word has been processed.
+static void asm_blend_row_clipped(void *dst0, const void *src0, uint32 w, ptrdiff_t srcpitch) {
+    const uint32 *upper = (const uint32 *)src0;
+    const uint32 *lower = (const uint32 *)((const char *)src0 + srcpitch);
+    uint32 *out = (uint32 *)dst0;
+
+    for (;;) {
+        const uint32 p = *upper++;
+        const uint32 q = *lower++;
+
+        *out++ = (p | q) - (((p ^ q) & 0xfefefefe) >> 1);
+
+        if (--w == 0)
+            break;
+    }
+}
+
+// Portable version of the 1:2:1 three-row blend. For each of the w 32-bit
+// words, with a, b, c taken from src0, src0 + srcpitch and src0 + 2*srcpitch,
+// every byte of the output is (a + 2*b + c + 2) >> 2.
+//
+// The arithmetic is done on all four bytes of the word at once:
+//  - hi sums the upper six bits of each byte, pre-shifted right by two so the
+//    per-byte sum (at most 4 * 0x3f = 0xfc) cannot carry into the next byte;
+//  - lo sums the lower two bits plus the rounding term (at most 14 per byte),
+//    and its carry into the upper bits is recovered with (lo >> 2) & 0x03.
+// All four bytes are blended, matching the x86 assembly and SSE2 variants;
+// the earlier 24-bit masks left the top byte of every output word zero.
+// w is a count of 32-bit words and must be non-zero.
+static void asm_blend_row(void *dst0, const void *src0, uint32 w, ptrdiff_t srcpitch) {
+    uint32 *dst = (uint32 *)dst0;
+    const uint32 *src = (const uint32 *)src0;
+    const uint32 *src2 = (const uint32 *)((const char *)src + srcpitch);
+    const uint32 *src3 = (const uint32 *)((const char *)src2 + srcpitch);
+
+    do {
+        const uint32 a = *src++;
+        const uint32 b = *src2++;
+        const uint32 c = *src3++;
+        const uint32 hi = ((a >> 2) & 0x3f3f3f3f) + 2*((b >> 2) & 0x3f3f3f3f) + ((c >> 2) & 0x3f3f3f3f);
+        const uint32 lo = (a & 0x03030303) + 2*(b & 0x03030303) + (c & 0x03030303) + 0x02020202;
+
+        *dst++ = hi + ((lo >> 2) & 0x03030303);
+    } while(--w);
+}
+#endif
+
+#if defined(VD_CPU_X86) || defined(VD_CPU_AMD64)
+ // SSE2 form of the three-row blend, 16 bytes per iteration:
+ //   dst = avg(~avg(~a, ~c), b)
+ // where a, b, c are the vectors at src, src + srcpitch and src + 2*srcpitch.
+ // w is a count of 32-bit words and is rounded up to whole 16-byte vectors.
+ // All four row pointers are accessed with aligned loads/stores, and at
+ // least one vector is always processed.
+ static void asm_blend_row_SSE2(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+     const __m128i none = _mm_setzero_si128();
+     const __m128i allOnes = _mm_cmpeq_epi8(none, none);
+     const char *base = (const char *)src;
+
+     const __m128i *rowA = (const __m128i *)base;
+     const __m128i *rowB = (const __m128i *)(base + srcpitch);
+     const __m128i *rowC = (const __m128i *)(base + srcpitch*2);
+     __m128i *out = (__m128i *)dst;
+
+     uint32 vectors = (w + 3) >> 2;
+
+     for (;;) {
+         const __m128i a = _mm_load_si128(rowA++);
+         const __m128i b = _mm_load_si128(rowB++);
+         const __m128i c = _mm_load_si128(rowC++);
+
+         const __m128i outerInv = _mm_avg_epu8(_mm_xor_si128(a, allOnes), _mm_xor_si128(c, allOnes));
+         const __m128i outer = _mm_xor_si128(outerInv, allOnes);
+
+         _mm_store_si128(out++, _mm_avg_epu8(outer, b));
+
+         if (--vectors == 0)
+             break;
+     }
+ }
+
+#endif
+
+namespace {
+
+ // Scalar fallback: writes the per-byte average (rounded up) of two source
+ // planes into dst. Each row covers w16 16-byte groups (w16 * 4 dwords);
+ // src1 and src2 share srcPitch, dst advances by dstPitch. h rows are
+ // processed and h must be non-zero (the count is tested after each row).
+ void Average_scalar(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+     const uint32 dwordsPerRow = w16 << 2;
+     char *outRow = (char *)dst;
+     const char *rowA = (const char *)src1;
+     const char *rowB = (const char *)src2;
+
+     for (;;) {
+         uint32 *out = (uint32 *)outRow;
+         const uint32 *a = (const uint32 *)rowA;
+         const uint32 *b = (const uint32 *)rowB;
+
+         for (uint32 left = dwordsPerRow; left != 0; --left) {
+             const uint32 p = *a++;
+             const uint32 q = *b++;
+
+             // ceil((p + q) / 2) in each byte, without cross-byte carries
+             *out++ = (p | q) - (((p ^ q) & 0xfefefefe) >> 1);
+         }
+
+         if (--h == 0)
+             break;
+
+         outRow += dstPitch;
+         rowA += srcPitch;
+         rowB += srcPitch;
+     }
+ }
+
+#if defined(VD_CPU_X86)
+ // MMX plane average: dst = per-byte average (rounded up) of src1 and src2,
+ // computed as (a | b) - (((a ^ b) >> 1) & 0x7f) on 16 bytes per inner
+ // iteration. The second qword applies the equivalent ((a ^ b) & 0xfe) >> 1.
+ // Naked __cdecl function: arguments are read from the stack after the four
+ // pushes (the "+16" in each offset). w16 is the number of 16-byte groups per
+ // row and h the number of rows; both loops test their counter only after
+ // the first pass. Executes emms before returning.
+ void __declspec(naked) __cdecl Average_MMX(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ static const __declspec(align(8)) uint64 x7fb = 0x7f7f7f7f7f7f7f7f;
+ static const __declspec(align(8)) uint64 xfeb = 0xfefefefefefefefe;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+24+16]
+ mov eax, [esp+4+16]
+ shl esi, 4 // esi = w16 * 16 = bytes written per row
+ mov ecx, [esp+12+16]
+ mov edx, [esp+16+16]
+ mov ebp, [esp+20+16]
+ mov edi, [esp+8+16]
+ sub edi, esi // edi = dstPitch - row bytes: step to the next dst row
+ sub ebp, esi // ebp = srcPitch - row bytes: step to the next src row
+
+ movq mm6, x7fb
+ movq mm7, xfeb
+
+ mov esi, [esp+28+16] // esi = h (row counter)
+yloop:
+ mov ebx, [esp+24+16] // ebx = w16 (16-byte groups left in this row)
+mainRowLoop:
+ movq mm0, [ecx]
+ movq mm3, [ecx + 8]
+ movq mm1, mm0
+ movq mm2, [edx]
+ movq mm4, mm3
+ movq mm5, [edx + 8]
+ por mm1, mm2
+ pxor mm0, mm2
+ por mm4, mm5
+ pxor mm3, mm5
+ psrlq mm0, 1
+ pand mm3, mm7
+ pand mm0, mm6
+ psrlq mm3, 1
+ psubb mm1, mm0
+ psubb mm4, mm3
+ add ecx, 16
+ movq [eax], mm1
+ movq [eax+8], mm4
+ add edx, 16
+ add eax, 16
+ dec ebx
+ jne mainRowLoop
+
+ add eax, edi
+ add ecx, ebp
+ add edx, ebp
+ dec esi
+ jne yloop
+
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+ }
+
+ // Integer-SSE plane average: dst = pavgb(src1, src2), 16 bytes per inner
+ // iteration (two qwords).
+ // Naked __cdecl function: arguments are read from the stack after the four
+ // pushes (the "+16" in each offset). w16 is the number of 16-byte groups per
+ // row and h the number of rows; both loops test their counter only after
+ // the first pass. Executes emms before returning.
+ // NOTE(review): x7fb/xfeb are loaded into mm6/mm7 but the loops below never
+ // read those registers.
+ void __declspec(naked) __cdecl Average_ISSE(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ static const __declspec(align(8)) uint64 x7fb = 0x7f7f7f7f7f7f7f7f;
+ static const __declspec(align(8)) uint64 xfeb = 0xfefefefefefefefe;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+24+16]
+ mov eax, [esp+4+16]
+ shl esi, 4 // esi = w16 * 16 = bytes written per row
+ mov ecx, [esp+12+16]
+ mov edx, [esp+16+16]
+ mov ebp, [esp+20+16]
+ mov edi, [esp+8+16]
+ sub edi, esi // edi = dstPitch - row bytes: step to the next dst row
+ sub ebp, esi // ebp = srcPitch - row bytes: step to the next src row
+
+ movq mm6, x7fb
+ movq mm7, xfeb
+
+ mov esi, [esp+28+16] // esi = h (row counter)
+yloop:
+ mov ebx, [esp+24+16] // ebx = w16 (16-byte groups left in this row)
+mainRowLoop:
+ movq mm0, [ecx]
+ movq mm1, [ecx + 8]
+ movq mm2, [edx]
+ movq mm3, [edx + 8]
+ pavgb mm0, mm2
+ pavgb mm1, mm3
+ movq [eax], mm0
+ add ecx, 16
+ add edx, 16
+ movq [eax+8], mm1
+ add eax, 16
+ dec ebx
+ jne mainRowLoop
+
+ add eax, edi
+ add ecx, ebp
+ add edx, ebp
+ dec esi
+ jne yloop
+
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+ }
+#endif
+
+#if defined(VD_CPU_X86) || defined(VD_CPU_AMD64)
+ // SSE2 plane average: dst = _mm_avg_epu8(src1, src2) for w16 16-byte
+ // vectors per row, over h rows. Rows are accessed with aligned
+ // loads/stores. h must be non-zero (the row count is tested after each
+ // row); a w16 of zero leaves every row untouched.
+ void Average_SSE2(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+     char *outRow = (char *)dst;
+     const char *rowA = (const char *)src1;
+     const char *rowB = (const char *)src2;
+
+     for (;;) {
+         __m128i *out = (__m128i *)outRow;
+         const __m128i *a = (const __m128i *)rowA;
+         const __m128i *b = (const __m128i *)rowB;
+
+         for (uint32 left = w16; left != 0; --left) {
+             const __m128i p = _mm_load_si128(a++);
+             const __m128i q = _mm_load_si128(b++);
+
+             _mm_store_si128(out++, _mm_avg_epu8(p, q));
+         }
+
+         if (--h == 0)
+             break;
+
+         outRow += dstPitch;
+         rowA += srcPitch;
+         rowB += srcPitch;
+     }
+ }
+#endif
+
+ // Bob interpolation of one plane: every second destination row, starting
+ // at row y0, is written as the average of the source rows directly above
+ // and below it (y0 is 1 when interpField2 is set, otherwise 2).
+ // Only the interpolated rows and one border row are written here:
+ //  - !interpField2: row 0 is copied from the source;
+ //  - interpField2:  row h-1 is copied from the source.
+ // w is the row width in bytes; it is rounded up to whole dwords for the
+ // border copy and then to whole 16-byte groups for the averaging kernel.
+ void InterpPlane_Bob(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, uint32 w, uint32 h, bool interpField2) {
+     typedef void (*AverageFn)(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h);
+
+     // Pick the widest kernel the CPU flags allow; non-x86 builds use SSE2.
+#if defined(VD_CPU_X86)
+     const AverageFn average = SSE2_enabled ? Average_SSE2
+                             : ISSE_enabled ? Average_ISSE
+                             : MMX_enabled ? Average_MMX
+                             : Average_scalar;
+#else
+     const AverageFn average = Average_SSE2;
+#endif
+
+     w = (w + 3) >> 2;
+
+     const int y0 = interpField2 ? 1 : 2;
+
+     if (!interpField2)
+         memcpy(dst, src, w * 4);
+
+     if (h > y0) {
+         ASSERT(((UINT_PTR)dst & 0xF) == 0);
+         ASSERT((dstpitch & 0xF) == 0);
+         ASSERT(((UINT_PTR)src & 0xF) == 0);
+         ASSERT((srcpitch*(y0 - 1) & 0xF) == 0);
+
+         char *firstOut = (char *)dst + dstpitch*y0;
+         const char *rowAbove = (const char *)src + srcpitch*(y0 - 1);
+         const char *rowBelow = (const char *)src + srcpitch*(y0 + 1);
+
+         // Doubled pitches step from one interpolated row to the next.
+         average(firstOut, dstpitch*2, rowAbove, rowBelow, srcpitch*2, (w + 3) >> 2, (h - y0) >> 1);
+     }
+
+     if (interpField2) {
+         memcpy((char *)dst + dstpitch*(h - 1), (const char *)src + srcpitch*(h - 1), w*4);
+     }
+
+#ifdef _M_IX86
+     if (MMX_enabled)
+         __asm emms
+#endif
+ }
+
+ // Blend deinterlace of one plane. The first and last destination rows are
+ // the average of the two nearest source rows (asm_blend_row_clipped); each
+ // row in between is a 1:2:1 blend of the source rows above, at and below it
+ // (blend_func, chosen from the CPU flags on x86, SSE2 otherwise).
+ // w is the row width in bytes and is rounded up to whole 32-bit words.
+ //
+ // Degenerate sizes are handled up front, because the row kernels and the
+ // row loop are do/while loops on unsigned counters that would wrap around:
+ //  - w == 0 or h == 0: nothing is written;
+ //  - h == 1: there is no neighbouring row to blend with, so the single row
+ //    is copied unchanged.
+ void BlendPlane(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, uint32 w, uint32 h) {
+     void (*blend_func)(void *, const void *, uint32, ptrdiff_t);
+#if defined(VD_CPU_X86)
+     if (SSE2_enabled)
+         blend_func = asm_blend_row_SSE2;
+     else
+         blend_func = ISSE_enabled ? asm_blend_row_ISSE : MMX_enabled ? asm_blend_row_MMX : asm_blend_row;
+#else
+     blend_func = asm_blend_row_SSE2;
+#endif
+
+     w = (w + 3) >> 2;
+
+     if (w == 0 || h == 0)
+         return;
+
+     if (h == 1) {
+         memcpy(dst, src, w * 4);
+         return;
+     }
+
+     // Top row: average of source rows 0 and 1.
+     asm_blend_row_clipped(dst, src, w, srcpitch);
+
+     // Interior rows: src trails dst by one row so the kernel sees the rows
+     // above, at and below the destination row.
+     if (h -= 2) {
+         do {
+             dst = ((char *)dst + dstpitch);
+
+             blend_func(dst, src, w, srcpitch);
+
+             src = ((char *)src + srcpitch);
+         } while(--h);
+     }
+
+     // Bottom row: average of the last two source rows.
+     asm_blend_row_clipped((char *)dst + dstpitch, src, w, srcpitch);
+
+#ifdef _M_IX86
+     if (MMX_enabled)
+         __asm emms
+#endif
+ }
+}
+
+// Public entry point for blend deinterlacing: forwards to BlendPlane with
+// the arguments reordered (destination pitch directly after dst, source
+// pitch directly after src).
+// NOTE(review): the pitches arrive as unsigned DWORDs and are converted to
+// ptrdiff_t at the call, so negative (bottom-up) pitches are presumably not
+// supported here - confirm against callers.
+void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD w, DWORD h, DWORD dstpitch, DWORD srcpitch)
+{
+ BlendPlane(dst, dstpitch, src, srcpitch, w, h);
+}
+
+// Public entry point for bob deinterlacing: forwards to InterpPlane_Bob,
+// passing the inverse of topfield as its interpField2 argument.
+void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD w, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield)
+{
+    const bool interpField2 = !topfield;
+
+    InterpPlane_Bob(dst, dstpitch, src, srcpitch, w, h, interpField2);
+}
diff --git a/src/DSUtil/dsutil.vcproj b/src/DSUtil/dsutil.vcproj
index 49162423b..69a625c18 100644
--- a/src/DSUtil/dsutil.vcproj
+++ b/src/DSUtil/dsutil.vcproj
@@ -44,7 +44,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;"
/>
<Tool
@@ -105,7 +105,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="_WIN64;_DEBUG;_LIB;"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="3"
@@ -167,7 +167,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
BufferSecurityCheck="true"
EnableEnhancedInstructionSet="1"
@@ -233,7 +233,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="_WIN64;NDEBUG;_LIB"
BufferSecurityCheck="true"
EnableEnhancedInstructionSet="0"
@@ -278,130 +278,8 @@
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm"
>
<File
- RelativePath=".\a_yuv2rgb.asm"
+ RelativePath=".\deinterlace.cpp"
>
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\a_yuvtable.asm"
- >
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\convert_a.asm"
- >
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
</File>
<File
RelativePath=".\DSMPropertyBag.cpp"
diff --git a/src/DSUtil/vd.cpp b/src/DSUtil/vd.cpp
index 063fbfe06..0d7f77aaa 100644
--- a/src/DSUtil/vd.cpp
+++ b/src/DSUtil/vd.cpp
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,8 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - The core assembly function of CCpuID is from DVD2AVI
+// - VDPixmapBlt is from VirtualDub
// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
@@ -27,300 +27,272 @@
#include "vd_asm.h"
#include <intrin.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/memory.h>
+
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+
#pragma warning(disable : 4799) // no emms... blahblahblah
+// Detects the CPU extensions, enables them for the VirtualDub libraries and
+// lets the fast-memcpy code pick its implementation.
+void VDCPUTest() {
+    long detected = CPUCheckForExtensions();
+
+    SYSTEM_INFO sysInfo;
+    GetSystemInfo(&sysInfo);
+
+    // Intel-architecture CPUs below processor level 4 have the FPU flag
+    // cleared: not strictly true, but very slow anyway.
+    const bool slowFpu = sysInfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_INTEL
+                      && sysInfo.wProcessorLevel < 4;
+    if (slowFpu)
+        detected &= ~CPUF_SUPPORTS_FPU;
+
+    CPUEnableExtensions(detected);
+
+    VDFastMemcpyAutodetect();
+}
+
CCpuID g_cpuid;
+// Builds m_flags from the CPU extensions enabled by VDCPUTest(): each
+// CPUF_SUPPORTS_* bit returned by CPUGetEnabledExtensions() is mapped to the
+// corresponding CCpuID flag (mmx, ssemmx, ssefpu, sse2, _3dnow).
+// NOTE(review): the __cpuid-based code this replaces also set the sse3 flag;
+// this version never sets it - confirm nothing still tests CCpuID::sse3.
 CCpuID::CCpuID()
-{
- int CPUInfo[4] = {-1};
- __cpuid(CPUInfo, 1);
- int t = CPUInfo[3];
-
- int mflags = 0;
- mflags |= ((t&0x00800000)!=0) ? mmx : 0; // STD MMX
- mflags |= ((t&0x02000000)!=0) ? ssemmx+ssefpu : 0; // STD SSE
- mflags |= ((t&0x04000000)!=0) ? sse2 : 0; // SSE2
+{
+ VDCPUTest();
- t = CPUInfo[2];
- mflags |= ((t&0x00000001)!=0) ? sse3 : 0; // SSE3
+ long lEnableFlags = CPUGetEnabledExtensions();
- // 3dnow
- __cpuid(CPUInfo, 0x80000001);
- t = CPUInfo[3];
- mflags |= ((t&0x80000000)!=0) ? _3dnow : 0; // 3D NOW
- mflags |= ((t&0x00400000)!=0) ? ssemmx : 0; // SSE MMX
+ int flags = 0;
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_MMX) ? mmx : 0; // STD MMX
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_INTEGER_SSE) ? ssemmx : 0; // SSE MMX
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_SSE) ? ssefpu : 0; // STD SSE
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_SSE2) ? sse2 : 0; // SSE2
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_3DNOW) ? _3dnow : 0; // 3DNow
 // result
- m_flags = (flag_t)mflags;
+ m_flags = (flag_t)flags;
 }
-void memcpy_accel(void* dst, const void* src, size_t len)
+bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::ssefpu) && len >= 128
- && !((DWORD)src&15) && !((DWORD)dst&15))
- {
- __asm
- {
- mov esi, dword ptr [src]
- mov edi, dword ptr [dst]
- mov ecx, len
- shr ecx, 7
- memcpy_accel_sse_loop:
- prefetchnta [esi+16*8]
- movaps xmm0, [esi]
- movaps xmm1, [esi+16*1]
- movaps xmm2, [esi+16*2]
- movaps xmm3, [esi+16*3]
- movaps xmm4, [esi+16*4]
- movaps xmm5, [esi+16*5]
- movaps xmm6, [esi+16*6]
- movaps xmm7, [esi+16*7]
- movntps [edi], xmm0
- movntps [edi+16*1], xmm1
- movntps [edi+16*2], xmm2
- movntps [edi+16*3], xmm3
- movntps [edi+16*4], xmm4
- movntps [edi+16*5], xmm5
- movntps [edi+16*6], xmm6
- movntps [edi+16*7], xmm7
- add esi, 128
- add edi, 128
- dec ecx
- jne memcpy_accel_sse_loop
- mov ecx, len
- and ecx, 127
- cmp ecx, 0
- je memcpy_accel_sse_end
- memcpy_accel_sse_loop2:
- mov dl, byte ptr[esi]
- mov byte ptr[edi], dl
- inc esi
- inc edi
- dec ecx
- jne memcpy_accel_sse_loop2
- memcpy_accel_sse_end:
- emms
- sfence
- }
- }
- else if((g_cpuid.m_flags & CCpuID::mmx) && len >= 64
- && !((DWORD)src&7) && !((DWORD)dst&7))
- {
- __asm
- {
- mov esi, dword ptr [src]
- mov edi, dword ptr [dst]
- mov ecx, len
- shr ecx, 6
- memcpy_accel_mmx_loop:
- movq mm0, qword ptr [esi]
- movq mm1, qword ptr [esi+8*1]
- movq mm2, qword ptr [esi+8*2]
- movq mm3, qword ptr [esi+8*3]
- movq mm4, qword ptr [esi+8*4]
- movq mm5, qword ptr [esi+8*5]
- movq mm6, qword ptr [esi+8*6]
- movq mm7, qword ptr [esi+8*7]
- movq qword ptr [edi], mm0
- movq qword ptr [edi+8*1], mm1
- movq qword ptr [edi+8*2], mm2
- movq qword ptr [edi+8*3], mm3
- movq qword ptr [edi+8*4], mm4
- movq qword ptr [edi+8*5], mm5
- movq qword ptr [edi+8*6], mm6
- movq qword ptr [edi+8*7], mm7
- add esi, 64
- add edi, 64
- loop memcpy_accel_mmx_loop
- mov ecx, len
- and ecx, 63
- cmp ecx, 0
- je memcpy_accel_mmx_end
- memcpy_accel_mmx_loop2:
- mov dl, byte ptr [esi]
- mov byte ptr [edi], dl
- inc esi
- inc edi
- dec ecx
- jne memcpy_accel_mmx_loop2
- memcpy_accel_mmx_end:
- emms
- }
- }
- else
-#endif
- {
- memcpy(dst, src, len);
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch / 2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch / 2;
+
+ VDPixmap dstpxm = {0};
+
+ dstpxm.data = dsty;
+ dstpxm.pitch = dstpitch;
+ dstpxm.w = w;
+ dstpxm.h = h;
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ dstpxm.data2 = dstu;
+ dstpxm.pitch2 = dstpitch / 2;
+ dstpxm.data3 = dstv;
+ dstpxm.pitch3 = dstpitch / 2;
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void yuvtoyuy2row_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width)
+// Copies a YUY2 image by describing source and destination as
+// kPixFormat_YUV422_YUYV pixmaps of w x h pixels and handing them to
+// VDPixmapBlt; returns VDPixmapBlt's result.
+bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch)
 {
- WORD* dstw = (WORD*)dst;
- for(; width > 1; width -= 2)
- {
- *dstw++ = (*srcu++<<8)|*srcy++;
- *dstw++ = (*srcv++<<8)|*srcy++;
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = src;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ // Positional initialisation; the remaining members are zero-initialised
+ // and format is assigned just below.
+ VDPixmap dstpxm = {
+ dst,
+ NULL,
+ w,
+ h,
+ dstpitch
+ };
+
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ return VDPixmapBlt(dstpxm, srcbm);
 }
-static void yuvtoyuy2row_avg_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width, DWORD pitchuv)
+// Converts a planar YUV 4:2:0 image to RGB through VDPixmapBlt and returns
+// its result. The chroma planes use half the luma pitch. The destination is
+// described bottom-up: data points at the last row (dst + dstpitch*(h-1))
+// and the pitch is negated. dbpp selects the destination format
+// (16 -> XRGB1555, 24 -> RGB888, 32 -> XRGB8888); any other value only trips
+// VDASSERT and the blit is still attempted with format left at its
+// zero-initialised value.
+bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
 {
- WORD* dstw = (WORD*)dst;
- for(; width > 1; width -= 2, srcu++, srcv++)
- {
- *dstw++ = (((srcu[0]+srcu[pitchuv])>>1)<<8)|*srcy++;
- *dstw++ = (((srcv[0]+srcv[pitchuv])>>1)<<8)|*srcy++;
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch/2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch/2;
+
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
+
+ switch(dbpp) {
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ // TODO: check that the conversion works correctly (555->565) when dbpp == 16
+
+ return VDPixmapBlt(dstpxm, srcbm);
 }
-static void asm_blend_row_clipped_c(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
+// Converts a planar YUV 4:2:0 image to packed YUY2. A srcpitch of 0 is
+// replaced by w. On non-Win64 builds, when SSE2 is flagged and all pointers
+// and pitches meet the alignment tests below, yv12_yuy2_sse2 is used (after
+// rejecting non-positive or odd w/h); otherwise the image is described as
+// pixmaps and converted by VDPixmapBlt, whose result is returned.
+// Note that the w/h validation only happens on the SSE2 path.
+bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
 {
- BYTE* src2 = src + srcpitch;
- do
+ if(srcpitch == 0) srcpitch = w;
+
+#ifndef _WIN64
+ if((g_cpuid.m_flags & CCpuID::sse2)
+ && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
+ && !((DWORD_PTR)dst&15) && !(dstpitch&15))
 {
- *dst++ = (*src++ + *src2++ + 1) >> 1;
- } while(w--);
+ if(w<=0 || h<=0 || (w&1) || (h&1))
+ return(false);
+
+ yv12_yuy2_sse2(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
+ return(true);
+ }
+#endif
+
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch/2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch/2;
+
+ VDPixmap dstpxm = {
+ dst,
+ NULL,
+ w,
+ h,
+ dstpitch
+ };
+
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ return VDPixmapBlt(dstpxm, srcbm);
 }
-static void asm_blend_row_c(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
+// Copies/converts an RGB image between bit depths through VDPixmapBlt and
+// returns its result. Both images are described bottom-up: data points at
+// the last row and the pitch is negated.
+//   sbpp - source bits per pixel      (8 -> Pal8, 16 -> XRGB1555,
+//   dbpp - destination bits per pixel  24 -> RGB888, 32 -> XRGB8888)
+// Any other depth only trips VDASSERT; the blit is then attempted with the
+// affected format left at its zero-initialised value.
+bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp)
 {
- BYTE* src2 = src + srcpitch;
- BYTE* src3 = src2 + srcpitch;
- do
- {
- *dst++ = (*src++ + (*src2++ << 1) + *src3++ + 2) >> 2;
- } while(w--);
+ VDPixmap srcbm = {
+ (char *)src + srcpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -srcpitch
+ };
+
+ // The source format follows the source depth (sbpp); selecting it from
+ // dbpp would describe the source with the destination's pixel layout.
+ switch(sbpp) {
+ case 8: srcbm.format = nsVDPixmap::kPixFormat_Pal8; break;
+ case 16: srcbm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: srcbm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: srcbm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
+
+ switch(dbpp) {
+ case 8: dstpxm.format = nsVDPixmap::kPixFormat_Pal8; break;
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ return VDPixmapBlt(dstpxm, srcbm);
 }
-bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
+// Converts a packed YUY2 image to RGB through VDPixmapBlt and returns its
+// result. The destination is described bottom-up (data at the last row,
+// negated pitch); dbpp selects its format (16 -> XRGB1555, 24 -> RGB888,
+// 32 -> XRGB8888) and any other value only trips VDASSERT.
+// NOTE(review): a srcpitch of 0 is replaced by w; a YUY2 row is presumably
+// 2 bytes per pixel, so this default looks suspicious - confirm with callers.
+bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch)
 {
- if((w&1))
- return(false);
-
- if(w > 0 && w == srcpitch && w == dstpitch)
- {
- memcpy_accel(dsty, srcy, h*srcpitch);
- memcpy_accel(dstu, srcu, h/2*srcpitch/2);
- memcpy_accel(dstv, srcv, h/2*srcpitch/2);
- }
- else
- {
- int pitch = min(abs(srcpitch), abs(dstpitch));
-
- for(ptrdiff_t y = 0; y < h; y++, srcy += srcpitch, dsty += dstpitch)
- memcpy_accel(dsty, srcy, pitch);
+ if(srcpitch == 0) srcpitch = w;
- srcpitch >>= 1;
- dstpitch >>= 1;
+ VDPixmap srcbm = {0};
- pitch = min(abs(srcpitch), abs(dstpitch));
+ srcbm.data = src;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
- for(ptrdiff_t y = 0; y < h; y+=2, srcu += srcpitch, dstu += dstpitch)
- memcpy_accel(dstu, srcu, pitch);
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
- for(ptrdiff_t y = 0; y < h; y+=2, srcv += srcpitch, dstv += dstpitch)
- memcpy_accel(dstv, srcv, pitch);
+ switch(dbpp) {
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
 }
- return(true);
+ return VDPixmapBlt(dstpxm, srcbm);
 }
-bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch)
+static void yuvtoyuy2row_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width)
{
- if(w > 0 && w == srcpitch && w == dstpitch)
- {
- memcpy_accel(dst, src, h*srcpitch);
- }
- else
+ WORD* dstw = (WORD*)dst;
+ for(; width > 1; width -= 2)
{
- int pitch = min(abs(srcpitch), abs(dstpitch));
-
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- memcpy_accel(dst, src, pitch);
+ *dstw++ = (*srcu++<<8)|*srcy++;
+ *dstw++ = (*srcv++<<8)|*srcy++;
}
-
- return(true);
}
-#ifndef _WIN64
-extern "C" void asm_YUVtoRGB32_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB32_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB32_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-#endif
-
-bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
+static void yuvtoyuy2row_avg_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width, DWORD pitchuv)
{
- if(w<=0 || h<=0 || (w&1) || (h&1))
- return(false);
-
-#ifndef _WIN64
- void (*asm_YUVtoRGB_row)(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width) = NULL;;
-
- if((g_cpuid.m_flags & CCpuID::ssefpu) && !(w&7))
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row/*_ISSE*/; break; // TODO: fix _ISSE (555->565)
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row_ISSE; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row_ISSE; break;
- }
- }
- else if((g_cpuid.m_flags & CCpuID::mmx) && !(w&7))
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row/*_MMX*/; break; // TODO: fix _MMX (555->565)
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row_MMX; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row_MMX; break;
- }
- }
- else
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row; break;
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row; break;
- }
- }
-
- if(!asm_YUVtoRGB_row)
- return(false);
-
- do
+ WORD* dstw = (WORD*)dst;
+ for(; width > 1; width -= 2, srcu++, srcv++)
{
- asm_YUVtoRGB_row(dst + dstpitch, dst, srcy + srcpitch, srcy, srcu, srcv, w/2);
-
- dst += 2*dstpitch;
- srcy += srcpitch*2;
- srcu += srcpitch/2;
- srcv += srcpitch/2;
+ *dstw++ = (((srcu[0]+srcu[pitchuv])>>1)<<8)|*srcy++;
+ *dstw++ = (((srcv[0]+srcv[pitchuv])>>1)<<8)|*srcy++;
}
- while(h -= 2);
-
- if(g_cpuid.m_flags & CCpuID::mmx)
- __asm emms
-
- if(g_cpuid.m_flags & CCpuID::ssefpu)
- __asm sfence
-
- return(true);
-#else
- ASSERT(FALSE);
- return(false);
-#endif
}
-bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch, bool fInterlaced)
+bool BitBltFromI420ToYUY2Interlaced(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
if(w<=0 || h<=0 || (w&1) || (h&1))
return(false);
@@ -332,16 +304,15 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
#ifndef _WIN64
if((g_cpuid.m_flags & CCpuID::sse2)
- && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
- && !((DWORD_PTR)dst&15) && !(dstpitch&15))
+ && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
+ && !((DWORD_PTR)dst&15) && !(dstpitch&15))
{
- if(!fInterlaced) yv12_yuy2_sse2(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
- else yv12_yuy2_sse2_interlaced(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
+ yv12_yuy2_sse2_interlaced(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
return(true);
}
else
{
- ASSERT(!fInterlaced);
+ ASSERT(FALSE);
}
if((g_cpuid.m_flags & CCpuID::mmx) && !(w&7))
@@ -359,15 +330,16 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
if(!yuvtoyuy2row)
return(false);
+ int halfsrcpitch = srcpitch/2;
do
{
yuvtoyuy2row(dst, srcy, srcu, srcv, w);
- yuvtoyuy2row_avg(dst + dstpitch, srcy + srcpitch, srcu, srcv, w, srcpitch/2);
+ yuvtoyuy2row_avg(dst + dstpitch, srcy + srcpitch, srcu, srcv, w, halfsrcpitch);
dst += 2*dstpitch;
- srcy += srcpitch*2;
- srcu += srcpitch/2;
- srcv += srcpitch/2;
+ srcy += halfsrcpitch;
+ srcu += halfsrcpitch;
+ srcv += halfsrcpitch;
}
while((h -= 2) > 2);
@@ -381,481 +353,3 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
return(true);
}
-
-bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp)
-{
- if(dbpp == sbpp)
- {
- int rowbytes = w*dbpp>>3;
-
- if(rowbytes > 0 && rowbytes == srcpitch && rowbytes == dstpitch)
- {
- memcpy_accel(dst, src, h*rowbytes);
- }
- else
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- memcpy_accel(dst, src, rowbytes);
- }
-
- return(true);
- }
-
- if(sbpp != 16 && sbpp != 24 && sbpp != 32
- || dbpp != 16 && dbpp != 24 && dbpp != 32)
- return(false);
-
- if(dbpp == 16)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 24)
- {
- BYTE* s = (BYTE*)src;
- WORD* d = (WORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=3, d++)
- *d = (WORD)(((*((DWORD*)s)>>8)&0xf800)|((*((DWORD*)s)>>5)&0x07e0)|((*((DWORD*)s)>>3)&0x1f));
- }
- else if(sbpp == 32)
- {
- DWORD* s = (DWORD*)src;
- WORD* d = (WORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d++)
- *d = (WORD)(((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x1f));
- }
- }
- }
- else if(dbpp == 24)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 16)
- {
- WORD* s = (WORD*)src;
- BYTE* d = (BYTE*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d+=3)
- { // not tested, r-g-b might be in reverse
- d[0] = (*s&0x001f)<<3;
- d[1] = (*s&0x07e0)<<5;
- d[2] = (*s&0xf800)<<8;
- }
- }
- else if(sbpp == 32)
- {
- BYTE* s = (BYTE*)src;
- BYTE* d = (BYTE*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=4, d+=3)
- {d[0] = s[0]; d[1] = s[1]; d[2] = s[2];}
- }
- }
- }
- else if(dbpp == 32)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 16)
- {
- WORD* s = (WORD*)src;
- DWORD* d = (DWORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d++)
- *d = ((*s&0xf800)<<8)|((*s&0x07e0)<<5)|((*s&0x001f)<<3);
- }
- else if(sbpp == 24)
- {
- BYTE* s = (BYTE*)src;
- DWORD* d = (DWORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=3, d++)
- *d = *((DWORD*)s)&0xffffff;
- }
- }
- }
-
- return(true);
-}
-
-void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch)
-{
- void (*blend_row_clipped)(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch) = NULL;
- void (*blend_row)(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch) = NULL;
-
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::sse2) && !((DWORD)src&0xf) && !((DWORD)dst&0xf) && !(srcpitch&0xf))
- {
- blend_row_clipped = asm_blend_row_clipped_SSE2;
- blend_row = asm_blend_row_SSE2;
- }
- else if(g_cpuid.m_flags & CCpuID::mmx)
- {
- blend_row_clipped = asm_blend_row_clipped_MMX;
- blend_row = asm_blend_row_MMX;
- }
- else
-#endif
- {
- blend_row_clipped = asm_blend_row_clipped_c;
- blend_row = asm_blend_row_c;
- }
-
- if(!blend_row_clipped)
- return;
-
- blend_row_clipped(dst, src, rowbytes, srcpitch);
-
- if((h -= 2) > 0) do
- {
- dst += dstpitch;
- blend_row(dst, src, rowbytes, srcpitch);
- src += srcpitch;
- }
- while(--h);
-
- blend_row_clipped(dst + dstpitch, src, rowbytes, srcpitch);
-
-#ifndef _WIN64
- if(g_cpuid.m_flags & CCpuID::mmx)
- __asm emms
-#endif
-}
-
-void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield)
-{
- if(topfield)
- {
- BitBltFromRGBToRGB(rowbytes, h/2, dst, dstpitch*2, 8, src, srcpitch*2, 8);
- AvgLines8(dst, h, dstpitch);
- }
- else
- {
- BitBltFromRGBToRGB(rowbytes, h/2, dst + dstpitch, dstpitch*2, 8, src + srcpitch, srcpitch*2, 8);
- AvgLines8(dst + dstpitch, h-1, dstpitch);
- }
-}
-
-void AvgLines8(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- BYTE* tmp = s;
-
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::sse2) && !((DWORD)tmp&0xf) && !((DWORD)pitch&0xf))
- {
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 4
-
-AvgLines8_sse2_loop:
- movdqa xmm0, [esi]
- pavgb xmm0, [esi+ebx*2]
- movdqa [esi+ebx], xmm0
- add esi, 16
-
- dec ecx
- jnz AvgLines8_sse2_loop
-
- mov tmp, esi
- }
-
- for(ptrdiff_t i = pitch&7; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- else if(g_cpuid.m_flags & CCpuID::mmx)
- {
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- pxor mm7, mm7
-AvgLines8_mmx_loop:
- movq mm0, [esi]
- movq mm1, mm0
-
- punpcklbw mm0, mm7
- punpckhbw mm1, mm7
-
- movq mm2, [esi+ebx*2]
- movq mm3, mm2
-
- punpcklbw mm2, mm7
- punpckhbw mm3, mm7
-
- paddw mm0, mm2
- psrlw mm0, 1
-
- paddw mm1, mm3
- psrlw mm1, 1
-
- packuswb mm0, mm1
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines8_mmx_loop
-
- mov tmp, esi
- }
-
- for(ptrdiff_t i = pitch&7; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- else
-#endif
- {
- for(ptrdiff_t i = pitch; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-void AvgLines555(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- unsigned __int64 __0x03e003e003e003e0 = 0x03e003e003e003e0;
- unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- BYTE* tmp = s;
-
-#ifndef _WIN64
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- movq mm6, __0x03e003e003e003e0
- movq mm7, __0x001f001f001f001f
-
-AvgLines555_loop:
- movq mm0, [esi]
- movq mm1, mm0
- movq mm2, mm0
-
- psrlw mm0, 10 // red1 bits: mm0 = 001f001f001f001f
- pand mm1, mm6 // green1 bits: mm1 = 03e003e003e003e0
- pand mm2, mm7 // blue1 bits: mm2 = 001f001f001f001f
-
- movq mm3, [esi+ebx*2]
- movq mm4, mm3
- movq mm5, mm3
-
- psrlw mm3, 10 // red2 bits: mm3 = 001f001f001f001f
- pand mm4, mm6 // green2 bits: mm4 = 03e003e003e003e0
- pand mm5, mm7 // blue2 bits: mm5 = 001f001f001f001f
-
- paddw mm0, mm3
- psrlw mm0, 1 // (red1+red2)/2
- psllw mm0, 10 // red bits at 7c007c007c007c00
-
- paddw mm1, mm4
- psrlw mm1, 1 // (green1+green2)/2
- pand mm1, mm6 // green bits at 03e003e003e003e0
-
- paddw mm2, mm5
- psrlw mm2, 1 // (blue1+blue2)/2
- // blue bits at 001f001f001f001f (no need to pand, lower bits were discareded)
-
- por mm0, mm1
- por mm0, mm2
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines555_loop
-
- mov tmp, esi
- }
-#endif
-
- for(ptrdiff_t i = (pitch&7)>>1; i--; tmp++)
- {
- tmp[pitch] =
- ((((*tmp&0x7c00) + (tmp[pitch<<1]&0x7c00)) >> 1)&0x7c00)|
- ((((*tmp&0x03e0) + (tmp[pitch<<1]&0x03e0)) >> 1)&0x03e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-void AvgLines565(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- unsigned __int64 __0x07e007e007e007e0 = 0x07e007e007e007e0;
- unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- WORD* tmp = (WORD*)s;
-
-#ifndef _WIN64
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- movq mm6, __0x07e007e007e007e0
- movq mm7, __0x001f001f001f001f
-
-AvgLines565_loop:
- movq mm0, [esi]
- movq mm1, mm0
- movq mm2, mm0
-
- psrlw mm0, 11 // red1 bits: mm0 = 001f001f001f001f
- pand mm1, mm6 // green1 bits: mm1 = 07e007e007e007e0
- pand mm2, mm7 // blue1 bits: mm2 = 001f001f001f001f
-
- movq mm3, [esi+ebx*2]
- movq mm4, mm3
- movq mm5, mm3
-
- psrlw mm3, 11 // red2 bits: mm3 = 001f001f001f001f
- pand mm4, mm6 // green2 bits: mm4 = 07e007e007e007e0
- pand mm5, mm7 // blue2 bits: mm5 = 001f001f001f001f
-
- paddw mm0, mm3
- psrlw mm0, 1 // (red1+red2)/2
- psllw mm0, 11 // red bits at f800f800f800f800
-
- paddw mm1, mm4
- psrlw mm1, 1 // (green1+green2)/2
- pand mm1, mm6 // green bits at 03e003e003e003e0
-
- paddw mm2, mm5
- psrlw mm2, 1 // (blue1+blue2)/2
- // blue bits at 001f001f001f001f (no need to pand, lower bits were discareded)
-
- por mm0, mm1
- por mm0, mm2
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines565_loop
-
- mov tmp, esi
- }
-#else
- for(ptrdiff_t wd=(pitch>>3);wd--;tmp++)
- {
- tmp[0] =
- ((((*tmp&0xf800) + (tmp[pitch<<1]&0xf800)) >> 1)&0xf800)|
- ((((*tmp&0x07e0) + (tmp[pitch<<1]&0x07e0)) >> 1)&0x07e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
-#endif
-
- for(ptrdiff_t i = (pitch&7)>>1; i--; tmp++)
- {
- tmp[pitch] =
- ((((*tmp&0xf800) + (tmp[pitch<<1]&0xf800)) >> 1)&0xf800)|
- ((((*tmp&0x07e0) + (tmp[pitch<<1]&0x07e0)) >> 1)&0x07e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-#ifndef _WIN64
-extern "C" void mmx_YUY2toRGB24(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709);
-extern "C" void mmx_YUY2toRGB32(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709);
-#endif
-
-bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch)
-{
- void (* YUY2toRGB)(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709) = NULL;
-
-#ifndef _WIN64
- if(g_cpuid.m_flags & CCpuID::mmx)
- {
- YUY2toRGB =
- dbpp == 32 ? mmx_YUY2toRGB32 :
- dbpp == 24 ? mmx_YUY2toRGB24 :
- // dbpp == 16 ? mmx_YUY2toRGB16 : // TODO
- NULL;
- }
- else
-#endif
- {
- ASSERT(FALSE);
- // TODO
- }
-
- if(!YUY2toRGB)
- return(false);
-
- YUY2toRGB(src, dst, src + h*srcpitch, srcpitch, w, false);
-
- return(true);
-}
diff --git a/src/DSUtil/vd.h b/src/DSUtil/vd.h
index a69e406c0..0db586cec 100644
--- a/src/DSUtil/vd.h
+++ b/src/DSUtil/vd.h
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,25 +17,22 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - BitBltFromYUY2ToRGB is from AviSynth 2.52
+// - VDPixmapBlt is from VirtualDub
+// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
#pragma once
-class CCpuID {public: CCpuID(); enum flag_t {mmx=1, ssemmx=2, ssefpu=4, sse2=8, _3dnow=16, sse3=32} m_flags;};
+class CCpuID {public: CCpuID(); enum flag_t {mmx=1, ssemmx=2, ssefpu=4, sse2=8, _3dnow=16} m_flags;};
extern CCpuID g_cpuid;
extern bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
-extern bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch, bool fInterlaced = false);
+extern bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
+extern bool BitBltFromI420ToYUY2Interlaced(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
extern bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch /* TODO: , bool fInterlaced = false */);
extern bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch);
extern bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch);
extern bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp);
extern void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch);
-extern void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield);
-
-extern void AvgLines8(BYTE* dst, DWORD h, DWORD pitch);
-extern void AvgLines555(BYTE* dst, DWORD h, DWORD pitch);
-extern void AvgLines565(BYTE* dst, DWORD h, DWORD pitch); \ No newline at end of file
+extern void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield); \ No newline at end of file
diff --git a/src/DSUtil/vd_asm.cpp b/src/DSUtil/vd_asm.cpp
index 851449089..3fc521844 100644
--- a/src/DSUtil/vd_asm.cpp
+++ b/src/DSUtil/vd_asm.cpp
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,7 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
+// - VDPixmapBlt is from VirtualDub
// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
@@ -428,289 +429,4 @@ last4:
ret
};
}
-
-void __declspec(naked) asm_blend_row_clipped_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- static const __int64 _x0001000100010001 = 0x0001000100010001;
-
- __asm {
- push ebp
- push edi
- push esi
- push ebx
-
- mov edi,[esp+20]
- mov esi,[esp+24]
- sub edi,esi
- mov ebp,[esp+28]
- mov edx,[esp+32]
-
- shr ebp, 3
-
- movq mm6, _x0001000100010001
- pxor mm7, mm7
-
-xloop:
- movq mm0, [esi]
- movq mm3, mm0
- punpcklbw mm0, mm7
- punpckhbw mm3, mm7
-
- movq mm1, [esi+edx]
- movq mm4, mm1
- punpcklbw mm1, mm7
- punpckhbw mm4, mm7
-
- paddw mm1, mm0
- paddw mm1, mm6
- psrlw mm1, 1
-
- paddw mm4, mm3
- paddw mm4, mm6
- psrlw mm4, 1
-
- add esi, 8
- packuswb mm1, mm4
- movq [edi+esi-8], mm1
-
- dec ebp
- jne xloop
-
- pop ebx
- pop esi
- pop edi
- pop ebp
- ret
- };
-}
-
-void __declspec(naked) asm_blend_row_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- static const __int64 mask0 = 0xfcfcfcfcfcfcfcfci64;
- static const __int64 mask1 = 0x7f7f7f7f7f7f7f7fi64;
- static const __int64 mask2 = 0x3f3f3f3f3f3f3f3fi64;
- static const __int64 _x0002000200020002 = 0x0002000200020002;
-
- __asm {
- push ebp
- push edi
- push esi
- push ebx
-
- mov edi, [esp+20]
- mov esi, [esp+24]
- sub edi, esi
- mov ebp, [esp+28]
- mov edx, [esp+32]
-
- shr ebp, 3
-
- movq mm6, _x0002000200020002
- pxor mm7, mm7
-
-xloop:
- movq mm0, [esi]
- movq mm3, mm0
- punpcklbw mm0, mm7
- punpckhbw mm3, mm7
-
- movq mm1, [esi+edx]
- movq mm4, mm1
- punpcklbw mm1, mm7
- punpckhbw mm4, mm7
-
- movq mm2, [esi+edx*2]
- movq mm5, mm2
- punpcklbw mm2, mm7
- punpckhbw mm5, mm7
-
- psllw mm1, 1
- paddw mm1, mm0
- paddw mm1, mm2
- paddw mm1, mm6
- psrlw mm1, 2
-
- psllw mm4, 1
- paddw mm4, mm3
- paddw mm4, mm5
- paddw mm4, mm6
- psrlw mm4, 2
-
- add esi, 8
- packuswb mm1, mm4
- movq [edi+esi-8], mm1
-
- dec ebp
- jne xloop
-
- // sadly the original code makes a lot of visible banding artifacts on yuv
- // (it seems those shiftings without rounding introduce too much error)
-/*
- mov edi,[esp+20]
- mov esi,[esp+24]
- sub edi,esi
- mov ebp,[esp+28]
- mov edx,[esp+32]
-
- movq mm5,mask0
- movq mm6,mask1
- movq mm7,mask2
- shr ebp,1
- jz oddpart
-
-xloop:
- movq mm2,[esi]
- movq mm0,mm5
-
- movq mm1,[esi+edx]
- pand mm0,mm2
-
- psrlq mm1,1
- movq mm2,[esi+edx*2]
-
- psrlq mm2,2
- pand mm1,mm6
-
- psrlq mm0,2
- pand mm2,mm7
-
- paddb mm0,mm1
- add esi,8
-
- paddb mm0,mm2
- dec ebp
-
- movq [edi+esi-8],mm0
- jne xloop
-
-oddpart:
- test byte ptr [esp+28],1
- jz nooddpart
-
- mov ecx,[esi]
- mov eax,0fcfcfcfch
- mov ebx,[esi+edx]
- and eax,ecx
- shr ebx,1
- mov ecx,[esi+edx*2]
- shr ecx,2
- and ebx,07f7f7f7fh
- shr eax,2
- and ecx,03f3f3f3fh
- add eax,ebx
- add eax,ecx
- mov [edi+esi],eax
-
-nooddpart:
-*/
- pop ebx
- pop esi
- pop edi
- pop ebp
- ret
- };
-}
-
-__declspec(align(16)) static BYTE const_1_16_bytes[] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
-
-void asm_blend_row_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- __asm
- {
- mov edx, srcpitch
- mov esi, src
- mov edi, dst
- sub edi, esi
- mov ecx, w
- mov ebx, ecx
- shr ecx, 4
- and ebx, 15
-
- movdqa xmm7, [const_1_16_bytes]
-
-asm_blend_row_SSE2_loop:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi+edx]
- movdqa xmm2, [esi+edx*2]
- pavgb xmm0, xmm1
- pavgb xmm2, xmm1
- psubusb xmm0, xmm7
- pavgb xmm0, xmm2
- movdqa [esi+edi], xmm0
- add esi, 16
- dec ecx
- jnz asm_blend_row_SSE2_loop
-
- test ebx,15
- jz asm_blend_row_SSE2_end
-
- mov ecx, ebx
- xor ax, ax
- xor bx, bx
- xor dx, dx
-asm_blend_row_SSE2_loop2:
- mov al, [esi]
- mov bl, [esi+edx]
- mov dl, [esi+edx*2]
- add ax, bx
- inc ax
- shr ax, 1
- add dx, bx
- inc dx
- shr dx, 1
- add ax, dx
- shr ax, 1
- mov [esi+edi], al
- inc esi
- dec ecx
- jnz asm_blend_row_SSE2_loop2
-
-asm_blend_row_SSE2_end:
- }
-}
-
-void asm_blend_row_clipped_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- __asm
- {
- mov edx, srcpitch
- mov esi, src
- mov edi, dst
- sub edi, esi
- mov ecx, w
- mov ebx, ecx
- shr ecx, 4
- and ebx, 15
-
- movdqa xmm7, [const_1_16_bytes]
-
-asm_blend_row_clipped_SSE2_loop:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi+edx]
- pavgb xmm0, xmm1
- movdqa [esi+edi], xmm0
- add esi, 16
- dec ecx
- jnz asm_blend_row_clipped_SSE2_loop
-
- test ebx,15
- jz asm_blend_row_clipped_SSE2_end
-
- mov ecx, ebx
- xor ax, ax
- xor bx, bx
-asm_blend_row_clipped_SSE2_loop2:
- mov al, [esi]
- mov bl, [esi+edx]
- add ax, bx
- inc ax
- shr ax, 1
- mov [esi+edi], al
- inc esi
- dec ecx
- jnz asm_blend_row_clipped_SSE2_loop2
-
-asm_blend_row_clipped_SSE2_end:
- }
-}
#endif
diff --git a/src/DSUtil/vd_asm.h b/src/DSUtil/vd_asm.h
index c1c78f39b..7c1f2f134 100644
--- a/src/DSUtil/vd_asm.h
+++ b/src/DSUtil/vd_asm.h
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,8 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - BitBltFromYUY2ToRGB is from AviSynth 2.52
+// - VDPixmapBlt is from VirtualDub
// (- vd.cpp/h should be renamed to something more sensible already :)
#pragma once
@@ -31,9 +31,4 @@ void yv12_yuy2_row_sse2_linear();
void yv12_yuy2_row_sse2_linear_interlaced();
void yv12_yuy2_sse2(const BYTE *Y, const BYTE *U, const BYTE *V, int halfstride, unsigned halfwidth, unsigned height, BYTE *YUY2, int d_stride);
void yv12_yuy2_sse2_interlaced(const BYTE *Y, const BYTE *U, const BYTE *V, int halfstride, unsigned halfwidth, unsigned height, BYTE *YUY2, int d_stride);
-
-void asm_blend_row_clipped_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_clipped_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
#endif