github.com/mpc-hc/mpc-hc.git
author    kinddragon <kinddragon@users.sourceforge.net>  2010-05-21 04:53:52 +0400
committer kinddragon <kinddragon@users.sourceforge.net>  2010-05-21 04:53:52 +0400
commit    37f62abd654047d060c86d6c76cd2f6862f89b94 (patch)
tree      83eb125bd86f8a685928e290e2ec929ce633bc53 /src
parent    dae6425e0c23576dac77c3afae1dc6de22f983d5 (diff)
DSUtil now uses the new VirtualDub libraries (SSE2 deinterlacing for the MPEG2 decoder).
AudioSwitcher: rare memory corruption fixed.

git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1907 10f7b99b-c216-0410-bff0-8a66a9350fd8
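The commit replaces DSUtil's hand-rolled colour-space and deinterlacing code with the VirtualDub "Kasumi" and "system" libraries imported under src/thirdparty/VirtualDub. As a hedged illustration only (not taken from the patch), this is roughly how a filter could call the two public deinterlacing entry points whose declarations appear in the src/DSUtil/vd.h diff below; all names and the geometry here are hypothetical:

    #include "vd.h"  // DeinterlaceBlend / DeinterlaceBob declarations

    // Sketch: deinterlace one RGB32 plane, either by blending or by bobbing one field.
    void DeinterlaceRGB32Frame(BYTE* dst, BYTE* src, DWORD width, DWORD height,
                               DWORD dstpitch, DWORD srcpitch, bool blend, bool topfield)
    {
        DWORD rowbytes = width * 4; // the 'rowbytes' argument is in bytes, not pixels
        if (blend)
            DeinterlaceBlend(dst, src, rowbytes, height, dstpitch, srcpitch);
        else
            DeinterlaceBob(dst, src, rowbytes, height, dstpitch, srcpitch, topfield);
    }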
Diffstat (limited to 'src')
-rw-r--r--  src/DSUtil/deinterlace.cpp | 526
-rw-r--r--  src/DSUtil/dsutil.vcproj | 132
-rw-r--r--  src/DSUtil/vd.cpp | 970
-rw-r--r--  src/DSUtil/vd.h | 18
-rw-r--r--  src/DSUtil/vd_asm.cpp | 290
-rw-r--r--  src/DSUtil/vd_asm.h | 11
-rw-r--r--  src/YASM.rules | 19
-rw-r--r--  src/common.vsprops | 1
-rw-r--r--  src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp | 10
-rw-r--r--  src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp | 5
-rw-r--r--  src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp | 5
-rw-r--r--  src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp | 1
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/Kasumi.vcproj | 1527
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/bitutils.h | 26
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/blt_setup.h | 62
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils.h | 23
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils_x86.h | 35
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/resample_stages.h | 80
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/resample_stages_reference.h | 156
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x64.h | 26
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x86.h | 193
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit.h | 83
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_16f.h | 39
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_base.h | 129
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_fill.h | 55
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_gen.h | 167
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_input.h | 69
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_pal.h | 148
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample.h | 83
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special.h | 81
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special_x86.h | 26
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb.h | 552
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb_x86.h | 114
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle.h | 343
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle_x86.h | 71
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_v210.h | 72
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr.h | 584
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr_x86.h | 27
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a64_resample.asm | 64620
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb.asm | 812
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb2yuv_mmx.asm | 652
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb_mmx.asm | 806
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_bltyuv2rgb_sse2.asm | 161
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_resample_mmx.asm | 1559
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_resample_sse41.asm | 358
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_spanutils_isse.asm | 193
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_mmx.asm | 326
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_point.asm | 96
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_triblt.inc | 24
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_triblt_mmx.asm | 425
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_triblt_scalar.asm | 36
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_triblt_sse2.asm | 197
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/alphablt.cpp | 76
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt.cpp | 273
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference.cpp | 259
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_pal.cpp | 545
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_rgb.cpp | 310
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv.cpp | 1590
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv2yuv.cpp | 260
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuvrev.cpp | 530
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_setup.cpp | 17
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils.cpp | 365
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils_x86.cpp | 170
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_uberblit.cpp | 19
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_x86.cpp | 144
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/pixel.cpp | 667
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/pixmaputils.cpp | 519
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/region.cpp | 1334
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample.cpp | 348
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_kernels.cpp | 255
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_stages.cpp | 149
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_stages_reference.cpp | 425
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x64.cpp | 26
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x86.cpp | 1277
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/stretchblt_reference.cpp | 816
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/tables.cpp | 204
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/triblt.cpp | 1717
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit.cpp | 903
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_16f.cpp | 40
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_gen.cpp | 1597
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample.cpp | 623
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special.cpp | 186
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special_x86.cpp | 35
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle.cpp | 89
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle_x86.cpp | 400
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_v210.cpp | 199
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_ycbcr_x86.cpp | 35
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/blitter.h | 19
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/pixel.h | 40
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmap.h | 76
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmapops.h | 20
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmaputils.h | 171
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/region.h | 92
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/resample.h | 31
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/resample_kernels.h | 91
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/tables.h | 41
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/text.h | 62
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/triblt.h | 71
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/Error.h | 119
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/Fraction.h | 95
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDNamespace.h | 157
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDQueue.h | 90
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDRingBuffer.h | 301
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDScheduler.h | 125
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDString.h | 1134
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/atomic.h | 282
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/binary.h | 184
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/bitmath.h | 75
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/cache.h | 325
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/cmdline.h | 69
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/cpuaccel.h | 49
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/debug.h | 96
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/debugx86.h | 37
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/event.h | 201
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/file.h | 323
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/fileasync.h | 64
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/filesys.h | 170
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/filewatcher.h | 45
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/halffloat.h | 9
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/hash.h | 47
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/int128.h | 361
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/list.h | 275
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/log.h | 70
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/math.h | 259
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/memory.h | 84
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/profile.h | 167
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/progress.h | 96
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/protscope.h | 245
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/refcount.h | 282
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/registry.h | 84
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/source/bitmath.cpp | 67
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/strutil.h | 44
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/text.h | 60
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/thread.h | 269
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/thunk.h | 76
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/time.h | 118
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/tls.h | 38
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/unknown.h | 77
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vdalloc.h | 123
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vdstl.h | 1610
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vdtypes.h | 415
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vectors.h | 568
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vectors_float.h | 207
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vectors_int.h | 183
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/w32assist.h | 95
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/win32/miniwindows.h | 53
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/zip.h | 220
-rw-r--r--  src/thirdparty/VirtualDub/system/h/stdafx.h | 12
-rw-r--r--  src/thirdparty/VirtualDub/system/source/Error.cpp | 340
-rw-r--r--  src/thirdparty/VirtualDub/system/source/Fraction.cpp | 327
-rw-r--r--  src/thirdparty/VirtualDub/system/source/VDNamespace.cpp | 254
-rw-r--r--  src/thirdparty/VirtualDub/system/source/VDScheduler.cpp | 261
-rw-r--r--  src/thirdparty/VirtualDub/system/source/VDString.cpp | 209
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a64_fraction.asm | 58
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a64_int128.asm | 73
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a64_thunk.asm | 58
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a_memory.asm | 135
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a_thunk.asm | 63
-rw-r--r--  src/thirdparty/VirtualDub/system/source/cache.cpp | 422
-rw-r--r--  src/thirdparty/VirtualDub/system/source/cmdline.cpp | 178
-rw-r--r--  src/thirdparty/VirtualDub/system/source/cpuaccel.cpp | 251
-rw-r--r--  src/thirdparty/VirtualDub/system/source/debug.cpp | 290
-rw-r--r--  src/thirdparty/VirtualDub/system/source/debugx86.cpp | 154
-rw-r--r--  src/thirdparty/VirtualDub/system/source/event.cpp | 81
-rw-r--r--  src/thirdparty/VirtualDub/system/source/file.cpp | 795
-rw-r--r--  src/thirdparty/VirtualDub/system/source/fileasync.cpp | 832
-rw-r--r--  src/thirdparty/VirtualDub/system/source/filesys.cpp | 663
-rw-r--r--  src/thirdparty/VirtualDub/system/source/filewatcher.cpp | 117
-rw-r--r--  src/thirdparty/VirtualDub/system/source/halffloat.cpp | 79
-rw-r--r--  src/thirdparty/VirtualDub/system/source/hash.cpp | 98
-rw-r--r--  src/thirdparty/VirtualDub/system/source/int128.cpp | 478
-rw-r--r--  src/thirdparty/VirtualDub/system/source/list.cpp | 97
-rw-r--r--  src/thirdparty/VirtualDub/system/source/log.cpp | 171
-rw-r--r--  src/thirdparty/VirtualDub/system/source/math.cpp | 146
-rw-r--r--  src/thirdparty/VirtualDub/system/source/memory.cpp | 456
-rw-r--r--  src/thirdparty/VirtualDub/system/source/profile.cpp | 234
-rw-r--r--  src/thirdparty/VirtualDub/system/source/progress.cpp | 35
-rw-r--r--  src/thirdparty/VirtualDub/system/source/protscope.cpp | 37
-rw-r--r--  src/thirdparty/VirtualDub/system/source/refcount.cpp | 29
-rw-r--r--  src/thirdparty/VirtualDub/system/source/registry.cpp | 243
-rw-r--r--  src/thirdparty/VirtualDub/system/source/stdaccel.cpp | 42
-rw-r--r--  src/thirdparty/VirtualDub/system/source/stdafx.cpp | 46
-rw-r--r--  src/thirdparty/VirtualDub/system/source/strutil.cpp | 99
-rw-r--r--  src/thirdparty/VirtualDub/system/source/text.cpp | 652
-rw-r--r--  src/thirdparty/VirtualDub/system/source/thread.cpp | 274
-rw-r--r--  src/thirdparty/VirtualDub/system/source/thunk.cpp | 306
-rw-r--r--  src/thirdparty/VirtualDub/system/source/time.cpp | 270
-rw-r--r--  src/thirdparty/VirtualDub/system/source/tls.cpp | 43
-rw-r--r--  src/thirdparty/VirtualDub/system/source/vdstl.cpp | 32
-rw-r--r--  src/thirdparty/VirtualDub/system/source/vectors.cpp | 77
-rw-r--r--  src/thirdparty/VirtualDub/system/source/w32assist.cpp | 580
-rw-r--r--  src/thirdparty/VirtualDub/system/source/zip.cpp | 603
-rw-r--r--  src/thirdparty/VirtualDub/system/system.vcproj | 1906
193 files changed, 51711 insertions, 1178 deletions
diff --git a/src/DSUtil/deinterlace.cpp b/src/DSUtil/deinterlace.cpp
new file mode 100644
index 000000000..a66915dfd
--- /dev/null
+++ b/src/DSUtil/deinterlace.cpp
@@ -0,0 +1,526 @@
+// VirtualDub - Video processing and capture application
+// Copyright (C) 1998-2001 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include "stdafx.h"
+#include <emmintrin.h>
+#include <vd2/system/cpuaccel.h>
+
+#define uint8 unsigned char
+#define uint32 unsigned int
+#define uint64 unsigned __int64
+
+#ifdef _M_IX86
+#define VD_CPU_X86
+#endif
+
+#ifdef _M_X64
+#define VD_CPU_AMD64
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+
+#pragma warning(disable: 4799) // warning C4799: function has no EMMS instruction
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _M_IX86
+static void __declspec(naked) asm_blend_row_clipped(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+xloop:
+ mov ecx,[esi]
+ mov eax,0fefefefeh
+
+ mov ebx,[esi+edx]
+ and eax,ecx
+
+ shr eax,1
+ and ebx,0fefefefeh
+
+ shr ebx,1
+ add esi,4
+
+ add eax,ebx
+ dec ebp
+
+ mov [edi+esi-4],eax
+ jnz xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+static void __declspec(naked) asm_blend_row(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+xloop:
+ mov ecx,[esi]
+ mov eax,0fcfcfcfch
+
+ mov ebx,[esi+edx]
+ and eax,ecx
+
+ shr ebx,1
+ mov ecx,[esi+edx*2]
+
+ shr ecx,2
+ and ebx,07f7f7f7fh
+
+ shr eax,2
+ and ecx,03f3f3f3fh
+
+ add eax,ebx
+ add esi,4
+
+ add eax,ecx
+ dec ebp
+
+ mov [edi+esi-4],eax
+ jnz xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+static void __declspec(naked) asm_blend_row_MMX(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ static const __declspec(align(8)) __int64 mask0 = 0xfcfcfcfcfcfcfcfci64;
+ static const __declspec(align(8)) __int64 mask1 = 0x7f7f7f7f7f7f7f7fi64;
+ static const __declspec(align(8)) __int64 mask2 = 0x3f3f3f3f3f3f3f3fi64;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+ movq mm5,mask0
+ movq mm6,mask1
+ movq mm7,mask2
+ inc ebp
+ shr ebp,1
+xloop:
+ movq mm2,[esi]
+ movq mm0,mm5
+
+ movq mm1,[esi+edx]
+ pand mm0,mm2
+
+ psrlq mm1,1
+ movq mm2,[esi+edx*2]
+
+ psrlq mm2,2
+ pand mm1,mm6
+
+ psrlq mm0,2
+ pand mm2,mm7
+
+ paddb mm0,mm1
+ add esi,8
+
+ paddb mm0,mm2
+ dec ebp
+
+ movq [edi+esi-8],mm0
+ jne xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+static void __declspec(naked) asm_blend_row_ISSE(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+ inc ebp
+ shr ebp,1
+ pcmpeqb mm7, mm7
+
+ align 16
+xloop:
+ movq mm0, [esi]
+ movq mm2, mm7
+ pxor mm0, mm7
+
+ pxor mm2, [esi+edx*2]
+ pavgb mm0, mm2
+ pxor mm0, mm7
+
+ pavgb mm0, [esi+edx]
+ add esi,8
+
+ movq [edi+esi-8],mm0
+ dec ebp
+ jne xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+#else
+static void asm_blend_row_clipped(void *dst0, const void *src0, uint32 w, ptrdiff_t srcpitch) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+ const uint32 *src2 = (const uint32 *)((const char *)src + srcpitch);
+
+ do {
+ const uint32 x = *src++;
+ const uint32 y = *src2++;
+
+ *dst++ = (x|y) - (((x^y)&0xfefefefe)>>1);
+ } while(--w);
+}
+
+static void asm_blend_row(void *dst0, const void *src0, uint32 w, ptrdiff_t srcpitch) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+ const uint32 *src2 = (const uint32 *)((const char *)src + srcpitch);
+ const uint32 *src3 = (const uint32 *)((const char *)src2 + srcpitch);
+
+ do {
+ const uint32 a = *src++;
+ const uint32 b = *src2++;
+ const uint32 c = *src3++;
+ const uint32 hi = (a & 0xfcfcfc) + 2*(b & 0xfcfcfc) + (c & 0xfcfcfc);
+ const uint32 lo = (a & 0x030303) + 2*(b & 0x030303) + (c & 0x030303) + 0x020202;
+
+ *dst++ = (hi + (lo & 0x0c0c0c))>>2;
+ } while(--w);
+}
+#endif
+
+#if defined(VD_CPU_X86) || defined(VD_CPU_AMD64)
+ static void asm_blend_row_SSE2(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __m128i zero = _mm_setzero_si128();
+ __m128i inv = _mm_cmpeq_epi8(zero, zero);
+
+ w = (w + 3) >> 2;
+
+ const __m128i *src1 = (const __m128i *)src;
+ const __m128i *src2 = (const __m128i *)((const char *)src + srcpitch);
+ const __m128i *src3 = (const __m128i *)((const char *)src + srcpitch*2);
+ __m128i *dstrow = (__m128i *)dst;
+ do {
+ __m128i a = *src1++;
+ __m128i b = *src2++;
+ __m128i c = *src3++;
+
+ *dstrow++ = _mm_avg_epu8(_mm_xor_si128(_mm_avg_epu8(_mm_xor_si128(a, inv), _mm_xor_si128(c, inv)), inv), b);
+ } while(--w);
+ }
+
+#endif
+
+namespace {
+
+ void Average_scalar(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ uint32 w4 = w16 << 2;
+ do {
+ uint32 *dstv = (uint32 *)dst;
+ uint32 *src1v = (uint32 *)src1;
+ uint32 *src2v = (uint32 *)src2;
+
+ for(uint32 i=0; i<w4; ++i) {
+ uint32 a = src1v[i];
+ uint32 b = src2v[i];
+
+ dstv[i] = (a|b) - (((a^b) & 0xfefefefe) >> 1);
+ }
+
+ dst = (char *)dst + dstPitch;
+ src1 = (char *)src1 + srcPitch;
+ src2 = (char *)src2 + srcPitch;
+ } while(--h);
+ }
+
+#if defined(VD_CPU_X86)
+ void __declspec(naked) __cdecl Average_MMX(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ static const __declspec(align(8)) uint64 x7fb = 0x7f7f7f7f7f7f7f7f;
+ static const __declspec(align(8)) uint64 xfeb = 0xfefefefefefefefe;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+24+16]
+ mov eax, [esp+4+16]
+ shl esi, 4
+ mov ecx, [esp+12+16]
+ mov edx, [esp+16+16]
+ mov ebp, [esp+20+16]
+ mov edi, [esp+8+16]
+ sub edi, esi
+ sub ebp, esi
+
+ movq mm6, x7fb
+ movq mm7, xfeb
+
+ mov esi, [esp+28+16]
+yloop:
+ mov ebx, [esp+24+16]
+mainRowLoop:
+ movq mm0, [ecx]
+ movq mm3, [ecx + 8]
+ movq mm1, mm0
+ movq mm2, [edx]
+ movq mm4, mm3
+ movq mm5, [edx + 8]
+ por mm1, mm2
+ pxor mm0, mm2
+ por mm4, mm5
+ pxor mm3, mm5
+ psrlq mm0, 1
+ pand mm3, mm7
+ pand mm0, mm6
+ psrlq mm3, 1
+ psubb mm1, mm0
+ psubb mm4, mm3
+ add ecx, 16
+ movq [eax], mm1
+ movq [eax+8], mm4
+ add edx, 16
+ add eax, 16
+ dec ebx
+ jne mainRowLoop
+
+ add eax, edi
+ add ecx, ebp
+ add edx, ebp
+ dec esi
+ jne yloop
+
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+ }
+
+ void __declspec(naked) __cdecl Average_ISSE(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ static const __declspec(align(8)) uint64 x7fb = 0x7f7f7f7f7f7f7f7f;
+ static const __declspec(align(8)) uint64 xfeb = 0xfefefefefefefefe;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+24+16]
+ mov eax, [esp+4+16]
+ shl esi, 4
+ mov ecx, [esp+12+16]
+ mov edx, [esp+16+16]
+ mov ebp, [esp+20+16]
+ mov edi, [esp+8+16]
+ sub edi, esi
+ sub ebp, esi
+
+ movq mm6, x7fb
+ movq mm7, xfeb
+
+ mov esi, [esp+28+16]
+yloop:
+ mov ebx, [esp+24+16]
+mainRowLoop:
+ movq mm0, [ecx]
+ movq mm1, [ecx + 8]
+ movq mm2, [edx]
+ movq mm3, [edx + 8]
+ pavgb mm0, mm2
+ pavgb mm1, mm3
+ movq [eax], mm0
+ add ecx, 16
+ add edx, 16
+ movq [eax+8], mm1
+ add eax, 16
+ dec ebx
+ jne mainRowLoop
+
+ add eax, edi
+ add ecx, ebp
+ add edx, ebp
+ dec esi
+ jne yloop
+
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+ }
+#endif
+
+#if defined(VD_CPU_X86) || defined(VD_CPU_AMD64)
+ void Average_SSE2(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ do {
+ __m128i *dstv = (__m128i *)dst;
+ __m128i *src1v = (__m128i *)src1;
+ __m128i *src2v = (__m128i *)src2;
+
+ for(uint32 i=0; i<w16; ++i)
+ dstv[i] = _mm_avg_epu8(src1v[i], src2v[i]);
+
+ dst = (char *)dst + dstPitch;
+ src1 = (char *)src1 + srcPitch;
+ src2 = (char *)src2 + srcPitch;
+ } while(--h);
+ }
+#endif
+
+ void InterpPlane_Bob(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, uint32 w, uint32 h, bool interpField2) {
+ void (*blend_func)(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h);
+#if defined(VD_CPU_X86)
+ if (SSE2_enabled)
+ blend_func = Average_SSE2;
+ else if (ISSE_enabled)
+ blend_func = Average_ISSE;
+ else if (MMX_enabled)
+ blend_func = Average_MMX;
+ else
+ blend_func = Average_scalar;
+#else
+ blend_func = Average_SSE2;
+#endif
+
+ w = (w + 3) >> 2;
+
+ int y0 = interpField2 ? 1 : 2;
+
+ if (!interpField2)
+ memcpy(dst, src, w * 4);
+
+ if (h > y0) {
+ ASSERT(((UINT_PTR)dst & 0xF) == 0);
+ ASSERT((dstpitch & 0xF) == 0);
+ ASSERT(((UINT_PTR)src & 0xF) == 0);
+ ASSERT((srcpitch*(y0 - 1) & 0xF) == 0);
+ blend_func((char *)dst + dstpitch*y0,
+ dstpitch*2,
+ (const char *)src + srcpitch*(y0 - 1),
+ (const char *)src + srcpitch*(y0 + 1),
+ srcpitch*2,
+ (w + 3) >> 2,
+ (h - y0) >> 1);
+ }
+
+ if (interpField2)
+ memcpy((char *)dst + dstpitch*(h - 1), (const char *)src + srcpitch*(h - 1), w*4);
+
+#ifdef _M_IX86
+ if (MMX_enabled)
+ __asm emms
+#endif
+ }
+
+ void BlendPlane(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, uint32 w, uint32 h) {
+ void (*blend_func)(void *, const void *, uint32, ptrdiff_t);
+#if defined(VD_CPU_X86)
+ if (SSE2_enabled)
+ blend_func = asm_blend_row_SSE2;
+ else
+ blend_func = ISSE_enabled ? asm_blend_row_ISSE : MMX_enabled ? asm_blend_row_MMX : asm_blend_row;
+#else
+ blend_func = asm_blend_row_SSE2;
+#endif
+
+ w = (w + 3) >> 2;
+
+ asm_blend_row_clipped(dst, src, w, srcpitch);
+ if (h-=2)
+ do {
+ dst = ((char *)dst + dstpitch);
+
+ blend_func(dst, src, w, srcpitch);
+
+ src = ((char *)src + srcpitch);
+ } while(--h);
+
+ asm_blend_row_clipped((char *)dst + dstpitch, src, w, srcpitch);
+
+#ifdef _M_IX86
+ if (MMX_enabled)
+ __asm emms
+#endif
+ }
+}
+
+void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD w, DWORD h, DWORD dstpitch, DWORD srcpitch)
+{
+ BlendPlane(dst, dstpitch, src, srcpitch, w, h);
+}
+
+void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD w, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield)
+{
+ topfield = !topfield;
+
+ InterpPlane_Bob(dst, dstpitch, src, srcpitch, w, h, topfield);
+}
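The blend kernel above weights three consecutive source rows as (1, 2, 1)/4. SSE2 has no single "average of three" instruction, so asm_blend_row_SSE2 builds the result from two pavgb (round-up average) operations, using the complement trick ~avg_ceil(~a, ~c) to get a floor average of the outer rows before averaging with the middle row. The following stand-alone check is illustrative only, not part of the patch; it models that per-byte arithmetic in scalar C++ and verifies it equals the rounded (a + 2*b + c + 2) / 4:

    #include <cassert>
    #include <cstdint>

    // Model of _mm_avg_epu8 on a single byte: average rounded up.
    static uint8_t avg_ceil(uint8_t x, uint8_t y) { return (uint8_t)((x + y + 1) >> 1); }

    // One byte of the SSE2 blend: avg(b, ~avg(~a, ~c)); the inner complemented
    // average is a floor average of a and c.
    static uint8_t blend_sse2_model(uint8_t a, uint8_t b, uint8_t c) {
        uint8_t ac = (uint8_t)~avg_ceil((uint8_t)~a, (uint8_t)~c); // floor((a + c) / 2)
        return avg_ceil(ac, b);
    }

    int main() {
        for (int a = 0; a < 256; ++a)
            for (int b = 0; b < 256; ++b)
                for (int c = 0; c < 256; ++c)
                    assert(blend_sse2_model((uint8_t)a, (uint8_t)b, (uint8_t)c)
                           == ((a + 2 * b + c + 2) >> 2));
        return 0;
    }

Note that the plain C fallback asm_blend_row above truncates instead of rounding, so its output can be one LSB lower than the SIMD paths.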
diff --git a/src/DSUtil/dsutil.vcproj b/src/DSUtil/dsutil.vcproj
index 49162423b..69a625c18 100644
--- a/src/DSUtil/dsutil.vcproj
+++ b/src/DSUtil/dsutil.vcproj
@@ -44,7 +44,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;"
/>
<Tool
@@ -105,7 +105,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="_WIN64;_DEBUG;_LIB;"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="3"
@@ -167,7 +167,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
BufferSecurityCheck="true"
EnableEnhancedInstructionSet="1"
@@ -233,7 +233,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="_WIN64;NDEBUG;_LIB"
BufferSecurityCheck="true"
EnableEnhancedInstructionSet="0"
@@ -278,130 +278,8 @@
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm"
>
<File
- RelativePath=".\a_yuv2rgb.asm"
+ RelativePath=".\deinterlace.cpp"
>
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\a_yuvtable.asm"
- >
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\convert_a.asm"
- >
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
</File>
<File
RelativePath=".\DSMPropertyBag.cpp"
diff --git a/src/DSUtil/vd.cpp b/src/DSUtil/vd.cpp
index 063fbfe06..0d7f77aaa 100644
--- a/src/DSUtil/vd.cpp
+++ b/src/DSUtil/vd.cpp
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,8 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - The core assembly function of CCpuID is from DVD2AVI
+// - VDPixmapBlt is from VirtualDub
// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
@@ -27,300 +27,272 @@
#include "vd_asm.h"
#include <intrin.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/memory.h>
+
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+
#pragma warning(disable : 4799) // no emms... blahblahblah
+void VDCPUTest() {
+ SYSTEM_INFO si;
+
+ long lEnableFlags = CPUCheckForExtensions();
+
+ GetSystemInfo(&si);
+
+ if (si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_INTEL)
+ if (si.wProcessorLevel < 4)
+ lEnableFlags &= ~CPUF_SUPPORTS_FPU; // Not strictly true, but very slow anyway
+
+ // Enable FPU support...
+
+ CPUEnableExtensions(lEnableFlags);
+
+ VDFastMemcpyAutodetect();
+}
+
CCpuID g_cpuid;
CCpuID::CCpuID()
-{
- int CPUInfo[4] = {-1};
- __cpuid(CPUInfo, 1);
- int t = CPUInfo[3];
-
- int mflags = 0;
- mflags |= ((t&0x00800000)!=0) ? mmx : 0; // STD MMX
- mflags |= ((t&0x02000000)!=0) ? ssemmx+ssefpu : 0; // STD SSE
- mflags |= ((t&0x04000000)!=0) ? sse2 : 0; // SSE2
+{
+ VDCPUTest();
- t = CPUInfo[2];
- mflags |= ((t&0x00000001)!=0) ? sse3 : 0; // SSE3
+ long lEnableFlags = CPUGetEnabledExtensions();
- // 3dnow
- __cpuid(CPUInfo, 0x80000001);
- t = CPUInfo[3];
- mflags |= ((t&0x80000000)!=0) ? _3dnow : 0; // 3D NOW
- mflags |= ((t&0x00400000)!=0) ? ssemmx : 0; // SSE MMX
+ int flags = 0;
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_MMX) ? mmx : 0; // STD MMX
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_INTEGER_SSE) ? ssemmx : 0; // SSE MMX
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_SSE) ? ssefpu : 0; // STD SSE
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_SSE2) ? sse2 : 0; // SSE2
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_3DNOW) ? _3dnow : 0; // 3DNow
// result
- m_flags = (flag_t)mflags;
+ m_flags = (flag_t)flags;
}
-void memcpy_accel(void* dst, const void* src, size_t len)
+bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::ssefpu) && len >= 128
- && !((DWORD)src&15) && !((DWORD)dst&15))
- {
- __asm
- {
- mov esi, dword ptr [src]
- mov edi, dword ptr [dst]
- mov ecx, len
- shr ecx, 7
- memcpy_accel_sse_loop:
- prefetchnta [esi+16*8]
- movaps xmm0, [esi]
- movaps xmm1, [esi+16*1]
- movaps xmm2, [esi+16*2]
- movaps xmm3, [esi+16*3]
- movaps xmm4, [esi+16*4]
- movaps xmm5, [esi+16*5]
- movaps xmm6, [esi+16*6]
- movaps xmm7, [esi+16*7]
- movntps [edi], xmm0
- movntps [edi+16*1], xmm1
- movntps [edi+16*2], xmm2
- movntps [edi+16*3], xmm3
- movntps [edi+16*4], xmm4
- movntps [edi+16*5], xmm5
- movntps [edi+16*6], xmm6
- movntps [edi+16*7], xmm7
- add esi, 128
- add edi, 128
- dec ecx
- jne memcpy_accel_sse_loop
- mov ecx, len
- and ecx, 127
- cmp ecx, 0
- je memcpy_accel_sse_end
- memcpy_accel_sse_loop2:
- mov dl, byte ptr[esi]
- mov byte ptr[edi], dl
- inc esi
- inc edi
- dec ecx
- jne memcpy_accel_sse_loop2
- memcpy_accel_sse_end:
- emms
- sfence
- }
- }
- else if((g_cpuid.m_flags & CCpuID::mmx) && len >= 64
- && !((DWORD)src&7) && !((DWORD)dst&7))
- {
- __asm
- {
- mov esi, dword ptr [src]
- mov edi, dword ptr [dst]
- mov ecx, len
- shr ecx, 6
- memcpy_accel_mmx_loop:
- movq mm0, qword ptr [esi]
- movq mm1, qword ptr [esi+8*1]
- movq mm2, qword ptr [esi+8*2]
- movq mm3, qword ptr [esi+8*3]
- movq mm4, qword ptr [esi+8*4]
- movq mm5, qword ptr [esi+8*5]
- movq mm6, qword ptr [esi+8*6]
- movq mm7, qword ptr [esi+8*7]
- movq qword ptr [edi], mm0
- movq qword ptr [edi+8*1], mm1
- movq qword ptr [edi+8*2], mm2
- movq qword ptr [edi+8*3], mm3
- movq qword ptr [edi+8*4], mm4
- movq qword ptr [edi+8*5], mm5
- movq qword ptr [edi+8*6], mm6
- movq qword ptr [edi+8*7], mm7
- add esi, 64
- add edi, 64
- loop memcpy_accel_mmx_loop
- mov ecx, len
- and ecx, 63
- cmp ecx, 0
- je memcpy_accel_mmx_end
- memcpy_accel_mmx_loop2:
- mov dl, byte ptr [esi]
- mov byte ptr [edi], dl
- inc esi
- inc edi
- dec ecx
- jne memcpy_accel_mmx_loop2
- memcpy_accel_mmx_end:
- emms
- }
- }
- else
-#endif
- {
- memcpy(dst, src, len);
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch / 2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch / 2;
+
+ VDPixmap dstpxm = {0};
+
+ dstpxm.data = dsty;
+ dstpxm.pitch = dstpitch;
+ dstpxm.w = w;
+ dstpxm.h = h;
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ dstpxm.data2 = dstu;
+ dstpxm.pitch2 = dstpitch / 2;
+ dstpxm.data3 = dstv;
+ dstpxm.pitch3 = dstpitch / 2;
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void yuvtoyuy2row_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width)
+bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch)
{
- WORD* dstw = (WORD*)dst;
- for(; width > 1; width -= 2)
- {
- *dstw++ = (*srcu++<<8)|*srcy++;
- *dstw++ = (*srcv++<<8)|*srcy++;
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = src;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ VDPixmap dstpxm = {
+ dst,
+ NULL,
+ w,
+ h,
+ dstpitch
+ };
+
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void yuvtoyuy2row_avg_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width, DWORD pitchuv)
+bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
- WORD* dstw = (WORD*)dst;
- for(; width > 1; width -= 2, srcu++, srcv++)
- {
- *dstw++ = (((srcu[0]+srcu[pitchuv])>>1)<<8)|*srcy++;
- *dstw++ = (((srcv[0]+srcv[pitchuv])>>1)<<8)|*srcy++;
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch/2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch/2;
+
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
+
+ switch(dbpp) {
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ // TODO: check correct conversion work (555->565) when dpp == 16
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void asm_blend_row_clipped_c(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
+bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
- BYTE* src2 = src + srcpitch;
- do
+ if(srcpitch == 0) srcpitch = w;
+
+#ifndef _WIN64
+ if((g_cpuid.m_flags & CCpuID::sse2)
+ && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
+ && !((DWORD_PTR)dst&15) && !(dstpitch&15))
{
- *dst++ = (*src++ + *src2++ + 1) >> 1;
- } while(w--);
+ if(w<=0 || h<=0 || (w&1) || (h&1))
+ return(false);
+
+ yv12_yuy2_sse2(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
+ return(true);
+ }
+#endif
+
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch/2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch/2;
+
+ VDPixmap dstpxm = {
+ dst,
+ NULL,
+ w,
+ h,
+ dstpitch
+ };
+
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void asm_blend_row_c(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
+bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp)
{
- BYTE* src2 = src + srcpitch;
- BYTE* src3 = src2 + srcpitch;
- do
- {
- *dst++ = (*src++ + (*src2++ << 1) + *src3++ + 2) >> 2;
- } while(w--);
+ VDPixmap srcbm = {
+ (char *)src + srcpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -srcpitch
+ };
+
+ switch(dbpp) {
+ case 8: srcbm.format = nsVDPixmap::kPixFormat_Pal8; break;
+ case 16: srcbm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: srcbm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: srcbm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
+
+ switch(dbpp) {
+ case 8: dstpxm.format = nsVDPixmap::kPixFormat_Pal8; break;
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
+bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch)
{
- if((w&1))
- return(false);
-
- if(w > 0 && w == srcpitch && w == dstpitch)
- {
- memcpy_accel(dsty, srcy, h*srcpitch);
- memcpy_accel(dstu, srcu, h/2*srcpitch/2);
- memcpy_accel(dstv, srcv, h/2*srcpitch/2);
- }
- else
- {
- int pitch = min(abs(srcpitch), abs(dstpitch));
-
- for(ptrdiff_t y = 0; y < h; y++, srcy += srcpitch, dsty += dstpitch)
- memcpy_accel(dsty, srcy, pitch);
+ if(srcpitch == 0) srcpitch = w;
- srcpitch >>= 1;
- dstpitch >>= 1;
+ VDPixmap srcbm = {0};
- pitch = min(abs(srcpitch), abs(dstpitch));
+ srcbm.data = src;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
- for(ptrdiff_t y = 0; y < h; y+=2, srcu += srcpitch, dstu += dstpitch)
- memcpy_accel(dstu, srcu, pitch);
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
- for(ptrdiff_t y = 0; y < h; y+=2, srcv += srcpitch, dstv += dstpitch)
- memcpy_accel(dstv, srcv, pitch);
+ switch(dbpp) {
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
}
- return(true);
+ return VDPixmapBlt(dstpxm, srcbm);
}
-bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch)
+static void yuvtoyuy2row_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width)
{
- if(w > 0 && w == srcpitch && w == dstpitch)
- {
- memcpy_accel(dst, src, h*srcpitch);
- }
- else
+ WORD* dstw = (WORD*)dst;
+ for(; width > 1; width -= 2)
{
- int pitch = min(abs(srcpitch), abs(dstpitch));
-
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- memcpy_accel(dst, src, pitch);
+ *dstw++ = (*srcu++<<8)|*srcy++;
+ *dstw++ = (*srcv++<<8)|*srcy++;
}
-
- return(true);
}
-#ifndef _WIN64
-extern "C" void asm_YUVtoRGB32_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB32_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB32_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-#endif
-
-bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
+static void yuvtoyuy2row_avg_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width, DWORD pitchuv)
{
- if(w<=0 || h<=0 || (w&1) || (h&1))
- return(false);
-
-#ifndef _WIN64
- void (*asm_YUVtoRGB_row)(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width) = NULL;;
-
- if((g_cpuid.m_flags & CCpuID::ssefpu) && !(w&7))
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row/*_ISSE*/; break; // TODO: fix _ISSE (555->565)
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row_ISSE; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row_ISSE; break;
- }
- }
- else if((g_cpuid.m_flags & CCpuID::mmx) && !(w&7))
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row/*_MMX*/; break; // TODO: fix _MMX (555->565)
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row_MMX; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row_MMX; break;
- }
- }
- else
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row; break;
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row; break;
- }
- }
-
- if(!asm_YUVtoRGB_row)
- return(false);
-
- do
+ WORD* dstw = (WORD*)dst;
+ for(; width > 1; width -= 2, srcu++, srcv++)
{
- asm_YUVtoRGB_row(dst + dstpitch, dst, srcy + srcpitch, srcy, srcu, srcv, w/2);
-
- dst += 2*dstpitch;
- srcy += srcpitch*2;
- srcu += srcpitch/2;
- srcv += srcpitch/2;
+ *dstw++ = (((srcu[0]+srcu[pitchuv])>>1)<<8)|*srcy++;
+ *dstw++ = (((srcv[0]+srcv[pitchuv])>>1)<<8)|*srcy++;
}
- while(h -= 2);
-
- if(g_cpuid.m_flags & CCpuID::mmx)
- __asm emms
-
- if(g_cpuid.m_flags & CCpuID::ssefpu)
- __asm sfence
-
- return(true);
-#else
- ASSERT(FALSE);
- return(false);
-#endif
}
-bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch, bool fInterlaced)
+bool BitBltFromI420ToYUY2Interlaced(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
if(w<=0 || h<=0 || (w&1) || (h&1))
return(false);
@@ -332,16 +304,15 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
#ifndef _WIN64
if((g_cpuid.m_flags & CCpuID::sse2)
- && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
- && !((DWORD_PTR)dst&15) && !(dstpitch&15))
+ && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
+ && !((DWORD_PTR)dst&15) && !(dstpitch&15))
{
- if(!fInterlaced) yv12_yuy2_sse2(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
- else yv12_yuy2_sse2_interlaced(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
+ yv12_yuy2_sse2_interlaced(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
return(true);
}
else
{
- ASSERT(!fInterlaced);
+ ASSERT(FALSE);
}
if((g_cpuid.m_flags & CCpuID::mmx) && !(w&7))
@@ -359,15 +330,16 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
if(!yuvtoyuy2row)
return(false);
+ int halfsrcpitch = srcpitch/2;
do
{
yuvtoyuy2row(dst, srcy, srcu, srcv, w);
- yuvtoyuy2row_avg(dst + dstpitch, srcy + srcpitch, srcu, srcv, w, srcpitch/2);
+ yuvtoyuy2row_avg(dst + dstpitch, srcy + srcpitch, srcu, srcv, w, halfsrcpitch);
dst += 2*dstpitch;
- srcy += srcpitch*2;
- srcu += srcpitch/2;
- srcv += srcpitch/2;
+ srcy += halfsrcpitch;
+ srcu += halfsrcpitch;
+ srcv += halfsrcpitch;
}
while((h -= 2) > 2);
@@ -381,481 +353,3 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
return(true);
}
-
-bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp)
-{
- if(dbpp == sbpp)
- {
- int rowbytes = w*dbpp>>3;
-
- if(rowbytes > 0 && rowbytes == srcpitch && rowbytes == dstpitch)
- {
- memcpy_accel(dst, src, h*rowbytes);
- }
- else
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- memcpy_accel(dst, src, rowbytes);
- }
-
- return(true);
- }
-
- if(sbpp != 16 && sbpp != 24 && sbpp != 32
- || dbpp != 16 && dbpp != 24 && dbpp != 32)
- return(false);
-
- if(dbpp == 16)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 24)
- {
- BYTE* s = (BYTE*)src;
- WORD* d = (WORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=3, d++)
- *d = (WORD)(((*((DWORD*)s)>>8)&0xf800)|((*((DWORD*)s)>>5)&0x07e0)|((*((DWORD*)s)>>3)&0x1f));
- }
- else if(sbpp == 32)
- {
- DWORD* s = (DWORD*)src;
- WORD* d = (WORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d++)
- *d = (WORD)(((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x1f));
- }
- }
- }
- else if(dbpp == 24)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 16)
- {
- WORD* s = (WORD*)src;
- BYTE* d = (BYTE*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d+=3)
- { // not tested, r-g-b might be in reverse
- d[0] = (*s&0x001f)<<3;
- d[1] = (*s&0x07e0)<<5;
- d[2] = (*s&0xf800)<<8;
- }
- }
- else if(sbpp == 32)
- {
- BYTE* s = (BYTE*)src;
- BYTE* d = (BYTE*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=4, d+=3)
- {d[0] = s[0]; d[1] = s[1]; d[2] = s[2];}
- }
- }
- }
- else if(dbpp == 32)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 16)
- {
- WORD* s = (WORD*)src;
- DWORD* d = (DWORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d++)
- *d = ((*s&0xf800)<<8)|((*s&0x07e0)<<5)|((*s&0x001f)<<3);
- }
- else if(sbpp == 24)
- {
- BYTE* s = (BYTE*)src;
- DWORD* d = (DWORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=3, d++)
- *d = *((DWORD*)s)&0xffffff;
- }
- }
- }
-
- return(true);
-}
-
-void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch)
-{
- void (*blend_row_clipped)(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch) = NULL;
- void (*blend_row)(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch) = NULL;
-
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::sse2) && !((DWORD)src&0xf) && !((DWORD)dst&0xf) && !(srcpitch&0xf))
- {
- blend_row_clipped = asm_blend_row_clipped_SSE2;
- blend_row = asm_blend_row_SSE2;
- }
- else if(g_cpuid.m_flags & CCpuID::mmx)
- {
- blend_row_clipped = asm_blend_row_clipped_MMX;
- blend_row = asm_blend_row_MMX;
- }
- else
-#endif
- {
- blend_row_clipped = asm_blend_row_clipped_c;
- blend_row = asm_blend_row_c;
- }
-
- if(!blend_row_clipped)
- return;
-
- blend_row_clipped(dst, src, rowbytes, srcpitch);
-
- if((h -= 2) > 0) do
- {
- dst += dstpitch;
- blend_row(dst, src, rowbytes, srcpitch);
- src += srcpitch;
- }
- while(--h);
-
- blend_row_clipped(dst + dstpitch, src, rowbytes, srcpitch);
-
-#ifndef _WIN64
- if(g_cpuid.m_flags & CCpuID::mmx)
- __asm emms
-#endif
-}
-
-void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield)
-{
- if(topfield)
- {
- BitBltFromRGBToRGB(rowbytes, h/2, dst, dstpitch*2, 8, src, srcpitch*2, 8);
- AvgLines8(dst, h, dstpitch);
- }
- else
- {
- BitBltFromRGBToRGB(rowbytes, h/2, dst + dstpitch, dstpitch*2, 8, src + srcpitch, srcpitch*2, 8);
- AvgLines8(dst + dstpitch, h-1, dstpitch);
- }
-}
-
-void AvgLines8(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- BYTE* tmp = s;
-
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::sse2) && !((DWORD)tmp&0xf) && !((DWORD)pitch&0xf))
- {
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 4
-
-AvgLines8_sse2_loop:
- movdqa xmm0, [esi]
- pavgb xmm0, [esi+ebx*2]
- movdqa [esi+ebx], xmm0
- add esi, 16
-
- dec ecx
- jnz AvgLines8_sse2_loop
-
- mov tmp, esi
- }
-
- for(ptrdiff_t i = pitch&7; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- else if(g_cpuid.m_flags & CCpuID::mmx)
- {
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- pxor mm7, mm7
-AvgLines8_mmx_loop:
- movq mm0, [esi]
- movq mm1, mm0
-
- punpcklbw mm0, mm7
- punpckhbw mm1, mm7
-
- movq mm2, [esi+ebx*2]
- movq mm3, mm2
-
- punpcklbw mm2, mm7
- punpckhbw mm3, mm7
-
- paddw mm0, mm2
- psrlw mm0, 1
-
- paddw mm1, mm3
- psrlw mm1, 1
-
- packuswb mm0, mm1
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines8_mmx_loop
-
- mov tmp, esi
- }
-
- for(ptrdiff_t i = pitch&7; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- else
-#endif
- {
- for(ptrdiff_t i = pitch; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-void AvgLines555(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- unsigned __int64 __0x03e003e003e003e0 = 0x03e003e003e003e0;
- unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- BYTE* tmp = s;
-
-#ifndef _WIN64
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- movq mm6, __0x03e003e003e003e0
- movq mm7, __0x001f001f001f001f
-
-AvgLines555_loop:
- movq mm0, [esi]
- movq mm1, mm0
- movq mm2, mm0
-
- psrlw mm0, 10 // red1 bits: mm0 = 001f001f001f001f
- pand mm1, mm6 // green1 bits: mm1 = 03e003e003e003e0
- pand mm2, mm7 // blue1 bits: mm2 = 001f001f001f001f
-
- movq mm3, [esi+ebx*2]
- movq mm4, mm3
- movq mm5, mm3
-
- psrlw mm3, 10 // red2 bits: mm3 = 001f001f001f001f
- pand mm4, mm6 // green2 bits: mm4 = 03e003e003e003e0
- pand mm5, mm7 // blue2 bits: mm5 = 001f001f001f001f
-
- paddw mm0, mm3
- psrlw mm0, 1 // (red1+red2)/2
- psllw mm0, 10 // red bits at 7c007c007c007c00
-
- paddw mm1, mm4
- psrlw mm1, 1 // (green1+green2)/2
- pand mm1, mm6 // green bits at 03e003e003e003e0
-
- paddw mm2, mm5
- psrlw mm2, 1 // (blue1+blue2)/2
- // blue bits at 001f001f001f001f (no need to pand, lower bits were discareded)
-
- por mm0, mm1
- por mm0, mm2
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines555_loop
-
- mov tmp, esi
- }
-#endif
-
- for(ptrdiff_t i = (pitch&7)>>1; i--; tmp++)
- {
- tmp[pitch] =
- ((((*tmp&0x7c00) + (tmp[pitch<<1]&0x7c00)) >> 1)&0x7c00)|
- ((((*tmp&0x03e0) + (tmp[pitch<<1]&0x03e0)) >> 1)&0x03e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-void AvgLines565(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- unsigned __int64 __0x07e007e007e007e0 = 0x07e007e007e007e0;
- unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- WORD* tmp = (WORD*)s;
-
-#ifndef _WIN64
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- movq mm6, __0x07e007e007e007e0
- movq mm7, __0x001f001f001f001f
-
-AvgLines565_loop:
- movq mm0, [esi]
- movq mm1, mm0
- movq mm2, mm0
-
- psrlw mm0, 11 // red1 bits: mm0 = 001f001f001f001f
- pand mm1, mm6 // green1 bits: mm1 = 07e007e007e007e0
- pand mm2, mm7 // blue1 bits: mm2 = 001f001f001f001f
-
- movq mm3, [esi+ebx*2]
- movq mm4, mm3
- movq mm5, mm3
-
- psrlw mm3, 11 // red2 bits: mm3 = 001f001f001f001f
- pand mm4, mm6 // green2 bits: mm4 = 07e007e007e007e0
- pand mm5, mm7 // blue2 bits: mm5 = 001f001f001f001f
-
- paddw mm0, mm3
- psrlw mm0, 1 // (red1+red2)/2
- psllw mm0, 11 // red bits at f800f800f800f800
-
- paddw mm1, mm4
- psrlw mm1, 1 // (green1+green2)/2
- pand mm1, mm6 // green bits at 03e003e003e003e0
-
- paddw mm2, mm5
- psrlw mm2, 1 // (blue1+blue2)/2
- // blue bits at 001f001f001f001f (no need to pand, lower bits were discareded)
-
- por mm0, mm1
- por mm0, mm2
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines565_loop
-
- mov tmp, esi
- }
-#else
- for(ptrdiff_t wd=(pitch>>3);wd--;tmp++)
- {
- tmp[0] =
- ((((*tmp&0xf800) + (tmp[pitch<<1]&0xf800)) >> 1)&0xf800)|
- ((((*tmp&0x07e0) + (tmp[pitch<<1]&0x07e0)) >> 1)&0x07e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
-#endif
-
- for(ptrdiff_t i = (pitch&7)>>1; i--; tmp++)
- {
- tmp[pitch] =
- ((((*tmp&0xf800) + (tmp[pitch<<1]&0xf800)) >> 1)&0xf800)|
- ((((*tmp&0x07e0) + (tmp[pitch<<1]&0x07e0)) >> 1)&0x07e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-#ifndef _WIN64
-extern "C" void mmx_YUY2toRGB24(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709);
-extern "C" void mmx_YUY2toRGB32(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709);
-#endif
-
-bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch)
-{
- void (* YUY2toRGB)(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709) = NULL;
-
-#ifndef _WIN64
- if(g_cpuid.m_flags & CCpuID::mmx)
- {
- YUY2toRGB =
- dbpp == 32 ? mmx_YUY2toRGB32 :
- dbpp == 24 ? mmx_YUY2toRGB24 :
- // dbpp == 16 ? mmx_YUY2toRGB16 : // TODO
- NULL;
- }
- else
-#endif
- {
- ASSERT(FALSE);
- // TODO
- }
-
- if(!YUY2toRGB)
- return(false);
-
- YUY2toRGB(src, dst, src + h*srcpitch, srcpitch, w, false);
-
- return(true);
-}
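After this rewrite, every BitBltFrom*To* wrapper in vd.cpp simply describes the two surfaces as VDPixmap structures and hands the conversion to Kasumi's VDPixmapBlt; bottom-up RGB output is handled by pointing data at the last scanline and negating the pitch. A minimal sketch of that wrapping step, assuming the VirtualDub headers added by this commit and using the same field names as the patch (the helper name is made up):

    #include <vd2/Kasumi/pixmap.h>

    // Wrap a bottom-up 32-bit RGB buffer so that row 0 of the pixmap is the top
    // scanline, the same way the rewritten BitBltFrom*ToRGB helpers above do it.
    static VDPixmap WrapBottomUpRGB32(unsigned char* bits, int w, int h, int pitch)
    {
        VDPixmap pxm = {0};
        pxm.data   = bits + pitch * (h - 1); // start at the last stored row...
        pxm.pitch  = -pitch;                 // ...and step backwards through memory
        pxm.w      = w;
        pxm.h      = h;
        pxm.format = nsVDPixmap::kPixFormat_XRGB8888;
        return pxm;
    }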
diff --git a/src/DSUtil/vd.h b/src/DSUtil/vd.h
index a69e406c0..0db586cec 100644
--- a/src/DSUtil/vd.h
+++ b/src/DSUtil/vd.h
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,25 +17,22 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - BitBltFromYUY2ToRGB is from AviSynth 2.52
+// - VDPixmapBlt is from VirtualDub
+// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
#pragma once
-class CCpuID {public: CCpuID(); enum flag_t {mmx=1, ssemmx=2, ssefpu=4, sse2=8, _3dnow=16, sse3=32} m_flags;};
+class CCpuID {public: CCpuID(); enum flag_t {mmx=1, ssemmx=2, ssefpu=4, sse2=8, _3dnow=16} m_flags;};
extern CCpuID g_cpuid;
extern bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
-extern bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch, bool fInterlaced = false);
+extern bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
+extern bool BitBltFromI420ToYUY2Interlaced(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
extern bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch /* TODO: , bool fInterlaced = false */);
extern bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch);
extern bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch);
extern bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp);
extern void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch);
-extern void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield);
-
-extern void AvgLines8(BYTE* dst, DWORD h, DWORD pitch);
-extern void AvgLines555(BYTE* dst, DWORD h, DWORD pitch);
-extern void AvgLines565(BYTE* dst, DWORD h, DWORD pitch); \ No newline at end of file
+extern void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield); \ No newline at end of file
diff --git a/src/DSUtil/vd_asm.cpp b/src/DSUtil/vd_asm.cpp
index 851449089..3fc521844 100644
--- a/src/DSUtil/vd_asm.cpp
+++ b/src/DSUtil/vd_asm.cpp
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,7 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
+// - VDPixmapBlt is from VirtualDub
// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
@@ -428,289 +429,4 @@ last4:
ret
};
}
-
-void __declspec(naked) asm_blend_row_clipped_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- static const __int64 _x0001000100010001 = 0x0001000100010001;
-
- __asm {
- push ebp
- push edi
- push esi
- push ebx
-
- mov edi,[esp+20]
- mov esi,[esp+24]
- sub edi,esi
- mov ebp,[esp+28]
- mov edx,[esp+32]
-
- shr ebp, 3
-
- movq mm6, _x0001000100010001
- pxor mm7, mm7
-
-xloop:
- movq mm0, [esi]
- movq mm3, mm0
- punpcklbw mm0, mm7
- punpckhbw mm3, mm7
-
- movq mm1, [esi+edx]
- movq mm4, mm1
- punpcklbw mm1, mm7
- punpckhbw mm4, mm7
-
- paddw mm1, mm0
- paddw mm1, mm6
- psrlw mm1, 1
-
- paddw mm4, mm3
- paddw mm4, mm6
- psrlw mm4, 1
-
- add esi, 8
- packuswb mm1, mm4
- movq [edi+esi-8], mm1
-
- dec ebp
- jne xloop
-
- pop ebx
- pop esi
- pop edi
- pop ebp
- ret
- };
-}
-
-void __declspec(naked) asm_blend_row_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- static const __int64 mask0 = 0xfcfcfcfcfcfcfcfci64;
- static const __int64 mask1 = 0x7f7f7f7f7f7f7f7fi64;
- static const __int64 mask2 = 0x3f3f3f3f3f3f3f3fi64;
- static const __int64 _x0002000200020002 = 0x0002000200020002;
-
- __asm {
- push ebp
- push edi
- push esi
- push ebx
-
- mov edi, [esp+20]
- mov esi, [esp+24]
- sub edi, esi
- mov ebp, [esp+28]
- mov edx, [esp+32]
-
- shr ebp, 3
-
- movq mm6, _x0002000200020002
- pxor mm7, mm7
-
-xloop:
- movq mm0, [esi]
- movq mm3, mm0
- punpcklbw mm0, mm7
- punpckhbw mm3, mm7
-
- movq mm1, [esi+edx]
- movq mm4, mm1
- punpcklbw mm1, mm7
- punpckhbw mm4, mm7
-
- movq mm2, [esi+edx*2]
- movq mm5, mm2
- punpcklbw mm2, mm7
- punpckhbw mm5, mm7
-
- psllw mm1, 1
- paddw mm1, mm0
- paddw mm1, mm2
- paddw mm1, mm6
- psrlw mm1, 2
-
- psllw mm4, 1
- paddw mm4, mm3
- paddw mm4, mm5
- paddw mm4, mm6
- psrlw mm4, 2
-
- add esi, 8
- packuswb mm1, mm4
- movq [edi+esi-8], mm1
-
- dec ebp
- jne xloop
-
- // sadly the original code makes a lot of visible banding artifacts on yuv
- // (it seems those shiftings without rounding introduce too much error)
-/*
- mov edi,[esp+20]
- mov esi,[esp+24]
- sub edi,esi
- mov ebp,[esp+28]
- mov edx,[esp+32]
-
- movq mm5,mask0
- movq mm6,mask1
- movq mm7,mask2
- shr ebp,1
- jz oddpart
-
-xloop:
- movq mm2,[esi]
- movq mm0,mm5
-
- movq mm1,[esi+edx]
- pand mm0,mm2
-
- psrlq mm1,1
- movq mm2,[esi+edx*2]
-
- psrlq mm2,2
- pand mm1,mm6
-
- psrlq mm0,2
- pand mm2,mm7
-
- paddb mm0,mm1
- add esi,8
-
- paddb mm0,mm2
- dec ebp
-
- movq [edi+esi-8],mm0
- jne xloop
-
-oddpart:
- test byte ptr [esp+28],1
- jz nooddpart
-
- mov ecx,[esi]
- mov eax,0fcfcfcfch
- mov ebx,[esi+edx]
- and eax,ecx
- shr ebx,1
- mov ecx,[esi+edx*2]
- shr ecx,2
- and ebx,07f7f7f7fh
- shr eax,2
- and ecx,03f3f3f3fh
- add eax,ebx
- add eax,ecx
- mov [edi+esi],eax
-
-nooddpart:
-*/
- pop ebx
- pop esi
- pop edi
- pop ebp
- ret
- };
-}
-
-__declspec(align(16)) static BYTE const_1_16_bytes[] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
-
-void asm_blend_row_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- __asm
- {
- mov edx, srcpitch
- mov esi, src
- mov edi, dst
- sub edi, esi
- mov ecx, w
- mov ebx, ecx
- shr ecx, 4
- and ebx, 15
-
- movdqa xmm7, [const_1_16_bytes]
-
-asm_blend_row_SSE2_loop:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi+edx]
- movdqa xmm2, [esi+edx*2]
- pavgb xmm0, xmm1
- pavgb xmm2, xmm1
- psubusb xmm0, xmm7
- pavgb xmm0, xmm2
- movdqa [esi+edi], xmm0
- add esi, 16
- dec ecx
- jnz asm_blend_row_SSE2_loop
-
- test ebx,15
- jz asm_blend_row_SSE2_end
-
- mov ecx, ebx
- xor ax, ax
- xor bx, bx
- xor dx, dx
-asm_blend_row_SSE2_loop2:
- mov al, [esi]
- mov bl, [esi+edx]
- mov dl, [esi+edx*2]
- add ax, bx
- inc ax
- shr ax, 1
- add dx, bx
- inc dx
- shr dx, 1
- add ax, dx
- shr ax, 1
- mov [esi+edi], al
- inc esi
- dec ecx
- jnz asm_blend_row_SSE2_loop2
-
-asm_blend_row_SSE2_end:
- }
-}
-
-void asm_blend_row_clipped_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- __asm
- {
- mov edx, srcpitch
- mov esi, src
- mov edi, dst
- sub edi, esi
- mov ecx, w
- mov ebx, ecx
- shr ecx, 4
- and ebx, 15
-
- movdqa xmm7, [const_1_16_bytes]
-
-asm_blend_row_clipped_SSE2_loop:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi+edx]
- pavgb xmm0, xmm1
- movdqa [esi+edi], xmm0
- add esi, 16
- dec ecx
- jnz asm_blend_row_clipped_SSE2_loop
-
- test ebx,15
- jz asm_blend_row_clipped_SSE2_end
-
- mov ecx, ebx
- xor ax, ax
- xor bx, bx
-asm_blend_row_clipped_SSE2_loop2:
- mov al, [esi]
- mov bl, [esi+edx]
- add ax, bx
- inc ax
- shr ax, 1
- mov [esi+edi], al
- inc esi
- dec ecx
- jnz asm_blend_row_clipped_SSE2_loop2
-
-asm_blend_row_clipped_SSE2_end:
- }
-}
#endif
diff --git a/src/DSUtil/vd_asm.h b/src/DSUtil/vd_asm.h
index c1c78f39b..7c1f2f134 100644
--- a/src/DSUtil/vd_asm.h
+++ b/src/DSUtil/vd_asm.h
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,8 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - BitBltFromYUY2ToRGB is from AviSynth 2.52
+// - VDPixmapBlt is from VirtualDub
// (- vd.cpp/h should be renamed to something more sensible already :)
#pragma once
@@ -31,9 +31,4 @@ void yv12_yuy2_row_sse2_linear();
void yv12_yuy2_row_sse2_linear_interlaced();
void yv12_yuy2_sse2(const BYTE *Y, const BYTE *U, const BYTE *V, int halfstride, unsigned halfwidth, unsigned height, BYTE *YUY2, int d_stride);
void yv12_yuy2_sse2_interlaced(const BYTE *Y, const BYTE *U, const BYTE *V, int halfstride, unsigned halfwidth, unsigned height, BYTE *YUY2, int d_stride);
-
-void asm_blend_row_clipped_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_clipped_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
#endif
diff --git a/src/YASM.rules b/src/YASM.rules
new file mode 100644
index 000000000..e212a4f17
--- /dev/null
+++ b/src/YASM.rules
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="utf-8"?>
+<VisualStudioToolFile
+ Name="YASM"
+ Version="8.00"
+ >
+ <Rules>
+ <CustomBuildRule
+ Name="YASM"
+ DisplayName="YASM"
+ CommandLine="yasm -X vc -g cv8 -f $(PlatformName) -o &quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+ Outputs="$(IntDir)\$(InputName).obj"
+			FileExtensions="*.asm;*.asm64"
+ ExecutionDescription="Assembling: $(InputFileName)"
+ >
+ <Properties>
+ </Properties>
+ </CustomBuildRule>
+ </Rules>
+</VisualStudioToolFile>
diff --git a/src/common.vsprops b/src/common.vsprops
index b3e6a9f51..dcd9ad94c 100644
--- a/src/common.vsprops
+++ b/src/common.vsprops
@@ -8,6 +8,7 @@
>
<Tool
Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)src\DSUtil\&quot;"
PreprocessorDefinitions="WINVER=0x0600"
EnableFunctionLevelLinking="true"
WarningLevel="3"
diff --git a/src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp b/src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp
index 49630a357..8ab629875 100644
--- a/src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp
+++ b/src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp
@@ -1888,11 +1888,11 @@ HRESULT CDX9AllocatorPresenter::AlphaBlt(RECT* pSrc, RECT* pDst, IDirect3DTextur
{(float)dst.right, (float)dst.bottom, 0.5f, 2.0f, (float)src.right / w, (float)src.bottom / h},
};
/*
- for(int i = 0; i < countof(pVertices); i++)
- {
- pVertices[i].x -= 0.5;
- pVertices[i].y -= 0.5;
- }
+ for(int i = 0; i < countof(pVertices); i++)
+ {
+ pVertices[i].x -= 0.5;
+ pVertices[i].y -= 0.5;
+ }
*/
hr = m_pD3DDev->SetTexture(0, pTexture);
diff --git a/src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp b/src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp
index 687785c43..b6844870d 100644
--- a/src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp
+++ b/src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp
@@ -268,12 +268,13 @@ HRESULT CAudioSwitcherFilter::Transform(IMediaSample* pIn, IMediaSample* pOut)
if(FAILED(hr = pOut->GetPointer(&pDataOut))) return hr;
if(!pDataIn || !pDataOut || len < 0 || lenout < 0) return S_FALSE;
- // len = 0 doesn't mean it's failed, return S_OK otherwise might skrew the sound
+ // len = 0 doesn't mean it's failed, return S_OK otherwise might screw the sound
if(len == 0) {pOut->SetActualDataLength(0); return S_OK;}
if(m_fCustomChannelMapping)
{
- if(m_chs[wfe->nChannels-1].GetCount() > 0)
+ size_t channelsCount = m_chs[wfe->nChannels-1].GetCount();
+ if(channelsCount > 0 && wfeout->nChannels <= channelsCount)
{
for(int i = 0; i < wfeout->nChannels; i++)
{
diff --git a/src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp b/src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp
index ac9f2a811..f24d3be27 100644
--- a/src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp
+++ b/src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp
@@ -319,7 +319,10 @@ HRESULT CBaseVideoFilter::CopyBuffer(BYTE* pOut, BYTE** ppIn, int w, int h, int
if(bihOut.biCompression == '2YUY')
{
- BitBltFromI420ToYUY2(w, h, pOut, bihOut.biWidth*2, pIn, pInU, pInV, pitchIn, fInterlaced);
+ if (!fInterlaced)
+ BitBltFromI420ToYUY2(w, h, pOut, bihOut.biWidth*2, pIn, pInU, pInV, pitchIn);
+ else
+ BitBltFromI420ToYUY2Interlaced(w, h, pOut, bihOut.biWidth*2, pIn, pInU, pInV, pitchIn);
}
else if(bihOut.biCompression == '024I' || bihOut.biCompression == 'VUYI' || bihOut.biCompression == '21VY')
{
diff --git a/src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp b/src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp
index bb3aceb00..6251a2bf5 100644
--- a/src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp
+++ b/src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp
@@ -1314,6 +1314,7 @@ int CMpeg2Dec::sequence_ext()
if(!(buffer[1] & 8))
{
sequence->flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE;
+ sequence->width = (sequence->width + 31) & ~31;
sequence->height = (sequence->height + 31) & ~31;
}
diff --git a/src/thirdparty/VirtualDub/Kasumi/Kasumi.vcproj b/src/thirdparty/VirtualDub/Kasumi/Kasumi.vcproj
new file mode 100644
index 000000000..40e1e5220
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/Kasumi.vcproj
@@ -0,0 +1,1527 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9,00"
+ Name="Kasumi"
+ ProjectGUID="{0D252872-7542-4232-8D02-53F9182AEE15}"
+ RootNamespace="Kasumi"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ <Platform
+ Name="x64"
+ />
+ </Platforms>
+ <ToolFiles>
+ <ToolFile
+ RelativePath="..\..\..\YASM.rules"
+ />
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="h;..\h"
+ PreprocessorDefinitions="NDEBUG;WIN32;_WINDOWS;WIN32_LEAN_AND_MEAN;NOMINMAX"
+ StringPooling="true"
+ MinimalRebuild="true"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="true"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Release/Kasumi.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="h;..\h"
+ PreprocessorDefinitions="NDEBUG;WIN32;_WINDOWS;WIN32_LEAN_AND_MEAN;NOMINMAX"
+ StringPooling="true"
+ MinimalRebuild="true"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="true"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Release/Kasumi.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="h;..\h"
+ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS;WIN32_LEAN_AND_MEAN;NOMINMAX"
+ StringPooling="true"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Debug/Kasumi.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="h;..\h"
+ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS;WIN32_LEAN_AND_MEAN;NOMINMAX"
+ StringPooling="true"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Debug/Kasumi.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+ >
+ <File
+ RelativePath=".\source\alphablt.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\blt.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_pal.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_rgb.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_yuv.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_yuv2yuv.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_yuvrev.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\blt_setup.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\blt_spanutils.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\blt_uberblit.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\pixel.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\pixmaputils.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\region.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\resample.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\resample_kernels.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\resample_stages.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\resample_stages_reference.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\stretchblt_reference.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\tables.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\triblt.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\uberblit.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_16f.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_gen.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_resample.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_resample_special.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_swizzle.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_swizzle_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\uberblit_v210.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_ycbcr_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl"
+ >
+ <File
+ RelativePath="h\bitutils.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\blt_setup.h"
+ >
+ </File>
+ <File
+ RelativePath="h\blt_spanutils.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\blt_spanutils_x86.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\pixel.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\pixmap.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\pixmapops.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\pixmaputils.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\region.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\resample.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\resample_stages.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\resample_stages_reference.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\resample_stages_x64.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\resample_stages_x86.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\tables.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\text.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\triblt.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_16f.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_base.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_fill.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_gen.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_input.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_pal.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_resample.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_resample_special.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_resample_special_x86.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_rgb.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_rgb_x86.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_swizzle.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_swizzle_x86.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_v210.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_ycbcr.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_ycbcr_x86.h"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Assembly files (x86)"
+ Filter="asm"
+ >
+ <File
+ RelativePath="source\a_bltrgb.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_bltrgb2yuv_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_bltrgb_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_bltyuv2rgb_sse2.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_resample_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_resample_sse41.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_spanutils_isse.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_stretchrgb_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_stretchrgb_point.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_triblt_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_triblt_scalar.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_triblt_sse2.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Source Files (x86)"
+ >
+ <File
+ RelativePath=".\source\blt_spanutils_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\resample_stages_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\uberblit_resample_special_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Assembly files (AMD64)"
+ Filter=".asm64"
+ >
+ <File
+ RelativePath="source\a64_resample.asm64"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Source Files (x64)"
+ >
+ <File
+ RelativePath=".\source\resample_stages_x64.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Interface Header Files"
+ >
+ <File
+ RelativePath="..\h\vd2\Kasumi\blitter.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\resample_kernels.h"
+ >
+ </File>
+ </Filter>
+ <File
+ RelativePath="source\a_triblt.inc"
+ >
+ </File>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/bitutils.h b/src/thirdparty/VirtualDub/Kasumi/h/bitutils.h
new file mode 100644
index 000000000..8cba85ffd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/bitutils.h
@@ -0,0 +1,26 @@
+#ifndef f_VD2_KASUMI_BITUTILS_H
+#define f_VD2_KASUMI_BITUTILS_H
+
+#include <vd2/system/vdtypes.h>
+
+namespace nsVDPixmapBitUtils {
+ inline uint32 avg_8888_11(uint32 x, uint32 y) {
+ return (x|y) - (((x^y)&0xfefefefe)>>1);
+ }
+
+ inline uint32 avg_8888_121(uint32 x, uint32 y, uint32 z) {
+ return avg_8888_11(avg_8888_11(x,z), y);
+ }
+
+ inline uint32 avg_0808_14641(uint32 a, uint32 b, uint32 c, uint32 d, uint32 e) {
+ a &= 0xff00ff;
+ b &= 0xff00ff;
+ c &= 0xff00ff;
+ d &= 0xff00ff;
+ e &= 0xff00ff;
+
+ return (((a+e) + 4*(b+d) + 6*c + 0x080008)&0x0ff00ff0)>>4;
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/blt_setup.h b/src/thirdparty/VirtualDub/Kasumi/h/blt_setup.h
new file mode 100644
index 000000000..19b7bc62c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/blt_setup.h
@@ -0,0 +1,62 @@
+#ifndef f_VD2_KASUMI_BLT_SETUP_H
+#define f_VD2_KASUMI_BLT_SETUP_H
+
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+typedef void (*VDPixmapPalettedBlitterFn)(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal);
+typedef void (*VDPixmapChunkyBlitterFn)(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+
+void VDPixmapBltDirectPalettedConversion(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h, VDPixmapPalettedBlitterFn pBlitter);
+
+template<VDPixmapPalettedBlitterFn palettedBlitter>
+void VDPixmapBlitterPalettedAdapter(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+{
+ if (dst.format == nsVDPixmap::kPixFormat_XRGB8888)
+ palettedBlitter(dst.data, dst.pitch, src.data, src.pitch, w, h, src.palette);
+ else
+ VDPixmapBltDirectPalettedConversion(dst, src, w, h, palettedBlitter);
+}
+
+template<VDPixmapChunkyBlitterFn chunkyBlitter>
+void VDPixmapBlitterChunkyAdapter(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+{
+ chunkyBlitter(dst.data, dst.pitch, src.data, src.pitch, w, h);
+}
+
+struct VDPixmapFormatSubset {
+public:
+ VDPixmapFormatSubset() : mFormatCount(0) {}
+
+ VDPixmapFormatSubset& operator=(int format) {
+ mFormatCount = 0;
+ mFormats[mFormatCount++] = format;
+ return *this;
+ }
+
+ VDPixmapFormatSubset& operator,(int format) {
+ VDASSERT(mFormatCount < nsVDPixmap::kPixFormat_Max_Standard);
+ mFormats[mFormatCount++] = format;
+ return *this;
+ }
+
+ int mFormatCount;
+ int mFormats[nsVDPixmap::kPixFormat_Max_Standard];
+};
+
+class VDPixmapBlitterTable {
+public:
+ void Clear();
+ void AddBlitter(int srcFormat, int dstFormat, VDPixmapBlitterFn blitter);
+ void AddBlitter(const VDPixmapFormatSubset& srcFormats, VDPixmapFormatSubset& dstFormats, VDPixmapBlitterFn blitter);
+
+ VDPixmapBlitterFn mTable[nsVDPixmap::kPixFormat_Max_Standard][nsVDPixmap::kPixFormat_Max_Standard];
+};
+
+inline void VDPixmapBlitterTable::AddBlitter(int srcFormat, int dstFormat, VDPixmapBlitterFn blitter) {
+ mTable[srcFormat][dstFormat] = blitter;
+}
+
+
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils.h b/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils.h
new file mode 100644
index 000000000..ef723b3f8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils.h
@@ -0,0 +1,23 @@
+#ifndef f_VD2_KASUMI_BLT_SPANUTILS_H
+#define f_VD2_KASUMI_BLT_SPANUTILS_H
+
+#include <vd2/system/vdtypes.h>
+
+namespace nsVDPixmapSpanUtils {
+ void horiz_expand2x_centered (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_expand2x_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_expand4x_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_compress2x_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_compress2x_centered (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_compress4x_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_compress4x_centered (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_realign_to_centered (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_realign_to_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void vert_expand2x_centered (uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+ void vert_expand4x_centered (uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+ void vert_compress2x_centered_fast (uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase);
+ void vert_compress2x_centered (uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase);
+ void vert_compress4x_centered(uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase);
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils_x86.h
new file mode 100644
index 000000000..c697485a2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils_x86.h
@@ -0,0 +1,35 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_BLT_SPANUTILS_X86_H
+#define f_VD2_KASUMI_BLT_SPANUTILS_X86_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+namespace nsVDPixmapSpanUtils {
+ void horiz_expand2x_coaligned_ISSE(uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_expand4x_coaligned_MMX(uint8 *dst, const uint8 *src, sint32 w);
+ void vert_expand2x_centered_ISSE(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+ void vert_expand4x_centered_ISSE(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/resample_stages.h b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages.h
new file mode 100644
index 000000000..588fda9ad
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages.h
@@ -0,0 +1,80 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_STAGES_H
+#define f_VD2_KASUMI_RESAMPLE_STAGES_H
+
+#include <vd2/Kasumi/pixmap.h>
+
+class IVDResamplerFilter;
+struct VDResamplerAxis;
+
+class VDSteppedAllocator {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+
+ VDSteppedAllocator(size_t initialSize = 1024);
+ ~VDSteppedAllocator();
+
+ void clear();
+ void *allocate(size_type n);
+
+protected:
+ struct Block {
+ Block *next;
+ };
+
+ Block *mpHead;
+ char *mpAllocNext;
+ size_t mAllocLeft;
+ size_t mAllocNext;
+ size_t mAllocInit;
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (common)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class IVDResamplerStage {
+public:
+ virtual ~IVDResamplerStage() {}
+
+#if 0
+ void *operator new(size_t n, VDSteppedAllocator& a) {
+ return a.allocate(n);
+ }
+
+ void operator delete(void *p, VDSteppedAllocator& a) {
+ }
+
+private:
+ // these should NEVER be called
+ void operator delete(void *p) {}
+#endif
+};
+
+class IVDResamplerSeparableRowStage2 {
+public:
+ virtual void Init(const VDResamplerAxis& axis, uint32 srcw) = 0;
+ virtual void Process(void *dst, const void *src, uint32 w) = 0;
+};
+
+class IVDResamplerSeparableRowStage : public IVDResamplerStage {
+public:
+ virtual IVDResamplerSeparableRowStage2 *AsRowStage2() { return NULL; }
+ virtual void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) = 0;
+ virtual int GetWindowSize() const = 0;
+};
+
+class IVDResamplerSeparableColStage : public IVDResamplerStage {
+public:
+ virtual int GetWindowSize() const = 0;
+ virtual void Process(void *dst, const void *const *src, uint32 w, sint32 phase) = 0;
+};
+
+void VDResamplerGenerateTable(sint32 *dst, const IVDResamplerFilter& filter);
+void VDResamplerGenerateTableF(float *dst, const IVDResamplerFilter& filter);
+void VDResamplerGenerateTable2(sint32 *dst, const IVDResamplerFilter& filter, sint32 count, sint32 u, sint32 dudx);
+void VDResamplerSwizzleTable(sint32 *dst, unsigned pairs);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_reference.h b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_reference.h
new file mode 100644
index 000000000..296882ceb
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_reference.h
@@ -0,0 +1,156 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_STAGES_REFERENCE_H
+#define f_VD2_KASUMI_RESAMPLE_STAGES_REFERENCE_H
+
+#include <vd2/system/vdstl.h>
+#include "resample_stages.h"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (portable)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerRowStageSeparablePoint8 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparablePoint16 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparablePoint32 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparableLinear8 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ virtual void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf : public VDResamplerRowStageSeparableLinear8 {
+public:
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparableLinear32 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerColStageSeparableLinear8 : public IVDResamplerSeparableColStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+};
+
+class VDResamplerColStageSeparableLinear32 : public IVDResamplerSeparableColStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+};
+
+class VDResamplerRowStageSeparableTable8 : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerRowStageSeparableTable8(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerRowStageSeparableTable32 : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerRowStageSeparableTable32(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerRowStageSeparableTable32F : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerRowStageSeparableTable32F(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<float, vdaligned_alloc<float> > mFilterBank;
+};
+
+class VDResamplerRowStageSeparableTable32Fx4 : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerRowStageSeparableTable32Fx4(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<float, vdaligned_alloc<float> > mFilterBank;
+};
+
+class VDResamplerColStageSeparableTable8 : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerColStageSeparableTable8(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *const *src0, uint32 w, sint32 phase);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerColStageSeparableTable32 : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerColStageSeparableTable32(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *const *src0, uint32 w, sint32 phase);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerColStageSeparableTable32F : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerColStageSeparableTable32F(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *const *src0, uint32 w, sint32 phase);
+
+protected:
+ vdblock<float, vdaligned_alloc<float> > mFilterBank;
+};
+
+class VDResamplerColStageSeparableTable32Fx4 : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerColStageSeparableTable32Fx4(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *const *src0, uint32 w, sint32 phase);
+
+protected:
+ vdblock<float, vdaligned_alloc<float> > mFilterBank;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x64.h b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x64.h
new file mode 100644
index 000000000..fd719f732
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x64.h
@@ -0,0 +1,26 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_STAGES_X64_H
+#define f_VD2_KASUMI_RESAMPLE_STAGES_X64_H
+
+#include "resample_stages_reference.h"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE2, AMD64)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparableTableRowStageSSE2 : public VDResamplerRowStageSeparableTable32 {
+public:
+ VDResamplerSeparableTableRowStageSSE2(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableTableColStageSSE2 : public VDResamplerColStageSeparableTable32 {
+public:
+ VDResamplerSeparableTableColStageSSE2(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x86.h
new file mode 100644
index 000000000..41e16b23d
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x86.h
@@ -0,0 +1,193 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_STAGES_X86
+#define f_VD2_KASUMI_RESAMPLE_STAGES_X86
+
+#include "resample_stages_reference.h"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (scalar, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparablePointRowStageX86 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (MMX, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparablePointRowStageMMX : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableLinearRowStageMMX : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableLinearColStageMMX : public IVDResamplerSeparableColStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+};
+
+class VDResamplerSeparableCubicRowStageMMX : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerSeparableCubicRowStageMMX(double A);
+
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerSeparableCubicColStageMMX : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerSeparableCubicColStageMMX(double A);
+
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerSeparableTableRowStage8MMX : public VDResamplerRowStageSeparableTable32, public IVDResamplerSeparableRowStage2 {
+public:
+ VDResamplerSeparableTableRowStage8MMX(const IVDResamplerFilter& filter);
+
+ IVDResamplerSeparableRowStage2 *AsRowStage2() { return this; }
+
+ void Init(const VDResamplerAxis& axis, uint32 srcw);
+ void Process(void *dst, const void *src, uint32 w);
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ void RedoRowFilters(const VDResamplerAxis& axis, uint32 w, uint32 srcw);
+
+ int mAlignedKernelWidth;
+ int mAlignedKernelSize;
+ ptrdiff_t mRowKernelSize;
+ uint32 mLastSrcWidth;
+ uint32 mLastDstWidth;
+ sint32 mLastU;
+ sint32 mLastDUDX;
+
+ bool mbQuadOptimizationEnabled[4];
+ int mKernelSizeByOffset[4];
+ ptrdiff_t mTailOffset[4];
+
+ vdfastvector<sint16, vdaligned_alloc<sint16> > mRowKernels;
+};
+
+class VDResamplerSeparableTableRowStageMMX : public VDResamplerRowStageSeparableTable32 {
+public:
+ VDResamplerSeparableTableRowStageMMX(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableTableColStage8MMX : public VDResamplerColStageSeparableTable8 {
+public:
+ VDResamplerSeparableTableColStage8MMX(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+class VDResamplerSeparableTableColStageMMX : public VDResamplerColStageSeparableTable32 {
+public:
+ VDResamplerSeparableTableColStageMMX(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (ISSE, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf_ISSE : public VDResamplerRowStageSeparableLinear8 {
+public:
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE2, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparableCubicColStageSSE2 : public VDResamplerSeparableCubicColStageMMX {
+public:
+ VDResamplerSeparableCubicColStageSSE2(double A);
+
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+};
+
+class VDResamplerSeparableTableRowStageSSE2 : public VDResamplerSeparableTableRowStageMMX {
+public:
+ VDResamplerSeparableTableRowStageSSE2(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableTableColStageSSE2 : public VDResamplerSeparableTableColStageMMX {
+public:
+ VDResamplerSeparableTableColStageSSE2(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE4.1, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparableTableRowStage8SSE41 : public VDResamplerRowStageSeparableTable32, public IVDResamplerSeparableRowStage2 {
+public:
+ VDResamplerSeparableTableRowStage8SSE41(const IVDResamplerFilter& filter);
+
+ IVDResamplerSeparableRowStage2 *AsRowStage2() { return this; }
+
+ void Init(const VDResamplerAxis& axis, uint32 srcw);
+ void Process(void *dst, const void *src, uint32 w);
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ void RedoRowFilters(const VDResamplerAxis& axis, uint32 w, uint32 srcw);
+
+ int mAlignedKernelWidth;
+ int mAlignedKernelSize;
+ ptrdiff_t mRowKernelSize;
+ uint32 mLastSrcWidth;
+ uint32 mLastDstWidth;
+ sint32 mLastU;
+ sint32 mLastDUDX;
+
+ bool mbQuadOptimizationEnabled[8];
+ int mKernelSizeByOffset[8];
+ ptrdiff_t mTailOffset[8];
+
+ vdfastvector<sint16, vdaligned_alloc<sint16> > mRowKernels;
+};
+
+class VDResamplerSeparableTableColStage8SSE41 : public VDResamplerColStageSeparableTable8 {
+public:
+ VDResamplerSeparableTableColStage8SSE41(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit.h
new file mode 100644
index 000000000..72f8ee060
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit.h
@@ -0,0 +1,83 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_H
+#define f_VD2_KASUMI_UBERBLIT_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/vectors.h>
+#include <vd2/Kasumi/blitter.h>
+
+struct VDPixmap;
+
+enum VDPixmapFormatToken {
+ kVDPixType_1 = 0x00000001,
+ kVDPixType_2 = 0x00000002,
+ kVDPixType_4 = 0x00000003,
+ kVDPixType_8 = 0x00000004,
+ kVDPixType_555_LE = 0x00000005,
+ kVDPixType_565_LE = 0x00000006,
+ kVDPixType_1555_LE = 0x00000007,
+ kVDPixType_888 = 0x00000008,
+ kVDPixType_8888 = 0x00000009,
+ kVDPixType_16F_LE = 0x0000000A,
+ kVDPixType_16Fx4_LE = 0x0000000B,
+ kVDPixType_16F_16F_16F_LE = 0x0000000C,
+ kVDPixType_32F_LE = 0x0000000D,
+ kVDPixType_32Fx4_LE = 0x0000000E,
+ kVDPixType_32F_32F_32F_LE = 0x0000000F,
+ kVDPixType_8_8_8 = 0x00000010,
+ kVDPixType_B8G8_R8G8 = 0x00000011, // UYVY
+ kVDPixType_G8B8_G8R8 = 0x00000012, // YUYV
+ kVDPixType_V210 = 0x00000013, // v210 (4:2:2 10 bit)
+ kVDPixType_8_B8R8 = 0x00000014, // NV12
+ kVDPixType_B8R8 = 0x00000015,
+ kVDPixType_Mask = 0x0000003F,
+
+ kVDPixSamp_444 = 0x00000040,
+ kVDPixSamp_422 = 0x00000080,
+ kVDPixSamp_422_JPEG = 0x000000C0,
+ kVDPixSamp_420_MPEG2 = 0x00000100,
+ kVDPixSamp_420_MPEG2INT = 0x00000140,
+ kVDPixSamp_420_MPEG1 = 0x00000180,
+ kVDPixSamp_420_DVPAL = 0x000001C0,
+ kVDPixSamp_411 = 0x00000200,
+ kVDPixSamp_410 = 0x00000240,
+ kVDPixSamp_Mask = 0x00000FC0,
+ kVDPixSamp_Bits = 6,
+
+ kVDPixSpace_Pal = 0x00001000,
+ kVDPixSpace_RGB = 0x00002000,
+ kVDPixSpace_BGR = 0x00003000,
+ kVDPixSpace_BGRA = 0x00004000,
+ kVDPixSpace_Y_601 = 0x00005000,
+ kVDPixSpace_Y_709 = 0x00006000,
+ kVDPixSpace_YCC_601 = 0x00007000,
+ kVDPixSpace_YCC_709 = 0x00008000,
+ kVDPixSpace_YCC_JPEG = 0x00009000,
+ kVDPixSpace_Mask = 0x0003F000,
+};
+
+struct VDPixmapSamplingInfo {
+ int mCXOffset16;
+ int mCrYOffset16;
+ int mCbYOffset16;
+ int mCXBits;
+ int mCYBits;
+};
+
+uint32 VDPixmapGetFormatTokenFromFormat(int format);
+const VDPixmapSamplingInfo& VDPixmapGetSamplingInfo(uint32 samplingToken);
+
+class IVDPixmapGen {
+public:
+ virtual ~IVDPixmapGen() {}
+ virtual void AddWindowRequest(int minY, int maxY) = 0;
+ virtual void Start() = 0;
+ virtual sint32 GetWidth(int srcIndex) const = 0;
+ virtual sint32 GetHeight(int srcIndex) const = 0;
+ virtual bool IsStateful() const = 0;
+ virtual uint32 GetType(uint32 output) const = 0;
+ virtual const void *GetRow(sint32 y, uint32 output) = 0;
+ virtual void ProcessRow(void *dst, sint32 y) = 0;
+};
+
+#endif
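IVDPixmapGen, declared above, is the pull-model row interface that the rest of these headers build on: a consumer first declares which rows relative to the current one it will read (AddWindowRequest), lets every stage in the chain allocate its row cache (Start), and then pulls output rows in increasing y order. The fragment below is an editorial illustration of that calling sequence only, not the library's actual driver, and the helper name is made up.

// Illustrative driver for an IVDPixmapGen chain (editorial sketch, not part
// of the commit). Types such as sint32 come from vd2/system/vdtypes.h.
void SketchGenerate(void *dst, ptrdiff_t pitch, sint32 h, IVDPixmapGen *gen) {
    gen->AddWindowRequest(0, 0);    // the consumer only needs the row it is writing
    gen->Start();                   // each stage sizes and allocates its row window

    char *dstRow = (char *)dst;
    for(sint32 y = 0; y < h; ++y) {
        gen->ProcessRow(dstRow, y); // final stage computes one full row into dst
        dstRow += pitch;
    }
}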
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_16f.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_16f.h
new file mode 100644
index 000000000..513c4fb4f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_16f.h
@@ -0,0 +1,39 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_16F_H
+#define f_VD2_KASUMI_UBERBLIT_16F_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32F -> 16F
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_32F_To_16F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start();
+
+ uint32 GetType(uint32 output) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16F -> 32F
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_16F_To_32F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start();
+
+ uint32 GetType(uint32 output) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_base.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_base.h
new file mode 100644
index 000000000..675619a7b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_base.h
@@ -0,0 +1,129 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_BASE_H
+#define f_VD2_KASUMI_UBERBLIT_BASE_H
+
+#include <vd2/system/vdstl.h>
+#include "uberblit.h"
+
+class VDPixmapGenWindowBased : public IVDPixmapGen {
+public:
+ VDPixmapGenWindowBased()
+ : mWindowMinDY(0xffff)
+ , mWindowMaxDY(-0xffff) {}
+
+ void SetOutputSize(sint32 w, sint32 h) {
+ mWidth = w;
+ mHeight = h;
+ }
+
+ void AddWindowRequest(int minDY, int maxDY) {
+ if (mWindowMinDY > minDY)
+ mWindowMinDY = minDY;
+ if (mWindowMaxDY < maxDY)
+ mWindowMaxDY = maxDY;
+ }
+
+ void StartWindow(uint32 rowbytes, int outputCount = 1) {
+ VDASSERT(mWindowMaxDY >= mWindowMinDY);
+ mWindowSize = mWindowMaxDY + 1 - mWindowMinDY;
+
+ mWindowPitch = (rowbytes + 15) & ~15;
+ mWindowBuffer.resize(mWindowPitch * mWindowSize * outputCount);
+ mWindow.resize(mWindowSize * 2);
+
+ for(sint32 i=0; i<mWindowSize; ++i)
+ mWindow[i] = mWindow[i + mWindowSize] = &mWindowBuffer[mWindowPitch * outputCount * i];
+
+ mWindowIndex = 0;
+ mWindowLastY = -0x3FFFFFFF;
+ }
+
+ sint32 GetWidth(int) const { return mWidth; }
+ sint32 GetHeight(int) const { return mHeight; }
+
+ bool IsStateful() const {
+ return true;
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ sint32 tostep = y - mWindowLastY;
+ VDASSERT(y >= mWindowLastY - (sint32)mWindowSize + 1);
+
+ if (tostep >= mWindowSize) {
+ mWindowLastY = y - 1;
+ tostep = 1;
+ }
+
+ while(tostep-- > 0) {
+ ++mWindowLastY;
+ Compute(mWindow[mWindowIndex], mWindowLastY);
+ if (++mWindowIndex >= mWindowSize)
+ mWindowIndex = 0;
+ }
+
+ return mWindow[y + mWindowSize - 1 - mWindowLastY + mWindowIndex];
+ }
+
+ void ProcessRow(void *dst, sint32 y) {
+ Compute(dst, y);
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) = 0;
+
+ vdfastvector<uint8> mWindowBuffer;
+ vdfastvector<uint8 *> mWindow;
+ sint32 mWindowPitch;
+ sint32 mWindowIndex;
+ sint32 mWindowMinDY;
+ sint32 mWindowMaxDY;
+ sint32 mWindowSize;
+ sint32 mWindowLastY;
+ sint32 mWidth;
+ sint32 mHeight;
+};
+
+class VDPixmapGenWindowBasedOneSource : public VDPixmapGenWindowBased {
+public:
+ void InitSource(IVDPixmapGen *src, uint32 srcindex) {
+ mpSrc = src;
+ mSrcIndex = srcindex;
+ mSrcWidth = src->GetWidth(srcindex);
+ mSrcHeight = src->GetHeight(srcindex);
+ mWidth = mSrcWidth;
+ mHeight = mSrcHeight;
+ }
+
+ void AddWindowRequest(int minDY, int maxDY) {
+ VDPixmapGenWindowBased::AddWindowRequest(minDY, maxDY);
+ mpSrc->AddWindowRequest(minDY, maxDY);
+ }
+
+ void StartWindow(uint32 rowbytes, int outputCount = 1) {
+ mpSrc->Start();
+
+ VDPixmapGenWindowBased::StartWindow(rowbytes, outputCount);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return mpSrc->GetType(mSrcIndex);
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) = 0;
+
+ IVDPixmapGen *mpSrc;
+ uint32 mSrcIndex;
+ sint32 mSrcWidth;
+ sint32 mSrcHeight;
+};
+
+class VDPixmapGenWindowBasedOneSourceSimple : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+
+ src->AddWindowRequest(0, 0);
+ }
+};
+
+#endif
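VDPixmapGenWindowBased, above, is the ring-buffered row cache that most stages derive from: a stage states how many rows above and below the current one it reads from its source, StartWindow() allocates that many cached rows, and GetRow() recomputes only the rows that have scrolled into view. The class below is a hypothetical example (editorial sketch, not part of the commit) of how a derived stage would use that contract; it assumes its source clamps out-of-range rows the way VDPixmapGenSrc in uberblit_input.h does.

// Hypothetical 3-tap (1-2-1) vertical smoothing stage for an 8-bit plane,
// shown only to illustrate the AddWindowRequest / StartWindow / Compute
// contract of the window base classes above.
class VDPixmapGen_VSmooth3_8_Sketch : public VDPixmapGenWindowBasedOneSource {
public:
    void Init(IVDPixmapGen *src, uint32 srcIndex) {
        InitSource(src, srcIndex);
        src->AddWindowRequest(-1, 1);   // we read source rows y-1 .. y+1
    }

    void Start() {
        StartWindow(mWidth);            // one byte per output pixel
    }

protected:
    void Compute(void *dst0, sint32 y) {
        uint8 *dst = (uint8 *)dst0;
        const uint8 *r0 = (const uint8 *)mpSrc->GetRow(y - 1, mSrcIndex);
        const uint8 *r1 = (const uint8 *)mpSrc->GetRow(y,     mSrcIndex);
        const uint8 *r2 = (const uint8 *)mpSrc->GetRow(y + 1, mSrcIndex);

        for(sint32 x = 0; x < mWidth; ++x)
            dst[x] = (uint8)((r0[x] + 2*r1[x] + r2[x] + 2) >> 2);
    }
};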
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_fill.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_fill.h
new file mode 100644
index 000000000..ba02a2877
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_fill.h
@@ -0,0 +1,55 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_FILL_H
+#define f_VD2_KASUMI_UBERBLIT_FILL_H
+
+#include "uberblit.h"
+#include "uberblit_base.h"
+
+class VDPixmapGenFill8 : public IVDPixmapGen {
+public:
+ void Init(uint8 fill, uint32 bpr, sint32 width, sint32 height, uint32 type) {
+ mRow.resize(bpr, fill);
+ mWidth = width;
+ mHeight = height;
+ mType = type;
+ }
+
+ void AddWindowRequest(int minY, int maxY) {
+ }
+
+ void Start() {
+ }
+
+ sint32 GetWidth(int) const {
+ return mWidth;
+ }
+
+ sint32 GetHeight(int) const {
+ return mHeight;
+ }
+
+ bool IsStateful() const {
+ return false;
+ }
+
+ const void *GetRow(sint32 y, uint32 output) {
+ return mRow.data();
+ }
+
+ void ProcessRow(void *dst, sint32 y) {
+ if (!mRow.empty())
+ memset(dst, mRow[0], mRow.size());
+ }
+
+ uint32 GetType(uint32 index) const {
+ return mType;
+ }
+
+protected:
+ sint32 mWidth;
+ sint32 mHeight;
+ uint32 mType;
+
+ vdfastvector<uint8> mRow;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_gen.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_gen.h
new file mode 100644
index 000000000..3937fbba7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_gen.h
@@ -0,0 +1,167 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_GEN_H
+#define f_VD2_KASUMI_UBERBLIT_GEN_H
+
+#include <vd2/system/vectors.h>
+#include "uberblit.h"
+
+class IVDPixmapGenSrc;
+
+class VDPixmapUberBlitterDirectCopy : public IVDPixmapBlitter {
+public:
+ VDPixmapUberBlitterDirectCopy();
+ ~VDPixmapUberBlitterDirectCopy();
+
+ void Blit(const VDPixmap& dst, const VDPixmap& src);
+ void Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src);
+};
+
+class VDPixmapUberBlitter : public IVDPixmapBlitter {
+public:
+ VDPixmapUberBlitter();
+ ~VDPixmapUberBlitter();
+
+ void Blit(const VDPixmap& dst, const VDPixmap& src);
+ void Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src);
+
+protected:
+ void Blit(const VDPixmap& dst, const vdrect32 *rDst);
+ void Blit3(const VDPixmap& dst, const vdrect32 *rDst);
+ void Blit3Split(const VDPixmap& dst, const vdrect32 *rDst);
+ void Blit3Separated(const VDPixmap& px, const vdrect32 *rDst);
+ void Blit2(const VDPixmap& dst, const vdrect32 *rDst);
+ void Blit2Separated(const VDPixmap& px, const vdrect32 *rDst);
+
+ friend class VDPixmapUberBlitterGenerator;
+
+ struct OutputEntry {
+ IVDPixmapGen *mpSrc;
+ int mSrcIndex;
+ } mOutputs[3];
+
+ struct SourceEntry {
+ IVDPixmapGenSrc *mpSrc;
+ int mSrcIndex;
+ int mSrcPlane;
+ int mSrcX;
+ int mSrcY;
+ };
+
+ typedef vdfastvector<IVDPixmapGen *> Generators;
+ Generators mGenerators;
+
+ typedef vdfastvector<SourceEntry> Sources;
+ Sources mSources;
+
+ bool mbIndependentChromaPlanes;
+ bool mbIndependentPlanes;
+};
+
+class VDPixmapUberBlitterGenerator {
+public:
+ VDPixmapUberBlitterGenerator();
+ ~VDPixmapUberBlitterGenerator();
+
+ void swap(int index);
+ void dup();
+ void pop();
+
+ void ldsrc(int srcIndex, int srcPlane, int x, int y, uint32 w, uint32 h, uint32 type, uint32 bpr);
+
+ void ldconst(uint8 fill, uint32 bpr, uint32 w, uint32 h, uint32 type);
+
+ void extract_8in16(int offset, uint32 w, uint32 h);
+ void extract_8in32(int offset, uint32 w, uint32 h);
+ void swap_8in16(uint32 w, uint32 h, uint32 bpr);
+
+ void conv_Pal1_to_8888(int srcIndex);
+ void conv_Pal2_to_8888(int srcIndex);
+ void conv_Pal4_to_8888(int srcIndex);
+ void conv_Pal8_to_8888(int srcIndex);
+
+ void conv_555_to_8888();
+ void conv_565_to_8888();
+ void conv_888_to_8888();
+ void conv_555_to_565();
+ void conv_565_to_555();
+ void conv_8888_to_X32F();
+ void conv_8_to_32F();
+ void conv_16F_to_32F();
+ void conv_V210_to_32F();
+
+ void conv_8888_to_555();
+ void conv_8888_to_565();
+ void conv_8888_to_888();
+ void conv_32F_to_8();
+ void conv_X32F_to_8888();
+ void conv_32F_to_16F();
+ void conv_32F_to_V210();
+
+ void convd_8888_to_555();
+ void convd_8888_to_565();
+ void convd_32F_to_8();
+ void convd_X32F_to_8888();
+
+ void interleave_B8G8_R8G8();
+ void interleave_G8B8_G8R8();
+ void interleave_X8R8G8B8();
+ void interleave_B8R8();
+
+ void ycbcr601_to_rgb32();
+ void ycbcr709_to_rgb32();
+ void rgb32_to_ycbcr601();
+ void rgb32_to_ycbcr709();
+
+ void ycbcr601_to_rgb32_32f();
+ void ycbcr709_to_rgb32_32f();
+ void rgb32_to_ycbcr601_32f();
+ void rgb32_to_ycbcr709_32f();
+
+ void ycbcr601_to_ycbcr709();
+ void ycbcr709_to_ycbcr601();
+
+ void pointh(float xoffset, float xfactor, uint32 w);
+ void pointv(float yoffset, float yfactor, uint32 h);
+ void linearh(float xoffset, float xfactor, uint32 w, bool interpOnly);
+ void linearv(float yoffset, float yfactor, uint32 h, bool interpOnly);
+ void linear(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h);
+ void cubich(float xoffset, float xfactor, uint32 w, float splineFactor, bool interpOnly);
+ void cubicv(float yoffset, float yfactor, uint32 h, float splineFactor, bool interpOnly);
+ void cubic(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h, float splineFactor);
+ void lanczos3h(float xoffset, float xfactor, uint32 w);
+ void lanczos3v(float yoffset, float yfactor, uint32 h);
+ void lanczos3(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h);
+
+ IVDPixmapBlitter *create();
+
+protected:
+ void MarkDependency(IVDPixmapGen *dst, IVDPixmapGen *src);
+
+ struct StackEntry {
+ IVDPixmapGen *mpSrc;
+ uint32 mSrcIndex;
+
+ StackEntry() {}
+ StackEntry(IVDPixmapGen *src, uint32 index) : mpSrc(src), mSrcIndex(index) {}
+ };
+
+ vdfastvector<StackEntry> mStack;
+
+ typedef vdfastvector<IVDPixmapGen *> Generators;
+ Generators mGenerators;
+
+ struct Dependency {
+ int mDstIdx;
+ int mSrcIdx;
+ };
+
+ vdfastvector<Dependency> mDependencies;
+
+ typedef VDPixmapUberBlitter::SourceEntry SourceEntry;
+ vdfastvector<SourceEntry> mSources;
+};
+
+void VDPixmapGenerate(void *dst, ptrdiff_t pitch, sint32 bpr, sint32 height, IVDPixmapGen *gen, int genIndex);
+IVDPixmapBlitter *VDCreatePixmapUberBlitterDirectCopy(const VDPixmap& dst, const VDPixmap& src);
+IVDPixmapBlitter *VDCreatePixmapUberBlitterDirectCopy(const VDPixmapLayout& dst, const VDPixmapLayout& src);
+
+#endif
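VDPixmapUberBlitterGenerator, declared above, is a small stack machine: ldsrc() and ldconst() push row generators, the conv_* / interleave_* / resampling verbs pop their operands and push the converted stage, and create() turns whatever remains on the stack into the output planes of the finished blitter. The fragment below is a hypothetical use shown only to illustrate that flow; every concrete value in it (plane index, origin, bytes per row, and especially the format token) is an assumption for the sketch, not something specified by this header.

// Editorial sketch (not part of the commit): build a blitter that expands an
// X1R5G5B5 plane to X8R8G8B8. The token composition below is an assumption.
IVDPixmapBlitter *SketchBuild555To8888(uint32 w, uint32 h) {
    VDPixmapUberBlitterGenerator gen;

    const uint32 srcToken = kVDPixSpace_BGR | kVDPixSamp_444 | kVDPixType_1555_LE;

    gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w * 2);   // push the 16-bit source plane
    gen.conv_555_to_8888();                         // pop it, push an 8888 conversion stage
    return gen.create();                            // remaining stack entries become outputs
}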
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_input.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_input.h
new file mode 100644
index 000000000..bfd5ebad5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_input.h
@@ -0,0 +1,69 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_INPUT_H
+#define f_VD2_KASUMI_UBERBLIT_INPUT_H
+
+#include "uberblit.h"
+#include "uberblit_base.h"
+
+class IVDPixmapGenSrc {
+public:
+ virtual void SetSource(const void *src, ptrdiff_t pitch, const uint32 *palette) = 0;
+};
+
+class VDPixmapGenSrc : public IVDPixmapGen, public IVDPixmapGenSrc {
+public:
+ void Init(sint32 width, sint32 height, uint32 type, uint32 bpr) {
+ mWidth = width;
+ mHeight = height;
+ mType = type;
+ mBpr = bpr;
+ }
+
+ void SetSource(const void *src, ptrdiff_t pitch, const uint32 *palette) {
+ mpSrc = src;
+ mPitch = pitch;
+ }
+
+ void AddWindowRequest(int minY, int maxY) {
+ }
+
+ void Start() {
+ }
+
+ sint32 GetWidth(int) const {
+ return mWidth;
+ }
+
+ sint32 GetHeight(int) const {
+ return mHeight;
+ }
+
+ bool IsStateful() const {
+ return false;
+ }
+
+ const void *GetRow(sint32 y, uint32 output) {
+ if (y < 0)
+ y = 0;
+ else if (y >= mHeight)
+ y = mHeight - 1;
+ return vdptroffset(mpSrc, mPitch*y);
+ }
+
+ void ProcessRow(void *dst, sint32 y) {
+ memcpy(dst, GetRow(y, 0), mBpr);
+ }
+
+ uint32 GetType(uint32 index) const {
+ return mType;
+ }
+
+protected:
+ const void *mpSrc;
+ ptrdiff_t mPitch;
+ size_t mBpr;
+ sint32 mWidth;
+ sint32 mHeight;
+ uint32 mType;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_pal.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_pal.h
new file mode 100644
index 000000000..e3958b458
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_pal.h
@@ -0,0 +1,148 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_PAL_H
+#define f_VD2_KASUMI_UBERBLIT_PAL_H
+
+#include "uberblit_base.h"
+#include "uberblit_input.h"
+
+class VDPixmapGenBase_Pal_To_X8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple, public IVDPixmapGenSrc {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ void Init(IVDPixmapGen *src, int srcIndex) {
+ InitSource(src, srcIndex);
+ }
+
+ void SetSource(const void *src, ptrdiff_t pitch, const uint32 *palette) {
+ mpPal = palette;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ const uint32 *mpPal;
+};
+
+class VDPixmapGen_Pal1_To_X8R8G8B8 : public VDPixmapGenBase_Pal_To_X8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+ sint32 h = mHeight;
+
+ const uint32 *pal = mpPal;
+
+ src += (w-1) >> 3;
+ dst += (w-1) & ~7;
+
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: dst[7] = pal[v&1]; v >>= 1;
+ case 7: dst[6] = pal[v&1]; v >>= 1;
+ case 6: dst[5] = pal[v&1]; v >>= 1;
+ case 5: dst[4] = pal[v&1]; v >>= 1;
+ case 4: dst[3] = pal[v&1]; v >>= 1;
+ case 3: dst[2] = pal[v&1]; v >>= 1;
+ case 2: dst[1] = pal[v&1]; v >>= 1;
+ case 1: dst[0] = pal[v&1]; v >>= 1;
+
+ dst -= 8;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+ }
+};
+
+class VDPixmapGen_Pal2_To_X8R8G8B8 : public VDPixmapGenBase_Pal_To_X8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+ sint32 h = mHeight;
+
+ const uint32 *pal = mpPal;
+
+ src += (w-1) >> 2;
+ dst += (w-1) & ~3;
+
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: dst[3] = pal[v&3]; v >>= 2;
+ case 3: dst[2] = pal[v&3]; v >>= 2;
+ case 2: dst[1] = pal[v&3]; v >>= 2;
+ case 1: dst[0] = pal[v&3]; v >>= 2;
+
+ dst -= 4;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+ }
+};
+
+class VDPixmapGen_Pal4_To_X8R8G8B8 : public VDPixmapGenBase_Pal_To_X8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+ sint32 h = mHeight;
+
+ const uint32 *pal = mpPal;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1);
+
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: dst[1] = pal[v&15]; v >>= 4;
+ case 1: dst[0] = pal[v&15]; v >>= 4;
+
+ dst -= 2;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+ }
+};
+
+class VDPixmapGen_Pal8_To_X8R8G8B8 : public VDPixmapGenBase_Pal_To_X8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+ sint32 h = mHeight;
+
+ const uint32 *pal = mpPal;
+
+ int wt = w;
+
+ do {
+ *dst++ = pal[*src++];
+ } while(--wt);
+ }
+};
+
+#endif
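A note on the Compute() loops above: each one walks the scanline right to left, and the switch is a Duff's-device jump into the unrolled body, so the leftover pixels of the final, partially filled source byte are expanded first from the pre-shifted value v; every subsequent iteration then consumes one full source byte. Pixels are packed most-significant-bit first, which is why each group is written from its last destination slot down to dst[0]. For example, in the Pal1 case with w = 10: src starts at byte 1 and dst at pixel 8, wt & 7 == 2 jumps to case 2, pixels 9 and 8 are written from v = src[0] >> 6, and the loop then steps back to byte 0 and fills pixels 7 through 0.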
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample.h
new file mode 100644
index 000000000..a3bb7e70c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample.h
@@ -0,0 +1,83 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RESAMPLE_H
+#define f_VD2_KASUMI_UBERBLIT_RESAMPLE_H
+
+#include <vd2/system/vdstl.h>
+#include <vd2/system/math.h>
+#include "uberblit.h"
+#include "uberblit_base.h"
+#include <vd2/Kasumi/resample_kernels.h>
+
+class IVDResamplerSeparableRowStage;
+class IVDResamplerSeparableRowStage2;
+class IVDResamplerSeparableColStage;
+
+namespace nsVDPixmap {
+ enum FilterMode {
+ kFilterPoint,
+ kFilterLinear,
+ kFilterCubic,
+ kFilterLanczos3,
+ kFilterCount
+ };
+}
+
+class VDPixmapGenResampleRow : public VDPixmapGenWindowBasedOneSource {
+public:
+ VDPixmapGenResampleRow();
+ ~VDPixmapGenResampleRow();
+
+ void Init(IVDPixmapGen *src, uint32 srcIndex, uint32 width, float offset, float step, nsVDPixmap::FilterMode filterMode, float filterFactor, bool interpolationOnly);
+
+ void Start();
+
+ uint32 GetType(uint32 output) const {
+ return mpSrc->GetType(mSrcIndex);
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y);
+ void Compute8(void *dst0, sint32 y);
+ void Compute32(void *dst0, sint32 y);
+ void Compute128(void *dst0, sint32 y);
+
+ IVDResamplerSeparableRowStage *mpRowStage;
+ IVDResamplerSeparableRowStage2 *mpRowStage2;
+
+ uint32 mRowFiltW;
+ uint32 mBytesPerSample;
+
+ VDResamplerAxis mAxis;
+
+ vdblock<void *> mWindow;
+ void **mpAllocWindow;
+ vdblock<uint32, vdaligned_alloc<uint32> > mTempSpace;
+};
+
+class VDPixmapGenResampleCol : public VDPixmapGenWindowBasedOneSource {
+public:
+ VDPixmapGenResampleCol();
+ ~VDPixmapGenResampleCol();
+
+ void Init(IVDPixmapGen *src, uint32 srcIndex, uint32 height, float offset, float step, nsVDPixmap::FilterMode filterMode, float filterFactor, bool interpolationOnly);
+
+ void Start();
+
+ uint32 GetType(uint32 output) const {
+ return mpSrc->GetType(mSrcIndex);
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y);
+
+ IVDResamplerSeparableColStage *mpColStage;
+
+ uint32 mWinSize;
+ uint32 mBytesPerSample;
+ uint32 mBytesPerRow;
+
+ VDResamplerAxis mAxis;
+
+ vdblock<const void *> mWindow;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special.h
new file mode 100644
index 000000000..0f97ba1cf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special.h
@@ -0,0 +1,81 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RESAMPLE_SPECIAL_H
+#define f_VD2_KASUMI_UBERBLIT_RESAMPLE_SPECIAL_H
+
+#include <vd2/system/vdstl.h>
+#include <vd2/system/math.h>
+#include "uberblit.h"
+#include "uberblit_base.h"
+
+class VDPixmapGenResampleRow_d2_p0_lin_u8 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleRow_d4_p0_lin_u8 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleRow_x2_p0_lin_u8 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleRow_x4_p0_lin_u8 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_x2_phalf_lin_u8: public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_x4_p1half_lin_u8: public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_d2_pnqrtr_lin_u8: public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_d4_pn38_lin_u8: public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special_x86.h
new file mode 100644
index 000000000..6634869aa
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special_x86.h
@@ -0,0 +1,26 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RESAMPLE_SPECIAL_X86_H
+#define f_VD2_KASUMI_UBERBLIT_RESAMPLE_SPECIAL_X86_H
+
+#include "uberblit_resample_special.h"
+
+class VDPixmapGenResampleRow_x2_p0_lin_u8_ISSE : public VDPixmapGenResampleRow_x2_p0_lin_u8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleRow_x4_p0_lin_u8_MMX : public VDPixmapGenResampleRow_x4_p0_lin_u8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_d2_pnqrtr_lin_u8_ISSE: public VDPixmapGenResampleCol_d2_pnqrtr_lin_u8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_d4_pn38_lin_u8_ISSE: public VDPixmapGenResampleCol_d4_pn38_lin_u8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb.h
new file mode 100644
index 000000000..21925af2a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb.h
@@ -0,0 +1,552 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RGB_H
+#define f_VD2_KASUMI_UBERBLIT_RGB_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16-bit crossconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X1R5G5B5_To_R5G6B5 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+
+ px += (px & 0x7fe0);
+ px += (px & 0x400) >> 5;
+
+ dst[i] = (uint16)px;
+ }
+ }
+};
+
+class VDPixmapGen_R5G6B5_To_X1R5G5B5 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+
+ px &= 0xffdf;
+ px -= (px & 0xffc0) >> 1;
+
+ dst[i] = (uint16)px;
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit upconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X1R5G5B5_To_X8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+ uint32 px5 = ((px & 0x7c00) << 9) + ((px & 0x03e0) << 6) + ((px & 0x001f) << 3);
+
+ dst[i] = px5 + ((px5 >> 5) & 0x070707);
+ }
+ }
+};
+
+class VDPixmapGen_R5G6B5_To_X8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+ uint32 px_rb5 = ((px & 0xf800) << 8) + ((px & 0x001f) << 3);
+ uint32 px_g6 = ((px & 0x07e0) << 5);
+
+ dst[i] = px_rb5 + px_g6 + (((px_rb5 >> 5) + (px_g6 >> 6)) & 0x070307);
+ }
+ }
+};
+
+class VDPixmapGen_R8G8B8_To_A8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+ dst[3] = 255;
+ dst += 4;
+ src += 3;
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit downconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X8R8G8B8_To_X1R5G5B5 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+
+ dst[i] = ((px >> 9) & 0x7c00) + ((px >> 6) & 0x03e0) + ((px >> 3) & 0x001f);
+ }
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R5G6B5 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+
+ dst[i] = ((px >> 8) & 0xf800) + ((px >> 5) & 0x07e0) + ((px >> 3) & 0x001f);
+ }
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 3);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+
+ dst += 3;
+ src += 4;
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit downconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X8R8G8B8_To_X1R5G5B5_Dithered : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ static const uint32 kDitherMatrix[4][4][2]={
+ { 0x00000000, 0x00000000, 0x04000400, 0x00040000, 0x01000100, 0x00010000, 0x05000500, 0x00050000 },
+ { 0x06000600, 0x00060000, 0x02000200, 0x00020000, 0x07000700, 0x00070000, 0x03000300, 0x00030000 },
+ { 0x01800180, 0x00018000, 0x05800580, 0x00058000, 0x00800080, 0x00008000, 0x04800480, 0x00048000 },
+ { 0x07800780, 0x00078000, 0x03800380, 0x00038000, 0x06800680, 0x00068000, 0x02800280, 0x00028000 },
+ };
+
+ const uint32 (*drow)[2] = kDitherMatrix[y & 3];
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+ uint32 drg = drow[i & 3][0];
+ uint32 db = drow[i & 3][1];
+ uint32 rb = (px & 0xff00ff) * 249 + drg;
+ uint32 g = (px & 0xff00) * 249 + db;
+
+ dst[i] = ((rb >> 17) & 0x7c00) + ((g >> 14) & 0x03e0) + ((rb >> 11) & 0x001f);
+ }
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R5G6B5_Dithered : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ static const uint32 kDitherMatrix[4][4][2]={
+ { 0x00000000, 0x00000000, 0x04000400, 0x00020000, 0x01000100, 0x00008000, 0x05000500, 0x00028000 },
+ { 0x06000600, 0x00030000, 0x02000200, 0x00010000, 0x07000700, 0x00038000, 0x03000300, 0x00018000 },
+ { 0x01800180, 0x0000c000, 0x05800580, 0x0002c000, 0x00800080, 0x00004000, 0x04800480, 0x00024000 },
+ { 0x07800780, 0x0003c000, 0x03800380, 0x0001c000, 0x06800680, 0x00034000, 0x02800280, 0x00014000 },
+ };
+
+ const uint32 (*drow)[2] = kDitherMatrix[y & 3];
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+ uint32 drg = drow[i & 3][0];
+ uint32 db = drow[i & 3][1];
+ uint32 rb = (px & 0xff00ff) * 249 + drg;
+ uint32 g = (px & 0xff00) * 253 + db;
+
+ dst[i] = ((rb >> 16) & 0xf800) + ((g >> 13) & 0x07e0) + ((rb >> 11) & 0x001f);
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32F upconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_8_To_32F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<w; ++i)
+ *dst++ = (float)*src++ * (1.0f / 255.0f);
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_X32B32G32R32F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 16);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_32Fx4_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<w; ++i) {
+ dst[0] = (float)src[2] * (1.0f / 255.0f);
+ dst[1] = (float)src[1] * (1.0f / 255.0f);
+ dst[2] = (float)src[0] * (1.0f / 255.0f);
+ dst[3] = 1.0f;
+ dst += 4;
+ src += 4;
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32F downconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_32F_To_8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<w; ++i) {
+ float b = *src++;
+
+ uint32 ib = VDClampedRoundFixedToUint8Fast(b);
+
+ dst[i] = (uint8)ib;
+ }
+ }
+};
+
+class VDPixmapGen_32F_To_8_Dithered : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+ VDCPUCleanupExtensions();
+
+ sint32 w = mWidth;
+
+#define X(v) ((v) - 0x49400000)
+
+ static const sint32 kDitherMatrix[4][4]={
+ { X( 0), X( 8), X( 2), X(10), },
+ { X(12), X( 4), X(14), X( 6), },
+ { X( 3), X(11), X( 1), X( 9), },
+ { X(15), X( 7), X(13), X( 5), },
+ };
+
+#undef X
+
+ const sint32 *pDitherRow = kDitherMatrix[y & 3];
+
+ for(sint32 i=0; i<w; ++i) {
+ float b = *src++;
+
+ sint32 addend = pDitherRow[i & 3];
+ union {
+ float f;
+ sint32 i;
+ } cb = {b * 255.0f + 786432.0f};
+
+ sint32 vb = ((sint32)cb.i + addend) >> 4;
+
+ if ((uint32)vb >= 0x100)
+ vb = (uint8)(~vb >> 31);
+
+ dst[i] = (uint8)vb;
+ }
+ }
+};
+
+class VDPixmapGen_X32B32G32R32F_To_X8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+
+ VDCPUCleanupExtensions();
+
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ float r = src[0];
+ float g = src[1];
+ float b = src[2];
+ src += 4;
+
+ uint32 ir = VDClampedRoundFixedToUint8Fast(r) << 16;
+ uint32 ig = VDClampedRoundFixedToUint8Fast(g) << 8;
+ uint32 ib = VDClampedRoundFixedToUint8Fast(b);
+
+ dst[i] = ir + ig + ib;
+ }
+ }
+};
+
+class VDPixmapGen_X32B32G32R32F_To_X8R8G8B8_Dithered : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+
+ VDCPUCleanupExtensions();
+
+ sint32 w = mWidth;
+
+#define X(v) ((v) - 0x49400000)
+
+ static const sint32 kDitherMatrix[4][4]={
+ { X( 0), X( 8), X( 2), X(10), },
+ { X(12), X( 4), X(14), X( 6), },
+ { X( 3), X(11), X( 1), X( 9), },
+ { X(15), X( 7), X(13), X( 5), },
+ };
+
+#undef X
+
+ const sint32 *pDitherRow = kDitherMatrix[y & 3];
+
+ for(sint32 i=0; i<w; ++i) {
+ float r = src[0];
+ float g = src[1];
+ float b = src[2];
+ src += 4;
+
+ sint32 addend = pDitherRow[i & 3];
+ union {
+ float f;
+ sint32 i;
+ } cr = {r * 255.0f + 786432.0f},
+ cg = {g * 255.0f + 786432.0f},
+ cb = {b * 255.0f + 786432.0f};
+
+ sint32 vr = ((sint32)cr.i + addend) >> 4;
+ sint32 vg = ((sint32)cg.i + addend) >> 4;
+ sint32 vb = ((sint32)cb.i + addend) >> 4;
+
+ if ((uint32)vr >= 0x100)
+ vr = (uint8)(~vr >> 31);
+
+ if ((uint32)vg >= 0x100)
+ vg = (uint8)(~vg >> 31);
+
+ if ((uint32)vb >= 0x100)
+ vb = (uint8)(~vb >> 31);
+
+ dst[i] = (vr << 16) + (vg << 8) + vb;
+ }
+ }
+};
+
+#endif
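One arithmetic detail worth noting in the upconverters above: dst[i] = px5 + ((px5 >> 5) & 0x070707) in VDPixmapGen_X1R5G5B5_To_X8R8G8B8 is the standard bit-replication expansion. Per channel it evaluates to (v << 3) + (v >> 2) for the 5-bit value v, so 0 maps to 0 and 31 maps to 255 with no multiply or divide; VDPixmapGen_R5G6B5_To_X8R8G8B8 uses the same trick with the mask 0x070307 because its green channel carries six bits rather than five.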
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb_x86.h
new file mode 100644
index 000000000..ececed120
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb_x86.h
@@ -0,0 +1,114 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RGB_X86_H
+#define f_VD2_KASUMI_UBERBLIT_RGB_X86_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+extern "C" void vdasm_pixblt_XRGB1555_to_XRGB8888_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_RGB565_to_XRGB8888_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_RGB565_to_XRGB1555_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_XRGB1555_to_RGB565_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_XRGB8888_to_XRGB1555_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_XRGB8888_to_RGB565_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_XRGB8888_to_RGB888_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_RGB888_to_XRGB8888_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16-bit crossconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X1R5G5B5_To_R5G6B5_MMX : public VDPixmapGen_X1R5G5B5_To_R5G6B5 {
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB1555_to_RGB565_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_R5G6B5_To_X1R5G5B5_MMX : public VDPixmapGen_R5G6B5_To_X1R5G5B5 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_RGB565_to_XRGB1555_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit upconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X1R5G5B5_To_X8R8G8B8_MMX : public VDPixmapGen_X1R5G5B5_To_X8R8G8B8 {
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB1555_to_XRGB8888_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_R5G6B5_To_X8R8G8B8_MMX : public VDPixmapGen_R5G6B5_To_X8R8G8B8 {
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_RGB565_to_XRGB8888_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_R8G8B8_To_X8R8G8B8_MMX : public VDPixmapGen_R8G8B8_To_A8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_RGB888_to_XRGB8888_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit downconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X8R8G8B8_To_X1R5G5B5_MMX : public VDPixmapGen_X8R8G8B8_To_X1R5G5B5 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB8888_to_XRGB1555_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R5G6B5_MMX : public VDPixmapGen_X8R8G8B8_To_R5G6B5 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB8888_to_RGB565_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R8G8B8_MMX : public VDPixmapGen_X8R8G8B8_To_R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB8888_to_RGB888_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle.h
new file mode 100644
index 000000000..a87fe1f5c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle.h
@@ -0,0 +1,343 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_UBERBLIT_SWIZZLE_H
+#define f_VD2_KASUMI_UBERBLIT_SWIZZLE_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// generic converters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_Swap8In16 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *gen, int srcIndex, uint32 w, uint32 h, uint32 bpr);
+ void Start();
+
+ uint32 GetType(uint32 index) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+
+ uint32 mRowLength;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit deinterleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_8In16 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *gen, int srcIndex, int offset, uint32 w, uint32 h) {
+ InitSource(gen, srcIndex);
+ mOffset = offset;
+ SetOutputSize(w, h);
+ gen->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ StartWindow(mWidth);
+ }
+
+ uint32 GetType(uint32 index) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex) + mOffset;
+ uint8 *dst = (uint8 *)dst0;
+ sint32 w = mWidth;
+ for(sint32 x=0; x<w; ++x) {
+ *dst++ = *srcp;
+ srcp += 2;
+ }
+ }
+
+ int mOffset;
+};
+
+class VDPixmapGen_8In32 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *gen, int srcIndex, int offset, uint32 w, uint32 h) {
+ InitSource(gen, srcIndex);
+ mOffset = offset;
+ SetOutputSize(w, h);
+ gen->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ StartWindow(mWidth);
+ }
+
+ uint32 GetType(uint32 index) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex) + mOffset;
+ uint8 *dst = (uint8 *)dst0;
+ sint32 w = mWidth;
+ for(sint32 x=0; x<w; ++x) {
+ *dst++ = *srcp;
+ srcp += 4;
+ }
+ }
+
+ int mOffset;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16-bit interleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_B8x2_To_B8R8 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCb, uint32 srcindexCb, IVDPixmapGen *srcCr, uint32 srcindexCr);
+ void Start();
+ uint32 GetType(uint32 output) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit interleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_B8x3_To_G8B8_G8R8 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCr, uint32 srcindexCr, IVDPixmapGen *srcY, uint32 srcindexY, IVDPixmapGen *srcCb, uint32 srcindexCb) {
+ mpSrcY = srcY;
+ mSrcIndexY = srcindexY;
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcY->GetWidth(srcindexY);
+ mHeight = srcY->GetHeight(srcindexY);
+
+ srcY->AddWindowRequest(0, 0);
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(((mWidth + 1) & ~1) * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~kVDPixType_Mask) | kVDPixType_B8G8_R8G8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *)dst0;
+ const uint8 *VDRESTRICT srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *VDRESTRICT srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ sint32 w = mWidth >> 1;
+ for(sint32 x=0; x<w; ++x) {
+ uint8 y1 = srcY[0];
+ uint8 cb = srcCb[0];
+ uint8 y2 = srcY[1];
+ uint8 cr = srcCr[0];
+
+ dst[0] = y1;
+ dst[1] = cb;
+ dst[2] = y2;
+ dst[3] = cr;
+
+ srcY += 2;
+ ++srcCb;
+ ++srcCr;
+ dst += 4;
+ }
+
+ if (mWidth & 1) {
+ uint8 y1 = srcY[0];
+ uint8 cb = srcCb[0];
+ uint8 cr = srcCr[0];
+
+ dst[0] = y1;
+ dst[1] = cb;
+ dst[2] = y1;
+ dst[3] = cr;
+ }
+ }
+
+ IVDPixmapGen *mpSrcY;
+ uint32 mSrcIndexY;
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+class VDPixmapGen_B8x3_To_B8G8_R8G8 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCr, uint32 srcindexCr, IVDPixmapGen *srcY, uint32 srcindexY, IVDPixmapGen *srcCb, uint32 srcindexCb) {
+ mpSrcY = srcY;
+ mSrcIndexY = srcindexY;
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcY->GetWidth(srcindexY);
+ mHeight = srcY->GetHeight(srcindexY);
+
+ srcY->AddWindowRequest(0, 0);
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(((mWidth + 1) & ~1) * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~kVDPixType_Mask) | kVDPixType_G8B8_G8R8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 * VDRESTRICT dst = (uint8 *)dst0;
+ const uint8 *VDRESTRICT srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *VDRESTRICT srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ sint32 w2 = mWidth >> 1;
+ for(sint32 x=0; x<w2; ++x) {
+ uint8 cb = srcCb[0];
+ uint8 y1 = srcY[0];
+ uint8 cr = srcCr[0];
+ uint8 y2 = srcY[1];
+
+ dst[0] = cb;
+ dst[1] = y1;
+ dst[2] = cr;
+ dst[3] = y2;
+ dst += 4;
+ srcY += 2;
+ ++srcCb;
+ ++srcCr;
+ }
+
+ if (mWidth & 1) {
+ uint8 cb = srcCb[0];
+ uint8 y1 = srcY[0];
+ uint8 cr = srcCr[0];
+
+ dst[0] = cb;
+ dst[1] = y1;
+ dst[2] = cr;
+ dst[3] = y1;
+ }
+ }
+
+ IVDPixmapGen *mpSrcY;
+ uint32 mSrcIndexY;
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+class VDPixmapGen_B8x3_To_X8R8G8B8 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCr, uint32 srcindexCr, IVDPixmapGen *srcY, uint32 srcindexY, IVDPixmapGen *srcCb, uint32 srcindexCb) {
+ mpSrcY = srcY;
+ mSrcIndexY = srcindexY;
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcY->GetWidth(srcindexY);
+ mHeight = srcY->GetHeight(srcindexY);
+
+ srcY->AddWindowRequest(0, 0);
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ for(sint32 x=0; x<mWidth; ++x) {
+ uint8 y = *srcY++;
+ uint8 cb = *srcCb++;
+ uint8 cr = *srcCr++;
+
+ dst[0] = cb;
+ dst[1] = y;
+ dst[2] = cr;
+ dst[3] = 255;
+ dst += 4;
+ }
+ }
+
+ IVDPixmapGen *mpSrcY;
+ uint32 mSrcIndexY;
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle_x86.h
new file mode 100644
index 000000000..fecec9a53
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle_x86.h
@@ -0,0 +1,71 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_UBERBLIT_SWIZZLE_X86_H
+#define f_VD2_KASUMI_UBERBLIT_SWIZZLE_X86_H
+
+#include "uberblit_swizzle.h"
+
+class VDPixmapGen_8In16_Even_MMX : public VDPixmapGen_8In16 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGen_8In16_Odd_MMX : public VDPixmapGen_8In16 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGen_8In32_MMX : public VDPixmapGen_8In32 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGen_Swap8In16_MMX : public VDPixmapGen_Swap8In16 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16-bit interleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_B8x2_To_B8R8_MMX : public VDPixmapGen_B8x2_To_B8R8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit interleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_B8x3_To_G8B8_G8R8_MMX : public VDPixmapGen_B8x3_To_G8B8_G8R8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGen_B8x3_To_B8G8_R8G8_MMX : public VDPixmapGen_B8x3_To_B8G8_R8G8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_v210.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_v210.h
new file mode 100644
index 000000000..aa734aa36
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_v210.h
@@ -0,0 +1,72 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_V210_H
+#define f_VD2_KASUMI_UBERBLIT_V210_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32F -> V210
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_32F_To_V210 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcR, uint32 srcindexR, IVDPixmapGen *srcG, uint32 srcindexG, IVDPixmapGen *srcB, uint32 srcindexB) {
+ mpSrcR = srcR;
+ mSrcIndexR = srcindexR;
+ mpSrcG = srcG;
+ mSrcIndexG = srcindexG;
+ mpSrcB = srcB;
+ mSrcIndexB = srcindexB;
+ mWidth = srcG->GetWidth(srcindexG);
+ mHeight = srcG->GetHeight(srcindexG);
+
+ srcR->AddWindowRequest(0, 0);
+ srcG->AddWindowRequest(0, 0);
+ srcB->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ mpSrcR->Start();
+ mpSrcG->Start();
+ mpSrcB->Start();
+
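+		// v210 packs 6 pixels into each 16-byte group of four 32-bit words,
+		// and rows are padded out to 48-pixel (128-byte) groups, hence the
+		// rounding below.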
+ int qw = (mWidth + 47) / 48;
+ StartWindow(qw * 128);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcG->GetType(mSrcIndexG) & ~kVDPixType_Mask) | kVDPixType_V210;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y);
+
+ IVDPixmapGen *mpSrcR;
+ uint32 mSrcIndexR;
+ IVDPixmapGen *mpSrcG;
+ uint32 mSrcIndexG;
+ IVDPixmapGen *mpSrcB;
+ uint32 mSrcIndexB;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// V210 -> 32F
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_V210_To_32F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start();
+ const void *GetRow(sint32 y, uint32 index);
+
+ sint32 GetWidth(int index) const;
+ uint32 GetType(uint32 output) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif // f_VD2_KASUMI_UBERBLIT_V210_H
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr.h
new file mode 100644
index 000000000..2eb62da01
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr.h
@@ -0,0 +1,584 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_YCBCR_H
+#define f_VD2_KASUMI_UBERBLIT_YCBCR_H
+
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/math.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "uberblit.h"
+#include "uberblit_base.h"
+
+class VDPixmapGenYCbCrToRGBBase : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCr, uint32 srcindexCr, IVDPixmapGen *srcY, uint32 srcindexY, IVDPixmapGen *srcCb, uint32 srcindexCb) {
+ mpSrcY = srcY;
+ mSrcIndexY = srcindexY;
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcY->GetWidth(srcindexY);
+ mHeight = srcY->GetHeight(srcindexY);
+
+ srcY->AddWindowRequest(0, 0);
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+ }
+
+
+protected:
+ IVDPixmapGen *mpSrcY;
+ uint32 mSrcIndexY;
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Rec.601 converters
+//
+// -->Kr=0.299; Kb=0.114; Z=0; S=255; L = [Kr 1-Kr-Kb Kb]; Y = [219*(L-Z)/S 16]; U = [112*([0 0 1]-L)/((1-Kb)*S) 128]; V =
+// [112*([1 0 0]-L)/((1-Kr)*S) 128]; M = [Y; U; V; 0 0 0 1]; disp(M); disp(inv(M));
+//
+// ! 0.2567882 0.5041294 0.0979059 16. !
+// ! - 0.1482229 - 0.2909928 0.4392157 128. !
+// ! 0.4392157 - 0.3677883 - 0.0714274 128. !
+// ! 0. 0. 0. 1. !
+//
+// ! 1.1643836 - 5.599D-17 1.5960268 - 222.92157 !
+// ! 1.1643836 - 0.3917623 - 0.8129676 135.57529 !
+// ! 1.1643836 2.0172321 - 1.110D-16 - 276.83585 !
+// ! 0. 0. 0. 1. !
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
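+// The converters below use rounded entries from the inverse matrix above:
+// 1.164 for the Y scale, 1.596 for Cr->R, 0.813 and 0.391 for the Cr/Cb
+// contributions to G, and 2.018 for Cb->B, with the usual 16 luma and 128
+// chroma offsets.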
+
+class VDPixmapGenYCbCr601ToRGB32 : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8888 | kVDPixSpace_BGR;
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ sint32 y = srcY[i];
+ sint32 cb = srcCb[i];
+ sint32 cr = srcCr[i];
+
+ float yf = (1.164f / 255.0f)*(y - 16);
+
+ dst[0] = VDClampedRoundFixedToUint8Fast(yf + (2.018f / 255.0f) * (cb - 128));
+ dst[1] = VDClampedRoundFixedToUint8Fast(yf - (0.813f / 255.0f) * (cr - 128) - (0.391f / 255.0f) * (cb - 128));
+ dst[2] = VDClampedRoundFixedToUint8Fast(yf + (1.596f / 255.0f) * (cr - 128));
+ dst[3] = 0xff;
+
+ dst += 4;
+ }
+ }
+};
+
+class VDPixmapGenYCbCr601ToRGB32F : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 16);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32Fx4_LE | kVDPixSpace_BGR;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const float *srcY = (const float *)mpSrcY->GetRow(y, mSrcIndexY);
+ const float *srcCb = (const float *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const float *srcCr = (const float *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float y = srcY[i];
+ float cb = srcCb[i] - (128.0f / 255.0f);
+ float cr = srcCr[i] - (128.0f / 255.0f);
+
+ float yf = 1.164f * (y - 16.0f / 255.0f);
+
+ dst[0] = yf + 1.596f * cr;
+ dst[1] = yf - 0.813f * cr - 0.391f * cb;
+ dst[2] = yf + 2.018f * cb;
+ dst[3] = 1.0f;
+ dst += 4;
+ }
+ }
+};
+
+class VDPixmapGenRGB32ToYCbCr601 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+ }
+
+ void Start() {
+ StartWindow(mWidth, 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8 | kVDPixSpace_YCC_601;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dstCb = (uint8 *)dst0;
+ uint8 *dstY = dstCb + mWindowPitch;
+ uint8 *dstCr = dstY + mWindowPitch;
+
+ const uint8 *srcRGB = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ int r = (int)srcRGB[2];
+ int g = (int)srcRGB[1];
+ int b = (int)srcRGB[0];
+ srcRGB += 4;
+
+
+ // -2->round(inv([1 0 0 0; 0 1 0 0; 0 0 1 0; -16 -128 -128 1] * [1.1643828 1.1643828 1.1643828 0; 1.5960273 -0.8129688 0 0;
+ // 0 -0.3917617 2.0172305 0; 0 0 0 1]) .* 65536)
+ // ans =
+ //
+ // ! 16829. 28784. - 9714. 0. !
+ // ! 33039. - 24103. - 19071. 0. !
+ // ! 6416. - 4681. 28784. 0. !
+ // ! 1048576. 8388608. 8388608. 65536. !
+
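+			// The integer coefficients below are the matrix entries scaled by
+			// 65536 (e.g. round(0.4392157 * 65536) = 28784); 1048576 and
+			// 8388608 are the 16 and 128 offsets shifted left by 16, and the
+			// +32768 term rounds the 16.16 result before the final >> 16.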
+ *dstCb++ = (28784*r - 24103*g - 4681*b + 8388608 + 32768) >> 16;
+ *dstY ++ = (16829*r + 33039*g + 6416*b + 1048576 + 32768) >> 16;
+ *dstCr++ = (-9714*r - 19071*g + 28784*b + 8388608 + 32768) >> 16;
+ }
+ }
+};
+
+class VDPixmapGenRGB32FToYCbCr601 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+ }
+
+ void Start() {
+ StartWindow(mWidth * sizeof(float), 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+		return (mpSrc->GetType(mSrcIndex) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32F_LE | kVDPixSpace_YCC_601;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dstCb = (float *)dst0;
+ float *dstY = dstCb + mWindowPitch;
+ float *dstCr = dstY + mWindowPitch;
+
+ const float *srcRGB = (const float *)mpSrc->GetRow(y, mSrcIndex);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float r = srcRGB[2];
+ float g = srcRGB[1];
+ float b = srcRGB[0];
+ srcRGB += 4;
+
+ *dstCb++ = -0.1482229f*r - 0.2909928f*g + 0.4392157f*b + (128.0f / 255.0f);
+ *dstY++ = 0.2567882f*r + 0.5041294f*g + 0.0979059f*b + ( 16.0f / 255.0f);
+ *dstCr++ = 0.4392157f*r - 0.3677883f*g - 0.0714274f*b + (128.0f / 255.0f);
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Rec.709 converters
+//
+//
+// -->Kr=0.2126; Kb=0.0722; Z=0; S=255; L = [Kr 1-Kr-Kb Kb]; Y = [219*(L-Z)/S 16]; U = [112*([0 0 1]-L)/((1-Kb)*S) 128]; V
+// = [112*([1 0 0]-L)/((1-Kr)*S) 128]; M = [Y; U; V; 0 0 0 1]; disp(M); disp(inv(M));
+//
+// ! 0.1825859 0.6142306 0.0620071 16. !
+// ! - 0.1006437 - 0.3385720 0.4392157 128. !
+// ! 0.4392157 - 0.3989422 - 0.0402735 128. !
+// ! 0. 0. 0. 1. !
+//
+// ! 1.1643836 - 2.932D-17 1.7927411 - 248.10099 !
+// ! 1.1643836 - 0.2132486 - 0.5329093 76.87808 !
+// ! 1.1643836 2.1124018 - 5.551D-17 - 289.01757 !
+// ! 0. 0. 0. 1. !
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
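+// As in the Rec.601 case, the converters below use rounded inverse-matrix
+// entries: 1.793 for Cr->R, 0.533 and 0.213 for the Cr/Cb contributions to
+// G, and 2.112 for Cb->B.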
+
+class VDPixmapGenYCbCr709ToRGB32 : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8888 | kVDPixSpace_BGR;
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ sint32 y = srcY[i];
+ sint32 cb = srcCb[i];
+ sint32 cr = srcCr[i];
+
+ float yf = (1.164f / 255.0f)*(y - 16);
+
+ dst[0] = VDClampedRoundFixedToUint8Fast(yf + (2.112f / 255.0f) * (cb - 128));
+ dst[1] = VDClampedRoundFixedToUint8Fast(yf - (0.533f / 255.0f) * (cr - 128) - (0.213f / 255.0f) * (cb - 128));
+ dst[2] = VDClampedRoundFixedToUint8Fast(yf + (1.793f / 255.0f) * (cr - 128));
+ dst[3] = 0xff;
+
+ dst += 4;
+ }
+ }
+};
+
+class VDPixmapGenYCbCr709ToRGB32F : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 16);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32Fx4_LE | kVDPixSpace_BGR;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const float *srcY = (const float *)mpSrcY->GetRow(y, mSrcIndexY);
+ const float *srcCb = (const float *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const float *srcCr = (const float *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float y = srcY[i];
+ float cb = srcCb[i] - (128.0f/255.0f);
+ float cr = srcCr[i] - (128.0f/255.0f);
+
+ float yf = 1.164f * (y - 16.0f / 255.0f);
+
+ dst[0] = yf + 1.793f * cr;
+ dst[1] = yf - 0.533f * cr - 0.213f * cb;
+ dst[2] = yf + 2.112f * cb;
+ dst[3] = 1.0f;
+ dst += 4;
+ }
+ }
+};
+
+class VDPixmapGenRGB32ToYCbCr709 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+ }
+
+ void Start() {
+ StartWindow(mWidth, 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8 | kVDPixSpace_YCC_709;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dstCb = (uint8 *)dst0;
+ uint8 *dstY = dstCb + mWindowPitch;
+ uint8 *dstCr = dstY + mWindowPitch;
+
+ const uint8 *srcRGB = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ int r = (int)srcRGB[2];
+ int g = (int)srcRGB[1];
+ int b = (int)srcRGB[0];
+ srcRGB += 4;
+
+ *dstCb++ = (28784*r - 26145*g - 2639*b + 8388608 + 32768) >> 16;
+ *dstY ++ = (11966*r + 40254*g + 4064*b + 1048576 + 32768) >> 16;
+ *dstCr++ = (-6596*r - 22189*g + 28784*b + 8388608 + 32768) >> 16;
+ }
+ }
+};
+
+class VDPixmapGenRGB32FToYCbCr709 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+ }
+
+ void Start() {
+ StartWindow(mWidth * sizeof(float), 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32F_LE | kVDPixSpace_YCC_709;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dstCb = (float *)dst0;
+ float *dstY = dstCb + mWindowPitch;
+ float *dstCr = dstY + mWindowPitch;
+
+ const float *srcRGB = (const float *)mpSrc->GetRow(y, mSrcIndex);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float r = srcRGB[2];
+ float g = srcRGB[1];
+ float b = srcRGB[0];
+ srcRGB += 4;
+
+ *dstCb++ = -0.1006437f*r - 0.3385720f*g + 0.4392157f*b + (128.0f / 255.0f);
+ *dstY++ = 0.1825859f*r + 0.6142306f*g + 0.0620071f*b + ( 16.0f / 255.0f);
+ *dstCr++ = 0.4392157f*r - 0.3989422f*g - 0.0402735f*b + (128.0f / 255.0f);
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Rec.601 <-> Rec.709 converters
+//
+// Rec.601 to Rec.709:
+//
+// 1. - 0.1155497 - 0.2079376 41.406386
+// 0 1.0186397 0.1146180 - 17.056983
+// 0 0.0750494 1.0253271 - 12.848195
+//
+// Rec.709 to Rec.601:
+//
+// 1. 0.0993117 0.1916995 - 37.249435
+// 0 0.9898538 - 0.1106525 15.462234
+// 0 - 0.0724530 0.9833978 11.399058
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
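+// The 8-bit converters below apply these matrices in 16.16 fixed point,
+// e.g. 66758 ~= round(1.0186397 * 65536) and 2713609 ~= round(41.406386 *
+// 65536); the +32768 term rounds the result before the final >> 16.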
+
+class VDPixmapGenYCbCr601ToYCbCr709 : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth, 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenYCbCrToRGBBase::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8 | kVDPixSpace_YCC_709;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 ypos) {
+ uint8 *dstCr = (uint8 *)dst0;
+ uint8 *dstY = dstCr + mWindowPitch;
+ uint8 *dstCb = dstY + mWindowPitch;
+
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(ypos, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(ypos, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(ypos, mSrcIndexCr);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ sint32 y = srcY[i];
+ sint32 cb = srcCb[i];
+ sint32 cr = srcCr[i];
+
+ *dstY++ = y + ((-7573*cb - 13627*cr + 2713609 + 32768) >> 16);
+ *dstCb++ = (66758*cb + 7512*cr - 1117846 + 32768) >> 16;
+ *dstCr++ = (4918*cb + 67196*cr - 842019 + 32768) >> 16;
+ }
+ }
+};
+
+class VDPixmapGenYCbCr709ToYCbCr601 : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth, 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenYCbCrToRGBBase::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+		return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8 | kVDPixSpace_YCC_601;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 ypos) {
+ uint8 *dstCr = (uint8 *)dst0;
+ uint8 *dstY = dstCr + mWindowPitch;
+ uint8 *dstCb = dstY + mWindowPitch;
+
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(ypos, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(ypos, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(ypos, mSrcIndexCr);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ sint32 y = srcY[i];
+ sint32 cb = srcCb[i];
+ sint32 cr = srcCr[i];
+
+ *dstY++ = y + ((6508*cb + 12563*cr - 2441088 + 32768) >> 16);
+ *dstCb++ = (64871*cb - 7252*cr + 1013376 + 32768) >> 16;
+ *dstCr++ = (-4748*cb + 64448*cr + 747008 + 32768) >> 16;
+ }
+ }
+};
+
+class VDPixmapGenYCbCr601ToYCbCr709_32F : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * sizeof(float), 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenYCbCrToRGBBase::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32F_LE | kVDPixSpace_YCC_709;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 ypos) {
+ float *dstCr = (float *)dst0;
+ float *dstY = vdptroffset(dstCr, mWindowPitch);
+ float *dstCb = vdptroffset(dstY, mWindowPitch);
+
+ const float *srcY = (const float *)mpSrcY->GetRow(ypos, mSrcIndexY);
+ const float *srcCb = (const float *)mpSrcCb->GetRow(ypos, mSrcIndexCb);
+ const float *srcCr = (const float *)mpSrcCr->GetRow(ypos, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float y = srcY[i];
+ float cb = srcCb[i] - (128.0f / 255.0f);
+ float cr = srcCr[i] - (128.0f / 255.0f);
+
+ *dstY++ = y - 0.1155497f*cb - 0.2079376f*cr;
+ *dstCb++ = 1.0186397f*cb + 0.1146180f*cr + (128.0f / 255.0f);
+ *dstCr++ = 0.0750494f*cb + 1.0253271f*cr + (128.0f / 255.0f);
+ }
+ }
+};
+
+class VDPixmapGenYCbCr709ToYCbCr601_32F : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * sizeof(float), 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenYCbCrToRGBBase::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+		return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32F_LE | kVDPixSpace_YCC_601;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 ypos) {
+ float *dstCr = (float *)dst0;
+ float *dstY = vdptroffset(dstCr, mWindowPitch);
+ float *dstCb = vdptroffset(dstY, mWindowPitch);
+
+ const float *srcY = (const float *)mpSrcY->GetRow(ypos, mSrcIndexY);
+ const float *srcCb = (const float *)mpSrcCb->GetRow(ypos, mSrcIndexCb);
+ const float *srcCr = (const float *)mpSrcCr->GetRow(ypos, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float y = srcY[i];
+ float cb = srcCb[i] - (128.0f / 255.0f);
+ float cr = srcCr[i] - (128.0f / 255.0f);
+
+			*dstY++ = y + 0.0993117f*cb + 0.1916995f*cr;
+ *dstCb++ = 0.9898538f*cb - 0.1106525f*cr + (128.0f / 255.0f);
+ *dstCr++ = - 0.0724530f*cb + 0.9833978f*cr + (128.0f / 255.0f);
+ }
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr_x86.h
new file mode 100644
index 000000000..fd9a66908
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr_x86.h
@@ -0,0 +1,27 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_YCBCR_X86_H
+#define f_VD2_KASUMI_UBERBLIT_YCBCR_X86_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit.h"
+#include "uberblit_ycbcr.h"
+
+extern "C" void __cdecl vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX(void *dst, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 count);
+
+class VDPixmapGenYCbCr601ToRGB32_MMX : public VDPixmapGenYCbCr601ToRGB32 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX(dst, srcY, srcCb, srcCr, mWidth);
+ }
+};
+
+class VDPixmapGenRGB32ToYCbCr601_SSE2 : public VDPixmapGenRGB32ToYCbCr601 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a64_resample.asm64 b/src/thirdparty/VirtualDub/Kasumi/source/a64_resample.asm64
new file mode 100644
index 000000000..e6de1eabf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a64_resample.asm64
@@ -0,0 +1,620 @@
+; VirtualDub - Video processing and capture application
+; Graphics support library
+; Copyright (C) 1998-2004 Avery Lee
+;
+; This program is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or
+; (at your option) any later version.
+;
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, write to the Free Software
+; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+;
+
+ default rel
+
+ segment .rdata, align=16
+
+ align 16
+roundval dq 0000200000002000h, 0000200000002000h
+
+
+ segment .text
+
+
+%macro VDSAVE 1-*
+
+ %rep %0
+ %rotate -1
+ push %1
+ [pushreg %1]
+ %endrep
+
+%endmacro
+
+%macro VDRESTORE 1-*
+
+ %rep %0
+ pop %1
+
+ %rotate 1
+ %endrep
+
+%endmacro
+
+%macro VDSAVEXMM128 2
+%assign %%count %2 + 1 - %1
+%assign %%stkoffset 0
+%assign %%reg %1
+
+ sub rsp, %%count*16+8
+ [allocstack %%count*16]
+
+ %rep %%count
+ movdqa oword [rsp+%%stkoffset], xmm %+ %%reg
+ [savexmm128 xmm %+ %%reg, %%stkoffset]
+
+ %assign %%stkoffset %%stkoffset + 16
+ %assign %%reg %%reg + 1
+ %endrep
+%endmacro
+
+%macro VDRESTOREXMM128 2
+%assign %%count %2+1-%1
+%assign %%stkoffset %%count*16
+%assign %%reg %2
+
+ %rep %%count
+ %assign %%stkoffset %%stkoffset-16
+ movdqa xmm %+ %%reg, oword [rsp+%%stkoffset]
+
+ %assign %%reg %%reg-1
+ %endrep
+
+ add rsp, %%count*16+8
+%endmacro
+
+;-------------------------------------------------------------------------
+;
+; long vdasm_resize_table_row_SSE2(
+; Pixel *out, // rcx
+; Pixel *in, // rdx
+; int *filter, // r8
+; int filter_width, // r9d
+; PixDim w, // [rsp+40]
+; long accum, // [rsp+48]
+; long frac); // [rsp+56]
+;
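+; The first four arguments arrive in rcx/rdx/r8/r9 per the Win64 convention;
+; with the return address and the 32-byte shadow area, the fifth argument
+; sits at [rsp+40] on entry, which is what .parms re-bases past the saved
+; GPR and XMM registers.
+;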
+ global vdasm_resize_table_row_SSE2
+proc_frame vdasm_resize_table_row_SSE2
+
+ VDSAVE rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ VDSAVEXMM128 6, 15
+end_prolog
+
+ .parms equ rsp+168+64
+
+ mov r10d, dword [.parms+40]
+ shl r10, 2
+ add rcx, r10
+ neg r10
+ shl r9d, 2 ;filter_width <<= 2
+
+ movaps xmm6, oword [roundval]
+ pxor xmm5, xmm5
+ mov rsi, rdx
+ shr rsi, 2
+
+ mov edi, [.parms+48]
+ mov eax, edi
+ shl edi, 16
+ sar rax, 16
+ add rsi, rax
+ mov ebp, [.parms+56]
+ movsxd r11, ebp
+ shl ebp, 16
+ sar r11, 16
+
+ ;register map
+ ;
+ ;eax temp coefficient pair counter
+ ;rbx temp coefficient pointer
+ ;rcx destination
+ ;rdx temp source
+ ;rsi source/4
+ ;edi accumulator
+ ;ebp fractional increment
+ ;r8 filter
+ ;r9 filter_width*4
+ ;r10 -width*4
+ ;r11 integral increment
+ ;r12
+ ;r13
+ ;r14
+ ;r15
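+	;
+	;edi:rsi together form a 16.16 source position: edi keeps the fraction
+	;shifted into its high word, rsi the integer pixel index, and the top
+	;eight fraction bits (shr eax, 24) select the coefficient set for each
+	;output pixel.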
+
+ cmp r9d, 16
+ jz .accel_4coeff
+ cmp r9d, 24
+ jz .accel_6coeff
+
+ test r9d, 8
+ jz .pixelloop_even_pairs
+ cmp r9d, 8
+ jnz .pixelloop_odd_pairs
+
+.pixelloop_single_pairs:
+ mov eax, edi
+ shr eax, 24
+ imul eax, r9d
+
+ lea rdx, [rsi*4]
+
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm0, xmm5
+ movq xmm1, qword [r8+rax]
+ pshufd xmm1, xmm1, 01000100b
+ pmaddwd xmm0, xmm1
+
+ movdqa xmm4, xmm6
+ paddd xmm4, xmm0
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_single_pairs
+ jmp .xit
+
+.pixelloop_odd_pairs:
+ movdqa xmm4, xmm6
+
+ mov eax, edi
+ shr eax, 24
+ imul eax, r9d
+ lea rbx, [r8+rax]
+
+ lea rdx, [rsi*4]
+ lea rax, [r9-8]
+.coeffloop_odd_pairs:
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ movd xmm2, dword [rdx+8] ;xmm2 = p2
+ movd xmm3, dword [rdx+12] ;xmm3 = p3
+ add rdx, 16
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ movq xmm1, qword [rbx]
+ movq xmm3, qword [rbx+8]
+ add rbx, 16
+ pshufd xmm1, xmm1, 01000100b
+ pshufd xmm3, xmm3, 01000100b
+ pmaddwd xmm0, xmm1
+ pmaddwd xmm2, xmm3
+ paddd xmm0, xmm2
+ paddd xmm4, xmm0
+ sub eax, 16
+ jnz .coeffloop_odd_pairs
+
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm0, xmm5
+ movq xmm1, qword [rbx]
+ pshufd xmm1, xmm1, 01000100b
+ pmaddwd xmm0, xmm1
+ paddd xmm4, xmm0
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_odd_pairs
+ jmp .xit
+
+.pixelloop_even_pairs:
+ movdqa xmm4, xmm6
+
+ mov eax, edi
+ shr eax, 24
+ imul eax, r9d
+ lea rbx, [r8+rax]
+
+ lea rdx, [rsi*4]
+ mov eax, r9d
+.coeffloop_even_pairs:
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ movd xmm2, dword [rdx+8] ;xmm2 = p2
+ movd xmm3, dword [rdx+12] ;xmm3 = p3
+ add rdx, 16
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ movq xmm1, qword [rbx]
+ movq xmm3, qword [rbx+8]
+ add rbx, 16
+ pshufd xmm1, xmm1, 01000100b
+ pshufd xmm3, xmm3, 01000100b
+ pmaddwd xmm0, xmm1
+ pmaddwd xmm2, xmm3
+ paddd xmm0, xmm2
+ paddd xmm4, xmm0
+ sub eax, 16
+ jnz .coeffloop_even_pairs
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_even_pairs
+
+.xit:
+ VDRESTOREXMM128 6, 15
+ VDRESTORE rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ ret
+
+.accel_4coeff:
+.pixelloop_4coeff:
+ pxor xmm5, xmm5
+ movdqa xmm4, xmm6
+
+ mov eax, 0ff000000h
+ lea rdx, [rsi*4]
+ and eax, edi
+ shr eax, 20
+ lea rbx, [r8+rax]
+
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ movd xmm2, dword [rdx+8] ;xmm2 = p2
+ movd xmm3, dword [rdx+12] ;xmm3 = p3
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ movq xmm1, qword [rbx]
+ movq xmm3, qword [rbx+8]
+ pshufd xmm1, xmm1, 01000100b
+ pshufd xmm3, xmm3, 01000100b
+ pmaddwd xmm0, xmm1
+ pmaddwd xmm2, xmm3
+ paddd xmm0, xmm2
+ paddd xmm4, xmm0
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_4coeff
+ jmp .xit
+
+.accel_6coeff:
+.pixelloop_6coeff:
+ pxor xmm5, xmm5
+ movdqa xmm4, xmm6
+
+ lea rdx, [rsi*4]
+ mov eax, edi
+ shr eax, 24
+ lea rax, [rax+rax*2]
+ lea rbx, [r8+rax*8]
+
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ movd xmm2, dword [rdx+8] ;xmm2 = p2
+ movd xmm3, dword [rdx+12] ;xmm3 = p3
+	movd	xmm8, dword [rdx+16]	;xmm8 = p4
+	movd	xmm9, dword [rdx+20]	;xmm9 = p5
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm8, xmm9
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm8, xmm5
+ movq xmm1, qword [rbx]
+ movq xmm3, qword [rbx+8]
+ movq xmm9, qword [rbx+16]
+ pshufd xmm1, xmm1, 01000100b
+ pshufd xmm3, xmm3, 01000100b
+ pshufd xmm9, xmm9, 01000100b
+ pmaddwd xmm0, xmm1
+ pmaddwd xmm2, xmm3
+ pmaddwd xmm8, xmm9
+ paddd xmm0, xmm2
+ paddd xmm4, xmm0
+ paddd xmm4, xmm8
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_6coeff
+ jmp .xit
+endproc_frame
+
+
+;--------------------------------------------------------------------------
+;
+; vdasm_resize_table_col_SSE2(
+; uint32 *dst, // rcx
+; const uint32 *const *srcs, // rdx
+; int *filter, // r8
+; int filter_width, // r9d
+; PixDim w, // [rsp+40] -> r10d
+; );
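+;	srcs holds one source row pointer per filter tap; each output pixel is
+;	accumulated vertically across those rows with the same 0x2000 rounding
+;	and final psrad 14 as the row routine above.
+;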
+;
+ global vdasm_resize_table_col_SSE2
+proc_frame vdasm_resize_table_col_SSE2
+ VDSAVE rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ VDSAVEXMM128 6, 15
+end_prolog
+
+ .parms equ rsp+168+64
+
+ mov r10d, [.parms+40] ;r10d = w
+
+ pxor xmm5, xmm5
+ movdqa xmm4, oword [roundval]
+ xor rbx, rbx ;rbx = source offset
+
+ cmp r9d, 4
+ jz .accel_4coeff
+ cmp r9d, 6
+ jz .accel_6coeff
+
+ shr r9d, 1 ;r9d = filter pair count
+
+.pixelloop:
+ mov rax, rdx ;rax = row pointer table
+ mov rdi, r8 ;rdi = filter
+ mov r11d, r9d ;r11d = filter width counter
+ movdqa xmm2, xmm4
+.coeffloop:
+ mov rsi, [rax]
+
+ movd xmm0, dword [rsi+rbx]
+
+ mov rsi, [rax+8]
+ add rax, 16
+
+ movd xmm1, dword [rsi+rbx]
+ punpcklbw xmm0, xmm1
+
+ punpcklbw xmm0, xmm5
+
+ movq xmm1, qword [rdi]
+ pshufd xmm1, xmm1, 01000100b
+
+ pmaddwd xmm0, xmm1
+
+ paddd xmm2, xmm0
+
+ add rdi,8
+
+ sub r11d,1
+ jne .coeffloop
+
+ psrad xmm2,14
+ packssdw xmm2,xmm2
+ add rbx,4
+ packuswb xmm2,xmm2
+
+ movd dword [rcx],xmm2
+ add rcx,4
+ sub r10d,1
+ jne .pixelloop
+
+.xit:
+ VDRESTOREXMM128 6, 15
+ VDRESTORE rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ ret
+
+.accel_4coeff:
+ mov r12, [rdx]
+ mov r13, [rdx+8]
+ mov r14, [rdx+16]
+ mov r15, [rdx+24]
+ movq xmm8, qword [r8]
+ punpcklqdq xmm8, xmm8
+ movq xmm9, qword [r8+8]
+ punpcklqdq xmm9, xmm9
+
+ sub r10d, 1
+ jc .oddpixel_4coeff
+.pixelloop_4coeff:
+ movq xmm0, qword [r12+rbx]
+ movq xmm1, qword [r13+rbx]
+ movq xmm2, qword [r14+rbx]
+ movq xmm3, qword [r15+rbx]
+
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+
+ punpcklbw xmm0, xmm5
+ punpckhbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpckhbw xmm3, xmm5
+
+ pmaddwd xmm0, xmm8
+ pmaddwd xmm1, xmm8
+ pmaddwd xmm2, xmm9
+ pmaddwd xmm3, xmm9
+
+ paddd xmm0, xmm4
+ paddd xmm1, xmm4
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+
+ psrad xmm0, 14
+ psrad xmm1, 14
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+
+ movq qword [rcx], xmm0
+ add rcx, 8
+ add rbx, 8
+ sub r10d, 2
+ ja .pixelloop_4coeff
+ jnz .xit
+.oddpixel_4coeff:
+ movd xmm0, dword [r12+rbx]
+ movd xmm1, dword [r13+rbx]
+ movd xmm2, dword [r14+rbx]
+ movd xmm3, dword [r15+rbx]
+
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+
+ pmaddwd xmm0, xmm8
+ pmaddwd xmm2, xmm9
+
+ paddd xmm0, xmm4
+ paddd xmm0, xmm2
+
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+
+ movd dword [rcx], xmm0
+
+ jmp .xit
+
+.accel_6coeff:
+ mov r12, [rdx]
+ mov r13, [rdx+8]
+ mov r14, [rdx+16]
+ mov r15, [rdx+24]
+ mov rsi, [rdx+32]
+ mov rdx, [rdx+40]
+ movq xmm10, qword [r8]
+ punpcklqdq xmm10, xmm10
+ movq xmm11, qword [r8+8]
+ punpcklqdq xmm11, xmm11
+ movq xmm12, qword [r8+16]
+ punpcklqdq xmm12, xmm12
+
+ sub r10d, 1
+ jc .oddpixel_6coeff
+.pixelloop_6coeff:
+ movq xmm0, qword [r12+rbx]
+ movq xmm1, qword [r13+rbx]
+ movq xmm2, qword [r14+rbx]
+ movq xmm3, qword [r15+rbx]
+ movq xmm8, qword [rsi+rbx]
+ movq xmm9, qword [rdx+rbx]
+
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm8, xmm9
+
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ movdqa xmm9, xmm8
+
+ punpcklbw xmm0, xmm5
+ punpckhbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpckhbw xmm3, xmm5
+ punpcklbw xmm8, xmm5
+ punpckhbw xmm9, xmm5
+
+ pmaddwd xmm0, xmm10
+ pmaddwd xmm1, xmm10
+ pmaddwd xmm2, xmm11
+ pmaddwd xmm3, xmm11
+ pmaddwd xmm8, xmm12
+ pmaddwd xmm9, xmm12
+
+ paddd xmm0, xmm4
+ paddd xmm1, xmm4
+ paddd xmm2, xmm8
+ paddd xmm3, xmm9
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+
+ psrad xmm0, 14
+ psrad xmm1, 14
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+
+ movq qword [rcx], xmm0
+ add rcx, 8
+ add rbx, 8
+ sub r10d, 2
+ ja .pixelloop_6coeff
+ jnz .xit
+.oddpixel_6coeff:
+ movd xmm0, dword [r12+rbx]
+ movd xmm1, dword [r13+rbx]
+ movd xmm2, dword [r14+rbx]
+ movd xmm3, dword [r15+rbx]
+ movd xmm8, dword [rsi+rbx]
+ movd xmm9, dword [rdx+rbx]
+
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm8, xmm9
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm8, xmm5
+
+ pmaddwd xmm0, xmm10
+ pmaddwd xmm2, xmm11
+ pmaddwd xmm8, xmm12
+
+ paddd xmm0, xmm4
+ paddd xmm2, xmm8
+ paddd xmm0, xmm2
+
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+
+ movd dword [rcx], xmm0
+
+ jmp .xit
+endproc_frame
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb.asm
new file mode 100644
index 000000000..f3503807e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb.asm
@@ -0,0 +1,812 @@
+ section .text
+
+ global _vdasm_pixblt_RGB565_to_XRGB1555
+_vdasm_pixblt_RGB565_to_XRGB1555:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-2]
+ lea ecx, [ecx+ebp-2]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp]
+ mov ebx, 0ffc0ffc0h
+
+ and ebx, eax
+ and eax, 0001f001fh
+
+ shr ebx, 1
+
+ add eax, ebx
+
+ mov [edx+ebp], eax
+ add ebp, 4
+
+ jnc .xloop
+ jnz .noodd
+.odd:
+ movzx eax, word [ecx]
+ mov ebx, 0ffc0ffc0h
+ and ebx, eax
+ and eax, 0001f001fh
+ shr ebx, 1
+ add eax, ebx
+ mov [edx], ax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_pixblt_RGB888_to_XRGB1555
+_vdasm_pixblt_RGB888_to_XRGB1555:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ebp,[esp+20+16]
+ lea eax,[ebp+ebp]
+ lea ebx,[ebp+eax]
+ sub [esp+8+16],eax
+ sub [esp+16+16],ebx
+
+ mov edx,[esp+24+16]
+.yloop:
+ mov ebp,[esp+20+16]
+ push ebp
+ push edx
+ shr ebp,1
+ jz .checkodd
+.xloop:
+ mov eax,[esi+2] ;u
+ add esi,6 ;v
+
+ mov ebx,eax ;u
+ mov ecx,eax ;v
+ shr ebx,11 ;u
+ and ecx,00f80000h ;v
+ shr eax,17 ;u
+ and ebx,0000001fh ;v
+ shr ecx,14 ;u
+ and eax,00007c00h ;v
+ or ebx,ecx ;u
+ add edi,4 ;v
+ or ebx,eax ;u
+
+ mov ecx,[esi-6] ;v
+ mov edx,ebx ;u
+ mov eax,ecx ;v
+
+ shl edx,16 ;u
+ mov ebx,ecx ;v
+ shr ebx,3 ;u
+ and ecx,0000f800h ;v
+ shr eax,9 ;u
+ and ebx,0000001fh ;v
+ shr ecx,6 ;u
+ and eax,00007c00h ;v
+ or eax,ecx ;u
+ or edx,ebx ;v
+ or edx,eax ;u
+ sub ebp,1 ;v
+ mov [edi-4],edx ;u
+ jne .xloop ;v
+.checkodd:
+ pop edx
+ pop ebp
+ and ebp,1
+ jz .noodd
+ movzx eax,word [esi]
+ movzx ebx,byte [esi+2]
+ shl ebx,16
+ add esi,3
+ add eax,ebx
+
+ mov ebx,eax
+ mov ecx,eax
+ shr ebx,3
+ and ecx,0000f800h
+ shr eax,9
+ and ebx,0000001fh
+ shr ecx,6
+ and eax,00007c00h
+ or ebx,ecx
+ or ebx,eax
+ mov [edi+0],bl
+ mov [edi+1],bh
+ add edi,2
+.noodd:
+
+ add esi,[esp+16+16]
+ add edi,[esp+ 8+16]
+
+ sub edx,1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+
+ ret
+
+ global _vdasm_pixblt_XRGB8888_to_XRGB1555
+_vdasm_pixblt_XRGB8888_to_XRGB1555:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ mov edx, [esp+4+16]
+ add ebp, ebp
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-2]
+ lea ecx, [ecx+ebp*2-4]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp*2]
+ mov ebx, 00f80000h
+ and ebx, eax
+ mov esi, eax
+ shr ebx, 9
+ and esi, 0000f800h
+ shr esi, 6
+ and eax, 000000f8h
+ shr eax, 3
+ add ebx, esi
+ mov esi, [ecx+ebp*2+4]
+ add eax, ebx
+ mov ebx, esi
+ and esi, 00f80000h
+ shl esi, 7
+ mov edi, ebx
+ and edi, 0000f800h
+ add eax, esi
+ shl edi, 10
+ and ebx, 000000f8h
+ shl ebx, 13
+ add eax, edi
+ add eax, ebx
+ mov [edx+ebp], eax
+ add ebp, 4
+ jnc .xloop
+ jnz .noodd
+.odd:
+ mov eax, [ecx]
+ mov ebx, 00f80000h
+ and ebx, eax
+ mov esi, eax
+ shr ebx, 9
+ and esi, 0000f800h
+ shr esi, 6
+ and eax, 000000f8h
+ shr eax, 3
+ add ebx, esi
+ add eax, ebx
+ mov [edx], ax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec dword [esp+24+16]
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_pixblt_XRGB1555_to_RGB565
+_vdasm_pixblt_XRGB1555_to_RGB565:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-2]
+ lea ecx, [ecx+ebp-2]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp]
+ mov ebx, 02000200h
+
+ mov esi, eax
+ and ebx, eax
+
+ shr ebx, 4
+ and esi, 0ffe0ffe0h
+
+ add eax, esi
+
+ add eax, ebx
+
+ mov [edx+ebp], eax
+ add ebp, 4
+
+ jnc .xloop
+ jnz .noodd
+.odd:
+ movzx eax, word [ecx]
+ mov ebx, 02000200h
+ mov esi, eax
+ and ebx, eax
+ shr ebx, 4
+ and esi, 0ffe0ffe0h
+ add eax, esi
+ add eax, ebx
+ mov [edx], ax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_pixblt_RGB888_to_RGB565
+_vdasm_pixblt_RGB888_to_RGB565:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ebp,[esp+20+16]
+ lea eax,[ebp+ebp]
+ lea ebx,[ebp+eax]
+ sub [esp+8+16],eax
+ sub [esp+16+16],ebx
+
+ mov edx,[esp+24+16]
+.yloop:
+ mov ebp,[esp+20+16]
+ push ebp
+ push edx
+ shr ebp,1
+ jz .checkodd
+.xloop:
+ mov eax,[esi+2] ;u
+ add esi,6 ;v
+
+ mov ebx,eax ;u
+ mov ecx,eax ;v
+ shr ebx,11 ;u
+ and ecx,00fc0000h ;v
+ shr eax,16 ;u
+ and ebx,0000001fh ;v
+ shr ecx,13 ;u
+ and eax,0000f800h ;v
+ or ebx,ecx ;u
+ add edi,4 ;v
+ or ebx,eax ;u
+
+ mov ecx,[esi-6] ;v
+ mov edx,ebx ;u
+ mov eax,ecx ;v
+
+ shl edx,16 ;u
+ mov ebx,ecx ;v
+ shr ebx,3 ;u
+ and ecx,0000fc00h ;v
+ shr eax,8 ;u
+ and ebx,0000001fh ;v
+ shr ecx,5 ;u
+ and eax,0000f800h ;v
+ or eax,ecx ;u
+ or edx,ebx ;v
+ or edx,eax ;u
+ sub ebp,1 ;v
+ mov [edi-4],edx ;u
+ jne .xloop ;v
+.checkodd:
+ pop edx
+ pop ebp
+ and ebp,1
+ jz .noodd
+ movzx eax,word [esi]
+ movzx ebx,byte [esi+2]
+ shl ebx,16
+ add esi,3
+ add eax,ebx
+
+ mov ebx,eax
+ mov ecx,eax
+ shr ebx,3
+ and ecx,0000fc00h
+ shr eax,8
+ and ebx,0000001fh
+ shr ecx,5
+ and eax,0000f800h
+ or ebx,ecx
+ or ebx,eax
+ mov [edi+0],bl
+ mov [edi+1],bh
+ add edi,2
+.noodd:
+
+ add esi,[esp+16+16]
+ add edi,[esp+ 8+16]
+
+ sub edx,1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+
+ ret
+
+ global _vdasm_pixblt_XRGB8888_to_RGB565
+_vdasm_pixblt_XRGB8888_to_RGB565:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ mov edx, [esp+4+16]
+ add ebp, ebp
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-2]
+ lea ecx, [ecx+ebp*2-4]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp*2]
+ mov ebx, 00f80000h
+ and ebx, eax
+ mov esi, eax
+ shr ebx, 8
+ and esi, 0000fc00h
+ shr esi, 5
+ and eax, 000000f8h
+ shr eax, 3
+ add ebx, esi
+ mov esi, [ecx+ebp*2+4]
+ add eax, ebx
+ mov ebx, esi
+ and esi, 00f80000h
+ shl esi, 8
+ mov edi, ebx
+ and edi, 0000fc00h
+ add eax, esi
+ shl edi, 11
+ and ebx, 000000f8h
+ shl ebx, 13
+ add eax, edi
+ add eax, ebx
+ mov [edx+ebp], eax
+ add ebp, 4
+ jnc .xloop
+ jnz .noodd
+.odd:
+ mov eax, [ecx]
+ mov ebx, 00f80000h
+ and ebx, eax
+ mov esi, eax
+ shr ebx, 8
+ and esi, 0000fc00h
+ shr esi, 5
+ and eax, 000000f8h
+ shr eax, 3
+ add ebx, esi
+ add eax, ebx
+ mov [edx], ax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec dword [esp+24+16]
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_pixblt_XRGB8888_to_RGB888
+_vdasm_pixblt_XRGB8888_to_RGB888:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ecx,[esp+20+16]
+ lea eax,[ecx+ecx*2]
+ lea ebx,[ecx*4]
+ sub [esp+8+16],eax
+ sub [esp+16+16],ebx
+
+ mov edx,[esp+24+16]
+.yloop:
+ mov ecx,[esp+20+16]
+ push ecx
+ push edx
+ shr ecx,2
+ jz .checkodd
+.xloop:
+ mov eax,[esi] ;EAX = xxr0g0b0
+ mov ebx,[esi+4] ;EBX = xxr1g1b1
+ mov edx,ebx ;EDX = xxr1g1b1
+ mov ebp,[esi+8] ;EBP = xxr2g2b2
+ shl ebx,24 ;EBX = b1000000
+ and eax,00ffffffh ;EAX = 00r0g0b0
+ shr edx,8 ;EDX = 00xxr1g1
+ or eax,ebx ;EAX = b1r0g0b0
+ mov [edi],eax
+ mov ebx,ebp ;EBX = xxr2g2b2
+ shl ebp,16 ;EBP = g2b20000
+ and edx,0000ffffh ;EDX = 0000r1g1
+ or ebp,edx ;EBP = g2b2r1g1
+ mov eax,[esi+12] ;EAX = xxr3g3b3
+ shr ebx,16 ;EBX = 0000xxr2
+ add edi,12
+ shl eax,8 ;EAX = r3g3b300
+ and ebx,000000ffh ;EBX = 000000r2
+ or eax,ebx ;EAX = r3g3b3r2
+ mov [edi+4-12],ebp
+ add esi,16
+ mov [edi+8-12],eax
+ sub ecx,1
+ jne .xloop
+.checkodd:
+ pop edx
+ pop ecx
+ and ecx,3
+ jz .noodd
+.oddloop:
+ mov eax,[esi]
+ add esi,4
+ mov [edi],ax
+ shr eax,16
+ mov [edi+2],al
+ add edi,3
+ sub ecx,1
+ jnz .oddloop
+.noodd:
+ add esi,[esp+16+16]
+ add edi,[esp+ 8+16]
+
+ sub edx,1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_pixblt_XRGB1555_to_XRGB8888
+_vdasm_pixblt_XRGB1555_to_XRGB8888:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp*2-4]
+ lea ecx, [ecx+ebp-2]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp]
+ mov ebx, 00007c00h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 9
+ and esi, 000003e0h
+ shl esi, 6
+ mov edi, eax
+ and eax, 0000001fh
+ add ebx, esi
+ shl eax, 3
+ mov esi, edi
+ shr edi, 7
+ add eax, ebx
+ and edi, 00f80000h
+ mov ebx, esi
+ shr esi, 13
+ and ebx, 03e00000h
+ shr ebx, 10
+ and esi, 000000f8h
+ add ebx, edi
+ add ebx, esi
+ mov edi, eax
+ and eax, 00e0e0e0h
+ shr eax, 5
+ mov esi, ebx
+ shr ebx, 5
+ add eax, edi
+ and ebx, 00070707h
+ add ebx, esi
+ mov [edx+ebp*2], eax
+ mov [edx+ebp*2+4], ebx
+ add ebp, 4
+ jnc .xloop
+ jnz .noodd
+.odd:
+ movzx eax, word [ecx]
+ mov ebx, 00007c00h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 9
+ and esi, 000003e0h
+ shl esi, 6
+ and eax, 0000001fh
+ shl eax, 3
+ add ebx, esi
+ add eax, ebx
+ mov ebx, 00e0e0e0h
+ and ebx, eax
+ shr ebx, 5
+ add eax, ebx
+ mov [edx], eax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec dword [esp+24+16]
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_pixblt_RGB565_to_XRGB8888
+_vdasm_pixblt_RGB565_to_XRGB8888:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp*2-4]
+ lea ecx, [ecx+ebp-2]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ movzx eax, word [ecx+ebp]
+ mov ebx, 0000f800h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 8
+ mov edi, eax
+ shl eax, 3
+ and esi, 000007e0h
+ and eax, 000000f8h
+ add ebx, eax
+ shl esi, 5
+ mov eax, ebx
+ shr ebx, 5
+ and edi, 00000600h
+ shr edi, 1
+ and ebx, 00070007h
+ add esi, edi
+ add eax, ebx
+ add eax, esi
+ mov [edx+ebp*2], eax
+
+ movzx eax, word [ecx+ebp+2]
+ mov ebx, 0000f800h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 8
+ mov edi, eax
+ shl eax, 3
+ and esi, 000007e0h
+ and eax, 000000f8h
+ add ebx, eax
+ shl esi, 5
+ mov eax, ebx
+ shr ebx, 5
+ and edi, 00000600h
+ shr edi, 1
+ and ebx, 00070007h
+ add esi, edi
+ add eax, ebx
+ add eax, esi
+ mov [edx+ebp*2+4], eax
+
+ add ebp, 4
+
+ jnc .xloop
+ jnz .noodd
+.odd:
+ movzx eax, word [ecx]
+ mov ebx, 0000f800h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 8
+ mov edi, eax
+ shl eax, 3
+ and esi, 000007e0h
+ and eax, 000000f8h
+ add ebx, eax
+ shl esi, 5
+ mov eax, ebx
+ shr ebx, 5
+ and edi, 00000600h
+ shr edi, 1
+ and ebx, 00070007h
+ add esi, edi
+ add eax, ebx
+ add eax, esi
+ mov [edx], eax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec dword [esp+24+16]
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_pixblt_RGB888_to_XRGB8888
+_vdasm_pixblt_RGB888_to_XRGB8888:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ecx,[esp+20+16]
+ lea eax,[ecx+ecx*2]
+ lea ebx,[ecx*4]
+ sub [esp+8+16],ebx
+ sub [esp+16+16],eax
+
+ mov edx,[esp+24+16]
+.yloop:
+ mov ebp,[esp+20+16]
+ shr ebp,2
+ push edx
+ jz .checkodd
+.xloop:
+ mov eax,[esi] ;EAX: b1r0g0b0
+ mov ebx,[esi+4] ;EBX: g2b2r1g1
+
+ mov [edi],eax
+ mov ecx,ebx ;ECX: g2b2r1g1
+
+ shr eax,24 ;EAX: ------b1
+ mov edx,[esi+8] ;EDX: r3g3b3r2
+
+ shr ecx,16 ;ECX: ----g2b2
+ add edi,16
+
+ shl ebx,8 ;EBX: b2r1g1--
+ add esi,12
+
+ or eax,ebx ;EAX: b2r1g1b1
+ mov ebx,edx ;EBX: r3g3b3r2
+
+ shr ebx,8 ;EBX: --r3g3b3
+ mov [edi+4-16],eax
+
+ shl edx,16 ;EDX: b3r2----
+ mov [edi+12-16],ebx
+
+ or edx,ecx ;EDX: b3r2g2b2
+ sub ebp,1
+
+ mov [edi+8-16],edx
+ jne .xloop
+
+.checkodd:
+ pop edx
+ mov ebx,[esp+20+16]
+ and ebx,3
+ jz .noodd
+.oddloop:
+ mov ax,[esi]
+ mov cl,[esi+2]
+ mov [edi],ax
+ mov [edi+2],cl
+ add esi,3
+ add edi,4
+ sub ebx,1
+ jne .oddloop
+.noodd:
+
+ add esi,[esp+16+16]
+ add edi,[esp+ 8+16]
+
+ sub edx,1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb2yuv_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb2yuv_mmx.asm
new file mode 100644
index 000000000..6a00d826f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb2yuv_mmx.asm
@@ -0,0 +1,652 @@
+ section .rdata, rdata
+
+y_co dq 0004a004a004a004ah
+cr_co_r dq 000cc00cc00cc00cch
+cb_co_b dq 00081008100810081h ;note: divided by two
+cr_co_g dq 0ff98ff98ff98ff98h
+cb_co_g dq 0ffceffceffceffceh
+y_bias dq 0fb7afb7afb7afb7ah
+c_bias dq 0ff80ff80ff80ff80h
+interp dq 06000400020000000h
+rb_mask_555 dq 07c1f7c1f7c1f7c1fh
+g_mask_555 dq 003e003e003e003e0h
+rb_mask_565 dq 0f81ff81ff81ff81fh
+g_mask_565 dq 007e007e007e007e0h
+
+cr_coeff dq 000003313e5fc0000h
+cb_coeff dq 000000000f377408dh
+rgb_bias dq 000007f2180887eebh
+
+msb_inv dq 08000800080008000h
+
+ section .text
+
+;============================================================================
+
+%macro YUV411PLANAR_TO_RGB_PROLOG 0
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+ mov ecx, [esp+8+16]
+ mov edx, [esp+12+16]
+ mov ebx, [esp+16+16]
+ mov ebp, [esp+20+16]
+
+ pxor mm7, mm7
+%endmacro
+
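+; Converts four luma samples and one chroma sample pair per iteration: the
+; 4:1:1 chroma is linearly interpolated toward the next pair with the 0,
+; 1/4, 1/2, 3/4 weights in [interp], then combined with the biased luma and
+; the fixed-point color coefficients; the results are packed down to bytes
+; in mm1 (red), mm2 (blue) and mm3 (green).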
+%macro YUV411PLANAR_TO_RGB_CORE_MMX 0
+ movd mm0, dword [ecx] ;mm0 = Y3Y2Y1Y0
+ add ecx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ movq mm1, mm0
+ pmullw mm0, [y_co]
+ paddw mm1, [y_bias]
+ paddsw mm0, mm0
+ paddsw mm0, mm1
+
+ movzx esi, word [ebx]
+ movzx edi, word [edx]
+ add ebx, 1
+ add edx, 1
+
+ movd mm1, esi
+ movd mm2, edi
+
+ punpcklbw mm1, mm7
+ paddw mm1, [c_bias]
+ punpcklwd mm1, mm1
+ movq mm3, mm1
+ punpckldq mm1, mm1
+ punpckhdq mm3, mm3
+
+ punpcklbw mm2, mm7
+ paddw mm2, [c_bias]
+ punpcklwd mm2, mm2
+ movq mm4, mm2
+ punpckldq mm2, mm2
+ punpckhdq mm4, mm4
+
+ psubw mm3, mm1
+ psubw mm4, mm2
+ paddw mm3, mm3
+ paddw mm4, mm4
+
+ pmulhw mm3, [interp]
+ pmulhw mm4, [interp]
+
+ paddw mm1, mm3
+ paddw mm2, mm4
+
+ movq mm3, mm1
+ movq mm4, mm2
+
+ pmullw mm1, [cr_co_r]
+ pmullw mm2, [cb_co_b]
+ pmullw mm3, [cr_co_g]
+ pmullw mm4, [cb_co_g]
+
+ paddsw mm2, mm2
+ paddsw mm1, mm0
+ paddsw mm3, mm4
+ paddsw mm2, mm0
+ paddsw mm3, mm0
+
+ psraw mm1, 7
+ psraw mm2, 7
+ psraw mm3, 7
+
+ packuswb mm1, mm1
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+%endmacro
+
+%macro YUV411PLANAR_TO_RGB_CORE_ISSE 0
+ movd mm0, dword [ecx] ;mm0 = Y3Y2Y1Y0
+ add ecx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ movq mm1, mm0
+ pmullw mm0, [y_co]
+ paddw mm1, [y_bias]
+ paddsw mm0, mm0
+ paddsw mm0, mm1
+
+ movzx esi, word [ebx]
+ movzx edi, word [edx]
+ add ebx, 1
+ add edx, 1
+
+ movd mm1, esi
+ movd mm2, edi
+
+ punpcklbw mm1, mm7
+ paddw mm1, [c_bias]
+ pshufw mm3, mm1, 01010101b
+ pshufw mm1, mm1, 00000000b
+
+ punpcklbw mm2, mm7
+ paddw mm2, [c_bias]
+ pshufw mm4, mm2, 01010101b
+ pshufw mm2, mm2, 00000000b
+
+ psubw mm3, mm1
+ psubw mm4, mm2
+ paddw mm3, mm3
+ paddw mm4, mm4
+
+ pmulhw mm3, [interp]
+ pmulhw mm4, [interp]
+
+ paddw mm1, mm3
+ paddw mm2, mm4
+
+ psllw mm1, 3
+ psllw mm2, 3
+
+ movq mm3, [cr_co_g]
+ movq mm4, [cb_co_g]
+
+ pmullw mm3, mm1
+ pmullw mm4, mm2
+ pmullw mm1, [cr_co_r]
+ pmullw mm2, [cb_co_b]
+
+ paddsw mm2, mm2
+ paddsw mm1, mm0
+ paddsw mm3, mm4
+ paddsw mm2, mm0
+ paddsw mm3, mm0
+
+ psraw mm1, 7
+ psraw mm2, 7
+ psraw mm3, 7
+
+ packuswb mm1, mm1
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+%endmacro
+
+%macro YUV411PLANAR_TO_RGB_EPILOG 0
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+%endmacro
+
+ global _vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_MMX
+_vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_MMX:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+ YUV411PLANAR_TO_RGB_CORE_MMX
+
+ psrlw mm1, 1
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 2
+ pand mm2, [rb_mask_555]
+ pand mm3, [g_mask_555]
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_RGB565_scan_MMX
+_vdasm_pixblt_YUV411Planar_to_RGB565_scan_MMX:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+ YUV411PLANAR_TO_RGB_CORE_MMX
+
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 3
+ pand mm2, [rb_mask_565]
+ pand mm3, [g_mask_565]
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_MMX
+_vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_MMX:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+	YUV411PLANAR_TO_RGB_CORE_MMX
+
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ movq mm1, mm2
+ punpcklbw mm1, mm3
+ punpckhbw mm2, mm3
+
+ movq [eax], mm1
+ movq [eax+8], mm2
+ add eax, 16
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_ISSE
+_vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_ISSE:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+ YUV411PLANAR_TO_RGB_CORE_ISSE
+
+ psrlw mm1, 1
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 2
+ pand mm2, [rb_mask_555]
+ pand mm3, [g_mask_555]
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_RGB565_scan_ISSE
+_vdasm_pixblt_YUV411Planar_to_RGB565_scan_ISSE:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+ YUV411PLANAR_TO_RGB_CORE_ISSE
+
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 3
+ pand mm2, [rb_mask_565]
+ pand mm3, [g_mask_565]
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_ISSE
+_vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_ISSE:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+ mov ecx, [esp+8+16]
+ mov edx, [esp+12+16]
+ mov ebx, [esp+16+16]
+ mov ebp, [esp+20+16]
+
+ pxor mm7, mm7
+
+ movzx esi, byte [ebx]
+ movzx edi, byte [edx]
+ add ebx, 1
+ add edx, 1
+
+ movd mm1, esi
+ movd mm2, edi
+
+ psllw mm1, 3
+ psllw mm2, 3
+
+ pshufw mm5, mm1, 0
+ pshufw mm6, mm2, 0
+
+ pmulhw mm5, [cr_coeff]
+ pmulhw mm6, [cb_coeff]
+ paddw mm6, mm5
+ paddw mm6, [rgb_bias]
+
+.xloop:
+ movd mm0, dword [ecx];mm0 = Y3Y2Y1Y0
+ add ecx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ psllw mm0, 3
+ pmulhw mm0, [y_co]
+ pxor mm0, [msb_inv]
+
+ movzx esi, byte [ebx]
+ movzx edi, byte [edx]
+ add ebx, 1
+ add edx, 1
+
+ movd mm1, esi
+ movd mm2, edi
+
+ psllw mm1, 3
+ psllw mm2, 3
+
+ pshufw mm1, mm1, 0
+ pshufw mm2, mm2, 0
+
+ pmulhw mm1, [cr_coeff]
+ pmulhw mm2, [cb_coeff]
+ paddw mm1, mm2
+ paddw mm1, [rgb_bias]
+
+ movq mm2, mm1
+ pavgw mm2, mm6 ;mm2 = 1/2
+ pshufw mm3, mm0, 00000000b
+ paddw mm3, mm6
+ pavgw mm6, mm2 ;mm1 = 1/4
+ pshufw mm4, mm0, 01010101b
+ paddw mm4, mm6
+ packuswb mm3, mm4
+ movq [eax], mm3
+
+ pshufw mm3, mm0, 10101010b
+ paddw mm3, mm2
+ pshufw mm0, mm0, 11111111b
+ pavgw mm2, mm1 ;mm2 = 3/4
+ paddw mm2, mm0
+ packuswb mm3, mm2
+ movq [eax+8], mm3
+
+ movq mm6, mm1
+
+ add eax, 16
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;==========================================================================
+
+%macro YUV444PLANAR_TO_RGB_PROLOG 0
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+ mov ecx, [esp+8+16]
+ mov edx, [esp+12+16]
+ mov ebx, [esp+16+16]
+ mov ebp, [esp+20+16]
+%endmacro
+
+%macro YUV444PLANAR_TO_RGB_CORE 0
+ movq mm3, mm0
+ pmullw mm0, [y_co]
+ paddw mm1, [c_bias]
+ paddw mm2, [c_bias]
+ paddw mm0, [y_bias]
+ paddsw mm0, mm0
+ paddsw mm0, mm3
+
+ movq mm3, [cr_co_g]
+ movq mm4, [cb_co_g]
+
+ pmullw mm3, mm1
+ pmullw mm4, mm2
+ pmullw mm1, [cr_co_r]
+ pmullw mm2, [cb_co_b]
+
+ paddsw mm2, mm2
+ paddsw mm1, mm0
+ paddsw mm3, mm4
+ paddsw mm2, mm0
+ paddsw mm3, mm0
+
+ psraw mm1, 7
+ psraw mm2, 7
+ psraw mm3, 7
+
+ packuswb mm1, mm1
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+%endmacro
+
+%macro YUV444PLANAR_TO_RGB_EPILOG 0
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+%endmacro
+
+;==========================================================================
+
+ global _vdasm_pixblt_YUV444Planar_to_XRGB1555_scan_MMX
+_vdasm_pixblt_YUV444Planar_to_XRGB1555_scan_MMX:
+ YUV444PLANAR_TO_RGB_PROLOG
+
+ pxor mm7, mm7
+ movq mm5, [rb_mask_555]
+ movq mm6, [g_mask_555]
+
+ sub ebp, 3
+ jbe .oddcheck
+.xloop4:
+ movd mm0, dword [ecx];mm0 = Y3Y2Y1Y0
+ movd mm1, dword [ebx]
+ movd mm2, dword [edx]
+ add ecx, 4
+ add ebx, 4
+ add edx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ psrlw mm1, 1
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 2
+ pand mm2, mm5
+ pand mm3, mm6
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 4
+ ja .xloop4
+.oddcheck:
+ add ebp, 3
+ jz .noodd
+.xloop:
+	movzx	edi, byte [ecx]		;one Y sample (scalar tail)
+ movd mm0, edi
+ movzx edi, byte [ebx]
+ movd mm1, edi
+ movzx edi, byte [edx]
+ movd mm2, edi
+ add ecx, 1
+ add ebx, 1
+ add edx, 1
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ psrlw mm1, 1
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 2
+ pand mm2, mm5
+ pand mm3, mm6
+ por mm2, mm3
+
+ movd edi, mm2
+ mov [eax], di
+ add eax, 2
+
+ sub ebp, 1
+ jnz .xloop
+.noodd:
+ YUV444PLANAR_TO_RGB_EPILOG
+
+;==========================================================================
+
+ global _vdasm_pixblt_YUV444Planar_to_RGB565_scan_MMX
+_vdasm_pixblt_YUV444Planar_to_RGB565_scan_MMX:
+ YUV444PLANAR_TO_RGB_PROLOG
+
+ pxor mm7, mm7
+ movq mm5, [rb_mask_565]
+ movq mm6, [g_mask_565]
+
+ sub ebp, 3
+ jbe .oddcheck
+.xloop4:
+ movd mm0, dword [ecx];mm0 = Y3Y2Y1Y0
+ movd mm1, dword [ebx]
+ movd mm2, dword [edx]
+ add ecx, 4
+ add ebx, 4
+ add edx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 3
+ pand mm2, mm5
+ pand mm3, mm6
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 4
+ ja .xloop4
+.oddcheck:
+ add ebp, 3
+ jz .noodd
+.xloop:
+	movzx	edi, byte [ecx]		;one Y sample (scalar tail)
+ movd mm0, edi
+ movzx edi, byte [ebx]
+ movd mm1, edi
+ movzx edi, byte [edx]
+ movd mm2, edi
+ add ecx, 1
+ add ebx, 1
+ add edx, 1
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 3
+ pand mm2, mm5
+ pand mm3, mm6
+ por mm2, mm3
+
+ movd edi, mm2
+ mov [eax], di
+ add eax, 2
+
+ sub ebp, 1
+ jnz .xloop
+.noodd:
+ YUV444PLANAR_TO_RGB_EPILOG
+
+;==========================================================================
+
+ global _vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX
+_vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX:
+ YUV444PLANAR_TO_RGB_PROLOG
+
+ pxor mm7, mm7
+
+ sub ebp, 3
+ jbe .oddcheck
+.xloop4:
+ movd mm0, dword [ecx];mm0 = Y3Y2Y1Y0
+ movd mm1, dword [ebx]
+ movd mm2, dword [edx]
+ add ecx, 4
+ add ebx, 4
+ add edx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ movq mm1, mm2
+ punpcklbw mm1, mm3
+ punpckhbw mm2, mm3
+
+ movq [eax], mm1
+ movq [eax+8], mm2
+ add eax, 16
+
+ sub ebp, 4
+ ja .xloop4
+.oddcheck:
+ add ebp, 3
+ jz .noodd
+.xloop:
+	movzx	edi, byte [ecx]		;one Y sample (scalar tail)
+ movd mm0, edi
+ movzx edi, byte [ebx]
+ movd mm1, edi
+ movzx edi, byte [edx]
+ movd mm2, edi
+ add ecx, 1
+ add ebx, 1
+ add edx, 1
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ punpcklbw mm2, mm3
+
+ movd dword [eax], mm2
+ add eax, 4
+
+ sub ebp, 1
+ jnz .xloop
+.noodd:
+ YUV444PLANAR_TO_RGB_EPILOG
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb_mmx.asm
new file mode 100644
index 000000000..aa0b99987
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb_mmx.asm
@@ -0,0 +1,806 @@
+ section .rdata, rdata
+
+x07b dq 00707070707070707h
+x0200w dq 00200020002000200h
+x001fw dq 0001f001f001f001fh
+xffc0w dq 0ffc0ffc0ffc0ffc0h
+xffe0w dq 0ffe0ffe0ffe0ffe0h
+x2080w dq 02080208020802080h
+x4200w dq 04200420042004200h
+rb_mask5 dq 000f800f800f800f8h
+g_mask5 dq 00000f8000000f800h
+g_mask6 dq 00000fc000000fc00h
+rb_mul_565 dq 02000000420000004h
+rb_mul_555 dq 02000000820000008h
+r_mask_555 dq 07c007c007c007c00h
+g_mask_555 dq 003e003e003e003e0h
+b_mask_555 dq 0001f001f001f001fh
+r_mask_565 dq 0f800f800f800f800h
+g_mask_565 dq 007e007e007e007e0h
+b_mask_565 dq 0001f001f001f001fh
+
+%macro prologue 1
+ push ebx
+ push esi
+ push edi
+ push ebp
+ ;.fpo (0,%1,4,4,1,0)
+%endmacro
+
+%macro epilogue 0
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+%endmacro
+
+ section .text
+
+ global _vdasm_pixblt_RGB565_to_XRGB1555_MMX
+_vdasm_pixblt_RGB565_to_XRGB1555_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-6]
+ lea ecx, [ecx+ebp-6]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5, [x001fw]
+ movq mm4, [xffc0w]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 6
+ jbe .odd
+
+.xloop:
+ movq mm0, [ecx+ebp]
+ movq mm1, mm5
+ pand mm1, mm0
+ pand mm0, mm4
+ psrlq mm0, 1
+ paddw mm0, mm1
+ movq [edx+ebp], mm0
+ add ebp, 8
+ jnc .xloop
+
+ sub ebp, 6
+ jz .noodd
+.odd:
+ movzx eax, word [ecx+ebp+6]
+ mov ebx, 0001f001fh
+ and ebx, eax
+ and eax, 0ffc0ffc0h
+ shr eax, 1
+ add eax, ebx
+ mov [edx+ebp+6], ax
+ add ebp, 2
+ jnz .odd
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+ global _vdasm_pixblt_XRGB8888_to_XRGB1555_MMX
+_vdasm_pixblt_XRGB8888_to_XRGB1555_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-14]
+ lea ecx, [ecx+ebp*2-28]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5,[rb_mul_555]
+ movq mm6,[rb_mask5]
+ movq mm7,[g_mask5]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 14
+ jbe .odd
+
+ ;This code uses the "pmaddwd" trick for 32->16 conversions from Intel's MMX
+ ;Application Notes.
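+	;A worked sketch of the trick as used here (from the masks and
+	;multipliers declared above): after the rb_mask5 AND each 32-bit pixel
+	;holds the word pair (R & F8h, B & F8h), and rb_mul_555 is the word
+	;pair (2000h, 0008h), so a single pmaddwd computes
+	;    (R & F8h)*2000h + (B & F8h)*0008h
+	;placing red in bits 16-20 and blue in bits 6-10 of each dword.  Green
+	;(bits 11-15 after the g_mask5 AND) is OR'ed in, and the psrld by 6
+	;drops all three fields into their XRGB1555 positions before packssdw.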
+
+ movq mm0,[ecx+ebp*2] ;allocate 0 (0123)
+ movq mm2,mm0 ;allocate 2 (0 23)
+
+ movq mm1,[ecx+ebp*2+8] ;allocate 1 (0123)
+ movq mm3,mm1 ;allocate 3 (0123)
+ pand mm0,mm6
+ pmaddwd mm0,mm5
+ pand mm1,mm6
+ pmaddwd mm1,mm5
+ pand mm2,mm7
+ pand mm3,mm7
+ jmp .xloopstart
+
+ align 16
+.xloop:
+ movq mm0,[ecx+ebp*2] ;allocate 0 (01234)
+ por mm4,mm2 ;free 2 (01 34)
+
+ por mm3,mm1 ;free 3 (01 34)
+ movq mm2,mm0 ;allocate 2 (0 234)
+
+ movq mm1,[ecx+ebp*2+8] ;allocate 1 (01234)
+ psrld mm4,6
+
+ psrld mm3,6
+ pand mm0,mm6
+
+ packssdw mm4,mm3 ;free 3 (012 4)
+ movq mm3,mm1 ;allocate 3 (01234)
+
+ pmaddwd mm0,mm5
+ pand mm1,mm6
+
+ pmaddwd mm1,mm5
+ pand mm2,mm7
+
+ movq [edx+ebp-8],mm4 ;free 4 (0123 )
+ pand mm3,mm7
+
+.xloopstart:
+ movq mm4,[ecx+ebp*2+16] ;allocate 4 (01234)
+ por mm0,mm2 ;free 2 (01 34)
+
+ por mm1,mm3 ;free 3 (01 4)
+ psrld mm0,6
+
+ movq mm3,[ecx+ebp*2+24] ;allocate 3 (01 34)
+ movq mm2,mm4 ;allocate 2 (01234)
+
+ psrld mm1,6
+ pand mm4,mm6
+
+ packssdw mm0,mm1 ;free 1 (0 234)
+ movq mm1,mm3 ;allocate 1 (01234)
+
+ movq [edx+ebp],mm0 ;free 0 ( 1234)
+ pand mm3,mm6
+
+ pmaddwd mm4,mm5
+ add ebp,16
+
+ pmaddwd mm3,mm5
+ pand mm2,mm7
+
+ pand mm1,mm7
+ jnc .xloop
+
+ por mm4,mm2 ;free 2 (01 34)
+ por mm3,mm1 ;free 3 (01 34)
+ psrld mm4,6
+ psrld mm3,6
+ packssdw mm4,mm3 ;free 3 (012 4)
+ movq [edx+ebp-8],mm4 ;free 4 (0123 )
+
+.odd:
+ sub ebp, 14
+ jz .noodd
+.oddloop:
+ mov eax, [ecx+ebp*2+28]
+ mov ebx, 00f80000h
+ mov esi, eax
+ and ebx, eax
+ shr ebx, 9
+ and esi, 0000f800h
+ shr esi, 6
+ and eax, 000000f8h
+ shr eax, 3
+ add esi, ebx
+ add eax, esi
+ mov [edx+ebp+14], ax
+ add ebp, 2
+ jnz .oddloop
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+ global _vdasm_pixblt_XRGB1555_to_RGB565_MMX
+_vdasm_pixblt_XRGB1555_to_RGB565_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-6]
+ lea ecx, [ecx+ebp-6]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5, [x0200w]
+ movq mm4, [xffe0w]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 6
+ jbe .odd
+
+.xloop:
+ movq mm0, [ecx+ebp]
+ movq mm1, mm4
+ movq mm2, mm0
+ pand mm1, mm0
+ pand mm0, mm5
+ paddw mm1, mm2
+ psrlq mm0, 4
+ paddw mm0, mm1
+ movq [edx+ebp], mm0
+ add ebp, 8
+ jnc .xloop
+
+.odd:
+ sub ebp, 6
+ jz .noodd
+.oddloop:
+ movzx eax, word [ecx+ebp+6]
+ mov ebx, 02000200h
+ mov esi, eax
+ and ebx, eax
+ shr ebx, 4
+ and esi, 0ffe0ffe0h
+ add eax, esi
+ add eax, ebx
+ mov [edx+ebp+6], ax
+ add ebp, 2
+ jnz .oddloop
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+
+ global _vdasm_pixblt_XRGB8888_to_RGB565_MMX
+_vdasm_pixblt_XRGB8888_to_RGB565_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-14]
+ lea ecx, [ecx+ebp*2-28]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5,[rb_mul_565]
+ movq mm6,[rb_mask5]
+ movq mm7,[g_mask6]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 14
+ jbe .odd
+
+ ;This code uses the "pmaddwd" trick for 32->16 conversions from Intel's MMX
+ ;Application Notes.
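+	;Same scheme as the 1555 routine above, but with rb_mul_565 = (2000h,
+	;0004h) and the 6-bit green mask: red lands in bits 16-20, blue in bits
+	;5-9 and green in bits 10-15.  The pslld/psrad pair (a net right shift
+	;by 5) is used instead of a logical shift so that packssdw keeps the
+	;top red bit of the 565 word instead of saturating it away.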
+
+ movq mm0,[ecx+ebp*2] ;allocate 0 (0123)
+ movq mm2,mm0 ;allocate 2 (0 23)
+
+ movq mm1,[ecx+ebp*2+8] ;allocate 1 (0123)
+ movq mm3,mm1 ;allocate 3 (0123)
+ pand mm0,mm6
+ pmaddwd mm0,mm5
+ pand mm1,mm6
+ pmaddwd mm1,mm5
+ pand mm2,mm7
+ pand mm3,mm7
+ jmp .xloopstart
+
+ align 16
+.xloop:
+ movq mm0,[ecx+ebp*2] ;allocate 0 (01234)
+ por mm4,mm2 ;free 2 (01 34)
+
+ por mm3,mm1 ;free 3 (01 34)
+ pslld mm4,16-5
+
+ pslld mm3,16-5
+ movq mm2,mm0 ;allocate 2 (0 234)
+
+ movq mm1,[ecx+ebp*2+8] ;allocate 1 (01234)
+ psrad mm4,16
+
+ psrad mm3,16
+ pand mm0,mm6
+
+ packssdw mm4,mm3 ;free 3 (012 4)
+ movq mm3,mm1 ;allocate 3 (01234)
+
+ pmaddwd mm0,mm5
+ pand mm1,mm6
+
+ pmaddwd mm1,mm5
+ pand mm2,mm7
+
+ movq [edx+ebp-8],mm4 ;free 4 (0123 )
+ pand mm3,mm7
+
+.xloopstart:
+ movq mm4,[ecx+ebp*2+16] ;allocate 4 (01234)
+ por mm0,mm2 ;free 2 (01 34)
+
+ por mm1,mm3 ;free 3 (01 4)
+ pslld mm0,16-5
+
+ movq mm3,[ecx+ebp*2+24] ;allocate 3 (01 34)
+ pslld mm1,16-5
+
+ psrad mm0,16
+ movq mm2,mm4 ;allocate 2 (01234)
+
+ psrad mm1,16
+ pand mm4,mm6
+
+ packssdw mm0,mm1 ;free 1 (0 234)
+ movq mm1,mm3 ;allocate 1 (01234)
+
+ movq [edx+ebp],mm0 ;free 0 ( 1234)
+ pand mm3,mm6
+
+ pmaddwd mm4,mm5
+ add ebp,16
+
+ pmaddwd mm3,mm5
+ pand mm2,mm7
+
+ pand mm1,mm7
+ jnc .xloop
+
+ por mm4,mm2 ;free 2 (01 34)
+ por mm3,mm1 ;free 3 (01 34)
+ psllq mm4,16-5
+ psllq mm3,16-5
+ psrad mm4,16
+ psrad mm3,16
+ packssdw mm4,mm3 ;free 3 (012 4)
+ movq [edx+ebp-8],mm4 ;free 4 (0123 )
+
+.odd:
+ sub ebp, 14
+ jz .noodd
+.oddloop:
+ mov eax, [ecx+ebp*2+28]
+ mov ebx, 00f80000h
+ mov esi, eax
+ and ebx, eax
+ and eax, 000000f8h
+ shr eax, 3
+ and esi, 0000fc00h
+ shr ebx, 8
+ shr esi, 5
+ add eax, ebx
+ add eax, esi
+ mov [edx+ebp+14], ax
+ add ebp, 2
+ jnz .oddloop
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+ global _vdasm_pixblt_XRGB8888_to_RGB888_MMX
+_vdasm_pixblt_XRGB8888_to_RGB888_MMX:
+ prologue 6
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ecx,[esp+20+16]
+ lea eax,[ecx+ecx*2]
+ lea ebx,[ecx*4]
+ sub [esp+8+16],eax
+ sub [esp+16+16],ebx
+
+ pcmpeqb mm7,mm7
+ psrld mm7,8
+ movq mm6,mm7
+ psllq mm7,32 ;mm7 = high rgb mask
+ psrlq mm6,32 ;mm6 = low rgb mask
+
+ mov ebp,[esp+20+16]
+ mov edx,[esp+24+16]
+ mov eax,[esp+16+16]
+ mov ebx,[esp+ 8+16]
+.yloop:
+ mov ecx,ebp
+ shr ecx,3
+ jz .checkodd
+.xloop:
+ movq mm0,[esi] ;mm0 = a1r1g1b1a0r0g0b0
+ movq mm1,mm6
+
+ movq mm2,[esi+8] ;mm2 = a3r3g3b3a2r2g2b2
+ pand mm1,mm0 ;mm1 = ----------r0g0b0
+
+ movq mm3,mm6
+ pand mm0,mm7 ;mm0 = --r1g1b1--------
+
+ movq mm4,mm2
+ pand mm3,mm2 ;mm3 = ----------r2g2b2
+
+ psrlq mm0,8 ;mm0 = ----r1g1b1------
+ pand mm2,mm7 ;mm2 = --r3g3b3--------
+
+ movq mm5,[esi+16] ;mm5 = a5r5g5b5a4r4g4b4
+ psllq mm4,48 ;mm4 = g2b2------------
+
+ por mm0,mm1 ;mm0 = ----r1g1b1r0g0b0
+ psrlq mm3,16 ;mm3 = --------------r2
+
+ por mm0,mm4 ;mm0 = g2b2r1g1b1r0g0b0
+ movq mm1,mm6
+
+ pand mm1,mm5 ;mm1 = ----------r4g4b4
+ psrlq mm2,24 ;mm2 = --------r3g3b3--
+
+ movq [edi],mm0
+ pand mm5,mm7 ;mm5 = --r5g5b5--------
+
+ psllq mm1,32 ;mm1 = --r4g4b4--------
+ movq mm4,mm5 ;mm4 = --r5g5b5--------
+
+ por mm2,mm3 ;mm2 = --------r3g3b3r2
+ psllq mm5,24 ;mm5 = b5--------------
+
+ movq mm3,[esi+24] ;mm3 = a7r7g7b7a6r6g6b6
+ por mm2,mm1 ;mm2 = --r4g4b4r3g3b3r2
+
+ movq mm1,mm6
+ por mm2,mm5 ;mm2 = b5r4g4b4r3g3b3r2
+
+ psrlq mm4,40 ;mm4 = ------------r5g5
+ pand mm1,mm3 ;mm1 = ----------r6g6b6
+
+ psllq mm1,16 ;mm1 = ------r6g6b6----
+ pand mm3,mm7 ;mm3 = --r7g7b7--------
+
+ por mm4,mm1 ;mm4 = ------r6g6b6r5g5
+ psllq mm3,8 ;mm3 = r7g7b7----------
+
+ movq [edi+8],mm2
+ por mm4,mm3 ;mm4 = r7g7b7r6g6b6r5g5
+
+ add esi,32
+ sub ecx,1
+
+	movq	[edi+16],mm4
+
+ lea edi,[edi+24]
+ jne .xloop
+
+.checkodd:
+ mov ecx,ebp
+ and ecx,7
+ jz .noodd
+ movd mm0,eax
+.oddloop:
+ mov eax,[esi]
+ add esi,4
+ mov [edi],ax
+ shr eax,16
+ mov [edi+2],al
+ add edi,3
+ sub ecx,1
+ jnz .oddloop
+ movd eax,mm0
+.noodd:
+ add esi,eax
+ add edi,ebx
+
+ sub edx,1
+ jne .yloop
+
+ emms
+
+ epilogue
+ ret
+
+ global _vdasm_pixblt_XRGB1555_to_XRGB8888_MMX
+_vdasm_pixblt_XRGB1555_to_XRGB8888_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp*2-12]
+ lea ecx, [ecx+ebp-6]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5, [r_mask_555]
+ movq mm6, [g_mask_555]
+ movq mm7, [b_mask_555]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 6
+ jbe .odd
+
+.xloop:
+ movq mm0, [ecx+ebp]
+ movq mm1, mm6
+ movq mm2, mm7
+ pand mm1, mm0
+ pand mm2, mm0
+ pand mm0, mm5
+
+ paddw mm0, mm0
+ pmulhw mm1, [x4200w]
+ psllq mm2, 3
+ paddw mm0, mm2
+ movq mm2, mm0
+ psrlw mm0, 5
+ pand mm0, [x07b]
+ paddw mm0, mm2
+ movq mm2, mm0
+ punpcklbw mm0, mm1
+ punpckhbw mm2, mm1
+
+ movq [edx+ebp*2], mm0
+ movq [edx+ebp*2+8], mm2
+ add ebp, 8
+ jnc .xloop
+.odd:
+ sub ebp, 6
+ jz .noodd
+.oddloop:
+ movzx eax, word [ecx+ebp+6]
+ mov ebx, 03e0h
+ mov esi, 001fh
+ and ebx, eax
+ and esi, eax
+ and eax, 07c00h
+ shl esi, 3
+ shl ebx, 6
+ shl eax, 9
+ add ebx, esi
+ add eax, ebx
+ mov ebx, eax
+ shr eax, 5
+ and eax, 070707h
+ add eax, ebx
+ mov [edx+ebp*2+12], eax
+ add ebp, 2
+ jnz .oddloop
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+
+ global _vdasm_pixblt_RGB565_to_XRGB8888_MMX
+_vdasm_pixblt_RGB565_to_XRGB8888_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp*2-12]
+ lea ecx, [ecx+ebp-6]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5, [r_mask_565]
+ movq mm6, [g_mask_565]
+ movq mm7, [b_mask_565]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 6
+ jbe .odd
+
+.xloop:
+ movq mm0, [ecx+ebp]
+ movq mm1, mm6
+ movq mm2, mm7
+ pand mm1, mm0
+ pand mm2, mm0
+ pand mm0, mm5
+
+ pmulhw mm1, [x2080w]
+ psllq mm2, 3
+ paddw mm0, mm2
+ movq mm2, mm0
+ psrlw mm0, 5
+ pand mm0, [x07b]
+ paddw mm0, mm2
+ movq mm2, mm0
+ punpcklbw mm0, mm1
+ punpckhbw mm2, mm1
+
+ movq [edx+ebp*2], mm0
+ movq [edx+ebp*2+8], mm2
+ add ebp, 8
+ jnc .xloop
+
+.odd:
+ sub ebp, 6
+ jz .noodd
+ push edi
+.oddloop:
+ movzx eax, word [ecx+ebp+6]
+ mov ebx, 0000f800h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 8
+ mov edi, eax
+ shl eax, 3
+ and esi, 000007e0h
+ and eax, 000000f8h
+ add ebx, eax
+ shl esi, 5
+ mov eax, ebx
+ shr ebx, 5
+ and edi, 00000600h
+ shr edi, 1
+ and ebx, 00070007h
+ add esi, edi
+ add eax, ebx
+ add eax, esi
+ mov [edx+ebp*2+12], eax
+ add ebp, 2
+ jnz .oddloop
+ pop edi
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+
+ global _vdasm_pixblt_RGB888_to_XRGB8888_MMX
+_vdasm_pixblt_RGB888_to_XRGB8888_MMX:
+ prologue 6
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ecx,[esp+20+16]
+ lea eax,[ecx+ecx*2]
+ lea ebx,[ecx*4]
+ sub [esp+8+16],ebx
+ sub [esp+16+16],eax
+
+ mov edx,[esp+24+16]
+ mov ebx,[esp+20+16]
+ mov ecx,[esp+16+16]
+ mov eax,[esp+ 8+16]
+
+ ;ebx horizontal count backup
+ ;ecx source modulo
+ ;edx vertical count
+ ;esi source
+ ;edi destination
+ ;ebp horizontal count
+
+.yloop:
+ mov ebp,ebx
+ shr ebp,3
+ jz .checkodd
+.xloop:
+ movq mm0,[esi] ;mm0: g2b2r1g1b1r0g0b0
+ movq mm1,mm0 ;
+
+ psrlq mm1,24 ;mm1: ------g2b2r1g1b1
+ movq mm2,mm0 ;
+
+ movq mm3,[esi+8] ;mm3: b5r4g4b4r3g3b3r2
+ punpckldq mm0,mm1 ;mm0: b2r1g1b1b1r0g0b0 [qword 0 ready]
+
+ movq mm4,mm3 ;mm4: b5r4g4b4r3g3b3r2
+ psllq mm3,48 ;mm3: b3r2------------
+
+ movq mm5,mm4 ;mm5: b5r4g4b4r3g3b3r2
+ psrlq mm2,16 ;mm2: ----g2b2--------
+
+ movq mm1,[esi+16] ;mm1: r7g7b7r6g6b6r5g5
+ por mm2,mm3 ;mm2: b3r2g2b2--------
+
+ movq [edi],mm0 ;
+ psllq mm4,24 ;mm4: b4r3g3b3r2------
+
+ movq mm3,mm5 ;mm3: b5r4g4b4r3g3b3r2
+ psrlq mm5,24 ;mm5: ------b5r4g4b4r3
+
+ movq mm0,mm1 ;mm0: r7g7b7r6g6b6r5g5
+ psllq mm1,40 ;mm1: b6r5g5----------
+
+ punpckhdq mm2,mm4 ;mm2: b4r3g3b3b3r2g2b2 [qword 1 ready]
+ por mm1,mm5 ;mm1: b6r5g5b5r4g4b4r3
+
+ movq mm4,mm0 ;mm4: r7g7b7r6g6b6r5g5
+ punpckhdq mm3,mm1 ;mm3: b6r5g5b5b5r4g4b4 [qword 2 ready]
+
+ movq [edi+8],mm2
+ psrlq mm0,16 ;mm0: ----r7g7b7r6g6b6
+
+ movq [edi+16],mm3
+ psrlq mm4,40 ;mm4: ----------r7g7b7
+
+ punpckldq mm0,mm4 ;mm0: --r7g7b7b7r6g6b6 [qword 3 ready]
+ add esi,24
+
+ movq [edi+24],mm0
+
+ add edi,32
+ sub ebp,1
+ jne .xloop
+
+.checkodd:
+ mov ebp,ebx
+ and ebp,7
+ jz .noodd
+ movd mm7,eax
+.oddloop:
+ mov ax,[esi]
+ mov [edi],ax
+ mov al,[esi+2]
+ mov [edi+2],al
+ add esi,3
+ add edi,4
+ sub ebp,1
+ jne .oddloop
+
+ movd eax,mm7
+.noodd:
+ add esi,ecx
+ add edi,eax
+
+ sub edx,1
+ jne .yloop
+ emms
+ epilogue
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_bltyuv2rgb_sse2.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_bltyuv2rgb_sse2.asm
new file mode 100644
index 000000000..87ff13b56
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_bltyuv2rgb_sse2.asm
@@ -0,0 +1,161 @@
+ section .rdata, rdata
+
+ align 16
+
+bytemasks dd 000000ffh, 0000ffffh, 00ffffffh
+
+ section .text
+
+;============================================================================
+
+ global _vdasm_pixblt_XRGB8888_to_YUV444Planar_scan_SSE2
+_vdasm_pixblt_XRGB8888_to_YUV444Planar_scan_SSE2:
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+12]
+ mov ebx, [esp+8+12]
+ mov ecx, [esp+12+12]
+ mov edx, [esp+16+12]
+ mov esi, [esp+20+12]
+ mov edi, [esp+24+12]
+
+ pcmpeqb xmm6, xmm6
+ psrlw xmm6, 8 ;xmm6 = 00FF x 8
+
+ sub esi, 4
+ js .postcheck
+.xloop:
+	movdqu	xmm2, [edx]		;xmm2 = X3R3G3B3X2R2G2B2X1R1G1B1X0R0G0B0
+ add edx, 16
+ movdqa xmm5, xmm2
+	pand	xmm2, xmm6		;xmm2 = R3 B3 R2 B2 R1 B1 R0 B0
+	psrlw	xmm5, 8			;xmm5 = X3 G3 X2 G2 X1 G1 X0 G0
+ movdqa xmm0, [edi+0] ;coeff_rb_to_y
+ movdqa xmm1, [edi+16] ;coeff_rb_to_u
+ movdqa xmm3, [edi+32] ;coeff_g_to_y
+ movdqa xmm4, [edi+48] ;coeff_g_to_u
+ pmaddwd xmm0, xmm2
+ pmaddwd xmm1, xmm2
+ pmaddwd xmm2, [edi+64] ;coeff_rb_to_v
+ pmaddwd xmm3, xmm5
+ pmaddwd xmm4, xmm5
+ pmaddwd xmm5, [edi+80] ;coeff_g_to_v
+ paddd xmm0, xmm3
+ paddd xmm1, xmm4
+ paddd xmm2, xmm5
+ paddd xmm0, [edi+96] ;bias_y
+ paddd xmm1, [edi+112] ;bias_c
+ paddd xmm2, [edi+112] ;bias_c
+ psrad xmm0, 15
+ psrad xmm1, 15
+ psrad xmm2, 15
+ packssdw xmm0, xmm0
+ packssdw xmm1, xmm1
+ packssdw xmm2, xmm2
+ packuswb xmm0, xmm0
+ packuswb xmm1, xmm1
+ packuswb xmm2, xmm2
+ movd [eax], xmm0
+ movd [ebx], xmm1
+ movd [ecx], xmm2
+ add eax, 4
+ add ebx, 4
+ add ecx, 4
+ sub esi, 4
+ jns .xloop
+.postcheck:
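+	;esi is (pixels remaining - 4), i.e. -4..-1 at this point, so the
+	;indexed jump dispatches to the handler for 0, 1, 2 or 3 leftover
+	;pixels via .finaltable below.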
+ jmp dword [.finaltable + esi*4 + 16]
+.complete:
+ pop ebx
+ pop esi
+ pop edi
+ ret
+
+.finaltable:
+ dd .complete
+ dd .do1
+ dd .do2
+ dd .do3
+
+.finaltable2:
+ dd .fin1
+ dd .fin2
+ dd .fin3
+
+.do1:
+ movd xmm2, [edx]
+ jmp short .dofinal
+.do2:
+ movq xmm2, [edx]
+ jmp short .dofinal
+.do3:
+ movq xmm2, [edx]
+	movd	xmm1, [edx+8]		;third leftover pixel
+ movlhps xmm2, xmm1
+.dofinal:
+ movdqa xmm5, xmm2
+	pand	xmm2, xmm6		;xmm2 = R3 B3 R2 B2 R1 B1 R0 B0
+	psrlw	xmm5, 8			;xmm5 = X3 G3 X2 G2 X1 G1 X0 G0
+ movdqa xmm0, [edi+0] ;coeff_rb_to_y
+ movdqa xmm1, [edi+16] ;coeff_rb_to_u
+ movdqa xmm3, [edi+32] ;coeff_g_to_y
+ movdqa xmm4, [edi+48] ;coeff_g_to_u
+ pmaddwd xmm0, xmm2
+ pmaddwd xmm1, xmm2
+ pmaddwd xmm2, [edi+64] ;coeff_rb_to_v
+ pmaddwd xmm3, xmm5
+ pmaddwd xmm4, xmm5
+ pmaddwd xmm5, [edi+80] ;coeff_g_to_v
+ paddd xmm0, xmm3
+ paddd xmm1, xmm4
+ paddd xmm2, xmm5
+ paddd xmm0, [edi+96] ;bias_y
+ paddd xmm1, [edi+112] ;bias_c
+ paddd xmm2, [edi+112] ;bias_c
+ psrad xmm0, 15
+ psrad xmm1, 15
+ psrad xmm2, 15
+ packssdw xmm0, xmm0
+ packssdw xmm1, xmm1
+ packssdw xmm2, xmm2
+ packuswb xmm0, xmm0
+ packuswb xmm1, xmm1
+ movd xmm7, [bytemasks + esi*4 + 12]
+ packuswb xmm2, xmm2
+
+ jmp dword [.finaltable2 + esi*4 + 12]
+
+.fin1:
+ movd edx, xmm0
+ mov [eax], dl
+ movd edx, xmm1
+ mov [ebx], dl
+ movd edx, xmm2
+ mov [ecx], dl
+ jmp .complete
+.fin2:
+ movd edx, xmm0
+ mov [eax], dx
+ movd edx, xmm1
+ mov [ebx], dx
+ movd edx, xmm2
+ mov [ecx], dx
+ jmp .complete
+.fin3:
+ movd edx, xmm0
+ mov [eax], dx
+ shr edx, 16
+ mov [eax+2], dl
+ movd edx, xmm1
+ mov [ebx], dx
+ shr edx, 16
+ mov [ebx+2], dl
+ movd edx, xmm2
+ mov [ecx], dx
+ shr edx, 16
+ mov [ecx+2], dl
+ jmp .complete
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_resample_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_resample_mmx.asm
new file mode 100644
index 000000000..912c655ab
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_resample_mmx.asm
@@ -0,0 +1,1559 @@
+; VirtualDub - Video processing and capture application
+; Graphics support library
+; Copyright (C) 1998-2004 Avery Lee
+;
+; This program is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or
+; (at your option) any later version.
+;
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, write to the Free Software
+; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+;
+ section .rdata, rdata, align=16
+
+x0002000200020002 dq 0002000200020002h
+x0004000400040004 dq 0004000400040004h
+x0008000800080008 dq 0008000800080008h
+x0000200000002000 dq 0000200000002000h
+
+ align 16
+MMX_roundval dq 0000200000002000h, 0000200000002000h
+
+
+;**************************************************************************
+
+x0000FFFF0000FFFF dq 0000FFFF0000FFFFh
+x0000010100000101 dq 0000010100000101h
+x0100010001000100 dq 0100010001000100h
+
+ section .text
+
+;--------------------------------------------------------------------------
+;_vdasm_resize_interp_row_run_MMX(
+; [esp+ 4] void *dst,
+; [esp+ 8] void *src,
+; [esp+12] ulong width,
+; [esp+16] __int64 xaccum,
+; [esp+24] __int64 x_inc);
+;
+ global _vdasm_resize_interp_row_run_MMX
+_vdasm_resize_interp_row_run_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+8+16]
+ mov edi, [esp+4+16]
+ mov ebp, [esp+12+16]
+
+ movd mm4, dword [esp+16+16]
+ pxor mm7, mm7
+ movd mm6, dword [esp+24+16]
+ punpckldq mm4, mm4
+ punpckldq mm6, mm6
+
+ shr esi, 2
+
+ mov eax, [esp+16+16]
+ mov ebx, [esp+20+16]
+ add esi, ebx
+ mov ebx, [esp+24+16]
+ mov ecx, [esp+28+16]
+
+ shl ebp,2
+ add edi,ebp
+ neg ebp
+
+.colloop:
+ movd mm1, dword [esi*4+4]
+ movq mm5, mm4
+
+ movd mm0, dword [esi*4]
+ punpcklbw mm1, mm7
+
+ punpcklbw mm0, mm7
+ psrld mm5, 24
+
+ movq mm3, [x0100010001000100]
+ packssdw mm5, mm5
+
+ pmullw mm1, mm5
+ psubw mm3, mm5
+
+ pmullw mm0, mm3
+ paddd mm4, mm6
+
+ ;stall
+ ;stall
+
+ ;stall
+ ;stall
+
+ paddw mm0, mm1
+
+ psrlw mm0, 8
+ add eax, ebx
+
+ adc esi, ecx
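+	;eax:esi form a 32.32 fixed-point source position (eax = fraction,
+	;esi = integer pixel index): add/adc advances both, with the carry out
+	;of the fraction bumping the pixel index.  mm4 tracks the same fraction
+	;for the per-pixel weights computed above.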
+ packuswb mm0, mm0
+
+ movd dword [edi+ebp],mm0
+
+ add ebp, 4
+ jnz .colloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+
+;**************************************************************************
+
+;vdasm_resize_interp_col_run_MMX(
+; [esp+ 4] void *dst,
+; [esp+ 8] void *src1,
+; [esp+12] void *src2,
+; [esp+16] ulong width,
+; [esp+20] ulong yaccum);
+
+
+ global _vdasm_resize_interp_col_run_MMX
+_vdasm_resize_interp_col_run_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+8+16]
+ mov edx, [esp+12+16]
+ mov edi, [esp+4+16]
+ mov ebp, [esp+16+16]
+
+ movd mm4, dword [esp+20+16]
+ pxor mm7, mm7
+ punpcklwd mm4, mm4
+ punpckldq mm4, mm4
+ psrlw mm4, 8
+ pxor mm4, [x0000FFFF0000FFFF]
+ paddw mm4, [x0000010100000101]
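+	;Each dword of mm4 now holds (256-frac) in its low word and frac in its
+	;high word, so the pmaddwd in the loop blends the two source rows as
+	;src1*(256-frac) + src2*frac, normalized by the psrad 8 that follows.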
+
+ shl ebp, 2
+ add edi, ebp
+ add esi, ebp
+ add edx, ebp
+ neg ebp
+
+.colloop:
+ movd mm0, dword [esi+ebp]
+ movd mm2, dword [edx+ebp]
+
+ punpcklbw mm0, mm7
+ punpcklbw mm2, mm7
+
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ punpckhwd mm1, mm2
+
+ pmaddwd mm0, mm4
+ pmaddwd mm1, mm4
+
+ psrad mm0, 8
+ psrad mm1, 8
+
+ packssdw mm0, mm1
+ packuswb mm0, mm0
+
+ movd dword [edi+ebp],mm0
+
+ add ebp, 4
+ jnz .colloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+;--------------------------------------------------------------------------
+;vdasm_resize_ccint_row_MMX(dst, src, count, xaccum, xinc, tbl);
+
+ global _vdasm_resize_ccint_row_MMX
+_vdasm_resize_ccint_row_MMX:
+ push ebx
+ push esi
+ push edi
+ push ebp
+
+ mov ebx, [esp+4+16] ;ebx = dest addr
+ mov ecx, [esp+12+16] ;ecx = count
+
+ mov ebp, [esp+20+16] ;ebp = increment
+ mov edi, ebp ;edi = increment
+ shl ebp, 16 ;ebp = fractional increment
+ mov esi, [esp+16+16] ;esi = 16:16 position
+ sar edi, 16 ;edi = integer increment
+ mov [esp+20+16], ebp ;xinc = fractional increment
+ mov ebp, esi ;ebp = 16:16 position
+ shr esi, 16 ;esi = integer position
+ shl ebp, 16 ;ebp = fraction
+ mov [esp+16+16], ebp ;xaccum = fraction
+
+ mov eax, [esp+8+16]
+
+ shr ebp, 24 ;ebp = fraction (0...255)
+ mov [esp+8+16], edi
+ shl ebp, 4 ;ebp = fraction*16
+ mov edi, ebp
+ mov ebp, [esp+4+16] ;ebp = destination
+
+ shr eax, 2
+ add eax, esi
+ shl ecx, 2 ;ecx = count*4
+ lea ebp, [ebp+ecx-4]
+ neg ecx ;ecx = -count*4
+
+ movq mm6, [x0000200000002000]
+ pxor mm7, mm7
+
+ mov edx,[esp+16+16] ;edx = fractional accumulator
+ mov esi,[esp+20+16] ;esi = fractional increment
+
+ mov ebx,[esp+24+16] ;ebx = coefficient pointer
+
+ movd mm0,dword [eax*4]
+ movd mm1,dword [eax*4+4]
+ punpcklbw mm0,mm7 ;mm0 = [a1][r1][g1][b1]
+
+ ;borrow stack pointer
+ push 0 ;don't crash
+ push dword [fs:0]
+ mov dword [fs:0], esp
+ mov esp, [esp+8+24] ;esp = integer increment
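+	;The three pushes above park the real stack pointer in fs:[0] (the TIB
+	;exception-list slot) so that ESP can be borrowed as one more data
+	;register (here the integer source increment) for the duration of the
+	;loop; it is restored from fs:[0] once the loop exits, and the pushed 0
+	;stands in for the handler field of the faked SEH record.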
+ jmp short ccint_loop_MMX_start
+
+ ;EAX source pointer / 4
+ ;EBX coefficient pointer
+ ;ECX count
+ ;EDX fractional accumulator
+ ;ESI fractional increment
+ ;EDI coefficient offset
+ ;ESP integer increment
+ ;EBP destination pointer
+
+ align 16
+ccint_loop_MMX:
+ movd mm0,dword [eax*4]
+ packuswb mm2,mm2 ;mm0 = [a][r][g][b][a][r][g][b]
+
+ movd mm1,dword [eax*4+4]
+ punpcklbw mm0,mm7 ;mm0 = [a1][r1][g1][b1]
+
+ movd dword [ebp+ecx],mm2
+ccint_loop_MMX_start:
+ movq mm4,mm0 ;mm0 = [a1][r1][g1][b1]
+
+ movd mm2,dword [eax*4+8]
+ punpcklbw mm1,mm7 ;mm1 = [a2][r2][g2][b2]
+
+ movd mm3,dword [eax*4+12]
+ punpcklbw mm2,mm7 ;mm2 = [a3][r3][g3][b3]
+
+ punpcklbw mm3,mm7 ;mm3 = [a4][r4][g4][b4]
+ movq mm5,mm2 ;mm2 = [a3][r3][g3][b3]
+
+ add edx,esi ;add fractional increment
+ punpcklwd mm0,mm1 ;mm0 = [g2][g1][b2][b1]
+
+ pmaddwd mm0,[ebx+edi]
+ punpcklwd mm2,mm3 ;mm2 = [g4][g3][b4][b3]
+
+ pmaddwd mm2,[ebx+edi+8]
+ punpckhwd mm4,mm1 ;mm4 = [a2][a1][r2][r1]
+
+ pmaddwd mm4,[ebx+edi]
+	punpckhwd mm5,mm3		;mm5 = [a4][a3][r4][r3]
+
+ pmaddwd mm5,[ebx+edi+8]
+ paddd mm0,mm6
+
+ adc eax,esp ;add integer increment and fractional bump to offset
+ mov edi,0ff000000h
+
+ paddd mm2,mm0 ;mm0 = [ g ][ b ]
+ paddd mm4,mm6
+
+ psrad mm2,14
+ paddd mm4,mm5 ;mm4 = [ a ][ r ]
+
+ and edi,edx
+ psrad mm4,14
+
+ shr edi,20 ;edi = fraction (0...255)*16
+ add ecx,4
+
+ packssdw mm2,mm4 ;mm0 = [ a ][ r ][ g ][ b ]
+ jnc ccint_loop_MMX
+
+ packuswb mm2,mm2 ;mm0 = [a][r][g][b][a][r][g][b]
+ movd dword [ebp],mm2
+
+ mov esp, dword [fs:0]
+ pop dword [fs:0]
+ pop eax
+
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+ ret
+
+;--------------------------------------------------------------------------
+;vdasm_resize_ccint_col_MMX(dst, src1, src2, src3, src4, count, tbl);
+
+ global _vdasm_resize_ccint_col_MMX
+_vdasm_resize_ccint_col_MMX:
+ push ebx
+ push esi
+ push edi
+ push ebp
+
+ mov ebp, [esp+4+16] ;ebp = dest addr
+ mov esi, [esp+24+16] ;esi = count
+ add esi, esi
+ add esi, esi
+
+ mov eax, [esp+8+16] ;eax = row 1
+ mov ebx, [esp+12+16] ;ebx = row 2
+ mov ecx, [esp+16+16] ;ecx = row 3
+ mov edx, [esp+20+16] ;edx = row 4
+ mov edi, [esp+28+16] ;edi = coefficient ptr
+
+ add eax, esi
+ add ebx, esi
+ add ecx, esi
+ add edx, esi
+ add ebp, esi
+ neg esi
+
+ movq mm4,[edi]
+ movq mm5,[edi+8]
+ movq mm6,[x0000200000002000]
+ pxor mm7,mm7
+
+ movd mm2,dword [eax+esi]
+ movd mm1,dword [ebx+esi] ;mm1 = pixel1
+ punpcklbw mm2,mm7
+ jmp short ccint_col_loop_MMX.entry
+
+ align 16
+ccint_col_loop_MMX:
+ movd mm2,dword [eax+esi] ;mm2 = pixel0
+ packuswb mm0,mm0
+
+ movd mm1,dword [ebx+esi] ;mm1 = pixel1
+ pxor mm7,mm7
+
+ movd dword [ebp+esi-4],mm0
+ punpcklbw mm2,mm7
+
+ccint_col_loop_MMX.entry:
+ punpcklbw mm1,mm7
+ movq mm0,mm2
+
+ movd mm3,dword [edx+esi] ;mm3 = pixel3
+ punpcklwd mm0,mm1 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm0,mm4
+ punpckhwd mm2,mm1 ;mm2 = [a1][a0][r1][r0]
+
+ movd mm1,dword [ecx+esi] ;mm1 = pixel2
+ punpcklbw mm3,mm7
+
+ pmaddwd mm2,mm4
+ punpcklbw mm1,mm7
+
+ movq mm7,mm1
+ punpcklwd mm1,mm3 ;mm1 = [g3][g2][b3][b2]
+
+ punpckhwd mm7,mm3 ;mm7 = [a3][a2][r3][r2]
+ pmaddwd mm1,mm5
+
+ pmaddwd mm7,mm5
+ paddd mm0,mm6
+
+ paddd mm2,mm6
+ paddd mm0,mm1
+
+ paddd mm2,mm7
+ psrad mm0,14
+
+ psrad mm2,14
+ add esi,4
+
+ packssdw mm0,mm2
+ jne ccint_col_loop_MMX
+
+ packuswb mm0,mm0
+ movd dword [ebp-4],mm0
+
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+ ret
+
+;--------------------------------------------------------------------------
+;vdasm_resize_ccint_col_SSE2(dst, src1, src2, src3, src4, count, tbl);
+
+ global _vdasm_resize_ccint_col_SSE2
+_vdasm_resize_ccint_col_SSE2:
+ push ebx
+ push esi
+ push edi
+ push ebp
+
+ mov ebp,[esp + 4 + 16] ;ebp = dest addr
+ mov esi,[esp + 24 + 16] ;esi = count
+ add esi,esi
+ add esi,esi
+
+ mov eax,[esp + 8 + 16] ;eax = row 1
+ mov ebx,[esp + 12 + 16] ;ebx = row 2
+ mov ecx,[esp + 16 + 16] ;ecx = row 3
+ mov edx,[esp + 20 + 16] ;edx = row 4
+ mov edi,[esp + 28 + 16] ;edi = coefficient ptr
+
+ neg esi
+
+ add esi,4
+ jz ccint_col_SSE2_odd
+
+ movq xmm4,qword [edi]
+ movq xmm5,qword [edi+8]
+ punpcklqdq xmm4,xmm4
+ punpcklqdq xmm5,xmm5
+ movq xmm6,[x0000200000002000]
+ punpcklqdq xmm6,xmm6
+ pxor xmm7,xmm7
+
+; jmp short ccint_col_loop_SSE2.entry
+
+; align 16
+ccint_col_loop_SSE2:
+ movq xmm0, qword [eax]
+ add eax, 8
+ movq xmm1, qword [ebx]
+ add ebx, 8
+ movq xmm2, qword [ecx]
+ add ecx, 8
+ movq xmm3, qword [edx]
+ add edx, 8
+ punpcklbw xmm0,xmm1
+ punpcklbw xmm2,xmm3
+ movdqa xmm1,xmm0
+ movdqa xmm3,xmm2
+ punpcklbw xmm0,xmm7
+ punpckhbw xmm1,xmm7
+ punpcklbw xmm2,xmm7
+ punpckhbw xmm3,xmm7
+ pmaddwd xmm0,xmm4
+ pmaddwd xmm1,xmm4
+ pmaddwd xmm2,xmm5
+ pmaddwd xmm3,xmm5
+ paddd xmm0,xmm6
+ paddd xmm1,xmm6
+ paddd xmm0,xmm2
+ paddd xmm1,xmm3
+ psrad xmm0,14
+ psrad xmm1,14
+ packssdw xmm0,xmm1
+ packuswb xmm0,xmm0
+ movdq2q mm0,xmm0
+ movntq [ebp],mm0
+ add ebp,8
+ add esi,8
+ jnc ccint_col_loop_SSE2
+ jnz ccint_col_SSE2_noodd
+ccint_col_SSE2_odd:
+ movd mm0, dword [eax]
+ pxor mm7,mm7
+ movd mm1, dword [ebx]
+ movdq2q mm4,xmm4
+ movd mm2, dword [ecx]
+ movdq2q mm5,xmm5
+ movd mm3, dword [edx]
+ movdq2q mm6,xmm6
+ punpcklbw mm0,mm1
+ punpcklbw mm2,mm3
+ movq mm1,mm0
+ movq mm3,mm2
+ punpcklbw mm0,mm7
+ punpckhbw mm1,mm7
+ punpcklbw mm2,mm7
+ punpckhbw mm3,mm7
+ pmaddwd mm0,mm4
+ pmaddwd mm1,mm4
+ pmaddwd mm2,mm5
+ pmaddwd mm3,mm5
+ paddd mm0,mm6
+ paddd mm2,mm6
+ paddd mm0,mm2
+ paddd mm1,mm3
+ psrad mm0,14
+ psrad mm1,14
+ packssdw mm0,mm1
+ packuswb mm0,mm0
+ movd eax,mm0
+ movnti [ebp],eax
+
+ccint_col_SSE2_noodd:
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+ ret
+
+
+
+;-------------------------------------------------------------------------
+;
+; long resize_table_row_MMX(Pixel *out, Pixel *in, int *filter, int filter_width, PixDim w, long accum, long frac);
+
+ .code
+
+ global _vdasm_resize_table_row_MMX
+_vdasm_resize_table_row_MMX:
+ push ebp
+ push esi
+ push edi
+ push ebx
+
+ cmp dword [esp+16+16], 4
+ jz .accel_4coeff
+ cmp dword [esp+16+16], 6
+ jz .accel_6coeff
+ cmp dword [esp+16+16], 8
+ jz .accel_8coeff
+
+ mov eax,[esp + 24 + 16]
+ mov ebp,[esp + 20 + 16]
+ mov ebx,[esp + 8 + 16]
+ mov edi,[esp + 4 + 16]
+
+ mov esi,eax
+ mov edx,eax
+
+ pxor mm5,mm5
+
+ mov ecx,[esp + 16 + 16]
+ shr ecx,1
+ mov [esp+16+16],ecx
+ test ecx,1
+ jnz .pixelloop_odd_pairs
+
+.pixelloop_even_pairs:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ mov ecx,[esp + 16 + 16]
+ shr edx,5
+ add esi,ebx
+ imul edx,ecx
+ add eax,[esp + 28 + 16]
+ add edx,[esp + 12 + 16]
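+	;The accumulator in eax appears to pack the source position: bits 16
+	;and up select the source pixel (esi becomes its address after the
+	;shift/mask and the add of the source base), bits 8-15 select one of
+	;256 filter phases (edx becomes that phase's offset into the filter
+	;bank, 8*pair-count bytes per phase), and [esp+28+16] is the per-pixel
+	;step added to eax each iteration.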
+
+ movq mm6,[MMX_roundval]
+ pxor mm3,mm3
+ movq mm7,mm6
+ pxor mm2,mm2
+
+.coeffloop_unaligned_even_pairs:
+ movd mm0,dword [esi+0]
+ paddd mm7,mm2 ;accumulate alpha/red (pixels 2/3)
+
+ punpcklbw mm0,[esi+4] ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+ paddd mm6,mm3 ;accumulate green/blue (pixels 2/3)
+
+ movd mm2,dword [esi+8]
+ movq mm1,mm0 ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm7,mm0 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm6,mm1 ;accumulate green/blue (pixels 0/1)
+ add edx,16
+
+ add esi,16
+ sub ecx,2
+
+ jne .coeffloop_unaligned_even_pairs
+
+ paddd mm7,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm6,mm3 ;accumulate green/blue (pixels 2/3)
+
+ psrad mm7,14
+ psrad mm6,14
+
+ packssdw mm6,mm7
+ add edi,4
+
+ packuswb mm6,mm6
+ sub ebp,1
+
+ mov esi,eax
+ mov edx,eax
+
+ movd dword [edi-4],mm6
+ jne .pixelloop_even_pairs
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+;----------------------------------------------------------------
+
+.pixelloop_odd_pairs:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ mov ecx,[esp + 16 + 16]
+ shr edx,5
+ add esi,ebx
+ imul edx,ecx
+ add eax,[esp + 28 + 16]
+ sub ecx,1
+ add edx,[esp + 12 + 16]
+
+ movq mm6,[MMX_roundval]
+ pxor mm3,mm3
+ pxor mm2,mm2
+ movq mm7,mm6
+
+.coeffloop_unaligned_odd_pairs:
+ movd mm0,dword [esi+0]
+ paddd mm7,mm2 ;accumulate alpha/red (pixels 2/3)
+
+ punpcklbw mm0,[esi+4] ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+ paddd mm6,mm3 ;accumulate green/blue (pixels 2/3)
+
+ movd mm2,dword [esi+8]
+ movq mm1,mm0 ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm7,mm0 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm6,mm1 ;accumulate green/blue (pixels 0/1)
+ add edx,16
+
+ add esi,16
+ sub ecx,2
+
+ jne .coeffloop_unaligned_odd_pairs
+
+ paddd mm7,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm6,mm3 ;accumulate green/blue (pixels 2/3)
+
+ ;finish up odd pair
+
+ movd mm0,dword [esi] ;mm0 = [x1][r1][g1][b1]
+ punpcklbw mm0,[esi+4] ;mm2 = [x0][x1][r0][r1][g0][g1][b0][b1]
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g0][g1][b0][b1]
+ punpckhbw mm1,mm5 ;mm1 = [x0][x1][r0][r1]
+
+ pmaddwd mm0,[edx]
+ pmaddwd mm1,[edx]
+
+ paddd mm6,mm0
+ paddd mm7,mm1
+
+ ;combine into pixel
+
+ psrad mm6,14
+
+ psrad mm7,14
+
+ packssdw mm6,mm7
+ add edi,4
+
+ packuswb mm6,mm6
+ sub ebp,1
+
+ mov esi,eax
+ mov edx,eax
+
+ movd dword [edi-4],mm6
+ jne .pixelloop_odd_pairs
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+;----------------------------------------------------------------
+
+.accel_4coeff:
+ mov eax,[esp + 24 + 16]
+ mov ebp,[esp + 20 + 16]
+ add ebp,ebp
+ add ebp,ebp
+ mov ebx,[esp + 8 + 16]
+ mov edi,[esp + 4 + 16]
+ add edi,ebp
+ neg ebp
+
+ mov esi,eax
+ mov edx,eax
+
+ movq mm4,[MMX_roundval]
+ pxor mm5,mm5
+
+ mov ecx,[esp+12+16]
+
+.pixelloop_4coeff:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ shr edx,4
+ add esi,ebx
+ add eax,[esp+28+16]
+ add edx,ecx
+
+ movd mm0,dword [esi+0]
+ movd mm2,dword [esi+8]
+ punpcklbw mm0,[esi+4] ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movq mm1,mm0 ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm0,mm4 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm4 ;accumulate green/blue (pixels 0/1)
+
+ paddd mm0,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm1,mm3 ;accumulate green/blue (pixels 2/3)
+
+ psrad mm0,14
+ psrad mm1,14
+
+ packssdw mm1,mm0
+ mov esi,eax
+
+ packuswb mm1,mm1
+ mov edx,eax
+
+ movd dword [edi+ebp],mm1
+ add ebp,4
+ jne .pixelloop_4coeff
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+
+;----------------------------------------------------------------
+
+.accel_6coeff:
+ mov eax,[esp + 24 + 16]
+ mov ebp,[esp + 20 + 16]
+ add ebp,ebp
+ add ebp,ebp
+ mov ebx,[esp + 8 + 16]
+ mov edi,[esp + 4 + 16]
+ add edi,ebp
+ neg ebp
+
+ mov esi,eax
+ mov edx,eax
+
+ movq mm4,[MMX_roundval]
+ pxor mm5,mm5
+
+ mov ecx,[esp+12+16]
+
+.pixelloop_6coeff:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ shr edx,5
+ lea edx,[edx+edx*2]
+ add esi,ebx
+ add eax,[esp+28+16]
+ add edx,ecx
+
+ movd mm0,dword [esi+0]
+ movd mm2,dword [esi+8]
+ punpcklbw mm0,[esi+4] ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movq mm1,mm0 ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm0,mm4 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm4 ;accumulate green/blue (pixels 0/1)
+
+ paddd mm0,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm1,mm3 ;accumulate green/blue (pixels 2/3)
+
+ movd mm6,dword [esi+16]
+
+ punpcklbw mm6,[esi+20] ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movq mm7,mm6 ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm6,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+
+ pmaddwd mm6,[edx+16] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm7,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm7,[edx+16] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+ paddd mm0,mm6 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm7 ;accumulate green/blue (pixels 0/1)
+
+
+ psrad mm0,14
+ psrad mm1,14
+
+ packssdw mm1,mm0
+ mov esi,eax
+
+ packuswb mm1,mm1
+ mov edx,eax
+
+ movd dword [edi+ebp],mm1
+ add ebp,4
+ jne .pixelloop_6coeff
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+;----------------------------------------------------------------
+
+.accel_8coeff:
+ mov eax,[esp + 24 + 16]
+ mov ebp,[esp + 20 + 16]
+ add ebp,ebp
+ add ebp,ebp
+ mov ebx,[esp + 8 + 16]
+ mov edi,[esp + 4 + 16]
+ add edi,ebp
+ neg ebp
+
+ mov esi,eax
+ mov edx,eax
+
+ movq mm4,[MMX_roundval]
+ pxor mm5,mm5
+
+ mov ecx,[esp+12+16]
+
+.pixelloop_8coeff:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ shr edx,3
+ add esi,ebx
+ add eax,[esp+28+16]
+ add edx,ecx
+
+ movd mm0,dword [esi+0]
+ movd mm2,dword [esi+8]
+ punpcklbw mm0,[esi+4] ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movq mm1,mm0 ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm0,mm4 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm4 ;accumulate green/blue (pixels 0/1)
+
+ paddd mm0,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm1,mm3 ;accumulate green/blue (pixels 2/3)
+
+
+ movd mm6,dword [esi+16]
+
+ punpcklbw mm6,[esi+20] ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movd mm2,dword [esi+24]
+
+ punpcklbw mm2,[esi+28] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+ movq mm7,mm6 ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm6,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ pmaddwd mm6,[edx+16] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm7,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm7,[edx+16] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+24] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+24] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm0,mm6 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm7 ;accumulate green/blue (pixels 0/1)
+ paddd mm0,mm2 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm3 ;accumulate green/blue (pixels 0/1)
+
+
+ psrad mm0,14
+ psrad mm1,14
+
+ packssdw mm1,mm0
+ mov esi,eax
+
+ packuswb mm1,mm1
+ mov edx,eax
+
+ movd dword [edi+ebp],mm1
+ add ebp,4
+ jne .pixelloop_8coeff
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+
+
+
+
+
+
+;-------------------------------------------------------------------------
+;
+; long resize_table_col_MMX(Pixel *out, Pixel **in_table, int *filter, int filter_width, PixDim w, long frac);
+
+ global _vdasm_resize_table_col_MMX
+_vdasm_resize_table_col_MMX:
+ push ebp
+ push esi
+ push edi
+ push ebx
+
+ mov edx,[esp + 12 + 16]
+ mov eax,[esp + 24 + 16]
+ shl eax,2
+ imul eax,[esp + 16 + 16]
+ add edx,eax
+	mov	[esp + 12 + 16], edx	;[esp+12+16] = filter pointer
+
+ mov ebp,[esp + 20 + 16] ;ebp = pixel counter
+ mov edi,[esp + 4 + 16] ;edi = destination pointer
+
+ pxor mm5,mm5
+
+ cmp dword [esp+16+16], 4
+ jz .accel_4coeff
+ cmp dword [esp+16+16], 6
+ jz .accel_6coeff
+
+ mov ecx,[esp + 16 + 16]
+ shr ecx,1
+ mov [esp + 16 + 16],ecx ;ecx = filter pair count
+
+ xor ebx,ebx ;ebx = source offset
+
+ mov ecx,[esp + 16 + 16] ;ecx = filter width counter
+.pixelloop:
+	mov	eax,[esp + 8 + 16]	;eax = row pointer table
+ movq mm6,[MMX_roundval]
+ movq mm7,mm6
+ pxor mm0,mm0
+ pxor mm1,mm1
+.coeffloop:
+ mov esi,[eax]
+ paddd mm6,mm0
+
+ movd mm0,dword [esi+ebx] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+ paddd mm7,mm1
+
+ mov esi,[eax+4]
+ add eax,8
+
+ movd mm1,dword [esi+ebx] ;mm1 = [0][0][0][0][x1][r1][g1][b1]
+ punpcklbw mm0,mm1 ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm0,[edx]
+ punpckhbw mm1,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ pmaddwd mm1,[edx]
+ add edx,8
+
+ sub ecx,1
+ jne .coeffloop
+
+ paddd mm6,mm0
+ paddd mm7,mm1
+
+ psrad mm6,14
+ psrad mm7,14
+ add edi,4
+ packssdw mm6,mm7
+ add ebx,4
+ packuswb mm6,mm6
+ sub ebp,1
+
+ mov ecx,[esp + 16 + 16] ;ecx = filter width counter
+ mov edx,[esp + 12 + 16] ;edx = filter bank pointer
+
+ movd dword [edi-4],mm6
+ jne .pixelloop
+
+.xit:
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+ ret
+
+
+
+.accel_4coeff:
+ movq mm2,[edx]
+ movq mm3,[edx+8]
+
+ mov esi,[esp+8+16] ;esi = row pointer table
+ mov eax,[esi]
+ add ebp,ebp
+ mov ebx,[esi+4]
+ add ebp,ebp
+ mov ecx,[esi+8]
+ mov esi,[esi+12]
+ add eax,ebp
+ add ebx,ebp
+ add ecx,ebp
+ add esi,ebp
+ add edi,ebp
+ neg ebp
+
+ ;EAX source 0
+ ;EBX source 1
+ ;ECX source 2
+ ;ESI source 3
+ ;EDI destination
+ ;EBP counter
+
+ movq mm4,[MMX_roundval]
+
+.pixelloop4:
+ movd mm6,dword [eax+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+
+ punpcklbw mm6,[ebx+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+
+ movq mm7,mm6
+ punpcklbw mm6,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm6,mm2
+ punpckhbw mm7,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ movd mm0,dword [ecx+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+ pmaddwd mm7,mm2
+
+ punpcklbw mm0,[esi+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+ paddd mm6,mm4
+
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm0,mm3
+ punpckhbw mm1,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ pmaddwd mm1,mm3
+ paddd mm7,mm4
+
+ paddd mm6,mm0
+ paddd mm7,mm1
+
+ psrad mm6,14
+ psrad mm7,14
+ packssdw mm6,mm7
+ packuswb mm6,mm6
+
+ movd dword [edi+ebp],mm6
+
+ add ebp,4
+ jne .pixelloop4
+ jmp .xit
+
+.accel_6coeff:
+ movq mm2,[edx]
+ movq mm3,[edx+8]
+ movq mm4,[edx+16]
+
+ push 0
+ push dword [fs:0]
+ mov dword [fs:0],esp
+
+ mov esp,[esp+8+24] ;esp = row pointer table
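+	;Same fs:[0] trick as in the ccint row routine: six source rows plus
+	;destination and counter need more registers than are free, so ESP is
+	;parked in fs:[0] and reused as the sixth row pointer until .pixelloop6
+	;finishes.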
+ mov eax,[esp]
+ add ebp,ebp
+ mov ebx,[esp+4]
+ add ebp,ebp
+ mov ecx,[esp+8]
+ mov edx,[esp+12]
+ mov esi,[esp+16]
+ mov esp,[esp+20]
+ add eax,ebp
+ add ebx,ebp
+ add ecx,ebp
+ add edx,ebp
+ add esi,ebp
+ add edi,ebp
+ add esp,ebp
+ neg ebp
+
+ ;EAX source 0
+ ;EBX source 1
+ ;ECX source 2
+ ;EDX source 3
+ ;ESI source 4
+ ;EDI destination
+ ;ESP source 5
+ ;EBP counter
+
+.pixelloop6:
+ movd mm6,dword [eax+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+
+ punpcklbw mm6,[ebx+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+
+ movq mm7,mm6
+ punpcklbw mm6,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ movd mm0,dword [ecx+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+ punpckhbw mm7,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ punpcklbw mm0,[edx+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+ pmaddwd mm6,mm2
+
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm7,mm2
+ punpckhbw mm1,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ paddd mm6,[MMX_roundval]
+ pmaddwd mm0,mm3
+
+ paddd mm7,[MMX_roundval]
+ pmaddwd mm1,mm3
+
+ paddd mm6,mm0
+
+ movd mm0,dword [esi+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+ paddd mm7,mm1
+
+ punpcklbw mm0,[esp+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g1][g0][b1][b0]
+ punpckhbw mm1,mm5 ;mm1 = [x1][x0][r1][r0]
+ pmaddwd mm0,mm4
+ pmaddwd mm1,mm4
+ paddd mm6,mm0
+ paddd mm7,mm1
+
+ psrad mm6,14
+ psrad mm7,14
+ packssdw mm6,mm7
+ packuswb mm6,mm6
+
+ movd dword [edi+ebp],mm6
+
+ add ebp,4
+ jne .pixelloop6
+
+ mov esp, dword [fs:0]
+ pop dword [fs:0]
+ pop eax
+
+ jmp .xit
+
+
+ global _vdasm_resize_table_col_SSE2
+_vdasm_resize_table_col_SSE2:
+ push ebp
+ push esi
+ push edi
+ push ebx
+
+ mov edx,[esp+12+16]
+ mov eax,[esp+24+16]
+ shl eax,2
+ imul eax,[esp+16+16]
+ add edx,eax
+ mov [esp+12+16], edx ;[esp+12+16] = filter pointer
+
+ mov ebp,[esp+20+16] ;ebp = pixel counter
+ mov edi,[esp+4+16] ;edi = destination pointer
+
+ pxor xmm7, xmm7
+ movdqa xmm6, [MMX_roundval]
+
+ cmp dword [esp+16+16], 4
+ jz .accel_4coeff
+ cmp dword [esp+16+16], 6
+ jz .accel_6coeff
+
+ mov ecx,[esp+16+16]
+ shr ecx,1
+ mov [esp+16+16],ecx ;ecx = filter pair count
+
+ xor ebx,ebx ;ebx = source offset
+
+ mov ecx,[esp+16+16] ;ecx = filter width counter
+.pixelloop:
+	mov	eax, [esp+8+16]		;eax = row pointer table
+ movdqa xmm4, xmm6
+.coeffloop:
+ mov esi,[eax]
+
+ movd xmm0, dword [esi+ebx]
+
+ mov esi,[eax+4]
+ add eax,8
+
+ movd xmm1, dword [esi+ebx]
+ punpcklbw xmm0, xmm1
+
+ punpcklbw xmm0, xmm7
+
+ movq xmm2, qword [edx]
+ pshufd xmm2, xmm2, 01000100b
+
+ pmaddwd xmm0, xmm2
+
+ paddd xmm4, xmm0
+
+ add edx,8
+
+ sub ecx,1
+ jne .coeffloop
+
+ psrad xmm4,14
+ add edi,4
+ packssdw xmm4,xmm4
+ add ebx,4
+ packuswb xmm4,xmm4
+ sub ebp,1
+
+ mov ecx,[esp+16+16] ;ecx = filter width counter
+ mov edx,[esp+12+16] ;edx = filter bank pointer
+
+ movd dword [edi-4],xmm4
+ jne .pixelloop
+
+.xit:
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+ ret
+
+.accel_4coeff:
+ shl ebp, 2
+ mov eax, [esp+8+16] ;eax = row pointer table
+ mov esi, [eax+12]
+ mov ecx, [eax+8]
+ mov ebx, [eax+4]
+ mov eax, [eax]
+ lea edi, [edi+ebp-4]
+ neg ebp
+
+ ;registers:
+ ;
+ ;EAX source 0
+ ;EBX source 1
+ ;ECX source 2
+ ;ESI source 3
+ ;EDI destination
+ ;EBP counter
+ ;
+ movq xmm4, qword [edx] ;xmm4 = coeff 0/1
+ movq xmm5, qword [edx+8] ;xmm5 = coeff 2/3
+ punpcklqdq xmm4, xmm4
+ punpcklqdq xmm5, xmm5
+
+ add ebp, 4
+ jz .oddpixel_4coeff
+
+.pixelloop_4coeff_dualpel:
+ movq xmm0, qword [eax]
+ movq xmm1, qword [ebx]
+ movq xmm2, qword [ecx]
+ movq xmm3, qword [esi]
+ add eax,8
+ add ebx,8
+ add ecx,8
+ add esi,8
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ punpcklbw xmm0, xmm7
+ punpckhbw xmm1, xmm7
+ punpcklbw xmm2, xmm7
+ punpckhbw xmm3, xmm7
+ pmaddwd xmm0, xmm4
+ pmaddwd xmm1, xmm4
+ pmaddwd xmm2, xmm5
+ pmaddwd xmm3, xmm5
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+ paddd xmm0, xmm6
+ paddd xmm1, xmm6
+ psrad xmm0, 14
+ psrad xmm1, 14
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+ movq qword [edi+ebp],xmm0
+ add ebp, 8
+ jae .pixelloop_4coeff_dualpel
+ jnz .xit
+
+.oddpixel_4coeff:
+ movd xmm0, dword [eax]
+ movd xmm1, dword [ebx]
+ movd xmm2, dword [ecx]
+ movd xmm3, dword [esi]
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm7
+ punpcklbw xmm2, xmm7
+ pmaddwd xmm0, xmm4
+ pmaddwd xmm2, xmm5
+ paddd xmm0, xmm2
+ paddd xmm0, xmm6
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd dword [edi],xmm0
+ jmp .xit
+
+
+.accel_6coeff:
+ movq xmm4, qword [edx] ;xmm4 = coeff 0/1
+ movq xmm5, qword [edx+8] ;xmm5 = coeff 2/3
+	movq	xmm6, qword [edx+16]	;xmm6 = coeff 4/5
+ punpcklqdq xmm4, xmm4
+ punpcklqdq xmm5, xmm5
+ punpcklqdq xmm6, xmm6
+
+ push 0
+ push dword [fs:0]
+ mov dword [fs:0],esp
+
+ shl ebp, 2
+ mov eax, [esp+8+24] ;eax = row pointer table
+ mov esp, [eax+20]
+ mov esi, [eax+16]
+ mov edx, [eax+12]
+ mov ecx, [eax+8]
+ mov ebx, [eax+4]
+ mov eax, [eax]
+ lea edi, [edi+ebp-4]
+ neg ebp
+
+ ;registers:
+ ;
+ ;EAX source 0
+ ;EBX source 1
+ ;ECX source 2
+ ;EDX source 3
+ ;ESI source 4
+ ;EDI destination
+ ;ESP source 5
+ ;EBP counter
+ ;
+
+ add ebp, 4
+ jz .oddpixel_6coeff
+
+.pixelloop_6coeff_dualpel:
+ movq xmm0, qword [eax]
+ movq xmm1, qword [ebx]
+ movq xmm2, qword [ecx]
+ movq xmm3, qword [edx]
+ add eax,8
+ add ebx,8
+ add ecx,8
+ add edx,8
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ punpcklbw xmm0, xmm7
+ punpckhbw xmm1, xmm7
+ punpcklbw xmm2, xmm7
+ punpckhbw xmm3, xmm7
+ pmaddwd xmm0, xmm4
+ pmaddwd xmm1, xmm4
+ pmaddwd xmm2, xmm5
+ pmaddwd xmm3, xmm5
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+
+ movq xmm2, qword [esi]
+ movq xmm3, qword [esp]
+ add esi, 8
+ add esp, 8
+ punpcklbw xmm2, xmm3
+ movdqa xmm3, xmm2
+ punpcklbw xmm2, xmm7
+ punpckhbw xmm3, xmm7
+ pmaddwd xmm2, xmm6
+ pmaddwd xmm3, xmm6
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+ paddd xmm0, [MMX_roundval]
+ paddd xmm1, [MMX_roundval]
+ psrad xmm0, 14
+ psrad xmm1, 14
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+ movq qword [edi+ebp],xmm0
+ add ebp, 8
+ jae .pixelloop_6coeff_dualpel
+ jnz .xit_6coeff
+
+.oddpixel_6coeff:
+ movd xmm0, dword [eax]
+ movd xmm1, dword [ebx]
+ movd xmm2, dword [ecx]
+ movd xmm3, dword [edx]
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ movd xmm1, dword [esi]
+ movd xmm3, dword [esp]
+ punpcklbw xmm0, xmm7
+ punpcklbw xmm2, xmm7
+ pmaddwd xmm0, xmm4
+ punpcklbw xmm1, xmm3
+ pmaddwd xmm2, xmm5
+ punpcklbw xmm1, xmm7
+ pmaddwd xmm1, xmm6
+ paddd xmm0, xmm2
+ paddd xmm1, [MMX_roundval]
+ paddd xmm0, xmm1
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd dword [edi],xmm0
+
+.xit_6coeff:
+ mov esp, dword [fs:0]
+ pop dword [fs:0]
+ pop eax
+ jmp .xit
+
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_resample_sse41.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_resample_sse41.asm
new file mode 100644
index 000000000..cf7332cb2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_resample_sse41.asm
@@ -0,0 +1,358 @@
+ segment .rdata, align=16
+
+round dq 0000000000002000h
+colround dq 0000200000002000h
+
+ segment .text
+
+ global _vdasm_resize_table_row_8_k8_4x_SSE41
+_vdasm_resize_table_row_8_k8_4x_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ movq xmm6, [round]
+ pshufd xmm6, xmm6, 0
+
+ mov ebp, [esp + 4 + 16] ;ebp = dst
+ mov esi, [esp + 12 + 16] ;esi = width
+ mov edi, [esp + 16 + 16] ;edi = kernel
+.yloop:
+ ;eax = temp
+ ;ebx = temp
+ ;ecx = temp
+ ;edx = temp
+ ;esi = horiz counter
+ ;edi = filter list
+ ;ebp = destination
+
+ mov eax, [edi+0]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [esp+8+16]
+ add eax, edx
+ add ebx, edx
+ add ecx, edx
+ add edx, [edi+12]
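+	;Each kernel record appears to be 50h bytes: four 32-bit source offsets
+	;(added to the src base above), then four 16-byte sets of eight word
+	;coefficients, one 8-tap set for each of the four output pixels written
+	;per iteration.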
+
+ pmovzxbw xmm0, [eax]
+ pmaddwd xmm0, [edi+10h]
+ pmovzxbw xmm1, [ebx]
+ pmaddwd xmm1, [edi+20h]
+ pmovzxbw xmm2, [ecx]
+ pmaddwd xmm2, [edi+30h]
+ pmovzxbw xmm3, [edx]
+ pmaddwd xmm3, [edi+40h]
+ add edi, 50h
+ phaddd xmm0, xmm1
+ phaddd xmm2, xmm3
+ phaddd xmm0, xmm2
+ paddd xmm0, xmm6
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd [ebp], xmm0
+
+ add ebp, 4
+ sub esi, 1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_resize_table_row_8_k16_4x_SSE41
+_vdasm_resize_table_row_8_k16_4x_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ movq xmm6, [round]
+ pshufd xmm6, xmm6, 0
+
+ mov ebp, [esp + 4 + 16] ;ebp = dst
+ mov esi, [esp + 12 + 16] ;esi = width
+ mov edi, [esp + 16 + 16] ;edi = kernel
+.yloop:
+ ;eax = temp
+ ;ebx = temp
+ ;ecx = temp
+ ;edx = temp
+ ;esi = horiz counter
+ ;edi = filter list
+ ;ebp = destination
+
+ mov eax, [edi+0]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [esp+8+16]
+ add eax, edx
+ add ebx, edx
+ add ecx, edx
+ add edx, [edi+12]
+
+ pmovzxbw xmm0, [eax]
+ pmaddwd xmm0, [edi+10h]
+ pmovzxbw xmm1, [ebx]
+ pmaddwd xmm1, [edi+20h]
+ pmovzxbw xmm2, [ecx]
+ pmaddwd xmm2, [edi+30h]
+ pmovzxbw xmm3, [edx]
+ pmaddwd xmm3, [edi+40h]
+ pmovzxbw xmm4, [eax+8]
+ pmaddwd xmm4, [edi+50h]
+ pmovzxbw xmm5, [ebx+8]
+ pmaddwd xmm5, [edi+60h]
+ paddd xmm0, xmm4
+ pmovzxbw xmm4, [ecx+8]
+ pmaddwd xmm4, [edi+70h]
+ paddd xmm1, xmm5
+ pmovzxbw xmm5, [edx+8]
+ pmaddwd xmm5, [edi+80h]
+ paddd xmm2, xmm4
+ paddd xmm3, xmm5
+ add edi, 90h
+ phaddd xmm0, xmm1
+ phaddd xmm2, xmm3
+ phaddd xmm0, xmm2
+ paddd xmm0, xmm6
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd [ebp], xmm0
+
+ add ebp, 4
+ sub esi, 1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_resize_table_row_8_SSE41
+_vdasm_resize_table_row_8_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor xmm7, xmm7
+ movq xmm6, [round]
+
+ mov edi, [esp + 4 + 16] ;edi = dst
+ mov ebx, [esp + 8 + 16] ;ebx = src
+ mov ebp, [esp + 12 + 16] ;ebp = width
+ mov edx, [esp + 16 + 16] ;edx = kernel
+.yloop:
+ ;eax = temp
+ ;ebx = source base address
+ ;ecx = (temp) source
+ ;edx = filter list
+ ;esi = (temp) kernel width
+ ;edi = destination
+ ;ebp = horiz counter
+
+ mov eax, [edx]
+ add edx, 16
+ lea ecx, [ebx + eax]
+ mov esi, [esp + 20 + 16] ;esi = kernel width
+
+ movq xmm2, xmm6
+.xloop:
+ pmovzxbw xmm0, [ecx]
+ add ecx, 8
+ pmaddwd xmm0, [edx]
+ paddd xmm2, xmm0
+ add edx, 16
+ sub esi, 8
+ jne .xloop
+
+ phaddd xmm2, xmm2
+ phaddd xmm2, xmm2
+ psrad xmm2, 14
+ packssdw xmm2, xmm2
+ packuswb xmm2, xmm2
+ movd eax, xmm2
+ mov [edi], al
+ add edi, 1
+ sub ebp, 1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_resize_table_col_8_k2_SSE41
+_vdasm_resize_table_col_8_k2_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ movq xmm6, [colround]
+ pshufd xmm6, xmm6, 0
+
+ mov esi, [esp + 4 + 16] ;esi = dst
+ mov edi, [esp + 16 + 16] ;edi = kernel
+ mov ebp, [esp + 12 + 16] ;ebp = width
+
+ movq xmm7, [edi]
+ pshufd xmm7, xmm7, 0
+
+	mov	edx, [esp + 8 + 16]	;edx = srcs
+ mov eax, [edx+0]
+ mov ebx, [edx+4]
+ add eax, ebp
+ add ebx, ebp
+ neg ebp
+
+.yloop:
+ ;eax = row0
+ ;ebx = row1
+ ;ecx =
+ ;edx =
+ ;edi = kernel
+ ;esi = dest
+ ;ebp = width counter
+
+ movd xmm0, [eax+ebp]
+ movd xmm2, [ebx+ebp]
+ punpcklbw xmm0, xmm2
+ pmovzxbw xmm0, xmm0
+ pmaddwd xmm0, xmm7
+
+ paddd xmm0, xmm6
+
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd [esi], xmm0
+ add esi, 4
+ add ebp, 4
+ jnz .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_resize_table_col_8_k4_SSE41
+_vdasm_resize_table_col_8_k4_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ movq xmm7, [colround]
+ pshufd xmm7, xmm7, 0
+
+ mov esi, [esp + 4 + 16] ;esi = dst
+ mov edi, [esp + 16 + 16] ;edi = kernel
+
+ movdqu xmm6, [edi]
+ pshufd xmm5, xmm6, 0
+ pshufd xmm6, xmm6, 0aah
+
+	mov	edx, [esp + 8 + 16]	;edx = srcs
+ mov ebp, [esp + 12 + 16]
+ mov eax, [edx+0]
+ mov ebx, [edx+4]
+ mov ecx, [edx+8]
+ mov edx, [edx+12]
+ lea eax, [eax+ebp-4]
+ lea ebx, [ebx+ebp-4]
+ lea ecx, [ecx+ebp-4]
+ lea edx, [edx+ebp-4]
+ lea esi, [esi+ebp-4]
+ neg ebp
+ add ebp,4
+ jz .odd
+.yloop:
+ ;eax = row0
+ ;ebx = row1
+ ;ecx = row2
+ ;edx = row3
+ ;edi = kernel
+ ;esi = dest
+ ;ebp = width counter
+
+ movd xmm0, [eax+ebp]
+ movd xmm1, [ebx+ebp]
+ punpcklbw xmm0, xmm1
+
+ movd xmm1, [ecx+ebp]
+ movd xmm2, [edx+ebp]
+ punpcklbw xmm1, xmm2
+
+ movd xmm2, [eax+ebp+4]
+ movd xmm3, [ebx+ebp+4]
+ punpcklbw xmm2, xmm3
+
+ movd xmm3, [ecx+ebp+4]
+ movd xmm4, [edx+ebp+4]
+ punpcklbw xmm3, xmm4
+
+ pmovzxbw xmm0, xmm0
+ pmaddwd xmm0, xmm5
+
+ pmovzxbw xmm1, xmm1
+ pmaddwd xmm1, xmm6
+
+ pmovzxbw xmm2, xmm2
+ pmaddwd xmm2, xmm5
+
+ pmovzxbw xmm3, xmm3
+ pmaddwd xmm3, xmm6
+
+ paddd xmm0, xmm1
+ paddd xmm2, xmm3
+
+ paddd xmm0, xmm7
+ paddd xmm2, xmm7
+
+ psrad xmm0, 14
+ psrad xmm2, 14
+
+ packssdw xmm0, xmm2
+ packuswb xmm0, xmm0
+ movq [esi+ebp], xmm0
+ add ebp, 8
+ js .yloop
+ jnz .noodd
+
+.odd:
+ movd xmm0, [eax]
+ movd xmm1, [ebx]
+ movd xmm2, [ecx]
+ movd xmm3, [edx]
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ pmovzxbw xmm0, xmm0
+ pmovzxbw xmm2, xmm2
+ pmaddwd xmm0, xmm5
+ pmaddwd xmm2, xmm6
+ paddd xmm0, xmm2
+ paddd xmm0, xmm7
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd [esi], xmm0
+.noodd:
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_spanutils_isse.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_spanutils_isse.asm
new file mode 100644
index 000000000..3fe7cedbc
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_spanutils_isse.asm
@@ -0,0 +1,193 @@
+ section .rdata, rdata, align=16
+
+xfefefefefefefefe dq 0fefefefefefefefeh
+xe0e0e0e0e0e0e0e0 dq 0e0e0e0e0e0e0e0e0h
+x0002000200020002 dq 00002000200020002h
+
+ section .text
+
+;==============================================================================
+ global _vdasm_horiz_expand2x_coaligned_ISSE
+_vdasm_horiz_expand2x_coaligned_ISSE:
+ mov ecx, [esp+8]
+ mov edx, [esp+4]
+ mov eax, [esp+12]
+.xloop:
+ movq mm0, [ecx]
+ movq mm1, mm0
+ pavgb mm0, [ecx+1]
+ movq mm2, mm1
+ punpcklbw mm1, mm0
+ punpckhbw mm2, mm0
+
+ movq [edx], mm1
+ movq [edx+8], mm2
+ add edx, 16
+ add ecx, 8
+
+ sub eax, 16
+ jne .xloop
+ ret
+
+;==============================================================================
+ global _vdasm_vert_average_13_ISSE
+_vdasm_vert_average_13_ISSE:
+ push ebx
+ mov ebx, [esp+12+4]
+ mov ecx, [esp+8+4]
+ mov edx, [esp+4+4]
+ mov eax, [esp+16+4]
+
+ add ebx, eax
+ add ecx, eax
+ add edx, eax
+ neg eax
+
+ pcmpeqb mm7, mm7
+.xloop:
+ movq mm0, [ebx+eax]
+ movq mm1, [ecx+eax]
+ movq mm2, mm0
+
+ movq mm3, [ebx+eax+8]
+ pxor mm0, mm7
+ pxor mm1, mm7
+
+ movq mm4, [ecx+eax+8]
+ movq mm5, mm3
+ pxor mm3, mm7
+
+ pxor mm4, mm7
+ pavgb mm0, mm1
+ pavgb mm3, mm4
+
+ pxor mm0, mm7
+ pxor mm3, mm7
+ pavgb mm0, mm2
+
+ movq [edx+eax], mm0
+ pavgb mm3, mm5
+
+ movq [edx+eax+8], mm3
+ add eax, 16
+ jne .xloop
+
+ pop ebx
+ ret
+
+;==============================================================================
+ global _vdasm_vert_average_17_ISSE
+_vdasm_vert_average_17_ISSE:
+ push ebx
+ mov ebx, [esp+12+4]
+ mov ecx, [esp+8+4]
+ mov edx, [esp+4+4]
+ mov eax, [esp+16+4]
+
+ add ebx, eax
+ add ecx, eax
+ add edx, eax
+ neg eax
+
+ ;r = avgup(avgdown(avgdown(a, b), a), a)
+ ; = pavgb(~pavgb(pavgb(~a, ~b), ~a), a)
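+	; (note: pavgb rounds up, pavgb(x, y) = (x + y + 1) >> 1, and ~x = 255 - x,
+	;  so pavgb(~x, ~y) = 255 - ((x + y) >> 1) = ~avgdown(x, y), giving the identity above)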
+
+ pcmpeqb mm7, mm7
+.xloop:
+ movq mm0, [ecx+eax]
+ movq mm1, [ebx+eax]
+ movq mm2, mm0
+ pxor mm0, mm7 ;~a
+ pxor mm1, mm7 ;~b
+ pavgb mm1, mm0 ;pavgb(~a, ~b) = ~avgdown(a, b)
+ pavgb mm1, mm0 ;pavgb(~avgdown(a, b), ~a) = ~avgdown(avgdown(a, b), a)
+ pxor mm1, mm7 ;avgdown(avgdown(a, b), a)
+ pavgb mm1, mm2 ;pavgb(avgdown(avgdown(a, b), a), a) = round((7*a + b)/8)
+ movq [edx+eax], mm1
+
+ add eax, 8
+ jne .xloop
+
+ pop ebx
+ ret
+
+;==============================================================================
+ global _vdasm_vert_average_35_ISSE
+_vdasm_vert_average_35_ISSE:
+ push ebx
+ mov ebx, [esp+12+4]
+ mov ecx, [esp+8+4]
+ mov edx, [esp+4+4]
+ mov eax, [esp+16+4]
+
+ add ebx, eax
+ add ecx, eax
+ add edx, eax
+ neg eax
+
+ ;r = avgup(avgdown(avgdown(a, b), b), a)
+ ; = pavgb(~pavgb(pavgb(~a, ~b), ~b), a)
+
+ pcmpeqb mm7, mm7
+.xloop:
+ movq mm0, [ecx+eax]
+ movq mm1, [ebx+eax]
+ movq mm2, mm0
+ pxor mm0, mm7 ;~a
+ pxor mm1, mm7 ;~b
+ pavgb mm0, mm1 ;avgup(~a, ~b) = ~avgdown(a, b)
+ pavgb mm0, mm1 ;avgup(~avgdown(a, b), ~b) = ~avgdown(avgdown(a, b), b)
+ pxor mm0, mm7 ;avgdown(avgdown(a, b), b)
+ pavgb mm0, mm2 ;avgup(avgdown(avgdown(a, b), b), a) = round((5*a + 3*b) / 8)
+ movq [edx+eax], mm0
+
+ add eax, 8
+ jne .xloop
+
+ pop ebx
+ ret
+
+;==============================================================================
+ global _vdasm_horiz_expand4x_coaligned_MMX
+_vdasm_horiz_expand4x_coaligned_MMX:
+ mov edx, [esp+4]
+ mov ecx, [esp+8]
+ mov eax, [esp+12]
+ movq mm6, qword [x0002000200020002]
+ pxor mm7, mm7
+.xloop:
+ movd mm0, [ecx]
+ movd mm1, [ecx+1]
+ add ecx, 4
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm1, mm0 ;x1
+ movq mm2, mm1
+ paddw mm1, mm6 ;x1 + 2
+ movq mm3, mm1
+ paddw mm2, mm2 ;x2
+ paddw mm3, mm2 ;x3 + 2
+ paddw mm2, mm6 ;x2 + 2
+ psraw mm1, 2 ;x1/4
+ psraw mm2, 2 ;x2/4
+ psraw mm3, 2 ;x3/4
+ paddw mm1, mm0
+ paddw mm2, mm0
+ paddw mm3, mm0
+ movd mm0, [ecx-4]
+ packuswb mm1, mm1
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+ punpcklbw mm0, mm1
+ punpcklbw mm2, mm3
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ punpckhwd mm1, mm2
+
+ movq [edx], mm0
+ movq [edx+8], mm1
+ add edx, 16
+ sub eax, 1
+ jne .xloop
+
+ ret
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_mmx.asm
new file mode 100644
index 000000000..3db442fa2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_mmx.asm
@@ -0,0 +1,326 @@
+ segment .rdata, align=16
+
+x0020w dq 00020002000200020h
+rb_mask_555 dq 07c1f7c1f7c1f7c1fh
+g_mask_555 dq 003e003e003e003e0h
+rb_mask_888 dq 000ff00ff00ff00ffh
+g_mask_888 dq 00000ff000000ff00h
+
+ segment .text
+
+ struc VDPixmapReferenceStretchBltBilinearParameters
+.dst resd 1
+.src resd 1
+.u resd 1
+.uinc resd 1
+.dudx resd 1
+
+.xprepos resd 1
+.xpostpos resd 1
+.xprecopy resd 1
+.xpostcopy resd 1
+.xmidsize resd 1
+ endstruc
+
+
+
+ global _vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX
+_vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+20+16]
+ and eax, 0f8000000h
+ mov ebx, [esp+8+16]
+ mov ecx, [esp+12+16]
+ jz .noreverse
+ xchg ebx, ecx
+ js .noreverse
+ neg eax
+ xchg ebx, ecx
+.noreverse:
+ shr eax, 16
+ mov [esp+20+16], eax
+ mov edx, [esp+4+16]
+ mov eax, [esp+16+16]
+ add eax, eax
+ lea ebx, [ebx+eax-6]
+ lea ecx, [ecx+eax-6]
+ lea edx, [edx+eax-6]
+ neg eax
+
+ movd mm4, dword [esp+20+16]
+ punpcklwd mm4, mm4
+ punpckldq mm4, mm4
+
+ movq mm6, [rb_mask_555]
+ movq mm7, [g_mask_555]
+
+.xstart:
+ add eax, 6
+ jbe .doodd
+.xloop:
+ movq mm0, [ebx+eax]
+ movq mm1, [ecx+eax]
+ movq mm2, mm7
+ movq mm3, mm7
+
+ pand mm2, mm0
+ pand mm3, mm1
+ pand mm0, mm6
+ pand mm1, mm6
+
+ psubw mm3, mm2
+ psubw mm1, mm0
+
+ pmulhw mm3, mm4
+ pmulhw mm1, mm4
+
+ psubw mm0, mm1
+ psubw mm2, mm3
+
+ pand mm0, mm6
+ pand mm2, mm7
+
+ paddw mm0, mm2
+
+ movq [edx+eax], mm0
+ add eax, 8
+ jnc .xloop
+
+.doodd:
+ sub eax, 6
+ jz .noodd
+.odd:
+ movzx esi, word [ebx+eax+6]
+ movd mm0, esi
+ movzx esi, word [ecx+eax+6]
+ movd mm1, esi
+ movq mm2, mm7
+ movq mm3, mm7
+
+ pand mm2, mm0
+ pand mm3, mm1
+ pand mm0, mm6
+ pand mm1, mm6
+
+ psubw mm3, mm2
+ psubw mm1, mm0
+
+ pmulhw mm3, mm4
+ pmulhw mm1, mm4
+
+ psubw mm0, mm1
+ psubw mm2, mm3
+
+ pand mm0, mm6
+ pand mm2, mm7
+
+ paddw mm0, mm2
+
+ movd esi, mm0
+ mov [edx+eax+6], si
+ add eax,2
+ jne .odd
+
+.noodd:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX
+_vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edx, [esp+4+16]
+
+ mov ebx, [edx+VDPixmapReferenceStretchBltBilinearParameters.src]
+ mov edi, [edx+VDPixmapReferenceStretchBltBilinearParameters.dst]
+
+ mov ecx, [edx+VDPixmapReferenceStretchBltBilinearParameters.xprecopy]
+ or ecx, ecx
+ jz .noprecopy
+ mov eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.xprepos]
+ mov eax, [ebx+eax]
+ lea ebp, [ecx*4]
+ sub edi, ebp
+ rep stosd
+.noprecopy:
+ mov ebp, [edx+VDPixmapReferenceStretchBltBilinearParameters.xmidsize]
+ add ebp, ebp
+ add ebp, ebp
+ add edi, ebp
+ neg ebp
+
+ mov esi, [edx+VDPixmapReferenceStretchBltBilinearParameters.u]
+ mov eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.dudx]
+ mov edx, [edx+VDPixmapReferenceStretchBltBilinearParameters.uinc]
+ movd mm2, esi
+ movd mm3, eax
+ shr ebx, 2
+
+ movq mm5, mm2
+ punpcklwd mm5, mm5
+ punpckhdq mm5, mm5
+ movq mm4, mm5
+ psraw mm4, 15
+
+.xloop:
+ movd mm0, dword [ebx*4]
+ pxor mm7, mm7
+ movd mm1, dword [ebx*4+4]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm1, mm0
+ pand mm4, mm1
+ pmulhw mm1, mm5
+ paddw mm1, mm4
+ paddw mm0, mm1
+ packuswb mm0, mm0
+ movd dword [edi+ebp], mm0
+
+ add esi, eax
+ adc ebx, edx
+
+ paddd mm2, mm3
+ movq mm5, mm2
+ punpcklwd mm5, mm5
+ punpckhdq mm5, mm5
+ movq mm4, mm5
+ psraw mm4, 15
+ add ebp, 4
+ jnz .xloop
+
+ mov edx, [esp+4+16]
+ mov ecx, [edx+VDPixmapReferenceStretchBltBilinearParameters.xpostcopy]
+ or ecx, ecx
+ jz .nopostcopy
+ mov eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.xpostpos]
+ add eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.src]
+ mov eax, [eax]
+ rep stosd
+.nopostcopy:
+
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX
+_vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+20+16]
+ and eax, 0ff000000h
+ mov ebx, [esp+8+16]
+ mov ecx, [esp+12+16]
+ jz .noreverse
+ xchg ebx, ecx
+ js .noreverse
+ neg eax
+ xchg ebx, ecx
+.noreverse:
+ shr eax, 16
+ mov [esp+20+16], eax
+ mov edx, [esp+4+16]
+ mov eax, [esp+16+16]
+ add eax, eax
+ add eax, eax
+ lea ebx, [ebx+eax-4]
+ lea ecx, [ecx+eax-4]
+ lea edx, [edx+eax-4]
+ neg eax
+
+ movd mm4, dword [esp+20+16]
+ punpcklwd mm4, mm4
+ punpckldq mm4, mm4
+
+ movq mm6, [rb_mask_888]
+ movq mm7, [g_mask_888]
+
+.xstart:
+ add eax, 4
+ jbe .doodd
+.xloop:
+ movq mm0, [ebx+eax]
+ movq mm1, [ecx+eax]
+ movq mm2, mm0
+ movq mm3, mm1
+ psrlw mm2, 8
+ psrlw mm3, 8
+ pand mm0, mm6
+ pand mm1, mm6
+
+ psubw mm3, mm2
+ psubw mm1, mm0
+
+ pmulhw mm3, mm4
+ pmulhw mm1, mm4
+
+ psubw mm0, mm1
+ psubw mm2, mm3
+
+ pand mm0, mm6
+
+ psllw mm2, 8
+
+ paddw mm0, mm2
+
+ movq qword [edx+eax], mm0
+ add eax, 8
+ jnc .xloop
+
+.doodd:
+ sub eax, 4
+ jz .noodd
+.odd:
+ movd mm0, dword [ebx]
+ movd mm1, dword [ecx]
+ movq mm2, mm0
+ movq mm3, mm1
+ psrlw mm2, 8
+ psrlw mm3, 8
+ pand mm0, mm6
+ pand mm1, mm6
+
+ psubw mm3, mm2
+ psubw mm1, mm0
+
+ pmulhw mm3, mm4
+ pmulhw mm1, mm4
+
+ psubw mm0, mm1
+ psubw mm2, mm3
+
+ pand mm0, mm6
+
+ psllw mm2, 8
+
+ paddw mm0, mm2
+
+ movd dword [edx], mm0
+
+.noodd:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_point.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_point.asm
new file mode 100644
index 000000000..dca765b92
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_point.asm
@@ -0,0 +1,96 @@
+ segment .text
+
+ struc scaleinfo
+.dst resd 1
+.src resd 1
+.xaccum resd 1
+.xfracinc resd 1
+.xintinc resd 1
+.count resd 1
+ endstruc
+
+ global _vdasm_resize_point32
+_vdasm_resize_point32:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+
+ mov ebx, [eax+scaleinfo.xaccum]
+ mov ecx, [eax+scaleinfo.xfracinc]
+ mov edx, [eax+scaleinfo.src]
+ mov esi, [eax+scaleinfo.xintinc]
+ mov edi, [eax+scaleinfo.dst]
+ mov ebp, [eax+scaleinfo.count]
+.xloop:
+ mov eax,[edx*4]
+ add ebx,ecx
+ adc edx,esi
+ mov [edi+ebp],eax
+ add ebp,4
+ jne .xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_resize_point32_MMX
+_vdasm_resize_point32_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+
+ push 0
+ push dword [fs:0]
+ mov dword [fs:0], esp
+
+ mov ebx, [eax+scaleinfo.xaccum]
+ mov esp, [eax+scaleinfo.xfracinc]
+ mov edx, [eax+scaleinfo.src]
+ mov esi, [eax+scaleinfo.xintinc]
+ mov edi, [eax+scaleinfo.dst]
+ mov ebp, [eax+scaleinfo.count]
+
+ mov eax, ebx
+ mov ecx, edx
+ add ebx, esp
+ adc edx, esi
+ add esp, esp
+ adc esi, esi
+
+ add ebp, 4
+ jz .odd
+.dualloop:
+ movd mm0, dword [ecx*4]
+ punpckldq mm0,[edx*4]
+ add eax,esp
+ adc ecx,esi
+ add ebx,esp
+ adc edx,esi
+ movq [edi+ebp-4],mm0
+
+ add ebp,8
+ jnc .dualloop
+ jnz .noodd
+.odd:
+ mov eax, [ecx*4]
+ mov [edi-4], eax
+.noodd:
+ mov esp, dword [fs:0]
+ pop eax
+ pop eax
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_triblt.inc b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt.inc
new file mode 100644
index 000000000..fb969c56f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt.inc
@@ -0,0 +1,24 @@
+ struc span
+.u resd 1
+.v resd 1
+ endstruc
+
+ struc mipspan
+.u resd 1
+.v resd 1
+.lambda resd 1
+ endstruc
+
+ struc mipmap
+.bits resd 1
+.pitch resd 1
+.uvmul resd 1
+ resd 1
+ endstruc
+
+ struc texinfo
+.mips resd 16*4
+.dst resd 1
+.src resd 1
+.w resd 1
+ endstruc
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_mmx.asm
new file mode 100644
index 000000000..3836488aa
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_mmx.asm
@@ -0,0 +1,425 @@
+ segment .rdata, align=16
+
+correct dq 0000800000008000h
+round dq 0000200000002000h
+round1 dq 0000020000000200h
+round2 dq 0002000000020000h
+
+ segment .text
+
+ %include "a_triblt.inc"
+
+ extern _kVDCubicInterpTableFX14_075_MMX
+
+;--------------------------------------------------------------------------
+ global _vdasm_triblt_span_bilinear_mmx
+_vdasm_triblt_span_bilinear_mmx:
+ push ebp
+ push edi
+ push esi
+ push ebx
+ mov edi,[esp+4+16]
+ mov edx,[edi+texinfo.dst]
+ mov ebp,[edi+texinfo.w]
+ shl ebp,2
+ mov ebx,[edi+texinfo.mips+mipmap.bits]
+ add edx,ebp
+ mov esi,[edi+texinfo.mips+mipmap.pitch]
+ neg ebp
+ movd mm6,[edi+texinfo.mips+mipmap.uvmul]
+ pxor mm7,mm7
+ mov edi,[edi+texinfo.src]
+.xloop:
+ movq mm4,[edi]
+ movq mm0,mm4
+ psrld mm0,16
+ movq mm5,mm4
+ packssdw mm0,mm0
+ pmaddwd mm0,mm6
+ add edi,8
+ punpcklwd mm4,mm4
+ punpckldq mm4,mm4
+ movd ecx,mm0
+ add ecx,ebx
+ psrlw mm4,1
+ movd mm0,dword [ecx]
+ movd mm1,dword [ecx+4]
+ punpcklbw mm0,mm7
+ movd mm2,dword [ecx+esi]
+ punpcklbw mm1,mm7
+ movd mm3,dword [ecx+esi+4]
+ punpcklbw mm2,mm7
+ punpcklbw mm3,mm7
+ psubw mm1,mm0
+ psubw mm3,mm2
+ paddw mm1,mm1
+ paddw mm3,mm3
+ pmulhw mm1,mm4
+ pmulhw mm3,mm4
+ punpckhwd mm5,mm5
+ punpckldq mm5,mm5
+ paddw mm0,mm1
+ psrlw mm5,1
+ paddw mm2,mm3
+ psubw mm2,mm0
+ paddw mm2,mm2
+ pmulhw mm2,mm5
+ paddw mm0,mm2
+ packuswb mm0,mm0
+ movd dword [edx+ebp],mm0
+ add ebp,4
+ jnc .xloop
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ emms
+ ret
+
+;--------------------------------------------------------------------------
+ global _vdasm_triblt_span_trilinear_mmx
+_vdasm_triblt_span_trilinear_mmx:
+ push ebp
+ push edi
+ push esi
+ push ebx
+ mov esi,[esp+4+16]
+ mov edx,[esi+texinfo.dst]
+ mov ebp,[esi+texinfo.w]
+ shl ebp,2
+ add edx,ebp
+ neg ebp
+ mov edi,[esi+texinfo.src]
+ pxor mm7,mm7
+.xloop:
+ movd mm6,[edi+mipspan.u]
+ punpckldq mm6,[edi+mipspan.v]
+ mov eax,[edi+mipspan.lambda]
+ shr eax,4
+ and eax,byte -16
+ movd mm2,eax
+ psrlq mm2,4
+ psrld mm6,mm2
+ paddd mm6,[correct]
+
+ ;fetch mipmap 1
+ mov ebx,[esi+eax+mipmap.pitch]
+ movd mm1,[esi+eax+mipmap.uvmul]
+ movq mm4,mm6
+ movq mm0,mm6
+ psrld mm0,16
+ packssdw mm0,mm0
+ pmaddwd mm0,mm1
+ movq mm5,mm4
+ punpcklwd mm4,mm4
+ punpckldq mm4,mm4
+ punpckhwd mm5,mm5
+ punpckldq mm5,mm5
+ movd ecx,mm0
+ add ecx,[esi+eax+mipmap.bits]
+ psrlw mm4,1
+ movd mm0,dword [ecx]
+ movd mm1,dword [ecx+4]
+ punpcklbw mm0,mm7
+ movd mm2,dword [ecx+ebx]
+ punpcklbw mm1,mm7
+ movd mm3,dword [ecx+ebx+4]
+ punpcklbw mm2,mm7
+ punpcklbw mm3,mm7
+ psubw mm1,mm0
+ psubw mm3,mm2
+ paddw mm1,mm1
+ paddw mm3,mm3
+ pmulhw mm1,mm4
+ pmulhw mm3,mm4
+ paddw mm0,mm1
+ psrlw mm5,1
+ paddw mm2,mm3
+ psubw mm2,mm0
+ paddw mm2,mm2
+ pmulhw mm2,mm5
+ paddw mm0,mm2
+
+ ;fetch mipmap 2
+ mov ebx,[esi+eax+16+mipmap.pitch]
+ movd mm1,[esi+eax+16+mipmap.uvmul]
+ paddd mm6,[correct]
+ psrld mm6,1
+ movq mm4,mm6
+ psrld mm6,16
+ packssdw mm6,mm6
+ pmaddwd mm6,mm1
+ movq mm5,mm4
+ punpcklwd mm4,mm4
+ punpckldq mm4,mm4
+ punpckhwd mm5,mm5
+ punpckldq mm5,mm5
+ movd ecx,mm6
+ add ecx,[esi+eax+16+mipmap.bits]
+ psrlw mm4,1
+ movd mm6,dword [ecx]
+ movd mm1,dword [ecx+4]
+ punpcklbw mm6,mm7
+ movd mm2,dword [ecx+ebx]
+ punpcklbw mm1,mm7
+ movd mm3,dword [ecx+ebx+4]
+ punpcklbw mm2,mm7
+ punpcklbw mm3,mm7
+ psubw mm1,mm6
+ psubw mm3,mm2
+ paddw mm1,mm1
+ paddw mm3,mm3
+ pmulhw mm1,mm4
+ pmulhw mm3,mm4
+ paddw mm6,mm1
+ psrlw mm5,1
+ paddw mm2,mm3
+ psubw mm2,mm6
+ paddw mm2,mm2
+ pmulhw mm2,mm5
+ paddw mm6,mm2
+
+ ;blend mips
+ movd mm1,[edi+mipspan.lambda]
+ punpcklwd mm1,mm1
+ punpckldq mm1,mm1
+ psllw mm1,8
+ psrlq mm1,1
+ psubw mm6,mm0
+ paddw mm6,mm6
+ pmulhw mm6,mm1
+ paddw mm0,mm6
+ packuswb mm0,mm0
+
+ movd dword [edx+ebp],mm0
+ add edi, mipspan_size
+ add ebp,4
+ jnc .xloop
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ emms
+ ret
+
+;--------------------------------------------------------------------------
+%macro .SETUPADDR 1
+ ;compute mipmap index and UV
+ movd mm0, [edi + mipspan.u]
+ punpckldq mm0, [edi + mipspan.v]
+ mov ebx, [edi + mipspan.lambda]
+ shr ebx, 4
+ and ebx, byte -16
+
+ add ebx, mipmap_size*%1
+ movd mm2, ebx
+ add ebx, [esp + .af_mipbase]
+ psrlq mm2, 4
+ psrad mm0, mm2
+ paddd mm0, [correct]
+ movq mm1, mm0
+ psrlq mm1, 32
+
+ ;compute horizontal filters
+ movd ecx, mm0
+ shr ecx, 4
+ and ecx, 0ff0h
+ add ecx, _kVDCubicInterpTableFX14_075_MMX
+
+ ;compute vertical filter
+ movd edx, mm1
+ and edx, 0ff00h
+ shr edx, 4
+ add edx, _kVDCubicInterpTableFX14_075_MMX
+
+ ;compute texel address
+ movd mm1, [ebx + mipmap.uvmul]
+ psrld mm0, 16
+ packssdw mm0, mm0
+ pmaddwd mm0, mm1
+ movd eax, mm0
+ add eax, [ebx + mipmap.bits]
+%endmacro
+
+%macro .HCUBIC 4
+ movd %1, dword [eax]
+ punpcklbw %1, qword [eax+4]
+ movd %3, dword [eax+8]
+ punpcklbw %3, qword [eax+12]
+ movq %2, %1
+ movq %4, %3
+ punpcklbw %1, mm7
+ pmaddwd %1, [ecx]
+ punpcklbw %3, mm7
+ pmaddwd %3, [ecx+8]
+ punpckhbw %2, mm7
+ pmaddwd %2, [ecx]
+ punpckhbw %4, mm7
+ pmaddwd %4, [ecx+8]
+ paddd %1, %3
+ paddd %2, %4
+%endmacro
+
+%macro .VCUBIC 1
+ .HCUBIC mm0, mm1, mm2, mm3
+ add eax, %1
+
+ .HCUBIC mm4, mm5, mm2, mm3
+ add eax, %1
+
+ movq mm2, [round1]
+
+ paddd mm0, mm2
+ paddd mm1, mm2
+ paddd mm4, mm2
+ paddd mm5, mm2
+
+ psrad mm0, 10
+ psrad mm1, 10
+ psrad mm4, 10
+ psrad mm5, 10
+
+ packssdw mm0, mm0
+ packssdw mm1, mm1
+ packssdw mm4, mm4
+ packssdw mm5, mm5
+
+ punpcklwd mm0, mm4
+ punpcklwd mm1, mm5
+
+ movq mm3, [edx]
+
+ pmaddwd mm0, mm3
+ pmaddwd mm1, mm3
+
+ movq [esp + .af_htemp0], mm0
+ movq [esp + .af_htemp1], mm1
+
+ .HCUBIC mm0, mm1, mm2, mm3
+ add eax, %1
+ .HCUBIC mm4, mm5, mm2, mm3
+
+ movq mm2, [round1]
+
+ paddd mm0, mm2
+ paddd mm1, mm2
+ paddd mm4, mm2
+ paddd mm5, mm2
+
+ psrad mm0, 10
+ psrad mm1, 10
+ psrad mm4, 10
+ psrad mm5, 10
+
+ packssdw mm0, mm0
+ packssdw mm1, mm1
+ packssdw mm4, mm4
+ packssdw mm5, mm5
+
+ punpcklwd mm0, mm4
+ punpcklwd mm1, mm5
+
+ movq mm2, [round2]
+ movq mm3, [edx + 8]
+
+ pmaddwd mm0, mm3
+ pmaddwd mm1, mm3
+
+ paddd mm0, [esp + .af_htemp0]
+ paddd mm1, [esp + .af_htemp1]
+
+ paddd mm0, mm2
+ paddd mm1, mm2
+
+ psrad mm0, 18
+ psrad mm1, 18
+ packssdw mm0, mm1
+%endmacro
+
+ global _vdasm_triblt_span_bicubic_mip_linear_mmx
+_vdasm_triblt_span_bicubic_mip_linear_mmx:
+
+;parameters
+%define .p_texinfo 20
+
+;aligned frame
+%define .af_htemp0 0
+%define .af_htemp1 8
+%define .af_vtemp0 16
+%define .af_mipbase 24
+%define .af_prevesp 28
+%define .afsize 32
+
+ push ebp
+ lea ebp, [esp-12]
+ push edi
+ push esi
+ push ebx
+
+ sub esp, .afsize
+ and esp, -8
+
+ mov [esp + .af_prevesp], ebp
+
+ mov ebx, [ebp + .p_texinfo]
+ mov ebp, [ebx + texinfo.dst]
+ mov esi, [ebx + texinfo.w]
+ shl esi, 2
+ add ebp,esi
+ neg esi
+
+ mov edi, [ebx + texinfo.src]
+ mov [esp + .af_mipbase], ebx
+ pxor mm7, mm7
+
+.xloop:
+
+ ;registers:
+ ; eax base texel address
+ ; ebx first mip info
+ ; ecx horizontal filter
+ ; edx vertical filter
+ ; esi horizontal count
+ ; edi mipspan
+ ; ebp destination
+
+ ;fetch mipmap 1
+ .SETUPADDR 0
+ .VCUBIC [ebx+mipmap.pitch]
+
+ movq [esp + .af_vtemp0], mm0
+
+ ;fetch mipmap 2
+ .SETUPADDR 1
+ .VCUBIC [ebx+mipmap.pitch]
+
+ ;blend mips
+ movq mm1, [esp + .af_vtemp0]
+
+ psubw mm0, mm1
+
+ movd mm3,[edi+mipspan.lambda]
+ punpcklwd mm3,mm3
+ punpckldq mm3,mm3
+ psllw mm3,8
+ psrlq mm3,1
+
+ paddw mm0,mm0
+ pmulhw mm0,mm3
+ paddw mm0,mm1
+ packuswb mm0,mm0
+
+ movd dword [ebp+esi],mm0
+ add edi, mipspan_size
+ add esi,4
+ jnc .xloop
+
+ mov esp, [esp + .af_prevesp]
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ emms
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_scalar.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_scalar.asm
new file mode 100644
index 000000000..c550634f3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_scalar.asm
@@ -0,0 +1,36 @@
+ segment .text
+
+ %include "a_triblt.inc"
+
+ global _vdasm_triblt_span_point
+_vdasm_triblt_span_point:
+ push ebp
+ push edi
+ push esi
+ push ebx
+ mov eax,[esp+4+16]
+ mov ebp,[eax+texinfo.w]
+ mov ebx,[eax+texinfo.mips+mipmap.pitch]
+ shl ebp,2
+ mov edi,[eax+texinfo.src]
+ mov edx,[eax+texinfo.dst]
+ mov ecx,[eax+texinfo.mips+mipmap.bits]
+ sar ebx,2
+ add edx,ebp
+ neg ebp
+.xloop:
+ mov eax,[edi+span.v]
+ imul eax,ebx
+ add eax,[edi+span.u]
+ add edi,8
+ mov eax,[ecx+eax*4]
+ mov [edx+ebp],eax
+ add ebp,4
+ jnc .xloop
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_sse2.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_sse2.asm
new file mode 100644
index 000000000..54514b317
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_sse2.asm
@@ -0,0 +1,197 @@
+ segment .rdata, align=16
+
+correct dq 0000800000008000h, 0000800000008000h
+round dq 0000200000002000h, 0000200000002000h
+round1 dq 0000020000000200h, 0000020000000200h
+round2 dq 0002000000020000h, 0002000000020000h
+
+ segment .text
+
+ %include "a_triblt.inc"
+
+ extern _kVDCubicInterpTableFX14_075_MMX
+
+;--------------------------------------------------------------------------
+ global _vdasm_triblt_span_bicubic_mip_linear_sse2
+_vdasm_triblt_span_bicubic_mip_linear_sse2:
+
+;parameters
+%define .p_texinfo 20
+
+;aligned frame
+%define .af_vtemp0 0
+%define .af_mipbase 16
+%define .af_prevesp 20
+%define .afsize 24
+
+ push ebp
+ lea ebp, [esp-12]
+ push edi
+ push esi
+ push ebx
+
+ sub esp, .afsize
+ and esp, -16
+
+ mov [esp + .af_prevesp], ebp
+
+ mov ebx, [ebp + .p_texinfo]
+ mov ebp, [ebx + texinfo.dst]
+ mov esi, [ebx + texinfo.w]
+ shl esi, 2
+ add ebp,esi
+ neg esi
+
+ mov edi, [ebx + texinfo.src]
+ mov [esp + .af_mipbase], ebx
+ pxor xmm7, xmm7
+
+.xloop:
+
+ ;registers:
+ ; eax base texel address
+ ; ebx first mip info
+ ; ecx horizontal filter
+ ; edx vertical filter
+ ; esi horizontal count
+ ; edi mipspan
+ ; ebp destination
+
+%macro .SETUPADDR 1
+ ;compute mipmap index and UV
+ movd xmm0, [edi + mipspan.u]
+ movd xmm1, [edi + mipspan.v]
+ punpckldq xmm0, xmm1
+ mov ebx, [edi + mipspan.lambda]
+ shr ebx, 4
+ and ebx, byte -16
+
+ add ebx, mipmap_size*%1
+ movd xmm2, ebx
+ add ebx, [esp + .af_mipbase]
+ psrlq xmm2, 4
+ psrad xmm0, xmm2
+ paddd xmm0, [correct]
+ pshufd xmm1, xmm0, 01010101b
+
+ ;compute horizontal filters
+ movd ecx, xmm0
+ shr ecx, 4
+ and ecx, 0ff0h
+ add ecx, _kVDCubicInterpTableFX14_075_MMX
+
+ ;compute vertical filter
+ movd edx, xmm1
+ and edx, 0ff00h
+ shr edx, 4
+ add edx, _kVDCubicInterpTableFX14_075_MMX
+
+ ;compute texel address
+ movd xmm1, [ebx + mipmap.uvmul]
+ psrld xmm0, 16
+ packssdw xmm0, xmm0
+ pmaddwd xmm0, xmm1
+ movd eax, xmm0
+ add eax, [ebx + mipmap.bits]
+%endmacro
+
+%macro .HCUBIC 4
+ movd %1, dword [eax]
+ movd %3, dword [eax+4]
+ movd %2, dword [eax+8]
+ movd %4, dword [eax+12]
+ punpcklbw %1, %3
+ punpcklbw %2, %4
+ punpcklbw %1, xmm7
+ punpcklbw %2, xmm7
+ movdqa %3, [ecx]
+ pshufd %4, %3, 11101110b
+ pshufd %3, %3, 01000100b
+ pmaddwd %1, %3
+ pmaddwd %2, %4
+ paddd %1, %2
+%endmacro
+
+%macro .VCUBIC 1
+ .HCUBIC xmm0, xmm4, xmm5, xmm6
+ add eax, %1
+ .HCUBIC xmm1, xmm4, xmm5, xmm6
+ add eax, %1
+ .HCUBIC xmm2, xmm4, xmm5, xmm6
+ add eax, %1
+ .HCUBIC xmm3, xmm4, xmm5, xmm6
+
+ movq xmm4, [round1]
+
+ paddd xmm0, xmm4
+
+ paddd xmm1, xmm4
+ psrad xmm0, 10
+
+ paddd xmm2, xmm4
+ psrad xmm1, 10
+ packssdw xmm0, xmm0
+
+ paddd xmm3, xmm4
+ psrad xmm2, 10
+ packssdw xmm1, xmm1
+
+ movdqa xmm5, [edx]
+ psrad xmm3, 10
+ punpcklwd xmm0, xmm1
+
+ packssdw xmm2, xmm2
+ packssdw xmm3, xmm3
+ pshufd xmm4, xmm5, 01000100b
+
+ pmaddwd xmm0, xmm4
+ punpcklwd xmm2, xmm3
+
+ pshufd xmm5, xmm5, 11101110b
+
+ pmaddwd xmm2, xmm5
+ paddd xmm0, xmm2
+ paddd xmm0, [round2]
+ psrad xmm0, 18
+
+ packssdw xmm0, xmm0
+%endmacro
+
+ ;fetch mipmap 1
+ .SETUPADDR 0
+ .VCUBIC [ebx+mipmap.pitch]
+
+ movq [esp + .af_vtemp0], xmm0
+
+ ;fetch mipmap 2
+ .SETUPADDR 1
+ .VCUBIC [ebx+mipmap.pitch]
+
+ ;blend mips
+ movq xmm1, [esp + .af_vtemp0]
+
+ psubw xmm0, xmm1
+
+ movd xmm3, [edi+mipspan.lambda]
+ pshuflw xmm3, xmm3, 0
+ psllw xmm3, 8
+ psrlq xmm3, 1
+
+ paddw xmm0, xmm0
+ pmulhw xmm0, xmm3
+ paddw xmm0, xmm1
+ packuswb xmm0, xmm0
+
+ movd dword [ebp+esi], xmm0
+ add edi, mipspan_size
+ add esi,4
+ jnc .xloop
+
+ mov esp, [esp + .af_prevesp]
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/alphablt.cpp b/src/thirdparty/VirtualDub/Kasumi/source/alphablt.cpp
new file mode 100644
index 000000000..a292ca2bd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/alphablt.cpp
@@ -0,0 +1,76 @@
+#include <vd2/system/math.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+
+void VDPixmapBltAlphaConst8(uint8 *dst, ptrdiff_t dstpitch, const uint8 *src, ptrdiff_t srcpitch, uint32 w, uint32 h, uint32 ialpha);
+
+bool VDPixmapBltAlphaConst(const VDPixmap& dst, const VDPixmap& src, float alpha) {
+ if (!(alpha >= 0.0f))
+ alpha = 0.0f;
+ else if (!(alpha <= 1.0f))
+ alpha = 1.0f;
+
+ uint32 ialpha = VDRoundToInt32(alpha * 256.0f);
+
+ // format check
+ if (dst.format != src.format || !src.format)
+ return false;
+
+ // degenerate case check
+ if (!dst.w || !dst.h)
+ return false;
+
+ // size check
+ if (src.w != dst.w || src.h != dst.h)
+ return false;
+
+ // check for formats that are not 8bpp
+ switch(src.format) {
+ case nsVDPixmap::kPixFormat_Pal1:
+ case nsVDPixmap::kPixFormat_Pal2:
+ case nsVDPixmap::kPixFormat_Pal4:
+ case nsVDPixmap::kPixFormat_Pal8:
+ case nsVDPixmap::kPixFormat_RGB565:
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ return false;
+ }
+
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(src.format);
+
+ const int qw = -(-dst.w >> formatInfo.qwbits);
+ const int qh = -(-dst.h >> formatInfo.qhbits);
+ const int auxw = -(-dst.w >> formatInfo.auxwbits);
+ const int auxh = -(-dst.h >> formatInfo.auxhbits);
+
+ switch(formatInfo.auxbufs) {
+ case 2:
+ VDPixmapBltAlphaConst8((uint8 *)dst.data3, dst.pitch3, (const uint8 *)src.data3, src.pitch3, auxw, auxh, ialpha);
+ case 1:
+ VDPixmapBltAlphaConst8((uint8 *)dst.data2, dst.pitch2, (const uint8 *)src.data2, src.pitch2, auxw, auxh, ialpha);
+ case 0:
+ VDPixmapBltAlphaConst8((uint8 *)dst.data, dst.pitch, (const uint8 *)src.data, src.pitch, formatInfo.qsize * qw, qh, ialpha);
+ }
+
+ return true;
+}
+
+void VDPixmapBltAlphaConst8(uint8 *dst, ptrdiff_t dstpitch, const uint8 *src, ptrdiff_t srcpitch, uint32 w, uint32 h, uint32 ialpha) {
+ dstpitch -= w;
+ srcpitch -= w;
+ do {
+ uint32 w2 = w;
+ do {
+ sint32 sc = *src;
+ sint32 dc = *dst;
+
+ *dst = dc + (((sc-dc)*ialpha + 128) >> 8);
+ ++src;
+ ++dst;
+ } while(--w2);
+
+ src += srcpitch;
+ dst += dstpitch;
+ } while(--h);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt.cpp
new file mode 100644
index 000000000..75e5542a9
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt.cpp
@@ -0,0 +1,273 @@
+#include <vector>
+#include <vd2/system/memory.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+
+#if _MSC_VER >= 1300
+ #define VDNOINLINE __declspec(noinline)
+#else
+ #define VDNOINLINE
+#endif
+
+using namespace nsVDPixmap;
+
+namespace {
+ typedef void (*tpPalettedBlitter)(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal);
+ typedef void (*tpChunkyBlitter)(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+ typedef void (*tpPlanarBlitter)(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+}
+
+bool VDPixmapBltDirect(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+
+void VDPixmapBltDirectPalettedConversion(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h, tpPalettedBlitter pBlitter) {
+ uint8 palbytes[256 * 3];
+
+ int palsize;
+
+ switch(src.format) {
+ case kPixFormat_Pal1:
+ palsize = 2;
+ break;
+ case kPixFormat_Pal2:
+ palsize = 4;
+ break;
+ case kPixFormat_Pal4:
+ palsize = 16;
+ break;
+ case kPixFormat_Pal8:
+ palsize = 256;
+ break;
+ default:
+ VDNEVERHERE;
+ }
+
+ VDASSERT(src.palette);
+
+ VDPixmap srcpal = { (void *)src.palette, NULL, palsize, 1, 0, kPixFormat_XRGB8888 };
+ VDPixmap dstpal = { palbytes, NULL, palsize, 1, 0, dst.format };
+
+ VDVERIFY(VDPixmapBltDirect(dstpal, srcpal, palsize, 1));
+
+ pBlitter(dst.data, dst.pitch, src.data, src.pitch, w, h, palbytes);
+}
+
+tpVDPixBltTable VDPixmapGetBlitterTable() {
+#if defined(_WIN32) && defined(_M_IX86)
+ static tpVDPixBltTable pBltTable;
+
+ if (CPUGetEnabledExtensions() & CPUF_SUPPORTS_MMX) {
+ return VDGetPixBltTableX86MMX();
+ } else {
+ return VDGetPixBltTableX86Scalar();
+ }
+#else
+ static tpVDPixBltTable pBltTable = VDGetPixBltTableReference();
+ return pBltTable;
+#endif
+}
+
+bool VDPixmapBltDirect(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ if ((unsigned)src.format >= kPixFormat_Max_Standard) {
+ VDASSERT(false);
+ return false;
+ }
+
+ if ((unsigned)dst.format >= kPixFormat_Max_Standard) {
+ VDASSERT(false);
+ return false;
+ }
+
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src.format);
+
+ if (src.format == dst.format) {
+ int qw = w;
+ int qh = h;
+
+ if (srcinfo.qchunky) {
+ qw = (qw + srcinfo.qw - 1) / srcinfo.qw;
+ qh = -(-h >> srcinfo.qhbits);
+ }
+
+ const int auxw = -(-w >> srcinfo.auxwbits);
+ const int auxh = -(-h >> srcinfo.auxhbits);
+
+ switch(srcinfo.auxbufs) {
+ case 2:
+ VDMemcpyRect(dst.data3, dst.pitch3, src.data3, src.pitch3, srcinfo.auxsize * auxw, auxh);
+ case 1:
+ VDMemcpyRect(dst.data2, dst.pitch2, src.data2, src.pitch2, srcinfo.auxsize * auxw, auxh);
+ case 0:
+ VDMemcpyRect(dst.data, dst.pitch, src.data, src.pitch, srcinfo.qsize * qw, qh);
+ }
+
+ return true;
+ }
+
+ VDPixmapBlitterFn pBlitter = VDPixmapGetBlitterTable()[src.format][dst.format];
+
+ if (!pBlitter)
+ return false;
+
+ pBlitter(dst, src, w, h);
+ return true;
+}
+
+bool VDPixmapIsBltPossible(int dst_format, int src_format) {
+ if (src_format == dst_format)
+ return true;
+
+ tpVDPixBltTable tab(VDPixmapGetBlitterTable());
+
+ if (tab[src_format][dst_format])
+ return true;
+
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src_format);
+ const VDPixmapFormatInfo& dstinfo = VDPixmapGetInfo(dst_format);
+
+ if (srcinfo.auxbufs > 0 || dstinfo.auxbufs > 0)
+ return false; // fail, planar buffers involved (can't do scanlines independently)
+
+ return (tab[src_format][kPixFormat_YUV444_XVYU] && tab[kPixFormat_YUV444_XVYU][dst_format])
+ ||(tab[src_format][kPixFormat_XRGB8888] && tab[kPixFormat_XRGB8888][dst_format]);
+}
+
+bool VDNOINLINE VDPixmapBltTwoStage(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src.format);
+ const VDPixmapFormatInfo& dstinfo = VDPixmapGetInfo(dst.format);
+
+ if (srcinfo.auxbufs > 0 || dstinfo.auxbufs > 0)
+ return false; // fail, planar buffers involved
+
+ if (srcinfo.qh > 1)
+ return false; // fail, vertically packed formats involved
+
+ if (srcinfo.palsize)
+ return false; // fail, paletted formats involved
+
+ // Allocate a 4xW buffer and try round-tripping through either
+ // RGB32 or XYVU.
+ vdblock<uint32> tempBuf;
+
+ tpVDPixBltTable tab(VDPixmapGetBlitterTable());
+
+ VDPixmap linesrc(src);
+ VDPixmap linedst(dst);
+ VDPixmap linetmp = {};
+
+ if (w < 1024) {
+ linetmp.data = _alloca(sizeof(uint32) * w);
+ } else {
+ tempBuf.resize(w + 1);
+ linetmp.data = tempBuf.data();
+ }
+ linetmp.pitch = 0;
+ linetmp.format = kPixFormat_YUV444_XVYU;
+ linetmp.w = w;
+ linetmp.h = 1;
+
+ VDPixmapBlitterFn pb1 = tab[src.format][kPixFormat_YUV444_XVYU];
+ VDPixmapBlitterFn pb2 = tab[kPixFormat_YUV444_XVYU][dst.format];
+ if (!pb1 || !pb2) {
+ pb1 = tab[src.format][kPixFormat_XRGB8888];
+ pb2 = tab[kPixFormat_XRGB8888][dst.format];
+ if (!pb1 || !pb2)
+ return false;
+
+ linetmp.format = kPixFormat_XRGB8888;
+ }
+
+ do {
+ pb1(linetmp, linesrc, w, 1);
+ pb2(linedst, linetmp, w, 1);
+ vdptrstep(linesrc.data, linesrc.pitch);
+ vdptrstep(linedst.data, linedst.pitch);
+ } while(--h);
+ return true;
+}
+
+bool VDPixmapBltFast(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ if (VDPixmapBltDirect(dst, src, w, h))
+ return true;
+
+ // Oro... let's see if we can do a two-stage conversion.
+ return VDPixmapBltTwoStage(dst, src, w, h);
+}
+
+bool VDPixmapBlt(const VDPixmap& dst, const VDPixmap& src) {
+ vdpixsize w = std::min<vdpixsize>(src.w, dst.w);
+ vdpixsize h = std::min<vdpixsize>(src.h, dst.h);
+
+ if (!w || !h)
+ return true;
+
+ return VDPixmapBltFast(dst, src, w, h);
+}
+
+bool VDPixmapBlt(const VDPixmap& dst, vdpixpos x1, vdpixpos y1, const VDPixmap& src, vdpixpos x2, vdpixpos y2, vdpixsize w, vdpixsize h) {
+ if (x1 < 0) {
+ x2 -= x1;
+ w -= x1;
+ x1 = 0;
+ }
+
+ if (y1 < 0) {
+ y2 -= y1;
+ h -= y1;
+ y1 = 0;
+ }
+
+ if (x2 < 0) {
+ x1 -= x2;
+ w -= x2;
+ x2 = 0;
+ }
+
+ if (y2 < 0) {
+ y1 -= y2;
+ h -= y2;
+ y2 = 0;
+ }
+
+ if (w > dst.w - x1)
+ w = dst.w - x1;
+
+ if (h > dst.h - y1)
+ h = dst.h - y1;
+
+ if (w > src.w - x2)
+ w = src.w - x2;
+
+ if (h > src.h - y2)
+ h = src.h - y2;
+
+ if (w>=0 && h >= 0) {
+ VDPixmap dst2(VDPixmapOffset(dst, x1, y1));
+ VDPixmap src2(VDPixmapOffset(src, x2, y2));
+
+ return VDPixmapBltFast(dst2, src2, w, h);
+ }
+
+ return true;
+}
+
+extern bool VDPixmapStretchBltNearest_reference(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2);
+extern bool VDPixmapStretchBltBilinear_reference(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2);
+
+bool VDPixmapStretchBltNearest(const VDPixmap& dst, const VDPixmap& src) {
+ return VDPixmapStretchBltNearest(dst, 0, 0, dst.w<<16, dst.h<<16, src, 0, 0, src.w<<16, src.h<<16);
+}
+
+bool VDPixmapStretchBltNearest(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2) {
+ return VDPixmapStretchBltNearest_reference(dst, x1, y1, x2, y2, src, u1, v1, u2, v2);
+}
+
+bool VDPixmapStretchBltBilinear(const VDPixmap& dst, const VDPixmap& src) {
+ return VDPixmapStretchBltBilinear(dst, 0, 0, dst.w<<16, dst.h<<16, src, 0, 0, src.w<<16, src.h<<16);
+}
+
+bool VDPixmapStretchBltBilinear(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2) {
+ return VDPixmapStretchBltBilinear_reference(dst, x1, y1, x2, y2, src, u1, v1, u2, v2);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference.cpp
new file mode 100644
index 000000000..c4dccce9f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference.cpp
@@ -0,0 +1,259 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "blt_setup.h"
+
+#define DECLARE_PALETTED(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal0)
+#define DECLARE_RGB(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV_REV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV_PLANAR(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+
+DECLARE_RGB(RGB565, XRGB1555);
+DECLARE_RGB(RGB888, XRGB1555);
+DECLARE_RGB(XRGB8888, XRGB1555);
+DECLARE_RGB(XRGB1555, RGB565);
+DECLARE_RGB(RGB888, RGB565);
+DECLARE_RGB(XRGB8888, RGB565);
+DECLARE_RGB(XRGB1555, RGB888);
+DECLARE_RGB(RGB565, RGB888);
+DECLARE_RGB(XRGB8888, RGB888);
+DECLARE_RGB(XRGB1555, XRGB8888);
+DECLARE_RGB(RGB565, XRGB8888);
+DECLARE_RGB(RGB888, XRGB8888);
+
+DECLARE_PALETTED(Pal1, Any8);
+DECLARE_PALETTED(Pal1, Any16);
+DECLARE_PALETTED(Pal1, Any24);
+DECLARE_PALETTED(Pal1, Any32);
+DECLARE_PALETTED(Pal2, Any8);
+DECLARE_PALETTED(Pal2, Any16);
+DECLARE_PALETTED(Pal2, Any24);
+DECLARE_PALETTED(Pal2, Any32);
+DECLARE_PALETTED(Pal4, Any8);
+DECLARE_PALETTED(Pal4, Any16);
+DECLARE_PALETTED(Pal4, Any24);
+DECLARE_PALETTED(Pal4, Any32);
+DECLARE_PALETTED(Pal8, Any8);
+DECLARE_PALETTED(Pal8, Any16);
+DECLARE_PALETTED(Pal8, Any24);
+DECLARE_PALETTED(Pal8, Any32);
+
+DECLARE_YUV(XVYU, UYVY);
+DECLARE_YUV(XVYU, YUYV);
+DECLARE_YUV(Y8, UYVY);
+DECLARE_YUV(Y8, YUYV);
+DECLARE_YUV(UYVY, Y8);
+DECLARE_YUV(YUYV, Y8);
+DECLARE_YUV(UYVY, YUYV);
+DECLARE_YUV_PLANAR(YUV411, YV12);
+
+DECLARE_YUV(UYVY, XRGB1555);
+DECLARE_YUV(UYVY, RGB565);
+DECLARE_YUV(UYVY, RGB888);
+DECLARE_YUV(UYVY, XRGB8888);
+DECLARE_YUV(YUYV, XRGB1555);
+DECLARE_YUV(YUYV, RGB565);
+DECLARE_YUV(YUYV, RGB888);
+DECLARE_YUV(YUYV, XRGB8888);
+DECLARE_YUV(Y8, XRGB1555);
+DECLARE_YUV(Y8, RGB565);
+DECLARE_YUV(Y8, RGB888);
+DECLARE_YUV(Y8, XRGB8888);
+
+DECLARE_YUV_REV(XRGB1555, Y8);
+DECLARE_YUV_REV(RGB565, Y8);
+DECLARE_YUV_REV(RGB888, Y8);
+DECLARE_YUV_REV(XRGB8888, Y8);
+
+DECLARE_YUV_REV(XRGB1555, XVYU);
+DECLARE_YUV_REV(RGB565, XVYU);
+DECLARE_YUV_REV(RGB888, XVYU);
+DECLARE_YUV_REV(XRGB8888, XVYU);
+
+DECLARE_YUV_PLANAR(YV12, XRGB1555);
+DECLARE_YUV_PLANAR(YV12, RGB565);
+DECLARE_YUV_PLANAR(YV12, RGB888);
+DECLARE_YUV_PLANAR(YV12, XRGB8888);
+
+DECLARE_YUV_PLANAR(YUV411, XRGB1555);
+DECLARE_YUV_PLANAR(YUV411, RGB565);
+DECLARE_YUV_PLANAR(YUV411, RGB888);
+DECLARE_YUV_PLANAR(YUV411, XRGB8888);
+
+extern void VDPixmapBlt_YUVPlanar_decode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_YUVPlanar_encode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_YUVPlanar_convert_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_UberblitAdapter(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+
+using namespace nsVDPixmap;
+
+void VDPixmapInitBlittersReference(VDPixmapBlitterTable& table) {
+ // use uberblit as the baseline
+ VDPixmapFormatSubset uberblitSrcFormats;
+ VDPixmapFormatSubset uberblitDstFormats;
+
+ uberblitSrcFormats =
+ kPixFormat_Pal1,
+ kPixFormat_Pal2,
+ kPixFormat_Pal4,
+ kPixFormat_Pal8,
+ kPixFormat_XRGB1555,
+ kPixFormat_RGB565,
+ kPixFormat_RGB888,
+ kPixFormat_XRGB8888,
+ kPixFormat_Y8,
+ kPixFormat_YUV422_UYVY,
+ kPixFormat_YUV422_YUYV,
+ kPixFormat_YUV444_XVYU,
+ kPixFormat_YUV444_Planar,
+ kPixFormat_YUV422_Planar,
+ kPixFormat_YUV422_Planar_16F,
+ kPixFormat_YUV420_Planar,
+ kPixFormat_YUV411_Planar,
+ kPixFormat_YUV410_Planar,
+ kPixFormat_YUV422_Planar_Centered,
+ kPixFormat_YUV420_Planar_Centered,
+ kPixFormat_YUV422_V210,
+ kPixFormat_YUV422_UYVY_709,
+ kPixFormat_YUV420_NV12;
+
+ uberblitDstFormats =
+ kPixFormat_XRGB1555,
+ kPixFormat_RGB565,
+ kPixFormat_RGB888,
+ kPixFormat_XRGB8888,
+ kPixFormat_Y8,
+ kPixFormat_YUV422_UYVY,
+ kPixFormat_YUV422_YUYV,
+ kPixFormat_YUV444_XVYU,
+ kPixFormat_YUV444_Planar,
+ kPixFormat_YUV422_Planar,
+ kPixFormat_YUV422_Planar_16F,
+ kPixFormat_YUV420_Planar,
+ kPixFormat_YUV411_Planar,
+ kPixFormat_YUV410_Planar,
+ kPixFormat_YUV422_Planar_Centered,
+ kPixFormat_YUV420_Planar_Centered,
+ kPixFormat_YUV422_V210,
+ kPixFormat_YUV422_UYVY_709,
+ kPixFormat_YUV420_NV12;
+
+ table.AddBlitter(uberblitSrcFormats, uberblitDstFormats, VDPixmapBlt_UberblitAdapter);
+
+ // standard formats
+
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_Y8, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any8_reference>);
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_XRGB1555, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_RGB565, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_RGB888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any24_reference>);
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_XRGB8888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any32_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_Y8, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any8_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_XRGB1555, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_RGB565, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_RGB888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any24_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_XRGB8888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any32_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_Y8, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any8_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_XRGB1555, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_RGB565, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_RGB888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any24_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_XRGB8888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any32_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_Y8, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any8_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_XRGB1555, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_RGB565, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_RGB888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any24_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_XRGB8888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any32_reference>);
+
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_RGB888_reference>);
+
+ table.AddBlitter(kPixFormat_YUV444_XVYU, kPixFormat_YUV422_UYVY, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XVYU_to_UYVY_reference>);
+ table.AddBlitter(kPixFormat_YUV444_XVYU, kPixFormat_YUV422_YUYV, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XVYU_to_YUYV_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_YUV422_UYVY, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_UYVY_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_YUV422_YUYV, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_YUYV_reference>);
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_Y8_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_Y8_reference>);
+
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_XRGB8888_reference>);
+
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_YUV444_XVYU, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_XVYU_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_YUV444_XVYU, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_XVYU_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_YUV444_XVYU, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_XVYU_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_YUV444_XVYU, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_XVYU_reference>);
+
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_Y8_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_Y8_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_Y8_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_Y8_reference>);
+
+ table.AddBlitter(kPixFormat_YUV411_Planar, kPixFormat_YUV420_Planar, VDPixmapBlt_YUV411_to_YV12_reference);
+
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_YUV422_YUYV, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_YUYV_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_YUV422_UYVY, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_YUYV_reference>); // not an error -- same routine
+
+ //////////////////////////////////////////////////////////
+
+ VDPixmapFormatSubset srcFormats;
+ VDPixmapFormatSubset dstFormats;
+
+ srcFormats = kPixFormat_YUV444_Planar,
+ kPixFormat_YUV422_Planar,
+ kPixFormat_YUV420_Planar,
+ kPixFormat_YUV411_Planar,
+ kPixFormat_YUV410_Planar,
+ kPixFormat_YUV422_Planar_Centered,
+ kPixFormat_YUV420_Planar_Centered;
+
+ dstFormats = kPixFormat_XRGB1555, kPixFormat_RGB565, kPixFormat_RGB888, kPixFormat_XRGB8888, kPixFormat_YUV422_UYVY, kPixFormat_YUV422_YUYV;
+
+ table.AddBlitter(srcFormats, dstFormats, VDPixmapBlt_YUVPlanar_decode_reference);
+
+ //////////////////////////////////////////////////////////
+
+ dstFormats = kPixFormat_YUV444_Planar, kPixFormat_YUV422_Planar, kPixFormat_YUV420_Planar, kPixFormat_YUV411_Planar, kPixFormat_YUV410_Planar, kPixFormat_YUV422_Planar_Centered, kPixFormat_YUV420_Planar_Centered;
+ srcFormats = kPixFormat_XRGB1555, kPixFormat_RGB565, kPixFormat_RGB888, kPixFormat_XRGB8888, kPixFormat_YUV422_UYVY, kPixFormat_YUV422_YUYV;
+
+ table.AddBlitter(srcFormats, dstFormats, VDPixmapBlt_YUVPlanar_encode_reference);
+
+ //////////////////////////////////////////////////////////
+
+ srcFormats = kPixFormat_YUV444_Planar, kPixFormat_YUV422_Planar, kPixFormat_YUV420_Planar, kPixFormat_YUV411_Planar, kPixFormat_YUV410_Planar, kPixFormat_Y8, kPixFormat_YUV422_Planar_Centered, kPixFormat_YUV420_Planar_Centered;
+ dstFormats = kPixFormat_YUV444_Planar, kPixFormat_YUV422_Planar, kPixFormat_YUV420_Planar, kPixFormat_YUV411_Planar, kPixFormat_YUV410_Planar, kPixFormat_Y8, kPixFormat_YUV422_Planar_Centered, kPixFormat_YUV420_Planar_Centered;
+
+ table.AddBlitter(srcFormats, dstFormats, VDPixmapBlt_YUVPlanar_convert_reference);
+}
+
+tpVDPixBltTable VDGetPixBltTableReferenceInternal() {
+ static VDPixmapBlitterTable sReferenceTable;
+
+ VDPixmapInitBlittersReference(sReferenceTable);
+
+ return sReferenceTable.mTable;
+}
+
+tpVDPixBltTable VDGetPixBltTableReference() {
+ static tpVDPixBltTable spTable = VDGetPixBltTableReferenceInternal();
+
+ return spTable;
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_pal.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_pal.cpp
new file mode 100644
index 000000000..4a103de3b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_pal.cpp
@@ -0,0 +1,545 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+#define DECLARE_PALETTED(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal0)
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: Pal1 ->
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_PALETTED(Pal1, Any8) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 3;
+ dst += (w-1) & ~7;
+
+ srcpitch += (w+7) >> 3;
+ dstpitch += (w+7) & ~7;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: dst[7] = pal[v&1]; v >>= 1;
+ case 7: dst[6] = pal[v&1]; v >>= 1;
+ case 6: dst[5] = pal[v&1]; v >>= 1;
+ case 5: dst[4] = pal[v&1]; v >>= 1;
+ case 4: dst[3] = pal[v&1]; v >>= 1;
+ case 3: dst[2] = pal[v&1]; v >>= 1;
+ case 2: dst[1] = pal[v&1]; v >>= 1;
+ case 1: dst[0] = pal[v&1]; v >>= 1;
+
+ dst -= 8;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal1, Any16) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *pal = (const uint16 *)pal0;
+
+ src += (w-1) >> 3;
+ dst += (w-1) & ~7;
+
+ srcpitch += (w+7) >> 3;
+ dstpitch += ((w+7) & ~7) * 2;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: dst[7] = pal[v&1]; v >>= 1;
+ case 7: dst[6] = pal[v&1]; v >>= 1;
+ case 6: dst[5] = pal[v&1]; v >>= 1;
+ case 5: dst[4] = pal[v&1]; v >>= 1;
+ case 4: dst[3] = pal[v&1]; v >>= 1;
+ case 3: dst[2] = pal[v&1]; v >>= 1;
+ case 2: dst[1] = pal[v&1]; v >>= 1;
+ case 1: dst[0] = pal[v&1]; v >>= 1;
+
+ dst -= 8;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal1, Any24) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 3;
+ dst += ((w-1) & ~7) * 3;
+
+ srcpitch += (w+7) >> 3;
+ dstpitch += ((w+7) & ~7) * 3;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+ const uint8 *pe;
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: pe = &pal[3*(v&1)]; dst[7*3+0] = pe[0]; dst[7*3+1] = pe[1]; dst[7*3+2] = pe[2]; v >>= 1;
+ case 7: pe = &pal[3*(v&1)]; dst[6*3+0] = pe[0]; dst[6*3+1] = pe[1]; dst[6*3+2] = pe[2]; v >>= 1;
+ case 6: pe = &pal[3*(v&1)]; dst[5*3+0] = pe[0]; dst[5*3+1] = pe[1]; dst[5*3+2] = pe[2]; v >>= 1;
+ case 5: pe = &pal[3*(v&1)]; dst[4*3+0] = pe[0]; dst[4*3+1] = pe[1]; dst[4*3+2] = pe[2]; v >>= 1;
+ case 4: pe = &pal[3*(v&1)]; dst[3*3+0] = pe[0]; dst[3*3+1] = pe[1]; dst[3*3+2] = pe[2]; v >>= 1;
+ case 3: pe = &pal[3*(v&1)]; dst[2*3+0] = pe[0]; dst[2*3+1] = pe[1]; dst[2*3+2] = pe[2]; v >>= 1;
+ case 2: pe = &pal[3*(v&1)]; dst[1*3+0] = pe[0]; dst[1*3+1] = pe[1]; dst[1*3+2] = pe[2]; v >>= 1;
+ case 1: pe = &pal[3*(v&1)]; dst[0*3+0] = pe[0]; dst[0*3+1] = pe[1]; dst[0*3+2] = pe[2]; v >>= 1;
+
+ dst -= 24;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal1, Any32) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *pal = (const uint32 *)pal0;
+
+ src += (w-1) >> 3;
+ dst += (w-1) & ~7;
+
+ srcpitch += (w+7) >> 3;
+ dstpitch += ((w+7) & ~7) * 4;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: dst[7] = pal[v&1]; v >>= 1;
+ case 7: dst[6] = pal[v&1]; v >>= 1;
+ case 6: dst[5] = pal[v&1]; v >>= 1;
+ case 5: dst[4] = pal[v&1]; v >>= 1;
+ case 4: dst[3] = pal[v&1]; v >>= 1;
+ case 3: dst[2] = pal[v&1]; v >>= 1;
+ case 2: dst[1] = pal[v&1]; v >>= 1;
+ case 1: dst[0] = pal[v&1]; v >>= 1;
+
+ dst -= 8;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: Pal2 ->
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_PALETTED(Pal2, Any8) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 2;
+ dst += (w-1) & ~3;
+
+ srcpitch += (w+3) >> 2;
+ dstpitch += (w+3) & ~3;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: dst[3] = pal[v&3]; v >>= 2;
+ case 3: dst[2] = pal[v&3]; v >>= 2;
+ case 2: dst[1] = pal[v&3]; v >>= 2;
+ case 1: dst[0] = pal[v&3]; v >>= 2;
+
+ dst -= 4;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal2, Any16) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *pal = (const uint16 *)pal0;
+
+ src += (w-1) >> 2;
+ dst += (w-1) & ~3;
+
+ srcpitch += (w+3) >> 2;
+ dstpitch += ((w+3) & ~3) * 2;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: dst[3] = pal[v&3]; v >>= 2;
+ case 3: dst[2] = pal[v&3]; v >>= 2;
+ case 2: dst[1] = pal[v&3]; v >>= 2;
+ case 1: dst[0] = pal[v&3]; v >>= 2;
+
+ dst -= 4;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal2, Any24) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 2;
+ dst += ((w-1) & ~3) * 3;
+
+ srcpitch += (w+3) >> 2;
+ dstpitch += ((w+3) & ~3) * 3;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+ const uint8 *pe;
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: pe = &pal[3*(v&3)]; dst[3*3+0] = pe[0]; dst[3*3+1] = pe[1]; dst[3*3+2] = pe[2]; v >>= 2;
+ case 3: pe = &pal[3*(v&3)]; dst[2*3+0] = pe[0]; dst[2*3+1] = pe[1]; dst[2*3+2] = pe[2]; v >>= 2;
+ case 2: pe = &pal[3*(v&3)]; dst[1*3+0] = pe[0]; dst[1*3+1] = pe[1]; dst[1*3+2] = pe[2]; v >>= 2;
+ case 1: pe = &pal[3*(v&3)]; dst[0*3+0] = pe[0]; dst[0*3+1] = pe[1]; dst[0*3+2] = pe[2]; v >>= 2;
+
+ dst -= 12;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal2, Any32) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *pal = (const uint32 *)pal0;
+
+ src += (w-1) >> 2;
+ dst += (w-1) & ~3;
+
+ srcpitch += (w+3) >> 2;
+ dstpitch += ((w+3) & ~3) * 4;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: dst[3] = pal[v&3]; v >>= 2;
+ case 3: dst[2] = pal[v&3]; v >>= 2;
+ case 2: dst[1] = pal[v&3]; v >>= 2;
+ case 1: dst[0] = pal[v&3]; v >>= 2;
+
+ dst -= 4;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: Pal4 ->
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_PALETTED(Pal4, Any8) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1);
+
+ srcpitch += (w+1) >> 1;
+ dstpitch += (w+1) & ~1;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: dst[1] = pal[v&15]; v >>= 4;
+ case 1: dst[0] = pal[v&15]; v >>= 4;
+
+ dst -= 2;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal4, Any16) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *pal = (const uint16 *)pal0;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1);
+
+ srcpitch += (w+1) >> 1;
+ dstpitch += ((w+1) & ~1) * 2;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: dst[1] = pal[v&15]; v >>= 4;
+ case 1: dst[0] = pal[v&15]; v >>= 4;
+
+ dst -= 2;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal4, Any24) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1) * 3;
+
+ srcpitch += (w+1) >> 1;
+ dstpitch += ((w+1) & ~1) * 3;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+ const uint8 *pe;
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: pe = &pal[3*(v&15)]; dst[1*3+0] = pe[0]; dst[1*3+1] = pe[1]; dst[1*3+2] = pe[2]; v >>= 4;
+ case 1: pe = &pal[3*(v&15)]; dst[0*3+0] = pe[0]; dst[0*3+1] = pe[1]; dst[0*3+2] = pe[2]; v >>= 4;
+
+ dst -= 6;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal4, Any32) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *pal = (const uint32 *)pal0;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1);
+
+ srcpitch += (w+1) >> 1;
+ dstpitch += ((w+1) & ~1) * 4;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: dst[1] = pal[v&15]; v >>= 4;
+ case 1: dst[0] = pal[v&15]; v >>= 4;
+
+ dst -= 2;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: Pal8 ->
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_PALETTED(Pal8, Any8) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ srcpitch -= w;
+ dstpitch -= w;
+
+ do {
+ int wt = w;
+
+ do {
+ *dst++ = pal[*src++];
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal8, Any16) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *pal = (const uint16 *)pal0;
+
+ srcpitch -= w;
+ dstpitch -= w*2;
+
+ do {
+ int wt = w;
+
+ do {
+ *dst++ = pal[*src++];
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal8, Any24) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ srcpitch -= w;
+ dstpitch -= w*3;
+
+ do {
+ int wt = w;
+ do {
+ const uint8 *pe = &pal[3**src++];
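+			// 3 * *src++: index into the 3-byte-per-entry palette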
+
+ dst[0] = pe[0];
+ dst[1] = pe[1];
+ dst[2] = pe[2];
+ dst += 3;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal8, Any32) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *pal = (const uint32 *)pal0;
+
+ srcpitch -= w;
+ dstpitch -= w*4;
+
+ do {
+ int wt = w;
+
+ do {
+ *dst++ = pal[*src++];
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_rgb.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_rgb.cpp
new file mode 100644
index 000000000..ea49f260d
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_rgb.cpp
@@ -0,0 +1,310 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+#define DECLARE_RGB(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
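+// For example, DECLARE_RGB(RGB565, XRGB1555) declares
+//   void VDPixmapBlt_RGB565_to_XRGB1555_reference(void *dst0, ptrdiff_t dstpitch,
+//                                                 const void *src0, ptrdiff_t srcpitch,
+//                                                 vdpixsize w, vdpixsize h);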
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: -> XRGB1555
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_RGB(RGB565, XRGB1555) {
+ const uint16 *src = (const uint16 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ *dst++ = (px&0x001f) + ((px&0xffc0)>>1);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB888, XRGB1555) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 3*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 r = ((uint32)src[2] & 0xf8) << 7;
+ const uint32 g = ((uint32)src[1] & 0xf8) << 2;
+ const uint32 b = (uint32)src[0] >> 3;
+ src += 3;
+
+ *dst++ = (uint16)(r + g + b);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(XRGB8888, XRGB1555) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 4*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 r = ((uint32)src[2] & 0xf8) << 7;
+ const uint32 g = ((uint32)src[1] & 0xf8) << 2;
+ const uint32 b = (uint32)src[0] >> 3;
+ src += 4;
+
+ *dst++ = (uint16)(r + g + b);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: -> RGB565
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_RGB(XRGB1555, RGB565) {
+ const uint16 *src = (const uint16 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ *dst++ = (uint16)(px + (px&0xffe0) + ((px&0x0200)>>4));
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB888, RGB565) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 3*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 r = ((uint32)src[2] & 0xf8) << 8;
+ const uint32 g = ((uint32)src[1] & 0xfc) << 3;
+ const uint32 b = (uint32)src[0] >> 3;
+ src += 3;
+
+ *dst++ = (uint16)(r + g + b);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(XRGB8888, RGB565) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 4*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 r = ((uint32)src[2] & 0xf8) << 8;
+ const uint32 g = ((uint32)src[1] & 0xfc) << 3;
+ const uint32 b = (uint32)src[0] >> 3;
+ src += 4;
+
+ *dst++ = (uint16)(r + g + b);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: -> RGB888
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_RGB(XRGB1555, RGB888) {
+ const uint16 *src = (const uint16 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 3*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ uint32 rb = px & 0x7c1f;
+ uint32 g = px & 0x03e0;
+
+ rb += rb<<5;
+ g += g<<5;
+
+ dst[0] = (uint8)(rb>>2);
+ dst[1] = (uint8)(g>>7);
+ dst[2] = (uint8)(rb>>12);
+ dst += 3;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB565, RGB888) {
+ const uint16 *src = (const uint16 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 3*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ uint32 rb = px & 0xf81f;
+ uint32 g = px & 0x07e0;
+
+ rb += rb<<5;
+ g += g<<6;
+
+ dst[0] = (uint8)(rb>>2);
+ dst[1] = (uint8)(g>>9);
+ dst[2] = (uint8)(rb>>13);
+ dst += 3;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(XRGB8888, RGB888) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ srcpitch -= 4*w;
+ dstpitch -= 3*w;
+
+ do {
+ int wt = w;
+
+ do {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+ dst += 3;
+ src += 4;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: -> XRGB8888
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_RGB(XRGB1555, XRGB8888) {
+ const uint16 *src = (const uint16 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 4*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ const uint32 rgb = ((px & 0x7c00) << 9) + ((px & 0x03e0) << 6) + ((px & 0x001f) << 3);
+
+ *dst++ = rgb + ((rgb & 0xe0e0e0)>>5);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB565, XRGB8888) {
+ const uint16 *src = (const uint16 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 4*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ const uint32 rb = ((px & 0xf800) << 8) + ((px & 0x001f) << 3);
+ const uint32 g = ((px & 0x07e0) << 5) + (px & 0x0300);
+
+ *dst++ = rb + ((rb & 0xe000e0)>>5) + g;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB888, XRGB8888) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+
+ srcpitch -= 3*w;
+ dstpitch -= 4*w;
+
+ do {
+ int wt = w;
+
+ do {
+ *dst++ = (uint32)src[0] + ((uint32)src[1]<<8) + ((uint32)src[2]<<16);
+ src += 3;
+ } while(--wt);
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv.cpp
new file mode 100644
index 000000000..6f40eeaa0
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv.cpp
@@ -0,0 +1,1590 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/memory.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+#include "blt_spanutils.h"
+
+#ifdef _M_IX86
+ #include "blt_spanutils_x86.h"
+#endif
+
+using namespace nsVDPixmapSpanUtils;
+
+namespace {
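+	// Fixed-point (16.16) BT.601 limited-range YCbCr -> RGB tables: y_tab scales
+	// luma by 255/219 (76309/65536); the Cr/Cb tables hold the 1.402/1.772
+	// (and green) coefficients rescaled by 255/224.  The clip tables are padded
+	// with 277 clamped-to-zero entries below and 279 clamped-to-max entries above
+	// the 256 in-range values, so a plain table lookup also performs the clamp.
+	// cliptab15/cliptab16 store the quantised 5/6-bit value replicated into every
+	// channel position of a 1555/565 word, so callers only mask out the channel
+	// they want.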
+ struct YCbCrToRGB {
+ sint16 y_tab[256];
+ sint16 r_cr_tab[256];
+ sint16 b_cb_tab[256];
+ sint16 g_cr_tab[256];
+ sint16 g_cb_tab[256];
+ uint8 cliptab[277+256+279];
+ uint16 cliptab15[277+256+279];
+ uint16 cliptab16[277+256+279];
+
+ YCbCrToRGB() {
+ int i;
+
+ memset(cliptab, 0, 277);
+ memset(cliptab+277+256, 255, 279);
+
+ memset(cliptab15, 0, sizeof cliptab15[0] * 277);
+ memset(cliptab16, 0, sizeof cliptab16[0] * 277);
+ memset(cliptab15+277+256, 0xff, sizeof cliptab15[0] * 279);
+ memset(cliptab16+277+256, 0xff, sizeof cliptab16[0] * 279);
+
+ for(i=0; i<256; ++i) {
+ y_tab[i] = (sint16)(((i-16) * 76309 + 32768) >> 16);
+ r_cr_tab[i] = (sint16)(((i-128) * 104597 + 32768) >> 16);
+ b_cb_tab[i] = (sint16)(((i-128) * 132201 + 32768) >> 16);
+ g_cr_tab[i] = (sint16)(((i-128) * -53279 + 32768) >> 16);
+ g_cb_tab[i] = (sint16)(((i-128) * -25674 + 32768) >> 16);
+ cliptab[i+277] = (uint8)i;
+ cliptab15[i+277] = 0x421 * ((unsigned)i>>3);
+ cliptab16[i+277] = 0x801 * ((unsigned)i>>3) + 0x20 * ((unsigned)i>>2);
+ }
+ }
+ } colorconv;
+
+ struct YCbCrFormatInfo {
+ ptrdiff_t ystep;
+ ptrdiff_t cstep;
+ ptrdiff_t yinc[4];
+ ptrdiff_t cinc[4];
+ sint8 ypos[4];
+ sint8 cbpos[4];
+ sint8 crpos[4];
+ };
+
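+	// ystep/cstep are negative because the generic converters below walk each row
+	// right-to-left; ypos/cbpos/crpos give the byte offset of each of the four
+	// pixels in a group, and yinc/cinc are 0/-1 masks that are ANDed with the row
+	// stride so chroma rows only advance on the appropriate output rows (e.g.
+	// every other row for 4:2:0).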
+ YCbCrFormatInfo g_formatInfo_YUV444_Planar = { -4, -4, {-1,-1,-1,-1}, {-1,-1,-1,-1}, {0,1,2,3}, {0,1,2,3}, {0,1,2,3}};
+ YCbCrFormatInfo g_formatInfo_YUV422_YUYV = { -8, -8, {-1,-1,-1,-1}, {-1,-1,-1,-1}, {0,2,4,6}, {1,1,5,5}, {3,3,7,7}};
+ YCbCrFormatInfo g_formatInfo_YUV422_UYVY = { -8, -8, {-1,-1,-1,-1}, {-1,-1,-1,-1}, {1,3,5,7}, {0,0,4,4}, {2,2,6,6}};
+ YCbCrFormatInfo g_formatInfo_YUV420_YV12 = { -4, -2, {-1,-1,-1,-1}, { 0,-1, 0,-1}, {0,1,2,3}, {0,0,1,1}, {0,0,1,1}};
+ YCbCrFormatInfo g_formatInfo_YUV411_YV12 = { -4, -1, {-1,-1,-1,-1}, {-1,-1,-1,-1}, {0,1,2,3}, {0,0,0,0}, {0,0,0,0}};
+
+ inline uint16 ycbcr_to_1555(uint8 y, uint8 cb0, uint8 cr0) {
+ const uint16 *p = &colorconv.cliptab15[277 + colorconv.y_tab[y]];
+ uint32 r = 0x7c00 & p[colorconv.r_cr_tab[cr0]];
+ uint32 g = 0x03e0 & p[colorconv.g_cr_tab[cr0] + colorconv.g_cb_tab[cb0]];
+ uint32 b = 0x001f & p[colorconv.b_cb_tab[cb0]];
+
+ return r + g + b;
+ }
+
+ inline uint16 ycbcr_to_565(uint8 y, uint8 cb0, uint8 cr0) {
+ const uint16 *p = &colorconv.cliptab16[277 + colorconv.y_tab[y]];
+ uint32 r = 0xf800 & p[colorconv.r_cr_tab[cr0]];
+ uint32 g = 0x07e0 & p[colorconv.g_cr_tab[cr0] + colorconv.g_cb_tab[cb0]];
+ uint32 b = 0x001f & p[colorconv.b_cb_tab[cb0]];
+
+ return r + g + b;
+ }
+
+ inline void ycbcr_to_888(uint8 *dst, uint8 y, uint8 cb0, uint8 cr0) {
+ const uint8 *p = &colorconv.cliptab[277 + colorconv.y_tab[y]];
+ uint8 r = p[colorconv.r_cr_tab[cr0]];
+ uint8 g = p[colorconv.g_cr_tab[cr0] + colorconv.g_cb_tab[cb0]];
+ uint8 b = p[colorconv.b_cb_tab[cb0]];
+
+ dst[0] = b;
+ dst[1] = g;
+ dst[2] = r;
+ }
+
+ inline uint32 ycbcr_to_8888(uint8 y, uint8 cb0, uint8 cr0) {
+ const uint8 *p = &colorconv.cliptab[277 + colorconv.y_tab[y]];
+ uint8 r = p[colorconv.r_cr_tab[cr0]];
+ uint8 g = p[colorconv.g_cr_tab[cr0] + colorconv.g_cb_tab[cb0]];
+ uint8 b = p[colorconv.b_cb_tab[cb0]];
+
+ return (r << 16) + (g << 8) + b;
+ }
+
+ void VDYCbCrToXRGB1555Span(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint16 *dst = (uint16 *)dst0;
+
+ do {
+ *dst++ = ycbcr_to_1555(*y++, *cb++, *cr++);
+ } while(--w);
+ }
+
+ void VDYCbCrToRGB565Span(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint16 *dst = (uint16 *)dst0;
+
+ do {
+ *dst++ = ycbcr_to_565(*y++, *cb++, *cr++);
+ } while(--w);
+ }
+
+ void VDYCbCrToRGB888Span(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint8 *dst = (uint8 *)dst0;
+
+ do {
+ ycbcr_to_888(dst, *y++, *cb++, *cr++);
+ dst += 3;
+ } while(--w);
+ }
+
+ void VDYCbCrToXRGB8888Span(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint32 *dst = (uint32 *)dst0;
+
+ do {
+ *dst++ = ycbcr_to_8888(*y++, *cb++, *cr++);
+ } while(--w);
+ }
+
+ void VDYCbCrToUYVYSpan(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint32 *dst = (uint32 *)dst0;
+
+ if (--w) {
+ do {
+ *dst++ = (uint32)*cb++ + ((uint32)y[0] << 8) + ((uint32)*cr++ << 16) + ((uint32)y[1] << 24);
+ y += 2;
+ } while((sint32)(w-=2)>0);
+ }
+
+ if (!(w & 1))
+ *dst++ = (uint32)*cb + ((uint32)y[0] << 8) + ((uint32)*cr << 16) + ((uint32)y[0] << 24);
+ }
+
+ void VDYCbCrToYUYVSpan(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint32 *dst = (uint32 *)dst0;
+
+ if (--w) {
+ do {
+ *dst++ = (uint32)y[0] + ((uint32)*cb++ << 8) + ((uint32)y[1] << 16) + ((uint32)*cr++ << 24);
+ y += 2;
+ } while((sint32)(w-=2)>0);
+ }
+
+ if (!(w & 1))
+ *dst++ = (uint32)y[0] + ((uint32)*cb << 8) + ((uint32)y[0] << 16) + ((uint32)*cr << 24);
+ }
+
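+	// The four *Generic converters below share one structure: the format
+	// descriptor says where Y/Cb/Cr live inside a four-pixel group, rows are
+	// emitted right-to-left, and a switch-into-loop handles widths that are not
+	// a multiple of four.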
+ void VDYCbCrToRGB1555Generic(void *dst, ptrdiff_t dststride, const void *yrow, ptrdiff_t ystride, const void *cbrow, ptrdiff_t cbstride, const void *crrow, ptrdiff_t crstride, int w, int h, const YCbCrFormatInfo& formatinfo) {
+ const ptrdiff_t ystep = formatinfo.ystep;
+ const ptrdiff_t cstep = formatinfo.cstep;
+ const ptrdiff_t ypos0 = formatinfo.ypos[0];
+ const ptrdiff_t ypos1 = formatinfo.ypos[1];
+ const ptrdiff_t ypos2 = formatinfo.ypos[2];
+ const ptrdiff_t ypos3 = formatinfo.ypos[3];
+ const ptrdiff_t crpos0 = formatinfo.crpos[0];
+ const ptrdiff_t crpos1 = formatinfo.crpos[1];
+ const ptrdiff_t crpos2 = formatinfo.crpos[2];
+ const ptrdiff_t crpos3 = formatinfo.crpos[3];
+ const ptrdiff_t cbpos0 = formatinfo.cbpos[0];
+ const ptrdiff_t cbpos1 = formatinfo.cbpos[1];
+ const ptrdiff_t cbpos2 = formatinfo.cbpos[2];
+ const ptrdiff_t cbpos3 = formatinfo.cbpos[3];
+
+ yrow = (char *)yrow - ystep * ((w-1) >> 2);
+ crrow = (char *)crrow - cstep * ((w-1) >> 2);
+ cbrow = (char *)cbrow - cstep * ((w-1) >> 2);
+ dst = (char *)dst + 2*((w-1) & ~3);
+
+ int y = 0;
+ do {
+ const uint8 *ysrc = (const uint8 *)yrow;
+ const uint8 *crsrc = (const uint8 *)crrow;
+ const uint8 *cbsrc = (const uint8 *)cbrow;
+ uint16 *out = (uint16 *)dst;
+ int w2 = -w;
+
+ switch(w2 & 3) {
+ do {
+ case 0: out[3] = ycbcr_to_1555(ysrc[ypos3], cbsrc[cbpos3], crsrc[crpos3]);
+ case 1: out[2] = ycbcr_to_1555(ysrc[ypos2], cbsrc[cbpos2], crsrc[crpos2]);
+ case 2: out[1] = ycbcr_to_1555(ysrc[ypos1], cbsrc[cbpos1], crsrc[crpos1]);
+ case 3: out[0] = ycbcr_to_1555(ysrc[ypos0], cbsrc[cbpos0], crsrc[crpos0]);
+ out -= 4;
+ ysrc += ystep;
+ crsrc += cstep;
+ cbsrc += cstep;
+ } while((w2 += 4) < 0);
+ }
+
+ dst = (char *)dst + dststride;
+ yrow = (const char *)yrow + (ystride & formatinfo.yinc[y & 3]);
+ cbrow = (const char *)cbrow + (cbstride & formatinfo.cinc[y & 3]);
+ crrow = (const char *)crrow + (crstride & formatinfo.cinc[y & 3]);
+ } while(++y < h);
+ }
+
+ void VDYCbCrToRGB565Generic(void *dst, ptrdiff_t dststride, const void *yrow, ptrdiff_t ystride, const void *cbrow, ptrdiff_t cbstride, const void *crrow, ptrdiff_t crstride, int w, int h, const YCbCrFormatInfo& formatinfo) {
+ const ptrdiff_t ystep = formatinfo.ystep;
+ const ptrdiff_t cstep = formatinfo.cstep;
+ const ptrdiff_t ypos0 = formatinfo.ypos[0];
+ const ptrdiff_t ypos1 = formatinfo.ypos[1];
+ const ptrdiff_t ypos2 = formatinfo.ypos[2];
+ const ptrdiff_t ypos3 = formatinfo.ypos[3];
+ const ptrdiff_t crpos0 = formatinfo.crpos[0];
+ const ptrdiff_t crpos1 = formatinfo.crpos[1];
+ const ptrdiff_t crpos2 = formatinfo.crpos[2];
+ const ptrdiff_t crpos3 = formatinfo.crpos[3];
+ const ptrdiff_t cbpos0 = formatinfo.cbpos[0];
+ const ptrdiff_t cbpos1 = formatinfo.cbpos[1];
+ const ptrdiff_t cbpos2 = formatinfo.cbpos[2];
+ const ptrdiff_t cbpos3 = formatinfo.cbpos[3];
+
+ yrow = (char *)yrow - ystep * ((w-1) >> 2);
+ crrow = (char *)crrow - cstep * ((w-1) >> 2);
+ cbrow = (char *)cbrow - cstep * ((w-1) >> 2);
+ dst = (char *)dst + 2*((w-1) & ~3);
+
+ int y = 0;
+ do {
+ const uint8 *ysrc = (const uint8 *)yrow;
+ const uint8 *crsrc = (const uint8 *)crrow;
+ const uint8 *cbsrc = (const uint8 *)cbrow;
+ uint16 *out = (uint16 *)dst;
+ int w2 = -w;
+
+ switch(w2 & 3) {
+ do {
+ case 0: out[3] = ycbcr_to_565(ysrc[ypos3], cbsrc[cbpos3], crsrc[crpos3]);
+ case 1: out[2] = ycbcr_to_565(ysrc[ypos2], cbsrc[cbpos2], crsrc[crpos2]);
+ case 2: out[1] = ycbcr_to_565(ysrc[ypos1], cbsrc[cbpos1], crsrc[crpos1]);
+ case 3: out[0] = ycbcr_to_565(ysrc[ypos0], cbsrc[cbpos0], crsrc[crpos0]);
+ out -= 4;
+ ysrc += ystep;
+ crsrc += cstep;
+ cbsrc += cstep;
+ } while((w2 += 4) < 0);
+ }
+
+ dst = (char *)dst + dststride;
+ yrow = (const char *)yrow + (ystride & formatinfo.yinc[y & 3]);
+ cbrow = (const char *)cbrow + (cbstride & formatinfo.cinc[y & 3]);
+ crrow = (const char *)crrow + (crstride & formatinfo.cinc[y & 3]);
+ } while(++y < h);
+ }
+
+ void VDYCbCrToRGB888Generic(void *dst, ptrdiff_t dststride, const void *yrow, ptrdiff_t ystride, const void *cbrow, ptrdiff_t cbstride, const void *crrow, ptrdiff_t crstride, int w, int h, const YCbCrFormatInfo& formatinfo) {
+ const ptrdiff_t ystep = formatinfo.ystep;
+ const ptrdiff_t cstep = formatinfo.cstep;
+ const ptrdiff_t ypos0 = formatinfo.ypos[0];
+ const ptrdiff_t ypos1 = formatinfo.ypos[1];
+ const ptrdiff_t ypos2 = formatinfo.ypos[2];
+ const ptrdiff_t ypos3 = formatinfo.ypos[3];
+ const ptrdiff_t crpos0 = formatinfo.crpos[0];
+ const ptrdiff_t crpos1 = formatinfo.crpos[1];
+ const ptrdiff_t crpos2 = formatinfo.crpos[2];
+ const ptrdiff_t crpos3 = formatinfo.crpos[3];
+ const ptrdiff_t cbpos0 = formatinfo.cbpos[0];
+ const ptrdiff_t cbpos1 = formatinfo.cbpos[1];
+ const ptrdiff_t cbpos2 = formatinfo.cbpos[2];
+ const ptrdiff_t cbpos3 = formatinfo.cbpos[3];
+
+ yrow = (char *)yrow - ystep * ((w-1) >> 2);
+ crrow = (char *)crrow - cstep * ((w-1) >> 2);
+ cbrow = (char *)cbrow - cstep * ((w-1) >> 2);
+ dst = (char *)dst + 3*((w-1) & ~3);
+
+ int y = 0;
+ do {
+ const uint8 *ysrc = (const uint8 *)yrow;
+ const uint8 *crsrc = (const uint8 *)crrow;
+ const uint8 *cbsrc = (const uint8 *)cbrow;
+ uint8 *out = (uint8 *)dst;
+ int w2 = -w;
+
+ switch(w2 & 3) {
+ do {
+ case 0: ycbcr_to_888(out+9, ysrc[ypos3], cbsrc[cbpos3], crsrc[crpos3]);
+ case 1: ycbcr_to_888(out+6, ysrc[ypos2], cbsrc[cbpos2], crsrc[crpos2]);
+ case 2: ycbcr_to_888(out+3, ysrc[ypos1], cbsrc[cbpos1], crsrc[crpos1]);
+ case 3: ycbcr_to_888(out, ysrc[ypos0], cbsrc[cbpos0], crsrc[crpos0]);
+ out -= 12;
+ ysrc += ystep;
+ crsrc += cstep;
+ cbsrc += cstep;
+ } while((w2 += 4) < 0);
+ }
+
+ dst = (char *)dst + dststride;
+ yrow = (const char *)yrow + (ystride & formatinfo.yinc[y & 3]);
+ cbrow = (const char *)cbrow + (cbstride & formatinfo.cinc[y & 3]);
+ crrow = (const char *)crrow + (crstride & formatinfo.cinc[y & 3]);
+ } while(++y < h);
+ }
+
+ void VDYCbCrToRGB8888Generic(void *dst, ptrdiff_t dststride, const void *yrow, ptrdiff_t ystride, const void *cbrow, ptrdiff_t cbstride, const void *crrow, ptrdiff_t crstride, int w, int h, const YCbCrFormatInfo& formatinfo) {
+ const ptrdiff_t ystep = formatinfo.ystep;
+ const ptrdiff_t cstep = formatinfo.cstep;
+ const ptrdiff_t ypos0 = formatinfo.ypos[0];
+ const ptrdiff_t ypos1 = formatinfo.ypos[1];
+ const ptrdiff_t ypos2 = formatinfo.ypos[2];
+ const ptrdiff_t ypos3 = formatinfo.ypos[3];
+ const ptrdiff_t crpos0 = formatinfo.crpos[0];
+ const ptrdiff_t crpos1 = formatinfo.crpos[1];
+ const ptrdiff_t crpos2 = formatinfo.crpos[2];
+ const ptrdiff_t crpos3 = formatinfo.crpos[3];
+ const ptrdiff_t cbpos0 = formatinfo.cbpos[0];
+ const ptrdiff_t cbpos1 = formatinfo.cbpos[1];
+ const ptrdiff_t cbpos2 = formatinfo.cbpos[2];
+ const ptrdiff_t cbpos3 = formatinfo.cbpos[3];
+
+ yrow = (char *)yrow - ystep * ((w-1) >> 2);
+ crrow = (char *)crrow - cstep * ((w-1) >> 2);
+ cbrow = (char *)cbrow - cstep * ((w-1) >> 2);
+ dst = (char *)dst + 4*((w-1) & ~3);
+
+ int y = 0;
+ do {
+ const uint8 *ysrc = (const uint8 *)yrow;
+ const uint8 *crsrc = (const uint8 *)crrow;
+ const uint8 *cbsrc = (const uint8 *)cbrow;
+ uint32 *out = (uint32 *)dst;
+ int w2 = -w;
+
+ switch(w2 & 3) {
+ do {
+ case 0: out[3] = ycbcr_to_8888(ysrc[ypos3], cbsrc[cbpos3], crsrc[crpos3]);
+ case 1: out[2] = ycbcr_to_8888(ysrc[ypos2], cbsrc[cbpos2], crsrc[crpos2]);
+ case 2: out[1] = ycbcr_to_8888(ysrc[ypos1], cbsrc[cbpos1], crsrc[crpos1]);
+ case 3: out[0] = ycbcr_to_8888(ysrc[ypos0], cbsrc[cbpos0], crsrc[crpos0]);
+ out -= 4;
+ ysrc += ystep;
+ crsrc += cstep;
+ cbsrc += cstep;
+ } while((w2 += 4) < 0);
+ }
+
+ dst = (char *)dst + dststride;
+ yrow = (const char *)yrow + (ystride & formatinfo.yinc[y & 3]);
+ cbrow = (const char *)cbrow + (cbstride & formatinfo.cinc[y & 3]);
+ crrow = (const char *)crrow + (crstride & formatinfo.cinc[y & 3]);
+ } while(++y < h);
+ }
+}
+
+#define DECLARE_YUV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+
+DECLARE_YUV(UYVY, XRGB1555) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint16 *y;
+
+ cb = src[0];
+ cr = src[2];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[1]]];
+ *dst++ = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[4];
+ cr = src[6];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[3]]];
+ dst[0] = (y[(rc0+rc1+1)>>1] & 0x7c00) + (y[(gc0+gc1+1)>>1] & 0x3e0) + (y[(bc0+bc1+1)>>1] & 0x001f);
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[5]]];
+ dst[1] = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+
+ dst += 2;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[3]]];
+ *dst = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, RGB565) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint16 *y;
+
+ cb = src[0];
+ cr = src[2];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[1]]];
+ *dst++ = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[4];
+ cr = src[6];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[3]]];
+ dst[0] = (y[(rc0+rc1+1)>>1] & 0xf800) + (y[(gc0+gc1+1)>>1] & 0x7e0) + (y[(bc0+bc1+1)>>1] & 0x001f);
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[5]]];
+ dst[1] = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+
+ dst += 2;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[3]]];
+ *dst = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, RGB888) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint8 *y;
+
+ cb = src[0];
+ cr = src[2];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[1]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ dst += 3;
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[4];
+ cr = src[6];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[3]]];
+ dst[0] = y[(bc0+bc1+1)>>1];
+ dst[1] = y[(gc0+gc1+1)>>1];
+ dst[2] = y[(rc0+rc1+1)>>1];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[5]]];
+ dst[3] = y[bc1];
+ dst[4] = y[gc1];
+ dst[5] = y[rc1];
+
+ dst += 6;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[3]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, XRGB8888) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint8 *y;
+
+ cb = src[0];
+ cr = src[2];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[1]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ dst += 4;
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[4];
+ cr = src[6];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[3]]];
+ dst[0] = y[(bc0+bc1+1)>>1];
+ dst[1] = y[(gc0+gc1+1)>>1];
+ dst[2] = y[(rc0+rc1+1)>>1];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[5]]];
+ dst[4] = y[bc1];
+ dst[5] = y[gc1];
+ dst[6] = y[rc1];
+
+ dst += 8;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[3]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, XRGB1555) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint16 *y;
+
+ cb = src[1];
+ cr = src[3];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[0]]];
+ *dst++ = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[5];
+ cr = src[7];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[2]]];
+ dst[0] = (y[(rc0+rc1+1)>>1] & 0x7c00) + (y[(gc0+gc1+1)>>1] & 0x3e0) + (y[(bc0+bc1+1)>>1] & 0x001f);
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[4]]];
+ dst[1] = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+
+ dst += 2;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[2]]];
+ *dst = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, RGB565) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint16 *y;
+
+ cb = src[1];
+ cr = src[3];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[0]]];
+ *dst++ = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[5];
+ cr = src[7];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[2]]];
+ dst[0] = (y[(rc0+rc1+1)>>1] & 0xf800) + (y[(gc0+gc1+1)>>1] & 0x7e0) + (y[(bc0+bc1+1)>>1] & 0x001f);
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[4]]];
+ dst[1] = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+
+ dst += 2;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[2]]];
+ *dst = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, RGB888) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint8 *y;
+
+ cb = src[1];
+ cr = src[3];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[0]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ dst += 3;
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[5];
+ cr = src[7];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[2]]];
+ dst[0] = y[(bc0+bc1+1)>>1];
+ dst[1] = y[(gc0+gc1+1)>>1];
+ dst[2] = y[(rc0+rc1+1)>>1];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[4]]];
+ dst[3] = y[bc1];
+ dst[4] = y[gc1];
+ dst[5] = y[rc1];
+
+ dst += 6;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[2]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, XRGB8888) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint8 *y;
+
+ cb = src[1];
+ cr = src[3];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[0]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ dst += 4;
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[5];
+ cr = src[7];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[2]]];
+ dst[0] = y[(bc0+bc1+1)>>1];
+ dst[1] = y[(gc0+gc1+1)>>1];
+ dst[2] = y[(rc0+rc1+1)>>1];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[4]]];
+ dst[4] = y[bc1];
+ dst[5] = y[gc1];
+ dst[6] = y[rc1];
+
+ dst += 8;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[2]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, XRGB1555) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 2*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = colorconv.cliptab15[colorconv.y_tab[*src++] + 277];
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, RGB565) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 2*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = colorconv.cliptab16[colorconv.y_tab[*src++] + 277];
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, RGB888) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 3*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ dst[0] = dst[1] = dst[2] = colorconv.cliptab[colorconv.y_tab[*src++] + 277];
+ dst += 3;
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, XRGB8888) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 4*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = 0x010101 * colorconv.cliptab[colorconv.y_tab[*src++] + 277];
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+#define DECLARE_YUV_PLANAR(x, y) void VDPixmapBlt_##x##_to_##y##_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+
+
+namespace {
+ typedef void (*tpYUVPlanarFinalDecoder)(void *, const uint8 *, const uint8 *, const uint8 *, uint32);
+ typedef void (*tpYUVPlanarHorizDecoder)(uint8 *dst, const uint8 *src, sint32 w);
+ typedef void (*tpYUVPlanarVertDecoder)(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+}
+
+#ifdef _M_IX86
+ extern "C" void __cdecl vdasm_pixblt_YUV444Planar_to_XRGB1555_scan_MMX(void *dst, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 count);
+ extern "C" void __cdecl vdasm_pixblt_YUV444Planar_to_RGB565_scan_MMX(void *dst, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 count);
+ extern "C" void __cdecl vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX(void *dst, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 count);
+#endif
+
+
+void VDPixmapBlt_YUVPlanar_decode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src.format);
+ int hbits = srcinfo.auxwbits;
+ int vbits = srcinfo.auxhbits;
+
+ if (src.format == nsVDPixmap::kPixFormat_YUV422_UYVY || src.format == nsVDPixmap::kPixFormat_YUV422_YUYV)
+ hbits = 1;
+
+ bool h_coaligned = true;
+ bool v_coaligned = false;
+
+ if (src.format == nsVDPixmap::kPixFormat_YUV422_Planar_Centered ||
+ src.format == nsVDPixmap::kPixFormat_YUV420_Planar_Centered) {
+ h_coaligned = false;
+ }
+
+ tpYUVPlanarVertDecoder vfunc = NULL;
+ tpYUVPlanarHorizDecoder hfunc = NULL;
+ uint32 horiz_buffer_size = 0;
+ uint32 vert_buffer_size = 0;
+ uint32 horiz_count = 0;
+ sint32 yaccum = 8;
+ sint32 yinc = 8;
+ uint32 yleft = h;
+
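+	// yaccum/yinc form a 3-bit fixed-point vertical phase accumulator (8 = one
+	// full source chroma row): 4:2:0 steps by 4/8 per output row and 4:1:0 by
+	// 2/8, and the vertical expander receives the phase as (yaccum & 7) << 5.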
+ switch(vbits*2+v_coaligned) {
+ case 0: // 4:4:4, 4:2:2
+ case 1:
+ break;
+ case 2: // 4:2:0 (centered)
+ vfunc = vert_expand2x_centered;
+ vert_buffer_size = w>>1;
+ yaccum = 6;
+ yinc = 4;
+ yleft >>= 1;
+ break;
+ case 4: // 4:1:0 (centered)
+ vfunc = vert_expand4x_centered;
+ vert_buffer_size = w>>2;
+ yaccum = 5;
+ yinc = 2;
+ yleft >>= 2;
+ break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+ --yleft;
+
+ tpYUVPlanarFinalDecoder dfunc = NULL;
+
+#ifdef _M_IX86
+ uint32 cpuflags = CPUGetEnabledExtensions();
+
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (vfunc == vert_expand2x_centered)
+ vfunc = vert_expand2x_centered_ISSE;
+ }
+
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_XRGB1555: dfunc = vdasm_pixblt_YUV444Planar_to_XRGB1555_scan_MMX; break;
+ case nsVDPixmap::kPixFormat_RGB565: dfunc = vdasm_pixblt_YUV444Planar_to_RGB565_scan_MMX; break;
+ case nsVDPixmap::kPixFormat_XRGB8888: dfunc = vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX; break;
+ }
+ }
+#endif
+
+ bool halfchroma = false;
+
+ if (!dfunc) {
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_XRGB1555: dfunc = VDYCbCrToXRGB1555Span; break;
+ case nsVDPixmap::kPixFormat_RGB565: dfunc = VDYCbCrToRGB565Span; break;
+ case nsVDPixmap::kPixFormat_RGB888: dfunc = VDYCbCrToRGB888Span; break;
+ case nsVDPixmap::kPixFormat_XRGB8888: dfunc = VDYCbCrToXRGB8888Span; break;
+ case nsVDPixmap::kPixFormat_YUV422_UYVY: dfunc = VDYCbCrToUYVYSpan; halfchroma = true; break;
+ case nsVDPixmap::kPixFormat_YUV422_YUYV: dfunc = VDYCbCrToYUYVSpan; halfchroma = true; break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+ }
+
+ switch(hbits*2+h_coaligned) {
+ case 0: // 4:4:4
+ case 1:
+ if (halfchroma) {
+ hfunc = horiz_compress2x_coaligned;
+ horiz_buffer_size = (w + 1) >> 1;
+ horiz_count = w;
+ }
+ break;
+ case 2: // 4:2:0 MPEG-1 (centered)
+ if (halfchroma) {
+ hfunc = horiz_realign_to_coaligned;
+ horiz_buffer_size = (w + 1) >> 1;
+ horiz_count = (w + 1) >> 1;
+ } else {
+ hfunc = horiz_expand2x_centered;
+ horiz_buffer_size = w;
+ horiz_count = w;
+ }
+ break;
+ case 3: // 4:2:0/4:2:2 MPEG-2 (coaligned)
+ if (!halfchroma) {
+ hfunc = horiz_expand2x_coaligned;
+ horiz_buffer_size = w;
+ horiz_count = w;
+ }
+ break;
+ case 5: // 4:1:1 (coaligned)
+ if (halfchroma) {
+ hfunc = horiz_expand2x_coaligned;
+ horiz_buffer_size = (w + 1) >> 1;
+ horiz_count = (w + 1) >> 1;
+ } else {
+ hfunc = horiz_expand4x_coaligned;
+ horiz_buffer_size = w;
+ horiz_count = w;
+ }
+ break;
+
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (hfunc == horiz_expand2x_coaligned)
+ hfunc = horiz_expand2x_coaligned_ISSE;
+ }
+#endif
+
+ uint32 chroma_srcwidth = -(-w >> srcinfo.auxwbits);
+ horiz_buffer_size = (horiz_buffer_size + 15) & ~15;
+ vert_buffer_size = (vert_buffer_size + 15) & ~15;
+
+ // allocate buffers
+
+ vdblock<uint8> tempbuf((horiz_buffer_size + vert_buffer_size)*2 + 1);
+
+ uint8 *const crbufh = tempbuf.data();
+ uint8 *const crbufv = crbufh + horiz_buffer_size;
+ uint8 *const cbbufh = crbufv + vert_buffer_size;
+ uint8 *const cbbufv = cbbufh + horiz_buffer_size;
+
+ const uint8 *cb0 = (const uint8*)src.data2;
+ const uint8 *cr0 = (const uint8*)src.data3;
+ const uint8 *cb1 = cb0;
+ const uint8 *cr1 = cr0;
+ const uint8 *y = (const uint8 *)src.data;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ void *out = dst.data;
+ ptrdiff_t outpitch = dst.pitch;
+
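+	// Per output row: advance the chroma row pair when the phase wraps,
+	// vertically interpolate Cr/Cb into the scratch buffers if needed,
+	// horizontally expand or realign them, then hand one full-width span of
+	// Y/Cb/Cr to dfunc for the final pixel-format conversion.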
+ for(;;) {
+ if (yaccum >= 8) {
+ yaccum &= 7;
+
+ cb0 = cb1;
+ cr0 = cr1;
+
+ if (yleft > 0) {
+ --yleft;
+ vdptrstep(cb1, cbpitch);
+ vdptrstep(cr1, crpitch);
+ }
+ }
+
+ const uint8 *cr = cr0;
+ const uint8 *cb = cb0;
+
+ // vertical interpolation: cr
+ if(yaccum & 7) {
+ const uint8 *const srcs[2]={cr0, cr1};
+ vfunc(crbufv, srcs, chroma_srcwidth, (yaccum & 7) << 5);
+ cr = crbufv;
+ }
+
+ // horizontal interpolation: cr
+ if (hfunc) {
+ hfunc(crbufh, cr, horiz_count);
+ cr = crbufh;
+ }
+
+ // vertical interpolation: cb
+ if(yaccum & 7) {
+ const uint8 *const srcs[2]={cb0, cb1};
+ vfunc(cbbufv, srcs, chroma_srcwidth, (yaccum & 7) << 5);
+ cb = cbbufv;
+ }
+
+ // horizontal interpolation: cb
+ if (hfunc) {
+ hfunc(cbbufh, cb, horiz_count);
+ cb = cbbufh;
+ }
+
+ dfunc(out, y, cb, cr, w);
+ vdptrstep(out, outpitch);
+ vdptrstep(y, ypitch);
+
+ if (!--h)
+ break;
+
+ yaccum += yinc;
+ }
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ __asm emms
+ }
+#endif
+}
+
+namespace {
+ typedef void (*tpUVBltHorizDecoder)(uint8 *dst, const uint8 *src, sint32 w);
+ typedef void (*tpUVBltVertDecoder)(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+
+ void uvplaneblt(uint8 *dst, ptrdiff_t dstpitch, int dstformat, const uint8 *src, ptrdiff_t srcpitch, int srcformat, vdpixsize w, vdpixsize h) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(srcformat);
+ const VDPixmapFormatInfo& dstinfo = VDPixmapGetInfo(dstformat);
+
+ int xshift = srcinfo.auxwbits - dstinfo.auxwbits;
+ int yshift = srcinfo.auxhbits - dstinfo.auxhbits;
+
+ tpUVBltHorizDecoder hfunc = NULL;
+ tpUVBltVertDecoder vfunc = NULL;
+
+ switch(xshift) {
+ case +2:
+ hfunc = horiz_expand4x_coaligned;
+ break;
+ case +1:
+ hfunc = horiz_expand2x_coaligned;
+ break;
+ case 0:
+ break;
+ case -1:
+ hfunc = horiz_compress2x_coaligned;
+ break;
+ case -2:
+ hfunc = horiz_compress4x_coaligned;
+ break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+#ifdef _M_IX86
+ uint32 cpuflags = CPUGetEnabledExtensions();
+
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (hfunc == horiz_expand2x_coaligned)
+ hfunc = horiz_expand2x_coaligned_ISSE;
+ }
+#endif
+
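+		// Pointers to the last winsize (possibly horizontally filtered) source
+		// rows are kept in a small ring buffer; winposnext is an 8.8 fixed-point
+		// source row position advanced by winstep per output row, and its
+		// fractional part is the vertical filter phase.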
+ int winsize, winposnext, winstep;
+
+ switch(yshift) {
+ case +2:
+ vfunc = vert_expand4x_centered;
+ winsize = 2;
+ winposnext = 0xa0;
+ winstep = 0x40;
+ break;
+ case +1:
+ vfunc = vert_expand2x_centered;
+ winsize = 2;
+ winposnext = 0xc0;
+ winstep = 0x80;
+ break;
+ case 0:
+ winsize = 1;
+ winposnext = 0;
+ winstep = 0x100;
+ break;
+ case -1:
+ vfunc = vert_compress2x_centered;
+ winsize = 4;
+ winposnext = 0x200;
+ winstep = 0x200;
+ break;
+ case -2:
+ vfunc = vert_compress4x_centered;
+ winsize = 8;
+ winposnext = 0x500;
+ winstep = 0x400;
+ break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (vfunc == vert_expand2x_centered)
+ vfunc = vert_expand2x_centered_ISSE;
+ }
+#endif
+
+ int dsth = -(-h >> dstinfo.auxhbits);
+ int srch = -(-h >> srcinfo.auxhbits);
+ int dstw = -(-w >> dstinfo.auxwbits);
+ int w2 = -(-w >> std::min<int>(dstinfo.auxwbits, srcinfo.auxwbits));
+
+ int winpos = (winposnext>>8) - winsize;
+
+ const uint8 *window[16];
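+		// Each fetched row pointer is stored twice (at winoffset and at
+		// winoffset + winsize) so that window + (winpos & (winsize-1)) always
+		// yields winsize consecutive valid pointers for the vertical filter.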
+
+ vdblock<uint8> tmpbuf;
+ ptrdiff_t tmppitch = (w+15) & ~15;
+
+ if (vfunc && hfunc)
+ tmpbuf.resize(tmppitch * winsize);
+
+ do {
+ int desiredpos = winposnext >> 8;
+
+ while(winpos < desiredpos) {
+ const uint8 *srcrow = vdptroffset(src, srcpitch * std::max<int>(0, std::min<int>(srch-1, ++winpos)));
+ int winoffset = (winpos-1) & (winsize-1);
+
+ if (hfunc) {
+ uint8 *dstrow = vfunc ? tmpbuf.data() + tmppitch * winoffset : dst;
+ hfunc(dstrow, srcrow, w2);
+ srcrow = dstrow;
+ }
+
+ window[winoffset] = window[winoffset + winsize] = srcrow;
+ }
+
+ if (vfunc)
+ vfunc(dst, window + (winpos & (winsize-1)), dstw, winposnext & 255);
+ else if (!hfunc)
+ memcpy(dst, window[winpos & (winsize-1)], dstw);
+
+ winposnext += winstep;
+ vdptrstep(dst, dstpitch);
+ } while(--dsth);
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ __asm emms
+ }
+#endif
+ }
+}
+
+void VDPixmapBlt_YUVPlanar_convert_reference(const VDPixmap& dstpm, const VDPixmap& srcpm, vdpixsize w, vdpixsize h) {
+ VDMemcpyRect(dstpm.data, dstpm.pitch, srcpm.data, srcpm.pitch, dstpm.w, dstpm.h);
+
+ if (srcpm.format != nsVDPixmap::kPixFormat_Y8) {
+ if (dstpm.format != nsVDPixmap::kPixFormat_Y8) {
+ // YCbCr -> YCbCr
+ uvplaneblt((uint8 *)dstpm.data2, dstpm.pitch2, dstpm.format, (uint8 *)srcpm.data2, srcpm.pitch2, srcpm.format, w, h);
+ uvplaneblt((uint8 *)dstpm.data3, dstpm.pitch3, dstpm.format, (uint8 *)srcpm.data3, srcpm.pitch3, srcpm.format, w, h);
+ }
+ } else {
+ if (dstpm.format != nsVDPixmap::kPixFormat_Y8) {
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(dstpm.format);
+ VDMemset8Rect(dstpm.data2, dstpm.pitch2, 0x80, -(-w >> info.auxwbits), -(-h >> info.auxhbits));
+ VDMemset8Rect(dstpm.data3, dstpm.pitch3, 0x80, -(-w >> info.auxwbits), -(-h >> info.auxhbits));
+ }
+ }
+}
+
+extern "C" void vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_MMX(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_RGB565_scan_MMX(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_MMX(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_ISSE(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_RGB565_scan_ISSE(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_ISSE(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+
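+// YUV411 planar -> RGB converters: on x86 the groups of four pixels are handled
+// by the MMX/ISSE scan routines declared above, while the x64 build uses an
+// equivalent C path that interpolates the chroma samples with 3:1, 1:1 and 1:3
+// weights across the group.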
+DECLARE_YUV_PLANAR(YUV411, XRGB1555) {
+ uint16 *out = (uint16 *)dst.data;
+ const ptrdiff_t opitch = dst.pitch;
+ const uint8 *yrow = (const uint8 *)src.data;
+ const uint8 *cbrow = (const uint8 *)src.data2;
+ const uint8 *crrow = (const uint8 *)src.data3;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ vdpixsize wpairs = (w-1)>>2;
+ vdpixsize wleft = w - (wpairs<<2);
+
+ do {
+ uint16 *p = out;
+ const uint8 *y = yrow;
+ const uint8 *cb = cbrow;
+ const uint8 *cr = crrow;
+ vdpixsize wt;
+
+ if (wpairs > 0) {
+#ifdef _M_AMD64
+ wt = wpairs;
+
+ do {
+ const unsigned cb0 = cb[0];
+ const unsigned cb1 = cb[1];
+ const unsigned cr0 = cr[0];
+ const unsigned cr1 = cr[1];
+
+ p[0] = ycbcr_to_1555(y[0], cb0, cr0);
+ p[1] = ycbcr_to_1555(y[1], (3*cb0+cb1+2)>>2, (3*cr0+cr1+2)>>2);
+ p[2] = ycbcr_to_1555(y[2], (cb0+cb1+1)>>1, (cr0+cr1+1)>>1);
+ p[3] = ycbcr_to_1555(y[3], (cb0+3*cb1+2)>>2, (cr0+3*cr1+2)>>2);
+
+ y += 4;
+ p += 4;
+ ++cb;
+ ++cr;
+ } while(--wt);
+#else
+ vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_ISSE(p, y, cb, cr, wpairs);
+ y += 4*wpairs;
+ cr += wpairs;
+ cb += wpairs;
+ p += 4*wpairs;
+#endif
+ }
+
+ if (wleft > 0) {
+ wt = wleft;
+
+ const uint8 cr0 = *cr;
+ const uint8 cb0 = *cb;
+
+ do {
+ *p++ = ycbcr_to_1555(*y++, cb0, cr0);
+ } while(--wt);
+ }
+
+ vdptrstep(out, opitch);
+ vdptrstep(yrow, ypitch);
+ vdptrstep(cbrow, cbpitch);
+ vdptrstep(crrow, crpitch);
+ } while(--h);
+
+#ifndef _M_AMD64
+ __asm emms
+#endif
+}
+
+DECLARE_YUV_PLANAR(YUV411, RGB565) {
+ uint16 *out = (uint16 *)dst.data;
+ const ptrdiff_t opitch = dst.pitch;
+ const uint8 *yrow = (const uint8 *)src.data;
+ const uint8 *cbrow = (const uint8 *)src.data2;
+ const uint8 *crrow = (const uint8 *)src.data3;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ vdpixsize wpairs = (w-1)>>2;
+ vdpixsize wleft = w - (wpairs<<2);
+
+ do {
+ uint16 *p = out;
+ const uint8 *y = yrow;
+ const uint8 *cb = cbrow;
+ const uint8 *cr = crrow;
+ vdpixsize wt;
+
+ if (wpairs > 0) {
+#ifdef _M_AMD64
+ wt = wpairs;
+
+ do {
+ const unsigned cb0 = cb[0];
+ const unsigned cb1 = cb[1];
+ const unsigned cr0 = cr[0];
+ const unsigned cr1 = cr[1];
+
+ p[0] = ycbcr_to_565(y[0], cb0, cr0);
+ p[1] = ycbcr_to_565(y[1], (3*cb0+cb1+2)>>2, (3*cr0+cr1+2)>>2);
+ p[2] = ycbcr_to_565(y[2], (cb0+cb1+1)>>1, (cr0+cr1+1)>>1);
+ p[3] = ycbcr_to_565(y[3], (cb0+3*cb1+2)>>2, (cr0+3*cr1+2)>>2);
+
+ y += 4;
+ p += 4;
+ ++cb;
+ ++cr;
+ } while(--wt);
+#else
+			vdasm_pixblt_YUV411Planar_to_RGB565_scan_ISSE(p, y, cb, cr, wpairs);
+			// advance to the leftover pixels, as the XRGB1555/XRGB8888 paths do
+			y += 4*wpairs;
+			cr += wpairs;
+			cb += wpairs;
+			p += 4*wpairs;
+#endif
+ }
+
+ if (wleft > 0) {
+ wt = wleft;
+
+ const uint8 cr0 = *cr;
+ const uint8 cb0 = *cb;
+
+ do {
+ *p++ = ycbcr_to_565(*y++, cb0, cr0);
+ } while(--wt);
+ }
+
+ vdptrstep(out, opitch);
+ vdptrstep(yrow, ypitch);
+ vdptrstep(cbrow, cbpitch);
+ vdptrstep(crrow, crpitch);
+ } while(--h);
+
+#ifndef _M_AMD64
+ __asm emms
+#endif
+}
+
+DECLARE_YUV_PLANAR(YUV411, RGB888) {
+ uint8 *out = (uint8 *)dst.data;
+ const ptrdiff_t opitch = dst.pitch;
+ const uint8 *yrow = (const uint8 *)src.data;
+ const uint8 *cbrow = (const uint8 *)src.data2;
+ const uint8 *crrow = (const uint8 *)src.data3;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ vdpixsize wpairs = (w-1)>>2;
+ vdpixsize wleft = w - (wpairs<<2);
+
+ do {
+ uint8 *p = out;
+ const uint8 *y = yrow;
+ const uint8 *cb = cbrow;
+ const uint8 *cr = crrow;
+ vdpixsize wt;
+
+ if (wpairs > 0) {
+ wt = wpairs;
+
+ do {
+ const unsigned cb0 = cb[0];
+ const unsigned cb1 = cb[1];
+ const unsigned cr0 = cr[0];
+ const unsigned cr1 = cr[1];
+
+ ycbcr_to_888(p+0, y[0], cb0, cr0);
+ ycbcr_to_888(p+3, y[1], (3*cb0+cb1+2)>>2, (3*cr0+cr1+2)>>2);
+ ycbcr_to_888(p+6, y[2], (cb0+cb1+1)>>1, (cr0+cr1+1)>>1);
+ ycbcr_to_888(p+9, y[3], (cb0+3*cb1+2)>>2, (cr0+3*cr1+2)>>2);
+
+ y += 4;
+ p += 12;
+ ++cb;
+ ++cr;
+ } while(--wt);
+ }
+
+ if (wleft > 0) {
+ wt = wleft;
+
+ const uint8 cr0 = *cr;
+ const uint8 cb0 = *cb;
+
+ do {
+ ycbcr_to_888(p, *y++, cb0, cr0);
+ p += 4;
+				p += 3;	// 3 bytes per RGB888 pixel, matching the 12-byte step of the paired loop
+ }
+
+ vdptrstep(out, opitch);
+ vdptrstep(yrow, ypitch);
+ vdptrstep(cbrow, cbpitch);
+ vdptrstep(crrow, crpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_PLANAR(YUV411, XRGB8888) {
+ uint32 *out = (uint32 *)dst.data;
+ const ptrdiff_t opitch = dst.pitch;
+ const uint8 *yrow = (const uint8 *)src.data;
+ const uint8 *cbrow = (const uint8 *)src.data2;
+ const uint8 *crrow = (const uint8 *)src.data3;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ vdpixsize wpairs = (w-1)>>2;
+ vdpixsize wleft = w - (wpairs<<2);
+
+ do {
+ uint32 *p = out;
+ const uint8 *y = yrow;
+ const uint8 *cb = cbrow;
+ const uint8 *cr = crrow;
+ vdpixsize wt;
+
+ if (wpairs > 0) {
+#ifdef _M_AMD64
+ wt = wpairs;
+
+ do {
+ const unsigned cb0 = cb[0];
+ const unsigned cb1 = cb[1];
+ const unsigned cr0 = cr[0];
+ const unsigned cr1 = cr[1];
+
+ p[0] = ycbcr_to_8888(y[0], cb0, cr0);
+ p[1] = ycbcr_to_8888(y[1], (3*cb0+cb1+2)>>2, (3*cr0+cr1+2)>>2);
+ p[2] = ycbcr_to_8888(y[2], (cb0+cb1+1)>>1, (cr0+cr1+1)>>1);
+ p[3] = ycbcr_to_8888(y[3], (cb0+3*cb1+2)>>2, (cr0+3*cr1+2)>>2);
+
+ y += 4;
+ p += 4;
+ ++cb;
+ ++cr;
+ } while(--wt);
+#else
+ vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_MMX(p, y, cb, cr, wpairs);
+ y += 4*wpairs;
+ cr += wpairs;
+ cb += wpairs;
+ p += 4*wpairs;
+#endif
+ }
+
+ if (wleft > 0) {
+ wt = wleft;
+
+ const uint8 cr0 = *cr;
+ const uint8 cb0 = *cb;
+
+ do {
+ *p++ = ycbcr_to_8888(*y++, cb0, cr0);
+ } while(--wt);
+ }
+
+ vdptrstep(out, opitch);
+ vdptrstep(yrow, ypitch);
+ vdptrstep(cbrow, cbpitch);
+ vdptrstep(crrow, crpitch);
+ } while(--h);
+
+#ifndef _M_AMD64
+ __asm emms
+#endif
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv2yuv.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv2yuv.cpp
new file mode 100644
index 000000000..b581e9bf7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv2yuv.cpp
@@ -0,0 +1,260 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/memory.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+#include "bitutils.h"
+#include "blt_spanutils.h"
+
+#define DECLARE_YUV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV_PLANAR(x, y) void VDPixmapBlt_##x##_to_##y##_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+
+using namespace nsVDPixmapBitUtils;
+using namespace nsVDPixmapSpanUtils;
+
+DECLARE_YUV(XVYU, UYVY) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ srcpitch -= (w&~1)*4;
+ dstpitch -= (w&~1)*2;
+
+ do {
+ vdpixsize wt = w;
+
+ wt = -wt;
+
+ if (++wt) {
+ uint32 a, b, c;
+
+ a = src[0];
+ b = src[1];
+ *dst++ = (avg_8888_121(a, a, b) & 0xff00ff) + (a & 0xff00) + ((b & 0xff00)<<16);
+ src += 2;
+
+ if ((wt+=2) < 0) {
+ do {
+ a = src[-1];
+ b = src[0];
+ c = src[1];
+
+ *dst++ = (avg_8888_121(a, b, c) & 0xff00ff) + (b & 0xff00) + ((c & 0xff00)<<16);
+ src += 2;
+ } while((wt+=2) < 0);
+ }
+ }
+
+ if (!(wt&1))
+ *dst = *src;
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(XVYU, YUYV) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ srcpitch -= (w&~1)*4;
+ dstpitch -= (w&~1)*2;
+
+ do {
+ vdpixsize wt = w;
+
+ wt = -wt;
+
+ if (++wt) {
+ uint32 a, b, c;
+
+ a = src[0];
+ b = src[1];
+ *dst++ = ((avg_8888_121(a, a, b) & 0xff00ff)<<8) + ((a & 0xff00)>>8) + ((b & 0xff00)<<8);
+ src += 2;
+
+ if ((wt+=2)<0) {
+ do {
+ a = src[-1];
+ b = src[0];
+ c = src[1];
+
+ *dst++ = ((avg_8888_121(a, b, c) & 0xff00ff)<<8) + ((b & 0xff00)>>8) + ((c & 0xff00)<<8);
+ src += 2;
+ } while((wt+=2) < 0);
+ }
+ }
+
+ if (!(wt&1)) {
+ uint32 v = *src;
+ *dst = ((v&0xff00ff)<<8) + ((v&0xff00ff00)>>8);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, YUYV) { // also YUYV->UYVY
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ w = (w+1) >> 1;
+
+ dstpitch -= 4*w;
+ srcpitch -= 4*w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ const uint32 p = *src++;
+
+ *dst++ = ((p & 0xff00ff00)>>8) + ((p & 0x00ff00ff)<<8);
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, Y8) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= w;
+ srcpitch -= 2*w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = src[1];
+ src += 2;
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, Y8) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= w;
+ srcpitch -= 2*w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = src[0];
+ src += 2;
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, UYVY) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 2*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ dst[0] = 0x80;
+ dst[1] = *src++;
+ dst += 2;
+ } while(--w2);
+
+ if (w & 1) {
+ dst[0] = 0x80;
+ dst[1] = dst[-1];
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, YUYV) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 2*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ dst[0] = *src++;
+ dst[1] = 0x80;
+ dst += 2;
+ } while(--w2);
+
+ if (w & 1) {
+ dst[0] = dst[-1];
+ dst[1] = 0x80;
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_PLANAR(YUV411, YV12) {
+ VDMemcpyRect(dst.data, dst.pitch, src.data, src.pitch, w, h);
+
+ vdblock<uint8> tmprow(w);
+ const uint8 *srcp = (const uint8 *)src.data2;
+ ptrdiff_t srcpitch = src.pitch2;
+ uint8 *dstp = (uint8 *)dst.data2;
+ ptrdiff_t dstpitch = dst.pitch2;
+ const uint8 *src1, *src2;
+
+ vdpixsize h2;
+ for(h2 = h; h2 > 0; h2 -= 2) {
+ src1 = srcp;
+ vdptrstep(srcp, srcpitch);
+ if (h2 > 1)
+ src2 = srcp;
+ else
+ src2 = src1;
+ vdptrstep(srcp, srcpitch);
+
+ const uint8 *sources[2] = {src1, src2};
+
+ vert_compress2x_centered_fast(tmprow.data(), sources, w, 0);
+ horiz_expand2x_coaligned(dstp, tmprow.data(), w);
+
+ vdptrstep(dstp, dstpitch);
+ }
+
+ srcp = (const uint8 *)src.data3;
+ srcpitch = src.pitch3;
+ dstp = (uint8 *)dst.data3;
+ dstpitch = dst.pitch3;
+ for(h2 = h; h2 > 0; h2 -= 2) {
+ src1 = srcp;
+ vdptrstep(srcp, srcpitch);
+ if (h2 > 1)
+ src2 = srcp;
+ else
+ src2 = src1;
+ vdptrstep(srcp, srcpitch);
+
+ const uint8 *sources[2] = {src1, src2};
+ vert_compress2x_centered_fast(tmprow.data(), sources, w, 0);
+ horiz_expand2x_coaligned(dstp, tmprow.data(), w);
+
+ vdptrstep(dstp, dstpitch);
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuvrev.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuvrev.cpp
new file mode 100644
index 000000000..d6f38bf65
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuvrev.cpp
@@ -0,0 +1,530 @@
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "blt_spanutils.h"
+
+#ifdef _M_IX86
+ #include "blt_spanutils_x86.h"
+#endif
+
+using namespace nsVDPixmapSpanUtils;
+
+namespace {
+ // From Jim Blinn's "Dirty Pixels":
+ //
+ // Y = .299R + .587G + .114B
+ // Cr = 0.713(R-Y)
+ // Cb = 0.564(B-Y)
+ //
+ // IY = 219Y + 16 = ((yt = 1052IR + 2065IG + 401IB) + 67584) >> 12
+ // ICr = 224Cr + 128 = (yt*2987 - 10507932IR + 2155872256) >> 24
+ // ICb = 224Cb + 128 = (yt*2363 - 8312025IB + 2155872256) >> 24
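+	//
+	// As a quick check of the integer luma form: black (0,0,0) gives
+	// (0 + 67584) >> 12 = 16 and white (255,255,255) gives
+	// (3518*255 + 67584) >> 12 = 235, the expected limited-range endpoints.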
+
+ void ConvertRGB32ToXVYU32(uint32 *dst, const uint8 *src, sint32 count) {
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ const sint32 yt = 1052*r + 2065*g + 401*b;
+ const sint32 y = (yt + 67584) >> 4; // <<8 alignment shift
+ const sint32 cr = (10507932*r - yt*2987 + 2155872256U) >> 8; // <<16 alignment shift
+ const sint32 cb = ( 8312025*b - yt*2363 + 2155872256U) >> 24;
+
+ *dst++ = (y&0xff00) + cb + (cr&0xff0000); // VYU order
+ src += 4;
+ } while(--count);
+ }
+
+ void ConvertRGB24ToXVYU32(uint32 *dst, const uint8 *src, sint32 count) {
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ const sint32 yt = 1052*r + 2065*g + 401*b;
+ const sint32 y = (yt + 67584) >> 4; // <<8 alignment shift
+ const sint32 cr = (10507932*r - yt*2987 + 2155872256U) >> 8; // <<16 alignment shift
+ const sint32 cb = ( 8312025*b - yt*2363 + 2155872256U) >> 24;
+
+ *dst++ = (y&0xff00) + cb + (cr&0xff0000); // VYU order
+ src += 3;
+ } while(--count);
+ }
+
+ void ConvertRGB16ToXVYU32(uint32 *dst, const uint16 *src, sint32 count) {
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0xf800) >> 11;
+ const sint32 g = (px & 0x07e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ const sint32 yt = 8652*r + 8358*g + 3299*b;
+ const sint32 y = (yt + 67584) >> 4; // <<8 alignment shift
+			const sint32 cr = (86436217*r - yt*2987 + 2155872256U) >> 8;	// <<16 alignment shift
+			const sint32 cb = (68373108*b - yt*2363 + 2155872256U) >> 24;
+
+ *dst++ = (y&0xff00) + cb + (cr&0xff0000); // VYU order
+ } while(--count);
+ }
+
+ void ConvertRGB15ToXVYU32(uint32 *dst, const uint16 *src, sint32 count) {
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0x7c00) >> 10;
+ const sint32 g = (px & 0x03e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ const sint32 yt = 8652*r + 16986*g + 3299*b;
+ const sint32 y = (yt + 67584) >> 4; // <<8 alignment shift
+ const sint32 cr = (86436217*r - yt*2987 + 2155872256U) >> 8; // <<16 alignment shift
+ const sint32 cb = (68373108*b - yt*2363 + 2155872256U) >> 24;
+
+ *dst++ = (y&0xff00) + cb + (cr&0xff0000); // VYU order
+ } while(--count);
+ }
+
+ void ConvertRGB32ToY8(uint8 *dst, const uint8 *src, sint32 count) {
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ *dst++ = (uint8)((1052*r + 2065*g + 401*b + 67584) >> 12);
+ src += 4;
+ } while(--count);
+ }
+
+ void ConvertRGB24ToY8(uint8 *dst, const uint8 *src, sint32 count) {
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ *dst++ = (uint8)((1052*r + 2065*g + 401*b + 67584) >> 12);
+ src += 3;
+ } while(--count);
+ }
+
+ void ConvertRGB16ToY8(uint8 *dst, const uint16 *src, sint32 count) {
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0xf800) >> 11;
+ const sint32 g = (px & 0x07e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ *dst++ = (uint8)((8652*r + 8358*g + 3299*b + 67584) >> 12);
+ } while(--count);
+ }
+
+ void ConvertRGB15ToY8(uint8 *dst, const uint16 *src, sint32 count) {
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0x7c00) >> 10;
+ const sint32 g = (px & 0x03e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ *dst++ = (uint8)((8652*r + 16986*g + 3299*b + 67584) >> 12);
+ } while(--count);
+ }
+}
+
+#define DECLARE_YUV_REV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+
+DECLARE_YUV_REV(XRGB1555, XVYU) {
+ do {
+ ConvertRGB15ToXVYU32((uint32 *)dst0, (const uint16 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(RGB565, XVYU) {
+ do {
+ ConvertRGB16ToXVYU32((uint32 *)dst0, (const uint16 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(RGB888, XVYU) {
+ do {
+ ConvertRGB24ToXVYU32((uint32 *)dst0, (const uint8 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(XRGB8888, XVYU) {
+ do {
+ ConvertRGB32ToXVYU32((uint32 *)dst0, (const uint8 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(XRGB1555, Y8) {
+ do {
+ ConvertRGB15ToY8((uint8 *)dst0, (const uint16 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(RGB565, Y8) {
+ do {
+ ConvertRGB16ToY8((uint8 *)dst0, (const uint16 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(RGB888, Y8) {
+ do {
+ ConvertRGB24ToY8((uint8 *)dst0, (const uint8 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(XRGB8888, Y8) {
+ do {
+ ConvertRGB32ToY8((uint8 *)dst0, (const uint8 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+
+
+
+
+namespace {
+ void ConvertRGB32ToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ const sint32 yt = 1052*r + 2065*g + 401*b;
+ *ydst++ = (yt + 67584) >> 12;
+ *crdst++ = (10507932*r - yt*2987 + 2155872256U) >> 24;
+ *cbdst++ = ( 8312025*b - yt*2363 + 2155872256U) >> 24;
+ src += 4;
+ } while(--count);
+ }
+
+ void ConvertRGB24ToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ const sint32 yt = 1052*r + 2065*g + 401*b;
+ *ydst++ = (yt + 67584) >> 12;
+ *crdst++ = (10507932*r - yt*2987 + 2155872256U) >> 24;
+ *cbdst++ = ( 8312025*b - yt*2363 + 2155872256U) >> 24;
+ src += 3;
+ } while(--count);
+ }
+
+ void ConvertRGB16ToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint16 *src = (const uint16 *)src0;
+
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0xf800) >> 11;
+ const sint32 g = (px & 0x07e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ const sint32 yt = 8652*r + 8358*g + 3299*b;
+ *ydst++ = (yt + 67584) >> 12;
+ *crdst++ = (86436217*r - yt*2987 + 2155872256U) >> 24;
+ *cbdst++ = (68373108*b - yt*2363 + 2155872256U) >> 24;
+ } while(--count);
+ }
+
+ void ConvertRGB15ToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint16 *src = (const uint16 *)src0;
+
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0x7c00) >> 10;
+ const sint32 g = (px & 0x03e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ const sint32 yt = 8652*r + 16986*g + 3299*b;
+ *ydst++ = (yt + 67584) >> 12;
+ *crdst++ = (86436217*r - yt*2987 + 2155872256U) >> 24;
+ *cbdst++ = (68373108*b - yt*2363 + 2155872256U) >> 24;
+ } while(--count);
+ }
+
+ void ConvertUYVYToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ *cbdst++ = src[0];
+ *ydst++ = src[1];
+ *crdst++ = src[2];
+ if (!--count)
+ break;
+ *ydst++ = src[3];
+ src += 4;
+ } while(--count);
+ }
+
+ void ConvertYUYVToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ *cbdst++ = src[1];
+ *ydst++ = src[0];
+ *crdst++ = src[3];
+ if (!--count)
+ break;
+ *ydst++ = src[2];
+ src += 4;
+ } while(--count);
+ }
+}
+
+void VDPixmapBlt_YUVPlanar_encode_reference(const VDPixmap& dstbm, const VDPixmap& srcbm, vdpixsize w, vdpixsize h) {
+ void (*cfunc)(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src, sint32 w) = NULL;
+ void (*hfunc)(uint8 *dst, const uint8 *src, sint32 w) = NULL;
+ void (*vfunc)(uint8 *dst, const uint8 *const *sources, sint32 w, uint8 phase) = NULL;
+
+ bool halfchroma = false;
+
+ switch(srcbm.format) {
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ cfunc = ConvertRGB15ToYUVPlanar;
+ break;
+ case nsVDPixmap::kPixFormat_RGB565:
+ cfunc = ConvertRGB16ToYUVPlanar;
+ break;
+ case nsVDPixmap::kPixFormat_RGB888:
+ cfunc = ConvertRGB24ToYUVPlanar;
+ break;
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ cfunc = ConvertRGB32ToYUVPlanar;
+ break;
+ case nsVDPixmap::kPixFormat_YUV422_UYVY:
+ cfunc = ConvertUYVYToYUVPlanar;
+ halfchroma = true;
+ break;
+ case nsVDPixmap::kPixFormat_YUV422_YUYV:
+ cfunc = ConvertYUYVToYUVPlanar;
+ halfchroma = true;
+ break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+ vdpixsize w2 = w;
+ vdpixsize h2 = h;
+ int winstep = 1;
+ int winsize = 1;
+ int winposnext = 0;
+ vdpixsize chroma_srcw = w;
+
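+	// w2/h2 track the destination chroma plane size. When the target is
+	// vertically subsampled, winsize converted chroma rows are kept in a
+	// sliding window that vfunc collapses into one output row, advancing by
+	// winstep source rows each time; winposnext sets the initial phase.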
+ switch(dstbm.format) {
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ if (halfchroma)
+ hfunc = horiz_expand2x_coaligned;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ if (halfchroma)
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ else
+ hfunc = horiz_compress2x_coaligned;
+
+ w2 = (w2+1) >> 1;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar_Centered:
+ if (halfchroma) {
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ hfunc = horiz_realign_to_centered;
+ } else
+ hfunc = horiz_compress2x_centered;
+
+ w2 = (w2+1) >> 1;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ if (halfchroma)
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ else
+ hfunc = horiz_compress2x_coaligned;
+
+ vfunc = vert_compress2x_centered;
+ winstep = 2;
+ winposnext = 2;
+ winsize = 4;
+ h2 = (h+1) >> 1;
+ w2 = (w2+1) >> 1;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar_Centered:
+ if (halfchroma) {
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ hfunc = horiz_realign_to_centered;
+ } else
+ hfunc = horiz_compress2x_centered;
+
+ vfunc = vert_compress2x_centered;
+ winstep = 2;
+ winposnext = 2;
+ winsize = 4;
+ h2 = (h+1) >> 1;
+ w2 = (w2+1) >> 1;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ if (halfchroma) {
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ hfunc = horiz_compress2x_coaligned;
+ } else
+ hfunc = horiz_compress4x_coaligned;
+ w2 = (w2+1) >> 2;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ if (halfchroma) {
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ hfunc = horiz_compress2x_coaligned;
+ } else
+ hfunc = horiz_compress4x_coaligned;
+ vfunc = vert_compress4x_centered;
+ winsize = 8;
+ winposnext = 5;
+ winstep = 4;
+ h2 = (h+3) >> 2;
+ w2 = (w2+3) >> 2;
+ break;
+ }
+
+#ifdef _M_IX86
+ uint32 cpuflags = CPUGetEnabledExtensions();
+
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (hfunc == horiz_expand2x_coaligned)
+ hfunc = horiz_expand2x_coaligned_ISSE;
+ }
+#endif
+
+ const uint8 *src = (const uint8 *)srcbm.data;
+ const ptrdiff_t srcpitch = srcbm.pitch;
+
+ uint8 *ydst = (uint8 *)dstbm.data;
+ uint8 *cbdst = (uint8 *)dstbm.data2;
+ uint8 *crdst = (uint8 *)dstbm.data3;
+ const ptrdiff_t ydstpitch = dstbm.pitch;
+ const ptrdiff_t cbdstpitch = dstbm.pitch2;
+ const ptrdiff_t crdstpitch = dstbm.pitch3;
+
+ if (!vfunc) {
+ if (hfunc) {
+ uint32 tmpsize = (w + 15) & ~15;
+
+ vdblock<uint8> tmp(tmpsize * 2);
+ uint8 *const cbtmp = tmp.data();
+ uint8 *const crtmp = cbtmp + tmpsize;
+
+ do {
+ cfunc(ydst, cbtmp, crtmp, src, w);
+ src += srcpitch;
+ ydst += ydstpitch;
+ hfunc(cbdst, cbtmp, chroma_srcw);
+ hfunc(crdst, crtmp, chroma_srcw);
+ cbdst += cbdstpitch;
+ crdst += crdstpitch;
+ } while(--h);
+ } else if (dstbm.format == nsVDPixmap::kPixFormat_Y8) {
+ // wasteful, but oh well
+ uint32 tmpsize = (w2+15)&~15;
+		vdblock<uint8> tmp(tmpsize * 2);
+
+ cbdst = tmp.data();
+ crdst = cbdst + tmpsize;
+
+ do {
+ cfunc(ydst, cbdst, crdst, src, w);
+ src += srcpitch;
+ ydst += ydstpitch;
+ } while(--h2);
+ } else {
+ do {
+ cfunc(ydst, cbdst, crdst, src, w);
+ src += srcpitch;
+ ydst += ydstpitch;
+ cbdst += cbdstpitch;
+ crdst += crdstpitch;
+ } while(--h2);
+ }
+ } else {
+ const uint32 tmpsize = w2;
+
+ vdblock<uint8> tmpbuf(tmpsize * (winsize + 1) * 2 + 2 * w);
+
+ uint8 *cbwindow[16];
+ uint8 *crwindow[16];
+
+ uint8 *p = tmpbuf.data();
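+		// Each window row pointer is stored twice (at i and at winsize+i) so
+		// that cbwindow + winoffset always addresses winsize consecutive rows,
+		// even after the circular window has wrapped.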
+ for(int i=0; i<winsize; ++i) {
+ cbwindow[i] = cbwindow[winsize+i] = p;
+ p += tmpsize;
+ crwindow[i] = crwindow[winsize+i] = p;
+ p += tmpsize;
+ }
+
+ uint8 *cbtmp = p;
+ uint8 *crtmp = p + w;
+
+ int winoffset;
+ int winpos = winposnext - winsize;
+ bool firstline = true;
+
+ do {
+ while(winpos < winposnext) {
+ winoffset = ++winpos & (winsize - 1);
+
+ bool valid = (unsigned)(winpos-1) < (unsigned)(h-1); // -1 because we generate line 0 as the first window line
+ if (valid || firstline) {
+ if (hfunc) {
+ cfunc(ydst, cbtmp, crtmp, src, w);
+ hfunc(cbwindow[winoffset + winsize - 1], cbtmp, chroma_srcw);
+ hfunc(crwindow[winoffset + winsize - 1], crtmp, chroma_srcw);
+ } else {
+ cfunc(ydst, cbwindow[winoffset + winsize - 1], crwindow[winoffset + winsize - 1], src, w);
+ }
+ src += srcpitch;
+ ydst += ydstpitch;
+ firstline = false;
+ } else {
+					// dupe last generated line -- could be done by pointer swapping, but I'm lazy
+ memcpy(cbwindow[winoffset + winsize - 1], cbwindow[winoffset + winsize - 2], w2);
+ memcpy(crwindow[winoffset + winsize - 1], crwindow[winoffset + winsize - 2], w2);
+ }
+ }
+ winposnext += winstep;
+
+ vfunc(cbdst, cbwindow + winoffset, w2, 0);
+ vfunc(crdst, crwindow + winoffset, w2, 0);
+ cbdst += cbdstpitch;
+ crdst += crdstpitch;
+ } while(--h2);
+ }
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ __asm emms
+ }
+#endif
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_setup.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_setup.cpp
new file mode 100644
index 000000000..ce999221a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_setup.cpp
@@ -0,0 +1,17 @@
+#include "blt_setup.h"
+
+void VDPixmapBlitterTable::Clear() {
+ memset(mTable, 0, sizeof mTable);
+}
+
+void VDPixmapBlitterTable::AddBlitter(const VDPixmapFormatSubset& srcFormats, VDPixmapFormatSubset& dstFormats, VDPixmapBlitterFn blitter) {
+ for(int i=0; i<srcFormats.mFormatCount; ++i) {
+ int srcFormat = srcFormats.mFormats[i];
+ for(int j=0; j<dstFormats.mFormatCount; ++j) {
+ int dstFormat = dstFormats.mFormats[j];
+
+ if (srcFormat != dstFormat)
+ mTable[srcFormat][dstFormat] = blitter;
+ }
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils.cpp
new file mode 100644
index 000000000..6baeeca36
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils.cpp
@@ -0,0 +1,365 @@
+#include "blt_spanutils.h"
+#include "bitutils.h"
+
+using namespace nsVDPixmapBitUtils;
+
+namespace nsVDPixmapSpanUtils {
+ void horiz_expand2x_centered(uint8 *dst, const uint8 *src, sint32 w) {
+ w = -w;
+
+ *dst++ = *src;
+
+ if (++w) {
+ if (++w) {
+ do {
+ dst[0] = (uint8)((3*src[0] + src[1] + 2)>>2);
+ dst[1] = (uint8)((src[0] + 3*src[1] + 2)>>2);
+ dst += 2;
+ ++src;
+ } while((w+=2)<0);
+ }
+
+ if (!(w & 1)) {
+ *dst = src[0];
+ }
+ }
+ }
+
+ void horiz_expand2x_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ w = -w;
+
+ if ((w+=2) < 0) {
+ do {
+ dst[0] = src[0];
+ dst[1] = (uint8)((src[0] + src[1] + 1)>>1);
+ dst += 2;
+ ++src;
+ } while((w+=2)<0);
+ }
+
+ w -= 2;
+ while(w < 0) {
+ ++w;
+ *dst++ = src[0];
+ }
+ }
+
+ void horiz_expand4x_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ w = -w;
+
+ if ((w+=4) < 0) {
+ do {
+ dst[0] = src[0];
+ dst[1] = (uint8)((3*src[0] + src[1] + 2)>>2);
+ dst[2] = (uint8)((src[0] + src[1] + 1)>>1);
+ dst[3] = (uint8)((src[0] + 3*src[1] + 2)>>2);
+ dst += 4;
+ ++src;
+ } while((w+=4)<0);
+ }
+
+ w -= 4;
+ while(w < 0) {
+ ++w;
+ *dst++ = src[0];
+ }
+ }
+
+ void horiz_compress2x_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w == 1) {
+ *dst = *src;
+ return;
+ }
+
+ *dst++ = (uint8)((3*src[0] + src[1] + 2) >> 2);
+ ++src;
+ --w;
+
+ while(w >= 3) {
+ w -= 2;
+ *dst++ = (uint8)((src[0] + 2*src[1] + src[2] + 2) >> 2);
+ src += 2;
+ }
+
+ if (w >= 2)
+ *dst++ = (uint8)((src[0] + 3*src[1] + 2) >> 2);
+ }
+
+ void horiz_compress2x_centered(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w == 1) {
+ *dst = *src;
+ return;
+ }
+
+ if (w == 2) {
+ *dst = (uint8)((src[0] + src[1] + 1) >> 1);
+ return;
+ }
+
+ *dst++ = (uint8)((4*src[0] + 3*src[1] + src[2] + 4) >> 3);
+ --w;
+ ++src;
+
+ while(w >= 4) {
+ w -= 2;
+ *dst++ = (uint8)(((src[0] + src[3]) + 3*(src[1] + src[2]) + 4) >> 3);
+ src += 2;
+ }
+
+ switch(w) {
+ case 3:
+ *dst++ = (uint8)((src[0] + 3*src[1] + 4*src[2] + 4) >> 3);
+ break;
+ case 2:
+ *dst++ = (uint8)((src[0] + 7*src[1] + 4) >> 3);
+ break;
+ }
+ }
+
+ void horiz_compress4x_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w == 1) {
+ *dst = *src;
+ return;
+ }
+
+ if (w == 2) {
+ *dst++ = (uint8)((11*src[0] + 5*src[1] + 8) >> 4);
+ return;
+ }
+
+ *dst++ = (uint8)((11*src[0] + 4*src[1] + src[2] + 8) >> 4);
+ src += 2;
+ w -= 2;
+
+ while(w >= 5) {
+ w -= 4;
+ *dst++ = (uint8)(((src[0] + src[4]) + 4*(src[1] + src[3]) + 6*src[2] + 8) >> 4);
+ src += 4;
+ }
+
+ switch(w) {
+ case 4:
+ *dst = (uint8)((src[0] + 4*src[1] + 6*src[2] + 5*src[3] + 8) >> 4);
+ break;
+ case 3:
+ *dst = (uint8)((src[0] + 4*src[1] + 11*src[2] + 8) >> 4);
+ break;
+ }
+ }
+
+ void horiz_compress4x_centered(uint8 *dst, const uint8 *src, sint32 w) {
+
+ switch(w) {
+ case 1:
+ *dst = *src;
+ return;
+ case 2: // 29 99
+ *dst = (uint8)((29*src[0] + 99*src[1] + 64) >> 7);
+ return;
+ case 3: // 29 35 64
+			*dst = (uint8)((29*src[0] + 35*src[1] + 64*src[2] + 64) >> 7);
+ return;
+ case 4: // 29 35 35 29
+ *dst = (uint8)((29*src[0] + 35*(src[1] + src[2]) + 29*src[3] + 64) >> 7);
+ return;
+ case 5: // 29 35 35 21 8
+ // 1 7 120
+ dst[0] = (uint8)((29*src[0] + 35*(src[1] + src[2]) + 21*src[3] + 8*src[4] + 64) >> 7);
+ dst[1] = (uint8)((src[2] + 7*src[3] + 120*src[4] + 64) >> 7);
+ return;
+ }
+
+ *dst++ = (uint8)((29*src[0] + 35*(src[1] + src[2]) + 21*src[3] + 7*src[4] + src[5] + 64) >> 7);
+ src += 2;
+ w -= 2;
+
+ while(w >= 8) {
+ w -= 4;
+ *dst++ = (uint8)(((src[0] + src[7]) + 7*(src[1] + src[6]) + 21*(src[2] + src[5]) + 35*(src[3] + src[4]) + 64) >> 7);
+ src += 4;
+ }
+
+ switch(w) {
+ case 4: // 1 7 21 99
+ *dst = (uint8)((src[0] + 7*src[1] + 21*src[2] + 99*src[3] + 64) >> 7);
+ break;
+ case 5: // 1 7 21 35 64
+ *dst = (uint8)((src[0] + 7*src[1] + 21*src[2] + 35*src[3] + 64*src[4] + 64) >> 7);
+ break;
+ case 6: // 1 7 21 35 35 29
+ *dst = (uint8)((src[0] + 7*src[1] + 21*src[2] + 29*src[5] + 35*(src[3] + src[4]) + 64) >> 7);
+ break;
+ case 7: // 1 7 21 35 35 21 8
+ // 1 7 120
+ dst[0] = (uint8)((src[0] + 7*src[1] + 8*src[6] + 21*(src[2] + src[5]) + 35*(src[3] + src[4]) + 64) >> 7);
+ dst[1] = (uint8)((src[4] + 7*src[5] + 120*src[6] + 64) >> 7);
+ break;
+ }
+ }
+
+ void horiz_realign_to_centered(uint8 *dst, const uint8 *src, sint32 w) {
+ // luma samples: Y Y Y Y Y
+ // coaligned: C C C
+ // centered: C C
+ //
+ // To realign coaligned samples to centered, we need to shift them
+ // right by a quarter sample in chroma space. This can be done via
+ // a [3 1]/4 filter.
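+		//
+		// For example, src = {0, 255} becomes dst = {64, 255}: each output is
+		// pulled a quarter of the way toward its right-hand neighbour, and the
+		// final sample is copied through.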
+
+ for(sint32 i=1; i<w; ++i) {
+ dst[0] = (uint8)((3*(uint32)src[0] + (uint32)src[1] + 2) >> 2);
+ ++dst;
+ ++src;
+ }
+
+ *dst++ = *src++;
+ }
+
+ void horiz_realign_to_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ // luma samples: Y Y Y Y Y
+ // coaligned: C C C
+ // centered: C C
+ //
+ // To realign centered samples to coaligned, we need to shift them
+ // left by a quarter sample in chroma space. This can be done via
+ // a [1 3]/4 filter.
+
+ *dst++ = *src++;
+
+ for(sint32 i=1; i<w; ++i) {
+ dst[0] = (uint8)(((uint32)src[-1] + 3*(uint32)src[0] + 2) >> 2);
+ ++dst;
+ ++src;
+ }
+ }
+
+ void vert_expand2x_centered(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase) {
+ const uint8 *src3 = srcs[0];
+ const uint8 *src1 = srcs[1];
+
+ if (phase >= 128)
+ std::swap(src1, src3);
+
+ sint32 w4 = w>>2;
+ w &= 3;
+
+ if (w4) {
+ const uint32 *src34 = (const uint32 *)src3;
+ const uint32 *src14 = (const uint32 *)src1;
+ uint32 *dst4 = ( uint32 *)dst;
+
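+			// SWAR version of the scalar tail below: ab is the bytewise floor
+			// average of the two rows, and the ceil-average of 'a' with ab
+			// gives roughly (3*a + b) / 4 per byte without unpacking.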
+ do {
+ const uint32 a = *src34++;
+ const uint32 b = *src14++;
+ const uint32 ab = (a&b) + (((a^b)&0xfefefefe)>>1);
+
+ *dst4++ = (a|ab) - (((a^ab)&0xfefefefe)>>1);
+ } while(--w4);
+
+ src3 = (const uint8 *)src34;
+ src1 = (const uint8 *)src14;
+ dst = ( uint8 *)dst4;
+ }
+
+ if (w) {
+ do {
+ *dst++ = (uint8)((*src1++ + 3**src3++ + 2) >> 2);
+ } while(--w);
+ }
+ }
+
+ void vert_expand4x_centered(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase) {
+ const uint8 *src3 = srcs[0];
+ const uint8 *src1 = srcs[1];
+
+ switch(phase & 0xc0) {
+ case 0x00:
+ do {
+ *dst++ = (uint8)((1**src1++ + 7**src3++ + 4) >> 3);
+ } while(--w);
+ break;
+ case 0x40:
+ do {
+ *dst++ = (uint8)((3**src1++ + 5**src3++ + 4) >> 3);
+ } while(--w);
+ break;
+ case 0x80:
+ do {
+ *dst++ = (uint8)((5**src1++ + 3**src3++ + 4) >> 3);
+ } while(--w);
+ break;
+ case 0xc0:
+ do {
+ *dst++ = (uint8)((7**src1++ + 1**src3++ + 4) >> 3);
+ } while(--w);
+ break;
+ default:
+ VDNEVERHERE;
+ }
+ }
+
+ void vert_compress2x_centered_fast(uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase) {
+ const uint8 *src1 = srcarray[0];
+ const uint8 *src2 = srcarray[1];
+
+ w = -w;
+ w += 3;
+
+ while(w < 0) {
+ *(uint32 *)dst = avg_8888_11(*(uint32 *)src1, *(uint32 *)src2);
+ dst += 4;
+ src1 += 4;
+ src2 += 4;
+ w += 4;
+ }
+
+ w -= 3;
+
+ while(w < 0) {
+ *dst = (uint8)((*src1 + *src2 + 1)>>1);
+ ++dst;
+ ++src1;
+ ++src2;
+ ++w;
+ }
+ }
+
+ void vert_compress2x_centered(uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase) {
+ const uint8 *src1 = srcarray[0];
+ const uint8 *src2 = srcarray[1];
+ const uint8 *src3 = srcarray[2];
+ const uint8 *src4 = srcarray[3];
+
+ w = -w;
+
+ while(w < 0) {
+ *dst++ = (uint8)(((*src1++ + *src4++) + 3*(*src2++ + *src3++) + 4)>>3);
+ ++w;
+ }
+ }
+
+ void vert_compress4x_centered(uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase) {
+ const uint8 *src1 = srcarray[0];
+ const uint8 *src2 = srcarray[1];
+ const uint8 *src3 = srcarray[2];
+ const uint8 *src4 = srcarray[3];
+ const uint8 *src5 = srcarray[4];
+ const uint8 *src6 = srcarray[5];
+ const uint8 *src7 = srcarray[6];
+ const uint8 *src8 = srcarray[7];
+
+ w = -w;
+
+ while(w < 0) {
+ int sum18 = *src1++ + *src8++;
+ int sum27 = *src2++ + *src7++;
+ int sum36 = *src3++ + *src6++;
+ int sum45 = *src4++ + *src5++;
+
+ *dst++ = (uint8)((sum18 + 7*sum27 + 21*sum36 + 35*sum45 + 64) >> 7);
+
+ ++w;
+ }
+ }
+}
+
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils_x86.cpp
new file mode 100644
index 000000000..ea9e0599a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils_x86.cpp
@@ -0,0 +1,170 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include "blt_spanutils_x86.h"
+
+#ifdef _MSC_VER
+ #pragma warning(disable: 4799) // warning C4799: function 'nsVDPixmapSpanUtils::vdasm_horiz_expand2x_coaligned_ISSE' has no EMMS instruction
+#endif
+
+extern "C" void __cdecl vdasm_horiz_expand2x_coaligned_ISSE(void *dst, const void *src, uint32 count);
+extern "C" void __cdecl vdasm_horiz_expand4x_coaligned_MMX(void *dst, const void *src, uint32 count);
+extern "C" void __cdecl vdasm_vert_average_13_ISSE(void *dst, const void *src1, const void *src3, uint32 count);
+extern "C" void __cdecl vdasm_vert_average_17_ISSE(void *dst, const void *src1, const void *src3, uint32 count);
+extern "C" void __cdecl vdasm_vert_average_35_ISSE(void *dst, const void *src1, const void *src3, uint32 count);
+
+namespace nsVDPixmapSpanUtils {
+
+ void horiz_expand2x_coaligned_ISSE(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w >= 17) {
+ uint32 fastcount = (w - 1) & ~15;
+
+ vdasm_horiz_expand2x_coaligned_ISSE(dst, src, fastcount);
+ dst += fastcount;
+ src += fastcount >> 1;
+ w -= fastcount;
+ }
+
+ w = -w;
+ if ((w+=2) < 0) {
+ do {
+ dst[0] = src[0];
+ dst[1] = (uint8)((src[0] + src[1] + 1)>>1);
+ dst += 2;
+ ++src;
+ } while((w+=2)<0);
+ }
+
+ w -= 2;
+ while(w < 0) {
+ ++w;
+ *dst++ = src[0];
+ }
+ }
+
+ void horiz_expand4x_coaligned_MMX(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w >= 17) {
+ uint32 fastcount = (w - 1) >> 4;
+
+ vdasm_horiz_expand4x_coaligned_MMX(dst, src, fastcount);
+ dst += fastcount << 4;
+ src += fastcount << 2;
+ w -= fastcount << 4;
+ }
+
+ w = -w;
+ if ((w+=4) < 0) {
+ do {
+ dst[0] = src[0];
+ dst[1] = (uint8)((3*src[0] + src[1] + 2)>>2);
+ dst[2] = (uint8)((src[0] + src[1] + 1)>>1);
+ dst[3] = (uint8)((src[0] + 3*src[1] + 2)>>2);
+ dst += 4;
+ ++src;
+ } while((w+=4)<0);
+ }
+
+ w -= 4;
+ while(w < 0) {
+ ++w;
+ *dst++ = src[0];
+ }
+ }
+
+ void vert_expand2x_centered_ISSE(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase) {
+ const uint8 *src3 = srcs[0];
+ const uint8 *src1 = srcs[1];
+
+ if (phase >= 128)
+ std::swap(src1, src3);
+
+ uint32 fastcount = w & ~15;
+
+ if (fastcount) {
+ vdasm_vert_average_13_ISSE(dst, src1, src3, fastcount);
+ dst += fastcount;
+ src1 += fastcount;
+ src3 += fastcount;
+ w -= fastcount;
+ }
+
+ if (w) {
+ do {
+ *dst++ = (uint8)((*src1++ + 3**src3++ + 2) >> 2);
+ } while(--w);
+ }
+ }
+
+ void vert_average_1_7_ISSE(uint8 *dst, const uint8 *src7, const uint8 *src1, sint32 w) {
+ uint32 fastcount = w & ~7;
+
+ if (fastcount) {
+ vdasm_vert_average_17_ISSE(dst, src1, src7, fastcount);
+ dst += fastcount;
+ src1 += fastcount;
+ src7 += fastcount;
+ w -= fastcount;
+ }
+
+ if (w) {
+ do {
+ *dst++ = (uint8)((*src1++ + 7**src7++ + 4) >> 3);
+ } while(--w);
+ }
+ }
+
+ void vert_average_3_5_ISSE(uint8 *dst, const uint8 *src7, const uint8 *src1, sint32 w) {
+ uint32 fastcount = w & ~7;
+
+ if (fastcount) {
+ vdasm_vert_average_35_ISSE(dst, src1, src7, fastcount);
+ dst += fastcount;
+ src1 += fastcount;
+ src7 += fastcount;
+ w -= fastcount;
+ }
+
+ if (w) {
+ do {
+ *dst++ = (uint8)((3**src1++ + 5**src7++ + 4) >> 3);
+ } while(--w);
+ }
+ }
+
+ void vert_expand4x_centered_ISSE(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase) {
+ const uint8 *src1 = srcs[0];
+ const uint8 *src2 = srcs[1];
+
+ switch(phase & 0xc0) {
+ case 0x00:
+ vert_average_1_7_ISSE(dst, src2, src1, w);
+ break;
+ case 0x40:
+ vert_average_3_5_ISSE(dst, src2, src1, w);
+ break;
+ case 0x80:
+ vert_average_3_5_ISSE(dst, src1, src2, w);
+ break;
+ case 0xc0:
+ vert_average_1_7_ISSE(dst, src1, src2, w);
+ break;
+ default:
+ VDNEVERHERE;
+ }
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_uberblit.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_uberblit.cpp
new file mode 100644
index 000000000..dcaa20907
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_uberblit.cpp
@@ -0,0 +1,19 @@
+#include <vd2/system/vdalloc.h>
+#include <vd2/Kasumi/pixmap.h>
+#include "uberblit.h"
+
+void VDPixmapBlt_UberblitAdapter(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ vdautoptr<IVDPixmapBlitter> blitter(VDPixmapCreateBlitter(dst, src));
+
+ if (w > src.w)
+ w = src.w;
+ if (w > dst.w)
+ w = dst.w;
+ if (h > src.h)
+ h = src.h;
+ if (h > dst.h)
+ h = dst.h;
+
+ vdrect32 r(0, 0, w, h);
+ blitter->Blit(dst, &r, src);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_x86.cpp
new file mode 100644
index 000000000..af1519c5b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_x86.cpp
@@ -0,0 +1,144 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "blt_setup.h"
+
+void VDPixmapInitBlittersReference(VDPixmapBlitterTable& table);
+
+#define DECLARE_PALETTED(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal0);
+#define DECLARE_RGB(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+#define DECLARE_RGB_ASM(x, y) extern "C" void vdasm_pixblt_##x##_to_##y(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+#define DECLARE_RGB_ASM_MMX(x, y) extern "C" void vdasm_pixblt_##x##_to_##y##_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+#define DECLARE_YUV(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+#define DECLARE_YUV_REV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV_PLANAR(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+
+ DECLARE_RGB_ASM(RGB565, XRGB1555); DECLARE_RGB_ASM_MMX(RGB565, XRGB1555);
+ DECLARE_RGB_ASM(RGB888, XRGB1555);
+ DECLARE_RGB_ASM(XRGB8888, XRGB1555); DECLARE_RGB_ASM_MMX(XRGB8888, XRGB1555);
+ DECLARE_RGB_ASM(XRGB1555, RGB565); DECLARE_RGB_ASM_MMX(XRGB1555, RGB565);
+ DECLARE_RGB_ASM(RGB888, RGB565);
+ DECLARE_RGB_ASM(XRGB8888, RGB565); DECLARE_RGB_ASM_MMX(XRGB8888, RGB565);
+DECLARE_RGB(XRGB1555, RGB888);
+DECLARE_RGB(RGB565, RGB888);
+ DECLARE_RGB_ASM(XRGB8888, RGB888); DECLARE_RGB_ASM_MMX(XRGB8888, RGB888);
+ DECLARE_RGB_ASM(XRGB1555, XRGB8888); DECLARE_RGB_ASM_MMX(XRGB1555, XRGB8888);
+ DECLARE_RGB_ASM(RGB565, XRGB8888); DECLARE_RGB_ASM_MMX(RGB565, XRGB8888);
+ DECLARE_RGB_ASM(RGB888, XRGB8888); DECLARE_RGB_ASM_MMX(RGB888, XRGB8888);
+
+DECLARE_PALETTED(Pal1, Any8);
+DECLARE_PALETTED(Pal1, Any16);
+DECLARE_PALETTED(Pal1, Any24);
+DECLARE_PALETTED(Pal1, Any32);
+DECLARE_PALETTED(Pal2, Any8);
+DECLARE_PALETTED(Pal2, Any16);
+DECLARE_PALETTED(Pal2, Any24);
+DECLARE_PALETTED(Pal2, Any32);
+DECLARE_PALETTED(Pal4, Any8);
+DECLARE_PALETTED(Pal4, Any16);
+DECLARE_PALETTED(Pal4, Any24);
+DECLARE_PALETTED(Pal4, Any32);
+DECLARE_PALETTED(Pal8, Any8);
+DECLARE_PALETTED(Pal8, Any16);
+DECLARE_PALETTED(Pal8, Any24);
+DECLARE_PALETTED(Pal8, Any32);
+
+DECLARE_YUV(XVYU, UYVY);
+DECLARE_YUV(XVYU, YUYV);
+DECLARE_YUV(Y8, UYVY);
+DECLARE_YUV(Y8, YUYV);
+DECLARE_YUV(UYVY, Y8);
+DECLARE_YUV(YUYV, Y8);
+DECLARE_YUV(UYVY, YUYV);
+DECLARE_YUV_PLANAR(YUV411, YV12);
+
+DECLARE_YUV(UYVY, XRGB1555);
+DECLARE_YUV(UYVY, RGB565);
+DECLARE_YUV(UYVY, RGB888);
+DECLARE_YUV(UYVY, XRGB8888);
+DECLARE_YUV(YUYV, XRGB1555);
+DECLARE_YUV(YUYV, RGB565);
+DECLARE_YUV(YUYV, RGB888);
+DECLARE_YUV(YUYV, XRGB8888);
+DECLARE_YUV(Y8, XRGB1555);
+DECLARE_YUV(Y8, RGB565);
+DECLARE_YUV(Y8, RGB888);
+DECLARE_YUV(Y8, XRGB8888);
+
+DECLARE_YUV_REV(XRGB1555, Y8);
+DECLARE_YUV_REV(RGB565, Y8);
+DECLARE_YUV_REV(RGB888, Y8);
+DECLARE_YUV_REV(XRGB8888, Y8);
+
+DECLARE_YUV_REV(XRGB1555, XVYU);
+DECLARE_YUV_REV(RGB565, XVYU);
+DECLARE_YUV_REV(RGB888, XVYU);
+DECLARE_YUV_REV(XRGB8888, XVYU);
+
+DECLARE_YUV_PLANAR(YV12, XRGB1555);
+DECLARE_YUV_PLANAR(YV12, RGB565);
+DECLARE_YUV_PLANAR(YV12, RGB888);
+DECLARE_YUV_PLANAR(YV12, XRGB8888);
+
+DECLARE_YUV_PLANAR(YUV411, XRGB1555);
+DECLARE_YUV_PLANAR(YUV411, RGB565);
+DECLARE_YUV_PLANAR(YUV411, RGB888);
+DECLARE_YUV_PLANAR(YUV411, XRGB8888);
+
+extern void VDPixmapBlt_YUVPlanar_decode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_YUVPlanar_encode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_YUVPlanar_convert_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+
+using namespace nsVDPixmap;
+
+void VDPixmapInitBlittersX86(VDPixmapBlitterTable& table) {
+ VDPixmapInitBlittersReference(table);
+
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB1555_to_RGB565>);
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB1555_to_XRGB8888>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB565_to_XRGB1555>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB565_to_XRGB8888>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB888_to_XRGB1555>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB888_to_RGB565>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB888_to_XRGB8888>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_XRGB1555>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_RGB565>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_RGB888>);
+}
+
+tpVDPixBltTable VDGetPixBltTableX86ScalarInternal() {
+ static VDPixmapBlitterTable sReferenceTable;
+
+ VDPixmapInitBlittersX86(sReferenceTable);
+
+ return sReferenceTable.mTable;
+}
+
+tpVDPixBltTable VDGetPixBltTableX86MMXInternal() {
+ static VDPixmapBlitterTable sReferenceTable;
+
+ VDPixmapInitBlittersX86(sReferenceTable);
+
+ sReferenceTable.AddBlitter(kPixFormat_XRGB1555, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB1555_to_RGB565_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_XRGB1555, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB1555_to_XRGB8888_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB565_to_XRGB1555_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB565_to_XRGB8888_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB888_to_XRGB8888_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_XRGB8888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_XRGB1555_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_RGB565_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_RGB888_MMX>);
+
+ return sReferenceTable.mTable;
+}
+
+tpVDPixBltTable VDGetPixBltTableX86Scalar() {
+ static tpVDPixBltTable spTable = VDGetPixBltTableX86ScalarInternal();
+
+ return spTable;
+}
+
+tpVDPixBltTable VDGetPixBltTableX86MMX() {
+ static tpVDPixBltTable spTable = VDGetPixBltTableX86MMXInternal();
+
+ return spTable;
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/pixel.cpp b/src/thirdparty/VirtualDub/Kasumi/source/pixel.cpp
new file mode 100644
index 000000000..45797ca4b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/pixel.cpp
@@ -0,0 +1,667 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include <vd2/system/math.h>
+#include <vd2/system/halffloat.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixel.h>
+
+uint32 VDPixmapSample(const VDPixmap& px, sint32 x, sint32 y) {
+ if (x >= px.w)
+ x = px.w - 1;
+ if (y >= px.h)
+ y = px.h - 1;
+ if (x < 0)
+ x = 0;
+ if (y < 0)
+ y = 0;
+
+ switch(px.format) {
+ case nsVDPixmap::kPixFormat_Pal1:
+ {
+ uint8 idx = ((const uint8 *)px.data + px.pitch*y)[x >> 3];
+
+ return px.palette[(idx >> (7 - (x & 7))) & 1];
+ }
+
+ case nsVDPixmap::kPixFormat_Pal2:
+ {
+ uint8 idx = ((const uint8 *)px.data + px.pitch*y)[x >> 2];
+
+ return px.palette[(idx >> (6 - (x & 3)*2)) & 3];
+ }
+
+ case nsVDPixmap::kPixFormat_Pal4:
+ {
+ uint8 idx = ((const uint8 *)px.data + px.pitch*y)[x >> 1];
+
+ if (!(x & 1))
+ idx >>= 4;
+
+ return px.palette[idx & 15];
+ }
+
+ case nsVDPixmap::kPixFormat_Pal8:
+ {
+ uint8 idx = ((const uint8 *)px.data + px.pitch*y)[x];
+
+ return px.palette[idx];
+ }
+
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ {
+ uint16 c = ((const uint16 *)((const uint8 *)px.data + px.pitch*y))[x];
+ uint32 r = c & 0x7c00;
+ uint32 g = c & 0x03e0;
+ uint32 b = c & 0x001f;
+ uint32 rgb = (r << 9) + (g << 6) + (b << 3);
+
+ return rgb + ((rgb >> 5) & 0x070707);
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_RGB565:
+ {
+ uint16 c = ((const uint16 *)((const uint8 *)px.data + px.pitch*y))[x];
+ uint32 r = c & 0xf800;
+ uint32 g = c & 0x07e0;
+ uint32 b = c & 0x001f;
+ uint32 rb = (r << 8) + (b << 3);
+
+ return rb + ((rb >> 5) & 0x070007) + (g << 5) + ((g >> 1) & 0x0300);
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_RGB888:
+ {
+ const uint8 *src = (const uint8 *)px.data + px.pitch*y + 3*x;
+ uint32 b = src[0];
+ uint32 g = src[1];
+ uint32 r = src[2];
+
+ return (r << 16) + (g << 8) + b;
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ return ((const uint32 *)((const uint8 *)px.data + px.pitch*y))[x];
+
+ case nsVDPixmap::kPixFormat_Y8:
+ {
+ uint8 luma = ((const uint8 *)px.data + px.pitch*y)[x];
+
+ return ((luma - 16)*255/219) * 0x010101;
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ return VDConvertYCbCrToRGB(VDPixmapSample8(px.data, px.pitch, x, y), VDPixmapSample8(px.data2, px.pitch2, x, y), VDPixmapSample8(px.data3, px.pitch3, x, y));
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ {
+ sint32 u = (x << 7) + 128;
+ sint32 v = (y << 8);
+ uint32 w2 = px.w >> 1;
+ uint32 h2 = px.h;
+
+ return VDConvertYCbCrToRGB(
+ VDPixmapSample8(px.data, px.pitch, x, y),
+ VDPixmapInterpolateSample8(px.data2, px.pitch2, w2, h2, u, v),
+ VDPixmapInterpolateSample8(px.data3, px.pitch3, w2, h2, u, v));
+ }
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ {
+ sint32 u = (x << 7) + 128;
+ sint32 v = (y << 7);
+ uint32 w2 = px.w >> 1;
+ uint32 h2 = px.h >> 1;
+
+ return VDConvertYCbCrToRGB(
+ VDPixmapSample8(px.data, px.pitch, x, y),
+ VDPixmapInterpolateSample8(px.data2, px.pitch2, w2, h2, u, v),
+ VDPixmapInterpolateSample8(px.data3, px.pitch3, w2, h2, u, v));
+ }
+
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ {
+ sint32 u = (x << 6) + 128;
+ sint32 v = (y << 8);
+ uint32 w2 = px.w >> 2;
+ uint32 h2 = px.h;
+
+ return VDConvertYCbCrToRGB(
+ VDPixmapSample8(px.data, px.pitch, x, y),
+ VDPixmapInterpolateSample8(px.data2, px.pitch2, w2, h2, u, v),
+ VDPixmapInterpolateSample8(px.data3, px.pitch3, w2, h2, u, v));
+ }
+
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ {
+ sint32 u = (x << 6) + 128;
+ sint32 v = (y << 6);
+ uint32 w2 = px.w >> 2;
+ uint32 h2 = px.h >> 2;
+
+ return VDConvertYCbCrToRGB(
+ VDPixmapSample8(px.data, px.pitch, x, y),
+ VDPixmapInterpolateSample8(px.data2, px.pitch2, w2, h2, u, v),
+ VDPixmapInterpolateSample8(px.data3, px.pitch3, w2, h2, u, v));
+ }
+
+ default:
+ return VDPixmapInterpolateSampleRGB24(px, (x << 8) + 128, (y << 8) + 128);
+ }
+}
+
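+// The interpolated samplers below take coordinates in 1/256ths of a pixel with
+// sample centers at +128; the -128 bias converts to a top-left origin and the
+// low 8 bits of each coordinate become the bilinear weights.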
+uint8 VDPixmapInterpolateSample8(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint8 *row0 = (const uint8 *)data + pitch * (y_256 >> 8);
+ const uint8 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 += pitch;
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 1 : 0;
+ sint32 xoffset = x_256 & 255;
+ sint32 yoffset = y_256 & 255;
+ sint32 p00 = row0[0];
+ sint32 p10 = row0[xstep];
+ sint32 p01 = row1[0];
+ sint32 p11 = row1[xstep];
+ sint32 p0 = (p00 << 8) + (p10 - p00)*xoffset;
+ sint32 p1 = (p01 << 8) + (p11 - p01)*xoffset;
+ sint32 p = ((p0 << 8) + (p1 - p0)*yoffset + 0x8000) >> 16;
+
+ return (uint8)p;
+}
+
+uint32 VDPixmapInterpolateSample8To24(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint8 *row0 = (const uint8 *)data + pitch * (y_256 >> 8) + (x_256 >> 8);
+ const uint8 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 += pitch;
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 1 : 0;
+ sint32 xoffset = x_256 & 255;
+ sint32 yoffset = y_256 & 255;
+ sint32 p00 = row0[0];
+ sint32 p10 = row0[xstep];
+ sint32 p01 = row1[0];
+ sint32 p11 = row1[xstep];
+ sint32 p0 = (p00 << 8) + (p10 - p00)*xoffset;
+ sint32 p1 = (p01 << 8) + (p11 - p01)*xoffset;
+ sint32 p = (p0 << 8) + (p1 - p0)*yoffset;
+
+ return p;
+}
+
+uint32 VDPixmapInterpolateSample8x2To24(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint8 *row0 = (const uint8 *)data + pitch * (y_256 >> 8) + (x_256 >> 8)*2;
+ const uint8 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 += pitch;
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 2 : 0;
+ sint32 xoffset = x_256 & 255;
+ sint32 yoffset = y_256 & 255;
+ sint32 p00 = row0[0];
+ sint32 p10 = row0[xstep];
+ sint32 p01 = row1[0];
+ sint32 p11 = row1[xstep];
+ sint32 p0 = (p00 << 8) + (p10 - p00)*xoffset;
+ sint32 p1 = (p01 << 8) + (p11 - p01)*xoffset;
+ sint32 p = (p0 << 8) + (p1 - p0)*yoffset;
+
+ return p;
+}
+
+uint32 VDPixmapInterpolateSample8x4To24(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint8 *row0 = (const uint8 *)data + pitch * (y_256 >> 8) + (x_256 >> 8)*4;
+ const uint8 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 += pitch;
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 4 : 0;
+ sint32 xoffset = x_256 & 255;
+ sint32 yoffset = y_256 & 255;
+ sint32 p00 = row0[0];
+ sint32 p10 = row0[xstep];
+ sint32 p01 = row1[0];
+ sint32 p11 = row1[xstep];
+ sint32 p0 = (p00 << 8) + (p10 - p00)*xoffset;
+ sint32 p1 = (p01 << 8) + (p11 - p01)*xoffset;
+ sint32 p = (p0 << 8) + (p1 - p0)*yoffset;
+
+ return p;
+}
+
+float VDPixmapInterpolateSample16F(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint16 *row0 = (const uint16 *)((const uint8 *)data + pitch * (y_256 >> 8) + (x_256 >> 8)*2);
+ const uint16 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 = (const uint16 *)((const char *)row1 + pitch);
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 1 : 0;
+ float xoffset = (float)(x_256 & 255) * (1.0f / 255.0f);
+ float yoffset = (float)(y_256 & 255) * (1.0f / 255.0f);
+
+ float p00;
+ float p10;
+ float p01;
+ float p11;
+ VDConvertHalfToFloat(row0[0], &p00);
+ VDConvertHalfToFloat(row0[xstep], &p10);
+ VDConvertHalfToFloat(row1[0], &p01);
+ VDConvertHalfToFloat(row1[xstep], &p11);
+
+ float p0 = p00 + (p10 - p00)*xoffset;
+ float p1 = p01 + (p11 - p01)*xoffset;
+
+ return p0 + (p1 - p0)*yoffset;
+}
+
+namespace {
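+	// Bilinear blend of four packed 8888 pixels with 8-bit fractions xf/yf.
+	// Red+blue and alpha+green are carried as two 8-bit lanes per 32-bit word,
+	// so each lerp handles a pair of channels with a single multiply.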
+ uint32 Lerp8888(uint32 p0, uint32 p1, uint32 p2, uint32 p3, uint32 xf, uint32 yf) {
+ uint32 rb0 = p0 & 0x00ff00ff;
+ uint32 ag0 = p0 & 0xff00ff00;
+ uint32 rb1 = p1 & 0x00ff00ff;
+ uint32 ag1 = p1 & 0xff00ff00;
+ uint32 rb2 = p2 & 0x00ff00ff;
+ uint32 ag2 = p2 & 0xff00ff00;
+ uint32 rb3 = p3 & 0x00ff00ff;
+ uint32 ag3 = p3 & 0xff00ff00;
+
+ uint32 rbt = (rb0 + ((( rb1 - rb0 )*xf + 0x00800080) >> 8)) & 0x00ff00ff;
+ uint32 agt = (ag0 + ((((ag1 >> 8) - (ag0 >> 8))*xf + 0x00800080) )) & 0xff00ff00;
+ uint32 rbb = (rb2 + ((( rb3 - rb2 )*xf + 0x00800080) >> 8)) & 0x00ff00ff;
+ uint32 agb = (ag2 + ((((ag3 >> 8) - (ag2 >> 8))*xf + 0x00800080) )) & 0xff00ff00;
+ uint32 rb = (rbt + ((( rbb - rbt )*yf + 0x00800080) >> 8)) & 0x00ff00ff;
+ uint32 ag = (agt + ((((agb >> 8) - (agt >> 8))*yf + 0x00800080) )) & 0xff00ff00;
+
+ return rb + ag;
+ }
+
+ uint32 InterpPlanarY8(const VDPixmap& px, sint32 x1, sint32 y1) {
+ sint32 y = VDPixmapInterpolateSample8To24(px.data, px.pitch, px.w, px.h, x1, y1);
+
+ return VDClampedRoundFixedToUint8Fast((float)(y-0x100000) * (1.1643836f/65536.0f/255.0f))*0x010101;
+ }
+
+ uint32 InterpPlanarYCC888(const VDPixmap& px, sint32 x1, sint32 y1, sint32 x23, sint32 y23, uint32 w23, uint32 h23) {
+ float y = (float)(sint32)VDPixmapInterpolateSample8To24(px.data, px.pitch, px.w, px.h, x1, y1);
+ float cb = (float)(sint32)VDPixmapInterpolateSample8To24(px.data2, px.pitch2, w23, h23, x23, y23);
+ float cr = (float)(sint32)VDPixmapInterpolateSample8To24(px.data3, px.pitch3, w23, h23, x23, y23);
+
+ // ! 1.1643836 - 5.599D-17 1.5960268 - 222.92157 !
+ // ! 1.1643836 - 0.3917623 - 0.8129676 135.57529 !
+ // ! 1.1643836 2.0172321 - 1.110D-16 - 276.83585 !
+ uint32 ir = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (1.5960268f/65536.0f/255.0f)*cr - (222.92157f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y - (0.3917623f/65536.0f/255.0f)*cb - (0.8129676f/65536.0f/255.0f)*cr + (135.57529f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (2.0172321f/65536.0f/255.0f)*cb - (276.83585f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+
+ uint32 ConvertYCC72ToRGB24(sint32 iy, sint32 icb, sint32 icr) {
+ float y = (float)iy;
+ float cb = (float)icb;
+ float cr = (float)icr;
+
+ // ! 1.1643836 - 5.599D-17 1.5960268 - 222.92157 !
+ // ! 1.1643836 - 0.3917623 - 0.8129676 135.57529 !
+ // ! 1.1643836 2.0172321 - 1.110D-16 - 276.83585 !
+ uint32 ir = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (1.5960268f/65536.0f/255.0f)*cr - (222.92157f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y - (0.3917623f/65536.0f/255.0f)*cb - (0.8129676f/65536.0f/255.0f)*cr + (135.57529f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (2.0172321f/65536.0f/255.0f)*cb - (276.83585f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+
+ uint32 ConvertYCC72ToRGB24_709(sint32 iy, sint32 icb, sint32 icr) {
+ float y = (float)iy;
+ float cb = (float)icb;
+ float cr = (float)icr;
+
+ // ! 1.1643836 - 2.932D-17 1.7927411 - 248.10099 !
+ // ! 1.1643836 - 0.2132486 - 0.5329093 76.87808 !
+ // ! 1.1643836 2.1124018 - 5.551D-17 - 289.01757 !
+ uint32 ir = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (1.7927411f/65536.0f/255.0f)*cr - (248.10099f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y - (0.2132486f/65536.0f/255.0f)*cb - (0.5329093f/65536.0f/255.0f)*cr + (76.87808f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (2.1124018f/65536.0f/255.0f)*cb - (289.01757f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+
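+	// v210 packs six 4:2:2 pixels (12 ten-bit samples) into four little-endian
+	// 32-bit words, three samples per word, low bits first:
+	//   word0: Cb0 Y0 Cr0   word1: Y1 Cb1 Y2   word2: Cr1 Y3 Cb2   word3: Y4 Cr2 Y5
+	// The shift tables below simply pick each sample out of that layout.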
+ uint32 SampleV210_Y(const void *src, ptrdiff_t srcpitch, sint32 x, sint32 y, uint32 w, uint32 h) {
+ const uint32 *p = (const uint32 *)((const char *)src + srcpitch*y) + (x / 6)*4;
+
+ switch((uint32)x % 6) {
+ default:
+ case 0: return (p[0] >> 10) & 0x3ff;
+ case 1: return (p[1] >> 0) & 0x3ff;
+ case 2: return (p[1] >> 20) & 0x3ff;
+ case 3: return (p[2] >> 10) & 0x3ff;
+ case 4: return (p[3] >> 0) & 0x3ff;
+ case 5: return (p[3] >> 20) & 0x3ff;
+ }
+ }
+
+ uint32 SampleV210_Cb(const void *src, ptrdiff_t srcpitch, sint32 x, sint32 y, uint32 w, uint32 h) {
+ const uint32 *p = (const uint32 *)((const char *)src + srcpitch*y) + (x / 3)*4;
+
+ switch((uint32)x % 3) {
+ default:
+ case 0: return (p[0] >> 0) & 0x3ff;
+ case 1: return (p[1] >> 10) & 0x3ff;
+ case 2: return (p[2] >> 20) & 0x3ff;
+ }
+ }
+
+ uint32 SampleV210_Cr(const void *src, ptrdiff_t srcpitch, sint32 x, sint32 y, uint32 w, uint32 h) {
+ const uint32 *p = (const uint32 *)((const char *)src + srcpitch*y) + (x / 3)*4;
+
+ switch((uint32)x % 3) {
+ default:
+ case 0: return (p[0] >> 20) & 0x3ff;
+ case 1: return (p[2] >> 0) & 0x3ff;
+ case 2: return (p[3] >> 10) & 0x3ff;
+ }
+ }
+}
+
+uint32 VDPixmapInterpolateSampleRGB24(const VDPixmap& px, sint32 x_256, sint32 y_256) {
+ switch(px.format) {
+ case nsVDPixmap::kPixFormat_Pal1:
+ case nsVDPixmap::kPixFormat_Pal2:
+ case nsVDPixmap::kPixFormat_Pal4:
+ case nsVDPixmap::kPixFormat_Pal8:
+ case nsVDPixmap::kPixFormat_RGB565:
+ case nsVDPixmap::kPixFormat_RGB888:
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ {
+ x_256 -= 128;
+ y_256 -= 128;
+ int ix = x_256 >> 8;
+ int iy = y_256 >> 8;
+ uint32 p0 = VDPixmapSample(px, ix, iy);
+ uint32 p1 = VDPixmapSample(px, ix+1, iy);
+ uint32 p2 = VDPixmapSample(px, ix, iy+1);
+ uint32 p3 = VDPixmapSample(px, ix+1, iy+1);
+
+ return Lerp8888(p0, p1, p2, p3, x_256 & 255, y_256 & 255);
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_Y8:
+ return InterpPlanarY8(px, x_256, y_256);
+
+ case nsVDPixmap::kPixFormat_YUV422_UYVY:
+ return ConvertYCC72ToRGB24(
+ VDPixmapInterpolateSample8x2To24((const char *)px.data + 1, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 0, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 2, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV422_YUYV:
+ return ConvertYCC72ToRGB24(
+ VDPixmapInterpolateSample8x2To24((const char *)px.data + 0, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 1, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 3, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV444_XVYU:
+ return ConvertYCC72ToRGB24(
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 1, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 0, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 2, px.pitch, px.w, px.h, x_256, y_256)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV422_UYVY_709:
+ return ConvertYCC72ToRGB24_709(
+ VDPixmapInterpolateSample8x2To24((const char *)px.data + 1, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 0, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 2, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV420_NV12:
+ return ConvertYCC72ToRGB24(
+ VDPixmapInterpolateSample8To24(px.data, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x2To24((const char *)px.data2 + 0, px.pitch2, (px.w + 1) >> 1, (px.h + 1) >> 1, (x_256 >> 1) + 128, y_256 >> 1),
+ VDPixmapInterpolateSample8x2To24((const char *)px.data2 + 1, px.pitch2, (px.w + 1) >> 1, (px.h + 1) >> 1, (x_256 >> 1) + 128, y_256 >> 1)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, x_256, y_256, px.w, px.h);
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, (x_256 >> 1) + 128, y_256, (px.w + 1) >> 1, px.h);
+
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, (x_256 >> 2) + 128, y_256, (px.w + 3) >> 2, px.h);
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, (x_256 >> 1) + 128, y_256 >> 1, (px.w + 1) >> 1, (px.h + 1) >> 1);
+
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, (x_256 >> 2) + 128, y_256 >> 2, (px.w + 3) >> 2, (px.h + 3) >> 2);
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar_Centered:
+ return InterpPlanarYCC888(px, x_256, y_256, x_256 >> 1, y_256 >> 1, (px.w + 1) >> 1, (px.h + 1) >> 1);
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar_Centered:
+ return InterpPlanarYCC888(px, x_256, y_256, x_256 >> 1, y_256, (px.w + 1) >> 1, px.h);
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar_16F:
+ {
+ float y = VDPixmapInterpolateSample16F(px.data, px.pitch, px.w, px.h, x_256, y_256);
+ float cb = VDPixmapInterpolateSample16F(px.data2, px.pitch2, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256);
+ float cr = VDPixmapInterpolateSample16F(px.data3, px.pitch3, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256);
+
+ uint32 ir = VDClampedRoundFixedToUint8Fast(1.1643836f*y + 1.5960268f*cr - (222.92157f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast(1.1643836f*y - 0.3917623f*cb - 0.8129676f*cr + (135.57529f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast(1.1643836f*y + 2.0172321f*cb - (276.83585f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+
+ case nsVDPixmap::kPixFormat_YUV422_V210:
+ {
+ sint32 luma_x = x_256 - 128;
+ sint32 luma_y = y_256 - 128;
+
+ if (luma_x < 0)
+ luma_x = 0;
+
+ if (luma_y < 0)
+ luma_y = 0;
+
+ if (luma_x > (sint32)((px.w - 1) << 8))
+ luma_x = (sint32)((px.w - 1) << 8);
+
+ if (luma_y > (sint32)((px.h - 1) << 8))
+ luma_y = (sint32)((px.h - 1) << 8);
+
+ sint32 luma_ix = luma_x >> 8;
+ sint32 luma_iy = luma_y >> 8;
+ float luma_fx = (float)(luma_x & 255) * (1.0f / 255.0f);
+ float luma_fy = (float)(luma_y & 255) * (1.0f / 255.0f);
+
+ float y0 = SampleV210_Y(px.data, px.pitch, luma_ix+0, luma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float y1 = SampleV210_Y(px.data, px.pitch, luma_ix+1, luma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float y2 = SampleV210_Y(px.data, px.pitch, luma_ix+0, luma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float y3 = SampleV210_Y(px.data, px.pitch, luma_ix+1, luma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float yt = y0 + (y1 - y0)*luma_fx;
+ float yb = y2 + (y3 - y2)*luma_fx;
+ float yr = yt + (yb - yt)*luma_fy;
+
+ uint32 chroma_w = (px.w + 1) >> 1;
+ uint32 chroma_h = px.h;
+ sint32 chroma_x = x_256 >> 1;
+ sint32 chroma_y = y_256 - 128;
+
+ if (chroma_x < 0)
+ chroma_x = 0;
+
+ if (chroma_y < 0)
+ chroma_y = 0;
+
+ if (chroma_x > (sint32)((chroma_w - 1) << 8))
+ chroma_x = (sint32)((chroma_w - 1) << 8);
+
+ if (chroma_y > (sint32)((chroma_h - 1) << 8))
+ chroma_y = (sint32)((chroma_h - 1) << 8);
+
+ sint32 chroma_ix = chroma_x >> 8;
+ sint32 chroma_iy = chroma_y >> 8;
+ float chroma_fx = (float)(chroma_x & 255) * (1.0f / 255.0f);
+ float chroma_fy = (float)(chroma_y & 255) * (1.0f / 255.0f);
+
+ float cb0 = SampleV210_Cb(px.data, px.pitch, chroma_ix+0, chroma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float cb1 = SampleV210_Cb(px.data, px.pitch, chroma_ix+1, chroma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float cb2 = SampleV210_Cb(px.data, px.pitch, chroma_ix+0, chroma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float cb3 = SampleV210_Cb(px.data, px.pitch, chroma_ix+1, chroma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float cbt = cb0 + (cb1 - cb0)*chroma_fx;
+ float cbb = cb2 + (cb3 - cb2)*chroma_fx;
+ float cbr = cbt + (cbb - cbt)*chroma_fy;
+
+ float cr0 = SampleV210_Cr(px.data, px.pitch, chroma_ix+0, chroma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float cr1 = SampleV210_Cr(px.data, px.pitch, chroma_ix+1, chroma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float cr2 = SampleV210_Cr(px.data, px.pitch, chroma_ix+0, chroma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float cr3 = SampleV210_Cr(px.data, px.pitch, chroma_ix+1, chroma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float crt = cr0 + (cr1 - cr0)*chroma_fx;
+ float crb = cr2 + (cr3 - cr2)*chroma_fx;
+ float crr = crt + (crb - crt)*chroma_fy;
+
+ uint32 ir = VDClampedRoundFixedToUint8Fast(1.1643836f*yr + 1.5960268f*crr - (222.92157f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast(1.1643836f*yr - 0.3917623f*cbr - 0.8129676f*crr + (135.57529f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast(1.1643836f*yr + 2.0172321f*cbr - (276.83585f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+ break;
+
+ default:
+ return 0;
+ }
+}
+
+uint32 VDConvertYCbCrToRGB(uint8 y0, uint8 cb0, uint8 cr0) {
+ sint32 y = y0 - 16;
+ sint32 cb = cb0 - 128;
+ sint32 cr = cr0 - 128;
+
+ sint32 y2 = y * 76309 + 0x8000;
+ sint32 r = y2 + cr * 104597;
+ sint32 g = y2 + cr * -53279 + cb * -25674;
+ sint32 b = y2 + cb * 132201;
+
+ r &= ~(r >> 31);
+ g &= ~(g >> 31);
+ b &= ~(b >> 31);
+ r += (0xffffff - r) & ((0xffffff - r) >> 31);
+ g += (0xffffff - g) & ((0xffffff - g) >> 31);
+ b += (0xffffff - b) & ((0xffffff - b) >> 31);
+
+ return (r & 0xff0000) + ((g & 0xff0000) >> 8) + (b >> 16);
+}
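+
+// The constants above are the BT.601 limited-range coefficients in 16.16
+// fixed point: 76309 ~ 1.164384*65536 (255/219), 104597 ~ 1.596027*65536,
+// 25674 ~ 0.391762*65536, 53279 ~ 0.812968*65536, 132201 ~ 2.017232*65536.
+// The 0x8000 bias rounds before the final shift, and the branchless masking
+// clamps each 8.16 channel to [0, 0xffffff].  As a quick check,
+// VDConvertYCbCrToRGB(16, 128, 128) gives 0x000000 and
+// VDConvertYCbCrToRGB(235, 128, 128) gives 0xFFFFFF.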
+
+uint32 VDConvertRGBToYCbCr(uint32 c) {
+ return VDConvertRGBToYCbCr((uint8)(c >> 16), (uint8)(c >> 8), (uint8)c);
+}
+
+uint32 VDConvertRGBToYCbCr(uint8 r8, uint8 g8, uint8 b8) {
+ sint32 r = r8;
+ sint32 g = g8;
+ sint32 b = b8;
+ sint32 yt = 1052*r + 2065*g + 401*b;
+ sint32 y = (yt + 0x10800) >> 4;
+ sint32 cr = (10507932*r - yt*2987 + 0x80800000U) >> 8;
+ sint32 cb = ( 8312025*b - yt*2363 + 0x80800000U) >> 24;
+
+ return (uint8)cb + (y & 0xff00) + (cr&0xff0000);
+}
\ No newline at end of file
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/pixmaputils.cpp b/src/thirdparty/VirtualDub/Kasumi/source/pixmaputils.cpp
new file mode 100644
index 000000000..635cbf3c0
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/pixmaputils.cpp
@@ -0,0 +1,519 @@
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/system/memory.h>
+
+extern VDPixmapFormatInfo g_vdPixmapFormats[] = {
+ // name qchnk qw qh qwb qhb qs ab aw ah as ps
+ /* Null */ { "null", false, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+ /* Pal1 */ { "Pal1", true, 8, 1, 3, 0, 1, 0, 0, 0, 0, 2 },
+ /* Pal2 */ { "Pal2", true, 4, 1, 2, 0, 1, 0, 0, 0, 0, 4 },
+ /* Pal4 */ { "Pal4", true, 2, 1, 1, 0, 1, 0, 0, 0, 0, 16 },
+ /* Pal8 */ { "Pal8", false, 1, 1, 0, 0, 1, 0, 0, 0, 0, 256 },
+ /* RGB16_555 */ { "XRGB1555", false, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0 },
+ /* RGB16_565 */ { "RGB565", false, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0 },
+ /* RGB24 */ { "RGB888", false, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0 },
+ /* RGB32 */ { "XRGB8888", false, 1, 1, 0, 0, 4, 0, 0, 0, 0, 0 },
+ /* Y8 */ { "Y8", false, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
+ /* YUV422_UYVY */ { "UYVY", true, 2, 1, 1, 0, 4, 0, 0, 0, 0, 0 },
+ /* YUV422_YUYV */ { "YUYV", true, 2, 1, 1, 0, 4, 0, 0, 0, 0, 0 },
+ /* YUV444_XVYU */ { "XVYU", false, 1, 1, 0, 0, 4, 0, 0, 0, 0, 0 },
+ /* YUV444_Planar */ { "YUV444", false, 1, 1, 0, 0, 1, 2, 0, 0, 1, 0 },
+ /* YUV422_Planar */ { "YUV422", false, 1, 1, 0, 0, 1, 2, 1, 0, 1, 0 },
+ /* YUV420_Planar */ { "YUV420", false, 1, 1, 0, 0, 1, 2, 1, 1, 1, 0 },
+ /* YUV411_Planar */ { "YUV411", false, 1, 1, 0, 0, 1, 2, 2, 0, 1, 0 },
+ /* YUV410_Planar */ { "YUV410", false, 1, 1, 0, 0, 1, 2, 2, 2, 1, 0 },
+ /* YUV422_Planar_Centered */ { "YUV422C", false, 1, 1, 0, 0, 1, 2, 1, 0, 1, 0 },
+ /* YUV420_Planar_Centered */ { "YUV420C", false, 1, 1, 0, 0, 1, 2, 1, 1, 1, 0 },
+ /* YUV422_Planar_16F */ { "YUV422_16F", false, 1, 1, 0, 0, 2, 2, 1, 0, 2, 0 },
+ /* V210 */ { "v210", true,24, 1, 2, 0, 64, 0, 0, 0, 1, 0 },
+ /* YUV422_UYVY_709 */ { "UYVY-709", true, 2, 1, 1, 0, 4, 0, 0, 0, 0, 0 },
+ /* NV12 */ { "NV12", false, 1, 1, 0, 0, 1, 1, 1, 1, 2, 0 },
+};
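+
+// Column legend, matching the VDPixmapFormatInfo fields used below: qchnk
+// marks horizontally chunky formats, qw/qh give the pixel quantum, qwb/qhb
+// its log2 (for power-of-two quanta), qs the quantum size in bytes, ab the
+// number of auxiliary (chroma) planes, aw/ah the log2 chroma subsampling,
+// as the chroma sample size in bytes, and ps the palette size in entries.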
+
+#ifdef _DEBUG
+ bool VDIsValidPixmapPlane(const void *p, ptrdiff_t pitch, vdpixsize w, vdpixsize h) {
+ bool isvalid;
+
+ if (pitch < 0)
+ isvalid = VDIsValidReadRegion((const char *)p + pitch*(h-1), (-pitch)*(h-1)+w);
+ else
+ isvalid = VDIsValidReadRegion(p, pitch*(h-1)+w);
+
+ if (!isvalid) {
+ VDDEBUG("Kasumi: Invalid pixmap plane detected.\n"
+ " Base=%p, pitch=%d, size=%dx%d (bytes)\n", p, (int)pitch, w, h);
+ }
+
+ return isvalid;
+ }
+
+ bool VDAssertValidPixmap(const VDPixmap& px) {
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(px.format);
+
+ if (px.format) {
+ if (!VDIsValidPixmapPlane(px.data, px.pitch, -(-px.w / info.qw)*info.qsize, -(-px.h >> info.qhbits))) {
+ VDDEBUG("Kasumi: Invalid primary plane detected in pixmap.\n"
+ " Pixmap info: format=%d (%s), dimensions=%dx%d\n", px.format, info.name, px.w, px.h);
+ VDASSERT(!"Kasumi: Invalid primary plane detected in pixmap.\n");
+ return false;
+ }
+
+ if (info.palsize)
+ if (!VDIsValidReadRegion(px.palette, sizeof(uint32) * info.palsize)) {
+ VDDEBUG("Kasumi: Invalid palette detected in pixmap.\n"
+ " Pixmap info: format=%d (%s), dimensions=%dx%d\n", px.format, info.name, px.w, px.h);
+ VDASSERT(!"Kasumi: Invalid palette detected in pixmap.\n");
+ return false;
+ }
+
+ if (info.auxbufs) {
+ const vdpixsize auxw = -(-px.w >> info.auxwbits);
+ const vdpixsize auxh = -(-px.h >> info.auxhbits);
+
+ if (!VDIsValidPixmapPlane(px.data2, px.pitch2, auxw * info.auxsize, auxh)) {
+ VDDEBUG("Kasumi: Invalid Cb plane detected in pixmap.\n"
+ " Pixmap info: format=%d (%s), dimensions=%dx%d\n", px.format, info.name, px.w, px.h);
+ VDASSERT(!"Kasumi: Invalid Cb plane detected in pixmap.\n");
+ return false;
+ }
+
+ if (info.auxbufs > 2) {
+ if (!VDIsValidPixmapPlane(px.data3, px.pitch3, auxw * info.auxsize, auxh)) {
+ VDDEBUG("Kasumi: Invalid Cr plane detected in pixmap.\n"
+ " Pixmap info: format=%d, dimensions=%dx%d\n", px.format, px.w, px.h);
+ VDASSERT(!"Kasumi: Invalid Cr plane detected in pixmap.\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+#endif
+
+VDPixmap VDPixmapOffset(const VDPixmap& src, vdpixpos x, vdpixpos y) {
+ VDPixmap temp(src);
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(temp.format);
+
+ if (info.qchunky) {
+ x = (x + info.qw - 1) / info.qw;
+ y >>= info.qhbits;
+ }
+
+ switch(info.auxbufs) {
+ case 2:
+ temp.data3 = (char *)temp.data3 + (x >> info.auxwbits)*info.auxsize + (y >> info.auxhbits)*temp.pitch3;
+ case 1:
+ temp.data2 = (char *)temp.data2 + (x >> info.auxwbits)*info.auxsize + (y >> info.auxhbits)*temp.pitch2;
+ case 0:
+ temp.data = (char *)temp.data + x*info.qsize + y*temp.pitch;
+ }
+
+ return temp;
+}
+
+VDPixmapLayout VDPixmapLayoutOffset(const VDPixmapLayout& src, vdpixpos x, vdpixpos y) {
+ VDPixmapLayout temp(src);
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(temp.format);
+
+ if (info.qchunky) {
+ x = (x + info.qw - 1) / info.qw;
+ y = -(-y >> info.qhbits);
+ }
+
+ switch(info.auxbufs) {
+ case 2:
+ temp.data3 += -(-x >> info.auxwbits)*info.auxsize + -(-y >> info.auxhbits)*temp.pitch3;
+ case 1:
+ temp.data2 += -(-x >> info.auxwbits)*info.auxsize + -(-y >> info.auxhbits)*temp.pitch2;
+ case 0:
+ temp.data += x*info.qsize + y*temp.pitch;
+ }
+
+ return temp;
+}
+
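+// A note on the rounding idioms used in this file: with arithmetic right
+// shifts, -(-h >> bits) computes ceil(h / 2^bits), so subsampled plane sizes
+// are rounded up rather than truncated, and (size + alignmask) & ~alignmask
+// rounds a pitch up to the requested alignment.  For example, a 17x11
+// YUV420 image gets 9x6 chroma planes.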
+uint32 VDPixmapCreateLinearLayout(VDPixmapLayout& layout, int format, vdpixsize w, vdpixsize h, int alignment) {
+ const ptrdiff_t alignmask = alignment - 1;
+
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(format);
+ sint32 qw = (w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-h >> srcinfo.qhbits);
+ sint32 subw = -(-w >> srcinfo.auxwbits);
+ sint32 subh = -(-h >> srcinfo.auxhbits);
+ sint32 auxsize = srcinfo.auxsize;
+
+ ptrdiff_t mainpitch = (srcinfo.qsize * qw + alignmask) & ~alignmask;
+ size_t mainsize = mainpitch * qh;
+
+ layout.data = 0;
+ layout.pitch = mainpitch;
+ layout.palette = NULL;
+ layout.data2 = 0;
+ layout.pitch2 = 0;
+ layout.data3 = 0;
+ layout.pitch3 = 0;
+ layout.w = w;
+ layout.h = h;
+ layout.format = format;
+
+ if (srcinfo.auxbufs >= 1) {
+ ptrdiff_t subpitch = (subw * auxsize + alignmask) & ~alignmask;
+ size_t subsize = subpitch * subh;
+
+ layout.data2 = mainsize;
+ layout.pitch2 = subpitch;
+ mainsize += subsize;
+
+ if (srcinfo.auxbufs >= 2) {
+ layout.data3 = mainsize;
+ layout.pitch3 = subpitch;
+ mainsize += subsize;
+ }
+ }
+
+ return mainsize;
+}
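+
+// Illustrative usage (a sketch, not exercised in this file): the function
+// fills in relative plane offsets and pitches and returns the total byte
+// count, and VDPixmapBuffer can then bind the layout to real storage, e.g.
+//
+//     VDPixmapLayout layout;
+//     uint32 bytes = VDPixmapCreateLinearLayout(layout,
+//                        nsVDPixmap::kPixFormat_YUV420_Planar, 640, 480, 16);
+//     VDPixmapBuffer buf(layout);   // allocates storage sized from the layout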
+
+void VDPixmapFlipV(VDPixmap& px) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(px.format);
+ sint32 w = px.w;
+ sint32 h = px.h;
+ sint32 qw = (w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-h >> srcinfo.qhbits);
+ sint32 subh = -(-h >> srcinfo.auxhbits);
+
+ vdptrstep(px.data, px.pitch * (qh - 1));
+ px.pitch = -px.pitch;
+
+ if (srcinfo.auxbufs >= 1) {
+ vdptrstep(px.data2, px.pitch2 * (subh - 1));
+ px.pitch2 = -px.pitch2;
+
+ if (srcinfo.auxbufs >= 2) {
+ vdptrstep(px.data3, px.pitch3 * (subh - 1));
+ px.pitch3 = -px.pitch3;
+ }
+ }
+}
+
+void VDPixmapLayoutFlipV(VDPixmapLayout& layout) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(layout.format);
+ sint32 w = layout.w;
+ sint32 h = layout.h;
+ sint32 qw = (w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-h >> srcinfo.qhbits);
+ sint32 subh = -(-h >> srcinfo.auxhbits);
+
+ layout.data += layout.pitch * (qh - 1);
+ layout.pitch = -layout.pitch;
+
+ if (srcinfo.auxbufs >= 1) {
+ layout.data2 += layout.pitch2 * (subh - 1);
+ layout.pitch2 = -layout.pitch2;
+
+ if (srcinfo.auxbufs >= 2) {
+ layout.data3 += layout.pitch3 * (subh - 1);
+ layout.pitch3 = -layout.pitch3;
+ }
+ }
+}
+
+uint32 VDPixmapLayoutGetMinSize(const VDPixmapLayout& layout) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(layout.format);
+ sint32 w = layout.w;
+ sint32 h = layout.h;
+ sint32 qw = (w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-h >> srcinfo.qhbits);
+ sint32 subh = -(-h >> srcinfo.auxhbits);
+
+ uint32 limit = layout.data;
+ if (layout.pitch >= 0)
+ limit += layout.pitch * qh;
+ else
+ limit -= layout.pitch;
+
+ if (srcinfo.auxbufs >= 1) {
+ uint32 limit2 = layout.data2;
+
+ if (layout.pitch2 >= 0)
+ limit2 += layout.pitch2 * subh;
+ else
+ limit2 -= layout.pitch2;
+
+ if (limit < limit2)
+ limit = limit2;
+
+ if (srcinfo.auxbufs >= 2) {
+ uint32 limit3 = layout.data3;
+
+ if (layout.pitch3 >= 0)
+ limit3 += layout.pitch3 * subh;
+ else
+ limit3 -= layout.pitch3;
+
+ if (limit < limit3)
+ limit = limit3;
+ }
+ }
+
+ return limit;
+}
+
+VDPixmap VDPixmapExtractField(const VDPixmap& src, bool field2) {
+ VDPixmap px(src);
+
+ if (field2) {
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(px.format);
+
+ if (px.data) {
+ if (info.qh == 1)
+ vdptrstep(px.data, px.pitch);
+
+ if (!info.auxhbits) {
+ vdptrstep(px.data2, px.pitch2);
+ vdptrstep(px.data3, px.pitch3);
+ }
+ }
+ }
+
+ px.h >>= 1;
+ px.pitch += px.pitch;
+ px.pitch2 += px.pitch2;
+ px.pitch3 += px.pitch3;
+ return px;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDPixmapBuffer::VDPixmapBuffer(const VDPixmap& src)
+ : mpBuffer(NULL)
+ , mLinearSize(0)
+{
+ assign(src);
+}
+
+VDPixmapBuffer::VDPixmapBuffer(const VDPixmapBuffer& src)
+ : mpBuffer(NULL)
+ , mLinearSize(0)
+{
+ assign(src);
+}
+
+VDPixmapBuffer::VDPixmapBuffer(const VDPixmapLayout& layout) {
+ init(layout);
+}
+
+VDPixmapBuffer::~VDPixmapBuffer() {
+#ifdef _DEBUG
+ validate();
+#endif
+
+ delete[] mpBuffer;
+}
+
+void VDPixmapBuffer::init(sint32 width, sint32 height, int f) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(f);
+ sint32 qw = (width + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-height >> srcinfo.qhbits);
+ sint32 subw = -(-width >> srcinfo.auxwbits);
+ sint32 subh = -(-height >> srcinfo.auxhbits);
+ ptrdiff_t mainpitch = (srcinfo.qsize * qw + 15) & ~15;
+ ptrdiff_t subpitch = (srcinfo.auxsize * subw + 15) & ~15;
+ size_t mainsize = mainpitch * qh;
+ size_t subsize = subpitch * subh;
+ size_t totalsize = mainsize + subsize*srcinfo.auxbufs + 4 * srcinfo.palsize;
+
+#ifdef _DEBUG
+ totalsize += 28;
+#endif
+
+ if (mLinearSize != totalsize) {
+ clear();
+ mpBuffer = new char[totalsize + 15];
+ mLinearSize = totalsize;
+ }
+
+ char *p = mpBuffer + (-(int)(uintptr)mpBuffer & 15);
+
+#ifdef _DEBUG
+ *(uint32 *)p = totalsize;
+ for(int i=0; i<12; ++i)
+ p[4+i] = (char)(0xa0 + i);
+
+ p += 16;
+#endif
+
+ data = p;
+ pitch = mainpitch;
+ p += mainsize;
+
+ palette = NULL;
+ data2 = NULL;
+ pitch2 = NULL;
+ data3 = NULL;
+ pitch3 = NULL;
+ w = width;
+ h = height;
+ format = f;
+
+ if (srcinfo.auxbufs >= 1) {
+ data2 = p;
+ pitch2 = subpitch;
+ p += subsize;
+ }
+
+ if (srcinfo.auxbufs >= 2) {
+ data3 = p;
+ pitch3 = subpitch;
+ p += subsize;
+ }
+
+ if (srcinfo.palsize) {
+ palette = (const uint32 *)p;
+ p += srcinfo.palsize * 4;
+ }
+
+#ifdef _DEBUG
+ for(int j=0; j<12; ++j)
+ p[j] = (char)(0xb0 + j);
+#endif
+}
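+
+// Debug builds reserve 28 extra bytes per buffer: a 4-byte recorded size,
+// 12 head guard bytes (0xa0..0xab) before the pixel data and 12 tail guard
+// bytes (0xb0..0xbb) after it, which validate() later checks for buffer
+// under- and overruns.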
+
+void VDPixmapBuffer::init(const VDPixmapLayout& layout) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(layout.format);
+ sint32 qw = (layout.w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-layout.h >> srcinfo.qhbits);
+ sint32 subw = -(-layout.w >> srcinfo.auxwbits);
+ sint32 subh = -(-layout.h >> srcinfo.auxhbits);
+
+ ptrdiff_t mino=0, maxo=0;
+
+ if (layout.pitch < 0) {
+ mino = std::min<ptrdiff_t>(mino, layout.data + layout.pitch * (qh-1));
+ maxo = std::max<ptrdiff_t>(maxo, layout.data - layout.pitch);
+ } else {
+ mino = std::min<ptrdiff_t>(mino, layout.data);
+ maxo = std::max<ptrdiff_t>(maxo, layout.data + layout.pitch*qh);
+ }
+
+ if (srcinfo.auxbufs >= 1) {
+ if (layout.pitch2 < 0) {
+ mino = std::min<ptrdiff_t>(mino, layout.data2 + layout.pitch2 * (subh-1));
+ maxo = std::max<ptrdiff_t>(maxo, layout.data2 - layout.pitch2);
+ } else {
+ mino = std::min<ptrdiff_t>(mino, layout.data2);
+ maxo = std::max<ptrdiff_t>(maxo, layout.data2 + layout.pitch2*subh);
+ }
+
+ if (srcinfo.auxbufs >= 2) {
+ if (layout.pitch3 < 0) {
+ mino = std::min<ptrdiff_t>(mino, layout.data3 + layout.pitch3 * (subh-1));
+ maxo = std::max<ptrdiff_t>(maxo, layout.data3 - layout.pitch3);
+ } else {
+ mino = std::min<ptrdiff_t>(mino, layout.data3);
+ maxo = std::max<ptrdiff_t>(maxo, layout.data3 + layout.pitch3*subh);
+ }
+ }
+ }
+
+ ptrdiff_t linsize = ((maxo - mino + 3) & ~(uintptr)3);
+
+ ptrdiff_t totalsize = linsize + 4*srcinfo.palsize;
+
+#ifdef _DEBUG
+ totalsize += 28;
+#endif
+
+ if (mLinearSize != totalsize) {
+ clear();
+ mpBuffer = new char[totalsize + 15];
+ mLinearSize = totalsize;
+ }
+
+ char *p = mpBuffer + (-(int)(uintptr)mpBuffer & 15);
+
+#ifdef _DEBUG
+ *(uint32 *)p = totalsize - 28;
+ for(int i=0; i<12; ++i)
+ p[4+i] = (char)(0xa0 + i);
+
+ p += 16;
+#endif
+
+ w = layout.w;
+ h = layout.h;
+ format = layout.format;
+ data = p + layout.data - mino;
+ data2 = p + layout.data2 - mino;
+ data3 = p + layout.data3 - mino;
+ pitch = layout.pitch;
+ pitch2 = layout.pitch2;
+ pitch3 = layout.pitch3;
+ palette = NULL;
+
+ if (srcinfo.palsize) {
+ palette = (const uint32 *)(p + linsize);
+ memcpy((void *)palette, layout.palette, 4*srcinfo.palsize);
+ }
+
+#ifdef _DEBUG
+ for(int j=0; j<12; ++j)
+ p[totalsize + j - 28] = (char)(0xb0 + j);
+#endif
+
+ VDAssertValidPixmap(*this);
+}
+
+void VDPixmapBuffer::assign(const VDPixmap& src) {
+ if (!src.format) {
+ delete[] mpBuffer;
+ mpBuffer = NULL;
+ data = NULL;
+ format = 0;
+ } else {
+ init(src.w, src.h, src.format);
+
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src.format);
+ int qw = (src.w + srcinfo.qw - 1) / srcinfo.qw;
+ int qh = -(-src.h >> srcinfo.qhbits);
+ int subw = -(-src.w >> srcinfo.auxwbits);
+ int subh = -(-src.h >> srcinfo.auxhbits);
+
+ if (srcinfo.palsize)
+ memcpy((void *)palette, src.palette, 4 * srcinfo.palsize);
+
+ switch(srcinfo.auxbufs) {
+ case 2:
+ VDMemcpyRect(data3, pitch3, src.data3, src.pitch3, subw, subh);
+ case 1:
+ VDMemcpyRect(data2, pitch2, src.data2, src.pitch2, subw, subh);
+ case 0:
+ VDMemcpyRect(data, pitch, src.data, src.pitch, qw * srcinfo.qsize, qh);
+ }
+ }
+}
+
+void VDPixmapBuffer::swap(VDPixmapBuffer& dst) {
+ std::swap(mpBuffer, dst.mpBuffer);
+ std::swap(mLinearSize, dst.mLinearSize);
+ std::swap(static_cast<VDPixmap&>(*this), static_cast<VDPixmap&>(dst));
+}
+
+#ifdef _DEBUG
+void VDPixmapBuffer::validate() {
+ if (mpBuffer) {
+ char *p = (char *)(((uintptr)mpBuffer + 15) & ~(uintptr)15);
+
+ // verify head bytes
+ for(int i=0; i<12; ++i)
+ if (p[i+4] != (char)(0xa0 + i))
+ VDASSERT(!"VDPixmapBuffer: Buffer underflow detected.\n");
+
+ // verify tail bytes
+ for(int j=0; j<12; ++j)
+ if (p[mLinearSize - 12 + j] != (char)(0xb0 + j))
+ VDASSERT(!"VDPixmapBuffer: Buffer overflow detected.\n");
+ }
+}
+#endif
\ No newline at end of file
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/region.cpp b/src/thirdparty/VirtualDub/Kasumi/source/region.cpp
new file mode 100644
index 000000000..283f43cf8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/region.cpp
@@ -0,0 +1,1334 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/region.h>
+#include <vd2/system/math.h>
+#include <vd2/system/vdstl.h>
+
+void VDPixmapRegion::swap(VDPixmapRegion& x) {
+ mSpans.swap(x.mSpans);
+ std::swap(mBounds, x.mBounds);
+}
+
+VDPixmapPathRasterizer::VDPixmapPathRasterizer()
+ : mpEdgeBlocks(NULL)
+ , mpFreeEdgeBlocks(NULL)
+ , mEdgeBlockIdx(kEdgeBlockMax)
+ , mpScanBuffer(NULL)
+{
+ ClearScanBuffer();
+}
+
+VDPixmapPathRasterizer::VDPixmapPathRasterizer(const VDPixmapPathRasterizer&)
+ : mpEdgeBlocks(NULL)
+ , mpFreeEdgeBlocks(NULL)
+ , mEdgeBlockIdx(kEdgeBlockMax)
+ , mpScanBuffer(NULL)
+{
+ ClearScanBuffer();
+}
+
+VDPixmapPathRasterizer::~VDPixmapPathRasterizer() {
+ Clear();
+ FreeEdgeLists();
+}
+
+VDPixmapPathRasterizer& VDPixmapPathRasterizer::operator=(const VDPixmapPathRasterizer&) {
+ return *this;
+}
+
+void VDPixmapPathRasterizer::Clear() {
+ ClearEdgeList();
+ ClearScanBuffer();
+}
+
+void VDPixmapPathRasterizer::QuadraticBezier(const vdint2 *pts) {
+ int x0 = pts[0].x;
+ int x1 = pts[1].x;
+ int x2 = pts[2].x;
+ int y0 = pts[0].y;
+ int y1 = pts[1].y;
+ int y2 = pts[2].y;
+
+ // P = (1-t)^2*P0 + 2t(1-t)*P1 + t^2*P2
+ // P = (1-2t+t^2)P0 + 2(t-t^2)P1 + t^2*P2
+ // P = (P0-2P1+P2)t^2 + 2(P1-P0)t + P0
+
+ int cx2 = x0-2*x1+x2;
+ int cx1 = -2*x0+2*x1;
+ int cx0 = x0;
+
+ int cy2 = y0-2*y1+y2;
+ int cy1 = -2*y0+2*y1;
+ int cy0 = y0;
+
+ // This equation is from Graphics Gems I.
+ //
+ // The idea is that since we're approximating a cubic curve with lines,
+ // any error we incur is due to the curvature of the line, which we can
+ // estimate by calculating the maximum acceleration of the curve. For
+ // a cubic, the acceleration (second derivative) is a line, meaning that
+ // the absolute maximum acceleration must occur at either the beginning
+ // (|c2|) or the end (|c2+c3|). Our bounds here are a little more
+ // conservative than that, but that's okay.
+ //
+ // If the acceleration of the parametric formula is zero (c2 = c3 = 0),
+ // that component of the curve is linear and does not incur any error.
+ // If a=0 for both X and Y, the curve is a line segment and we can
+ // use a step size of 1.
+
+ int maxaccel1 = abs(cy2);
+ int maxaccel2 = abs(cx2);
+
+ int maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
+ int h = 1;
+
+ while(maxaccel > 8 && h < 1024) {
+ maxaccel >>= 2;
+ h += h;
+ }
+
+ int lastx = x0;
+ int lasty = y0;
+
+ // compute forward differences
+ sint64 h1 = (sint64)(0x40000000 / h) << 2;
+ sint64 h2 = h1/h;
+
+ sint64 ax0 = (sint64)cx0 << 32;
+ sint64 ax1 = h1*(sint64)cx1 + h2*(sint64)cx2;
+ sint64 ax2 = 2*h2*(sint64)cx2;
+
+ sint64 ay0 = (sint64)cy0 << 32;
+ sint64 ay1 = h1*(sint64)cy1 + h2*(sint64)cy2;
+ sint64 ay2 = 2*h2*(sint64)cy2;
+
+ // round, not truncate
+ ax0 += 0x80000000;
+ ay0 += 0x80000000;
+
+ do {
+ ax0 += ax1;
+ ax1 += ax2;
+ ay0 += ay1;
+ ay1 += ay2;
+
+ int xi = (int)((uint64)ax0 >> 32);
+ int yi = (int)((uint64)ay0 >> 32);
+
+ FastLine(lastx, lasty, xi, yi);
+ lastx = xi;
+ lasty = yi;
+ } while(--h);
+}
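+
+// Forward-differencing note: both Bezier flatteners evaluate the polynomial
+// at t = k/h for k = 1..h using running differences instead of re-evaluating
+// it.  For the quadratic case f(t) = c2*t^2 + c1*t + c0,
+//
+//     f((k+1)/h) - f(k/h) = c1/h + (2k+1)*c2/h^2
+//
+// so the first difference starts at c1/h + c2/h^2 (ax1/ay1, built from
+// h1 ~ 2^32/h and h2 ~ 2^32/h^2) and grows by the constant second difference
+// 2*c2/h^2 (ax2/ay2).  CubicBezier below adds a third difference for the t^3
+// term.  The step count h is doubled, up to 1024, until the estimated
+// maximum acceleration drops to 8 or below.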
+
+void VDPixmapPathRasterizer::CubicBezier(const vdint2 *pts) {
+ int x0 = pts[0].x;
+ int x1 = pts[1].x;
+ int x2 = pts[2].x;
+ int x3 = pts[3].x;
+ int y0 = pts[0].y;
+ int y1 = pts[1].y;
+ int y2 = pts[2].y;
+ int y3 = pts[3].y;
+
+ int cx3 = - x0+3*x1-3*x2+x3;
+ int cx2 = 3*x0-6*x1+3*x2;
+ int cx1 = -3*x0+3*x1;
+ int cx0 = x0;
+
+ int cy3 = - y0+3*y1-3*y2+y3;
+ int cy2 = 3*y0-6*y1+3*y2;
+ int cy1 = -3*y0+3*y1;
+ int cy0 = y0;
+
+ // This equation is from Graphics Gems I.
+ //
+ // The idea is that since we're approximating a cubic curve with lines,
+ // any error we incur is due to the curvature of the line, which we can
+ // estimate by calculating the maximum acceleration of the curve. For
+ // a cubic, the acceleration (second derivative) is a line, meaning that
+ // the absolute maximum acceleration must occur at either the beginning
+ // (|c2|) or the end (|c2+c3|). Our bounds here are a little more
+ // conservative than that, but that's okay.
+ //
+ // If the acceleration of the parametric formula is zero (c2 = c3 = 0),
+ // that component of the curve is linear and does not incur any error.
+ // If a=0 for both X and Y, the curve is a line segment and we can
+ // use a step size of 1.
+
+ int maxaccel1 = abs(2*cy2) + abs(6*cy3);
+ int maxaccel2 = abs(2*cx2) + abs(6*cx3);
+
+ int maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
+ int h = 1;
+
+ while(maxaccel > 8 && h < 1024) {
+ maxaccel >>= 2;
+ h += h;
+ }
+
+ int lastx = x0;
+ int lasty = y0;
+
+ // compute forward differences
+ sint64 h1 = (sint64)(0x40000000 / h) << 2;
+ sint64 h2 = h1/h;
+ sint64 h3 = h2/h;
+
+ sint64 ax0 = (sint64)cx0 << 32;
+ sint64 ax1 = h1*(sint64)cx1 + h2*(sint64)cx2 + h3*(sint64)cx3;
+ sint64 ax2 = 2*h2*(sint64)cx2 + 6*h3*(sint64)cx3;
+ sint64 ax3 = 6*h3*(sint64)cx3;
+
+ sint64 ay0 = (sint64)cy0 << 32;
+ sint64 ay1 = h1*(sint64)cy1 + h2*(sint64)cy2 + h3*(sint64)cy3;
+ sint64 ay2 = 2*h2*(sint64)cy2 + 6*h3*(sint64)cy3;
+ sint64 ay3 = 6*h3*(sint64)cy3;
+
+ // round, not truncate
+ ax0 += 0x80000000;
+ ay0 += 0x80000000;
+
+ do {
+ ax0 += ax1;
+ ax1 += ax2;
+ ax2 += ax3;
+ ay0 += ay1;
+ ay1 += ay2;
+ ay2 += ay3;
+
+ int xi = (int)((uint64)ax0 >> 32);
+ int yi = (int)((uint64)ay0 >> 32);
+
+ FastLine(lastx, lasty, xi, yi);
+ lastx = xi;
+ lasty = yi;
+ } while(--h);
+}
+
+void VDPixmapPathRasterizer::Line(const vdint2& pt1, const vdint2& pt2) {
+ FastLine(pt1.x, pt1.y, pt2.x, pt2.y);
+}
+
+void VDPixmapPathRasterizer::FastLine(int x0, int y0, int x1, int y1) {
+ int flag = 1;
+
+ if (y1 == y0)
+ return;
+
+ if (y1 < y0) {
+ int t;
+
+ t=x0; x0=x1; x1=t;
+ t=y0; y0=y1; y1=t;
+ flag = 0;
+ }
+
+ int dy = y1-y0;
+ int xacc = x0<<13;
+
+ // prestep y0 down
+ int iy0 = (y0+3) >> 3;
+ int iy1 = (y1+3) >> 3;
+
+ if (iy0 < iy1) {
+ int invslope = (x1-x0)*65536/dy;
+
+ int prestep = (4-y0) & 7;
+ xacc += (invslope * prestep)>>3;
+
+ if (iy0 < mScanYMin || iy1 > mScanYMax) {
+ ReallocateScanBuffer(iy0, iy1);
+ VDASSERT(iy0 >= mScanYMin && iy1 <= mScanYMax);
+ }
+
+ while(iy0 < iy1) {
+ int ix = (xacc+32767)>>16;
+
+ if (mEdgeBlockIdx >= kEdgeBlockMax) {
+ if (mpFreeEdgeBlocks) {
+ EdgeBlock *newBlock = mpFreeEdgeBlocks;
+ mpFreeEdgeBlocks = mpFreeEdgeBlocks->next;
+ newBlock->next = mpEdgeBlocks;
+ mpEdgeBlocks = newBlock;
+ } else {
+ mpEdgeBlocks = new EdgeBlock(mpEdgeBlocks);
+ }
+
+ mEdgeBlockIdx = 0;
+ }
+
+ Edge& e = mpEdgeBlocks->edges[mEdgeBlockIdx];
+ Scan& s = mpScanBufferBiased[iy0];
+ VDASSERT(iy0 >= mScanYMin && iy0 < mScanYMax);
+ ++mEdgeBlockIdx;
+
+ e.posandflag = ix*2+flag;
+ e.next = s.chain;
+ s.chain = &e;
+ ++s.count;
+
+ ++iy0;
+ xacc += invslope;
+ }
+ }
+}
+
+void VDPixmapPathRasterizer::ScanConvert(VDPixmapRegion& region) {
+ // Convert the edges to spans. We couldn't do this before because some of
+ // the regions may have winding numbers >+1 and it would have been a pain
+ // to try to adjust the spans on the fly. We use one heap to detangle
+ // a scanline's worth of edges from the singly-linked lists, and another
+ // to collect the actual scans.
+ vdfastvector<int> heap;
+
+ region.mSpans.clear();
+ int xmin = INT_MAX;
+ int xmax = INT_MIN;
+ int ymin = INT_MAX;
+ int ymax = INT_MIN;
+
+ for(int y=mScanYMin; y<mScanYMax; ++y) {
+ uint32 flipcount = mpScanBufferBiased[y].count;
+
+ if (!flipcount)
+ continue;
+
+ // Keep the edge heap from doing lots of stupid little reallocates.
+ if (heap.capacity() < flipcount)
+ heap.resize((flipcount + 63)&~63);
+
+ // Detangle scanline into edge heap.
+ int *heap0 = heap.data();
+ int *heap1 = heap0;
+ for(const Edge *ptr = mpScanBufferBiased[y].chain; ptr; ptr = ptr->next)
+ *heap1++ = ptr->posandflag;
+
+ VDASSERT(heap1 - heap0 == flipcount);
+
+ // Sort edge heap. Note that we conveniently made the opening edges
+ // one more than closing edges at the same spot, so we won't have any
+ // problems with abutting spans.
+
+ std::sort(heap0, heap1);
+
+#if 0
+ while(heap0 != heap1) {
+ int x = *heap0++ >> 1;
+ region.mSpans.push_back((y<<16) + x + 0x80008000);
+ region.mSpans.push_back((y<<16) + x + 0x80008001);
+ }
+ continue;
+#endif
+
+ // Trim any odd edges off, since we can never close on one.
+ if (flipcount & 1)
+ --heap1;
+
+ // Process edges and add spans. Since we only check for a non-zero
+ // winding number, it doesn't matter which way the outlines go. Also, since
+ // the parity always flips after each edge regardless of direction, we can
+ // process the edges in pairs.
+
+ size_t spanstart = region.mSpans.size();
+
+ int x_left;
+ int count = 0;
+ while(heap0 != heap1) {
+ int x = *heap0++;
+
+ if (!count)
+ x_left = (x>>1);
+
+ count += (x&1);
+
+ x = *heap0++;
+
+ count += (x&1);
+
+ if (!--count) {
+ int x_right = (x>>1);
+
+ if (x_right > x_left) {
+ region.mSpans.push_back((y<<16) + x_left + 0x80008000);
+ region.mSpans.push_back((y<<16) + x_right + 0x80008000);
+
+ }
+ }
+ }
+
+ size_t spanend = region.mSpans.size();
+
+ if (spanend > spanstart) {
+ if (ymin > y)
+ ymin = y;
+
+ if (ymax < y)
+ ymax = y;
+
+ int x1 = (region.mSpans[spanstart] & 0xffff) - 0x8000;
+ int x2 = (region.mSpans[spanend-1] & 0xffff) - 0x8000;
+
+ if (xmin > x1)
+ xmin = x1;
+
+ if (xmax < x2)
+ xmax = x2;
+ }
+ }
+
+ if (xmax > xmin) {
+ region.mBounds.set(xmin, ymin, xmax, ymax);
+ } else {
+ region.mBounds.set(0, 0, 0, 0);
+ }
+
+ // Dump the edge and scan buffers, since we no longer need them.
+ ClearEdgeList();
+ ClearScanBuffer();
+}
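+
+// Span encoding: each span is stored as a pair of 32-bit keys of the form
+// ((y + 0x8000) << 16) + (x + 0x8000), i.e. y in the high halfword and x in
+// the low halfword, both biased by 0x8000 so signed coordinates sort
+// correctly as unsigned integers.  The first key of a pair is the inclusive
+// left edge and the second the exclusive right edge; the fill and clip
+// routines below rely on this ordering for their binary searches.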
+
+void VDPixmapPathRasterizer::ClearEdgeList() {
+ if (mpEdgeBlocks) {
+ EdgeBlock *block = mpEdgeBlocks;
+
+ while(EdgeBlock *next = block->next)
+ block = next;
+
+ block->next = mpFreeEdgeBlocks;
+ mpFreeEdgeBlocks = mpEdgeBlocks;
+ mpEdgeBlocks = NULL;
+ }
+
+ mEdgeBlockIdx = kEdgeBlockMax;
+}
+
+void VDPixmapPathRasterizer::FreeEdgeLists() {
+ ClearEdgeList();
+
+ while(EdgeBlock *block = mpFreeEdgeBlocks) {
+ mpFreeEdgeBlocks = block->next;
+
+ delete block;
+ }
+}
+
+void VDPixmapPathRasterizer::ClearScanBuffer() {
+ delete[] mpScanBuffer;
+ mpScanBuffer = mpScanBufferBiased = NULL;
+ mScanYMin = 0;
+ mScanYMax = 0;
+}
+
+void VDPixmapPathRasterizer::ReallocateScanBuffer(int ymin, int ymax) {
+ //
+ // check if there actually is a scan buffer to avoid unintentionally pinning at zero
+ if (mpScanBuffer) {
+ int nicedelta = (mScanYMax - mScanYMin);
+
+ if (ymin < mScanYMin) {
+ int yminnice = mScanYMin - nicedelta;
+ if (ymin > yminnice)
+ ymin = yminnice;
+
+ ymin &= ~31;
+ } else
+ ymin = mScanYMin;
+
+ if (ymax > mScanYMax) {
+ int ymaxnice = mScanYMax + nicedelta;
+ if (ymax < ymaxnice)
+ ymax = ymaxnice;
+
+ ymax = (ymax + 31) & ~31;
+ } else
+ ymax = mScanYMax;
+
+ VDASSERT(ymin <= mScanYMin && ymax >= mScanYMax);
+ }
+
+ // reallocate scan buffer
+ Scan *pNewBuffer = new Scan[ymax - ymin];
+ Scan *pNewBufferBiased = pNewBuffer - ymin;
+
+ if (mpScanBuffer) {
+ memcpy(pNewBufferBiased + mScanYMin, mpScanBufferBiased + mScanYMin, (mScanYMax - mScanYMin) * sizeof(Scan));
+ delete[] mpScanBuffer;
+
+ // zero new areas of scan buffer
+ for(int y=ymin; y<mScanYMin; ++y) {
+ pNewBufferBiased[y].chain = NULL;
+ pNewBufferBiased[y].count = 0;
+ }
+
+ for(int y=mScanYMax; y<ymax; ++y) {
+ pNewBufferBiased[y].chain = NULL;
+ pNewBufferBiased[y].count = 0;
+ }
+ } else {
+ for(int y=ymin; y<ymax; ++y) {
+ pNewBufferBiased[y].chain = NULL;
+ pNewBufferBiased[y].count = 0;
+ }
+ }
+
+ mpScanBuffer = pNewBuffer;
+ mpScanBufferBiased = pNewBufferBiased;
+ mScanYMin = ymin;
+ mScanYMax = ymax;
+}
+
+bool VDPixmapFillRegion(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = (-x) + ((-y) << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ uint32 lo = 0, hi = n;
+
+ // compute top clip
+ while(lo < hi) {
+ int mid = ((lo + hi) >> 1) & ~1;
+
+ if (region.mSpans[mid + 1] < spanmin)
+ lo = mid + 2;
+ else
+ hi = mid;
+ }
+
+ start = lo;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w - x) + ((dst.h - y - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // compute bottom clip
+ int lo = start;
+ int hi = n;
+
+ while(lo < hi) {
+ int mid = ((lo + hi) >> 1) & ~1;
+
+ if (region.mSpans[mid] >= spanlimit)
+ hi = mid;
+ else
+ lo = mid+2;
+ }
+
+ end = lo;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+ uint32 *dstp;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ uint32 py = (span0 >> 16) - 0x8000 + y;
+ uint32 px = (span0 & 0xffff) - 0x8000 + x;
+ uint32 w = span1-span0;
+
+ VDASSERT(py < (uint32)dst.h);
+ VDASSERT(px < (uint32)dst.w);
+ VDASSERT(dst.w - (int)px >= (int)w);
+
+ if (lasty != py)
+ dstp = (uint32 *)vdptroffset(dst.data, dst.pitch * py);
+
+ uint32 *p = dstp + px;
+ do {
+ *p++ = color;
+ } while(--w);
+ }
+
+ return true;
+}
+
+namespace {
+ void RenderABuffer32(const VDPixmap& dst, int y, const uint8 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint32 *dstp = (uint32 *)vdptroffset(dst.data, dst.pitch * y);
+
+ const uint32 color_rb = color & 0x00FF00FF;
+ const uint32 color_g = color & 0x0000FF00;
+ do {
+ const uint32 px = *dstp;
+ const uint32 px_rb = px & 0x00FF00FF;
+ const uint32 px_g = px & 0x0000FF00;
+ const sint32 a = *alpha++;
+
+ const uint32 result_rb = (((px_rb << 6) + ((sint32)(color_rb - px_rb)*a + 0x00200020)) & 0x3FC03FC0);
+ const uint32 result_g = (((px_g << 6) + ((sint32)(color_g - px_g )*a + 0x00002000)) & 0x003FC000);
+
+ *dstp++ = (result_rb + result_g) >> 6;
+ } while(--w);
+ }
+
+ void RenderABuffer8(const VDPixmap& dst, int y, const uint8 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint8 *dstp = (uint8 *)vdptroffset(dst.data, dst.pitch * y);
+
+ do {
+ const uint8 px = *dstp;
+ const sint8 a = *alpha++;
+
+ *dstp++ = px + (((sint32)(color - px) * a + 32) >> 6);
+ } while(--w);
+ }
+
+ void RenderABuffer8_128(const VDPixmap& dst, int y, const uint8 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint8 *dstp = (uint8 *)vdptroffset(dst.data, dst.pitch * y);
+
+ do {
+ const uint8 px = *dstp;
+ const sint16 a = *alpha++;
+
+ *dstp++ = px + (((sint32)(color - px) * a + 64) >> 7);
+ } while(--w);
+ }
+
+ void RenderABuffer8_256(const VDPixmap& dst, int y, const uint16 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint8 *dstp = (uint8 *)vdptroffset(dst.data, dst.pitch * y);
+
+ do {
+ const uint8 px = *dstp;
+ const sint32 a = *alpha++;
+
+ *dstp++ = px + (((sint32)(color - px) * a + 128) >> 8);
+ } while(--w);
+ }
+
+ void RenderABuffer8_1024(const VDPixmap& dst, int y, const uint16 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint8 *dstp = (uint8 *)vdptroffset(dst.data, dst.pitch * y);
+
+ do {
+ const uint8 px = *dstp;
+ const sint32 a = *alpha++;
+
+ *dstp++ = px + (((sint32)(color - px) * a + 512) >> 10);
+ } while(--w);
+ }
+}
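+
+// The RenderABuffer* helpers blend 'color' into one scanline using per-pixel
+// coverage counts accumulated by the fill routines below.  RenderABuffer32
+// and RenderABuffer8 handle 32-bit XRGB and 8-bit Y8 destinations at a full
+// coverage of 64 (8x8 supersampling, shift by 6); the _128, _256 and _1024
+// variants handle Y8 at the larger coverage scales used by the 16x8, 16x16
+// and 32x32 fills (shifts by 7, 8 and 10).  Each adds half the divisor
+// before shifting so the blend rounds to nearest.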
+
+bool VDPixmapFillRegionAntialiased_32x_32x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format != nsVDPixmap::kPixFormat_Y8)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = -x + (-y << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ // find first span : x2 > spanmin
+ start = std::upper_bound(region.mSpans.begin(), region.mSpans.end(), spanmin) - region.mSpans.begin();
+ start &= ~1;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w*32 - x) + (((dst.h*32 - y) - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // find last span : x1 < spanlimit
+ end = std::lower_bound(region.mSpans.begin(), region.mSpans.end(), spanlimit) - region.mSpans.begin();
+
+ end = (end + 1) & ~1;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // allocate A-buffer
+ vdfastvector<uint16> abuffer(dst.w, 0);
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+
+ sint32 dstw32 = dst.w * 32;
+ sint32 dsth32 = dst.h * 32;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ sint32 py = (span0 >> 16) - 0x8000 + y;
+
+ if ((uint32)py >= (uint32)dsth32)
+ continue;
+
+ sint32 px1 = (span0 & 0xffff) - 0x8000 + x;
+ sint32 px2 = (span1 & 0xffff) - 0x8000 + x;
+ sint32 w = span1-span0;
+
+ if (lasty != py) {
+ if (((lasty ^ py) & 0xFFFFFFE0)) {
+ if (lasty >= 0) {
+ // flush scanline
+
+ RenderABuffer8_1024(dst, lasty >> 5, abuffer.data(), dst.w, color);
+ }
+
+ memset(abuffer.data(), 0, abuffer.size() * sizeof(abuffer[0]));
+ }
+ lasty = py;
+ }
+
+ if (px1 < 0)
+ px1 = 0;
+ if (px2 > dstw32)
+ px2 = dstw32;
+
+ if (px1 >= px2)
+ continue;
+
+ uint32 ix1 = px1 >> 5;
+ uint32 ix2 = px2 >> 5;
+ uint16 *p1 = abuffer.data() + ix1;
+ uint16 *p2 = abuffer.data() + ix2;
+
+ if (p1 == p2) {
+ p1[0] += (px2 - px1);
+ } else {
+ if (px1 & 31) {
+ p1[0] += 32 - (px1 & 31);
+ ++p1;
+ }
+
+ while(p1 != p2) {
+ p1[0] += 32;
+ ++p1;
+ }
+
+ if (px2 & 31)
+				p1[0] += px2 & 31;	// partial coverage of the last cell
+ }
+ }
+
+ if (lasty >= 0)
+ RenderABuffer8_1024(dst, lasty >> 5, abuffer.data(), dst.w, color);
+
+ return true;
+}
+
+bool VDPixmapFillRegionAntialiased_16x_16x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format != nsVDPixmap::kPixFormat_Y8)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = -x + (-y << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ // find first span : x2 > spanmin
+ start = std::upper_bound(region.mSpans.begin(), region.mSpans.end(), spanmin) - region.mSpans.begin();
+ start &= ~1;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w*16 - x) + (((dst.h*16 - y) - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // find last span : x1 < spanlimit
+ end = std::lower_bound(region.mSpans.begin(), region.mSpans.end(), spanlimit) - region.mSpans.begin();
+
+ end = (end + 1) & ~1;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // allocate A-buffer
+ vdfastvector<uint16> abuffer(dst.w, 0);
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+
+ sint32 dstw16 = dst.w * 16;
+ sint32 dsth16 = dst.h * 16;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ sint32 py = (span0 >> 16) - 0x8000 + y;
+
+ if ((uint32)py >= (uint32)dsth16)
+ continue;
+
+ sint32 px1 = (span0 & 0xffff) - 0x8000 + x;
+ sint32 px2 = (span1 & 0xffff) - 0x8000 + x;
+ sint32 w = span1-span0;
+
+ if (lasty != py) {
+ if (((lasty ^ py) & 0xFFFFFFF0)) {
+ if (lasty >= 0) {
+ // flush scanline
+
+ RenderABuffer8_256(dst, lasty >> 4, abuffer.data(), dst.w, color);
+ }
+
+ memset(abuffer.data(), 0, abuffer.size() * sizeof(abuffer[0]));
+ }
+ lasty = py;
+ }
+
+ if (px1 < 0)
+ px1 = 0;
+ if (px2 > dstw16)
+ px2 = dstw16;
+
+ if (px1 >= px2)
+ continue;
+
+ uint32 ix1 = px1 >> 4;
+ uint32 ix2 = px2 >> 4;
+ uint16 *p1 = abuffer.data() + ix1;
+ uint16 *p2 = abuffer.data() + ix2;
+
+ if (p1 == p2) {
+ p1[0] += (px2 - px1);
+ } else {
+ if (px1 & 15) {
+ p1[0] += 16 - (px1 & 15);
+ ++p1;
+ }
+
+ while(p1 != p2) {
+ p1[0] += 16;
+ ++p1;
+ }
+
+ if (px2 & 15)
+ p1[0] += px2 & 15;
+ }
+ }
+
+ if (lasty >= 0)
+ RenderABuffer8_256(dst, lasty >> 4, abuffer.data(), dst.w, color);
+
+ return true;
+}
+
+bool VDPixmapFillRegionAntialiased_16x_8x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888 && dst.format != nsVDPixmap::kPixFormat_Y8)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = -x + (-y << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ // find first span : x2 > spanmin
+ start = std::upper_bound(region.mSpans.begin(), region.mSpans.end(), spanmin) - region.mSpans.begin();
+ start &= ~1;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w*16 - x) + (((dst.h*8 - y) - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // find last span : x1 < spanlimit
+ end = std::lower_bound(region.mSpans.begin(), region.mSpans.end(), spanlimit) - region.mSpans.begin();
+
+ end = (end + 1) & ~1;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // allocate A-buffer
+ vdfastvector<uint8> abuffer(dst.w, 0);
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+
+ sint32 dstw16 = dst.w * 16;
+ sint32 dsth8 = dst.h * 8;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ sint32 py = (span0 >> 16) - 0x8000 + y;
+
+ if ((uint32)py >= (uint32)dsth8)
+ continue;
+
+ sint32 px1 = (span0 & 0xffff) - 0x8000 + x;
+ sint32 px2 = (span1 & 0xffff) - 0x8000 + x;
+ sint32 w = span1-span0;
+
+ if (lasty != py) {
+ if (((lasty ^ py) & 0xFFFFFFF8)) {
+ if (lasty >= 0) {
+ // flush scanline
+
+ RenderABuffer8_128(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ }
+
+ memset(abuffer.data(), 0, abuffer.size());
+ }
+ lasty = py;
+ }
+
+ if (px1 < 0)
+ px1 = 0;
+ if (px2 > dstw16)
+ px2 = dstw16;
+
+ if (px1 >= px2)
+ continue;
+
+ uint32 ix1 = px1 >> 4;
+ uint32 ix2 = px2 >> 4;
+ uint8 *p1 = abuffer.data() + ix1;
+ uint8 *p2 = abuffer.data() + ix2;
+
+ if (p1 == p2) {
+ p1[0] += (px2 - px1);
+ } else {
+ if (px1 & 15) {
+ p1[0] += 16 - (px1 & 15);
+ ++p1;
+ }
+
+ while(p1 != p2) {
+ p1[0] += 16;
+ ++p1;
+ }
+
+ if (px2 & 15)
+ p1[0] += px2 & 15;
+ }
+ }
+
+ if (lasty >= 0)
+ RenderABuffer8_128(dst, lasty >> 3, abuffer.data(), dst.w, color);
+
+ return true;
+}
+
+bool VDPixmapFillRegionAntialiased8x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format == nsVDPixmap::kPixFormat_YUV444_Planar ||
+ dst.format == nsVDPixmap::kPixFormat_YUV422_Planar ||
+ dst.format == nsVDPixmap::kPixFormat_YUV420_Planar ||
+ dst.format == nsVDPixmap::kPixFormat_YUV410_Planar) {
+ VDPixmap pxY;
+ VDPixmap pxCb;
+ VDPixmap pxCr;
+
+ pxY.format = nsVDPixmap::kPixFormat_Y8;
+ pxY.data = dst.data;
+ pxY.pitch = dst.pitch;
+ pxY.w = dst.w;
+ pxY.h = dst.h;
+
+ pxCb.format = nsVDPixmap::kPixFormat_Y8;
+ pxCb.data = dst.data2;
+ pxCb.pitch = dst.pitch2;
+ pxCb.w = dst.w;
+ pxCb.h = dst.h;
+
+ pxCr.format = nsVDPixmap::kPixFormat_Y8;
+ pxCr.data = dst.data3;
+ pxCr.pitch = dst.pitch3;
+ pxCr.w = dst.w;
+ pxCr.h = dst.h;
+
+ uint32 colorY = (color >> 8) & 0xff;
+ uint32 colorCb = (color >> 0) & 0xff;
+ uint32 colorCr = (color >> 16) & 0xff;
+
+ VDPixmapFillRegionAntialiased8x(pxY, region, x, y, colorY);
+
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ pxCr.w = pxCb.w = dst.w >> 2;
+ pxCr.h = pxCb.h = dst.h >> 2;
+ x >>= 2;
+ y >>= 2;
+ VDPixmapFillRegionAntialiased_32x_32x(pxCb, region, x, y, colorCb);
+ VDPixmapFillRegionAntialiased_32x_32x(pxCr, region, x, y, colorCr);
+ return true;
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ pxCr.w = pxCb.w = dst.w >> 1;
+ pxCr.h = pxCb.h = dst.h >> 1;
+ x >>= 1;
+ y >>= 1;
+ x += 2;
+ VDPixmapFillRegionAntialiased_16x_16x(pxCb, region, x, y, colorCb);
+ VDPixmapFillRegionAntialiased_16x_16x(pxCr, region, x, y, colorCr);
+ return true;
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ pxCr.w = pxCb.w = dst.w >> 1;
+ x >>= 1;
+ x += 2;
+ VDPixmapFillRegionAntialiased_16x_8x(pxCb, region, x, y, colorCb);
+ VDPixmapFillRegionAntialiased_16x_8x(pxCr, region, x, y, colorCr);
+ return true;
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ VDPixmapFillRegionAntialiased8x(pxCb, region, x, y, colorCb);
+ VDPixmapFillRegionAntialiased8x(pxCr, region, x, y, colorCr);
+ return true;
+ }
+ }
+
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888 && dst.format != nsVDPixmap::kPixFormat_Y8)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = -x + (-y << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ // find first span : x2 > spanmin
+ start = std::upper_bound(region.mSpans.begin(), region.mSpans.end(), spanmin) - region.mSpans.begin();
+ start &= ~1;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w*8 - x) + (((dst.h*8 - y) - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // find last span : x1 < spanlimit
+ end = std::lower_bound(region.mSpans.begin(), region.mSpans.end(), spanlimit) - region.mSpans.begin();
+
+ end = (end + 1) & ~1;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // allocate A-buffer
+ vdfastvector<uint8> abuffer(dst.w, 0);
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+
+ sint32 dstw8 = dst.w * 8;
+ sint32 dsth8 = dst.h * 8;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ sint32 py = (span0 >> 16) - 0x8000 + y;
+
+ if ((uint32)py >= (uint32)dsth8)
+ continue;
+
+ sint32 px1 = (span0 & 0xffff) - 0x8000 + x;
+ sint32 px2 = (span1 & 0xffff) - 0x8000 + x;
+ sint32 w = span1-span0;
+
+ if (lasty != py) {
+ if (((lasty ^ py) & 0xFFFFFFF8)) {
+ if (lasty >= 0) {
+ // flush scanline
+
+ if (dst.format == nsVDPixmap::kPixFormat_XRGB8888)
+ RenderABuffer32(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ else
+ RenderABuffer8(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ }
+
+ memset(abuffer.data(), 0, abuffer.size());
+ }
+ lasty = py;
+ }
+
+ if (px1 < 0)
+ px1 = 0;
+ if (px2 > dstw8)
+ px2 = dstw8;
+
+ if (px1 >= px2)
+ continue;
+
+ uint32 ix1 = px1 >> 3;
+ uint32 ix2 = px2 >> 3;
+ uint8 *p1 = abuffer.data() + ix1;
+ uint8 *p2 = abuffer.data() + ix2;
+
+ if (p1 == p2) {
+ p1[0] += (px2 - px1);
+ } else {
+ if (px1 & 7) {
+ p1[0] += 8 - (px1 & 7);
+ ++p1;
+ }
+
+ while(p1 != p2) {
+ p1[0] += 8;
+ ++p1;
+ }
+
+ if (px2 & 7)
+ p1[0] += px2 & 7;
+ }
+ }
+
+ if (lasty >= 0) {
+ if (dst.format == nsVDPixmap::kPixFormat_XRGB8888)
+ RenderABuffer32(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ else
+ RenderABuffer8(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ }
+
+ return true;
+}
+
+void VDPixmapCreateRoundRegion(VDPixmapRegion& dst, float r) {
+ int ir = VDCeilToInt(r);
+ float r2 = r*r;
+
+ dst.mSpans.clear();
+ dst.mBounds.set(-ir, 0, ir+1, 0);
+
+ for(int y = -ir; y <= ir; ++y) {
+ int dx = VDCeilToInt(sqrtf(r2 - y*y));
+
+ if (dx > 0) {
+ dst.mSpans.push_back(0x80008000 + (y << 16) - dx);
+ dst.mSpans.push_back(0x80008001 + (y << 16) + dx);
+ if (dst.mBounds.top > y)
+ dst.mBounds.top = y;
+ if (dst.mBounds.bottom < y)
+ dst.mBounds.bottom = y;
+ }
+ }
+}
+
+void VDPixmapConvolveRegion(VDPixmapRegion& dst, const VDPixmapRegion& r1, const VDPixmapRegion& r2, int dx1, int dx2, int dy) {
+ dst.mSpans.clear();
+ dst.mSpans.resize(r1.mSpans.size()+r2.mSpans.size());
+
+ const uint32 *itA = r1.mSpans.data();
+ const uint32 *itAE = itA + r1.mSpans.size();
+ const uint32 *itB = r2.mSpans.data();
+ const uint32 *itBE = itB + r2.mSpans.size();
+ uint32 *dstp0 = dst.mSpans.data();
+ uint32 *dstp = dst.mSpans.data();
+
+ uint32 offset1 = (dy<<16) + dx1;
+ uint32 offset2 = (dy<<16) + dx2;
+
+ while(itA != itAE && itB != itBE) {
+ uint32 x1;
+ uint32 x2;
+
+ if (itB[0] + offset1 < itA[0]) {
+ // B span is earlier. Use it.
+ x1 = itB[0] + offset1;
+ x2 = itB[1] + offset2;
+ itB += 2;
+
+ // B spans *can* overlap, due to the widening.
+ while(itB != itBE && itB[0]+offset1 <= x2) {
+ uint32 bx2 = itB[1] + offset2;
+ if (x2 < bx2)
+ x2 = bx2;
+
+ itB += 2;
+ }
+
+ goto a_start;
+ } else {
+ // A span is earlier. Use it.
+ x1 = itA[0];
+ x2 = itA[1];
+ itA += 2;
+
+ // A spans don't overlap, so begin merge loop with B first.
+ }
+
+ for(;;) {
+ // If we run out of B spans or the B span doesn't overlap,
+ // then the next A span can't either (because A spans don't
+ // overlap) and we exit.
+
+ if (itB == itBE || itB[0]+offset1 > x2)
+ break;
+
+ do {
+ uint32 bx2 = itB[1] + offset2;
+ if (x2 < bx2)
+ x2 = bx2;
+
+ itB += 2;
+ } while(itB != itBE && itB[0]+offset1 <= x2);
+
+ // If we run out of A spans or the A span doesn't overlap,
+ // then the next B span can't either, because we would have
+ // consumed all overlapping B spans in the above loop.
+a_start:
+ if (itA == itAE || itA[0] > x2)
+ break;
+
+ do {
+ uint32 ax2 = itA[1];
+ if (x2 < ax2)
+ x2 = ax2;
+
+ itA += 2;
+ } while(itA != itAE && itA[0] <= x2);
+ }
+
+ // Flush span.
+ dstp[0] = x1;
+ dstp[1] = x2;
+ dstp += 2;
+ }
+
+ // Copy over leftover spans.
+ memcpy(dstp, itA, sizeof(uint32)*(itAE - itA));
+ dstp += itAE - itA;
+
+ while(itB != itBE) {
+ // B span is earlier. Use it.
+ uint32 x1 = itB[0] + offset1;
+ uint32 x2 = itB[1] + offset2;
+ itB += 2;
+
+ // B spans *can* overlap, due to the widening.
+ while(itB != itBE && itB[0]+offset1 <= x2) {
+ uint32 bx2 = itB[1] + offset2;
+ if (x2 < bx2)
+ x2 = bx2;
+
+ itB += 2;
+ }
+
+ dstp[0] = x1;
+ dstp[1] = x2;
+ dstp += 2;
+ }
+
+ dst.mSpans.resize(dstp - dst.mSpans.data());
+}
+
+void VDPixmapConvolveRegion(VDPixmapRegion& dst, const VDPixmapRegion& r1, const VDPixmapRegion& r2) {
+ VDPixmapRegion temp;
+
+ const uint32 *src1 = r2.mSpans.data();
+ const uint32 *src2 = src1 + r2.mSpans.size();
+
+ dst.mSpans.clear();
+ while(src1 != src2) {
+ uint32 p1 = src1[0];
+ uint32 p2 = src1[1];
+ src1 += 2;
+
+ temp.mSpans.swap(dst.mSpans);
+ VDPixmapConvolveRegion(dst, temp, r1, (p1 & 0xffff) - 0x8000, (p2 & 0xffff) - 0x8000, (p1 >> 16) - 0x8000);
+ }
+}
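+
+// This two-region overload effectively dilates r1 by r2 (a Minkowski sum of
+// spans): for each span of r2 it merges in a copy of r1 offset vertically by
+// the span's y and stretched horizontally across the span's x range.
+// Convolving a glyph region with a small round region, for example, thickens
+// it for rendering borders.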
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample.cpp
new file mode 100644
index 000000000..4d1aef5f5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample.cpp
@@ -0,0 +1,348 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2004 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include <float.h>
+#include <math.h>
+#include <vd2/system/vdalloc.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/memory.h>
+#include <vd2/system/math.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/resample.h>
+#include "uberblit_gen.h"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// the resampler (finally)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDPixmapResampler : public IVDPixmapResampler {
+public:
+ VDPixmapResampler();
+ ~VDPixmapResampler();
+
+ void SetSplineFactor(double A) { mSplineFactor = A; }
+ void SetFilters(FilterMode h, FilterMode v, bool interpolationOnly);
+ bool Init(uint32 dw, uint32 dh, int dstformat, uint32 sw, uint32 sh, int srcformat);
+ bool Init(const vdrect32f& dstrect, uint32 dw, uint32 dh, int dstformat, const vdrect32f& srcrect, uint32 sw, uint32 sh, int srcformat);
+ void Shutdown();
+
+ void Process(const VDPixmap& dst, const VDPixmap& src);
+
+protected:
+ void ApplyFilters(VDPixmapUberBlitterGenerator& gen, uint32 dw, uint32 dh, float xoffset, float yoffset, float xfactor, float yfactor);
+
+ vdautoptr<IVDPixmapBlitter> mpBlitter;
+ vdautoptr<IVDPixmapBlitter> mpBlitter2;
+ double mSplineFactor;
+ FilterMode mFilterH;
+ FilterMode mFilterV;
+ bool mbInterpOnly;
+
+ vdrect32 mDstRectPlane0;
+ vdrect32 mDstRectPlane12;
+};
+
+IVDPixmapResampler *VDCreatePixmapResampler() { return new VDPixmapResampler; }
+
+VDPixmapResampler::VDPixmapResampler()
+ : mSplineFactor(-0.6)
+ , mFilterH(kFilterCubic)
+ , mFilterV(kFilterCubic)
+ , mbInterpOnly(false)
+{
+}
+
+VDPixmapResampler::~VDPixmapResampler() {
+ Shutdown();
+}
+
+void VDPixmapResampler::SetFilters(FilterMode h, FilterMode v, bool interpolationOnly) {
+ mFilterH = h;
+ mFilterV = v;
+ mbInterpOnly = interpolationOnly;
+}
+
+bool VDPixmapResampler::Init(uint32 dw, uint32 dh, int dstformat, uint32 sw, uint32 sh, int srcformat) {
+ vdrect32f rSrc(0.0f, 0.0f, (float)sw, (float)sh);
+ vdrect32f rDst(0.0f, 0.0f, (float)dw, (float)dh);
+ return Init(rDst, dw, dh, dstformat, rSrc, sw, sh, srcformat);
+}
+
+bool VDPixmapResampler::Init(const vdrect32f& dstrect0, uint32 dw, uint32 dh, int dstformat, const vdrect32f& srcrect0, uint32 sw, uint32 sh, int srcformat) {
+ Shutdown();
+
+ if (dstformat != srcformat || (
+ srcformat != nsVDPixmap::kPixFormat_XRGB8888 &&
+ srcformat != nsVDPixmap::kPixFormat_Y8 &&
+ srcformat != nsVDPixmap::kPixFormat_YUV444_Planar &&
+ srcformat != nsVDPixmap::kPixFormat_YUV422_Planar &&
+ srcformat != nsVDPixmap::kPixFormat_YUV420_Planar &&
+ srcformat != nsVDPixmap::kPixFormat_YUV411_Planar &&
+ srcformat != nsVDPixmap::kPixFormat_YUV410_Planar
+ ))
+ return false;
+
+ // convert destination flips to source flips
+ vdrect32f dstrect(dstrect0);
+ vdrect32f srcrect(srcrect0);
+
+ if (dstrect.left > dstrect.right) {
+ std::swap(dstrect.left, dstrect.right);
+ std::swap(srcrect.left, srcrect.right);
+ }
+
+ if (dstrect.top > dstrect.bottom) {
+ std::swap(dstrect.top, dstrect.bottom);
+ std::swap(srcrect.top, srcrect.bottom);
+ }
+
+ // compute source step factors
+ float xfactor = (float)srcrect.width() / (float)dstrect.width();
+ float yfactor = (float)srcrect.height() / (float)dstrect.height();
+
+ // clip destination rect
+ if (dstrect.left < 0) {
+ float clipx1 = -dstrect.left;
+ srcrect.left += xfactor * clipx1;
+ dstrect.left = 0.0f;
+ }
+
+ if (dstrect.top < 0) {
+ float clipy1 = -dstrect.top;
+ srcrect.top += yfactor * clipy1;
+ dstrect.top = 0.0f;
+ }
+
+ float clipx2 = dstrect.right - (float)dw;
+ if (clipx2 > 0) {
+ srcrect.right -= xfactor * clipx2;
+ dstrect.right = (float)dw;
+ }
+
+ float clipy2 = dstrect.bottom - (float)dh;
+ if (clipy2 > 0) {
+ srcrect.bottom -= yfactor * clipy2;
+ dstrect.bottom = (float)dh;
+ }
+
+ // compute plane 0 dest rect in integral quanta
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(dstformat);
+ mDstRectPlane0.left = VDCeilToInt(dstrect.left - 0.5f);
+ mDstRectPlane0.top = VDCeilToInt(dstrect.top - 0.5f);
+ mDstRectPlane0.right = VDCeilToInt(dstrect.right - 0.5f);
+ mDstRectPlane0.bottom = VDCeilToInt(dstrect.bottom - 0.5f);
+
+ // compute plane 0 stepping parameters
+ float xoffset = (((float)mDstRectPlane0.left + 0.5f) - dstrect.left) * xfactor + srcrect.left;
+ float yoffset = (((float)mDstRectPlane0.top + 0.5f) - dstrect.top ) * yfactor + srcrect.top;
+
+ // compute plane 1/2 dest rect and stepping parameters
+ float xoffset2 = 0.0f;
+ float yoffset2 = 0.0f;
+
+ if (formatInfo.auxbufs > 0) {
+ float xf2 = (float)(1 << formatInfo.auxwbits);
+ float yf2 = (float)(1 << formatInfo.auxhbits);
+ float invxf2 = 1.0f / xf2;
+ float invyf2 = 1.0f / yf2;
+
+ // convert source and dest rects to plane 1/2 space
+ vdrect32f srcrect2(srcrect);
+ vdrect32f dstrect2(dstrect);
+
+ srcrect2.scale(invxf2, invyf2);
+ dstrect2.scale(invxf2, invyf2);
+
+ switch(srcformat) {
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ break;
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ srcrect2.translate(0.25f, 0.0f);
+ dstrect2.translate(0.25f, 0.0f);
+ break;
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ srcrect2.translate(0.25f, 0.0f);
+ dstrect2.translate(0.25f, 0.0f);
+ break;
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ srcrect2.translate(0.375f, 0.0f);
+ dstrect2.translate(0.375f, 0.0f);
+ break;
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ break;
+ default:
+ VDASSERT(false);
+ }
+
+ mDstRectPlane12.left = VDCeilToInt(dstrect2.left - 0.5f);
+ mDstRectPlane12.top = VDCeilToInt(dstrect2.top - 0.5f);
+ mDstRectPlane12.right = VDCeilToInt(dstrect2.right - 0.5f);
+ mDstRectPlane12.bottom = VDCeilToInt(dstrect2.bottom - 0.5f);
+
+ xoffset2 = (((float)mDstRectPlane12.left + 0.5f) - dstrect2.left) * xfactor + srcrect2.left;
+ yoffset2 = (((float)mDstRectPlane12.top + 0.5f) - dstrect2.top ) * yfactor + srcrect2.top;
+ }
+
+ VDPixmapUberBlitterGenerator gen;
+
+ switch(srcformat) {
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ gen.ldsrc(0, 0, 0, 0, sw, sh, VDPixmapGetFormatTokenFromFormat(srcformat), sw*4);
+ ApplyFilters(gen, mDstRectPlane0.width(), mDstRectPlane0.height(), xoffset, yoffset, xfactor, yfactor);
+ break;
+
+ case nsVDPixmap::kPixFormat_Y8:
+ gen.ldsrc(0, 0, 0, 0, sw, sh, kVDPixType_8, sw);
+ ApplyFilters(gen, mDstRectPlane0.width(), mDstRectPlane0.height(), xoffset, yoffset, xfactor, yfactor);
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ gen.ldsrc(0, 0, 0, 0, sw, sh, kVDPixType_8, sw);
+ ApplyFilters(gen, mDstRectPlane0.width(), mDstRectPlane0.height(), xoffset, yoffset, xfactor, yfactor);
+
+ {
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(dstformat);
+ uint32 subsw = -(-(sint32)sw >> info.auxwbits);
+ uint32 subsh = -(-(sint32)sh >> info.auxhbits);
+
+ VDPixmapUberBlitterGenerator gen2;
+ gen2.ldsrc(0, 0, 0, 0, subsw, subsh, kVDPixType_8, subsw);
+ ApplyFilters(gen2, mDstRectPlane12.width(), mDstRectPlane12.height(), xoffset2, yoffset2, xfactor, yfactor);
+ mpBlitter2 = gen2.create();
+ if (!mpBlitter2)
+ return false;
+ }
+ break;
+ }
+
+ mpBlitter = gen.create();
+ if (!mpBlitter)
+ return false;
+
+ return true;
+}
+
+void VDPixmapResampler::Shutdown() {
+ mpBlitter = NULL;
+ mpBlitter2 = NULL;
+}
+
+void VDPixmapResampler::Process(const VDPixmap& dst, const VDPixmap& src) {
+ if (!mpBlitter)
+ return;
+
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ case nsVDPixmap::kPixFormat_Y8:
+ mpBlitter->Blit(dst, &mDstRectPlane0, src);
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ // blit primary plane
+ mpBlitter->Blit(dst, &mDstRectPlane0, src);
+
+ // slice and blit secondary planes
+ {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(dst.format);
+ VDPixmap pxdst;
+ pxdst.format = nsVDPixmap::kPixFormat_Y8;
+ pxdst.w = -(-dst.w >> formatInfo.auxwbits);
+ pxdst.h = -(-dst.h >> formatInfo.auxhbits);
+ pxdst.pitch = dst.pitch2;
+ pxdst.data = dst.data2;
+
+ VDPixmap pxsrc;
+ pxsrc.format = nsVDPixmap::kPixFormat_Y8;
+ pxsrc.w = -(-src.w >> formatInfo.auxwbits);
+ pxsrc.h = -(-src.h >> formatInfo.auxhbits);
+ pxsrc.pitch = src.pitch2;
+ pxsrc.data = src.data2;
+
+ mpBlitter2->Blit(pxdst, &mDstRectPlane12, pxsrc);
+
+ pxdst.pitch = dst.pitch3;
+ pxdst.data = dst.data3;
+ pxsrc.pitch = src.pitch3;
+ pxsrc.data = src.data3;
+ mpBlitter2->Blit(pxdst, &mDstRectPlane12, pxsrc);
+ }
+ break;
+ }
+}
+
+void VDPixmapResampler::ApplyFilters(VDPixmapUberBlitterGenerator& gen, uint32 dw, uint32 dh, float xoffset, float yoffset, float xfactor, float yfactor) {
+ switch(mFilterH) {
+ case kFilterPoint:
+ gen.pointh(xoffset, xfactor, dw);
+ break;
+
+ case kFilterLinear:
+ gen.linearh(xoffset, xfactor, dw, mbInterpOnly);
+ break;
+
+ case kFilterCubic:
+ gen.cubich(xoffset, xfactor, dw, (float)mSplineFactor, mbInterpOnly);
+ break;
+
+ case kFilterLanczos3:
+ gen.lanczos3h(xoffset, xfactor, dw);
+ break;
+ }
+
+ switch(mFilterV) {
+ case kFilterPoint:
+ gen.pointv(yoffset, yfactor, dh);
+ break;
+
+ case kFilterLinear:
+ gen.linearv(yoffset, yfactor, dh, mbInterpOnly);
+ break;
+
+ case kFilterCubic:
+ gen.cubicv(yoffset, yfactor, dh, (float)mSplineFactor, mbInterpOnly);
+ break;
+
+ case kFilterLanczos3:
+ gen.lanczos3v(yoffset, yfactor, dh);
+ break;
+ }
+}
+
+bool VDPixmapResample(const VDPixmap& dst, const VDPixmap& src, IVDPixmapResampler::FilterMode filter) {
+ VDPixmapResampler r;
+
+ r.SetFilters(filter, filter, false);
+
+ if (!r.Init(dst.w, dst.h, dst.format, src.w, src.h, src.format))
+ return false;
+
+ r.Process(dst, src);
+ return true;
+}
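
For orientation, a minimal usage sketch of the interface this file implements, following the same call sequence as the VDPixmapResample() helper above. The header path, the nested kFilterCubic constant, and deletability of IVDPixmapResampler through vdautoptr are assumed from this file, not verified against the headers:

	// Sketch: scale one pixmap into another with the cubic filter, via the
	// factory defined above.  Formats must match and be one of the supported ones.
	#include <vd2/Kasumi/resample.h>
	#include <vd2/system/vdalloc.h>

	bool ResizeCubic(const VDPixmap& dst, const VDPixmap& src) {
		vdautoptr<IVDPixmapResampler> r(VDCreatePixmapResampler());

		r->SetSplineFactor(-0.6);	// same default the class above uses
		r->SetFilters(IVDPixmapResampler::kFilterCubic, IVDPixmapResampler::kFilterCubic, false);

		if (!r->Init(dst.w, dst.h, dst.format, src.w, src.h, src.format))
			return false;		// unsupported or mismatched formats

		r->Process(dst, src);
		return true;
	}
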
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_kernels.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_kernels.cpp
new file mode 100644
index 000000000..010364e1a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_kernels.cpp
@@ -0,0 +1,255 @@
+#include <math.h>
+#include <vd2/Kasumi/resample_kernels.h>
+
+///////////////////////////////////////////////////////////////////////////
+//
+// utility functions
+//
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
+ inline sint32 scale32x32_fp16(sint32 x, sint32 y) {
+ return (sint32)(((sint64)x * y + 0x8000) >> 16);
+ }
+
+ inline double sinc(double x) {
+ return fabs(x) < 1e-9 ? 1.0 : sin(x) / x;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDResamplerAxis
+//
+///////////////////////////////////////////////////////////////////////////
+
+void VDResamplerAxis::Init(sint32 dudx) {
+ this->dudx = dudx;
+}
+
+void VDResamplerAxis::Compute(sint32 count, sint32 u0, sint32 w, sint32 kernel_width) {
+ u = u0;
+ dx = count;
+
+ sint32 du_kern = (kernel_width-1) << 16;
+ sint32 u2 = u + dudx*(dx-1);
+ sint32 u_limit = w << 16;
+
+ dx_precopy = 0;
+ dx_preclip = 0;
+ dx_active = 0;
+ dx_postclip = 0;
+ dx_postcopy = 0;
+ dx_dualclip = 0;
+
+ sint32 dx_temp = dx;
+ sint32 u_start = u;
+
+ // (desired - u0 + (dudx-1)) / dudx : first pixel >= desired
+
+ sint32 dudx_m1_mu0 = dudx - 1 - u;
+ sint32 first_preclip = (dudx_m1_mu0 + 0x10000 - du_kern) / dudx;
+ sint32 first_active = (dudx_m1_mu0 ) / dudx;
+ sint32 first_postclip = (dudx_m1_mu0 + u_limit - du_kern) / dudx;
+ sint32 first_postcopy = (dudx_m1_mu0 + u_limit - 0x10000) / dudx;
+
+ // clamp
+ if (first_preclip < 0)
+ first_preclip = 0;
+ if (first_active < first_preclip)
+ first_active = first_preclip;
+ if (first_postclip < first_active)
+ first_postclip = first_active;
+ if (first_postcopy < first_postclip)
+ first_postcopy = first_postclip;
+ if (first_preclip > dx)
+ first_preclip = dx;
+ if (first_active > dx)
+ first_active = dx;
+ if (first_postclip > dx)
+ first_postclip = dx;
+ if (first_postcopy > dx)
+ first_postcopy = dx;
+
+ // determine widths
+
+ dx_precopy = first_preclip;
+ dx_preclip = first_active - first_preclip;
+ dx_active = first_postclip - first_active;
+ dx_postclip = first_postcopy - first_postclip;
+ dx_postcopy = dx - first_postcopy;
+
+ // sanity checks
+ sint32 pos0 = dx_precopy;
+ sint32 pos1 = pos0 + dx_preclip;
+ sint32 pos2 = pos1 + dx_active;
+ sint32 pos3 = pos2 + dx_postclip;
+
+ VDASSERT(!((dx_precopy|dx_preclip|dx_active|dx_postcopy|dx_postclip) & 0x80000000));
+ VDASSERT(dx_precopy + dx_preclip + dx_active + dx_postcopy + dx_postclip == dx);
+
+ VDASSERT(!pos0 || u_start + dudx*(pos0 - 1) < 0x10000 - du_kern); // precopy -> preclip
+ VDASSERT( pos0 >= pos1 || u_start + dudx*(pos0 ) >= 0x10000 - du_kern);
+ VDASSERT( pos1 <= pos0 || u_start + dudx*(pos1 - 1) < 0); // preclip -> active
+ VDASSERT( pos1 >= pos2 || u_start + dudx*(pos1 ) >= 0 || !dx_active);
+ VDASSERT( pos2 <= pos1 || u_start + dudx*(pos2 - 1) < u_limit - du_kern || !dx_active); // active -> postclip
+ VDASSERT( pos2 >= pos3 || u_start + dudx*(pos2 ) >= u_limit - du_kern);
+ VDASSERT( pos3 <= pos2 || u_start + dudx*(pos3 - 1) < u_limit - 0x10000); // postclip -> postcopy
+ VDASSERT( pos3 >= dx || u_start + dudx*(pos3 ) >= u_limit - 0x10000);
+
+ u += dx_precopy * dudx;
+
+ // test for overlapping clipping regions
+ if (!dx_active && kernel_width > w) {
+ dx_dualclip = dx_preclip + dx_postclip;
+ dx_preclip = dx_postclip = 0;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDResamplerLinearFilter
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerLinearFilter::VDResamplerLinearFilter(double twofc)
+ : mScale(twofc)
+ , mTaps((int)ceil(1.0 / twofc) * 2)
+{
+}
+
+int VDResamplerLinearFilter::GetFilterWidth() const {
+ return mTaps;
+}
+
+double VDResamplerLinearFilter::EvaluateFilter(double t) const {
+ t = 1.0f - fabs(t)*mScale;
+
+ return t + fabs(t);
+}
+
+void VDResamplerLinearFilter::GenerateFilter(float *dst, double offset) const {
+ double pos = -((double)((mTaps>>1)-1) + offset) * mScale;
+
+ for(unsigned i=0; i<mTaps; ++i) {
+ double t = 1.0 - fabs(pos);
+
+ *dst++ = (float)(t+fabs(t));
+ pos += mScale;
+ }
+}
+
+void VDResamplerLinearFilter::GenerateFilterBank(float *dst) const {
+ for(int offset=0; offset<256; ++offset) {
+ GenerateFilter(dst, offset * (1.0f / 256.0f));
+ dst += mTaps;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDResamplerCubicFilter
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerCubicFilter::VDResamplerCubicFilter(double twofc, double A)
+ : mScale(twofc)
+ , mA0( 1.0 )
+ , mA2(-3.0-A)
+ , mA3( 2.0+A)
+ , mB0(-4.0*A)
+ , mB1( 8.0*A)
+ , mB2(-5.0*A)
+ , mB3( A)
+ , mTaps((int)ceil(2.0 / twofc)*2)
+{
+}
+
+int VDResamplerCubicFilter::GetFilterWidth() const { return mTaps; }
+
+double VDResamplerCubicFilter::EvaluateFilter(double t) const {
+ t = fabs(t)*mScale;
+
+ if (t < 1.0)
+ return mA0 + (t*t)*(mA2 + t*mA3);
+ else if (t < 2.0)
+ return mB0 + t*(mB1 + t*(mB2 + t*mB3));
+ else
+ return 0;
+}
+
+void VDResamplerCubicFilter::GenerateFilter(float *dst, double offset) const {
+ double pos = -((double)((mTaps>>1)-1) + offset) * mScale;
+
+ for(unsigned i=0; i<mTaps; ++i) {
+ double t = fabs(pos);
+ double v = 0;
+
+ if (t < 1.0)
+ v = mA0 + (t*t)*(mA2 + t*mA3);
+ else if (t < 2.0)
+ v = mB0 + t*(mB1 + t*(mB2 + t*mB3));
+
+ *dst++ = (float)v;
+ pos += mScale;
+ }
+}
+
+void VDResamplerCubicFilter::GenerateFilterBank(float *dst) const {
+ for(int offset=0; offset<256; ++offset) {
+ GenerateFilter(dst, offset * (1.0f / 256.0f));
+ dst += mTaps;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDResamplerLanczos3Filter
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerLanczos3Filter::VDResamplerLanczos3Filter(double twofc)
+ : mScale(twofc)
+ , mTaps((int)ceil(3.0 / twofc)*2)
+{
+}
+
+int VDResamplerLanczos3Filter::GetFilterWidth() const {
+ return mTaps;
+}
+
+double VDResamplerLanczos3Filter::EvaluateFilter(double t) const {
+ static const double pi = 3.1415926535897932384626433832795; // pi
+ static const double pi3 = 1.0471975511965977461542144610932; // pi/3
+
+ t *= mScale;
+
+ if (fabs(t) < 3.0)
+ return sinc(pi*t) * sinc(pi3*t);
+ else
+ return 0.0;
+}
+
+void VDResamplerLanczos3Filter::GenerateFilter(float *dst, double offset) const {
+ static const double pi = 3.1415926535897932384626433832795; // pi
+ static const double pi3 = 1.0471975511965977461542144610932; // pi/3
+
+ double t = -(((double)((mTaps>>1)-1) + offset) * mScale);
+
+ for(unsigned i=0; i<mTaps; ++i) {
+ double v = 0;
+
+ if (fabs(t) < 3.0)
+ v = sinc(pi*t) * sinc(pi3*t);
+
+ *dst++ = (float)v;
+ t += mScale;
+ }
+}
+
+void VDResamplerLanczos3Filter::GenerateFilterBank(float *dst) const {
+ for(int offset=0; offset<256; ++offset) {
+ GenerateFilter(dst, offset * (1.0f / 256.0f));
+ dst += mTaps;
+ }
+}
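
Writing the three kernels above out explicitly (the same piecewise forms the EvaluateFilter methods compute, with s = |t| * twofc; the table generators in the next file normalize each row, so constant factors such as the doubled tent drop out):

	k_{\text{linear}}(s)   = \max(0,\, 1 - s)          \quad\text{(the code returns twice this)}
	k_{\text{cubic}}(s)    = \begin{cases} (A+2)s^3 - (A+3)s^2 + 1, & s < 1 \\ A s^3 - 5A s^2 + 8A s - 4A, & 1 \le s < 2 \\ 0, & s \ge 2 \end{cases}
	k_{\text{lanczos3}}(s) = \operatorname{sinc}(\pi s)\,\operatorname{sinc}(\pi s/3)\ \ [s < 3], \qquad \operatorname{sinc}(x) = \sin(x)/x
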
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_stages.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages.cpp
new file mode 100644
index 000000000..fcea6c669
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages.cpp
@@ -0,0 +1,149 @@
+#include <vd2/system/math.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/resample_kernels.h>
+#include "resample_stages.h"
+
+VDSteppedAllocator::VDSteppedAllocator(size_t initialSize)
+ : mpHead(NULL)
+ , mpAllocNext(NULL)
+ , mAllocLeft(0)
+ , mAllocNext(initialSize)
+ , mAllocInit(initialSize)
+{
+}
+
+VDSteppedAllocator::~VDSteppedAllocator() {
+ clear();
+}
+
+void VDSteppedAllocator::clear() {
+ while(Block *p = mpHead) {
+ mpHead = mpHead->next;
+ free(p);
+ }
+ mAllocLeft = 0;
+ mAllocNext = mAllocInit;
+}
+
+void *VDSteppedAllocator::allocate(size_type n) {
+ n = (n+15) & ~15;
+ if (mAllocLeft < n) {
+ mAllocLeft = mAllocNext;
+ mAllocNext += (mAllocNext >> 1);
+ if (mAllocLeft < n)
+ mAllocLeft = n;
+
+ Block *t = (Block *)malloc(sizeof(Block) + mAllocLeft);
+
+ if (mpHead)
+ mpHead->next = t;
+
+ mpHead = t;
+ mpHead->next = NULL;
+
+ mpAllocNext = (char *)(mpHead + 1);
+ }
+
+ void *p = mpAllocNext;
+ mpAllocNext += n;
+ mAllocLeft -= n;
+ return p;
+}
+
+void VDResamplerGenerateTable(sint32 *dst, const IVDResamplerFilter& filter) {
+ const unsigned width = filter.GetFilterWidth();
+ vdblock<float> filters(width * 256);
+ float *src = filters.data();
+
+ filter.GenerateFilterBank(src);
+
+ for(unsigned phase=0; phase < 256; ++phase) {
+ float sum = 0;
+
+ for(unsigned i=0; i<width; ++i)
+ sum += src[i];
+
+ float scalefac = 16384.0f / sum;
+
+ for(unsigned j=0; j<width; j += 2) {
+ int v0 = VDRoundToIntFast(src[j+0] * scalefac);
+ int v1 = VDRoundToIntFast(src[j+1] * scalefac);
+
+ dst[j+0] = v0;
+ dst[j+1] = v1;
+ }
+
+ src += width;
+ dst += width;
+ }
+}
+
+void VDResamplerGenerateTableF(float *dst, const IVDResamplerFilter& filter) {
+ const unsigned width = filter.GetFilterWidth();
+ filter.GenerateFilterBank(dst);
+
+ for(unsigned phase=0; phase < 256; ++phase) {
+ float sum = 0;
+
+ for(unsigned i=0; i<width; ++i)
+ sum += dst[i];
+
+ float scalefac = 1.0f / sum;
+
+ for(unsigned j=0; j<width; ++j)
+ *dst++ *= scalefac;
+ }
+}
+
+void VDResamplerGenerateTable2(sint32 *dst, const IVDResamplerFilter& filter, sint32 count, sint32 u0, sint32 dudx) {
+ const unsigned width = filter.GetFilterWidth();
+ vdblock<float> filters(width);
+ float *src = filters.data();
+
+ filter.GenerateFilterBank(src);
+
+ for(sint32 i=0; i<count; ++i) {
+ sint32 u = u0 + dudx*i;
+
+ *dst++ = u >> 16;
+ filter.GenerateFilter(src, (double)(u & 0xffff) / 65536.0);
+
+ float sum = 0;
+ for(uint32 j=0; j<width; ++j)
+ sum += src[j];
+
+ float scalefac = 16384.0f / sum;
+
+ sint32 isum = 0;
+ for(uint32 j=0; j<width; ++j) {
+ sint32 v = VDRoundToIntFast(src[j] * scalefac);
+
+ dst[j] = v;
+ isum += v;
+ }
+
+ sint32 ierr = 16384 - isum;
+		sint32 idelta = 2*(ierr >> 31) + 1;
+ while(ierr) {
+ for(uint32 j=0; j<width && ierr; ++j) {
+ if (!dst[j])
+ continue;
+
+ dst[j] += idelta;
+ ierr -= idelta;
+ }
+ }
+
+ dst += width;
+ }
+}
+
+void VDResamplerSwizzleTable(sint32 *dst, unsigned pairs) {
+ do {
+ sint32 v0 = dst[0];
+ sint32 v1 = dst[1];
+
+ dst[0] = dst[1] = (v0 & 0xffff) + (v1<<16);
+ dst += 2;
+ } while(--pairs);
+}
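
A short note on the table layouts produced above, as they can be read off this file (inferred from the code, not from upstream documentation):

	// VDResamplerGenerateTable / VDResamplerGenerateTableF:
	//   256 rows (one per 8-bit subpixel phase) of GetFilterWidth() taps each;
	//   integer rows are scaled toward a sum of 16384 (2.14 fixed point),
	//   float rows are normalized to sum to 1.0.
	//
	// VDResamplerGenerateTable2 (per output pixel):
	//   [ integer source offset (u >> 16) ][ width taps ], with the rounding
	//   residue spread one unit at a time over non-zero taps so each row sums
	//   to exactly 16384.
	//
	// VDResamplerSwizzleTable:
	//   packs each coefficient pair (c0, c1) as (c0 & 0xffff) + (c1 << 16) and
	//   stores the result in both dwords of the pair -- the interleaved form the
	//   pmaddwd-based MMX/SSE2 loops in the following files consume.
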
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_reference.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_reference.cpp
new file mode 100644
index 000000000..94bee7c9e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_reference.cpp
@@ -0,0 +1,425 @@
+#include <vd2/system/memory.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "resample_stages_reference.h"
+#include <vd2/Kasumi/resample_kernels.h>
+#include "blt_spanutils.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+int VDResamplerRowStageSeparablePoint8::GetWindowSize() const {
+ return 1;
+}
+
+void VDResamplerRowStageSeparablePoint8::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ *dst++ = src[u>>16];
+ u += dudx;
+ } while(--w);
+}
+
+int VDResamplerRowStageSeparablePoint16::GetWindowSize() const {
+ return 1;
+}
+
+void VDResamplerRowStageSeparablePoint16::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)src0;
+
+ do {
+ *dst++ = src[u>>16];
+ u += dudx;
+ } while(--w);
+}
+
+int VDResamplerRowStageSeparablePoint32::GetWindowSize() const {
+ return 1;
+}
+
+void VDResamplerRowStageSeparablePoint32::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ do {
+ *dst++ = src[u>>16];
+ u += dudx;
+ } while(--w);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+int VDResamplerRowStageSeparableLinear8::GetWindowSize() const {return 2;}
+void VDResamplerRowStageSeparableLinear8::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ const sint32 iu = u>>16;
+ const uint32 p0 = src[iu];
+ const uint32 p1 = src[iu+1];
+ const uint32 f = (u >> 8) & 0xff;
+
+ *dst++ = (uint8)(p0 + (((sint32)(p1 - p0)*f + 0x80)>>8));
+ u += dudx;
+ } while(--w);
+}
+
+void VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ VDASSERT(!u && dudx == 0x8000);
+
+ nsVDPixmapSpanUtils::horiz_expand2x_coaligned(dst, src, w);
+}
+
+int VDResamplerRowStageSeparableLinear32::GetWindowSize() const {return 2;}
+void VDResamplerRowStageSeparableLinear32::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ do {
+ const sint32 iu = u>>16;
+ const uint32 p0 = src[iu];
+ const uint32 p1 = src[iu+1];
+ const uint32 f = (u >> 8) & 0xff;
+
+ const uint32 p0_rb = p0 & 0xff00ff;
+ const uint32 p1_rb = p1 & 0xff00ff;
+ const uint32 p0_g = p0 & 0xff00;
+ const uint32 p1_g = p1 & 0xff00;
+
+ *dst++ = ((p0_rb + (((p1_rb - p0_rb)*f + 0x800080)>>8)) & 0xff00ff)
+ + ((p0_g + (((p1_g - p0_g )*f + 0x008000)>>8)) & 0x00ff00);
+ u += dudx;
+ } while(--w);
+}
+
+int VDResamplerColStageSeparableLinear8::GetWindowSize() const {return 2;}
+void VDResamplerColStageSeparableLinear8::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src0 = (const uint8 *)srcarray[0];
+ const uint8 *src1 = (const uint8 *)srcarray[1];
+ const uint32 f = (phase >> 8) & 0xff;
+
+ do {
+ const uint32 p0 = *src0++;
+ const uint32 p1 = *src1++;
+
+ *dst++ = (uint8)(p0 + (((p1 - p0)*f + 0x80)>>8));
+ } while(--w);
+}
+
+int VDResamplerColStageSeparableLinear32::GetWindowSize() const {return 2;}
+void VDResamplerColStageSeparableLinear32::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src0 = (const uint32 *)srcarray[0];
+ const uint32 *src1 = (const uint32 *)srcarray[1];
+ const uint32 f = (phase >> 8) & 0xff;
+
+ do {
+ const uint32 p0 = *src0++;
+ const uint32 p1 = *src1++;
+
+ const uint32 p0_rb = p0 & 0xff00ff;
+ const uint32 p1_rb = p1 & 0xff00ff;
+ const uint32 p0_g = p0 & 0xff00;
+ const uint32 p1_g = p1 & 0xff00;
+
+ *dst++ = ((p0_rb + (((p1_rb - p0_rb)*f + 0x800080)>>8)) & 0xff00ff)
+ + ((p0_g + (((p1_g - p0_g )*f + 0x008000)>>8)) & 0x00ff00);
+ } while(--w);
+}
+
+VDResamplerRowStageSeparableTable8::VDResamplerRowStageSeparableTable8(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTable(mFilterBank.data(), filter);
+}
+
+int VDResamplerRowStageSeparableTable8::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerRowStageSeparableTable8::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+ const unsigned ksize = (int)mFilterBank.size() >> 8;
+ const sint32 *filterBase = mFilterBank.data();
+
+ do {
+ const uint8 *src2 = src + (u>>16);
+ const sint32 *filter = filterBase + ksize*((u>>8)&0xff);
+ u += dudx;
+
+ int b = 0x2000;
+ for(unsigned i = ksize; i; --i) {
+ uint8 p = *src2++;
+ sint32 coeff = *filter++;
+
+ b += (sint32)p*coeff;
+ }
+
+ b >>= 14;
+
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ *dst++ = (uint8)b;
+ } while(--w);
+}
+
+VDResamplerRowStageSeparableTable32::VDResamplerRowStageSeparableTable32(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTable(mFilterBank.data(), filter);
+}
+
+int VDResamplerRowStageSeparableTable32::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerRowStageSeparableTable32::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+ const unsigned ksize = (int)mFilterBank.size() >> 8;
+ const sint32 *filterBase = mFilterBank.data();
+
+ do {
+ const uint32 *src2 = src + (u>>16);
+ const sint32 *filter = filterBase + ksize*((u>>8)&0xff);
+ u += dudx;
+
+ int r = 0x2000, g = 0x2000, b = 0x2000;
+ for(unsigned i = ksize; i; --i) {
+ uint32 p = *src2++;
+ sint32 coeff = *filter++;
+
+ r += ((p>>16)&0xff)*coeff;
+ g += ((p>> 8)&0xff)*coeff;
+ b += ((p )&0xff)*coeff;
+ }
+
+ r <<= 2;
+ g >>= 6;
+ b >>= 14;
+
+ if ((uint32)r >= 0x01000000)
+ r = ~r >> 31;
+ if ((uint32)g >= 0x00010000)
+ g = ~g >> 31;
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ *dst++ = (r & 0xff0000) + (g & 0xff00) + (b & 0xff);
+ } while(--w);
+}
+
+VDResamplerRowStageSeparableTable32Fx4::VDResamplerRowStageSeparableTable32Fx4(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTableF(mFilterBank.data(), filter);
+}
+
+int VDResamplerRowStageSeparableTable32Fx4::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerRowStageSeparableTable32Fx4::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ float *dst = (float *)dst0;
+ const float *src = (const float *)src0;
+ const unsigned ksize = (int)mFilterBank.size() >> 8;
+ const float *filterBase = mFilterBank.data();
+
+ do {
+ const float *src2 = src + (u>>16)*4;
+ const float *filter = filterBase + ksize*((u>>8)&0xff);
+ u += dudx;
+
+ float r = 0, g = 0, b = 0, a = 0;
+ for(unsigned i = ksize; i; --i) {
+ float coeff = *filter++;
+
+ r += coeff * src2[0];
+ g += coeff * src2[1];
+ b += coeff * src2[2];
+ a += coeff * src2[3];
+ src2 += 4;
+ }
+
+ dst[0] = r;
+ dst[1] = g;
+ dst[2] = b;
+ dst[3] = a;
+ dst += 4;
+ } while(--w);
+}
+
+VDResamplerRowStageSeparableTable32F::VDResamplerRowStageSeparableTable32F(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTableF(mFilterBank.data(), filter);
+}
+
+int VDResamplerRowStageSeparableTable32F::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerRowStageSeparableTable32F::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ float *dst = (float *)dst0;
+ const float *src = (const float *)src0;
+ const unsigned ksize = (int)mFilterBank.size() >> 8;
+ const float *filterBase = mFilterBank.data();
+
+ VDCPUCleanupExtensions();
+
+ do {
+ const float *src2 = src + (u>>16);
+ const float *filter = filterBase + ksize*((u>>8)&0xff);
+ u += dudx;
+
+ float r = 0;
+ for(unsigned i = ksize; i; --i) {
+ float coeff = *filter++;
+
+ r += coeff * src2[0];
+ ++src2;
+ }
+
+ dst[0] = r;
+ ++dst;
+ } while(--w);
+}
+
+VDResamplerColStageSeparableTable8::VDResamplerColStageSeparableTable8(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTable(mFilterBank.data(), filter);
+}
+
+int VDResamplerColStageSeparableTable8::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerColStageSeparableTable8::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *const *src = (const uint8 *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const sint32 *filter = &mFilterBank[((phase>>8)&0xff) * ksize];
+
+ for(uint32 i=0; i<w; ++i) {
+ int b = 0x2000;
+ const sint32 *filter2 = filter;
+ const uint8 *const *src2 = src;
+
+ for(unsigned j = ksize; j; --j) {
+ sint32 p = (*src2++)[i];
+ sint32 coeff = *filter2++;
+
+ b += p*coeff;
+ }
+
+ b >>= 14;
+
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ *dst++ = (uint8)b;
+ }
+}
+
+VDResamplerColStageSeparableTable32::VDResamplerColStageSeparableTable32(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTable(mFilterBank.data(), filter);
+}
+
+int VDResamplerColStageSeparableTable32::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerColStageSeparableTable32::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *const *src = (const uint32 *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const sint32 *filter = &mFilterBank[((phase>>8)&0xff) * ksize];
+
+ for(uint32 i=0; i<w; ++i) {
+ int r = 0x2000, g = 0x2000, b = 0x2000;
+ const sint32 *filter2 = filter;
+ const uint32 *const *src2 = src;
+
+ for(unsigned j = ksize; j; --j) {
+ uint32 p = (*src2++)[i];
+ sint32 coeff = *filter2++;
+
+ r += ((p>>16)&0xff)*coeff;
+ g += ((p>> 8)&0xff)*coeff;
+ b += ((p )&0xff)*coeff;
+ }
+
+ r <<= 2;
+ g >>= 6;
+ b >>= 14;
+
+ if ((uint32)r >= 0x01000000)
+ r = ~r >> 31;
+ if ((uint32)g >= 0x00010000)
+ g = ~g >> 31;
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ *dst++ = (r & 0xff0000) + (g & 0xff00) + (b & 0xff);
+ }
+}
+
+VDResamplerColStageSeparableTable32F::VDResamplerColStageSeparableTable32F(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTableF(mFilterBank.data(), filter);
+}
+
+int VDResamplerColStageSeparableTable32F::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerColStageSeparableTable32F::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ float *dst = (float *)dst0;
+ const float *const *src = (const float *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const float *filter = &mFilterBank[((phase>>8)&0xff) * ksize];
+
+ for(uint32 i=0; i<w; ++i) {
+ float r = 0;
+ const float *filter2 = filter;
+ const float *const *src2 = src;
+
+ for(unsigned j = ksize; j; --j) {
+ const float *p = (*src2++) + i;
+ float coeff = *filter2++;
+
+ r += p[0]*coeff;
+ }
+
+ dst[0] = r;
+ ++dst;
+ }
+}
+
+VDResamplerColStageSeparableTable32Fx4::VDResamplerColStageSeparableTable32Fx4(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTableF(mFilterBank.data(), filter);
+}
+
+int VDResamplerColStageSeparableTable32Fx4::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerColStageSeparableTable32Fx4::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ float *dst = (float *)dst0;
+ const float *const *src = (const float *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const float *filter = &mFilterBank[((phase>>8)&0xff) * ksize];
+
+ for(uint32 i=0; i<w; ++i) {
+ float r = 0, g = 0, b = 0, a = 0;
+ const float *filter2 = filter;
+ const float *const *src2 = src;
+
+ for(unsigned j = ksize; j; --j) {
+ const float *p = (*src2++) + i*4;
+ float coeff = *filter2++;
+
+ r += p[0]*coeff;
+ g += p[1]*coeff;
+ b += p[2]*coeff;
+ a += p[3]*coeff;
+ }
+
+ dst[0] = r;
+ dst[1] = g;
+ dst[2] = b;
+ dst[3] = a;
+ dst += 4;
+ }
+}
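
One idiom recurs throughout the reference stages above: the saturation applied after the >>14. A standalone restatement (it relies, as the original does, on arithmetic right shift of negative ints):

	#include <cstdint>

	static inline uint8_t SaturateTo8(int32_t v) {
		// Same trick as the stages above: any value outside 0..255 trips the
		// unsigned compare; ~v >> 31 is 0 when v was negative and -1 (all ones,
		// low byte 255) when v overflowed past 255.
		if ((uint32_t)v >= 0x100)
			v = ~v >> 31;
		return (uint8_t)v;
	}

	// SaturateTo8(-7) == 0,  SaturateTo8(300) == 255,  SaturateTo8(128) == 128
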
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x64.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x64.cpp
new file mode 100644
index 000000000..a206d37d8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x64.cpp
@@ -0,0 +1,26 @@
+#include "resample_stages_x64.h"
+
+extern "C" long vdasm_resize_table_col_SSE2(uint32 *out, const uint32 *const*in_table, const int *filter, int filter_width, uint32 w);
+extern "C" long vdasm_resize_table_row_SSE2(uint32 *out, const uint32 *in, const int *filter, int filter_width, uint32 w, long accum, long frac);
+
+VDResamplerSeparableTableRowStageSSE2::VDResamplerSeparableTableRowStageSSE2(const IVDResamplerFilter& filter)
+ : VDResamplerRowStageSeparableTable32(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (uint32)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableRowStageSSE2::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_SSE2((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+VDResamplerSeparableTableColStageSSE2::VDResamplerSeparableTableColStageSSE2(const IVDResamplerFilter& filter)
+ : VDResamplerColStageSeparableTable32(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (uint32)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableColStageSSE2::Process(void *dst, const void *const *src, uint32 w, sint32 phase) {
+ const unsigned filtSize = (unsigned)mFilterBank.size() >> 8;
+
+ vdasm_resize_table_col_SSE2((uint32*)dst, (const uint32 *const *)src, (const int *)mFilterBank.data() + filtSize*((phase >> 8) & 0xff), filtSize, w);
+}
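
The x64 column stage above selects the per-phase kernel slice in C++ (the x86 counterparts in the next file pass the fractional accumulator down to the assembly instead). The selection, spelled out on its own -- the helper name is mine, only the arithmetic comes from the call above:

	#include <cstdint>
	#include <cstddef>

	// phase carries the subpixel position; bits 8..15 index one of the 256
	// precomputed kernels, each ksize coefficients long.
	static inline const int32_t *PhaseSlice(const int32_t *bank, size_t ksize, int32_t phase) {
		return bank + ksize * ((phase >> 8) & 0xff);
	}
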
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x86.cpp
new file mode 100644
index 000000000..bc4db574f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x86.cpp
@@ -0,0 +1,1277 @@
+#include <numeric>
+#include "blt_spanutils_x86.h"
+#include "resample_stages_x86.h"
+#include <vd2/Kasumi/resample_kernels.h>
+
+#ifdef _MSC_VER
+ #pragma warning(disable: 4799) // warning C4799: function 'vdasm_resize_table_row_8_k8_4x_MMX' has no EMMS instruction
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
+extern "C" void vdasm_resize_table_row_8_k8_4x_SSE41(void *dst, const void *src, uint32 width, const void *kernel);
+extern "C" void vdasm_resize_table_row_8_k16_4x_SSE41(void *dst, const void *src, uint32 width, const void *kernel);
+extern "C" void vdasm_resize_table_row_8_SSE41(void *dst, const void *src, uint32 width, const void *kernel, uint32 kwidth);
+extern "C" void vdasm_resize_table_col_8_k2_SSE41(void *dst, const void *const *srcs, uint32 width, const void *kernel);
+extern "C" void vdasm_resize_table_col_8_k4_SSE41(void *dst, const void *const *srcs, uint32 width, const void *kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+
+namespace {
+ struct ScaleInfo {
+ void *dst;
+ uintptr src;
+ uint32 accum;
+ uint32 fracinc;
+ sint32 intinc;
+ uint32 count;
+ };
+
+ extern "C" void vdasm_resize_point32(const ScaleInfo *);
+}
+
+int VDResamplerSeparablePointRowStageX86::GetWindowSize() const {return 1;}
+void VDResamplerSeparablePointRowStageX86::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ ScaleInfo info;
+
+ info.dst = (uint32 *)dst + w;
+ info.src = ((uintptr)src >> 2) + (u>>16);
+ info.accum = u<<16;
+ info.fracinc = dudx << 16;
+ info.intinc = (sint32)dudx >> 16;
+ info.count = -(sint32)w*4;
+
+ vdasm_resize_point32(&info);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf_ISSE::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ nsVDPixmapSpanUtils::horiz_expand2x_coaligned_ISSE(dst, src, w);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+extern "C" void vdasm_resize_point32_MMX(const ScaleInfo *);
+extern "C" void vdasm_resize_interp_row_run_MMX(void *dst, const void *src, uint32 width, sint64 xaccum, sint64 x_inc);
+extern "C" void vdasm_resize_interp_col_run_MMX(void *dst, const void *src1, const void *src2, uint32 width, uint32 yaccum);
+extern "C" void vdasm_resize_ccint_row_MMX(void *dst, const void *src, uint32 count, uint32 xaccum, sint32 xinc, const void *tbl);
+extern "C" void vdasm_resize_ccint_col_MMX(void *dst, const void *src1, const void *src2, const void *src3, const void *src4, uint32 count, const void *tbl);
+extern "C" long vdasm_resize_table_col_MMX(uint32 *out, const uint32 *const*in_table, const int *filter, int filter_width, uint32 w, long frac);
+extern "C" long vdasm_resize_table_row_MMX(uint32 *out, const uint32 *in, const int *filter, int filter_width, uint32 w, long accum, long frac);
+
+int VDResamplerSeparablePointRowStageMMX::GetWindowSize() const {return 1;}
+void VDResamplerSeparablePointRowStageMMX::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ ScaleInfo info;
+
+ info.dst = (uint32 *)dst + w;
+ info.src = ((uintptr)src >> 2) + (u>>16);
+ info.accum = u<<16;
+ info.fracinc = dudx << 16;
+ info.intinc = (sint32)dudx >> 16;
+ info.count = -(sint32)w*4;
+
+ vdasm_resize_point32_MMX(&info);
+}
+
+int VDResamplerSeparableLinearRowStageMMX::GetWindowSize() const {return 2;}
+void VDResamplerSeparableLinearRowStageMMX::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_interp_row_run_MMX(dst0, src0, w, (sint64)u << 16, (sint64)dudx << 16);
+}
+
+int VDResamplerSeparableLinearColStageMMX::GetWindowSize() const {return 2;}
+void VDResamplerSeparableLinearColStageMMX::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ vdasm_resize_interp_col_run_MMX(dst0, srcarray[0], srcarray[1], w, phase);
+}
+
+VDResamplerSeparableCubicRowStageMMX::VDResamplerSeparableCubicRowStageMMX(double A)
+ : mFilterBank(1024)
+{
+ sint32 *p = mFilterBank.data();
+ VDResamplerGenerateTable(p, VDResamplerCubicFilter(1.0, A));
+ VDResamplerSwizzleTable(p, 512);
+}
+
+int VDResamplerSeparableCubicRowStageMMX::GetWindowSize() const {return 4;}
+void VDResamplerSeparableCubicRowStageMMX::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_ccint_row_MMX(dst0, src0, w, u, dudx, mFilterBank.data());
+}
+
+VDResamplerSeparableCubicColStageMMX::VDResamplerSeparableCubicColStageMMX(double A)
+ : mFilterBank(1024)
+{
+ sint32 *p = mFilterBank.data();
+ VDResamplerGenerateTable(p, VDResamplerCubicFilter(1.0, A));
+ VDResamplerSwizzleTable(p, 512);
+}
+
+int VDResamplerSeparableCubicColStageMMX::GetWindowSize() const {return 4;}
+void VDResamplerSeparableCubicColStageMMX::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ vdasm_resize_ccint_col_MMX(dst0, srcarray[0], srcarray[1], srcarray[2], srcarray[3], w, mFilterBank.data() + ((phase>>6)&0x3fc));
+}
+
+VDResamplerSeparableTableRowStage8MMX::VDResamplerSeparableTableRowStage8MMX(const IVDResamplerFilter& filter)
+ : VDResamplerRowStageSeparableTable32(filter)
+ , mLastSrcWidth(0)
+ , mLastDstWidth(0)
+ , mLastU(0)
+ , mLastDUDX(0)
+{
+ mAlignedKernelWidth = (GetWindowSize() + 6) & ~3;
+ mAlignedKernelSize = mAlignedKernelWidth + 4;
+}
+
+void VDResamplerSeparableTableRowStage8MMX::Init(const VDResamplerAxis& axis, uint32 srcw) {
+ uint32 w = axis.dx_preclip + axis.dx_active + axis.dx_postclip + axis.dx_dualclip;
+
+ if (mLastSrcWidth != srcw || mLastDstWidth != w || mLastU != axis.u || mLastDUDX != axis.dudx) {
+ mLastSrcWidth = srcw;
+ mLastDstWidth = w;
+ mLastU = axis.u;
+ mLastDUDX = axis.dudx;
+
+ RedoRowFilters(axis, w, srcw);
+ }
+}
+
+void VDResamplerSeparableTableRowStage8MMX::RedoRowFilters(const VDResamplerAxis& axis, uint32 w, uint32 srcw) {
+ int kstride = mFilterBank.size() >> 8;
+ int ksize = mAlignedKernelWidth;
+ int kesize = mAlignedKernelSize;
+
+ mRowKernels.clear();
+ mRowKernelSize = w * kesize;
+
+ mRowKernels.resize(mRowKernelSize * 4, 0);
+
+ for(int byteOffset = 0; byteOffset < 4; ++byteOffset) {
+ sint16 *dst = mRowKernels.data() + mRowKernelSize * byteOffset;
+ int ksizeThisOffset = std::min<int>(ksize, (byteOffset + srcw + 3) & ~3);
+
+ mKernelSizeByOffset[byteOffset] = ksizeThisOffset;
+
+ sint32 u = axis.u;
+ sint32 uoffmin = -byteOffset;
+ sint32 uoffmax = ((srcw + byteOffset + 3) & ~3) - byteOffset - ksizeThisOffset;
+ for(uint32 i=0; i<w; ++i) {
+ sint32 uoffset = u >> 16;
+ sint32 uoffset2 = ((uoffset + byteOffset) & ~3) - byteOffset;
+
+ if (uoffset2 < uoffmin)
+ uoffset2 = uoffmin;
+
+ if (uoffset2 > uoffmax)
+ uoffset2 = uoffmax;
+
+ VDASSERT(uoffset2 + ksizeThisOffset <= (((sint32)srcw + byteOffset + 3) & ~3));
+
+ *(sint32 *)dst = uoffset2;
+ dst += 2;
+ *dst++ = 0;
+ *dst++ = 0;
+
+ uint32 phase = (u >> 8) & 255;
+ const sint32 *src = &mFilterBank[kstride * phase];
+
+ sint32 start = 0;
+ sint32 end = kstride;
+
+ int dstoffset = uoffset - uoffset2;
+
+ // check for filter kernel overlapping left source boundary
+ if (uoffset < 0)
+ start = -uoffset;
+
+ // check for filter kernel overlapping right source boundary
+ if (uoffset + end > (sint32)srcw)
+ end = srcw - uoffset;
+
+ VDASSERT(dstoffset + start >= 0);
+ VDASSERT(dstoffset + end <= ksizeThisOffset);
+
+ sint16 *dst2 = dst + dstoffset;
+ dst += ksizeThisOffset;
+
+ for(int j=start; j<end; ++j)
+ dst2[j] = src[j];
+
+ if (start > 0)
+ dst2[start] = std::accumulate(src, src+start, dst2[start]);
+
+ if (end < kstride)
+ dst2[end - 1] = std::accumulate(src+end, src+kstride, dst2[end - 1]);
+
+ u += axis.dudx;
+ }
+ }
+
+ // swizzle rows where optimization is possible
+ vdfastvector<sint16> temp;
+
+ int quads = w >> 2;
+ int quadRemainder = w & 3;
+
+ for(int byteOffset = 0; byteOffset < 4; ++byteOffset) {
+ int ksizeThisOffset = mKernelSizeByOffset[byteOffset];
+ int kpairs = ksizeThisOffset >> 2;
+
+ if (ksizeThisOffset < 8 || ksizeThisOffset > 12) {
+ mbQuadOptimizationEnabled[byteOffset] = false;
+ } else {
+ ptrdiff_t unswizzledStride = (ksizeThisOffset >> 1) + 2;
+
+ mbQuadOptimizationEnabled[byteOffset] = true;
+ mTailOffset[byteOffset] = quads * (8 + ksizeThisOffset*4);
+
+ uint32 *dst = (uint32 *)&mRowKernels[mRowKernelSize * byteOffset];
+ temp.resize(mRowKernelSize);
+ memcpy(temp.data(), dst, mRowKernelSize*2);
+
+ const uint32 *src0 = (const uint32 *)temp.data();
+ const uint32 *src1 = src0 + unswizzledStride;
+ const uint32 *src2 = src1 + unswizzledStride;
+ const uint32 *src3 = src2 + unswizzledStride;
+ ptrdiff_t srcskip = unswizzledStride * 3;
+
+ for(int q = 0; q < quads; ++q) {
+ dst[0] = src0[0];
+ dst[1] = src1[0];
+ dst[2] = src2[0];
+ dst[3] = src3[0];
+ src0 += 2;
+ src1 += 2;
+ src2 += 2;
+ src3 += 2;
+ dst += 4;
+
+ for(int p = 0; p < kpairs; ++p) {
+ dst[0] = src0[0];
+ dst[1] = src0[1];
+ dst[2] = src1[0];
+ dst[3] = src1[1];
+ dst[4] = src2[0];
+ dst[5] = src2[1];
+ dst[6] = src3[0];
+ dst[7] = src3[1];
+ dst += 8;
+ src0 += 2;
+ src1 += 2;
+ src2 += 2;
+ src3 += 2;
+ }
+
+ src0 += srcskip;
+ src1 += srcskip;
+ src2 += srcskip;
+ src3 += srcskip;
+ }
+
+ memcpy(dst, src0, unswizzledStride * 4 * quadRemainder);
+
+ VDASSERT(dst + unswizzledStride * quadRemainder <= (void *)(mRowKernels.data() + (mRowKernelSize * (byteOffset + 1))));
+ }
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_row_8_k8_4x_MMX(void *dst, const void *src, uint32 width, const void *kernel) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000000000002000;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov ebp, [esp + 4 + 16] ;ebp = dst
+ mov esi, [esp + 12 + 16] ;esi = width
+ mov edi, [esp + 16 + 16] ;edi = kernel
+yloop:
+ ;eax = temp
+ ;ebx = temp
+ ;ecx = temp
+ ;edx = temp
+ ;esi = horiz counter
+ ;edi = filter list
+ ;ebp = destination
+
+ mov eax, [edi+0]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [esp+8+16]
+ add eax, edx
+ add ebx, edx
+ add ecx, edx
+ add edx, [edi+12]
+
+ movd mm0, [eax]
+ punpcklbw mm0, mm7
+
+ pmaddwd mm0, [edi+16]
+ movd mm1, [ebx]
+ punpcklbw mm1, mm7
+
+ pmaddwd mm1, [edi+24]
+ movd mm2, [ecx]
+ punpcklbw mm2, mm7
+
+ pmaddwd mm2, [edi+32]
+ movd mm3, [edx]
+ punpcklbw mm3, mm7
+
+ pmaddwd mm3, [edi+40]
+ movd mm4, [eax+4]
+ paddd mm0, mm6
+
+ movd mm5, [ebx+4]
+ punpcklbw mm4, mm7
+ paddd mm1, mm6
+
+ pmaddwd mm4, [edi+48]
+ punpcklbw mm5, mm7
+ paddd mm2, mm6
+
+ pmaddwd mm5, [edi+56]
+ paddd mm3, mm6
+ paddd mm0, mm4
+
+ paddd mm1, mm5
+ movd mm4, [ecx+4]
+ punpcklbw mm4, mm7
+
+ pmaddwd mm4, [edi+64]
+ movd mm5, [edx+4]
+ punpcklbw mm5, mm7
+
+ pmaddwd mm5, [edi+72]
+ paddd mm2, mm4
+ paddd mm3, mm5
+
+ movq mm4, mm0
+ punpckldq mm0, mm1
+ movq mm5, mm2
+ punpckldq mm2, mm3
+ punpckhdq mm4, mm1
+ punpckhdq mm5, mm3
+ paddd mm0, mm4
+ paddd mm2, mm5
+ psrad mm0, 14
+ psrad mm2, 14
+
+ packssdw mm0, mm2
+ packuswb mm0, mm0
+
+ add edi, 80
+
+ movd [ebp], mm0
+ add ebp, 4
+ sub esi, 1
+ jne yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_row_8_k12_4x_MMX(void *dst, const void *src, uint32 width, const void *kernel) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000200000002000;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov ebp, [esp + 4 + 16] ;ebp = dst
+ mov esi, [esp + 12 + 16] ;esi = width
+ mov edi, [esp + 16 + 16] ;edi = kernel
+yloop:
+ ;eax = temp
+ ;ebx = temp
+ ;ecx = temp
+ ;edx = temp
+ ;esi = horiz counter
+ ;edi = filter list
+ ;ebp = destination
+
+ mov eax, [edi+0]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [esp+8+16]
+ add eax, edx
+ add ebx, edx
+ add ecx, edx
+ add edx, [edi+12]
+
+ movd mm0, [eax]
+ punpcklbw mm0, mm7
+
+ pmaddwd mm0, [edi+16]
+ movd mm1, [ebx]
+ punpcklbw mm1, mm7
+
+ pmaddwd mm1, [edi+24]
+ movd mm2, [ecx]
+ punpcklbw mm2, mm7
+
+ pmaddwd mm2, [edi+32]
+ movd mm3, [edx]
+ punpcklbw mm3, mm7
+
+ pmaddwd mm3, [edi+40]
+ movd mm4, [eax+4]
+ punpcklbw mm4, mm7
+
+ pmaddwd mm4, [edi+48]
+ movd mm5, [ebx+4]
+ punpcklbw mm5, mm7
+
+ pmaddwd mm5, [edi+56]
+ paddd mm0, mm4
+ paddd mm1, mm5
+
+ movd mm4, [ecx+4]
+ punpcklbw mm4, mm7
+ movd mm5, [edx+4]
+
+ pmaddwd mm4, [edi+64]
+ punpcklbw mm5, mm7
+ paddd mm2, mm4
+
+ pmaddwd mm5, [edi+72]
+ movd mm4, [eax+8]
+ punpcklbw mm4, mm7
+
+ paddd mm3, mm5
+ movd mm5, [ebx+8]
+ punpcklbw mm5, mm7
+
+ pmaddwd mm4, [edi+80]
+ paddd mm0, mm4
+ movd mm4, [ecx+8]
+
+ pmaddwd mm5, [edi+88]
+ paddd mm1, mm5
+ punpcklbw mm4, mm7
+
+ pmaddwd mm4, [edi+96]
+ movd mm5, [edx+8]
+ punpcklbw mm5, mm7
+
+ pmaddwd mm5, [edi+104]
+ paddd mm2, mm4
+ paddd mm3, mm5
+
+ movq mm4, mm0
+ punpckldq mm0, mm1
+ movq mm5, mm2
+ punpckldq mm2, mm3
+ punpckhdq mm4, mm1
+ punpckhdq mm5, mm3
+ paddd mm0, mm4
+ paddd mm2, mm5
+ paddd mm0, mm6
+ paddd mm2, mm6
+ psrad mm0, 14
+ psrad mm2, 14
+
+ packssdw mm0, mm2
+ packuswb mm0, mm0
+
+ add edi, 112
+
+ movd [ebp], mm0
+ add ebp, 4
+ sub esi, 1
+ jne yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_row_8_MMX(void *dst, const void *src, uint32 width, const void *kernel, uint32 kwidth) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000000000002000;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov edi, [esp + 4 + 16] ;edi = dst
+ mov ebx, [esp + 8 + 16] ;ebx = src
+ mov ebp, [esp + 12 + 16] ;ebp = width
+ mov edx, [esp + 16 + 16] ;edx = kernel
+yloop:
+ ;eax = temp
+ ;ebx = source base address
+ ;ecx = (temp) source
+ ;edx = filter list
+ ;esi = (temp) kernel width
+ ;edi = destination
+ ;ebp = horiz counter
+
+ mov eax, [edx]
+ add edx, 8
+ lea ecx, [ebx + eax]
+ mov esi, [esp + 20 + 16] ;esi = kernel width
+
+ movq mm2, mm6
+xloop:
+ movd mm0, [ecx]
+ punpcklbw mm0, mm7
+ add ecx, 4
+ pmaddwd mm0, [edx]
+ paddd mm2, mm0
+ add edx, 8
+ sub esi, 4
+ jne xloop
+
+ punpckldq mm0, mm2
+ paddd mm0, mm2
+ psrad mm0, 14
+ psrlq mm0, 32
+ packssdw mm0, mm0
+ packuswb mm0, mm0
+ movd eax, mm0
+ mov [edi], al
+ add edi, 1
+ sub ebp, 1
+ jne yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void VDResamplerSeparableTableRowStage8MMX::Process(void *dst, const void *src, uint32 w) {
+ int byteOffset = (int)(ptrdiff_t)src & 3;
+ const sint16 *ksrc = &mRowKernels[mRowKernelSize * byteOffset];
+#if 0
+ int kwidth = mAlignedKernelWidth;
+ uint8 *dst2 = (uint8 *)dst;
+
+ do {
+ int offset = ksrc[0];
+ ksrc += 4;
+
+ const uint8 *src2 = (const uint8 *)src + offset;
+ sint32 accum = 0x8000;
+ for(int i=0; i<kwidth; ++i) {
+ accum += (sint32)src2[i] * (*ksrc++);
+ }
+
+ accum >>= 14;
+
+ accum &= ~(accum >> 31);
+ accum |= (255 - accum) >> 31;
+
+ *dst2++ = (uint8)accum;
+
+ } while(--w);
+#else
+ int ksize = mKernelSizeByOffset[byteOffset];
+ if (mbQuadOptimizationEnabled[byteOffset]) {
+ if (w >= 4) {
+ if (ksize == 12) {
+ vdasm_resize_table_row_8_k12_4x_MMX(dst, src, w >> 2, ksrc);
+
+#if 0
+ int w4 = w >> 2;
+ uint8 *dst2 = (uint8 *)dst;
+ const uint8 *src2 = (const uint8 *)src;
+ const sint16 *ksrc2 = ksrc;
+
+ do {
+ int off0 = ksrc2[0];
+ int off1 = ksrc2[2];
+ int off2 = ksrc2[4];
+ int off3 = ksrc2[6];
+ const uint8 *d0 = src2 + off0;
+ const uint8 *d1 = src2 + off1;
+ const uint8 *d2 = src2 + off2;
+ const uint8 *d3 = src2 + off3;
+
+ int acc0 = 0;
+ int acc1 = 0;
+ int acc2 = 0;
+ int acc3 = 0;
+
+ acc0 += d0[ 0]*ksrc2[ 8]
+ + d0[ 1]*ksrc2[ 9]
+ + d0[ 2]*ksrc2[ 10]
+ + d0[ 3]*ksrc2[ 11]
+ + d0[ 4]*ksrc2[ 24]
+ + d0[ 5]*ksrc2[ 25]
+ + d0[ 6]*ksrc2[ 26]
+ + d0[ 7]*ksrc2[ 27]
+ + d0[ 8]*ksrc2[ 40]
+ + d0[ 9]*ksrc2[ 41]
+ + d0[10]*ksrc2[ 42]
+ + d0[11]*ksrc2[ 43];
+
+ acc0 = (acc0 + 0x2000) >> 14;
+ if (acc0 < 0) acc0 = 0; else if (acc0 > 255) acc0 = 255;
+
+ acc1 += d1[ 0]*ksrc2[ 12]
+ + d1[ 1]*ksrc2[ 13]
+ + d1[ 2]*ksrc2[ 14]
+ + d1[ 3]*ksrc2[ 15]
+ + d1[ 4]*ksrc2[ 28]
+ + d1[ 5]*ksrc2[ 29]
+ + d1[ 6]*ksrc2[ 30]
+ + d1[ 7]*ksrc2[ 31]
+ + d1[ 8]*ksrc2[ 44]
+ + d1[ 9]*ksrc2[ 45]
+ + d1[10]*ksrc2[ 46]
+ + d1[11]*ksrc2[ 47];
+
+ acc1 = (acc1 + 0x2000) >> 14;
+ if (acc1 < 0) acc1 = 0; else if (acc1 > 255) acc1 = 255;
+
+ acc2 += d2[ 0]*ksrc2[ 16]
+ + d2[ 1]*ksrc2[ 17]
+ + d2[ 2]*ksrc2[ 18]
+ + d2[ 3]*ksrc2[ 19]
+ + d2[ 4]*ksrc2[ 32]
+ + d2[ 5]*ksrc2[ 33]
+ + d2[ 6]*ksrc2[ 34]
+ + d2[ 7]*ksrc2[ 35]
+ + d2[ 8]*ksrc2[ 48]
+ + d2[ 9]*ksrc2[ 49]
+ + d2[10]*ksrc2[ 50]
+ + d2[11]*ksrc2[ 51];
+
+ acc2 = (acc2 + 0x2000) >> 14;
+ if (acc2 < 0) acc2 = 0; else if (acc2 > 255) acc2 = 255;
+
+ acc3 += d3[ 0]*ksrc2[ 20]
+ + d3[ 1]*ksrc2[ 21]
+ + d3[ 2]*ksrc2[ 22]
+ + d3[ 3]*ksrc2[ 23]
+ + d3[ 4]*ksrc2[ 36]
+ + d3[ 5]*ksrc2[ 37]
+ + d3[ 6]*ksrc2[ 38]
+ + d3[ 7]*ksrc2[ 39]
+ + d3[ 8]*ksrc2[ 52]
+ + d3[ 9]*ksrc2[ 53]
+ + d3[10]*ksrc2[ 54]
+ + d3[11]*ksrc2[ 55];
+
+ acc3 = (acc3 + 0x2000) >> 14;
+ if (acc3 < 0) acc3 = 0; else if (acc3 > 255) acc3 = 255;
+
+ ksrc2 += 56;
+
+ dst2[0] = (uint8)acc0;
+ dst2[1] = (uint8)acc1;
+ dst2[2] = (uint8)acc2;
+ dst2[3] = (uint8)acc3;
+ dst2 += 4;
+ } while(--w4);
+#endif
+ } else
+ vdasm_resize_table_row_8_k8_4x_MMX(dst, src, w >> 2, ksrc);
+ }
+
+ if (w & 3)
+ vdasm_resize_table_row_8_MMX((char *)dst + (w & ~3), src, w & 3, ksrc + mTailOffset[byteOffset], ksize);
+ } else {
+ vdasm_resize_table_row_8_MMX(dst, src, w, ksrc, ksize);
+ }
+#endif
+}
+
+void VDResamplerSeparableTableRowStage8MMX::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_MMX((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+VDResamplerSeparableTableRowStageMMX::VDResamplerSeparableTableRowStageMMX(const IVDResamplerFilter& filter)
+ : VDResamplerRowStageSeparableTable32(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (unsigned)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableRowStageMMX::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_MMX((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerSeparableTableColStage8MMX::VDResamplerSeparableTableColStage8MMX(const IVDResamplerFilter& filter)
+ : VDResamplerColStageSeparableTable8(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (unsigned)mFilterBank.size() >> 1);
+}
+
+void __declspec(naked) vdasm_resize_table_col_8_k2_MMX(void *dst, const void *const *srcs, uint32 width, const void *kernel) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000200000002000;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov esi, [esp + 4 + 16] ;esi = dst
+ mov edi, [esp + 16 + 16] ;edi = kernel
+ mov ebp, [esp + 12 + 16] ;ebp = width
+
+ movq mm5, [edi]
+
+		mov		edx, [esp + 8 + 16]	;edx = srcs
+ mov eax, [edx+0]
+ mov ebx, [edx+4]
+ add eax, ebp
+ add ebx, ebp
+ neg ebp
+yloop:
+ ;eax = row0
+ ;ebx = row1
+ ;ecx =
+ ;edx =
+ ;edi = kernel
+ ;esi = dest
+ ;ebp = width counter
+
+ movd mm0, [eax+ebp]
+ punpcklbw mm0, mm7
+ movd mm2, [ebx+ebp]
+ punpcklbw mm2, mm7
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ punpckhwd mm1, mm2
+ pmaddwd mm0, mm5
+ pmaddwd mm1, mm5
+
+ paddd mm0, mm6
+ paddd mm1, mm6
+
+ psrad mm0, 14
+ psrad mm1, 14
+ packssdw mm0, mm1
+ packuswb mm0, mm0
+ movd [esi], mm0
+ add esi, 4
+ add ebp, 4
+ jne yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_col_8_k4_MMX(void *dst, const void *const *srcs, uint32 width, const void *kernel) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000200000002000;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov esi, [esp + 4 + 16] ;esi = dst
+ mov edi, [esp + 16 + 16] ;edi = kernel
+ xor ebp, ebp
+
+		mov		edx, [esp + 8 + 16]	;edx = srcs
+ mov eax, [edx+0]
+ mov ebx, [edx+4]
+ mov ecx, [edx+8]
+ mov edx, [edx+12]
+yloop:
+ ;eax = row0
+ ;ebx = row1
+ ;ecx = row2
+ ;edx = row3
+ ;edi = kernel
+ ;esi = dest
+ ;ebp = width counter
+
+ movd mm0, [eax+ebp]
+ punpcklbw mm0, mm7
+ movd mm2, [ebx+ebp]
+ punpcklbw mm2, mm7
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ movq mm5, [edi]
+ punpckhwd mm1, mm2
+ pmaddwd mm0, mm5
+ pmaddwd mm1, mm5
+
+ paddd mm0, mm6
+ paddd mm1, mm6
+
+ movd mm3, [ecx+ebp]
+ punpcklbw mm3, mm7
+ movd mm2, [edx+ebp]
+ punpcklbw mm2, mm7
+ movq mm4, mm3
+ punpcklwd mm3, mm2
+ movq mm5, [edi+8]
+ punpckhwd mm4, mm2
+ pmaddwd mm3, mm5
+ pmaddwd mm4, mm5
+
+ paddd mm0, mm3
+ paddd mm1, mm4
+
+ psrad mm0, 14
+ psrad mm1, 14
+ packssdw mm0, mm1
+ packuswb mm0, mm0
+ add ebp, 4
+ movd [esi], mm0
+ add esi, 4
+ cmp ebp, [esp + 12 + 16]
+ jb yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_col_8_MMX(void *dst, const void *const *srcs, uint32 width, const void *kernel, uint32 kwidth) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000200000002000;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov edi, [esp + 4 + 16] ;edi = dst
+ xor ebp, ebp
+yloop:
+ mov edx, [esp + 16 + 16] ;edx = kernel
+ mov ebx, [esp + 8 + 16] ;ebx = srcs
+ mov esi, [esp + 20 + 16] ;esi = kwidth
+ movq mm3, mm6
+ movq mm4, mm6
+xloop:
+ mov ecx, [ebx]
+ movd mm0, [ecx+ebp]
+ punpcklbw mm0, mm7
+ mov ecx, [ebx+4]
+ movd mm2, [ecx+ebp]
+ punpcklbw mm2, mm7
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ punpckhwd mm1, mm2
+ movq mm5, [edx]
+ pmaddwd mm0, mm5
+ pmaddwd mm1, mm5
+
+ paddd mm3, mm0
+ paddd mm4, mm1
+ add ebx, 8
+ add edx, 8
+ sub esi, 2
+ jne xloop
+
+ psrad mm3, 14
+ psrad mm4, 14
+ packssdw mm3, mm4
+ packuswb mm3, mm3
+ movd [edi], mm3
+ add edi, 4
+ add ebp, 4
+ cmp ebp, [esp + 12 + 16]
+ jb yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void VDResamplerSeparableTableColStage8MMX::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *const *src = (const uint8 *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const sint16 *filter = (const sint16 *)&mFilterBank[((phase>>8)&0xff) * ksize];
+
+ int w4 = w & ~3;
+
+ if (w4) {
+ switch(ksize) {
+ case 2:
+ vdasm_resize_table_col_8_k2_MMX(dst, (const void *const *)src, w4, filter);
+ break;
+
+ case 4:
+ vdasm_resize_table_col_8_k4_MMX(dst, (const void *const *)src, w4, filter);
+ break;
+
+ default:
+ vdasm_resize_table_col_8_MMX(dst, (const void *const *)src, w4, filter, ksize);
+ break;
+ }
+ }
+
+ for(uint32 i=w4; i<w; ++i) {
+ int b = 0x2000;
+ const sint16 *filter2 = filter;
+ const uint8 *const *src2 = src;
+
+ for(unsigned j = ksize; j; j -= 2) {
+ sint32 p0 = (*src2++)[i];
+ sint32 p1 = (*src2++)[i];
+ sint32 coeff0 = filter2[0];
+ sint32 coeff1 = filter2[1];
+ filter2 += 4;
+
+ b += p0*coeff0;
+ b += p1*coeff1;
+ }
+
+ b >>= 14;
+
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ dst[i] = (uint8)b;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerSeparableTableColStageMMX::VDResamplerSeparableTableColStageMMX(const IVDResamplerFilter& filter)
+ : VDResamplerColStageSeparableTable32(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (unsigned)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableColStageMMX::Process(void *dst, const void *const *src, uint32 w, sint32 phase) {
+ vdasm_resize_table_col_MMX((uint32*)dst, (const uint32 *const *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, (phase >> 8) & 0xff);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE2, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+extern "C" long vdasm_resize_table_col_SSE2(uint32 *out, const uint32 *const*in_table, const int *filter, int filter_width, uint32 w, long frac);
+extern "C" long vdasm_resize_table_row_SSE2(uint32 *out, const uint32 *in, const int *filter, int filter_width, uint32 w, long accum, long frac);
+extern "C" void vdasm_resize_ccint_col_SSE2(void *dst, const void *src1, const void *src2, const void *src3, const void *src4, uint32 count, const void *tbl);
+
+VDResamplerSeparableCubicColStageSSE2::VDResamplerSeparableCubicColStageSSE2(double A)
+ : VDResamplerSeparableCubicColStageMMX(A)
+{
+}
+
+void VDResamplerSeparableCubicColStageSSE2::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ vdasm_resize_ccint_col_SSE2(dst0, srcarray[0], srcarray[1], srcarray[2], srcarray[3], w, mFilterBank.data() + ((phase>>6)&0x3fc));
+}
+
+VDResamplerSeparableTableRowStageSSE2::VDResamplerSeparableTableRowStageSSE2(const IVDResamplerFilter& filter)
+ : VDResamplerSeparableTableRowStageMMX(filter)
+{
+}
+
+void VDResamplerSeparableTableRowStageSSE2::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_MMX((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+VDResamplerSeparableTableColStageSSE2::VDResamplerSeparableTableColStageSSE2(const IVDResamplerFilter& filter)
+ : VDResamplerSeparableTableColStageMMX(filter)
+{
+}
+
+void VDResamplerSeparableTableColStageSSE2::Process(void *dst, const void *const *src, uint32 w, sint32 phase) {
+ vdasm_resize_table_col_SSE2((uint32*)dst, (const uint32 *const *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, (phase >> 8) & 0xff);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE4.1, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerSeparableTableRowStage8SSE41::VDResamplerSeparableTableRowStage8SSE41(const IVDResamplerFilter& filter)
+ : VDResamplerRowStageSeparableTable32(filter)
+ , mLastSrcWidth(0)
+ , mLastDstWidth(0)
+ , mLastU(0)
+ , mLastDUDX(0)
+{
+ mAlignedKernelWidth = (GetWindowSize() + 15) & ~7;
+ mAlignedKernelSize = mAlignedKernelWidth + 16;
+}
+
+void VDResamplerSeparableTableRowStage8SSE41::Init(const VDResamplerAxis& axis, uint32 srcw) {
+ uint32 w = axis.dx_preclip + axis.dx_active + axis.dx_postclip + axis.dx_dualclip;
+
+ if (mLastSrcWidth != srcw || mLastDstWidth != w || mLastU != axis.u || mLastDUDX != axis.dudx) {
+ mLastSrcWidth = srcw;
+ mLastDstWidth = w;
+ mLastU = axis.u;
+ mLastDUDX = axis.dudx;
+
+ RedoRowFilters(axis, w, srcw);
+ }
+}
+
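+// Rebuilds the row kernels: a full kernel set is generated for each of the 8 possible
+// byte alignments of the source pointer. Each per-pixel entry stores the clamped,
+// 8-aligned source offset followed by the coefficients, and taps that would read outside
+// the source are folded into the nearest in-range coefficient via std::accumulate.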
+void VDResamplerSeparableTableRowStage8SSE41::RedoRowFilters(const VDResamplerAxis& axis, uint32 w, uint32 srcw) {
+ int kstride = mFilterBank.size() >> 8;
+ int ksize = mAlignedKernelWidth;
+ int kesize = mAlignedKernelSize;
+
+ mRowKernels.clear();
+ mRowKernelSize = w * kesize;
+
+ mRowKernels.resize(mRowKernelSize * 8, 0);
+
+ for(int byteOffset = 0; byteOffset < 8; ++byteOffset) {
+ sint16 *dst = mRowKernels.data() + mRowKernelSize * byteOffset;
+ int ksizeThisOffset = std::min<int>(ksize, (byteOffset + srcw + 7) & ~7);
+
+ mKernelSizeByOffset[byteOffset] = ksizeThisOffset;
+
+ sint32 u = axis.u;
+ sint32 uoffmin = -byteOffset;
+ sint32 uoffmax = ((srcw + byteOffset + 7) & ~7) - byteOffset - ksizeThisOffset;
+ for(uint32 i=0; i<w; ++i) {
+ sint32 uoffset = u >> 16;
+ sint32 uoffset2 = ((uoffset + byteOffset) & ~7) - byteOffset;
+
+ if (uoffset2 < uoffmin)
+ uoffset2 = uoffmin;
+
+ if (uoffset2 > uoffmax)
+ uoffset2 = uoffmax;
+
+ *(sint32 *)dst = uoffset2;
+ dst += 2;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+
+ uint32 phase = (u >> 8) & 255;
+ const sint32 *src = &mFilterBank[kstride * phase];
+
+ sint32 start = 0;
+ sint32 end = kstride;
+
+ int dstoffset = uoffset - uoffset2;
+
+ // check for filter kernel overlapping left source boundary
+ if (uoffset < 0)
+ start = -uoffset;
+
+ // check for filter kernel overlapping right source boundary
+ if (uoffset + end > (sint32)srcw)
+ end = srcw - uoffset;
+
+ VDASSERT(dstoffset + start >= 0);
+ VDASSERT(dstoffset + end <= ksizeThisOffset);
+
+ sint16 *dst2 = dst + dstoffset;
+ dst += ksizeThisOffset;
+
+ for(int j=start; j<end; ++j)
+ dst2[j] = src[j];
+
+ if (start > 0)
+ dst2[start] = std::accumulate(src, src+start, dst2[start]);
+
+ if (end < kstride)
+ dst2[end - 1] = std::accumulate(src+end, src+kstride, dst2[end - 1]);
+
+ u += axis.dudx;
+ }
+ }
+
+ // swizzle rows where optimization is possible
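+	// For aligned kernel sizes of 8 or 16 the per-pixel kernels of four consecutive output
+	// pixels are interleaved so the 4x SSE4.1 row routines can read offsets and coefficients
+	// for four pixels sequentially; other sizes keep the unswizzled layout and use the
+	// generic routine.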
+ vdfastvector<sint16> temp;
+
+ int quads = w >> 2;
+ int quadRemainder = w & 3;
+
+ for(int byteOffset = 0; byteOffset < 8; ++byteOffset) {
+ int ksizeThisOffset = mKernelSizeByOffset[byteOffset];
+ int kpairs = ksizeThisOffset >> 3;
+
+ if (ksizeThisOffset < 8 || ksizeThisOffset > 16) {
+ mbQuadOptimizationEnabled[byteOffset] = false;
+ } else {
+ ptrdiff_t unswizzledStride = (ksizeThisOffset >> 1) + 4;
+
+ mbQuadOptimizationEnabled[byteOffset] = true;
+ mTailOffset[byteOffset] = quads * (8 + ksizeThisOffset*4);
+
+ uint32 *dst = (uint32 *)&mRowKernels[mRowKernelSize * byteOffset];
+ temp.resize(mRowKernelSize);
+ memcpy(temp.data(), dst, mRowKernelSize*2);
+
+ const uint32 *src0 = (const uint32 *)temp.data();
+ const uint32 *src1 = src0 + unswizzledStride;
+ const uint32 *src2 = src1 + unswizzledStride;
+ const uint32 *src3 = src2 + unswizzledStride;
+ ptrdiff_t srcskip = unswizzledStride * 3;
+
+ for(int q = 0; q < quads; ++q) {
+ dst[0] = src0[0];
+ dst[1] = src1[0];
+ dst[2] = src2[0];
+ dst[3] = src3[0];
+ src0 += 4;
+ src1 += 4;
+ src2 += 4;
+ src3 += 4;
+ dst += 4;
+
+ for(int p = 0; p < kpairs; ++p) {
+ dst[ 0] = src0[0];
+ dst[ 1] = src0[1];
+ dst[ 2] = src0[2];
+ dst[ 3] = src0[3];
+ dst[ 4] = src1[0];
+ dst[ 5] = src1[1];
+ dst[ 6] = src1[2];
+ dst[ 7] = src1[3];
+ dst[ 8] = src2[0];
+ dst[ 9] = src2[1];
+ dst[10] = src2[2];
+ dst[11] = src2[3];
+ dst[12] = src3[0];
+ dst[13] = src3[1];
+ dst[14] = src3[2];
+ dst[15] = src3[3];
+ dst += 16;
+ src0 += 4;
+ src1 += 4;
+ src2 += 4;
+ src3 += 4;
+ }
+
+ src0 += srcskip;
+ src1 += srcskip;
+ src2 += srcskip;
+ src3 += srcskip;
+ }
+
+ memcpy(dst, src0, unswizzledStride * 4 * quadRemainder);
+ }
+ }
+}
+
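+// Row processing picks the kernel set matching the low 3 bits of the source address; when
+// the quad optimization is enabled for that alignment, groups of four pixels go through the
+// k8/k16 4x SSE4.1 routines and any remainder uses the generic routine with the tail kernels.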
+void VDResamplerSeparableTableRowStage8SSE41::Process(void *dst, const void *src, uint32 w) {
+ int byteOffset = (int)(ptrdiff_t)src & 7;
+ const sint16 *ksrc = &mRowKernels[mRowKernelSize * byteOffset];
+
+ int ksize = mKernelSizeByOffset[byteOffset];
+ if (mbQuadOptimizationEnabled[byteOffset]) {
+ if (w >= 4) {
+ if (ksize == 16)
+ vdasm_resize_table_row_8_k16_4x_SSE41(dst, src, w >> 2, ksrc);
+ else
+ vdasm_resize_table_row_8_k8_4x_SSE41(dst, src, w >> 2, ksrc);
+ }
+
+ if (w & 3)
+ vdasm_resize_table_row_8_SSE41((char *)dst + (w & ~3), src, w & 3, ksrc + mTailOffset[byteOffset], ksize);
+ } else {
+ vdasm_resize_table_row_8_SSE41(dst, src, w, ksrc, ksize);
+ }
+}
+
+void VDResamplerSeparableTableRowStage8SSE41::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_MMX((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerSeparableTableColStage8SSE41::VDResamplerSeparableTableColStage8SSE41(const IVDResamplerFilter& filter)
+ : VDResamplerColStageSeparableTable8(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (unsigned)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableColStage8SSE41::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *const *src = (const uint8 *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const sint16 *filter = (const sint16 *)&mFilterBank[((phase>>8)&0xff) * ksize];
+
+ int w4 = w & ~3;
+
+ if (w4) {
+ switch(ksize) {
+ case 2:
+ vdasm_resize_table_col_8_k2_SSE41(dst, (const void *const *)src, w4, filter);
+ break;
+
+ case 4:
+ vdasm_resize_table_col_8_k4_SSE41(dst, (const void *const *)src, w4, filter);
+ break;
+
+ default:
+ vdasm_resize_table_col_8_MMX(dst, (const void *const *)src, w4, filter, ksize);
+ break;
+ }
+ }
+
+ for(uint32 i=w4; i<w; ++i) {
+ int b = 0x2000;
+ const sint16 *filter2 = filter;
+ const uint8 *const *src2 = src;
+
+ for(unsigned j = ksize; j; j -= 2) {
+ sint32 p0 = (*src2++)[i];
+ sint32 p1 = (*src2++)[i];
+ sint32 coeff0 = filter2[0];
+ sint32 coeff1 = filter2[1];
+ filter2 += 4;
+
+ b += p0*coeff0;
+ b += p1*coeff1;
+ }
+
+ b >>= 14;
+
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ dst[i] = (uint8)b;
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/stretchblt_reference.cpp b/src/thirdparty/VirtualDub/Kasumi/source/stretchblt_reference.cpp
new file mode 100644
index 000000000..3afdec910
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/stretchblt_reference.cpp
@@ -0,0 +1,816 @@
+#include <vd2/system/memory.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+namespace {
+ struct VDPixmapReferenceStretchBltParameters {
+ void *dst;
+ ptrdiff_t dstpitch;
+ const void *src;
+ ptrdiff_t srcpitch;
+ ptrdiff_t srcinc;
+ sint32 dx;
+ sint32 dy;
+ uint32 u;
+ uint32 uinc;
+ uint32 dudx;
+ uint32 v;
+ uint32 dvdy;
+ sint32 xprecopy;
+ sint32 xpostcopy;
+ ptrdiff_t xprepos;
+ ptrdiff_t xpostpos;
+
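+		// Steps to the next destination row; the source pointer advances by the integer
+		// part of dv/dy (srcinc) plus one extra source row whenever the fractional
+		// accumulator v wraps.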
+ void advance() {
+ dst = (char *)dst + dstpitch;
+ src = (char *)src + srcinc;
+
+ uint32 vt = v + dvdy;
+
+ if (vt < v)
+ src = (char *)src + srcpitch;
+
+ v = vt;
+ }
+ };
+}
+
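+// Nearest-neighbour row blitter for 8-bit formats: pixels outside the source span are
+// filled by replicating the clamped edge sample (xprecopy/xpostcopy), and the main loop
+// advances the source pointer by the integer step (uinc) plus one extra element whenever
+// the fractional accumulator u wraps (carry detected by ut < u).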
+void VDPixmapStretchBlt_Any8_nearest_reference(VDPixmapReferenceStretchBltParameters params) {
+ do {
+ uint8 *dstp = (uint8 *)params.dst;
+ const uint8 *srcp = (const uint8 *)params.src;
+ uint32 u = params.u;
+
+ if (params.xprecopy) {
+ VDMemset8(dstp, *(const uint8 *)((const char *)params.src + params.xprepos), params.xprecopy);
+ dstp += params.xprecopy;
+ }
+
+ sint32 wt = params.dx;
+
+ if (wt > 0)
+ do {
+ *dstp++ = *srcp;
+
+ uint32 ut = u + params.dudx;
+ srcp += ut<u;
+ srcp += params.uinc;
+ u = ut;
+ } while(--wt);
+
+ if (params.xpostcopy)
+ VDMemset8(dstp, *(const uint8 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+
+ params.advance();
+ } while(--params.dy);
+}
+
+void VDPixmapStretchBlt_Any16_nearest_reference(VDPixmapReferenceStretchBltParameters params) {
+ do {
+ uint16 *dstp = (uint16 *)params.dst;
+ const uint16 *srcp = (const uint16 *)params.src;
+ uint32 u = params.u;
+
+ if (params.xprecopy) {
+ VDMemset16(dstp, *(const uint16 *)((const char *)params.src + params.xprepos), params.xprecopy);
+ dstp += params.xprecopy;
+ }
+
+ sint32 wt = params.dx;
+
+ if (wt > 0)
+ do {
+ *dstp++ = *srcp;
+
+ uint32 ut = u + params.dudx;
+ srcp += ut<u;
+ srcp += params.uinc;
+ u = ut;
+ } while(--wt);
+
+ if (params.xpostcopy)
+ VDMemset16(dstp, *(const uint16 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+
+ params.advance();
+ } while(--params.dy);
+}
+
+void VDPixmapStretchBlt_Any24_nearest_reference(VDPixmapReferenceStretchBltParameters params) {
+ do {
+ uint8 *dstp = (uint8 *)params.dst;
+ const uint8 *srcp = (const uint8 *)params.src;
+ uint32 u = params.u;
+
+ if (params.xprecopy) {
+ const uint8 *repsrc = (const uint8 *)params.src + params.xprepos;
+ const uint8 p0 = repsrc[0];
+ const uint8 p1 = repsrc[1];
+ const uint8 p2 = repsrc[2];
+
+ for(sint32 i=0; i<params.xprecopy; ++i) {
+ dstp[0] = p0;
+ dstp[1] = p1;
+ dstp[2] = p2;
+ dstp += 3;
+ }
+ }
+
+ sint32 wt = params.dx;
+
+ if (wt > 0)
+ do {
+ dstp[0] = srcp[0];
+ dstp[1] = srcp[1];
+ dstp[2] = srcp[2];
+ dstp += 3;
+
+ uint32 ut = u + params.dudx;
+ srcp += (ut<u)*3;
+ srcp += params.uinc*3;
+ u = ut;
+ } while(--wt);
+
+ if (params.xpostcopy) {
+ const uint8 *repsrc = (const uint8 *)params.src + params.xpostpos;
+ const uint8 p0 = repsrc[0];
+ const uint8 p1 = repsrc[1];
+ const uint8 p2 = repsrc[2];
+
+ for(sint32 i=0; i<params.xpostcopy; ++i) {
+ dstp[0] = p0;
+ dstp[1] = p1;
+ dstp[2] = p2;
+ dstp += 3;
+ }
+ }
+
+ params.advance();
+ } while(--params.dy);
+}
+
+void VDPixmapStretchBlt_Any32_nearest_reference(VDPixmapReferenceStretchBltParameters params) {
+ do {
+ uint32 *dstp = (uint32 *)params.dst;
+ const uint32 *srcp = (const uint32 *)params.src;
+ uint32 u = params.u;
+
+ if (params.xprecopy) {
+ VDMemset32(dstp, *(const uint32 *)((const char *)params.src + params.xprepos), params.xprecopy);
+ dstp += params.xprecopy;
+ }
+
+ sint32 wt = params.dx;
+ if (wt > 0)
+ do {
+ *dstp++ = *srcp;
+
+ uint32 ut = u + params.dudx;
+ srcp += ut<u;
+ srcp += params.uinc;
+ u = ut;
+ } while(--wt);
+
+ if (params.xpostcopy)
+ VDMemset32(dstp, *(const uint32 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+
+ params.advance();
+ } while(--params.dy);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
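+	// Splits a destination span of dx pixels into three regions based on the 32.32
+	// fixed-point source coordinate range [u64, u64 + dudx*(dx-1)]: a pre-copy region
+	// clamped to one source edge, a main region that samples inside [0, du), and a
+	// post-copy region clamped to the other edge (the two are swapped for negative
+	// dudx). u64 is advanced past the pre-copy region on return.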
+ void VDSetupNearestSamplingParameters(sint64& u64, sint64 dudx, sint32 dx, sint32 du, sint32& xprecopy, sint32& xprepos, sint32& xmain, sint32& xpostcopy, sint32& xpostpos) {
+ sint64 ulo = u64;
+ sint64 uhi = u64 + dudx * (dx - 1);
+ sint64 tdudx = dudx;
+ const sint64 ulimit = ((sint64)du << 32);
+
+ xprepos = 0;
+ xpostpos = du-1;
+
+ if (!tdudx) {
+ if (u64 < 0)
+ xprecopy = dx;
+ else if (u64 >= ulimit)
+ xprecopy = dx;
+ else
+ xmain = dx;
+ } else {
+ if (tdudx < 0) {
+ std::swap(ulo, uhi);
+ tdudx = -tdudx;
+ }
+
+ if (ulo < 0) {
+ if (uhi < 0)
+ xprecopy = dx;
+ else
+ xprecopy = (sint32)((-ulo-1) / tdudx) + 1;
+
+ VDASSERT(xprecopy <= 0 || (uint64)ulo >= (uint64)ulimit);
+ VDASSERT(xprecopy <= 0 || (uint64)(ulo + tdudx * (xprecopy-1)) >= (uint64)ulimit);
+ }
+
+ if (uhi >= ulimit) {
+ if (ulo >= ulimit)
+ xpostcopy = dx;
+ else
+ xpostcopy = (sint32)((uhi - ulimit) / tdudx) + 1;
+
+ VDASSERT(xpostcopy <= 0 || (uint64)uhi >= (uint64)ulimit);
+ VDASSERT(xpostcopy <= 0 || (uint64)(uhi - tdudx * (xpostcopy - 1)) >= (uint64)ulimit);
+ }
+
+ if (dudx < 0) {
+ std::swap(xprecopy, xpostcopy);
+ std::swap(xprepos, xpostpos);
+ }
+
+ xmain = dx - (xprecopy + xpostcopy);
+ }
+
+ // sanity-check parameters
+
+ VDASSERT(xprecopy>=0 && xprecopy <= dx);
+ VDASSERT(xpostcopy>=0 && xpostcopy <= dx);
+ VDASSERT(xmain>=0 && xmain <= dx);
+
+ VDASSERT(xprecopy <= 0 || (uint64)u64 >= (uint64)ulimit);
+ VDASSERT(xprecopy <= 0 || (uint64)(u64 + dudx * (xprecopy-1)) >= (uint64)ulimit);
+ VDASSERT(xmain <= 0 || (uint64)(u64 + dudx * xprecopy) < (uint64)ulimit);
+ VDASSERT(xmain <= 0 || (uint64)(u64 + dudx * (xprecopy+xmain-1)) < (uint64)ulimit);
+ VDASSERT(xpostcopy <= 0 || (uint64)(u64 + dudx * (xprecopy + xmain)) >= (uint64)ulimit);
+ VDASSERT(xpostcopy <= 0 || (uint64)(u64 + dudx * (xprecopy + xmain + xpostcopy - 1)) >= (uint64)ulimit);
+
+ u64 += dudx * xprecopy;
+ }
+}
+
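+// Nearest-neighbour StretchBlt: source coordinates are tracked in 32.32 fixed point, the
+// destination rectangle is clipped, and the blit is split into up to three vertical bands
+// (top/bottom edge replication around the main band) before handing rows to the per-format
+// row blitter chosen by the format switch.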
+bool VDPixmapStretchBltNearest_reference(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2) {
+ // we don't support format conversion
+ if (dst.format != src.format)
+ return false;
+
+ void (*pBlitter)(VDPixmapReferenceStretchBltParameters);
+ int bpp;
+
+ switch(src.format) {
+ case nsVDPixmap::kPixFormat_Pal8:
+ pBlitter = VDPixmapStretchBlt_Any8_nearest_reference;
+ bpp = 1;
+ break;
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ case nsVDPixmap::kPixFormat_RGB565:
+ pBlitter = VDPixmapStretchBlt_Any16_nearest_reference;
+ bpp = 2;
+ break;
+ case nsVDPixmap::kPixFormat_RGB888:
+ pBlitter = VDPixmapStretchBlt_Any24_nearest_reference;
+ bpp = 3;
+ break;
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ pBlitter = VDPixmapStretchBlt_Any32_nearest_reference;
+ bpp = 4;
+ break;
+ default:
+ return false;
+ }
+
+ // preemptive clip to prevent gradient calculations from crashing
+ if (x2 == x1 || y2 == y1)
+ return true;
+
+ // translate destination flips into source flips
+ if (x1 > x2) {
+ std::swap(x1, x2);
+ std::swap(u1, u2);
+ }
+
+ if (y1 > y2) {
+ std::swap(y1, y2);
+ std::swap(v1, v2);
+ }
+
+ // compute gradients
+ sint32 dx = x2 - x1;
+ sint32 dy = y2 - y1;
+ sint32 du = u2 - u1;
+ sint32 dv = v2 - v1;
+ sint64 dudx = ((sint64)du << 32) / dx; // must truncate toward zero to prevent overflow
+ sint64 dvdy = ((sint64)dv << 32) / dy;
+
+ // prestep top-left point to pixel center and convert destination coordinates to integer
+ sint64 u64 = (sint64)u1 << 16;
+ sint64 v64 = (sint64)v1 << 16;
+ sint32 prestepx = (0x8000 - x1) & 0xffff;
+ sint32 prestepy = (0x8000 - y1) & 0xffff;
+
+ u64 += (dudx * prestepx) >> 16;
+ v64 += (dvdy * prestepy) >> 16;
+
+ sint32 x1i = (x1 + 0x8000) >> 16;
+ sint32 y1i = (y1 + 0x8000) >> 16;
+ sint32 x2i = (x2 + 0x8000) >> 16;
+ sint32 y2i = (y2 + 0x8000) >> 16;
+
+ // destination clipping
+ if (x1i < 0) {
+ u64 -= dudx * x1i;
+ x1i = 0;
+ }
+
+ if (y1i < 0) {
+ v64 -= dvdy * y1i;
+ y1i = 0;
+ }
+
+ if (x2i > dst.w)
+ x2i = dst.w;
+
+ if (y2i > dst.h)
+ y2i = dst.h;
+
+ if (x1i >= x2i || y1i >= y2i)
+ return true;
+
+ // Calculate horizontal clip parameters
+ sint32 xprecopy = 0, xpostcopy = 0;
+ int xprepos = 0;
+ int xpostpos = src.w-1;
+ int xmain = 0;
+
+ VDSetupNearestSamplingParameters(u64, dudx, x2i-x1i, src.w, xprecopy, xprepos, xmain, xpostcopy, xpostpos);
+
+ // Calculate vertical clip parameters
+ sint32 yprecopy = 0, ypostcopy = 0;
+ int yprepos = 0;
+ int ypostpos = src.h-1;
+ int ymain = 0;
+
+ VDSetupNearestSamplingParameters(v64, dvdy, y2i-y1i, src.h, yprecopy, yprepos, ymain, ypostcopy, ypostpos);
+
+ // set up parameter block
+ VDPixmapReferenceStretchBltParameters params;
+
+ char *srcbase = (char *)src.data + (sint32)(u64 >> 32) * bpp;
+
+ params.dst = (char *)dst.data + y1i * dst.pitch + x1i * bpp;
+ params.dstpitch = dst.pitch;
+ params.src = srcbase + (sint32)(v64 >> 32) * src.pitch;
+ params.srcpitch = src.pitch;
+ params.srcinc = (sint32)(dvdy >> 32) * src.pitch;
+ params.dx = xmain;
+ params.dy = ymain;
+ params.u = (uint32)u64;
+ params.uinc = (uint32)(dudx >> 32);
+ params.dudx = (uint32)dudx;
+ params.v = (uint32)v64;
+ params.dvdy = (uint32)dvdy;
+ params.xprecopy = xprecopy;
+ params.xprepos = (xprepos - (sint32)(u64 >> 32)) * bpp;
+ params.xpostcopy = xpostcopy;
+ params.xpostpos = (xpostpos - (sint32)(u64 >> 32)) * bpp;
+
+ if (yprecopy > 0) {
+ VDPixmapReferenceStretchBltParameters preparams(params);
+
+ preparams.src = srcbase + yprepos * src.pitch;
+ preparams.srcinc = 0;
+ preparams.dy = yprecopy;
+ preparams.v = 0;
+ preparams.dvdy = 0;
+
+ pBlitter(preparams);
+
+ params.dst = (char *)params.dst + params.dstpitch * yprecopy;
+ }
+
+ if (ymain > 0)
+ pBlitter(params);
+
+ if (ypostcopy > 0) {
+ VDPixmapReferenceStretchBltParameters postparams(params);
+
+ postparams.dst = (char *)params.dst + params.dstpitch * params.dy;
+ postparams.src = srcbase + ypostpos * src.pitch;
+ postparams.srcpitch = 0;
+ postparams.srcinc = 0;
+ postparams.dy = ypostcopy;
+ postparams.v = 0;
+ postparams.dvdy = 0;
+
+ pBlitter(postparams);
+ }
+ return true;
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace {
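+	// The lerp/bilerp helpers below blend packed pixels without unpacking each channel:
+	// the red+blue and green fields are masked into separate words so one multiply and
+	// shift interpolates both components of a pair at once, using per-format fraction
+	// widths and rounding biases.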
+ uint32 lerp_XRGB1555(sint32 a, sint32 b, sint32 f) {
+ sint32 a_rb = a & 0x7c1f;
+ sint32 a_g = a & 0x03e0;
+ sint32 b_rb = b & 0x7c1f;
+ sint32 b_g = b & 0x03e0;
+
+ const sint32 rb = (a_rb + (((b_rb - a_rb)*f + 0x4010) >> 5)) & 0x7c1f;
+ const sint32 g = (a_g + (((b_g - a_g )*f + 0x0200) >> 5)) & 0x03e0;
+
+ return rb + g;
+ }
+
+ uint32 lerp_XRGB8888(sint32 a, sint32 b, sint32 f) {
+ sint32 a_rb = a & 0xff00ff;
+ sint32 a_g = a & 0x00ff00;
+ sint32 b_rb = b & 0xff00ff;
+ sint32 b_g = b & 0x00ff00;
+
+ const uint32 rb = (a_rb + (((b_rb - a_rb)*f + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 g = (a_g + (((b_g - a_g )*f + 0x00008000) >> 8)) & 0x00ff00;
+
+ return rb + g;
+ }
+
+ uint32 bilerp_RGB888(sint32 a, sint32 b, sint32 c, sint32 d, sint32 x, sint32 y) {
+ sint32 a_rb = a & 0xff00ff;
+ sint32 a_g = a & 0x00ff00;
+ sint32 b_rb = b & 0xff00ff;
+ sint32 b_g = b & 0x00ff00;
+ sint32 c_rb = c & 0xff00ff;
+ sint32 c_g = c & 0x00ff00;
+ sint32 d_rb = d & 0xff00ff;
+ sint32 d_g = d & 0x00ff00;
+
+ const uint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 top_g = (a_g + (((b_g - a_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+ const uint32 bot_rb = (c_rb + (((d_rb - c_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 bot_g = (c_g + (((d_g - c_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+
+ const uint32 final_rb = (top_rb + (((bot_rb - top_rb)*y) >> 8)) & 0xff00ff;
+ const uint32 final_g = (top_g + (((bot_g - top_g )*y) >> 8)) & 0x00ff00;
+
+ return final_rb + final_g;
+ }
+
+ uint32 bilerp_XRGB1555(sint32 a, sint32 b, sint32 c, sint32 d, sint32 x, sint32 y) {
+ sint32 a_rb = a & 0x7c1f;
+ sint32 a_g = a & 0x03e0;
+ sint32 b_rb = b & 0x7c1f;
+ sint32 b_g = b & 0x03e0;
+ sint32 c_rb = c & 0x7c1f;
+ sint32 c_g = c & 0x03e0;
+ sint32 d_rb = d & 0x7c1f;
+ sint32 d_g = d & 0x03e0;
+
+ const sint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x4010) >> 5)) & 0x7c1f;
+ const sint32 top_g = (a_g + (((b_g - a_g )*x + 0x0200) >> 5)) & 0x03e0;
+ const sint32 bot_rb = (c_rb + (((d_rb - c_rb)*x + 0x4010) >> 5)) & 0x7c1f;
+ const sint32 bot_g = (c_g + (((d_g - c_g )*x + 0x0200) >> 5)) & 0x03e0;
+
+ const sint32 final_rb = (top_rb + (((bot_rb - top_rb)*y + 0x4010) >> 5)) & 0x7c1f;
+ const sint32 final_g = (top_g + (((bot_g - top_g )*y + 0x0200) >> 5)) & 0x03e0;
+
+ return final_rb + final_g;
+ }
+
+ uint32 bilerp_RGB565(sint32 a, sint32 b, sint32 c, sint32 d, sint32 x, sint32 y) {
+ sint32 a_rb = a & 0xf81f;
+ sint32 a_g = a & 0x07e0;
+ sint32 b_rb = b & 0xf81f;
+ sint32 b_g = b & 0x07e0;
+ sint32 c_rb = c & 0xf81f;
+ sint32 c_g = c & 0x07e0;
+ sint32 d_rb = d & 0xf81f;
+ sint32 d_g = d & 0x07e0;
+
+ const sint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x8010) >> 6)) & 0xf81f;
+ const sint32 top_g = (a_g + (((b_g - a_g )*x + 0x0400) >> 6)) & 0x07e0;
+ const sint32 bot_rb = (c_rb + (((d_rb - c_rb)*x + 0x8010) >> 6)) & 0xf81f;
+ const sint32 bot_g = (c_g + (((d_g - c_g )*x + 0x0400) >> 6)) & 0x07e0;
+
+ const sint32 final_rb = (top_rb + (((bot_rb - top_rb)*y + 0x8010) >> 6)) & 0xf81f;
+ const sint32 final_g = (top_g + (((bot_g - top_g )*y + 0x0400) >> 6)) & 0x07e0;
+
+ return final_rb + final_g;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
+ struct VDPixmapReferenceStretchBltBilinearParameters {
+ void *dst;
+ const void *src;
+ uint32 u;
+ uint32 uinc;
+ uint32 dudx;
+
+ ptrdiff_t xprepos;
+ ptrdiff_t xpostpos;
+ sint32 xprecopy;
+ sint32 xpostcopy;
+ sint32 xmidsize;
+ };
+
+ void VDPixmapStretchBiH_XRGB1555_to_XRGB1555(const VDPixmapReferenceStretchBltBilinearParameters& params) {
+ uint16 *dst = (uint16 *)params.dst;
+ const uint16 *src = (const uint16 *)params.src;
+
+ if (params.xprecopy)
+ VDMemset16(dst - params.xprecopy, *(const uint16 *)((const char *)params.src + params.xprepos), params.xprecopy);
+
+ if (params.xmidsize) {
+ sint32 w = params.xmidsize;
+ uint32 u = params.u;
+ const uint32 dudx = params.dudx;
+ const ptrdiff_t uinc = params.uinc;
+
+ do {
+ *dst++ = lerp_XRGB1555(src[0], src[1], u >> 27);
+
+ const uint32 ut = u + dudx;
+ src += uinc + (ut < u);
+ u = ut;
+ } while(--w);
+ }
+
+ if (params.xpostcopy)
+ VDMemset16(dst, *(const uint16 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+ }
+
+ void VDPixmapStretchBiH_XRGB8888_to_XRGB8888(const VDPixmapReferenceStretchBltBilinearParameters& params) {
+ uint32 *dst = (uint32 *)params.dst;
+ const uint32 *src = (const uint32 *)params.src;
+
+ if (params.xprecopy)
+ VDMemset32(dst - params.xprecopy, *(const uint32 *)((const char *)params.src + params.xprepos), params.xprecopy);
+
+ if (params.xmidsize) {
+ sint32 w = params.xmidsize;
+ uint32 u = params.u;
+ const uint32 dudx = params.dudx;
+ const ptrdiff_t uinc = params.uinc;
+
+ do {
+ *dst++ = lerp_XRGB8888(src[0], src[1], u >> 24);
+
+ const uint32 ut = u + dudx;
+ src += uinc + (ut < u);
+ u = ut;
+ } while(--w);
+ }
+
+ if (params.xpostcopy)
+ VDMemset32(dst, *(const uint32 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+ }
+
+ void VDPixmapStretchBiV_XRGB1555_to_XRGB1555(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f) {
+ uint16 *dst = (uint16 *)dstv;
+ const uint16 *src1 = (const uint16 *)src1v;
+ const uint16 *src2 = (const uint16 *)src2v;
+
+ f >>= 27;
+
+ do {
+ *dst++ = lerp_XRGB1555(*src1++, *src2++, f);
+ } while(--w);
+ }
+
+ void VDPixmapStretchBiV_XRGB8888_to_XRGB8888(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f) {
+ uint32 *dst = (uint32 *)dstv;
+ const uint32 *src1 = (const uint32 *)src1v;
+ const uint32 *src2 = (const uint32 *)src2v;
+
+ f >>= 24;
+
+ do {
+ *dst++ = lerp_XRGB8888(*src1++, *src2++, f);
+ } while(--w);
+ }
+}
+
+#ifdef _M_IX86
+extern "C" void vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX(const VDPixmapReferenceStretchBltBilinearParameters&);
+
+extern "C" void vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f);
+extern "C" void vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f);
+#endif
+
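+// Bilinear StretchBlt: each output row is produced by horizontally filtering source rows
+// into a two-row sliding window (pTempRow1/pTempRow2) and then vertically lerping between
+// them with the fractional part of v; the window is refilled only when the integer source
+// row advances, so each source row is filtered horizontally at most once.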
+bool VDPixmapStretchBltBilinear_reference(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2) {
+ // preemptive clip to prevent gradient calculations from crashing
+ if (x2 == x1 || y2 == y1)
+ return true;
+
+ // we don't support source clipping
+ if ((uint32)u1 > (uint32)(src.w << 16) || (uint32)v1 > (uint32)(src.h << 16))
+ return false;
+
+ if ((uint32)u2 > (uint32)(src.w << 16) || (uint32)v2 > (uint32)(src.h << 16))
+ return false;
+
+ // we don't support format changes (yet)
+ if (dst.format != src.format)
+ return false;
+
+ // format determination
+ void (*pHorizontalFilter)(const VDPixmapReferenceStretchBltBilinearParameters& params);
+ void (*pVerticalFilter)(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f);
+ int bpp;
+
+#pragma vdpragma_TODO("fixme this is b0rken")
+ switch(src.format) {
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ pHorizontalFilter = VDPixmapStretchBiH_XRGB1555_to_XRGB1555;
+#ifdef _M_IX86
+ if (CPUGetEnabledExtensions() & CPUF_SUPPORTS_MMX)
+ pVerticalFilter = vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX;
+ else
+#endif
+ pVerticalFilter = VDPixmapStretchBiV_XRGB1555_to_XRGB1555;
+ bpp = 2;
+ break;
+ case nsVDPixmap::kPixFormat_XRGB8888:
+#ifdef _M_IX86
+ if (CPUGetEnabledExtensions() & CPUF_SUPPORTS_MMX) {
+ pHorizontalFilter = vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX;
+ pVerticalFilter = vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX;
+ } else
+#endif
+ {
+ pHorizontalFilter = VDPixmapStretchBiH_XRGB8888_to_XRGB8888;
+ pVerticalFilter = VDPixmapStretchBiV_XRGB8888_to_XRGB8888;
+ }
+ bpp = 4;
+ break;
+ default:
+ return false;
+ }
+
+ // translate destination flips into source flips
+ if (x1 > x2) {
+ std::swap(x1, x2);
+ std::swap(u1, u2);
+ }
+
+ if (y1 > y2) {
+ std::swap(y1, y2);
+ std::swap(v1, v2);
+ }
+
+ // compute gradients
+ sint32 dx = x2 - x1;
+ sint32 dy = y2 - y1;
+ sint32 du = u2 - u1;
+ sint32 dv = v2 - v1;
+ sint64 dudx = ((sint64)du << 32) / dx; // must truncate toward zero to prevent overflow
+ sint64 dvdy = ((sint64)dv << 32) / dy;
+
+ // prestep top-left point to pixel center and convert destination coordinates to integer
+ sint64 u64 = (sint64)u1 << 16;
+ sint64 v64 = (sint64)v1 << 16;
+ sint32 prestepx = (0x8000 - x1) & 0xffff;
+ sint32 prestepy = (0x8000 - y1) & 0xffff;
+
+ u64 += (dudx * prestepx) >> 16;
+ v64 += (dvdy * prestepy) >> 16;
+
+ sint32 x1i = (x1 + 0x8000) >> 16;
+ sint32 y1i = (y1 + 0x8000) >> 16;
+ sint32 x2i = (x2 + 0x8000) >> 16;
+ sint32 y2i = (y2 + 0x8000) >> 16;
+
+ // destination clipping
+ if (x1i < 0) {
+ u64 -= dudx * x1i;
+ x1i = 0;
+ }
+
+ if (y1i < 0) {
+ v64 -= dvdy * y1i;
+ y1i = 0;
+ }
+
+ if (x2i > dst.w)
+ x2i = dst.w;
+
+ if (y2i > dst.h)
+ y2i = dst.h;
+
+ if (x1i >= x2i || y1i >= y2i)
+ return true;
+
+ u64 -= 0x80000000;
+ v64 -= 0x80000000;
+
+ int xprepos = 0;
+ int xpostpos = src.w-1;
+
+ sint64 ulo = u64;
+ sint64 uhi = u64 + dudx * (x2i - x1i - 1);
+ sint64 tdudx = dudx;
+
+ if (ulo > uhi) {
+ std::swap(ulo, uhi);
+ tdudx = -tdudx;
+ }
+
+ int xprecopy = 0;
+ int xpostcopy = 0;
+
+ if (ulo < 0) {
+ xprecopy = (int)((1 - ulo) / tdudx) + 1;
+ }
+
+ const sint64 ulimit = ((sint64)(src.w-1) << 32);
+
+ if (uhi >= ulimit)
+ xpostcopy = (int)((uhi - ulimit - 1) / tdudx) + 1;
+
+ if (dudx < 0) {
+ std::swap(xprecopy, xpostcopy);
+ std::swap(xprepos, xpostpos);
+ }
+
+ u64 += dudx * xprecopy;
+ const int xtotal = x2i - x1i;
+ int xmidcopy = (x2i - x1i) - (xprecopy + xpostcopy);
+ const sint32 ui = (sint32)(u64 >> 32);
+
+ // set up parameter block
+
+ VDPixmapReferenceStretchBltBilinearParameters params;
+
+ params.u = (uint32)u64;
+ params.uinc = (sint32)(dudx >> 32);
+ params.dudx = (sint32)dudx;
+ params.xprecopy = xprecopy;
+ params.xprepos = (xprepos - ui) * bpp;
+ params.xpostcopy = xpostcopy;
+ params.xpostpos = (xpostpos - ui) * bpp;
+ params.xmidsize = xmidcopy;
+
+ void *dstp = (char *)dst.data + y1i * dst.pitch + x1i * bpp;
+ const void *srcp = (char *)src.data + ui * bpp;
+
+ VDPixmapBuffer window(xtotal, 2, src.format);
+
+ void *pTempRow1 = window.data;
+ void *pTempRow2 = (char *)window.data + window.pitch;
+ int windowbottom = dvdy > 0 ? -0x7fffffff : 0x7fffffff;
+
+ do {
+ sint32 iv = (sint32)(v64 >> 32);
+ sint32 iv_bottom = iv + 1;
+
+ if (iv < 0)
+ iv = iv_bottom = 0;
+
+ if (iv >= src.h-1)
+ iv = iv_bottom = src.h-1;
+
+ if (dvdy < 0) {
+ if (windowbottom > iv_bottom+1)
+ windowbottom = iv_bottom+1;
+
+ while(windowbottom > iv) {
+ std::swap(pTempRow1, pTempRow2);
+
+ --windowbottom;
+
+ params.dst = (char *)pTempRow1 + bpp * params.xprecopy;
+ params.src = vdptroffset(srcp, windowbottom * src.pitch);
+
+ pHorizontalFilter(params);
+ }
+ } else {
+ if (windowbottom < iv-1)
+ windowbottom = iv-1;
+
+ while(windowbottom < iv_bottom) {
+ std::swap(pTempRow1, pTempRow2);
+
+ ++windowbottom;
+
+ params.dst = (char *)pTempRow2 + bpp * params.xprecopy;
+ params.src = vdptroffset(srcp, windowbottom * src.pitch);
+
+ pHorizontalFilter(params);
+ }
+ }
+
+ if (iv == iv_bottom)
+ if (dvdy < 0)
+ pVerticalFilter(dstp, pTempRow1, pTempRow1, xtotal, 0);
+ else
+ pVerticalFilter(dstp, pTempRow2, pTempRow2, xtotal, 0);
+ else
+ pVerticalFilter(dstp, pTempRow1, pTempRow2, xtotal, (uint32)v64);
+
+ v64 += dvdy;
+ dstp = (char *)dstp + dst.pitch;
+ } while(++y1i < y2i);
+
+ return true;
+}
\ No newline at end of file
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/tables.cpp b/src/thirdparty/VirtualDub/Kasumi/source/tables.cpp
new file mode 100644
index 000000000..bf1987500
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/tables.cpp
@@ -0,0 +1,204 @@
+// Automatically generated by Asuka "maketables." DO NOT EDIT!
+
+#include <vd2/system/vdtypes.h>
+
+extern "C" const sint32 kVDCubicInterpTableFX14_075[256][4]={
+ { 0, 16384, 0, 0 }, { -48, 16384, 48, 0 }, { -95, 16383, 97, -1 }, { -141, 16380, 147, -2 },
+ { -186, 16375, 198, -3 }, { -231, 16371, 249, -5 }, { -275, 16365, 301, -7 }, { -318, 16357, 354, -9 },
+ { -360, 16349, 407, -12 }, { -402, 16340, 461, -15 }, { -443, 16329, 516, -18 }, { -484, 16318, 572, -22 },
+ { -523, 16305, 628, -26 }, { -562, 16291, 685, -30 }, { -601, 16278, 742, -35 }, { -638, 16262, 800, -40 },
+ { -675, 16245, 859, -45 }, { -711, 16228, 918, -51 }, { -747, 16209, 978, -56 }, { -782, 16190, 1039, -63 },
+ { -816, 16169, 1100, -69 }, { -849, 16147, 1162, -76 }, { -882, 16124, 1225, -83 }, { -915, 16101, 1288, -90 },
+ { -946, 16077, 1351, -98 }, { -977, 16052, 1415, -106 }, { -1007, 16025, 1480, -114 }, { -1037, 15998, 1545, -122 },
+ { -1066, 15970, 1611, -131 }, { -1094, 15940, 1678, -140 }, { -1122, 15910, 1745, -149 }, { -1149, 15879, 1812, -158 },
+ { -1176, 15848, 1880, -168 }, { -1202, 15815, 1949, -178 }, { -1227, 15781, 2018, -188 }, { -1252, 15747, 2087, -198 },
+ { -1276, 15712, 2157, -209 }, { -1300, 15676, 2228, -220 }, { -1323, 15639, 2299, -231 }, { -1345, 15601, 2370, -242 },
+ { -1367, 15562, 2442, -253 }, { -1388, 15523, 2514, -265 }, { -1409, 15482, 2587, -276 }, { -1429, 15441, 2660, -288 },
+ { -1448, 15399, 2734, -301 }, { -1467, 15356, 2808, -313 }, { -1486, 15312, 2883, -325 }, { -1504, 15268, 2958, -338 },
+ { -1521, 15223, 3033, -351 }, { -1538, 15177, 3109, -364 }, { -1554, 15130, 3185, -377 }, { -1570, 15084, 3261, -391 },
+ { -1585, 15035, 3338, -404 }, { -1600, 14986, 3416, -418 }, { -1614, 14936, 3493, -431 }, { -1627, 14885, 3571, -445 },
+ { -1641, 14834, 3650, -459 }, { -1653, 14783, 3728, -474 }, { -1665, 14730, 3807, -488 }, { -1677, 14676, 3887, -502 },
+ { -1688, 14623, 3966, -517 }, { -1699, 14568, 4046, -531 }, { -1709, 14512, 4127, -546 }, { -1719, 14457, 4207, -561 },
+ { -1728, 14400, 4288, -576 }, { -1737, 14343, 4369, -591 }, { -1745, 14284, 4451, -606 }, { -1753, 14226, 4532, -621 },
+ { -1760, 14167, 4614, -637 }, { -1767, 14107, 4696, -652 }, { -1774, 14047, 4779, -668 }, { -1780, 13986, 4861, -683 },
+ { -1785, 13924, 4944, -699 }, { -1791, 13861, 5028, -714 }, { -1795, 13798, 5111, -730 }, { -1800, 13736, 5194, -746 },
+ { -1804, 13671, 5278, -761 }, { -1807, 13606, 5362, -777 }, { -1810, 13541, 5446, -793 }, { -1813, 13475, 5531, -809 },
+ { -1815, 13409, 5615, -825 }, { -1817, 13342, 5700, -841 }, { -1818, 13275, 5784, -857 }, { -1819, 13207, 5869, -873 },
+ { -1820, 13139, 5954, -889 }, { -1820, 13069, 6040, -905 }, { -1820, 13000, 6125, -921 }, { -1820, 12930, 6211, -937 },
+ { -1819, 12860, 6296, -953 }, { -1818, 12789, 6382, -969 }, { -1816, 12717, 6468, -985 }, { -1815, 12647, 6553, -1001 },
+ { -1812, 12574, 6639, -1017 }, { -1810, 12502, 6725, -1033 }, { -1807, 12427, 6812, -1048 }, { -1804, 12354, 6898, -1064 },
+ { -1800, 12280, 6984, -1080 }, { -1796, 12206, 7070, -1096 }, { -1792, 12130, 7157, -1111 }, { -1787, 12055, 7243, -1127 },
+ { -1782, 11980, 7329, -1143 }, { -1777, 11903, 7416, -1158 }, { -1772, 11827, 7502, -1173 }, { -1766, 11751, 7588, -1189 },
+ { -1760, 11673, 7675, -1204 }, { -1753, 11595, 7761, -1219 }, { -1747, 11517, 7848, -1234 }, { -1740, 11439, 7934, -1249 },
+ { -1733, 11361, 8020, -1264 }, { -1725, 11281, 8107, -1279 }, { -1717, 11202, 8193, -1294 }, { -1709, 11123, 8279, -1309 },
+ { -1701, 11043, 8365, -1323 }, { -1692, 10962, 8451, -1337 }, { -1684, 10883, 8537, -1352 }, { -1675, 10802, 8623, -1366 },
+ { -1665, 10720, 8709, -1380 }, { -1656, 10640, 8794, -1394 }, { -1646, 10557, 8880, -1407 }, { -1636, 10476, 8965, -1421 },
+ { -1626, 10393, 9051, -1434 }, { -1615, 10311, 9136, -1448 }, { -1604, 10228, 9221, -1461 }, { -1594, 10146, 9306, -1474 },
+ { -1582, 10062, 9391, -1487 }, { -1571, 9979, 9475, -1499 }, { -1560, 9896, 9560, -1512 }, { -1548, 9812, 9644, -1524 },
+ { -1536, 9728, 9728, -1536 }, { -1524, 9644, 9812, -1548 }, { -1512, 9560, 9896, -1560 }, { -1499, 9475, 9979, -1571 },
+ { -1487, 9391, 10062, -1582 }, { -1474, 9306, 10146, -1594 }, { -1461, 9221, 10228, -1604 }, { -1448, 9136, 10311, -1615 },
+ { -1434, 9051, 10393, -1626 }, { -1421, 8965, 10476, -1636 }, { -1407, 8880, 10557, -1646 }, { -1394, 8795, 10639, -1656 },
+ { -1380, 8709, 10720, -1665 }, { -1366, 8624, 10801, -1675 }, { -1352, 8538, 10882, -1684 }, { -1337, 8450, 10963, -1692 },
+ { -1323, 8365, 11043, -1701 }, { -1309, 8279, 11123, -1709 }, { -1294, 8192, 11203, -1717 }, { -1279, 8106, 11282, -1725 },
+ { -1264, 8020, 11361, -1733 }, { -1249, 7934, 11439, -1740 }, { -1234, 7847, 11518, -1747 }, { -1219, 7760, 11596, -1753 },
+ { -1204, 7675, 11673, -1760 }, { -1189, 7589, 11750, -1766 }, { -1173, 7502, 11827, -1772 }, { -1158, 7415, 11904, -1777 },
+ { -1143, 7329, 11980, -1782 }, { -1127, 7243, 12055, -1787 }, { -1111, 7156, 12131, -1792 }, { -1096, 7070, 12206, -1796 },
+ { -1080, 6984, 12280, -1800 }, { -1064, 6898, 12354, -1804 }, { -1048, 6811, 12428, -1807 }, { -1033, 6726, 12501, -1810 },
+ { -1017, 6639, 12574, -1812 }, { -1001, 6554, 12646, -1815 }, { -985, 6467, 12718, -1816 }, { -969, 6382, 12789, -1818 },
+ { -953, 6296, 12860, -1819 }, { -937, 6211, 12930, -1820 }, { -921, 6125, 13000, -1820 }, { -905, 6039, 13070, -1820 },
+ { -889, 5954, 13139, -1820 }, { -873, 5869, 13207, -1819 }, { -857, 5784, 13275, -1818 }, { -841, 5700, 13342, -1817 },
+ { -825, 5615, 13409, -1815 }, { -809, 5531, 13475, -1813 }, { -793, 5446, 13541, -1810 }, { -777, 5362, 13606, -1807 },
+ { -761, 5278, 13671, -1804 }, { -746, 5195, 13735, -1800 }, { -730, 5111, 13798, -1795 }, { -714, 5028, 13861, -1791 },
+ { -699, 4944, 13924, -1785 }, { -683, 4862, 13985, -1780 }, { -668, 4780, 14046, -1774 }, { -652, 4696, 14107, -1767 },
+ { -637, 4614, 14167, -1760 }, { -621, 4532, 14226, -1753 }, { -606, 4450, 14285, -1745 }, { -591, 4369, 14343, -1737 },
+ { -576, 4288, 14400, -1728 }, { -561, 4207, 14457, -1719 }, { -546, 4126, 14513, -1709 }, { -531, 4046, 14568, -1699 },
+ { -517, 3966, 14623, -1688 }, { -502, 3886, 14677, -1677 }, { -488, 3807, 14730, -1665 }, { -474, 3728, 14783, -1653 },
+ { -459, 3650, 14834, -1641 }, { -445, 3570, 14886, -1627 }, { -431, 3493, 14936, -1614 }, { -418, 3416, 14986, -1600 },
+ { -404, 3338, 15035, -1585 }, { -391, 3262, 15083, -1570 }, { -377, 3185, 15130, -1554 }, { -364, 3109, 15177, -1538 },
+ { -351, 3033, 15223, -1521 }, { -338, 2958, 15268, -1504 }, { -325, 2882, 15313, -1486 }, { -313, 2808, 15356, -1467 },
+ { -301, 2734, 15399, -1448 }, { -288, 2660, 15441, -1429 }, { -276, 2587, 15482, -1409 }, { -265, 2514, 15523, -1388 },
+ { -253, 2442, 15562, -1367 }, { -242, 2370, 15601, -1345 }, { -231, 2299, 15639, -1323 }, { -220, 2228, 15676, -1300 },
+ { -209, 2157, 15712, -1276 }, { -198, 2087, 15747, -1252 }, { -188, 2017, 15782, -1227 }, { -178, 1949, 15815, -1202 },
+ { -168, 1880, 15848, -1176 }, { -158, 1811, 15880, -1149 }, { -149, 1744, 15911, -1122 }, { -140, 1677, 15941, -1094 },
+ { -131, 1611, 15970, -1066 }, { -122, 1545, 15998, -1037 }, { -114, 1480, 16025, -1007 }, { -106, 1415, 16052, -977 },
+ { -98, 1351, 16077, -946 }, { -90, 1288, 16101, -915 }, { -83, 1224, 16125, -882 }, { -76, 1162, 16147, -849 },
+ { -69, 1100, 16169, -816 }, { -63, 1040, 16189, -782 }, { -56, 978, 16209, -747 }, { -51, 919, 16227, -711 },
+ { -45, 859, 16245, -675 }, { -40, 800, 16262, -638 }, { -35, 743, 16277, -601 }, { -30, 684, 16292, -562 },
+ { -26, 628, 16305, -523 }, { -22, 572, 16318, -484 }, { -18, 516, 16329, -443 }, { -15, 462, 16339, -402 },
+ { -12, 407, 16349, -360 }, { -9, 354, 16357, -318 }, { -7, 302, 16364, -275 }, { -5, 250, 16370, -231 },
+ { -3, 198, 16375, -186 }, { -2, 148, 16379, -141 }, { -1, 98, 16382, -95 }, { 0, 49, 16383, -48 },
+};
+
+#ifdef _M_IX86
+extern "C" const __declspec(align(16)) sint16 kVDCubicInterpTableFX14_075_MMX[256][8]={
+ { 0, 16384, 0, 16384, 0, 0, 0, 0 }, { -48, 16384, -48, 16384, 48, 0, 48, 0 },
+ { -95, 16383, -95, 16383, 97, -1, 97, -1 }, { -141, 16380, -141, 16380, 147, -2, 147, -2 },
+ { -186, 16375, -186, 16375, 198, -3, 198, -3 }, { -231, 16371, -231, 16371, 249, -5, 249, -5 },
+ { -275, 16365, -275, 16365, 301, -7, 301, -7 }, { -318, 16357, -318, 16357, 354, -9, 354, -9 },
+ { -360, 16349, -360, 16349, 407, -12, 407, -12 }, { -402, 16340, -402, 16340, 461, -15, 461, -15 },
+ { -443, 16329, -443, 16329, 516, -18, 516, -18 }, { -484, 16318, -484, 16318, 572, -22, 572, -22 },
+ { -523, 16305, -523, 16305, 628, -26, 628, -26 }, { -562, 16291, -562, 16291, 685, -30, 685, -30 },
+ { -601, 16278, -601, 16278, 742, -35, 742, -35 }, { -638, 16262, -638, 16262, 800, -40, 800, -40 },
+ { -675, 16245, -675, 16245, 859, -45, 859, -45 }, { -711, 16228, -711, 16228, 918, -51, 918, -51 },
+ { -747, 16209, -747, 16209, 978, -56, 978, -56 }, { -782, 16190, -782, 16190, 1039, -63, 1039, -63 },
+ { -816, 16169, -816, 16169, 1100, -69, 1100, -69 }, { -849, 16147, -849, 16147, 1162, -76, 1162, -76 },
+ { -882, 16124, -882, 16124, 1225, -83, 1225, -83 }, { -915, 16101, -915, 16101, 1288, -90, 1288, -90 },
+ { -946, 16077, -946, 16077, 1351, -98, 1351, -98 }, { -977, 16052, -977, 16052, 1415, -106, 1415, -106 },
+ { -1007, 16025, -1007, 16025, 1480, -114, 1480, -114 }, { -1037, 15998, -1037, 15998, 1545, -122, 1545, -122 },
+ { -1066, 15970, -1066, 15970, 1611, -131, 1611, -131 }, { -1094, 15940, -1094, 15940, 1678, -140, 1678, -140 },
+ { -1122, 15910, -1122, 15910, 1745, -149, 1745, -149 }, { -1149, 15879, -1149, 15879, 1812, -158, 1812, -158 },
+ { -1176, 15848, -1176, 15848, 1880, -168, 1880, -168 }, { -1202, 15815, -1202, 15815, 1949, -178, 1949, -178 },
+ { -1227, 15781, -1227, 15781, 2018, -188, 2018, -188 }, { -1252, 15747, -1252, 15747, 2087, -198, 2087, -198 },
+ { -1276, 15712, -1276, 15712, 2157, -209, 2157, -209 }, { -1300, 15676, -1300, 15676, 2228, -220, 2228, -220 },
+ { -1323, 15639, -1323, 15639, 2299, -231, 2299, -231 }, { -1345, 15601, -1345, 15601, 2370, -242, 2370, -242 },
+ { -1367, 15562, -1367, 15562, 2442, -253, 2442, -253 }, { -1388, 15523, -1388, 15523, 2514, -265, 2514, -265 },
+ { -1409, 15482, -1409, 15482, 2587, -276, 2587, -276 }, { -1429, 15441, -1429, 15441, 2660, -288, 2660, -288 },
+ { -1448, 15399, -1448, 15399, 2734, -301, 2734, -301 }, { -1467, 15356, -1467, 15356, 2808, -313, 2808, -313 },
+ { -1486, 15312, -1486, 15312, 2883, -325, 2883, -325 }, { -1504, 15268, -1504, 15268, 2958, -338, 2958, -338 },
+ { -1521, 15223, -1521, 15223, 3033, -351, 3033, -351 }, { -1538, 15177, -1538, 15177, 3109, -364, 3109, -364 },
+ { -1554, 15130, -1554, 15130, 3185, -377, 3185, -377 }, { -1570, 15084, -1570, 15084, 3261, -391, 3261, -391 },
+ { -1585, 15035, -1585, 15035, 3338, -404, 3338, -404 }, { -1600, 14986, -1600, 14986, 3416, -418, 3416, -418 },
+ { -1614, 14936, -1614, 14936, 3493, -431, 3493, -431 }, { -1627, 14885, -1627, 14885, 3571, -445, 3571, -445 },
+ { -1641, 14834, -1641, 14834, 3650, -459, 3650, -459 }, { -1653, 14783, -1653, 14783, 3728, -474, 3728, -474 },
+ { -1665, 14730, -1665, 14730, 3807, -488, 3807, -488 }, { -1677, 14676, -1677, 14676, 3887, -502, 3887, -502 },
+ { -1688, 14623, -1688, 14623, 3966, -517, 3966, -517 }, { -1699, 14568, -1699, 14568, 4046, -531, 4046, -531 },
+ { -1709, 14512, -1709, 14512, 4127, -546, 4127, -546 }, { -1719, 14457, -1719, 14457, 4207, -561, 4207, -561 },
+ { -1728, 14400, -1728, 14400, 4288, -576, 4288, -576 }, { -1737, 14343, -1737, 14343, 4369, -591, 4369, -591 },
+ { -1745, 14284, -1745, 14284, 4451, -606, 4451, -606 }, { -1753, 14226, -1753, 14226, 4532, -621, 4532, -621 },
+ { -1760, 14167, -1760, 14167, 4614, -637, 4614, -637 }, { -1767, 14107, -1767, 14107, 4696, -652, 4696, -652 },
+ { -1774, 14047, -1774, 14047, 4779, -668, 4779, -668 }, { -1780, 13986, -1780, 13986, 4861, -683, 4861, -683 },
+ { -1785, 13924, -1785, 13924, 4944, -699, 4944, -699 }, { -1791, 13861, -1791, 13861, 5028, -714, 5028, -714 },
+ { -1795, 13798, -1795, 13798, 5111, -730, 5111, -730 }, { -1800, 13736, -1800, 13736, 5194, -746, 5194, -746 },
+ { -1804, 13671, -1804, 13671, 5278, -761, 5278, -761 }, { -1807, 13606, -1807, 13606, 5362, -777, 5362, -777 },
+ { -1810, 13541, -1810, 13541, 5446, -793, 5446, -793 }, { -1813, 13475, -1813, 13475, 5531, -809, 5531, -809 },
+ { -1815, 13409, -1815, 13409, 5615, -825, 5615, -825 }, { -1817, 13342, -1817, 13342, 5700, -841, 5700, -841 },
+ { -1818, 13275, -1818, 13275, 5784, -857, 5784, -857 }, { -1819, 13207, -1819, 13207, 5869, -873, 5869, -873 },
+ { -1820, 13139, -1820, 13139, 5954, -889, 5954, -889 }, { -1820, 13069, -1820, 13069, 6040, -905, 6040, -905 },
+ { -1820, 13000, -1820, 13000, 6125, -921, 6125, -921 }, { -1820, 12930, -1820, 12930, 6211, -937, 6211, -937 },
+ { -1819, 12860, -1819, 12860, 6296, -953, 6296, -953 }, { -1818, 12789, -1818, 12789, 6382, -969, 6382, -969 },
+ { -1816, 12717, -1816, 12717, 6468, -985, 6468, -985 }, { -1815, 12647, -1815, 12647, 6553, -1001, 6553, -1001 },
+ { -1812, 12574, -1812, 12574, 6639, -1017, 6639, -1017 }, { -1810, 12502, -1810, 12502, 6725, -1033, 6725, -1033 },
+ { -1807, 12427, -1807, 12427, 6812, -1048, 6812, -1048 }, { -1804, 12354, -1804, 12354, 6898, -1064, 6898, -1064 },
+ { -1800, 12280, -1800, 12280, 6984, -1080, 6984, -1080 }, { -1796, 12206, -1796, 12206, 7070, -1096, 7070, -1096 },
+ { -1792, 12130, -1792, 12130, 7157, -1111, 7157, -1111 }, { -1787, 12055, -1787, 12055, 7243, -1127, 7243, -1127 },
+ { -1782, 11980, -1782, 11980, 7329, -1143, 7329, -1143 }, { -1777, 11903, -1777, 11903, 7416, -1158, 7416, -1158 },
+ { -1772, 11827, -1772, 11827, 7502, -1173, 7502, -1173 }, { -1766, 11751, -1766, 11751, 7588, -1189, 7588, -1189 },
+ { -1760, 11673, -1760, 11673, 7675, -1204, 7675, -1204 }, { -1753, 11595, -1753, 11595, 7761, -1219, 7761, -1219 },
+ { -1747, 11517, -1747, 11517, 7848, -1234, 7848, -1234 }, { -1740, 11439, -1740, 11439, 7934, -1249, 7934, -1249 },
+ { -1733, 11361, -1733, 11361, 8020, -1264, 8020, -1264 }, { -1725, 11281, -1725, 11281, 8107, -1279, 8107, -1279 },
+ { -1717, 11202, -1717, 11202, 8193, -1294, 8193, -1294 }, { -1709, 11123, -1709, 11123, 8279, -1309, 8279, -1309 },
+ { -1701, 11043, -1701, 11043, 8365, -1323, 8365, -1323 }, { -1692, 10962, -1692, 10962, 8451, -1337, 8451, -1337 },
+ { -1684, 10883, -1684, 10883, 8537, -1352, 8537, -1352 }, { -1675, 10802, -1675, 10802, 8623, -1366, 8623, -1366 },
+ { -1665, 10720, -1665, 10720, 8709, -1380, 8709, -1380 }, { -1656, 10640, -1656, 10640, 8794, -1394, 8794, -1394 },
+ { -1646, 10557, -1646, 10557, 8880, -1407, 8880, -1407 }, { -1636, 10476, -1636, 10476, 8965, -1421, 8965, -1421 },
+ { -1626, 10393, -1626, 10393, 9051, -1434, 9051, -1434 }, { -1615, 10311, -1615, 10311, 9136, -1448, 9136, -1448 },
+ { -1604, 10228, -1604, 10228, 9221, -1461, 9221, -1461 }, { -1594, 10146, -1594, 10146, 9306, -1474, 9306, -1474 },
+ { -1582, 10062, -1582, 10062, 9391, -1487, 9391, -1487 }, { -1571, 9979, -1571, 9979, 9475, -1499, 9475, -1499 },
+ { -1560, 9896, -1560, 9896, 9560, -1512, 9560, -1512 }, { -1548, 9812, -1548, 9812, 9644, -1524, 9644, -1524 },
+ { -1536, 9728, -1536, 9728, 9728, -1536, 9728, -1536 }, { -1524, 9644, -1524, 9644, 9812, -1548, 9812, -1548 },
+ { -1512, 9560, -1512, 9560, 9896, -1560, 9896, -1560 }, { -1499, 9475, -1499, 9475, 9979, -1571, 9979, -1571 },
+ { -1487, 9391, -1487, 9391, 10062, -1582, 10062, -1582 }, { -1474, 9306, -1474, 9306, 10146, -1594, 10146, -1594 },
+ { -1461, 9221, -1461, 9221, 10228, -1604, 10228, -1604 }, { -1448, 9136, -1448, 9136, 10311, -1615, 10311, -1615 },
+ { -1434, 9051, -1434, 9051, 10393, -1626, 10393, -1626 }, { -1421, 8965, -1421, 8965, 10476, -1636, 10476, -1636 },
+ { -1407, 8880, -1407, 8880, 10557, -1646, 10557, -1646 }, { -1394, 8795, -1394, 8795, 10639, -1656, 10639, -1656 },
+ { -1380, 8709, -1380, 8709, 10720, -1665, 10720, -1665 }, { -1366, 8624, -1366, 8624, 10801, -1675, 10801, -1675 },
+ { -1352, 8538, -1352, 8538, 10882, -1684, 10882, -1684 }, { -1337, 8450, -1337, 8450, 10963, -1692, 10963, -1692 },
+ { -1323, 8365, -1323, 8365, 11043, -1701, 11043, -1701 }, { -1309, 8279, -1309, 8279, 11123, -1709, 11123, -1709 },
+ { -1294, 8192, -1294, 8192, 11203, -1717, 11203, -1717 }, { -1279, 8106, -1279, 8106, 11282, -1725, 11282, -1725 },
+ { -1264, 8020, -1264, 8020, 11361, -1733, 11361, -1733 }, { -1249, 7934, -1249, 7934, 11439, -1740, 11439, -1740 },
+ { -1234, 7847, -1234, 7847, 11518, -1747, 11518, -1747 }, { -1219, 7760, -1219, 7760, 11596, -1753, 11596, -1753 },
+ { -1204, 7675, -1204, 7675, 11673, -1760, 11673, -1760 }, { -1189, 7589, -1189, 7589, 11750, -1766, 11750, -1766 },
+ { -1173, 7502, -1173, 7502, 11827, -1772, 11827, -1772 }, { -1158, 7415, -1158, 7415, 11904, -1777, 11904, -1777 },
+ { -1143, 7329, -1143, 7329, 11980, -1782, 11980, -1782 }, { -1127, 7243, -1127, 7243, 12055, -1787, 12055, -1787 },
+ { -1111, 7156, -1111, 7156, 12131, -1792, 12131, -1792 }, { -1096, 7070, -1096, 7070, 12206, -1796, 12206, -1796 },
+ { -1080, 6984, -1080, 6984, 12280, -1800, 12280, -1800 }, { -1064, 6898, -1064, 6898, 12354, -1804, 12354, -1804 },
+ { -1048, 6811, -1048, 6811, 12428, -1807, 12428, -1807 }, { -1033, 6726, -1033, 6726, 12501, -1810, 12501, -1810 },
+ { -1017, 6639, -1017, 6639, 12574, -1812, 12574, -1812 }, { -1001, 6554, -1001, 6554, 12646, -1815, 12646, -1815 },
+ { -985, 6467, -985, 6467, 12718, -1816, 12718, -1816 }, { -969, 6382, -969, 6382, 12789, -1818, 12789, -1818 },
+ { -953, 6296, -953, 6296, 12860, -1819, 12860, -1819 }, { -937, 6211, -937, 6211, 12930, -1820, 12930, -1820 },
+ { -921, 6125, -921, 6125, 13000, -1820, 13000, -1820 }, { -905, 6039, -905, 6039, 13070, -1820, 13070, -1820 },
+ { -889, 5954, -889, 5954, 13139, -1820, 13139, -1820 }, { -873, 5869, -873, 5869, 13207, -1819, 13207, -1819 },
+ { -857, 5784, -857, 5784, 13275, -1818, 13275, -1818 }, { -841, 5700, -841, 5700, 13342, -1817, 13342, -1817 },
+ { -825, 5615, -825, 5615, 13409, -1815, 13409, -1815 }, { -809, 5531, -809, 5531, 13475, -1813, 13475, -1813 },
+ { -793, 5446, -793, 5446, 13541, -1810, 13541, -1810 }, { -777, 5362, -777, 5362, 13606, -1807, 13606, -1807 },
+ { -761, 5278, -761, 5278, 13671, -1804, 13671, -1804 }, { -746, 5195, -746, 5195, 13735, -1800, 13735, -1800 },
+ { -730, 5111, -730, 5111, 13798, -1795, 13798, -1795 }, { -714, 5028, -714, 5028, 13861, -1791, 13861, -1791 },
+ { -699, 4944, -699, 4944, 13924, -1785, 13924, -1785 }, { -683, 4862, -683, 4862, 13985, -1780, 13985, -1780 },
+ { -668, 4780, -668, 4780, 14046, -1774, 14046, -1774 }, { -652, 4696, -652, 4696, 14107, -1767, 14107, -1767 },
+ { -637, 4614, -637, 4614, 14167, -1760, 14167, -1760 }, { -621, 4532, -621, 4532, 14226, -1753, 14226, -1753 },
+ { -606, 4450, -606, 4450, 14285, -1745, 14285, -1745 }, { -591, 4369, -591, 4369, 14343, -1737, 14343, -1737 },
+ { -576, 4288, -576, 4288, 14400, -1728, 14400, -1728 }, { -561, 4207, -561, 4207, 14457, -1719, 14457, -1719 },
+ { -546, 4126, -546, 4126, 14513, -1709, 14513, -1709 }, { -531, 4046, -531, 4046, 14568, -1699, 14568, -1699 },
+ { -517, 3966, -517, 3966, 14623, -1688, 14623, -1688 }, { -502, 3886, -502, 3886, 14677, -1677, 14677, -1677 },
+ { -488, 3807, -488, 3807, 14730, -1665, 14730, -1665 }, { -474, 3728, -474, 3728, 14783, -1653, 14783, -1653 },
+ { -459, 3650, -459, 3650, 14834, -1641, 14834, -1641 }, { -445, 3570, -445, 3570, 14886, -1627, 14886, -1627 },
+ { -431, 3493, -431, 3493, 14936, -1614, 14936, -1614 }, { -418, 3416, -418, 3416, 14986, -1600, 14986, -1600 },
+ { -404, 3338, -404, 3338, 15035, -1585, 15035, -1585 }, { -391, 3262, -391, 3262, 15083, -1570, 15083, -1570 },
+ { -377, 3185, -377, 3185, 15130, -1554, 15130, -1554 }, { -364, 3109, -364, 3109, 15177, -1538, 15177, -1538 },
+ { -351, 3033, -351, 3033, 15223, -1521, 15223, -1521 }, { -338, 2958, -338, 2958, 15268, -1504, 15268, -1504 },
+ { -325, 2882, -325, 2882, 15313, -1486, 15313, -1486 }, { -313, 2808, -313, 2808, 15356, -1467, 15356, -1467 },
+ { -301, 2734, -301, 2734, 15399, -1448, 15399, -1448 }, { -288, 2660, -288, 2660, 15441, -1429, 15441, -1429 },
+ { -276, 2587, -276, 2587, 15482, -1409, 15482, -1409 }, { -265, 2514, -265, 2514, 15523, -1388, 15523, -1388 },
+ { -253, 2442, -253, 2442, 15562, -1367, 15562, -1367 }, { -242, 2370, -242, 2370, 15601, -1345, 15601, -1345 },
+ { -231, 2299, -231, 2299, 15639, -1323, 15639, -1323 }, { -220, 2228, -220, 2228, 15676, -1300, 15676, -1300 },
+ { -209, 2157, -209, 2157, 15712, -1276, 15712, -1276 }, { -198, 2087, -198, 2087, 15747, -1252, 15747, -1252 },
+ { -188, 2017, -188, 2017, 15782, -1227, 15782, -1227 }, { -178, 1949, -178, 1949, 15815, -1202, 15815, -1202 },
+ { -168, 1880, -168, 1880, 15848, -1176, 15848, -1176 }, { -158, 1811, -158, 1811, 15880, -1149, 15880, -1149 },
+ { -149, 1744, -149, 1744, 15911, -1122, 15911, -1122 }, { -140, 1677, -140, 1677, 15941, -1094, 15941, -1094 },
+ { -131, 1611, -131, 1611, 15970, -1066, 15970, -1066 }, { -122, 1545, -122, 1545, 15998, -1037, 15998, -1037 },
+ { -114, 1480, -114, 1480, 16025, -1007, 16025, -1007 }, { -106, 1415, -106, 1415, 16052, -977, 16052, -977 },
+ { -98, 1351, -98, 1351, 16077, -946, 16077, -946 }, { -90, 1288, -90, 1288, 16101, -915, 16101, -915 },
+ { -83, 1224, -83, 1224, 16125, -882, 16125, -882 }, { -76, 1162, -76, 1162, 16147, -849, 16147, -849 },
+ { -69, 1100, -69, 1100, 16169, -816, 16169, -816 }, { -63, 1040, -63, 1040, 16189, -782, 16189, -782 },
+ { -56, 978, -56, 978, 16209, -747, 16209, -747 }, { -51, 919, -51, 919, 16227, -711, 16227, -711 },
+ { -45, 859, -45, 859, 16245, -675, 16245, -675 }, { -40, 800, -40, 800, 16262, -638, 16262, -638 },
+ { -35, 743, -35, 743, 16277, -601, 16277, -601 }, { -30, 684, -30, 684, 16292, -562, 16292, -562 },
+ { -26, 628, -26, 628, 16305, -523, 16305, -523 }, { -22, 572, -22, 572, 16318, -484, 16318, -484 },
+ { -18, 516, -18, 516, 16329, -443, 16329, -443 }, { -15, 462, -15, 462, 16339, -402, 16339, -402 },
+ { -12, 407, -12, 407, 16349, -360, 16349, -360 }, { -9, 354, -9, 354, 16357, -318, 16357, -318 },
+ { -7, 302, -7, 302, 16364, -275, 16364, -275 }, { -5, 250, -5, 250, 16370, -231, 16370, -231 },
+ { -3, 198, -3, 198, 16375, -186, 16375, -186 }, { -2, 148, -2, 148, 16379, -141, 16379, -141 },
+ { -1, 98, -1, 98, 16382, -95, 16382, -95 }, { 0, 49, 0, 49, 16383, -48, 16383, -48 },
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/triblt.cpp b/src/thirdparty/VirtualDub/Kasumi/source/triblt.cpp
new file mode 100644
index 000000000..8fe16138a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/triblt.cpp
@@ -0,0 +1,1717 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include <math.h>
+#include <vector>
+#include <vd2/system/math.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/vdalloc.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+#include <vd2/Kasumi/resample.h>
+#include <vd2/Kasumi/tables.h>
+#include <vd2/Kasumi/triblt.h>
+
+namespace {
+ uint32 lerp_RGB888(sint32 a, sint32 b, sint32 x) {
+ sint32 a_rb = a & 0xff00ff;
+ sint32 a_g = a & 0x00ff00;
+ sint32 b_rb = b & 0xff00ff;
+ sint32 b_g = b & 0x00ff00;
+
+ const uint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 top_g = (a_g + (((b_g - a_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+
+ return top_rb + top_g;
+ }
+
+ uint32 bilerp_RGB888(sint32 a, sint32 b, sint32 c, sint32 d, sint32 x, sint32 y) {
+ sint32 a_rb = a & 0xff00ff;
+ sint32 a_g = a & 0x00ff00;
+ sint32 b_rb = b & 0xff00ff;
+ sint32 b_g = b & 0x00ff00;
+ sint32 c_rb = c & 0xff00ff;
+ sint32 c_g = c & 0x00ff00;
+ sint32 d_rb = d & 0xff00ff;
+ sint32 d_g = d & 0x00ff00;
+
+ const uint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 top_g = (a_g + (((b_g - a_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+ const uint32 bot_rb = (c_rb + (((d_rb - c_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 bot_g = (c_g + (((d_g - c_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+
+ const uint32 final_rb = (top_rb + (((bot_rb - top_rb)*y) >> 8)) & 0xff00ff;
+ const uint32 final_g = (top_g + (((bot_g - top_g )*y) >> 8)) & 0x00ff00;
+
+ return final_rb + final_g;
+ }
+
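+	// 4x4 bicubic sampler: a 4-tap horizontal pass per source row using the 2.14 cubic
+	// interpolation table, followed by a 4-tap vertical pass over the intermediate sums
+	// and a final clamp of each channel to [0, 255].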
+ uint32 bicubic_RGB888(const uint32 *src0, const uint32 *src1, const uint32 *src2, const uint32 *src3, sint32 x, sint32 y) {
+ const uint32 p00 = src0[0];
+ const uint32 p01 = src0[1];
+ const uint32 p02 = src0[2];
+ const uint32 p03 = src0[3];
+ const uint32 p10 = src1[0];
+ const uint32 p11 = src1[1];
+ const uint32 p12 = src1[2];
+ const uint32 p13 = src1[3];
+ const uint32 p20 = src2[0];
+ const uint32 p21 = src2[1];
+ const uint32 p22 = src2[2];
+ const uint32 p23 = src2[3];
+ const uint32 p30 = src3[0];
+ const uint32 p31 = src3[1];
+ const uint32 p32 = src3[2];
+ const uint32 p33 = src3[3];
+
+ const sint32 *htab = kVDCubicInterpTableFX14_075[x];
+ const sint32 *vtab = kVDCubicInterpTableFX14_075[y];
+
+ const int ch0 = htab[0];
+ const int ch1 = htab[1];
+ const int ch2 = htab[2];
+ const int ch3 = htab[3];
+ const int cv0 = vtab[0];
+ const int cv1 = vtab[1];
+ const int cv2 = vtab[2];
+ const int cv3 = vtab[3];
+
+ int r0 = ((int)((p00>>16)&0xff) * ch0 + (int)((p01>>16)&0xff) * ch1 + (int)((p02>>16)&0xff) * ch2 + (int)((p03>>16)&0xff) * ch3 + 128) >> 8;
+ int g0 = ((int)((p00>> 8)&0xff) * ch0 + (int)((p01>> 8)&0xff) * ch1 + (int)((p02>> 8)&0xff) * ch2 + (int)((p03>> 8)&0xff) * ch3 + 128) >> 8;
+ int b0 = ((int)((p00 )&0xff) * ch0 + (int)((p01 )&0xff) * ch1 + (int)((p02 )&0xff) * ch2 + (int)((p03 )&0xff) * ch3 + 128) >> 8;
+ int r1 = ((int)((p10>>16)&0xff) * ch0 + (int)((p11>>16)&0xff) * ch1 + (int)((p12>>16)&0xff) * ch2 + (int)((p13>>16)&0xff) * ch3 + 128) >> 8;
+ int g1 = ((int)((p10>> 8)&0xff) * ch0 + (int)((p11>> 8)&0xff) * ch1 + (int)((p12>> 8)&0xff) * ch2 + (int)((p13>> 8)&0xff) * ch3 + 128) >> 8;
+ int b1 = ((int)((p10 )&0xff) * ch0 + (int)((p11 )&0xff) * ch1 + (int)((p12 )&0xff) * ch2 + (int)((p13 )&0xff) * ch3 + 128) >> 8;
+ int r2 = ((int)((p20>>16)&0xff) * ch0 + (int)((p21>>16)&0xff) * ch1 + (int)((p22>>16)&0xff) * ch2 + (int)((p23>>16)&0xff) * ch3 + 128) >> 8;
+ int g2 = ((int)((p20>> 8)&0xff) * ch0 + (int)((p21>> 8)&0xff) * ch1 + (int)((p22>> 8)&0xff) * ch2 + (int)((p23>> 8)&0xff) * ch3 + 128) >> 8;
+ int b2 = ((int)((p20 )&0xff) * ch0 + (int)((p21 )&0xff) * ch1 + (int)((p22 )&0xff) * ch2 + (int)((p23 )&0xff) * ch3 + 128) >> 8;
+ int r3 = ((int)((p30>>16)&0xff) * ch0 + (int)((p31>>16)&0xff) * ch1 + (int)((p32>>16)&0xff) * ch2 + (int)((p33>>16)&0xff) * ch3 + 128) >> 8;
+ int g3 = ((int)((p30>> 8)&0xff) * ch0 + (int)((p31>> 8)&0xff) * ch1 + (int)((p32>> 8)&0xff) * ch2 + (int)((p33>> 8)&0xff) * ch3 + 128) >> 8;
+ int b3 = ((int)((p30 )&0xff) * ch0 + (int)((p31 )&0xff) * ch1 + (int)((p32 )&0xff) * ch2 + (int)((p33 )&0xff) * ch3 + 128) >> 8;
+
+ int r = (r0 * cv0 + r1 * cv1 + r2 * cv2 + r3 * cv3 + (1<<19)) >> 20;
+ int g = (g0 * cv0 + g1 * cv1 + g2 * cv2 + g3 * cv3 + (1<<19)) >> 20;
+ int b = (b0 * cv0 + b1 * cv1 + b2 * cv2 + b3 * cv3 + (1<<19)) >> 20;
+
+ if (r<0) r=0; else if (r>255) r=255;
+ if (g<0) g=0; else if (g>255) g=255;
+ if (b<0) b=0; else if (b>255) b=255;
+
+ return (r<<16) + (g<<8) + b;
+ }
+}
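+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (editorial, hypothetical, excluded from the build): the
+// helpers above interpolate in 8.8 fixed point, with x and y fractions in
+// [0,256). The +0x00800080 / +0x00008000 terms round the red/blue pair and
+// the green channel to nearest before the >>8. A quick worked example:
+#if 0
+static void SketchCheckLerpRGB888() {
+	// Halfway (x = 128) between pure red and pure blue: each channel moves
+	// from 255 toward 0 (or 0 toward 255) and lands on the rounded midpoint.
+	VDASSERT(lerp_RGB888(0x00FF0000, 0x000000FF, 128) == 0x00800080);
+
+	// x = 0 returns the first operand exactly; x never reaches 256, so the
+	// second operand is only approached, never returned exactly.
+	VDASSERT(lerp_RGB888(0x00123456, 0x00FFFFFF, 0) == 0x00123456);
+}
+#endif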
+
+namespace {
+ enum {
+ kTop = 1,
+ kBottom = 2,
+ kLeft = 4,
+ kRight = 8,
+ kNear = 16,
+ kFar = 32
+ };
+
+ struct VDTriBltMipInfo {
+ const uint32 *mip;
+ ptrdiff_t pitch;
+ uint32 uvmul, _pad;
+ };
+
+ struct VDTriBltInfo {
+ VDTriBltMipInfo mips[16];
+ uint32 *dst;
+ const uint32 *src;
+ sint32 width;
+ const int *cubictab;
+ };
+
+ struct VDTriBltGenInfo {
+ float u;
+ float v;
+ float rhw;
+ float dudx;
+ float dvdx;
+ float drhwdx;
+ };
+
+ typedef void (*VDTriBltSpanFunction)(const VDTriBltInfo *);
+ typedef void (*VDTriBltGenFunction)(const VDTriBltGenInfo *);
+
+ void vd_triblt_span_point(const VDTriBltInfo *pInfo) {
+ sint32 w = -pInfo->width;
+ uint32 *dst = pInfo->dst + pInfo->width;
+ const uint32 *src = pInfo->src;
+ const uint32 *texture = pInfo->mips[0].mip;
+ const ptrdiff_t texpitch = pInfo->mips[0].pitch;
+
+ do {
+ dst[w] = vdptroffset(texture, texpitch * src[1])[src[0]];
+ src += 2;
+ } while(++w);
+ }
+
+ void vd_triblt_span_bilinear(const VDTriBltInfo *pInfo) {
+ sint32 w = -pInfo->width;
+ uint32 *dst = pInfo->dst + pInfo->width;
+ const uint32 *src = pInfo->src;
+ const uint32 *texture = pInfo->mips[0].mip;
+ const ptrdiff_t texpitch = pInfo->mips[0].pitch;
+
+ do {
+ const sint32 u = src[0];
+ const sint32 v = src[1];
+ src += 2;
+ const uint32 *src1 = vdptroffset(texture, texpitch * (v>>8)) + (u>>8);
+ const uint32 *src2 = vdptroffset(src1, texpitch);
+
+ dst[w] = bilerp_RGB888(src1[0], src1[1], src2[0], src2[1], u&255, v&255);
+ } while(++w);
+ }
+
+ void vd_triblt_span_trilinear(const VDTriBltInfo *pInfo) {
+ sint32 w = -pInfo->width;
+ uint32 *dst = pInfo->dst + pInfo->width;
+ const uint32 *src = pInfo->src;
+
+ do {
+ sint32 u = src[0];
+ sint32 v = src[1];
+ const sint32 lambda = src[2];
+ src += 3;
+
+ const sint32 lod = lambda >> 8;
+
+ const uint32 *texture1 = pInfo->mips[lod].mip;
+ const ptrdiff_t texpitch1 = pInfo->mips[lod].pitch;
+ const uint32 *texture2 = pInfo->mips[lod+1].mip;
+ const ptrdiff_t texpitch2 = pInfo->mips[lod+1].pitch;
+
+ u >>= lod;
+ v >>= lod;
+
+ u += 128;
+ v += 128;
+
+ const uint32 *src1 = vdptroffset(texture1, texpitch1 * (v>>8)) + (u>>8);
+ const uint32 *src2 = vdptroffset(src1, texpitch1);
+ const uint32 p1 = bilerp_RGB888(src1[0], src1[1], src2[0], src2[1], u&255, v&255);
+
+ u += 128;
+ v += 128;
+
+ const uint32 *src3 = vdptroffset(texture2, texpitch2 * (v>>9)) + (u>>9);
+ const uint32 *src4 = vdptroffset(src3, texpitch2);
+ const uint32 p2 = bilerp_RGB888(src3[0], src3[1], src4[0], src4[1], (u>>1)&255, (v>>1)&255);
+
+ dst[w] = lerp_RGB888(p1, p2, lambda & 255);
+ } while(++w);
+ }
+
+ void vd_triblt_span_bicubic_mip_linear(const VDTriBltInfo *pInfo) {
+ sint32 w = -pInfo->width;
+ uint32 *dst = pInfo->dst + pInfo->width;
+ const uint32 *src = pInfo->src;
+
+ do {
+ sint32 u = src[0];
+ sint32 v = src[1];
+ const sint32 lambda = src[2];
+ src += 3;
+
+ const sint32 lod = lambda >> 8;
+
+ const uint32 *texture1 = pInfo->mips[lod].mip;
+ const ptrdiff_t texpitch1 = pInfo->mips[lod].pitch;
+ const uint32 *texture2 = pInfo->mips[lod+1].mip;
+ const ptrdiff_t texpitch2 = pInfo->mips[lod+1].pitch;
+
+ u >>= lod;
+ v >>= lod;
+
+ u += 128;
+ v += 128;
+
+ const uint32 *src1 = vdptroffset(texture1, texpitch1 * (v>>8)) + (u>>8);
+ const uint32 *src2 = vdptroffset(src1, texpitch1);
+ const uint32 *src3 = vdptroffset(src2, texpitch1);
+ const uint32 *src4 = vdptroffset(src3, texpitch1);
+ const uint32 p1 = bicubic_RGB888(src1, src2, src3, src4, u&255, v&255);
+
+ u += 128;
+ v += 128;
+
+ const uint32 *src5 = vdptroffset(texture2, texpitch2 * (v>>9)) + (u>>9);
+ const uint32 *src6 = vdptroffset(src5, texpitch2);
+ const uint32 *src7 = vdptroffset(src6, texpitch2);
+ const uint32 *src8 = vdptroffset(src7, texpitch2);
+ const uint32 p2 = bicubic_RGB888(src5, src6, src7, src8, (u>>1)&255, (v>>1)&255);
+
+ dst[w] = lerp_RGB888(p1, p2, lambda & 255);
+ } while(++w);
+ }
+
+#ifdef _M_IX86
+ extern "C" void vdasm_triblt_span_bilinear_mmx(const VDTriBltInfo *pInfo);
+ extern "C" void vdasm_triblt_span_trilinear_mmx(const VDTriBltInfo *pInfo);
+ extern "C" void vdasm_triblt_span_bicubic_mip_linear_mmx(const VDTriBltInfo *pInfo);
+ extern "C" void vdasm_triblt_span_bicubic_mip_linear_sse2(const VDTriBltInfo *pInfo);
+ extern "C" void vdasm_triblt_span_point(const VDTriBltInfo *pInfo);
+#endif
+
+ struct VDTriBltTransformedVertex {
+ float x, y, z;
+ union {
+ float w;
+ float rhw;
+ };
+ float r, g, b, a;
+ float u, v;
+ int outcode;
+
+ void interp(const VDTriBltTransformedVertex *v1, const VDTriBltTransformedVertex *v2, float alpha) {
+ x = v1->x + alpha * (v2->x - v1->x);
+ y = v1->y + alpha * (v2->y - v1->y);
+ z = v1->z + alpha * (v2->z - v1->z);
+ w = v1->w + alpha * (v2->w - v1->w);
+
+ r = v1->r + alpha * (v2->r - v1->r);
+ g = v1->g + alpha * (v2->g - v1->g);
+ b = v1->b + alpha * (v2->b - v1->b);
+ a = v1->a + alpha * (v2->a - v1->a);
+
+ u = v1->u + alpha * (v2->u - v1->u);
+ v = v1->v + alpha * (v2->v - v1->v);
+
+ outcode = (x < -w ? kLeft : 0)
+ + (x > +w ? kRight : 0)
+ + (y < -w ? kTop : 0)
+ + (y > +w ? kBottom : 0)
+ + (z < -w ? kNear : 0)
+ + (z > +w ? kFar : 0);
+ }
+ };
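+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): the outcode bits record which frustum planes a vertex lies
+	// outside of in homogeneous clip space (-w <= x,y,z <= +w). The
+	// callers below combine them Cohen-Sutherland style:
+#if 0
+	bool SketchTriangleTrivialReject(int code0, int code1, int code2) {
+		// All three vertices outside the same plane -> cull outright.
+		return (code0 & code1 & code2) != 0;
+	}
+
+	bool SketchTriangleNeedsClipping(int code0, int code1, int code2) {
+		// Some vertex outside some plane (but no shared plane) -> clip first.
+		return (code0 | code1 | code2) != 0;
+	}
+#endif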
+
+ void TransformVerts(VDTriBltTransformedVertex *dst, const VDTriBltVertex *src, int nVerts, const float xform[16]) {
+ const float xflocal[16]={
+ xform[ 0], xform[ 1], xform[ 2], xform[ 3],
+ xform[ 4], xform[ 5], xform[ 6], xform[ 7],
+ xform[ 8], xform[ 9], xform[10], xform[11],
+ xform[12], xform[13], xform[14], xform[15],
+ };
+
+ if (nVerts <= 0)
+ return;
+
+ do {
+ const float x0 = src->x;
+ const float y0 = src->y;
+ const float z0 = src->z;
+
+ const float w = x0*xflocal[12] + y0*xflocal[13] + z0*xflocal[14] + xflocal[15];
+ const float x = x0*xflocal[ 0] + y0*xflocal[ 1] + z0*xflocal[ 2] + xflocal[ 3];
+ const float y = x0*xflocal[ 4] + y0*xflocal[ 5] + z0*xflocal[ 6] + xflocal[ 7];
+ const float z = x0*xflocal[ 8] + y0*xflocal[ 9] + z0*xflocal[10] + xflocal[11];
+
+ int outcode = 0;
+
+ if (x < -w) outcode += kLeft;
+ if (x > w) outcode += kRight;
+ if (y < -w) outcode += kTop;
+ if (y > w) outcode += kBottom;
+ if (z < -w) outcode += kNear;
+ if (z > w) outcode += kFar;
+
+ dst->x = x;
+ dst->y = y;
+ dst->z = z;
+ dst->w = w;
+ dst->u = src->u;
+ dst->v = src->v;
+ dst->r = 1.0f;
+ dst->g = 1.0f;
+ dst->b = 1.0f;
+ dst->a = 1.0f;
+ dst->outcode = outcode;
+
+ ++src;
+ ++dst;
+ } while(--nVerts);
+ }
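+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): the 4x4 transform is stored row-major and applied to the
+	// column vector (x, y, z, 1); the fourth row produces w. Building,
+	// say, a translation for use as the pTransform argument would look
+	// like this:
+#if 0
+	void SketchMakeTranslation(float xform[16], float tx, float ty, float tz) {
+		for(int i=0; i<16; ++i)
+			xform[i] = (i % 5) ? 0.0f : 1.0f;	// identity: 1s on the diagonal
+
+		xform[ 3] = tx;		// x row, constant term
+		xform[ 7] = ty;		// y row, constant term
+		xform[11] = tz;		// z row, constant term
+	}
+#endif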
+
+ void TransformVerts(VDTriBltTransformedVertex *dst, const VDTriColorVertex *src, int nVerts, const float xform[16]) {
+ const float xflocal[16]={
+ xform[ 0], xform[ 1], xform[ 2], xform[ 3],
+ xform[ 4], xform[ 5], xform[ 6], xform[ 7],
+ xform[ 8], xform[ 9], xform[10], xform[11],
+ xform[12], xform[13], xform[14], xform[15],
+ };
+
+ if (nVerts <= 0)
+ return;
+
+ do {
+ const float x0 = src->x;
+ const float y0 = src->y;
+ const float z0 = src->z;
+
+ const float w = x0*xflocal[12] + y0*xflocal[13] + z0*xflocal[14] + xflocal[15];
+ const float x = x0*xflocal[ 0] + y0*xflocal[ 1] + z0*xflocal[ 2] + xflocal[ 3];
+ const float y = x0*xflocal[ 4] + y0*xflocal[ 5] + z0*xflocal[ 6] + xflocal[ 7];
+ const float z = x0*xflocal[ 8] + y0*xflocal[ 9] + z0*xflocal[10] + xflocal[11];
+
+ int outcode = 0;
+
+ if (x < -w) outcode += kLeft;
+ if (x > w) outcode += kRight;
+ if (y < -w) outcode += kTop;
+ if (y > w) outcode += kBottom;
+ if (z < -w) outcode += kNear;
+ if (z > w) outcode += kFar;
+
+ dst->x = x;
+ dst->y = y;
+ dst->z = z;
+ dst->w = w;
+ dst->u = 0.0f;
+ dst->v = 0.0f;
+ dst->r = src->r;
+ dst->g = src->g;
+ dst->b = src->b;
+ dst->a = src->a;
+ dst->outcode = outcode;
+
+ ++src;
+ ++dst;
+ } while(--nVerts);
+ }
+
+ struct VDTriangleSetupInfo {
+ const VDTriBltTransformedVertex *pt, *pr, *pl;
+ VDTriBltTransformedVertex tmp0, tmp1, tmp2;
+ };
+
+ void SetupTri(
+ VDTriangleSetupInfo& setup,
+ VDPixmap& dst,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2,
+ const VDTriBltFilterMode *filterMode
+ )
+ {
+ setup.tmp0 = *vx0;
+ setup.tmp1 = *vx1;
+ setup.tmp2 = *vx2;
+
+ // adjust UVs for filter mode
+ if (filterMode) {
+ switch(*filterMode) {
+ case kTriBltFilterBilinear:
+ setup.tmp0.u += 0.5f;
+ setup.tmp0.v += 0.5f;
+ setup.tmp1.u += 0.5f;
+ setup.tmp1.v += 0.5f;
+ setup.tmp2.u += 0.5f;
+ setup.tmp2.v += 0.5f;
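+			// note: no break here -- the bilinear UVs also receive the
+			// 256x fixed-point scaling applied by the cases below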
+ case kTriBltFilterTrilinear:
+ case kTriBltFilterBicubicMipLinear:
+ setup.tmp0.u *= 256.0f;
+ setup.tmp0.v *= 256.0f;
+ setup.tmp1.u *= 256.0f;
+ setup.tmp1.v *= 256.0f;
+ setup.tmp2.u *= 256.0f;
+ setup.tmp2.v *= 256.0f;
+ break;
+ case kTriBltFilterPoint:
+ setup.tmp0.u += 1.0f;
+ setup.tmp0.v += 1.0f;
+ setup.tmp1.u += 1.0f;
+ setup.tmp1.v += 1.0f;
+ setup.tmp2.u += 1.0f;
+ setup.tmp2.v += 1.0f;
+ break;
+ }
+ }
+
+ // do perspective divide and NDC space conversion
+ const float xscale = dst.w * 0.5f;
+ const float yscale = dst.h * 0.5f;
+
+ setup.tmp0.rhw = 1.0f / setup.tmp0.w;
+ setup.tmp0.x = (1.0f+setup.tmp0.x*setup.tmp0.rhw)*xscale;
+ setup.tmp0.y = (1.0f+setup.tmp0.y*setup.tmp0.rhw)*yscale;
+ setup.tmp0.u *= setup.tmp0.rhw;
+ setup.tmp0.v *= setup.tmp0.rhw;
+ setup.tmp0.r *= setup.tmp0.rhw;
+ setup.tmp0.g *= setup.tmp0.rhw;
+ setup.tmp0.b *= setup.tmp0.rhw;
+ setup.tmp0.a *= setup.tmp0.rhw;
+ setup.tmp1.rhw = 1.0f / setup.tmp1.w;
+ setup.tmp1.x = (1.0f+setup.tmp1.x*setup.tmp1.rhw)*xscale;
+ setup.tmp1.y = (1.0f+setup.tmp1.y*setup.tmp1.rhw)*yscale;
+ setup.tmp1.u *= setup.tmp1.rhw;
+ setup.tmp1.v *= setup.tmp1.rhw;
+ setup.tmp1.r *= setup.tmp1.rhw;
+ setup.tmp1.g *= setup.tmp1.rhw;
+ setup.tmp1.b *= setup.tmp1.rhw;
+ setup.tmp1.a *= setup.tmp1.rhw;
+ setup.tmp2.rhw = 1.0f / setup.tmp2.w;
+ setup.tmp2.x = (1.0f+setup.tmp2.x*setup.tmp2.rhw)*xscale;
+ setup.tmp2.y = (1.0f+setup.tmp2.y*setup.tmp2.rhw)*yscale;
+ setup.tmp2.u *= setup.tmp2.rhw;
+ setup.tmp2.v *= setup.tmp2.rhw;
+ setup.tmp2.r *= setup.tmp2.rhw;
+ setup.tmp2.g *= setup.tmp2.rhw;
+ setup.tmp2.b *= setup.tmp2.rhw;
+ setup.tmp2.a *= setup.tmp2.rhw;
+
+ // verify clipping
+ VDASSERT(setup.tmp0.x >= 0 && setup.tmp0.x <= dst.w);
+ VDASSERT(setup.tmp1.x >= 0 && setup.tmp1.x <= dst.w);
+ VDASSERT(setup.tmp2.x >= 0 && setup.tmp2.x <= dst.w);
+ VDASSERT(setup.tmp0.y >= 0 && setup.tmp0.y <= dst.h);
+ VDASSERT(setup.tmp1.y >= 0 && setup.tmp1.y <= dst.h);
+ VDASSERT(setup.tmp2.y >= 0 && setup.tmp2.y <= dst.h);
+
+ vx0 = &setup.tmp0;
+ vx1 = &setup.tmp1;
+ vx2 = &setup.tmp2;
+
+ const VDTriBltTransformedVertex *pt, *pl, *pr;
+
+ // sort points
+ if (vx0->y < vx1->y) // 1 < 2
+ if (vx0->y < vx2->y) { // 1 < 2,3
+ pt = vx0;
+ pr = vx1;
+ pl = vx2;
+ } else { // 3 < 1 < 2
+ pt = vx2;
+ pr = vx0;
+ pl = vx1;
+ }
+ else // 2 < 1
+ if (vx1->y < vx2->y) { // 2 < 1,3
+ pt = vx1;
+ pr = vx2;
+ pl = vx0;
+ } else { // 3 < 2 < 1
+ pt = vx2;
+ pr = vx0;
+ pl = vx1;
+ }
+
+ setup.pl = pl;
+ setup.pt = pt;
+ setup.pr = pr;
+ }
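+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): with the vertices sorted by SetupTri, the rasterizers below
+	// take the 2D cross product of the two edges leaving the top vertex;
+	// a non-positive value means a backfacing or degenerate (zero-area)
+	// triangle, which is culled.
+#if 0
+	float SketchSignedArea2(const VDTriBltTransformedVertex& top,
+							const VDTriBltTransformedVertex& left,
+							const VDTriBltTransformedVertex& right) {
+		const float x10 = left.x  - top.x;
+		const float y10 = left.y  - top.y;
+		const float x20 = right.x - top.x;
+		const float y20 = right.y - top.y;
+
+		return x20*y10 - x10*y20;	// twice the signed area
+	}
+#endif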
+
+ void RenderTri(VDPixmap& dst, const VDPixmap *const *pSources, int nMipmaps,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2,
+ VDTriBltFilterMode filterMode,
+ float mipMapLODBias)
+ {
+ VDTriangleSetupInfo setup;
+
+ SetupTri(setup, dst, vx0, vx1, vx2, &filterMode);
+
+ const VDTriBltTransformedVertex *pt = setup.pt, *pl = setup.pl, *pr = setup.pr;
+
+ const float x10 = pl->x - pt->x;
+ const float x20 = pr->x - pt->x;
+ const float y10 = pl->y - pt->y;
+ const float y20 = pr->y - pt->y;
+ const float A = x20*y10 - x10*y20;
+
+ if (A <= 0.f)
+ return;
+
+ float invA = 0.f;
+ if (A >= 1e-5f)
+ invA = 1.0f / A;
+
+ float x10_A = x10 * invA;
+ float x20_A = x20 * invA;
+ float y10_A = y10 * invA;
+ float y20_A = y20 * invA;
+
+ float u10 = pl->u - pt->u;
+ float u20 = pr->u - pt->u;
+ float v10 = pl->v - pt->v;
+ float v20 = pr->v - pt->v;
+ float rhw10 = pl->rhw - pt->rhw;
+ float rhw20 = pr->rhw - pt->rhw;
+
+ float dudx = u20*y10_A - u10*y20_A;
+ float dudy = u10*x20_A - u20*x10_A;
+ float dvdx = v20*y10_A - v10*y20_A;
+ float dvdy = v10*x20_A - v20*x10_A;
+ float drhwdx = rhw20*y10_A - rhw10*y20_A;
+ float drhwdy = rhw10*x20_A - rhw20*x10_A;
+
+ // Compute edge walking parameters
+
+ float dxl1=0, dxr1=0, dul1=0, dvl1=0, drhwl1=0;
+ float dxl2=0, dxr2=0, dul2=0, dvl2=0, drhwl2=0;
+
+ // Compute left-edge interpolation parameters for first half.
+
+ if (pl->y != pt->y) {
+ dxl1 = (pl->x - pt->x) / (pl->y - pt->y);
+
+ dul1 = dudy + dxl1 * dudx;
+ dvl1 = dvdy + dxl1 * dvdx;
+ drhwl1 = drhwdy + dxl1 * drhwdx;
+ }
+
+ // Compute right-edge interpolation parameters for first half.
+
+ if (pr->y != pt->y) {
+ dxr1 = (pr->x - pt->x) / (pr->y - pt->y);
+ }
+
+ // Compute third-edge interpolation parameters.
+
+ if (pr->y != pl->y) {
+ dxl2 = (pr->x - pl->x) / (pr->y - pl->y);
+
+ dul2 = dudy + dxl2 * dudx;
+ dvl2 = dvdy + dxl2 * dvdx;
+ drhwl2 = drhwdy + dxl2 * drhwdx;
+
+ dxr2 = dxl2;
+ }
+
+ // Initialize parameters for first half.
+ //
+ // We place pixel centers at (x+0.5, y+0.5).
+
+ double xl, xr, ul, vl, rhwl, yf;
+ int y, y1, y2;
+
+ // y_start < y+0.5 to include pixel y.
+
+ y = (int)floor(pt->y + 0.5);
+ yf = (y+0.5) - pt->y;
+
+ xl = pt->x + dxl1 * yf;
+ xr = pt->x + dxr1 * yf;
+ ul = pt->u + dul1 * yf;
+ vl = pt->v + dvl1 * yf;
+ rhwl = pt->rhw + drhwl1 * yf;
+
+ // Initialize parameters for second half.
+
+ double xl2, xr2, ul2, vl2, rhwl2;
+
+ if (pl->y > pr->y) { // Left edge is long side
+ dxl2 = dxl1;
+ dul2 = dul1;
+ dvl2 = dvl1;
+ drhwl2 = drhwl1;
+
+ y1 = (int)floor(pr->y + 0.5);
+ y2 = (int)floor(pl->y + 0.5);
+
+ yf = (y1+0.5) - pr->y;
+
+ // Step left edge.
+
+ xl2 = xl + dxl1 * (y1 - y);
+ ul2 = ul + dul1 * (y1 - y);
+ vl2 = vl + dvl1 * (y1 - y);
+ rhwl2 = rhwl + drhwl1 * (y1 - y);
+
+ // Prestep right edge.
+
+ xr2 = pr->x + dxr2 * yf;
+ } else { // Right edge is long side
+ dxr2 = dxr1;
+
+ y1 = (int)floor(pl->y + 0.5);
+ y2 = (int)floor(pr->y + 0.5);
+
+ yf = (y1+0.5) - pl->y;
+
+ // Prestep left edge.
+
+ xl2 = pl->x + dxl2 * yf;
+ ul2 = pl->u + dul2 * yf;
+ vl2 = pl->v + dvl2 * yf;
+ rhwl2 = pl->rhw + drhwl2 * yf;
+
+ // Step right edge.
+
+ xr2 = xr + dxr1 * (y1 - y);
+ }
+
+ // rasterize
+ const ptrdiff_t dstpitch = dst.pitch;
+ uint32 *dstp = (uint32 *)((char *)dst.data + dstpitch * y);
+
+ VDTriBltInfo texinfo;
+ VDTriBltSpanFunction drawSpan;
+ uint32 cpuflags = CPUGetEnabledExtensions();
+
+ bool triBlt16 = false;
+
+ switch(filterMode) {
+ case kTriBltFilterBicubicMipLinear:
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_SSE2) {
+ drawSpan = vdasm_triblt_span_bicubic_mip_linear_sse2;
+ triBlt16 = true;
+ } else if (cpuflags & CPUF_SUPPORTS_MMX) {
+ drawSpan = vdasm_triblt_span_bicubic_mip_linear_mmx;
+ triBlt16 = true;
+ } else
+#endif
+ drawSpan = vd_triblt_span_bicubic_mip_linear;
+ break;
+ case kTriBltFilterTrilinear:
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ drawSpan = vdasm_triblt_span_trilinear_mmx;
+ triBlt16 = true;
+ } else
+#endif
+ drawSpan = vd_triblt_span_trilinear;
+ break;
+ case kTriBltFilterBilinear:
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ drawSpan = vdasm_triblt_span_bilinear_mmx;
+ triBlt16 = true;
+ } else
+#endif
+ drawSpan = vd_triblt_span_bilinear;
+ break;
+ case kTriBltFilterPoint:
+ drawSpan = vd_triblt_span_point;
+ break;
+ }
+
+ float rhobase = sqrtf(std::max<float>(dudx*dudx + dvdx*dvdx, dudy*dudy + dvdy*dvdy) * (1.0f / 65536.0f)) * powf(2.0f, mipMapLODBias);
+
+ if (triBlt16) {
+ ul *= 256.0f;
+ vl *= 256.0f;
+ ul2 *= 256.0f;
+ vl2 *= 256.0f;
+ dul1 *= 256.0f;
+ dvl1 *= 256.0f;
+ dul2 *= 256.0f;
+ dvl2 *= 256.0f;
+ dudx *= 256.0f;
+ dvdx *= 256.0f;
+ dudy *= 256.0f;
+ dvdy *= 256.0f;
+ }
+
+ int minx1 = (int)floor(std::min<float>(std::min<float>(pl->x, pr->x), pt->x) + 0.5);
+ int maxx2 = (int)floor(std::max<float>(std::max<float>(pl->x, pr->x), pt->x) + 0.5);
+
+ uint32 *const spanptr = new uint32[3 * (maxx2 - minx1)];
+
+ while(y < y2) {
+ if (y == y1) {
+ xl = xl2;
+ xr = xr2;
+ ul = ul2;
+ vl = vl2;
+ rhwl = rhwl2;
+ dxl1 = dxl2;
+ dxr1 = dxr2;
+ dul1 = dul2;
+ dvl1 = dvl2;
+ drhwl1 = drhwl2;
+ }
+
+ int x1, x2;
+ double xf;
+ double u, v, rhw;
+
+ // x_left must be less than (x+0.5) to include pixel x.
+
+ x1 = (int)floor(xl + 0.5);
+ x2 = (int)floor(xr + 0.5);
+ xf = (x1+0.5) - xl;
+
+ u = ul + xf * dudx;
+ v = vl + xf * dvdx;
+ rhw = rhwl + xf * drhwdx;
+
+ int x = x1;
+ uint32 *spanp = spanptr;
+
+ float w = 1.0f / (float)rhw;
+
+ if (x < x2) {
+ if (filterMode >= kTriBltFilterTrilinear) {
+ do {
+ int utexel = VDRoundToIntFastFullRange(u * w);
+ int vtexel = VDRoundToIntFastFullRange(v * w);
+ union{ float f; sint32 i; } rho = {rhobase * w};
+
+ int lambda = ((rho.i - 0x3F800000) >> (23-8));
+ if (lambda < 0)
+ lambda = 0;
+ if (lambda >= (nMipmaps<<8)-256)
+ lambda = (nMipmaps<<8)-257;
+
+ spanp[0] = utexel;
+ spanp[1] = vtexel;
+ spanp[2] = lambda;
+ spanp += 3;
+
+ u += dudx;
+ v += dvdx;
+ rhw += drhwdx;
+
+ w *= (2.0f - w*(float)rhw);
+ } while(++x < x2);
+ } else {
+ do {
+ int utexel = VDFloorToInt(u * w);
+ int vtexel = VDFloorToInt(v * w);
+
+ spanp[0] = utexel;
+ spanp[1] = vtexel;
+ spanp += 2;
+
+ u += dudx;
+ v += dvdx;
+ rhw += drhwdx;
+
+ w *= (2.0f - w*(float)rhw);
+ } while(++x < x2);
+ }
+ }
+
+ for(int i=0; i<nMipmaps; ++i) {
+ texinfo.mips[i].mip = (const uint32 *)pSources[i]->data;
+ texinfo.mips[i].pitch = pSources[i]->pitch;
+ texinfo.mips[i].uvmul = (pSources[i]->pitch << 16) + 4;
+ }
+ texinfo.dst = dstp+x1;
+ texinfo.src = spanptr;
+ texinfo.width = x2-x1;
+
+ if (texinfo.width>0)
+ drawSpan(&texinfo);
+
+ dstp = vdptroffset(dstp, dstpitch);
+ xl += dxl1;
+ xr += dxr1;
+ ul += dul1;
+ vl += dvl1;
+ rhwl += drhwl1;
+
+ ++y;
+ }
+
+ delete[] spanptr;
+ }
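+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): two tricks used in the inner loop above.
+	//
+	// 1) Perspective: w tracks 1/rhw with one Newton-Raphson step per
+	//    pixel instead of a divide -- given an estimate w ~= 1/r,
+	//    w*(2 - w*r) is a refined estimate, and rhw changes only
+	//    slightly from one pixel to the next.
+	// 2) Mip selection: reinterpreting an IEEE float's bits and
+	//    subtracting the bias (0x3F800000) approximates log2(rho) in
+	//    8.8 fixed point after the >> (23-8), which is exactly the
+	//    lambda the span functions consume (integer part = mip level,
+	//    fraction = blend weight).
+#if 0
+	float SketchRefineReciprocal(float w, float r) {
+		return w * (2.0f - w * r);		// one Newton-Raphson step toward 1/r
+	}
+
+	int SketchApproxLog2Fixed8(float rho) {
+		union { float f; sint32 i; } bits = { rho };
+		return (bits.i - 0x3F800000) >> (23 - 8);	// ~ log2(rho), 8.8 fixed point
+	}
+#endif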
+
+ void FillTri(VDPixmap& dst, uint32 c,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2
+ )
+ {
+
+ VDTriangleSetupInfo setup;
+
+ SetupTri(setup, dst, vx0, vx1, vx2, NULL);
+
+ const VDTriBltTransformedVertex *pt = setup.pt, *pl = setup.pl, *pr = setup.pr;
+
+ // Compute edge walking parameters
+ float dxl1=0, dxr1=0;
+ float dxl2=0, dxr2=0;
+
+ float x_lt = pl->x - pt->x;
+ float x_rt = pr->x - pt->x;
+ float x_rl = pr->x - pl->x;
+ float y_lt = pl->y - pt->y;
+ float y_rt = pr->y - pt->y;
+ float y_rl = pr->y - pl->y;
+
+ // reject backfaces
+ if (x_lt*y_rt >= x_rt*y_lt)
+ return;
+
+ // Compute left-edge interpolation parameters for first half.
+ if (pl->y != pt->y)
+ dxl1 = x_lt / y_lt;
+
+ // Compute right-edge interpolation parameters for first half.
+ if (pr->y != pt->y)
+ dxr1 = x_rt / y_rt;
+
+ // Compute third-edge interpolation parameters.
+ if (pr->y != pl->y) {
+ dxl2 = x_rl / y_rl;
+
+ dxr2 = dxl2;
+ }
+
+ // Initialize parameters for first half.
+ //
+ // We place pixel centers at (x+0.5, y+0.5).
+
+ double xl, xr, yf;
+ int y, y1, y2;
+
+ // y_start < y+0.5 to include pixel y.
+
+ y = (int)floor(pt->y + 0.5);
+ yf = (y+0.5) - pt->y;
+
+ xl = pt->x + dxl1 * yf;
+ xr = pt->x + dxr1 * yf;
+
+ // Initialize parameters for second half.
+ double xl2, xr2;
+
+ if (pl->y > pr->y) { // Left edge is long side
+ dxl2 = dxl1;
+
+ y1 = (int)floor(pr->y + 0.5);
+ y2 = (int)floor(pl->y + 0.5);
+
+ yf = (y1+0.5) - pr->y;
+
+ // Prestep right edge.
+ xr2 = pr->x + dxr2 * yf;
+
+ // Step left edge.
+ xl2 = xl + dxl1 * (y1 - y);
+ } else { // Right edge is long side
+ dxr2 = dxr1;
+
+ y1 = (int)floor(pl->y + 0.5);
+ y2 = (int)floor(pr->y + 0.5);
+
+ yf = (y1+0.5) - pl->y;
+
+ // Prestep left edge.
+ xl2 = pl->x + dxl2 * yf;
+
+ // Step right edge.
+ xr2 = xr + dxr1 * (y1 - y);
+ }
+
+ // rasterize
+ const ptrdiff_t dstpitch = dst.pitch;
+ uint32 *dstp = (uint32 *)((char *)dst.data + dstpitch * y);
+
+ while(y < y2) {
+ if (y == y1) {
+ xl = xl2;
+ xr = xr2;
+ dxl1 = dxl2;
+ dxr1 = dxr2;
+ }
+
+ int x1, x2;
+ double xf;
+
+ // x_left must be less than (x+0.5) to include pixel x.
+
+ x1 = (int)floor(xl + 0.5);
+ x2 = (int)floor(xr + 0.5);
+ xf = (x1+0.5) - xl;
+
+ while(x1 < x2)
+ dstp[x1++] = c;
+
+ dstp = vdptroffset(dstp, dstpitch);
+ xl += dxl1;
+ xr += dxr1;
+ ++y;
+ }
+ }
+
+ void FillTriGrad(VDPixmap& dst,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2
+ )
+ {
+
+ VDTriangleSetupInfo setup;
+
+ SetupTri(setup, dst, vx0, vx1, vx2, NULL);
+
+ const VDTriBltTransformedVertex *pt = setup.pt, *pl = setup.pl, *pr = setup.pr;
+ const float x10 = pl->x - pt->x;
+ const float x20 = pr->x - pt->x;
+ const float y10 = pl->y - pt->y;
+ const float y20 = pr->y - pt->y;
+ const float A = x20*y10 - x10*y20;
+
+ if (A <= 0.f)
+ return;
+
+ float invA = 0.f;
+ if (A >= 1e-5f)
+ invA = 1.0f / A;
+
+ float x10_A = x10 * invA;
+ float x20_A = x20 * invA;
+ float y10_A = y10 * invA;
+ float y20_A = y20 * invA;
+
+ float r10 = pl->r - pt->r;
+ float r20 = pr->r - pt->r;
+ float g10 = pl->g - pt->g;
+ float g20 = pr->g - pt->g;
+ float b10 = pl->b - pt->b;
+ float b20 = pr->b - pt->b;
+ float a10 = pl->a - pt->a;
+ float a20 = pr->a - pt->a;
+ float rhw10 = pl->rhw - pt->rhw;
+ float rhw20 = pr->rhw - pt->rhw;
+
+ float drdx = r20*y10_A - r10*y20_A;
+ float drdy = r10*x20_A - r20*x10_A;
+ float dgdx = g20*y10_A - g10*y20_A;
+ float dgdy = g10*x20_A - g20*x10_A;
+ float dbdx = b20*y10_A - b10*y20_A;
+ float dbdy = b10*x20_A - b20*x10_A;
+ float dadx = a20*y10_A - a10*y20_A;
+ float dady = a10*x20_A - a20*x10_A;
+ float drhwdx = rhw20*y10_A - rhw10*y20_A;
+ float drhwdy = rhw10*x20_A - rhw20*x10_A;
+
+ // Compute edge walking parameters
+ float dxl1=0;
+ float drl1=0;
+ float dgl1=0;
+ float dbl1=0;
+ float dal1=0;
+ float drhwl1=0;
+ float dxr1=0;
+ float dxl2=0;
+ float drl2=0;
+ float dgl2=0;
+ float dbl2=0;
+ float dal2=0;
+ float drhwl2=0;
+ float dxr2=0;
+
+ float x_lt = pl->x - pt->x;
+ float x_rt = pr->x - pt->x;
+ float x_rl = pr->x - pl->x;
+ float y_lt = pl->y - pt->y;
+ float y_rt = pr->y - pt->y;
+ float y_rl = pr->y - pl->y;
+
+ // Compute left-edge interpolation parameters for first half.
+ if (pl->y != pt->y) {
+ dxl1 = x_lt / y_lt;
+ drl1 = drdy + dxl1 * drdx;
+ dgl1 = dgdy + dxl1 * dgdx;
+ dbl1 = dbdy + dxl1 * dbdx;
+ dal1 = dady + dxl1 * dadx;
+ drhwl1 = drhwdy + dxl1 * drhwdx;
+ }
+
+ // Compute right-edge interpolation parameters for first half.
+ if (pr->y != pt->y)
+ dxr1 = x_rt / y_rt;
+
+ // Compute third-edge interpolation parameters.
+ if (pr->y != pl->y) {
+ dxl2 = x_rl / y_rl;
+
+ drl2 = drdy + dxl2 * drdx;
+ dgl2 = dgdy + dxl2 * dgdx;
+ dbl2 = dbdy + dxl2 * dbdx;
+ dal2 = dady + dxl2 * dadx;
+ drhwl2 = drhwdy + dxl2 * drhwdx;
+
+ dxr2 = dxl2;
+ }
+
+ // Initialize parameters for first half.
+ //
+ // We place pixel centers at (x+0.5, y+0.5).
+
+ double xl, xr, yf;
+ double rl, gl, bl, al, rhwl;
+ double rl2, gl2, bl2, al2, rhwl2;
+ int y, y1, y2;
+
+ // y_start < y+0.5 to include pixel y.
+
+ y = (int)floor(pt->y + 0.5);
+ yf = (y+0.5) - pt->y;
+
+ xl = pt->x + dxl1 * yf;
+ xr = pt->x + dxr1 * yf;
+ rl = pt->r + drl1 * yf;
+ gl = pt->g + dgl1 * yf;
+ bl = pt->b + dbl1 * yf;
+ al = pt->a + dal1 * yf;
+ rhwl = pt->rhw + drhwl1 * yf;
+
+ // Initialize parameters for second half.
+ double xl2, xr2;
+
+ if (pl->y > pr->y) { // Left edge is long side
+ dxl2 = dxl1;
+ drl2 = drl1;
+ dgl2 = dgl1;
+ dbl2 = dbl1;
+ dal2 = dal1;
+ drhwl2 = drhwl1;
+
+ y1 = (int)floor(pr->y + 0.5);
+ y2 = (int)floor(pl->y + 0.5);
+
+ yf = (y1+0.5) - pr->y;
+
+ // Step left edge.
+ xl2 = xl + dxl1 * (y1 - y);
+ rl2 = rl + drl1 * (y1 - y);
+ gl2 = gl + dgl1 * (y1 - y);
+ bl2 = bl + dbl1 * (y1 - y);
+ al2 = al + dal1 * (y1 - y);
+ rhwl2 = rhwl + drhwl1 * (y1 - y);
+
+ // Prestep right edge.
+ xr2 = pr->x + dxr2 * yf;
+ } else { // Right edge is long side
+ dxr2 = dxr1;
+
+ y1 = (int)floor(pl->y + 0.5);
+ y2 = (int)floor(pr->y + 0.5);
+
+ yf = (y1+0.5) - pl->y;
+
+ // Prestep left edge.
+ xl2 = pl->x + dxl2 * yf;
+ rl2 = pl->r + drl2 * yf;
+ gl2 = pl->g + dgl2 * yf;
+ bl2 = pl->b + dbl2 * yf;
+ al2 = pl->a + dal2 * yf;
+ rhwl2 = pl->rhw + drhwl2 * yf;
+
+ // Step right edge.
+ xr2 = xr + dxr2 * (y1 - y);
+ }
+
+ // rasterize
+ const ptrdiff_t dstpitch = dst.pitch;
+ char *dstp0 = (char *)dst.data + dstpitch * y;
+
+ while(y < y2) {
+ if (y == y1) {
+ xl = xl2;
+ xr = xr2;
+ rl = rl2;
+ gl = gl2;
+ bl = bl2;
+ al = al2;
+ rhwl = rhwl2;
+ dxl1 = dxl2;
+ drl1 = drl2;
+ dgl1 = dgl2;
+ dbl1 = dbl2;
+ dal1 = dal2;
+ drhwl1 = drhwl2;
+ dxr1 = dxr2;
+ }
+
+ int x1, x2;
+ double xf;
+ double r, g, b, a, rhw;
+
+ // x_left must be less than (x+0.5) to include pixel x.
+
+ x1 = (int)floor(xl + 0.5);
+ x2 = (int)floor(xr + 0.5);
+ xf = (x1+0.5) - xl;
+
+ r = rl + xf * drdx;
+ g = gl + xf * dgdx;
+ b = bl + xf * dbdx;
+ a = al + xf * dadx;
+ rhw = rhwl + xf * drhwdx;
+
+ float w = 1.0f / (float)rhw;
+
+ if (x1 < x2) {
+ if (dst.format == nsVDPixmap::kPixFormat_XRGB8888) {
+ uint32 *dstp = (uint32 *)dstp0;
+
+ do {
+ float sr = (float)(r * w);
+ float sg = (float)(g * w);
+ float sb = (float)(b * w);
+ float sa = (float)(a * w);
+
+ uint8 ir = VDClampedRoundFixedToUint8Fast(sr);
+ uint8 ig = VDClampedRoundFixedToUint8Fast(sg);
+ uint8 ib = VDClampedRoundFixedToUint8Fast(sb);
+ uint8 ia = VDClampedRoundFixedToUint8Fast(sa);
+
+ dstp[x1] = ((uint32)ia << 24) + ((uint32)ir << 16) + ((uint32)ig << 8) + ib;
+
+ r += drdx;
+ g += dgdx;
+ b += dbdx;
+ a += dadx;
+ rhw += drhwdx;
+
+ w *= (2.0f - w*(float)rhw);
+ } while(++x1 < x2);
+ } else {
+ uint8 *dstp = (uint8 *)dstp0;
+
+ do {
+ float sg = (float)(g * w);
+
+ uint8 ig = VDClampedRoundFixedToUint8Fast(sg);
+
+ dstp[x1] = ig;
+
+ g += dgdx;
+ rhw += drhwdx;
+
+ w *= (2.0f - w*(float)rhw);
+ } while(++x1 < x2);
+ }
+ }
+
+ dstp0 = vdptroffset(dstp0, dstpitch);
+ xl += dxl1;
+ rl += drl1;
+ gl += dgl1;
+ bl += dbl1;
+ al += dal1;
+ rhwl += drhwl1;
+ xr += dxr1;
+ ++y;
+ }
+ }
+
+ struct VDTriClipWorkspace {
+ VDTriBltTransformedVertex *vxheapptr[2][19];
+ VDTriBltTransformedVertex vxheap[21];
+ };
+
+ VDTriBltTransformedVertex **VDClipTriangle(VDTriClipWorkspace& ws,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2,
+ int orflags) {
+ // Each line segment can intersect all six planes, meaning the maximum bound is
+ // 18 vertices. Add 3 for the original.
+
+ VDTriBltTransformedVertex *vxheapnext;
+ VDTriBltTransformedVertex **vxlastheap = ws.vxheapptr[0], **vxnextheap = ws.vxheapptr[1];
+
+ ws.vxheap[0] = *vx0;
+ ws.vxheap[1] = *vx1;
+ ws.vxheap[2] = *vx2;
+
+ vxlastheap[0] = &ws.vxheap[0];
+ vxlastheap[1] = &ws.vxheap[1];
+ vxlastheap[2] = &ws.vxheap[2];
+ vxlastheap[3] = NULL;
+
+ vxheapnext = ws.vxheap + 3;
+
+ // Current Next Action
+ // ------- ---- ------
+ // Unclipped Unclipped Copy vertex
+ // Unclipped Clipped Copy vertex and add intersection
+ // Clipped Unclipped Add intersection
+ // Clipped Clipped No action
+
+#define DOCLIP(cliptype, _sign_, cliparg) \
+ if (orflags & k##cliptype) { \
+ VDTriBltTransformedVertex **src = vxlastheap; \
+ VDTriBltTransformedVertex **dst = vxnextheap; \
+ \
+ while(*src) { \
+ VDTriBltTransformedVertex *cur = *src; \
+ VDTriBltTransformedVertex *next = src[1]; \
+ \
+ if (!next) \
+ next = vxlastheap[0]; \
+ \
+ if (!(cur->outcode & k##cliptype)) \
+ *dst++ = cur; \
+ \
+ if ((cur->outcode ^ next->outcode) & k##cliptype) { \
+ double alpha = (cur->w _sign_ cur->cliparg) / ((cur->w _sign_ cur->cliparg) - (next->w _sign_ next->cliparg)); \
+ \
+ if (alpha >= 0.0 && alpha <= 1.0) { \
+ vxheapnext->interp(cur, next, (float)alpha); \
+ vxheapnext->cliparg = -(_sign_ vxheapnext->w); \
+ *dst++ = vxheapnext++; \
+ } \
+ } \
+ ++src; \
+ } \
+ *dst = NULL; \
+ if (dst < vxnextheap+3) return NULL; \
+ src = vxlastheap; vxlastheap = vxnextheap; vxnextheap = src; \
+ }
+
+
+ DOCLIP(Far, -, z);
+ DOCLIP(Near, +, z);
+ DOCLIP(Bottom, -, y);
+ DOCLIP(Top, +, y);
+ DOCLIP(Right, -, x);
+ DOCLIP(Left, +, x);
+
+#undef DOCLIP
+
+ return vxlastheap;
+ }
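+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): DOCLIP above is a Sutherland-Hodgman pass against one
+	// frustum plane. Stripped of the pointer-list plumbing, clipping a
+	// polygon against w + x >= 0 (the left plane) reduces to this
+	// per-edge pattern:
+#if 0
+	int SketchClipLeftPlane(const VDTriBltTransformedVertex *in, int n,
+							VDTriBltTransformedVertex *out) {
+		int outCount = 0;
+
+		for(int i=0; i<n; ++i) {
+			const VDTriBltTransformedVertex& cur = in[i];
+			const VDTriBltTransformedVertex& next = in[(i+1) % n];
+
+			const float dCur  = cur.w  + cur.x;		// >= 0 means inside
+			const float dNext = next.w + next.x;
+
+			if (dCur >= 0)
+				out[outCount++] = cur;				// keep inside vertex
+
+			if ((dCur >= 0) != (dNext >= 0)) {		// edge crosses the plane
+				const float alpha = dCur / (dCur - dNext);
+				out[outCount].interp(&cur, &next, alpha);
+				out[outCount].x = -out[outCount].w;	// land exactly on the plane
+				++outCount;
+			}
+		}
+
+		return outCount;
+	}
+#endif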
+
+ void RenderClippedTri(VDPixmap& dst, const VDPixmap *const *pSources, int nMipmaps,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2,
+ VDTriBltFilterMode filterMode,
+ float mipMapLODBias,
+ int orflags)
+ {
+
+ VDTriBltTransformedVertex *vxheapnext;
+ VDTriBltTransformedVertex vxheap[21];
+
+ VDTriBltTransformedVertex *vxheapptr[2][19];
+ VDTriBltTransformedVertex **vxlastheap = vxheapptr[0], **vxnextheap = vxheapptr[1];
+
+ vxheap[0] = *vx0;
+ vxheap[1] = *vx1;
+ vxheap[2] = *vx2;
+
+ vxlastheap[0] = &vxheap[0];
+ vxlastheap[1] = &vxheap[1];
+ vxlastheap[2] = &vxheap[2];
+ vxlastheap[3] = NULL;
+
+ vxheapnext = vxheap + 3;
+
+ // Current Next Action
+ // ------- ---- ------
+ // Unclipped Unclipped Copy vertex
+ // Unclipped Clipped Copy vertex and add intersection
+ // Clipped Unclipped Add intersection
+ // Clipped Clipped No action
+
+#define DOCLIP(cliptype, _sign_, cliparg) \
+ if (orflags & k##cliptype) { \
+ VDTriBltTransformedVertex **src = vxlastheap; \
+ VDTriBltTransformedVertex **dst = vxnextheap; \
+ \
+ while(*src) { \
+ VDTriBltTransformedVertex *cur = *src; \
+ VDTriBltTransformedVertex *next = src[1]; \
+ \
+ if (!next) \
+ next = vxlastheap[0]; \
+ \
+ if (!(cur->outcode & k##cliptype)) \
+ *dst++ = cur; \
+ \
+ if ((cur->outcode ^ next->outcode) & k##cliptype) { \
+ double alpha = (cur->w _sign_ cur->cliparg) / ((cur->w _sign_ cur->cliparg) - (next->w _sign_ next->cliparg)); \
+ \
+ if (alpha >= 0.0 && alpha <= 1.0) { \
+ vxheapnext->interp(cur, next, (float)alpha); \
+ vxheapnext->cliparg = -(_sign_ vxheapnext->w); \
+ *dst++ = vxheapnext++; \
+ } \
+ } \
+ ++src; \
+ } \
+ *dst = NULL; \
+ if (dst < vxnextheap+3) return; \
+ src = vxlastheap; vxlastheap = vxnextheap; vxnextheap = src; \
+ }
+
+
+ DOCLIP(Far, -, z);
+ DOCLIP(Near, +, z);
+ DOCLIP(Bottom, -, y);
+ DOCLIP(Top, +, y);
+ DOCLIP(Right, -, x);
+ DOCLIP(Left, +, x);
+
+#undef DOCLIP
+
+ VDTriBltTransformedVertex **src = vxlastheap+1;
+
+ while(src[1]) {
+ RenderTri(dst, pSources, nMipmaps, vxlastheap[0], src[0], src[1], filterMode, mipMapLODBias);
+ ++src;
+ }
+ }
+
+}
+
+bool VDPixmapTriFill(VDPixmap& dst, const uint32 c, const VDTriBltVertex *pVertices, int nVertices, const int *pIndices, int nIndices, const float pTransform[16]) {
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888)
+ return false;
+
+ static const float xf_ident[16]={1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f};
+ vdfastvector<VDTriBltTransformedVertex> xverts(nVertices);
+
+ if (!pTransform)
+ pTransform = xf_ident;
+
+ TransformVerts(xverts.data(), pVertices, nVertices, pTransform);
+
+ const VDTriBltTransformedVertex *xsrc = xverts.data();
+
+ VDTriClipWorkspace clipws;
+
+ while(nIndices >= 3) {
+ const int idx0 = pIndices[0];
+ const int idx1 = pIndices[1];
+ const int idx2 = pIndices[2];
+ const VDTriBltTransformedVertex *xv0 = &xsrc[idx0];
+ const VDTriBltTransformedVertex *xv1 = &xsrc[idx1];
+ const VDTriBltTransformedVertex *xv2 = &xsrc[idx2];
+ const int kode0 = xv0->outcode;
+ const int kode1 = xv1->outcode;
+ const int kode2 = xv2->outcode;
+
+ if (!(kode0 & kode1 & kode2)) {
+ if (int orflags = kode0 | kode1 | kode2) {
+ VDTriBltTransformedVertex **src = VDClipTriangle(clipws, xv0, xv1, xv2, orflags);
+
+ if (src) {
+ VDTriBltTransformedVertex *src0 = *src++;
+
+ // fan out triangles
+ while(src[1]) {
+ FillTri(dst, c, src0, src[0], src[1]);
+ ++src;
+ }
+ }
+ } else
+ FillTri(dst, c, xv0, xv1, xv2);
+ }
+
+ pIndices += 3;
+ nIndices -= 3;
+ }
+
+ return true;
+}
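+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (editorial, hypothetical, excluded from the build):
+// with a null transform the vertices are taken directly as clip-space
+// coordinates (w = 1), so positions in [-1,+1] map onto the destination.
+// The vertex data below is made up for illustration; the destination must be
+// XRGB8888 or the call returns false.
+#if 0
+void SketchFillUpperLeftHalf(VDPixmap& dstXRGB8888) {
+	VDTriBltVertex v[3];
+	v[0].x = -1.0f; v[0].y = -1.0f; v[0].z = 0.0f; v[0].u = 0.0f; v[0].v = 0.0f;
+	v[1].x = +1.0f; v[1].y = -1.0f; v[1].z = 0.0f; v[1].u = 0.0f; v[1].v = 0.0f;
+	v[2].x = -1.0f; v[2].y = +1.0f; v[2].z = 0.0f; v[2].u = 0.0f; v[2].v = 0.0f;
+
+	static const int idx[3] = { 0, 1, 2 };
+
+	// Fills the upper-left half of the destination with green.
+	VDPixmapTriFill(dstXRGB8888, 0xFF00FF00, v, 3, idx, 3, NULL);
+}
+#endif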
+
+bool VDPixmapTriFill(VDPixmap& dst, const VDTriColorVertex *pVertices, int nVertices, const int *pIndices, int nIndices, const float pTransform[16]) {
+ VDPixmap pxY;
+ VDPixmap pxCb;
+ VDPixmap pxCr;
+ bool ycbcr = false;
+ float ycbcr_xoffset = 0;
+
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ case nsVDPixmap::kPixFormat_Y8:
+ break;
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ pxY.format = nsVDPixmap::kPixFormat_Y8;
+ pxY.data = dst.data;
+ pxY.pitch = dst.pitch;
+ pxY.w = dst.w;
+ pxY.h = dst.h;
+
+ pxCb.format = nsVDPixmap::kPixFormat_Y8;
+ pxCb.data = dst.data2;
+ pxCb.pitch = dst.pitch2;
+ pxCb.h = dst.h;
+
+ pxCr.format = nsVDPixmap::kPixFormat_Y8;
+ pxCr.data = dst.data3;
+ pxCr.pitch = dst.pitch3;
+ pxCr.h = dst.h;
+
+ if (dst.format == nsVDPixmap::kPixFormat_YUV410_Planar) {
+ pxCr.w = pxCb.w = dst.w >> 2;
+ pxCr.h = pxCb.h = dst.h >> 2;
+ ycbcr_xoffset = 0.75f / (float)pxCr.w;
+ } else if (dst.format == nsVDPixmap::kPixFormat_YUV420_Planar) {
+ pxCr.w = pxCb.w = dst.w >> 1;
+ pxCr.h = pxCb.h = dst.h >> 1;
+ ycbcr_xoffset = 0.5f / (float)pxCr.w;
+ } else if (dst.format == nsVDPixmap::kPixFormat_YUV422_Planar) {
+ pxCr.w = pxCb.w = dst.w >> 1;
+ ycbcr_xoffset = 0.5f / (float)pxCr.w;
+ } else if (dst.format == nsVDPixmap::kPixFormat_YUV444_Planar) {
+ pxCr.w = pxCb.w = dst.w;
+ ycbcr_xoffset = 0.0f;
+ }
+
+ ycbcr = true;
+ break;
+ default:
+ return false;
+ }
+
+ VDTriBltTransformedVertex fastxverts[64];
+ vdfastvector<VDTriBltTransformedVertex> xverts;
+
+ VDTriBltTransformedVertex *xsrc;
+ if (nVertices <= 64) {
+ xsrc = fastxverts;
+ } else {
+ xverts.resize(nVertices);
+ xsrc = xverts.data();
+ }
+
+ static const float xf_ident[16]={1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f};
+ if (!pTransform)
+ pTransform = xf_ident;
+
+ VDTriClipWorkspace clipws;
+ for(int plane=0; plane<(ycbcr?3:1); ++plane) {
+ VDPixmap& pxPlane = ycbcr ? plane == 0 ? pxY : plane == 1 ? pxCb : pxCr : dst;
+
+ if (ycbcr && plane) {
+ float xf_ycbcr[16];
+ memcpy(xf_ycbcr, pTransform, sizeof(float) * 16);
+
+ // translate in x by ycbcr_xoffset
+ xf_ycbcr[0] += xf_ycbcr[12]*ycbcr_xoffset;
+ xf_ycbcr[1] += xf_ycbcr[13]*ycbcr_xoffset;
+ xf_ycbcr[2] += xf_ycbcr[14]*ycbcr_xoffset;
+ xf_ycbcr[3] += xf_ycbcr[15]*ycbcr_xoffset;
+
+ TransformVerts(xsrc, pVertices, nVertices, xf_ycbcr);
+
+ switch(plane) {
+ case 1:
+ for(int i=0; i<nVertices; ++i)
+ xsrc[i].g = xsrc[i].b;
+ break;
+ case 2:
+ for(int i=0; i<nVertices; ++i)
+ xsrc[i].g = xsrc[i].r;
+ break;
+ }
+ } else {
+ TransformVerts(xsrc, pVertices, nVertices, pTransform);
+ }
+
+ const int *nextIndex = pIndices;
+ int indicesLeft = nIndices;
+ while(indicesLeft >= 3) {
+ const int idx0 = nextIndex[0];
+ const int idx1 = nextIndex[1];
+ const int idx2 = nextIndex[2];
+ const VDTriBltTransformedVertex *xv0 = &xsrc[idx0];
+ const VDTriBltTransformedVertex *xv1 = &xsrc[idx1];
+ const VDTriBltTransformedVertex *xv2 = &xsrc[idx2];
+ const int kode0 = xv0->outcode;
+ const int kode1 = xv1->outcode;
+ const int kode2 = xv2->outcode;
+
+ if (!(kode0 & kode1 & kode2)) {
+ if (int orflags = kode0 | kode1 | kode2) {
+ VDTriBltTransformedVertex **src = VDClipTriangle(clipws, xv0, xv1, xv2, orflags);
+
+ if (src) {
+ VDTriBltTransformedVertex *src0 = *src++;
+
+ // fan out triangles
+ while(src[1]) {
+ FillTriGrad(pxPlane, src0, src[0], src[1]);
+ ++src;
+ }
+ }
+ } else {
+ FillTriGrad(pxPlane, xv0, xv1, xv2);
+ }
+ }
+
+ nextIndex += 3;
+ indicesLeft -= 3;
+ }
+ }
+
+ return true;
+}
+
+bool VDPixmapTriBlt(VDPixmap& dst, const VDPixmap *const *pSources, int nMipmaps,
+ const VDTriBltVertex *pVertices, int nVertices,
+ const int *pIndices, int nIndices,
+ VDTriBltFilterMode filterMode,
+ float mipMapLODBias,
+ const float pTransform[16])
+{
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888)
+ return false;
+
+ static const float xf_ident[16]={1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f};
+ vdfastvector<VDTriBltTransformedVertex> xverts(nVertices);
+
+ if (!pTransform)
+ pTransform = xf_ident;
+
+ TransformVerts(xverts.data(), pVertices, nVertices, pTransform);
+
+ const VDTriBltTransformedVertex *xsrc = xverts.data();
+
+ VDTriClipWorkspace clipws;
+
+ while(nIndices >= 3) {
+ const int idx0 = pIndices[0];
+ const int idx1 = pIndices[1];
+ const int idx2 = pIndices[2];
+ const VDTriBltTransformedVertex *xv0 = &xsrc[idx0];
+ const VDTriBltTransformedVertex *xv1 = &xsrc[idx1];
+ const VDTriBltTransformedVertex *xv2 = &xsrc[idx2];
+ const int kode0 = xv0->outcode;
+ const int kode1 = xv1->outcode;
+ const int kode2 = xv2->outcode;
+
+ if (!(kode0 & kode1 & kode2)) {
+ if (int orflags = kode0 | kode1 | kode2) {
+ VDTriBltTransformedVertex **src = VDClipTriangle(clipws, xv0, xv1, xv2, orflags);
+
+ if (src) {
+ VDTriBltTransformedVertex *src0 = *src++;
+
+ // fan out triangles
+ while(src[1]) {
+ RenderTri(dst, pSources, nMipmaps, src0, src[0], src[1], filterMode, mipMapLODBias);
+ ++src;
+ }
+ }
+ } else
+ RenderTri(dst, pSources, nMipmaps, xv0, xv1, xv2, filterMode, mipMapLODBias);
+ }
+
+ pIndices += 3;
+ nIndices -= 3;
+ }
+
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+void VDPixmapSetTextureBorders(VDPixmap& px, bool wrap) {
+ const int w = px.w;
+ const int h = px.h;
+
+ VDPixmapBlt(px, 0, 1, px, wrap ? w-2 : 1, 1, 1, h-2);
+ VDPixmapBlt(px, w-1, 1, px, wrap ? 1 : w-2, 1, 1, h-2);
+
+ VDPixmapBlt(px, 0, 0, px, 0, wrap ? h-2 : 1, w, 1);
+ VDPixmapBlt(px, 0, h-1, px, 0, wrap ? 1 : h-2, w, 1);
+}
+
+void VDPixmapSetTextureBordersCubic(VDPixmap& px) {
+ const int w = px.w;
+ const int h = px.h;
+
+ VDPixmapBlt(px, 0, 1, px, 2, 1, 1, h-2);
+ VDPixmapBlt(px, 1, 1, px, 2, 1, 1, h-2);
+ VDPixmapBlt(px, w-2, 1, px, w-3, 1, 1, h-2);
+ VDPixmapBlt(px, w-1, 1, px, w-3, 1, 1, h-2);
+
+ VDPixmapBlt(px, 0, 0, px, 0, 2, w, 1);
+ VDPixmapBlt(px, 0, 1, px, 0, 2, w, 1);
+ VDPixmapBlt(px, 0, h-2, px, 0, h-3, w, 1);
+ VDPixmapBlt(px, 0, h-1, px, 0, h-3, w, 1);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDPixmapTextureMipmapChain::VDPixmapTextureMipmapChain(const VDPixmap& src, bool wrap, bool cubic, int maxlevels) {
+ int w = src.w;
+ int h = src.h;
+ int mipcount = 0;
+
+ while((w>1 || h>1) && maxlevels--) {
+ ++mipcount;
+ w >>= 1;
+ h >>= 1;
+ }
+
+ mBuffers.resize(mipcount);
+ mMipMaps.resize(mipcount);
+
+ vdautoptr<IVDPixmapResampler> r(VDCreatePixmapResampler());
+ r->SetFilters(IVDPixmapResampler::kFilterLinear, IVDPixmapResampler::kFilterLinear, false);
+
+ float fw = (float)src.w;
+ float fh = (float)src.h;
+ for(int mip=0; mip<mipcount; ++mip) {
+ const int mipw = VDCeilToInt(fw);
+ const int miph = VDCeilToInt(fh);
+
+ mMipMaps[mip] = &mBuffers[mip];
+
+ if (cubic) {
+ mBuffers[mip].init(mipw+4, miph+4, nsVDPixmap::kPixFormat_XRGB8888);
+
+ if (!mip) {
+ VDPixmapBlt(mBuffers[0], 2, 2, src, 0, 0, src.w, src.h);
+ VDPixmapSetTextureBordersCubic(mBuffers[0]);
+ } else {
+ const VDPixmap& curmip = mBuffers[mip];
+ const VDPixmap& prevmip = mBuffers[mip-1];
+
+ vdrect32f rdst( 0.0f, 0.0f, (float)curmip.w , (float)curmip.h );
+ vdrect32f rsrc(-2.0f, -2.0f, 2.0f*(float)curmip.w - 2.0f, 2.0f*(float)curmip.h - 2.0f);
+ r->Init(rdst, curmip.w, curmip.h, curmip.format, rsrc, prevmip.w, prevmip.h, prevmip.format);
+ r->Process(curmip, prevmip);
+ }
+ } else {
+ mBuffers[mip].init(mipw+2, miph+2, nsVDPixmap::kPixFormat_XRGB8888);
+
+ if (!mip) {
+ VDPixmapBlt(mBuffers[0], 1, 1, src, 0, 0, src.w, src.h);
+ VDPixmapSetTextureBorders(mBuffers[0], wrap);
+ } else {
+ const VDPixmap& curmip = mBuffers[mip];
+ const VDPixmap& prevmip = mBuffers[mip-1];
+
+ vdrect32f rdst( 0.0f, 0.0f, (float)curmip.w , (float)curmip.h );
+ vdrect32f rsrc(-1.0f, -1.0f, 2.0f*(float)curmip.w - 1.0f, 2.0f*(float)curmip.h - 1.0f);
+ r->Init(rdst, curmip.w, curmip.h, curmip.format, rsrc, prevmip.w, prevmip.h, prevmip.format);
+ r->Process(curmip, prevmip);
+ }
+ }
+
+ fw *= 0.5f;
+ fh *= 0.5f;
+ }
+}
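+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (editorial, hypothetical, excluded from the build): the
+// constructor above keeps halving the source until both dimensions reach 1
+// (or maxlevels runs out), and pads each level by 1 or 2 texels per side so
+// the bilinear/bicubic span functions can read past the edges without bounds
+// checks. Ignoring the maxlevels cap, the level count it allocates is:
+#if 0
+static int SketchMipLevelCount(int w, int h) {
+	int count = 0;
+
+	while(w > 1 || h > 1) {
+		++count;
+		w >>= 1;
+		h >>= 1;
+	}
+
+	return count;	// e.g. 256x64 -> 8 levels (256x64 down to 2x1)
+}
+#endif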
+
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit.cpp
new file mode 100644
index 000000000..6dc1b4334
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit.cpp
@@ -0,0 +1,903 @@
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "uberblit.h"
+#include "uberblit_gen.h"
+
+uint32 VDPixmapGetFormatTokenFromFormat(int format) {
+ using namespace nsVDPixmap;
+ switch(format) {
+ case kPixFormat_Pal1: return kVDPixType_1 | kVDPixSamp_444 | kVDPixSpace_Pal;
+ case kPixFormat_Pal2: return kVDPixType_2 | kVDPixSamp_444 | kVDPixSpace_Pal;
+ case kPixFormat_Pal4: return kVDPixType_4 | kVDPixSamp_444 | kVDPixSpace_Pal;
+ case kPixFormat_Pal8: return kVDPixType_8 | kVDPixSamp_444 | kVDPixSpace_Pal;
+ case kPixFormat_XRGB1555: return kVDPixType_1555_LE | kVDPixSamp_444 | kVDPixSpace_BGR;
+ case kPixFormat_RGB565: return kVDPixType_565_LE | kVDPixSamp_444 | kVDPixSpace_BGR;
+ case kPixFormat_RGB888: return kVDPixType_888 | kVDPixSamp_444 | kVDPixSpace_BGR;
+ case kPixFormat_XRGB8888: return kVDPixType_8888 | kVDPixSamp_444 | kVDPixSpace_BGR;
+ case kPixFormat_Y8: return kVDPixType_8 | kVDPixSamp_444 | kVDPixSpace_Y_601;
+ case kPixFormat_YUV422_UYVY: return kVDPixType_B8G8_R8G8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_YUYV: return kVDPixType_G8B8_G8R8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV444_XVYU: return kVDPixType_8888 | kVDPixSamp_444 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV444_Planar: return kVDPixType_8_8_8 | kVDPixSamp_444 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_Planar: return kVDPixType_8_8_8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_Planar_16F: return kVDPixType_16F_16F_16F_LE | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV420_Planar: return kVDPixType_8_8_8 | kVDPixSamp_420_MPEG2 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV411_Planar: return kVDPixType_8_8_8 | kVDPixSamp_411 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV410_Planar: return kVDPixType_8_8_8 | kVDPixSamp_410 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_Planar_Centered: return kVDPixType_8_8_8 | kVDPixSamp_422_JPEG | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV420_Planar_Centered: return kVDPixType_8_8_8 | kVDPixSamp_420_MPEG1 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_V210: return kVDPixType_V210 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_UYVY_709: return kVDPixType_B8G8_R8G8 | kVDPixSamp_422 | kVDPixSpace_YCC_709;
+ case kPixFormat_YUV420_NV12: return kVDPixType_8_B8R8 | kVDPixSamp_420_MPEG2 | kVDPixSpace_YCC_601;
+ default:
+ VDASSERT(false);
+ return 0;
+ }
+}
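+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (editorial, hypothetical, excluded from the build): a
+// format token packs three orthogonal fields, so the converters below can
+// compare or rewrite one aspect at a time using the *_Mask constants:
+#if 0
+static void SketchSplitToken(uint32 token) {
+	const uint32 type     = token & kVDPixType_Mask;	// storage layout
+	const uint32 sampling = token & kVDPixSamp_Mask;	// chroma subsampling
+	const uint32 space    = token & kVDPixSpace_Mask;	// color space
+
+	// e.g. rewriting only the type field, as BlitterConvertType does:
+	const uint32 retyped = (token & ~kVDPixType_Mask) | kVDPixType_8888;
+
+	(void)type; (void)sampling; (void)space; (void)retyped;
+}
+#endif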
+
+const VDPixmapSamplingInfo& VDPixmapGetSamplingInfo(uint32 samplingToken) {
+ static const VDPixmapSamplingInfo kPixmapSamplingInfo[]={
+ /* Null */ { 0, 0, 0, 0, 0 },
+ /* 444 */ { 0, 0, 0, 0, 0 },
+ /* 422 */ { -4, 0, 0, 1, 0 },
+ /* 422_JPEG */ { 0, 0, 0, 1, 0 },
+ /* 420_MPEG2 */ { -4, 0, 0, 1, 1 },
+ /* 420_MPEG2INT */ { -4, 0, 0, 1, 1 },
+ /* 420_MPEG1 */ { 0, 0, 0, 1, 1 },
+ /* 420_DVPAL */ { -4, 0, 0, 1, 1 },
+ /* 411 */ { -6, 0, 0, 2, 0 },
+ /* 410 */ { -6, 0, 0, 2, 2 }
+ };
+
+ uint32 index = (samplingToken & kVDPixSamp_Mask) >> kVDPixSamp_Bits;
+
+ return index >= sizeof(kPixmapSamplingInfo)/sizeof(kPixmapSamplingInfo[0]) ? kPixmapSamplingInfo[0] : kPixmapSamplingInfo[index];
+}
+
+namespace {
+ uint32 BlitterConvertSampling(VDPixmapUberBlitterGenerator& gen, uint32 srcToken, uint32 dstSamplingToken, sint32 w, sint32 h) {
+ // if the source type is 16F, we have to convert to 32F
+ if ((srcToken & kVDPixType_Mask) == kVDPixType_16F_16F_16F_LE) {
+ // 0 1 2
+ gen.conv_16F_to_32F();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_16F_to_32F();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_16F_to_32F();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ }
+
+ // look up sampling info
+ const VDPixmapSamplingInfo& srcInfo = VDPixmapGetSamplingInfo(srcToken);
+ const VDPixmapSamplingInfo& dstInfo = VDPixmapGetSamplingInfo(dstSamplingToken);
+
+ // convert destination chroma origin to luma space
+ int c_x = ((8 + dstInfo.mCXOffset16) << dstInfo.mCXBits) - 8;
+ int cr_y = ((8 + dstInfo.mCrYOffset16) << dstInfo.mCYBits) - 8;
+ int cb_y = ((8 + dstInfo.mCbYOffset16) << dstInfo.mCYBits) - 8;
+
+		// convert the chroma location from luma space into the source's chroma space
+ c_x = ((8 + c_x) >> srcInfo.mCXBits) - 8 - srcInfo.mCXOffset16;
+ cr_y = ((8 + cr_y) >> srcInfo.mCYBits) - 8 - srcInfo.mCrYOffset16;
+ cb_y = ((8 + cb_y) >> srcInfo.mCYBits) - 8 - srcInfo.mCbYOffset16;
+
+ float cxo = c_x / 16.0f + 0.5f;
+ float cxf = ((16 << dstInfo.mCXBits) >> srcInfo.mCXBits) / 16.0f;
+ float cyf = ((16 << dstInfo.mCYBits) >> srcInfo.mCYBits) / 16.0f;
+ sint32 cw = -(-w >> dstInfo.mCXBits);
+ sint32 ch = -(-h >> dstInfo.mCYBits);
+
+ gen.swap(2);
+ gen.linear(cxo, cxf, cw, cb_y / 16.0f + 0.5f, cyf, ch);
+ gen.swap(2);
+ gen.linear(cxo, cxf, cw, cr_y / 16.0f + 0.5f, cyf, ch);
+
+ return (srcToken & ~kVDPixSamp_Mask) | (dstSamplingToken & kVDPixSamp_Mask);
+ }
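+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): the recurring -(-w >> bits) idiom above is a rounding-up
+	// divide by 2^bits (relying on arithmetic right shift), used to derive
+	// chroma plane sizes from luma sizes -- e.g. a 639-wide 4:2:0 image
+	// still needs 320 chroma columns:
+#if 0
+	sint32 SketchCeilShift(sint32 w, int bits) {
+		return -(-w >> bits);	// == ceil(w / 2^bits) for w >= 0
+	}
+	// SketchCeilShift(639, 1) == 320, SketchCeilShift(640, 1) == 320
+#endif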
+
+ uint32 BlitterConvertType(VDPixmapUberBlitterGenerator& gen, uint32 srcToken, uint32 dstToken, sint32 w, sint32 h) {
+ uint32 dstType = dstToken & kVDPixType_Mask;
+
+ while((srcToken ^ dstToken) & kVDPixType_Mask) {
+ uint32 srcType = srcToken & kVDPixType_Mask;
+ uint32 targetType = dstType;
+
+ type_reconvert:
+ switch(targetType) {
+ case kVDPixType_1555_LE:
+ switch(srcType) {
+ case kVDPixType_565_LE:
+ gen.conv_565_to_555();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ break;
+
+ case kVDPixType_8888:
+ gen.conv_8888_to_555();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ break;
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ default:
+ targetType = kVDPixType_8888;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_565_LE:
+ switch(srcType) {
+ case kVDPixType_1555_LE:
+ gen.conv_555_to_565();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ break;
+ case kVDPixType_8888:
+ gen.conv_8888_to_565();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ break;
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ default:
+ targetType = kVDPixType_8888;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_888:
+ switch(srcType) {
+ case kVDPixType_8888:
+ gen.conv_8888_to_888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_888;
+ break;
+ default:
+ targetType = kVDPixType_8888;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_8888:
+ switch(srcType) {
+ case kVDPixType_1555_LE:
+ gen.conv_555_to_8888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ case kVDPixType_565_LE:
+ gen.conv_565_to_8888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ case kVDPixType_888:
+ gen.conv_888_to_8888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ case kVDPixType_32Fx4_LE:
+ gen.conv_X32F_to_8888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ case kVDPixType_8_8_8:
+ if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_444)
+ srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_444, w, h);
+ gen.interleave_X8R8G8B8();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ case kVDPixType_8:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ gen.pop();
+ gen.swap(1);
+ gen.pop();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ break;
+
+ case kVDPixType_16F_LE:
+ targetType = kVDPixType_32F_LE;
+ goto type_reconvert;
+
+ case kVDPixType_32F_LE:
+ gen.conv_32F_to_8();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ break;
+
+ default:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_8_8_8:
+ switch(srcType) {
+ case kVDPixType_B8G8_R8G8:
+ gen.dup();
+ gen.dup();
+ gen.extract_8in32(2, (w + 1) >> 1, h);
+ gen.swap(2);
+ gen.extract_8in16(1, w, h);
+ gen.swap(1);
+ gen.extract_8in32(0, (w + 1) >> 1, h);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_8_8_8 | kVDPixSamp_422;
+ break;
+ case kVDPixType_G8B8_G8R8:
+ gen.dup();
+ gen.dup();
+ gen.extract_8in32(3, (w + 1) >> 1, h);
+ gen.swap(2);
+ gen.extract_8in16(0, w, h);
+ gen.swap(1);
+ gen.extract_8in32(1, (w + 1) >> 1, h);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_8_8_8 | kVDPixSamp_422;
+ break;
+ case kVDPixType_16F_16F_16F_LE:
+ case kVDPixType_V210:
+ targetType = kVDPixType_32F_32F_32F_LE;
+ goto type_reconvert;
+ case kVDPixType_32F_32F_32F_LE:
+ // 0 1 2
+ gen.conv_32F_to_8();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_32F_to_8();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_32F_to_8();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ break;
+ case kVDPixType_8_B8R8:
+ {
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+ int cw = -(-w >> sampInfo.mCXBits);
+ int ch = -(-h >> sampInfo.mCYBits);
+
+ gen.dup();
+ gen.extract_8in16(1, cw, ch);
+ gen.swap(2);
+ gen.swap(1);
+ gen.extract_8in16(0, cw, ch);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ }
+ break;
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ case kVDPixType_B8G8_R8G8:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
+ srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);
+
+ gen.interleave_B8G8_R8G8();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_B8G8_R8G8;
+ break;
+ case kVDPixType_G8B8_G8R8:
+ gen.swap_8in16(w, h, w*2);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_B8G8_R8G8;
+ break;
+ default:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_G8B8_G8R8:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
+ srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);
+
+ gen.interleave_G8B8_G8R8();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_G8B8_G8R8;
+ break;
+ case kVDPixType_B8G8_R8G8:
+ gen.swap_8in16(w, h, w*2);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_G8B8_G8R8;
+ break;
+ default:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ switch(srcType) {
+ case kVDPixType_32F_32F_32F_LE:
+ // 0 1 2
+ gen.conv_32F_to_16F();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_32F_to_16F();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_32F_to_16F();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_16F_16F_LE;
+ break;
+
+ default:
+ targetType = kVDPixType_32F_32F_32F_LE;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_32F_32F_32F_LE:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ // 0 1 2
+ gen.conv_8_to_32F();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_8_to_32F();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_8_to_32F();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ // 0 1 2
+ gen.conv_16F_to_32F();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_16F_to_32F();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_16F_to_32F();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ break;
+
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ case kVDPixType_8_B8R8:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+
+ case kVDPixType_V210:
+ gen.conv_V210_to_32F();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+ break;
+
+ case kVDPixType_V210:
+ switch(srcType) {
+ case kVDPixType_32F_32F_32F_LE:
+ if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_422)
+ srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_422, w, h);
+
+ gen.conv_32F_to_V210();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_V210;
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ targetType = kVDPixType_32F_32F_32F_LE;
+ goto type_reconvert;
+
+ case kVDPixType_8_8_8:
+ if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_422)
+ srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_422, w, h);
+
+ targetType = kVDPixType_32F_32F_32F_LE;
+ goto type_reconvert;
+
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ case kVDPixType_8_B8R8:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+
+ default:
+ VDASSERT(false);
+ }
+ break;
+
+ case kVDPixType_32F_LE:
+ switch(srcType) {
+ case kVDPixType_8:
+ gen.conv_8_to_32F();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ break;
+ case kVDPixType_16F_LE:
+ gen.conv_16F_to_32F();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ break;
+ default:
+ VDASSERT(false);
+ }
+ break;
+
+ case kVDPixType_8_B8R8:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ gen.swap(1);
+ gen.swap(2);
+ gen.interleave_B8R8();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_B8R8;
+ break;
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ }
+
+ return srcToken;
+ }
+}
+
+IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmap& dst, const VDPixmap& src) {
+ const VDPixmapLayout& dstlayout = VDPixmapToLayoutFromBase(dst, dst.data);
+ const VDPixmapLayout& srclayout = VDPixmapToLayoutFromBase(src, src.data);
+
+ return VDPixmapCreateBlitter(dstlayout, srclayout);
+}
+
+IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmapLayout& dst, const VDPixmapLayout& src) {
+ if (src.format == dst.format) {
+ return VDCreatePixmapUberBlitterDirectCopy(dst, src);
+ }
+
+ uint32 srcToken = VDPixmapGetFormatTokenFromFormat(src.format);
+ uint32 dstToken = VDPixmapGetFormatTokenFromFormat(dst.format);
+
+ VDPixmapUberBlitterGenerator gen;
+
+ // load source channels
+ int w = src.w;
+ int h = src.h;
+
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_1:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 7) >> 3);
+ break;
+
+ case kVDPixType_2:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 3) >> 2);
+ break;
+
+ case kVDPixType_4:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 1) >> 1);
+ break;
+
+ case kVDPixType_8:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
+ break;
+
+ case kVDPixType_555_LE:
+ case kVDPixType_565_LE:
+ case kVDPixType_1555_LE:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*2);
+ break;
+
+ case kVDPixType_888:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*3);
+ break;
+
+ case kVDPixType_8888:
+ case kVDPixType_32F_LE:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*4);
+ break;
+
+ case kVDPixType_32Fx4_LE:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*16);
+ break;
+
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, ((w + 1) & ~1)*2);
+ break;
+
+ case kVDPixType_8_8_8:
+ {
+ uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ int cxbits = sampInfo.mCXBits;
+ int cybits = sampInfo.mCYBits;
+ int w2 = -(-w >> cxbits);
+ int h2 = -(-h >> cybits);
+ gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2);
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
+ gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2);
+ }
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ {
+ uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
+ uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
+ uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
+
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ int cxbits = sampInfo.mCXBits;
+ int cybits = sampInfo.mCYBits;
+ int w2 = -(-w >> cxbits);
+ int h2 = -(-h >> cybits);
+ gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2 * 2);
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*2);
+ gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2 * 2);
+ }
+ break;
+
+ case kVDPixType_32F_32F_32F_LE:
+ {
+ uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ int cxbits = sampInfo.mCXBits;
+ int cybits = sampInfo.mCYBits;
+ int w2 = -(-w >> cxbits);
+ int h2 = -(-h >> cybits);
+ gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2 * 4);
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*4);
+ gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2 * 4);
+ }
+ break;
+
+ case kVDPixType_V210:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, ((w + 5) / 6) * 4);
+ break;
+
+ case kVDPixType_8_B8R8:
+ {
+ uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ uint32 ctoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_B8R8;
+
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ int cxbits = sampInfo.mCXBits;
+ int cybits = sampInfo.mCYBits;
+ int w2 = -(-w >> cxbits);
+ int h2 = -(-h >> cybits);
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
+ gen.ldsrc(0, 1, 0, 0, w2, h2, ctoken, w2*2);
+ }
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+
+ // check if we need a color space change
+ if ((srcToken ^ dstToken) & kVDPixSpace_Mask) {
+ // first, if we're dealing with an interleaved format, deinterleave it
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_B8G8_R8G8:
+ gen.dup();
+ gen.dup();
+ gen.extract_8in32(2, (w + 1) >> 1, h);
+ gen.swap(2);
+ gen.extract_8in16(1, w, h);
+ gen.swap(1);
+ gen.extract_8in32(0, (w + 1) >> 1, h);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ break;
+
+ case kVDPixType_G8B8_G8R8:
+ gen.dup();
+ gen.dup();
+ gen.extract_8in32(3, (w + 1) >> 1, h);
+ gen.swap(2);
+ gen.extract_8in16(0, w, h);
+ gen.swap(1);
+ gen.extract_8in32(1, (w + 1) >> 1, h);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ break;
+
+ case kVDPixType_8_B8R8:
+ gen.dup();
+ gen.extract_8in16(1, (w + 1) >> 1, (h + 1) >> 1);
+ gen.swap(2);
+ gen.swap(1);
+ gen.extract_8in16(0, (w + 1) >> 1, (h + 1) >> 1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ break;
+
+ case kVDPixType_V210:
+ gen.conv_V210_to_32F();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ break;
+ }
+
+ // if the source is subsampled, converge on 4:4:4 subsampling, but only if we actually need
+ // the auxiliary channels
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ if ((dstToken & kVDPixSpace_Mask) != kVDPixSpace_Y_601 && (dstToken & kVDPixSpace_Mask) != kVDPixSpace_Y_709) {
+ if (sampInfo.mCXBits | sampInfo.mCYBits | sampInfo.mCXOffset16 | sampInfo.mCbYOffset16 | sampInfo.mCrYOffset16)
+ srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_444, w, h);
+ }
+
+ // change color spaces
+ uint32 dstSpace = dstToken & kVDPixSpace_Mask;
+ while((srcToken ^ dstToken) & kVDPixSpace_Mask) {
+ uint32 srcSpace = srcToken & kVDPixSpace_Mask;
+ uint32 targetSpace = dstSpace;
+
+space_reconvert:
+ switch(targetSpace) {
+ case kVDPixSpace_BGR:
+ switch(srcSpace) {
+ case kVDPixSpace_YCC_709:
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_8_8_8:
+ gen.ycbcr709_to_rgb32();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_32F_32F_32F_LE, w, h);
+ gen.ycbcr709_to_rgb32_32f();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
+ break;
+
+ case kVDPixType_32F_32F_32F_LE:
+ gen.ycbcr709_to_rgb32_32f();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ case kVDPixSpace_YCC_601:
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_8_8_8:
+ gen.ycbcr601_to_rgb32();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_32F_32F_32F_LE, w, h);
+ gen.ycbcr601_to_rgb32_32f();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
+ break;
+
+ case kVDPixType_32F_32F_32F_LE:
+ gen.ycbcr601_to_rgb32_32f();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ case kVDPixSpace_Y_601:
+ targetSpace = kVDPixSpace_YCC_601;
+ goto space_reconvert;
+
+ case kVDPixSpace_Pal:
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_1:
+ gen.conv_Pal1_to_8888(0);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_2:
+ gen.conv_Pal2_to_8888(0);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_4:
+ gen.conv_Pal4_to_8888(0);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_8:
+ gen.conv_Pal8_to_8888(0);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+ case kVDPixSpace_Y_601:
+ if (srcSpace == kVDPixSpace_YCC_601) {
+ gen.pop();
+ gen.swap(1);
+ gen.pop();
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_32F_32F_32F_LE:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_32F_LE;
+ break;
+ case kVDPixType_16F_16F_16F_LE:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_16F_LE;
+ break;
+ case kVDPixType_8_8_8:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_8;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8, w, h);
+ break;
+ } else if (srcSpace == kVDPixSpace_YCC_709) {
+ gen.pop();
+ gen.swap(1);
+ gen.pop();
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_32F_32F_32F_LE:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_32F_LE;
+ break;
+ case kVDPixType_16F_16F_16F_LE:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_16F_LE;
+ break;
+ case kVDPixType_8_8_8:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_8;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8, w, h);
+ break;
+ }
+ // fall through
+ case kVDPixSpace_YCC_601:
+ switch(srcSpace) {
+ case kVDPixSpace_BGR:
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8888, w, h);
+ gen.rgb32_to_ycbcr601();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_601 | kVDPixType_8_8_8;
+ break;
+ case kVDPixSpace_Y_601:
+ case kVDPixSpace_Y_709:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_601 | kVDPixType_8;
+
+ {
+ const VDPixmapSamplingInfo& sinfo = VDPixmapGetSamplingInfo(dstToken);
+ int cw = ((w - 1) >> sinfo.mCXBits) + 1;
+ int ch = ((h - 1) >> sinfo.mCYBits) + 1;
+
+ gen.ldconst(0x80, cw, cw, ch, srcToken);
+ }
+
+ gen.dup();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = kVDPixSpace_YCC_601 | kVDPixType_8_8_8 | (dstToken & kVDPixSamp_Mask);
+ break;
+ case kVDPixSpace_YCC_709:
+ VDASSERT((srcToken & kVDPixType_Mask) == kVDPixType_8_8_8);
+ gen.ycbcr709_to_ycbcr601();
+ srcToken = (srcToken & ~kVDPixSpace_Mask) | kVDPixSpace_YCC_601;
+ break;
+
+ case kVDPixSpace_Pal:
+ targetSpace = kVDPixSpace_BGR;
+ goto space_reconvert;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+ case kVDPixSpace_YCC_709:
+ switch(srcSpace) {
+ case kVDPixSpace_BGR:
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8888, w, h);
+ gen.rgb32_to_ycbcr709();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_709 | kVDPixType_8_8_8;
+ break;
+ case kVDPixSpace_Y_709:
+ case kVDPixSpace_Y_601:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_709 | kVDPixType_8;
+
+ {
+ const VDPixmapSamplingInfo& sinfo = VDPixmapGetSamplingInfo(dstToken);
+ int cw = ((w - 1) >> sinfo.mCXBits) + 1;
+ int ch = ((h - 1) >> sinfo.mCYBits) + 1;
+
+ gen.ldconst(0x80, cw, cw, ch, srcToken);
+ }
+
+ gen.dup();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = kVDPixSpace_YCC_709 | kVDPixType_8_8_8 | (dstToken & kVDPixSamp_Mask);
+ break;
+ case kVDPixSpace_YCC_601:
+ VDASSERT((srcToken & kVDPixType_Mask) == kVDPixType_8_8_8 || (srcToken & kVDPixType_Mask) == kVDPixType_32F_32F_32F_LE);
+ gen.ycbcr601_to_ycbcr709();
+ srcToken = (srcToken & ~kVDPixSpace_Mask) | kVDPixSpace_YCC_709;
+ break;
+ case kVDPixSpace_Pal:
+ targetSpace = kVDPixSpace_BGR;
+ goto space_reconvert;
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ }
+ }
+
+ // check if we need a type change
+ //
+ // Note: If the sampling is also different, we have to be careful about what types we
+ // target. The type conversion may itself involve a sampling conversion, so things get
+ // VERY tricky here.
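+	// For example, going from an 8-bit planar source to a 16F planar destination with
+	// different subsampling: the type conversion below targets 32F instead of 16F, the
+	// sampling conversion then runs on the 32F data, and the final BlitterConvertType()
+	// call at the end of this function drops the result to 16F.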
+ if ((srcToken ^ dstToken) & kVDPixType_Mask) {
+ bool samplingDifferent = 0 != ((srcToken ^ dstToken) & kVDPixSamp_Mask);
+ uint32 intermediateTypeToken = dstToken & kVDPixType_Mask;
+
+ if (samplingDifferent) {
+ switch(dstToken & kVDPixType_Mask) {
+ case kVDPixType_16F_16F_16F_LE:
+ intermediateTypeToken = kVDPixType_32F_32F_32F_LE;
+ break;
+ case kVDPixType_8_B8R8:
+ intermediateTypeToken = kVDPixType_8_8_8;
+ break;
+ }
+ }
+
+ srcToken = BlitterConvertType(gen, srcToken, (dstToken & ~kVDPixType_Mask) | intermediateTypeToken, w, h);
+ }
+
+ // convert subsampling if necessary
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_8_8_8:
+ case kVDPixType_16F_16F_16F_LE:
+ case kVDPixType_32F_32F_32F_LE:
+ if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
+ srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);
+ break;
+ }
+
+ // check if we need a type change (possible with 16F)
+ srcToken = BlitterConvertType(gen, srcToken, dstToken, w, h);
+
+ return gen.create();
+}
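+// Usage sketch (not part of the original patch; the variable names below are only an
+// assumption of how a caller would typically drive this API, based on the declarations
+// above):
+//
+//     IVDPixmapBlitter *blitter = VDPixmapCreateBlitter(dstPixmap, srcPixmap);
+//     if (blitter) {
+//         blitter->Blit(dstPixmap, srcPixmap);   // or Blit(dst, &subRect, src)
+//         delete blitter;                        // assuming the caller owns and deletes it
+//     }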
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_16f.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_16f.cpp
new file mode 100644
index 000000000..3e9af1a1b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_16f.cpp
@@ -0,0 +1,40 @@
+#include <vd2/system/halffloat.h>
+#include "uberblit_16f.h"
+
+///////////////////////////////////////////////////////////////////////////////
+
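+// Row converters between 32-bit float and 16-bit half-float samples. Each Compute()
+// call translates one row of the upstream generator using the scalar half-float
+// helpers from <vd2/system/halffloat.h>.
+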
+void VDPixmapGen_32F_To_16F::Start() {
+ StartWindow(mWidth * sizeof(uint16));
+}
+
+uint32 VDPixmapGen_32F_To_16F::GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_16F_LE;
+}
+
+void VDPixmapGen_32F_To_16F::Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 w = mWidth;
+
+ for(uint32 i=0; i<w; ++i)
+ *dst++ = VDConvertFloatToHalf(src++);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGen_16F_To_32F::Start() {
+ StartWindow(mWidth * sizeof(float));
+}
+
+uint32 VDPixmapGen_16F_To_32F::GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+}
+
+void VDPixmapGen_16F_To_32F::Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 w = mWidth;
+
+ for(uint32 i=0; i<w; ++i)
+ VDConvertHalfToFloat(*src++, dst++);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_gen.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_gen.cpp
new file mode 100644
index 000000000..f93ca322e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_gen.cpp
@@ -0,0 +1,1597 @@
+#include <vd2/system/vdalloc.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "uberblit.h"
+#include "uberblit_gen.h"
+#include "uberblit_fill.h"
+#include "uberblit_input.h"
+#include "uberblit_resample.h"
+#include "uberblit_resample_special.h"
+#include "uberblit_ycbcr.h"
+#include "uberblit_rgb.h"
+#include "uberblit_swizzle.h"
+#include "uberblit_pal.h"
+#include "uberblit_16f.h"
+#include "uberblit_v210.h"
+
+#ifdef VD_CPU_X86
+ #include "uberblit_swizzle_x86.h"
+ #include "uberblit_ycbcr_x86.h"
+ #include "uberblit_rgb_x86.h"
+ #include "uberblit_resample_special_x86.h"
+#endif
+
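+// Helpers for draining a generator chain into a destination plane. VDPixmapGenerate()
+// copies each produced row with memcpy, while VDPixmapGenerateFast() lets the generator
+// write straight into the destination via ProcessRow() (used when the requested output
+// index is 0).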
+void VDPixmapGenerate(void *dst, ptrdiff_t pitch, sint32 bpr, sint32 height, IVDPixmapGen *gen, int genIndex) {
+ for(sint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, genIndex), bpr);
+ vdptrstep(dst, pitch);
+ }
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapGenerateFast(void *dst, ptrdiff_t pitch, sint32 height, IVDPixmapGen *gen) {
+ for(sint32 y=0; y<height; ++y) {
+ gen->ProcessRow(dst, y);
+ vdptrstep(dst, pitch);
+ }
+ VDCPUCleanupExtensions();
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+IVDPixmapBlitter *VDCreatePixmapUberBlitterDirectCopy(const VDPixmap& dst, const VDPixmap& src) {
+ return new VDPixmapUberBlitterDirectCopy;
+}
+
+IVDPixmapBlitter *VDCreatePixmapUberBlitterDirectCopy(const VDPixmapLayout& dst, const VDPixmapLayout& src) {
+ return new VDPixmapUberBlitterDirectCopy;
+}
+
+VDPixmapUberBlitterDirectCopy::VDPixmapUberBlitterDirectCopy() {
+}
+
+VDPixmapUberBlitterDirectCopy::~VDPixmapUberBlitterDirectCopy() {
+}
+
+void VDPixmapUberBlitterDirectCopy::Blit(const VDPixmap& dst, const VDPixmap& src) {
+ Blit(dst, NULL, src);
+}
+
+void VDPixmapUberBlitterDirectCopy::Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src) {
+ VDASSERT(dst.format == src.format);
+
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(dst.format);
+
+ void *p = dst.data;
+ void *p2 = dst.data2;
+ void *p3 = dst.data3;
+ int w = dst.w;
+ int h = dst.h;
+
+ if (formatInfo.qchunky) {
+ w = (w + formatInfo.qw - 1) / formatInfo.qw;
+ h = -(-h >> formatInfo.qhbits);
+ }
+
+ int w2 = -(-dst.w >> formatInfo.auxwbits);
+ int h2 = -(-dst.h >> formatInfo.auxhbits);
+
+ if (rDst) {
+ int x1 = rDst->left;
+ int y1 = rDst->top;
+ int x2 = rDst->right;
+ int y2 = rDst->bottom;
+
+ VDASSERT(x1 >= 0 && y1 >= 0 && x2 <= w && y2 <= h && x2 >= x1 && y2 >= y1);
+
+ if (x2 < x1 || y2 < y1)
+ return;
+
+ p = vdptroffset(dst.data, dst.pitch * y1 + x1 * formatInfo.qsize);
+ w = x2 - x1;
+ h = y2 - y1;
+
+ if (formatInfo.auxbufs >= 1) {
+ VDASSERT(!((x1|x2) & ((1 << formatInfo.auxwbits) - 1)));
+ VDASSERT(!((y1|y2) & ((1 << formatInfo.auxhbits) - 1)));
+
+ int ax1 = x1 >> formatInfo.auxwbits;
+ int ay1 = y1 >> formatInfo.auxhbits;
+ int ax2 = x2 >> formatInfo.auxwbits;
+ int ay2 = y2 >> formatInfo.auxhbits;
+
+ p2 = vdptroffset(dst.data2, dst.pitch2 * ay1 + ax1);
+ w2 = ax2 - ax1;
+ h2 = ay2 - ay1;
+
+ if (formatInfo.auxbufs >= 2)
+ p3 = vdptroffset(dst.data3, dst.pitch3 * ay1 + ax1);
+ }
+ }
+
+ uint32 bpr = formatInfo.qsize * w;
+
+ VDMemcpyRect(p, dst.pitch, src.data, src.pitch, bpr, h);
+
+ if (formatInfo.auxbufs >= 1) {
+ VDMemcpyRect(p2, dst.pitch2, src.data2, src.pitch2, w2 * formatInfo.auxsize, h2);
+
+ if (formatInfo.auxbufs >= 2)
+ VDMemcpyRect(p3, dst.pitch3, src.data3, src.pitch3, w2 * formatInfo.auxsize, h2);
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+VDPixmapUberBlitter::VDPixmapUberBlitter() {
+}
+
+VDPixmapUberBlitter::~VDPixmapUberBlitter() {
+ while(!mGenerators.empty()) {
+ delete mGenerators.back();
+ mGenerators.pop_back();
+ }
+}
+
+void VDPixmapUberBlitter::Blit(const VDPixmap& dst, const VDPixmap& src) {
+ Blit(dst, NULL, src);
+}
+
+void VDPixmapUberBlitter::Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src) {
+ for(Sources::const_iterator it(mSources.begin()), itEnd(mSources.end()); it!=itEnd; ++it) {
+ const SourceEntry& se = *it;
+ const void *p;
+ ptrdiff_t pitch;
+
+ switch(se.mSrcPlane) {
+ case 0:
+ p = src.data;
+ pitch = src.pitch;
+ break;
+ case 1:
+ p = src.data2;
+ pitch = src.pitch2;
+ break;
+ case 2:
+ p = src.data3;
+ pitch = src.pitch3;
+ break;
+ default:
+ VDASSERT(false);
+ break;
+ }
+
+ se.mpSrc->SetSource((const char *)p + pitch*se.mSrcY + se.mSrcX, pitch, src.palette);
+ }
+
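+	// Dispatch on the number of output planes. Three outputs means a planar destination;
+	// whether the planes can be rendered in fully separate passes depends on the
+	// independence flags computed in VDPixmapUberBlitterGenerator::create().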
+ if (mOutputs[2].mpSrc) {
+ if (mbIndependentPlanes)
+ Blit3Separated(dst, rDst);
+ else if (mbIndependentChromaPlanes)
+ Blit3Split(dst, rDst);
+ else
+ Blit3(dst, rDst);
+ } else if (mOutputs[1].mpSrc) {
+ if (mbIndependentPlanes)
+ Blit2Separated(dst, rDst);
+ else
+ Blit2(dst, rDst);
+ } else
+ Blit(dst, rDst);
+}
+
+void VDPixmapUberBlitter::Blit(const VDPixmap& dst, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(dst.format);
+
+ mOutputs[0].mpSrc->AddWindowRequest(0, 0);
+ mOutputs[0].mpSrc->Start();
+
+ void *p = dst.data;
+ int w = dst.w;
+ int h = dst.h;
+
+ if (formatInfo.qchunky) {
+ w = (w + formatInfo.qw - 1) / formatInfo.qw;
+ h = -(-h >> formatInfo.qhbits);
+ }
+
+ if (rDst) {
+ int x1 = rDst->left;
+ int y1 = rDst->top;
+ int x2 = rDst->right;
+ int y2 = rDst->bottom;
+
+ if (formatInfo.qchunky) {
+ x1 = x1 / formatInfo.qw;
+ y1 = y1 / formatInfo.qh;
+ x2 = (x2 + formatInfo.qw - 1) / formatInfo.qw;
+ y2 = (y2 + formatInfo.qh - 1) / formatInfo.qh;
+ }
+
+ VDASSERT(x1 >= 0 && y1 >= 0 && x2 <= w && y2 <= h && x2 >= x1 && y2 >= y1);
+
+ if (x2 < x1 || y2 < y1)
+ return;
+
+ p = vdptroffset(dst.data, dst.pitch * y1 + x1 * formatInfo.qsize);
+ w = x2 - x1;
+ h = y2 - y1;
+ }
+
+ uint32 bpr = formatInfo.qsize * w;
+
+ if (mOutputs[0].mSrcIndex == 0)
+ VDPixmapGenerateFast(p, dst.pitch, h, mOutputs[0].mpSrc);
+ else
+ VDPixmapGenerate(p, dst.pitch, bpr, h, mOutputs[0].mpSrc, mOutputs[0].mSrcIndex);
+}
+
+void VDPixmapUberBlitter::Blit3(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[1].mpSrc;
+ int idx = mOutputs[1].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[2].mpSrc;
+ int idx1 = mOutputs[2].mSrcIndex;
+ IVDPixmapGen *gen2 = mOutputs[0].mpSrc;
+ int idx2 = mOutputs[0].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+ gen2->AddWindowRequest(0, 0);
+ gen2->Start();
+
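+	// auxstep is a 32-bit fixed-point increment: 2 * (0x80000000 >> auxhbits), i.e.
+	// 2^32 / 2^auxhbits (wrapping to 0 when auxhbits == 0). auxaccum overflows back to
+	// zero once every 2^auxhbits luma rows, which is exactly when the next chroma row
+	// must be emitted.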
+ uint32 auxstep = 0x80000000UL >> formatInfo.auxhbits;
+ uint32 auxaccum = 0;
+
+ auxstep += auxstep;
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint32 bpr2 = formatInfo.auxsize * -(-px.w >> formatInfo.auxwbits);
+ uint8 *dst = (uint8 *)px.data;
+ uint8 *dst2 = (uint8 *)px.data2;
+ uint8 *dst3 = (uint8 *)px.data3;
+ ptrdiff_t pitch = px.pitch;
+ ptrdiff_t pitch2 = px.pitch2;
+ ptrdiff_t pitch3 = px.pitch3;
+ uint32 y2 = 0;
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+
+ if (!auxaccum) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ memcpy(dst3, gen2->GetRow(y2, idx2), bpr2);
+ vdptrstep(dst3, pitch3);
+ ++y2;
+ }
+
+ auxaccum += auxstep;
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapUberBlitter::Blit3Split(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[1].mpSrc;
+ int idx = mOutputs[1].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[2].mpSrc;
+ int idx1 = mOutputs[2].mSrcIndex;
+ IVDPixmapGen *gen2 = mOutputs[0].mpSrc;
+ int idx2 = mOutputs[0].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+ gen2->AddWindowRequest(0, 0);
+ gen2->Start();
+
+ uint32 auxstep = 0x80000000UL >> formatInfo.auxhbits;
+ uint32 auxaccum = 0;
+
+ auxstep += auxstep;
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint8 *dst = (uint8 *)px.data;
+ ptrdiff_t pitch = px.pitch;
+
+ if (idx == 0) {
+ for(uint32 y=0; y<height; ++y) {
+ gen->ProcessRow(dst, y);
+ vdptrstep(dst, pitch);
+ }
+ } else {
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+ }
+ }
+
+ uint32 bpr2 = -(-px.w >> formatInfo.auxwbits) * formatInfo.auxsize;
+ uint8 *dst2 = (uint8 *)px.data2;
+ uint8 *dst3 = (uint8 *)px.data3;
+ ptrdiff_t pitch2 = px.pitch2;
+ ptrdiff_t pitch3 = px.pitch3;
+ uint32 y2 = 0;
+ for(uint32 y=0; y<height; ++y) {
+ if (!auxaccum) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ memcpy(dst3, gen2->GetRow(y2, idx2), bpr2);
+ vdptrstep(dst3, pitch3);
+ ++y2;
+ }
+
+ auxaccum += auxstep;
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapUberBlitter::Blit3Separated(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[1].mpSrc;
+ int idx = mOutputs[1].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[2].mpSrc;
+ int idx1 = mOutputs[2].mSrcIndex;
+ IVDPixmapGen *gen2 = mOutputs[0].mpSrc;
+ int idx2 = mOutputs[0].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+ gen2->AddWindowRequest(0, 0);
+ gen2->Start();
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint8 *dst = (uint8 *)px.data;
+ ptrdiff_t pitch = px.pitch;
+
+ if (idx == 0) {
+ for(uint32 y=0; y<height; ++y) {
+ gen->ProcessRow(dst, y);
+ vdptrstep(dst, pitch);
+ }
+ } else {
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+ }
+ }
+
+ uint32 bpr2 = -(-px.w >> formatInfo.auxwbits) * formatInfo.auxsize;
+ uint32 h2 = -(-px.h >> formatInfo.auxhbits);
+ uint8 *dst2 = (uint8 *)px.data2;
+ ptrdiff_t pitch2 = px.pitch2;
+ if (idx1 == 0) {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ gen1->ProcessRow(dst2, y2);
+ vdptrstep(dst2, pitch2);
+ }
+ } else {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ }
+ }
+
+ uint8 *dst3 = (uint8 *)px.data3;
+ ptrdiff_t pitch3 = px.pitch3;
+ if (idx2 == 0) {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ gen2->ProcessRow(dst3, y2);
+ vdptrstep(dst3, pitch3);
+ }
+ } else {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ memcpy(dst3, gen2->GetRow(y2, idx2), bpr2);
+ vdptrstep(dst3, pitch3);
+ }
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapUberBlitter::Blit2(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[0].mpSrc;
+ int idx = mOutputs[0].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[1].mpSrc;
+ int idx1 = mOutputs[1].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+
+ uint32 auxstep = 0x80000000UL >> formatInfo.auxhbits;
+ uint32 auxaccum = 0;
+
+ auxstep += auxstep;
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint32 bpr2 = formatInfo.auxsize * -(-px.w >> formatInfo.auxwbits);
+ uint8 *dst = (uint8 *)px.data;
+ uint8 *dst2 = (uint8 *)px.data2;
+ ptrdiff_t pitch = px.pitch;
+ ptrdiff_t pitch2 = px.pitch2;
+ uint32 y2 = 0;
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+
+ if (!auxaccum) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ ++y2;
+ }
+
+ auxaccum += auxstep;
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapUberBlitter::Blit2Separated(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[0].mpSrc;
+ int idx = mOutputs[0].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[1].mpSrc;
+ int idx1 = mOutputs[1].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint8 *dst = (uint8 *)px.data;
+ ptrdiff_t pitch = px.pitch;
+
+ if (idx == 0) {
+ for(uint32 y=0; y<height; ++y) {
+ gen->ProcessRow(dst, y);
+ vdptrstep(dst, pitch);
+ }
+ } else {
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+ }
+ }
+
+ uint32 bpr2 = -(-px.w >> formatInfo.auxwbits) * formatInfo.auxsize;
+ uint32 h2 = -(-px.h >> formatInfo.auxhbits);
+ uint8 *dst2 = (uint8 *)px.data2;
+ ptrdiff_t pitch2 = px.pitch2;
+ if (idx1 == 0) {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ gen1->ProcessRow(dst2, y2);
+ vdptrstep(dst2, pitch2);
+ }
+ } else {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ }
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+VDPixmapUberBlitterGenerator::VDPixmapUberBlitterGenerator() {
+}
+
+VDPixmapUberBlitterGenerator::~VDPixmapUberBlitterGenerator() {
+ while(!mGenerators.empty()) {
+ delete mGenerators.back();
+ mGenerators.pop_back();
+ }
+}
+
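+// The generator builder is a small stack machine: ldsrc/ldconst push value nodes, the
+// conv_*/interleave_*/resample ops replace the top entries with new nodes, and
+// swap/dup/pop rearrange the stack. swap(n) exchanges the top of the stack with the
+// entry n positions below it.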
+void VDPixmapUberBlitterGenerator::swap(int index) {
+ std::swap(mStack.back(), (&mStack.back())[-index]);
+}
+
+void VDPixmapUberBlitterGenerator::dup() {
+ mStack.push_back(mStack.back());
+}
+
+void VDPixmapUberBlitterGenerator::pop() {
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::ldsrc(int srcIndex, int srcPlane, int x, int y, uint32 w, uint32 h, uint32 type, uint32 bpr) {
+ VDPixmapGenSrc *src = new VDPixmapGenSrc;
+
+ src->Init(w, h, type, bpr);
+
+ mGenerators.push_back(src);
+ mStack.push_back(StackEntry(src, 0));
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = srcPlane;
+ se.mSrcX = x;
+ se.mSrcY = y;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::ldconst(uint8 fill, uint32 bpr, uint32 w, uint32 h, uint32 type) {
+ VDPixmapGenFill8 *src = new VDPixmapGenFill8;
+
+ src->Init(fill, bpr, w, h, type);
+
+ mGenerators.push_back(src);
+ mStack.push_back(StackEntry(src, 0));
+}
+
+void VDPixmapUberBlitterGenerator::extract_8in16(int offset, uint32 w, uint32 h) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_8In16 *src = NULL;
+
+#if VD_CPU_X86
+ if (MMX_enabled) {
+ if (offset == 0)
+ src = new VDPixmapGen_8In16_Even_MMX;
+ else if (offset == 1)
+ src = new VDPixmapGen_8In16_Odd_MMX;
+ }
+#endif
+ if (!src)
+ src = new VDPixmapGen_8In16;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, offset, w, h);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::extract_8in32(int offset, uint32 w, uint32 h) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_8In32 *src = NULL;
+
+#if VD_CPU_X86
+ if (MMX_enabled) {
+ if ((unsigned)offset < 4)
+ src = new VDPixmapGen_8In32_MMX;
+ }
+#endif
+
+ if (!src)
+ src = new VDPixmapGen_8In32;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, offset, w, h);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::swap_8in16(uint32 w, uint32 h, uint32 bpr) {
+ StackEntry *args = &mStack.back();
+
+#if VD_CPU_X86
+ VDPixmapGen_Swap8In16 *src = MMX_enabled ? new VDPixmapGen_Swap8In16_MMX : new VDPixmapGen_Swap8In16;
+#else
+ VDPixmapGen_Swap8In16 *src = new VDPixmapGen_Swap8In16;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, w, h, bpr);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_Pal1_to_8888(int srcIndex) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_Pal1_To_X8R8G8B8 *src = new VDPixmapGen_Pal1_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = 0;
+ se.mSrcX = 0;
+ se.mSrcY = 0;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::conv_Pal2_to_8888(int srcIndex) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_Pal2_To_X8R8G8B8 *src = new VDPixmapGen_Pal2_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = 0;
+ se.mSrcX = 0;
+ se.mSrcY = 0;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::conv_Pal4_to_8888(int srcIndex) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_Pal4_To_X8R8G8B8 *src = new VDPixmapGen_Pal4_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = 0;
+ se.mSrcX = 0;
+ se.mSrcY = 0;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::conv_Pal8_to_8888(int srcIndex) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_Pal8_To_X8R8G8B8 *src = new VDPixmapGen_Pal8_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = 0;
+ se.mSrcX = 0;
+ se.mSrcY = 0;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::pointh(float xoffset, float xfactor, uint32 w) {
+ StackEntry *args = &mStack.back();
+
+ if (xoffset != 0.5f || xfactor != 1.0f) {
+ VDPixmapGenResampleRow *src = new VDPixmapGenResampleRow;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, w, xoffset, xfactor, nsVDPixmap::kFilterPoint, 0, false);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::pointv(float yoffset, float yfactor, uint32 h) {
+ StackEntry *args = &mStack.back();
+
+ if (yoffset != 0.5f || yfactor != 1.0f) {
+ VDPixmapGenResampleCol *src = new VDPixmapGenResampleCol;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, h, yoffset, yfactor, nsVDPixmap::kFilterPoint, 0, false);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::linearh(float xoffset, float xfactor, uint32 w, bool interpOnly) {
+ StackEntry *args = &mStack.back();
+ IVDPixmapGen *src = args[0].mpSrc;
+ int srcIndex = args[0].mSrcIndex;
+
+ sint32 srcw = src->GetWidth(srcIndex);
+ if (xoffset == 0.5f && xfactor == 1.0f && srcw == w)
+ return;
+
+ if (xoffset == 0.5f && (src->GetType(srcIndex) & kVDPixType_Mask) == kVDPixType_8) {
+ if (xfactor == 2.0f && w == ((srcw + 1) >> 1)) {
+ VDPixmapGenResampleRow_d2_p0_lin_u8 *out = new VDPixmapGenResampleRow_d2_p0_lin_u8;
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (xfactor == 4.0f && w == ((srcw + 3) >> 2)) {
+ VDPixmapGenResampleRow_d4_p0_lin_u8 *out = new VDPixmapGenResampleRow_d4_p0_lin_u8;
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (xfactor == 0.5f && w == srcw*2) {
+#if VD_CPU_X86
+ VDPixmapGenResampleRow_x2_p0_lin_u8 *out = ISSE_enabled ? new VDPixmapGenResampleRow_x2_p0_lin_u8_ISSE : new VDPixmapGenResampleRow_x2_p0_lin_u8;
+#else
+ VDPixmapGenResampleRow_x2_p0_lin_u8 *out = new VDPixmapGenResampleRow_x2_p0_lin_u8;
+#endif
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (xfactor == 0.25f && w == srcw*4) {
+#if VD_CPU_X86
+ VDPixmapGenResampleRow_x4_p0_lin_u8 *out = MMX_enabled ? new VDPixmapGenResampleRow_x4_p0_lin_u8_MMX : new VDPixmapGenResampleRow_x4_p0_lin_u8;
+#else
+ VDPixmapGenResampleRow_x4_p0_lin_u8 *out = new VDPixmapGenResampleRow_x4_p0_lin_u8;
+#endif
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+ }
+
+ VDPixmapGenResampleRow *out = new VDPixmapGenResampleRow;
+
+ out->Init(args[0].mpSrc, args[0].mSrcIndex, w, xoffset, xfactor, nsVDPixmap::kFilterLinear, 0, interpOnly);
+
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+}
+
+void VDPixmapUberBlitterGenerator::linearv(float yoffset, float yfactor, uint32 h, bool interpOnly) {
+ StackEntry *args = &mStack.back();
+ IVDPixmapGen *src = args[0].mpSrc;
+ int srcIndex = args[0].mSrcIndex;
+
+ sint32 srch = src->GetHeight(srcIndex);
+ if (yoffset == 0.5f && yfactor == 1.0f && srch == h)
+ return;
+
+ if ((src->GetType(srcIndex) & kVDPixType_Mask) == kVDPixType_8) {
+ if (yoffset == 1.0f && yfactor == 2.0f && h == ((srch + 1) >> 1)) {
+ VDPixmapGenResampleCol_x2_phalf_lin_u8 *out = new VDPixmapGenResampleCol_x2_phalf_lin_u8;
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (yoffset == 2.0f && yfactor == 4.0f && h == ((srch + 2) >> 2)) {
+ VDPixmapGenResampleCol_x4_p1half_lin_u8 *out = new VDPixmapGenResampleCol_x4_p1half_lin_u8;
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (yoffset == 0.25f && yfactor == 0.5f && h == srch*2) {
+#if VD_CPU_X86
+ VDPixmapGenResampleCol_d2_pnqrtr_lin_u8 *out = ISSE_enabled ? new VDPixmapGenResampleCol_d2_pnqrtr_lin_u8_ISSE : new VDPixmapGenResampleCol_d2_pnqrtr_lin_u8;
+#else
+ VDPixmapGenResampleCol_d2_pnqrtr_lin_u8 *out = new VDPixmapGenResampleCol_d2_pnqrtr_lin_u8;
+#endif
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (yoffset == 0.125f && yfactor == 0.25f && h == srch*4) {
+#if VD_CPU_X86
+ VDPixmapGenResampleCol_d4_pn38_lin_u8 *out = ISSE_enabled ? new VDPixmapGenResampleCol_d4_pn38_lin_u8_ISSE : new VDPixmapGenResampleCol_d4_pn38_lin_u8;
+#else
+ VDPixmapGenResampleCol_d4_pn38_lin_u8 *out = new VDPixmapGenResampleCol_d4_pn38_lin_u8;
+#endif
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+ }
+
+ VDPixmapGenResampleCol *out = new VDPixmapGenResampleCol;
+
+ out->Init(src, srcIndex, h, yoffset, yfactor, nsVDPixmap::kFilterLinear, 0, interpOnly);
+
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+}
+
+void VDPixmapUberBlitterGenerator::linear(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h) {
+ linearh(xoffset, xfactor, w, false);
+ linearv(yoffset, yfactor, h, false);
+}
+
+void VDPixmapUberBlitterGenerator::cubich(float xoffset, float xfactor, uint32 w, float splineFactor, bool interpOnly) {
+ StackEntry *args = &mStack.back();
+
+ if (xoffset != 0.5f || xfactor != 1.0f) {
+ VDPixmapGenResampleRow *src = new VDPixmapGenResampleRow;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, w, xoffset, xfactor, nsVDPixmap::kFilterCubic, splineFactor, interpOnly);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::cubicv(float yoffset, float yfactor, uint32 h, float splineFactor, bool interpOnly) {
+ StackEntry *args = &mStack.back();
+
+ if (yoffset != 0.5f || yfactor != 1.0f) {
+ VDPixmapGenResampleCol *src = new VDPixmapGenResampleCol;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, h, yoffset, yfactor, nsVDPixmap::kFilterCubic, splineFactor, interpOnly);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::cubic(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h, float splineFactor) {
+ cubich(xoffset, xfactor, w, splineFactor, false);
+ cubicv(yoffset, yfactor, h, splineFactor, false);
+}
+
+void VDPixmapUberBlitterGenerator::lanczos3h(float xoffset, float xfactor, uint32 w) {
+ StackEntry *args = &mStack.back();
+
+ if (xoffset != 0.5f || xfactor != 1.0f) {
+ VDPixmapGenResampleRow *src = new VDPixmapGenResampleRow;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, w, xoffset, xfactor, nsVDPixmap::kFilterLanczos3, 0, false);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::lanczos3v(float yoffset, float yfactor, uint32 h) {
+ StackEntry *args = &mStack.back();
+
+ if (yoffset != 0.5f || yfactor != 1.0f) {
+ VDPixmapGenResampleCol *src = new VDPixmapGenResampleCol;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, h, yoffset, yfactor, nsVDPixmap::kFilterLanczos3, 0, false);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::lanczos3(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h) {
+ lanczos3h(xoffset, xfactor, w);
+ lanczos3v(yoffset, yfactor, h);
+}
+
+void VDPixmapUberBlitterGenerator::conv_555_to_8888() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X1R5G5B5_To_X8R8G8B8 *src = MMX_enabled ? new VDPixmapGen_X1R5G5B5_To_X8R8G8B8_MMX : new VDPixmapGen_X1R5G5B5_To_X8R8G8B8;
+#else
+ VDPixmapGen_X1R5G5B5_To_X8R8G8B8 *src = new VDPixmapGen_X1R5G5B5_To_X8R8G8B8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_565_to_8888() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_R5G6B5_To_X8R8G8B8 *src = MMX_enabled ? new VDPixmapGen_R5G6B5_To_X8R8G8B8_MMX : new VDPixmapGen_R5G6B5_To_X8R8G8B8;
+#else
+ VDPixmapGen_R5G6B5_To_X8R8G8B8 *src = new VDPixmapGen_R5G6B5_To_X8R8G8B8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_888_to_8888() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_R8G8B8_To_A8R8G8B8 *src = MMX_enabled ? new VDPixmapGen_R8G8B8_To_X8R8G8B8_MMX : new VDPixmapGen_R8G8B8_To_A8R8G8B8;
+#else
+ VDPixmapGen_R8G8B8_To_A8R8G8B8 *src = new VDPixmapGen_R8G8B8_To_A8R8G8B8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_8_to_32F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_8_To_32F *src = new VDPixmapGen_8_To_32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_16F_to_32F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_16F_To_32F *src = new VDPixmapGen_16F_To_32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_V210_to_32F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_V210_To_32F *src = new VDPixmapGen_V210_To_32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
+void VDPixmapUberBlitterGenerator::conv_8888_to_X32F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X8R8G8B8_To_X32B32G32R32F *src = new VDPixmapGen_X8R8G8B8_To_X32B32G32R32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_8888_to_555() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X8R8G8B8_To_X1R5G5B5 *src = MMX_enabled ? new VDPixmapGen_X8R8G8B8_To_X1R5G5B5_MMX : new VDPixmapGen_X8R8G8B8_To_X1R5G5B5;
+#else
+ VDPixmapGen_X8R8G8B8_To_X1R5G5B5 *src = new VDPixmapGen_X8R8G8B8_To_X1R5G5B5;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_555_to_565() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X1R5G5B5_To_R5G6B5 *src = MMX_enabled ? new VDPixmapGen_X1R5G5B5_To_R5G6B5_MMX : new VDPixmapGen_X1R5G5B5_To_R5G6B5;
+#else
+ VDPixmapGen_X1R5G5B5_To_R5G6B5 *src = new VDPixmapGen_X1R5G5B5_To_R5G6B5;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_565_to_555() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_R5G6B5_To_X1R5G5B5 *src = MMX_enabled ? new VDPixmapGen_R5G6B5_To_X1R5G5B5_MMX : new VDPixmapGen_R5G6B5_To_X1R5G5B5;
+#else
+ VDPixmapGen_R5G6B5_To_X1R5G5B5 *src = new VDPixmapGen_R5G6B5_To_X1R5G5B5;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_8888_to_565() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X8R8G8B8_To_R5G6B5 *src = MMX_enabled ? new VDPixmapGen_X8R8G8B8_To_R5G6B5_MMX : new VDPixmapGen_X8R8G8B8_To_R5G6B5;
+#else
+ VDPixmapGen_X8R8G8B8_To_R5G6B5 *src = new VDPixmapGen_X8R8G8B8_To_R5G6B5;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_8888_to_888() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X8R8G8B8_To_R8G8B8 *src = MMX_enabled ? new VDPixmapGen_X8R8G8B8_To_R8G8B8_MMX : new VDPixmapGen_X8R8G8B8_To_R8G8B8;
+#else
+ VDPixmapGen_X8R8G8B8_To_R8G8B8 *src = new VDPixmapGen_X8R8G8B8_To_R8G8B8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_32F_to_8() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_32F_To_8 *src = new VDPixmapGen_32F_To_8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_X32F_to_8888() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X32B32G32R32F_To_X8R8G8B8 *src = new VDPixmapGen_X32B32G32R32F_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_32F_to_16F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_32F_To_16F *src = new VDPixmapGen_32F_To_16F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
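+// Packs the top three stack entries (three 32F planes) into a single V210 stream,
+// popping two entries so only the packed output remains on the stack.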
+void VDPixmapUberBlitterGenerator::conv_32F_to_V210() {
+ StackEntry *args = &*(mStack.end() - 3);
+ VDPixmapGen_32F_To_V210 *src = new VDPixmapGen_32F_To_V210;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::convd_8888_to_555() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X8R8G8B8_To_X1R5G5B5_Dithered *src = new VDPixmapGen_X8R8G8B8_To_X1R5G5B5_Dithered;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::convd_8888_to_565() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X8R8G8B8_To_R5G6B5_Dithered *src = new VDPixmapGen_X8R8G8B8_To_R5G6B5_Dithered;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::convd_32F_to_8() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_32F_To_8_Dithered *src = new VDPixmapGen_32F_To_8_Dithered;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::convd_X32F_to_8888() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X32B32G32R32F_To_X8R8G8B8_Dithered *src = new VDPixmapGen_X32B32G32R32F_To_X8R8G8B8_Dithered;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::interleave_B8G8_R8G8() {
+ StackEntry *args = &mStack.back() - 2;
+ VDPixmapGen_B8x3_To_B8G8_R8G8 *src = NULL;
+
+#if VD_CPU_X86
+ if (MMX_enabled)
+ src = new VDPixmapGen_B8x3_To_B8G8_R8G8_MMX;
+#endif
+
+ if (!src)
+ src = new VDPixmapGen_B8x3_To_B8G8_R8G8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::interleave_G8B8_G8R8() {
+ StackEntry *args = &mStack.back() - 2;
+ VDPixmapGen_B8x3_To_G8B8_G8R8 *src = NULL;
+
+#if VD_CPU_X86
+ if (MMX_enabled)
+ src = new VDPixmapGen_B8x3_To_G8B8_G8R8_MMX;
+#endif
+
+ if (!src)
+ src = new VDPixmapGen_B8x3_To_G8B8_G8R8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::interleave_X8R8G8B8() {
+ StackEntry *args = &mStack.back() - 2;
+ VDPixmapGen_B8x3_To_X8R8G8B8 *src = new VDPixmapGen_B8x3_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::interleave_B8R8() {
+ StackEntry *args = &mStack.back() - 1;
+
+#if VD_CPU_X86
+ VDPixmapGen_B8x2_To_B8R8 *src = MMX_enabled ? new VDPixmapGen_B8x2_To_B8R8_MMX : new VDPixmapGen_B8x2_To_B8R8;
+#else
+ VDPixmapGen_B8x2_To_B8R8 *src = new VDPixmapGen_B8x2_To_B8R8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+}
+
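+// YCbCr <-> RGB conversions. The ycbcr*_to_rgb32* ops consume three planar inputs from
+// the stack and leave one interleaved output; the rgb32_to_ycbcr* ops do the reverse,
+// replacing one interleaved input with three planar outputs.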
+void VDPixmapUberBlitterGenerator::ycbcr601_to_rgb32() {
+ StackEntry *args = &mStack.back() - 2;
+
+#ifdef VD_CPU_X86
+ VDPixmapGenYCbCr601ToRGB32 *src = MMX_enabled ? new VDPixmapGenYCbCr601ToRGB32_MMX : new VDPixmapGenYCbCr601ToRGB32;
+#else
+ VDPixmapGenYCbCr601ToRGB32 *src = new VDPixmapGenYCbCr601ToRGB32;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::ycbcr709_to_rgb32() {
+ StackEntry *args = &mStack.back() - 2;
+
+ VDPixmapGenYCbCr709ToRGB32 *src = new VDPixmapGenYCbCr709ToRGB32;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::rgb32_to_ycbcr601() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGenRGB32ToYCbCr601 *src = SSE2_enabled ? new VDPixmapGenRGB32ToYCbCr601_SSE2 : new VDPixmapGenRGB32ToYCbCr601;
+#else
+ VDPixmapGenRGB32ToYCbCr601 *src = new VDPixmapGenRGB32ToYCbCr601;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
+void VDPixmapUberBlitterGenerator::rgb32_to_ycbcr709() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGenRGB32ToYCbCr709 *src = new VDPixmapGenRGB32ToYCbCr709;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
+void VDPixmapUberBlitterGenerator::ycbcr601_to_rgb32_32f() {
+ StackEntry *args = &mStack.back() - 2;
+
+ VDPixmapGenYCbCr601ToRGB32F *src = new VDPixmapGenYCbCr601ToRGB32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::ycbcr709_to_rgb32_32f() {
+ StackEntry *args = &mStack.back() - 2;
+
+ VDPixmapGenYCbCr709ToRGB32F *src = new VDPixmapGenYCbCr709ToRGB32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::rgb32_to_ycbcr601_32f() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGenRGB32FToYCbCr601 *src = new VDPixmapGenRGB32FToYCbCr601;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
+void VDPixmapUberBlitterGenerator::rgb32_to_ycbcr709_32f() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGenRGB32FToYCbCr709 *src = new VDPixmapGenRGB32FToYCbCr709;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
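+// Rec.601 <-> Rec.709 matrix conversions operate on all three planes in place. The
+// floating-point variant is chosen when the top-of-stack plane type is 32F; otherwise
+// the 8-bit variant is used.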
+void VDPixmapUberBlitterGenerator::ycbcr601_to_ycbcr709() {
+ StackEntry *args = &mStack.back() - 2;
+
+ IVDPixmapGen *src;
+ if ((args[0].mpSrc->GetType(args[0].mSrcIndex) & kVDPixType_Mask) == kVDPixType_32F_LE) {
+ VDPixmapGenYCbCr601ToYCbCr709_32F *src2 = new VDPixmapGenYCbCr601ToYCbCr709_32F;
+
+ src2->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+ src = src2;
+ } else {
+ VDPixmapGenYCbCr601ToYCbCr709 *src2 = new VDPixmapGenYCbCr601ToYCbCr709;
+
+ src2->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+ src = src2;
+ }
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ args[1] = StackEntry(src, 1);
+ args[2] = StackEntry(src, 2);
+}
+
+void VDPixmapUberBlitterGenerator::ycbcr709_to_ycbcr601() {
+ StackEntry *args = &mStack.back() - 2;
+
+ IVDPixmapGen *src;
+ if ((args[0].mpSrc->GetType(args[0].mSrcIndex) & kVDPixType_Mask) == kVDPixType_32F_LE) {
+ VDPixmapGenYCbCr709ToYCbCr601_32F *src2 = new VDPixmapGenYCbCr709ToYCbCr601_32F;
+
+ src2->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+ src = src2;
+ } else {
+ VDPixmapGenYCbCr709ToYCbCr601 *src2 = new VDPixmapGenYCbCr709ToYCbCr601;
+
+ src2->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+ src = src2;
+ }
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ args[1] = StackEntry(src, 1);
+ args[2] = StackEntry(src, 2);
+}
+
+IVDPixmapBlitter *VDPixmapUberBlitterGenerator::create() {
+ vdautoptr<VDPixmapUberBlitter> blitter(new VDPixmapUberBlitter);
+
+ int numStackEntries = (int)mStack.size();
+
+ for(int i=0; i<3; ++i) {
+ if (i < numStackEntries) {
+ blitter->mOutputs[i].mpSrc = mStack[i].mpSrc;
+ blitter->mOutputs[i].mSrcIndex = mStack[i].mSrcIndex;
+ } else {
+ blitter->mOutputs[i].mpSrc = NULL;
+ blitter->mOutputs[i].mSrcIndex = 0;
+ }
+ }
+
+ mStack.clear();
+
+ // If this blitter has three outputs, determine if outputs 1 and 2 are independent
+ // from output 0.
+ blitter->mbIndependentChromaPlanes = true;
+ blitter->mbIndependentPlanes = true;
+ if (numStackEntries >= 3) {
+ int numGens = mGenerators.size();
+ vdfastvector<uint8> genflags(numGens, 0);
+
+ enum {
+ kFlagStateful = 0x80,
+ kFlagY = 0x01,
+ kFlagCb = 0x02,
+ kFlagCr = 0x04,
+ kFlagYCbCr = 0x07
+ };
+
+ for(int i=0; i<3; ++i)
+ genflags[std::find(mGenerators.begin(), mGenerators.end(), blitter->mOutputs[i].mpSrc) - mGenerators.begin()] |= (1 << i);
+
+ for(int i=0; i<numGens; ++i) {
+ IVDPixmapGen *gen = mGenerators[i];
+
+ if (gen->IsStateful())
+ genflags[i] |= kFlagStateful;
+ }
+
+ while(!mDependencies.empty()) {
+ const Dependency& dep = mDependencies.back();
+
+ genflags[dep.mSrcIdx] |= (genflags[dep.mDstIdx] & ~kFlagStateful);
+
+ mDependencies.pop_back();
+ }
+
+ for(int i=0; i<numGens; ++i) {
+ uint8 flags = genflags[i];
+
+ if (!(flags & kFlagStateful))
+ continue;
+
+ switch(flags & kFlagYCbCr) {
+ case 0:
+ case kFlagY:
+ case kFlagCb:
+ case kFlagCr:
+ break;
+ case kFlagCr | kFlagCb:
+ blitter->mbIndependentPlanes = false;
+ break;
+ case kFlagCb | kFlagY:
+ case kFlagCr | kFlagY:
+ case kFlagCr | kFlagCb | kFlagY:
+ blitter->mbIndependentPlanes = false;
+ blitter->mbIndependentChromaPlanes = false;
+ break;
+ }
+ }
+ } else if (numStackEntries >= 2) {
+ int numGens = mGenerators.size();
+ vdfastvector<uint8> genflags(numGens, 0);
+
+ enum {
+ kFlagStateful = 0x80,
+ kFlagY = 0x01,
+ kFlagC = 0x02,
+ kFlagYC = 0x03
+ };
+
+ for(int i=0; i<2; ++i)
+ genflags[std::find(mGenerators.begin(), mGenerators.end(), blitter->mOutputs[i].mpSrc) - mGenerators.begin()] |= (1 << i);
+
+ for(int i=0; i<numGens; ++i) {
+ IVDPixmapGen *gen = mGenerators[i];
+
+ if (gen->IsStateful())
+ genflags[i] |= kFlagStateful;
+ }
+
+ while(!mDependencies.empty()) {
+ const Dependency& dep = mDependencies.back();
+
+ genflags[dep.mSrcIdx] |= (genflags[dep.mDstIdx] & ~kFlagStateful);
+
+ mDependencies.pop_back();
+ }
+
+ for(int i=0; i<numGens; ++i) {
+ uint8 flags = genflags[i];
+
+ if (!(flags & kFlagStateful))
+ continue;
+
+ switch(flags & kFlagYC) {
+ case kFlagYC:
+ blitter->mbIndependentPlanes = false;
+ blitter->mbIndependentChromaPlanes = false;
+ break;
+ }
+ }
+ }
+
+ blitter->mGenerators.swap(mGenerators);
+ blitter->mSources.swap(mSources);
+ return blitter.release();
+}
+
+void VDPixmapUberBlitterGenerator::MarkDependency(IVDPixmapGen *dst, IVDPixmapGen *src) {
+ Generators::const_iterator it1(std::find(mGenerators.begin(), mGenerators.end(), dst));
+ Generators::const_iterator it2(std::find(mGenerators.begin(), mGenerators.end(), src));
+
+ VDASSERT(it1 != mGenerators.end());
+ VDASSERT(it2 != mGenerators.end());
+
+ int idx1 = it1 - mGenerators.begin();
+ int idx2 = it2 - mGenerators.begin();
+
+ Dependency dep = { idx1, idx2 };
+
+ mDependencies.push_back(dep);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample.cpp
new file mode 100644
index 000000000..1363fb730
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample.cpp
@@ -0,0 +1,623 @@
+#include <float.h>
+#include <math.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/memory.h>
+#include <vd2/system/math.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/resample.h>
+
+#include <vd2/Kasumi/resample_kernels.h>
+#include "resample_stages_x86.h"
+#include "uberblit_resample.h"
+
+namespace {
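+	// Rounded 16.16 fixed-point multiply.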
+ sint32 scale32x32_fp16(sint32 x, sint32 y) {
+ return (sint32)(((sint64)x * y + 0x8000) >> 16);
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactory(double cutoff, float filterFactor) {
+ return new T;
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactoryLinear(double cutoff, float filterFactor) {
+ return new T(VDResamplerLinearFilter(cutoff));
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactoryCubic(double cutoff, float filterFactor) {
+ return new T(VDResamplerCubicFilter(cutoff, filterFactor));
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactoryCubic2(double cutoff, float filterFactor) {
+ return new T(filterFactor);
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactoryLanczos3(double cutoff, float filterFactor) {
+ return new T(VDResamplerLanczos3Filter(cutoff));
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactory(double cutoff, float filterFactor) {
+ return new T;
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactoryLinear(double cutoff, float filterFactor) {
+ return new T(VDResamplerLinearFilter(cutoff));
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactoryCubic(double cutoff, float filterFactor) {
+ return new T(VDResamplerCubicFilter(cutoff, filterFactor));
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactoryCubic2(double cutoff, float filterFactor) {
+ return new T(filterFactor);
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactoryLanczos3(double cutoff, float filterFactor) {
+ return new T(VDResamplerLanczos3Filter(cutoff));
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDPixmapGenResampleRow
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDPixmapGenResampleRow::VDPixmapGenResampleRow()
+ : mpRowStage(NULL)
+ , mpRowStage2(NULL)
+{
+}
+
+VDPixmapGenResampleRow::~VDPixmapGenResampleRow() {
+ if (mpRowStage)
+ delete mpRowStage;
+}
+
+void VDPixmapGenResampleRow::Init(IVDPixmapGen *src, uint32 srcIndex, uint32 width, float offset, float step, nsVDPixmap::FilterMode filterMode, float filterFactor, bool interpolationOnly) {
+ InitSource(src, srcIndex);
+
+ sint32 u0 = (sint32)(offset * 65536.0);
+ sint32 dudx = (sint32)(step * 65536.0);
+
+ mAxis.Init(dudx);
+
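+	// x_2fc is the filter cutoff handed to the kernel factories below: it stays at 1.0
+	// when interpolating or upsampling and drops to 1/step when decimating, so the kernel
+	// can be widened to band-limit the source before the rate reduction.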
+ double x_2fc = 1.0;
+ if (!interpolationOnly && step > 1.0f)
+ x_2fc = 1.0 / step;
+
+ struct SpecialCaseSpanRoutine {
+ sint32 mPhase;
+ sint32 mStep;
+ uint32 mType;
+ nsVDPixmap::FilterMode mFilterMode;
+ uint32 mCPUFlags;
+ IVDResamplerSeparableRowStage *(*mpClassFactory)(double filterCutoff, float filterFactor);
+ };
+
+ static const SpecialCaseSpanRoutine kSpecialCaseSpanRoutines[]={
+ // Generic
+#if defined _M_IX86
+ { +0x0000, 0x008000, kVDPixType_8, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_INTEGER_SSE, RowFactory<VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf_ISSE> },
+#endif
+
+ { +0x0000, 0x008000, kVDPixType_8, nsVDPixmap::kFilterLinear, 0, RowFactory<VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf> },
+ };
+
+ long flags = CPUGetEnabledExtensions();
+ uint32 type = mpSrc->GetType(mSrcIndex) & kVDPixType_Mask;
+
+ for(int i=0; i<sizeof(kSpecialCaseSpanRoutines)/sizeof(kSpecialCaseSpanRoutines[0]); ++i) {
+ const SpecialCaseSpanRoutine& rout = kSpecialCaseSpanRoutines[i];
+
+ if (rout.mType != type)
+ continue;
+
+ if (x_2fc < 1.0)
+ continue;
+
+ if (rout.mStep != dudx)
+ continue;
+
+ if (rout.mPhase != u0)
+ continue;
+
+ if (rout.mFilterMode != filterMode)
+ continue;
+
+ if ((rout.mCPUFlags & flags) != rout.mCPUFlags)
+ continue;
+
+ mpRowStage = rout.mpClassFactory(x_2fc, filterFactor);
+ mpRowStage2 = mpRowStage->AsRowStage2();
+ break;
+ }
+
+ if (!mpRowStage) {
+ struct SpanRoutine {
+ uint32 mType;
+ bool mbInterpOnly;
+ nsVDPixmap::FilterMode mFilterMode;
+ uint32 mCPUFlags;
+ IVDResamplerSeparableRowStage *(*mpClassFactory)(double filterCutoff, float filterFactor);
+ };
+
+ static const SpanRoutine kSpanRoutines[]={
+#if defined _M_IX86
+ // X86
+ { kVDPixType_8888, false, nsVDPixmap::kFilterPoint, CPUF_SUPPORTS_MMX, RowFactory<VDResamplerSeparablePointRowStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterPoint, 0, RowFactory<VDResamplerSeparablePointRowStageX86> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE41, RowFactoryLinear<VDResamplerSeparableTableRowStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, RowFactoryLinear<VDResamplerSeparableTableRowStage8MMX> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, RowFactory<VDResamplerSeparableLinearRowStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE2, RowFactoryLinear<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, RowFactoryLinear<VDResamplerSeparableTableRowStageMMX> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE41, RowFactoryCubic<VDResamplerSeparableTableRowStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, RowFactoryCubic<VDResamplerSeparableTableRowStage8MMX> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, RowFactoryCubic2<VDResamplerSeparableCubicRowStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, RowFactoryCubic<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, RowFactoryCubic<VDResamplerSeparableTableRowStageMMX> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE41, RowFactoryLanczos3<VDResamplerSeparableTableRowStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_MMX, RowFactoryLanczos3<VDResamplerSeparableTableRowStage8MMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE2, RowFactoryLanczos3<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_MMX, RowFactoryLanczos3<VDResamplerSeparableTableRowStageMMX> },
+#elif defined _M_AMD64
+ // AMD64
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE2, RowFactoryLinear<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, RowFactoryCubic<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE2, RowFactoryLanczos3<VDResamplerSeparableTableRowStageSSE2> },
+#endif
+ // Generic
+ { kVDPixType_8, false, nsVDPixmap::kFilterPoint, 0, RowFactory<VDResamplerRowStageSeparablePoint8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterPoint, 0, RowFactory<VDResamplerRowStageSeparablePoint32> },
+ { kVDPixType_8, true, nsVDPixmap::kFilterLinear, 0, RowFactory<VDResamplerRowStageSeparableLinear8> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterLinear, 0, RowFactory<VDResamplerRowStageSeparableLinear32> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, 0, RowFactoryLinear<VDResamplerRowStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, 0, RowFactoryLinear<VDResamplerRowStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterLinear, 0, RowFactoryLinear<VDResamplerRowStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterLinear, 0, RowFactoryLinear<VDResamplerRowStageSeparableTable32Fx4> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, 0, RowFactoryCubic<VDResamplerRowStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, 0, RowFactoryCubic<VDResamplerRowStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterCubic, 0, RowFactoryCubic<VDResamplerRowStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterCubic, 0, RowFactoryCubic<VDResamplerRowStageSeparableTable32Fx4> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, 0, RowFactoryLanczos3<VDResamplerRowStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, 0, RowFactoryLanczos3<VDResamplerRowStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterLanczos3, 0, RowFactoryLanczos3<VDResamplerRowStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterLanczos3, 0, RowFactoryLanczos3<VDResamplerRowStageSeparableTable32Fx4> },
+ };
+
+ for(int i=0; i<sizeof(kSpanRoutines)/sizeof(kSpanRoutines[0]); ++i) {
+ const SpanRoutine& rout = kSpanRoutines[i];
+
+ if (rout.mType != type)
+ continue;
+
+ if (rout.mbInterpOnly && x_2fc < 1.0)
+ continue;
+
+ if (rout.mFilterMode != filterMode)
+ continue;
+
+ if ((rout.mCPUFlags & flags) != rout.mCPUFlags)
+ continue;
+
+ mpRowStage = rout.mpClassFactory(x_2fc, filterFactor);
+ mpRowStage2 = mpRowStage->AsRowStage2();
+ break;
+ }
+ }
+
+ VDASSERT(mpRowStage);
+
+ mRowFiltW = mpRowStage->GetWindowSize();
+
+ mpSrc->AddWindowRequest(0, 0);
+
+ sint32 fsx1 = (sint32)(offset * 65536.0) - ((mRowFiltW-1) << 15);
+ mAxis.Compute(width, fsx1, mSrcWidth, mRowFiltW);
+ mWidth = width;
+
+ switch(type) {
+ case kVDPixType_8:
+ mBytesPerSample = 1;
+ break;
+ case kVDPixType_8888:
+ case kVDPixType_32F_LE:
+ mBytesPerSample = 4;
+ break;
+ case kVDPixType_32Fx4_LE:
+ mBytesPerSample = 16;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+}
+
+void VDPixmapGenResampleRow::Start() {
+ StartWindow(mWidth * mBytesPerSample);
+
+ uint32 clipSpace = ((mRowFiltW*3*mBytesPerSample + 15) >> 4) << 2;
+ mTempSpace.resize(clipSpace);
+
+ if (mpRowStage2)
+ mpRowStage2->Init(mAxis, mSrcWidth);
+}
+
+void VDPixmapGenResampleRow::Compute(void *dst0, sint32 y) {
+ switch(mBytesPerSample) {
+ case 1:
+ Compute8(dst0, y);
+ break;
+ case 4:
+ Compute32(dst0, y);
+ break;
+ case 16:
+ Compute128(dst0, y);
+ break;
+ }
+}
+
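+// The horizontal axis is split into regions: a pre-copy run (entirely left of the source)
+// that replicates the first pixel, pre-clip and post-clip runs where the filter window
+// hangs off an edge and is fed an edge-padded copy in mTempSpace, an active run where the
+// window lies fully inside the source, a dual-clip run used when the source is too narrow
+// for the pre- and post-clip regions to be handled separately, and a post-copy run that
+// replicates the last pixel. (A row stage exposing the stage-2 interface handles the
+// clipped regions itself.)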
+void VDPixmapGenResampleRow::Compute8(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ uint8 *dst = (uint8 *)dst0;
+
+ // process pre-copy region
+ if (uint32 count = mAxis.dx_precopy) {
+ VDMemset8(dst, src[0], count);
+ dst += count;
+ }
+
+ uint8 *p = (uint8*)mTempSpace.data();
+ sint32 u = mAxis.u;
+ const sint32 dudx = mAxis.dudx;
+
+ // process dual-clip region
+ if (mpRowStage2) {
+ uint32 count = mAxis.dx_preclip + mAxis.dx_active + mAxis.dx_postclip + mAxis.dx_dualclip;
+ mpRowStage2->Process(dst, src, count);
+ dst += count;
+ } else if (uint32 count = mAxis.dx_dualclip) {
+ VDMemset8(p, src[0], mRowFiltW);
+ memcpy(p + mRowFiltW, src+1, (mSrcWidth-2));
+ VDMemset8(p + mRowFiltW + (mSrcWidth-2), src[mSrcWidth-1], mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count;
+ } else {
+ // process pre-clip region
+ if (uint32 count = mAxis.dx_preclip) {
+ VDMemset8(p, src[0], mRowFiltW);
+ memcpy(p + mRowFiltW, src+1, (mRowFiltW-1));
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count;
+ }
+
+ // process active region
+ if (uint32 count = mAxis.dx_active) {
+ mpRowStage->Process(dst, src, count, u, dudx);
+ u += dudx*count;
+ dst += count;
+ }
+
+ // process post-clip region
+ if (uint32 count = mAxis.dx_postclip) {
+ uint32 offset = mSrcWidth + 1 - mRowFiltW;
+
+ memcpy(p, src+offset, (mRowFiltW-1));
+ VDMemset8(p + (mRowFiltW-1), src[mSrcWidth-1], mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u - (offset<<16), dudx);
+ dst += count;
+ }
+ }
+
+ // process post-copy region
+ if (uint32 count = mAxis.dx_postcopy) {
+ VDMemset8(dst, src[mSrcWidth-1], count);
+ }
+}
+
+void VDPixmapGenResampleRow::Compute32(void *dst0, sint32 y) {
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 *dst = (uint32 *)dst0;
+
+ // process pre-copy region
+ if (uint32 count = mAxis.dx_precopy) {
+ VDMemset32(dst, src[0], count);
+ dst += count;
+ }
+
+ uint32 *p = mTempSpace.data();
+ sint32 u = mAxis.u;
+ const sint32 dudx = mAxis.dudx;
+
+ // process dual-clip region
+ if (uint32 count = mAxis.dx_dualclip) {
+ VDMemset32(p, src[0], mRowFiltW);
+ memcpy(p + mRowFiltW, src+1, (mSrcWidth-2)*sizeof(uint32));
+ VDMemset32(p + mRowFiltW + (mSrcWidth-2), src[mSrcWidth-1], mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count;
+ } else if (mpRowStage2) {
+ mpRowStage2->Process(dst, p, mAxis.dx_preclip + mAxis.dx_active + mAxis.dx_postclip);
+ } else {
+ // process pre-clip region
+ if (uint32 count = mAxis.dx_preclip) {
+ VDMemset32(p, src[0], mRowFiltW);
+ memcpy(p + mRowFiltW, src+1, (mRowFiltW-1)*sizeof(uint32));
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count;
+ }
+
+ // process active region
+ if (uint32 count = mAxis.dx_active) {
+ mpRowStage->Process(dst, src, count, u, dudx);
+ u += dudx*count;
+ dst += count;
+ }
+
+ // process post-clip region
+ if (uint32 count = mAxis.dx_postclip) {
+ uint32 offset = mSrcWidth + 1 - mRowFiltW;
+
+ memcpy(p, src+offset, (mRowFiltW-1)*sizeof(uint32));
+ VDMemset32(p + (mRowFiltW-1), src[mSrcWidth-1], mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u - (offset<<16), dudx);
+ dst += count;
+ }
+ }
+
+ // process post-copy region
+ if (uint32 count = mAxis.dx_postcopy) {
+ VDMemset32(dst, src[mSrcWidth-1], count);
+ }
+}
+
+void VDPixmapGenResampleRow::Compute128(void *dst0, sint32 y) {
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 *dst = (uint32 *)dst0;
+
+ // process pre-copy region
+ if (uint32 count = mAxis.dx_precopy) {
+ VDMemset128(dst, src, count);
+ dst += 4*count;
+ }
+
+ uint32 *p = mTempSpace.data();
+ sint32 u = mAxis.u;
+ const sint32 dudx = mAxis.dudx;
+
+ // process dual-clip region
+ if (uint32 count = mAxis.dx_dualclip) {
+ VDMemset128(p, src, mRowFiltW);
+ memcpy(p + 4*mRowFiltW, src+1, (mSrcWidth-2)*sizeof(uint32)*4);
+ VDMemset128(p + 4*(mRowFiltW + (mSrcWidth-2)), src + 4*(mSrcWidth-1), mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count * 4;
+ } else if (mpRowStage2) {
+ mpRowStage2->Process(dst, p, mAxis.dx_preclip + mAxis.dx_active + mAxis.dx_postclip);
+ } else {
+ // process pre-clip region
+ if (uint32 count = mAxis.dx_preclip) {
+ VDMemset128(p, src, mRowFiltW);
+ memcpy(p + 4*mRowFiltW, src+1, (mRowFiltW-1)*sizeof(uint32)*4);
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count*4;
+ }
+
+ // process active region
+ if (uint32 count = mAxis.dx_active) {
+ mpRowStage->Process(dst, src, count, u, dudx);
+ u += dudx*count;
+ dst += count*4;
+ }
+
+ // process post-clip region
+ if (uint32 count = mAxis.dx_postclip) {
+ uint32 offset = mSrcWidth + 1 - mRowFiltW;
+
+ memcpy(p, src+offset*4, (mRowFiltW-1)*sizeof(uint32)*4);
+ VDMemset128(p + 4*(mRowFiltW-1), src + 4*(mSrcWidth-1), mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u - (offset<<16), dudx);
+ dst += count*4;
+ }
+ }
+
+ // process post-copy region
+ if (uint32 count = mAxis.dx_postcopy) {
+ VDMemset128(dst, src + 4*(mSrcWidth-1), count);
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDPixmapGenResampleCol
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDPixmapGenResampleCol::VDPixmapGenResampleCol()
+ : mpColStage(NULL)
+{
+}
+
+VDPixmapGenResampleCol::~VDPixmapGenResampleCol() {
+ if (mpColStage)
+ delete mpColStage;
+}
+
+void VDPixmapGenResampleCol::Init(IVDPixmapGen *src, uint32 srcIndex, uint32 height, float offset, float step, nsVDPixmap::FilterMode filterMode, float filterFactor, bool interpolationOnly) {
+ InitSource(src, srcIndex);
+
+ sint32 dvdy = (sint32)(step * 65536.0);
+
+ mAxis.Init(dvdy);
+
+ // construct stages
+ double y_2fc = 1.0;
+ if (!interpolationOnly && step > 1.0f)
+ y_2fc = 1.0 / step;
+
+ struct SpanRoutine {
+ uint32 mType;
+ bool mbInterpOnly;
+ nsVDPixmap::FilterMode mFilterMode;
+ uint32 mCPUFlags;
+ IVDResamplerSeparableColStage *(*mpClassFactory)(double filterCutoff, float filterFactor);
+ };
+
+ static const SpanRoutine kSpanRoutines[]={
+#if defined _M_IX86
+ // X86
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE41, ColFactoryLinear<VDResamplerSeparableTableColStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, ColFactoryLinear<VDResamplerSeparableTableColStage8MMX> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, ColFactory<VDResamplerSeparableLinearColStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE2, ColFactoryLinear<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, ColFactoryLinear<VDResamplerSeparableTableColStageMMX> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE41, ColFactoryCubic<VDResamplerSeparableTableColStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, ColFactoryCubic<VDResamplerSeparableTableColStage8MMX> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, ColFactoryCubic2<VDResamplerSeparableCubicColStageSSE2> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, ColFactoryCubic2<VDResamplerSeparableCubicColStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, ColFactoryCubic<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, ColFactoryCubic<VDResamplerSeparableTableColStageMMX> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE41, ColFactoryLanczos3<VDResamplerSeparableTableColStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_MMX, ColFactoryLanczos3<VDResamplerSeparableTableColStage8MMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE2, ColFactoryLanczos3<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_MMX, ColFactoryLanczos3<VDResamplerSeparableTableColStageMMX> },
+#elif defined _M_AMD64
+ // AMD64
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE2, ColFactoryLinear<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, ColFactoryCubic<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE2, ColFactoryLanczos3<VDResamplerSeparableTableColStageSSE2> },
+#endif
+ // Generic
+ { kVDPixType_8, true, nsVDPixmap::kFilterLinear, 0, ColFactory<VDResamplerColStageSeparableLinear8> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterLinear, 0, ColFactory<VDResamplerColStageSeparableLinear32> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, 0, ColFactoryLinear<VDResamplerColStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, 0, ColFactoryLinear<VDResamplerColStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterLinear, 0, ColFactoryLinear<VDResamplerColStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterLinear, 0, ColFactoryLinear<VDResamplerColStageSeparableTable32Fx4> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, 0, ColFactoryCubic<VDResamplerColStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, 0, ColFactoryCubic<VDResamplerColStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterCubic, 0, ColFactoryCubic<VDResamplerColStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterCubic, 0, ColFactoryCubic<VDResamplerColStageSeparableTable32Fx4> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, 0, ColFactoryLanczos3<VDResamplerColStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, 0, ColFactoryLanczos3<VDResamplerColStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterLanczos3, 0, ColFactoryLanczos3<VDResamplerColStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterLanczos3, 0, ColFactoryLanczos3<VDResamplerColStageSeparableTable32Fx4> },
+ };
+
+ long flags = CPUGetEnabledExtensions();
+ uint32 type = src->GetType(srcIndex) & kVDPixType_Mask;
+ for(int i=0; i<sizeof(kSpanRoutines)/sizeof(kSpanRoutines[0]); ++i) {
+ const SpanRoutine& rout = kSpanRoutines[i];
+
+ if (rout.mType != type)
+ continue;
+
+ if (rout.mbInterpOnly && y_2fc < 1.0)
+ continue;
+
+ if (rout.mFilterMode != filterMode)
+ continue;
+
+ if ((rout.mCPUFlags & flags) != rout.mCPUFlags)
+ continue;
+
+ mpColStage = rout.mpClassFactory(y_2fc, filterFactor);
+ break;
+ }
+
+ mWinSize = mpColStage ? mpColStage->GetWindowSize() : 1;
+ mWindow.resize(mWinSize);
+
+ int delta = (mWinSize + 1) >> 1;
+ mpSrc->AddWindowRequest(-delta, delta);
+
+ sint32 fsy1 = (sint32)(offset * 65536.0) - ((mWinSize-1)<<15);
+ mAxis.Compute(height, fsy1, mSrcHeight, mWinSize);
+ mHeight = height;
+
+ switch(type) {
+ case kVDPixType_8:
+ mBytesPerSample = 1;
+ break;
+ case kVDPixType_8888:
+ case kVDPixType_32F_LE:
+ mBytesPerSample = 4;
+ break;
+ case kVDPixType_32Fx4_LE:
+ mBytesPerSample = 16;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+}
+
+void VDPixmapGenResampleCol::Start() {
+ mBytesPerRow = mWidth * mBytesPerSample;
+ StartWindow(mBytesPerRow);
+}
+
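+// Vertical counterpart of the row resampler: rows before the filtered range replicate
+// source row 0, rows past it replicate the last source row, and in between the column
+// stage is fed a window of source rows clamped to the image edges.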
+void VDPixmapGenResampleCol::Compute(void *dst0, sint32 y) {
+ const uint32 winsize = mWinSize;
+ const uint32 dx = mSrcWidth;
+
+ y -= (sint32)mAxis.dx_precopy;
+
+ if (y < 0) {
+ const void *srcrow0 = mpSrc->GetRow(0, mSrcIndex);
+ memcpy(dst0, srcrow0, mBytesPerRow);
+ return;
+ }
+
+ uint32 midrange = mAxis.dx_preclip + mAxis.dx_active + mAxis.dx_postclip + mAxis.dx_dualclip;
+
+ if (y < (sint32)midrange) {
+ sint32 v = mAxis.u + mAxis.dudx * y;
+
+ if (mpColStage) {
+ for(uint32 i=0; i<winsize; ++i) {
+ int sy = (v >> 16) + i;
+
+ if ((unsigned)sy >= (unsigned)mSrcHeight)
+ sy = (~sy >> 31) & (mSrcHeight - 1);
+
+ mWindow[i] = mpSrc->GetRow(sy, mSrcIndex);
+ }
+
+ mpColStage->Process(dst0, mWindow.data(), dx, v);
+ } else
+ memcpy(dst0, mpSrc->GetRow(v >> 16, mSrcIndex), mBytesPerRow);
+ return;
+ }
+
+ const void *p = mpSrc->GetRow(mSrcHeight - 1, mSrcIndex);
+
+ memcpy(dst0, p, mBytesPerRow);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special.cpp
new file mode 100644
index 000000000..0c649dd5c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special.cpp
@@ -0,0 +1,186 @@
+#include "uberblit_resample_special.h"
+#include "blt_spanutils.h"
+
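+// Fixed-ratio fast paths (2x/4x horizontal and vertical scaling at fixed phases) that
+// route straight to the shared span utilities instead of the generic table-driven resampler.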
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleRow_d2_p0_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(0, 0);
+
+ mWidth = (mSrcWidth + 1) >> 1;
+}
+
+void VDPixmapGenResampleRow_d2_p0_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleRow_d2_p0_lin_u8::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_compress2x_coaligned((uint8 *)dst0, src, mSrcWidth);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleRow_d4_p0_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(0, 0);
+
+ mWidth = (mSrcWidth + 3) >> 2;
+}
+
+void VDPixmapGenResampleRow_d4_p0_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleRow_d4_p0_lin_u8::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_compress4x_coaligned((uint8 *)dst0, src, mSrcWidth);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleRow_x2_p0_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(0, 0);
+
+ mWidth = mSrcWidth * 2;
+}
+
+void VDPixmapGenResampleRow_x2_p0_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleRow_x2_p0_lin_u8::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_expand2x_coaligned((uint8 *)dst0, src, mWidth);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleRow_x4_p0_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(0, 0);
+
+ mWidth = mSrcWidth * 4;
+}
+
+void VDPixmapGenResampleRow_x4_p0_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleRow_x4_p0_lin_u8::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_expand4x_coaligned((uint8 *)dst0, src, mWidth);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleCol_x2_phalf_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(-2, 2);
+
+ mHeight = (mSrcHeight + 1) >> 1;
+}
+
+void VDPixmapGenResampleCol_x2_phalf_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleCol_x2_phalf_lin_u8::Compute(void *dst0, sint32 y) {
+ sint32 y2 = y+y;
+ const uint8 *src[4] = {
+ (const uint8 *)mpSrc->GetRow(y2 > 0 ? y2-1 : 0, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2 , mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+2, mSrcIndex)
+ };
+
+ nsVDPixmapSpanUtils::vert_compress2x_centered((uint8 *)dst0, src, mWidth, 0);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleCol_x4_p1half_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(-4, 4);
+
+ mHeight = (mSrcHeight + 2) >> 2;
+}
+
+void VDPixmapGenResampleCol_x4_p1half_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleCol_x4_p1half_lin_u8::Compute(void *dst0, sint32 y) {
+ sint32 y4 = y*4;
+ const uint8 *src[8] = {
+ (const uint8 *)mpSrc->GetRow(y4 > 2 ? y4-2 : 0, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4 > 1 ? y4-1 : 0, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4 , mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+1, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+3, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+4, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+5, mSrcIndex)
+ };
+
+ nsVDPixmapSpanUtils::vert_compress4x_centered((uint8 *)dst0, src, mWidth, 0);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleCol_d2_pnqrtr_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(-1, 1);
+
+ mHeight = mSrcHeight * 2;
+}
+
+void VDPixmapGenResampleCol_d2_pnqrtr_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleCol_d2_pnqrtr_lin_u8::Compute(void *dst0, sint32 y) {
+ sint32 y2 = (y - 1) >> 1;
+ const uint8 *src[2] = {
+ (const uint8 *)mpSrc->GetRow(y2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ };
+
+ nsVDPixmapSpanUtils::vert_expand2x_centered((uint8 *)dst0, src, mWidth, ~y << 7);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleCol_d4_pn38_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(-1, 1);
+
+ mHeight = mSrcHeight * 4;
+}
+
+void VDPixmapGenResampleCol_d4_pn38_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleCol_d4_pn38_lin_u8::Compute(void *dst0, sint32 y) {
+ sint32 y2 = (y - 2) >> 2;
+ const uint8 *src[2] = {
+ (const uint8 *)mpSrc->GetRow(y2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ };
+
+ nsVDPixmapSpanUtils::vert_expand4x_centered((uint8 *)dst0, src, mWidth, (y - 2) << 6);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special_x86.cpp
new file mode 100644
index 000000000..b1828fcca
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special_x86.cpp
@@ -0,0 +1,35 @@
+#include "uberblit_resample_special_x86.h"
+#include "blt_spanutils.h"
+#include "blt_spanutils_x86.h"
+
+void VDPixmapGenResampleRow_x2_p0_lin_u8_ISSE::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_expand2x_coaligned_ISSE((uint8 *)dst0, src, mWidth);
+}
+
+void VDPixmapGenResampleRow_x4_p0_lin_u8_MMX::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_expand4x_coaligned_MMX((uint8 *)dst0, src, mWidth);
+}
+
+void VDPixmapGenResampleCol_d2_pnqrtr_lin_u8_ISSE::Compute(void *dst0, sint32 y) {
+ sint32 y2 = (y - 1) >> 1;
+ const uint8 *src[2] = {
+ (const uint8 *)mpSrc->GetRow(y2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ };
+
+ nsVDPixmapSpanUtils::vert_expand2x_centered_ISSE((uint8 *)dst0, src, mWidth, ~y << 7);
+}
+
+void VDPixmapGenResampleCol_d4_pn38_lin_u8_ISSE::Compute(void *dst0, sint32 y) {
+ sint32 y2 = (y - 2) >> 2;
+ const uint8 *src[2] = {
+ (const uint8 *)mpSrc->GetRow(y2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ };
+
+ nsVDPixmapSpanUtils::vert_expand4x_centered_ISSE((uint8 *)dst0, src, mWidth, (y - 2) << 6);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle.cpp
new file mode 100644
index 000000000..4cb5e4409
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle.cpp
@@ -0,0 +1,89 @@
+#include "uberblit_swizzle.h"
+
+void VDPixmapGen_Swap8In16::Init(IVDPixmapGen *gen, int srcIndex, uint32 w, uint32 h, uint32 bpr) {
+ InitSource(gen, srcIndex);
+ mRowLength = bpr;
+ SetOutputSize(w, h);
+ gen->AddWindowRequest(0, 0);
+}
+
+void VDPixmapGen_Swap8In16::Start() {
+ StartWindow(mRowLength);
+}
+
+uint32 VDPixmapGen_Swap8In16::GetType(uint32 index) const {
+ return mpSrc->GetType(mSrcIndex);
+}
+
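+// Byte-swaps every 16-bit unit in the row: four bytes at a time via shifts and masks,
+// then 2-byte and 1-byte tails (a trailing odd byte is copied unchanged).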
+void VDPixmapGen_Swap8In16::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ uint8 *dst = (uint8 *)dst0;
+ sint32 w = mRowLength;
+
+ uint32 n4 = w >> 2;
+
+ for(uint32 i=0; i<n4; ++i) {
+ uint32 p = *(uint32 *)src;
+ src += 4;
+
+ uint32 r = ((p & 0xff00ff00) >> 8) + ((p & 0x00ff00ff) << 8);
+
+ *(uint32 *)dst = r;
+ dst += 4;
+ }
+
+ if (w & 2) {
+ dst[0] = src[1];
+ dst[1] = src[0];
+ dst += 2;
+ src += 2;
+ }
+
+ if (w & 1) {
+ *dst = *src;
+ }
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
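+// Interleaves separate Cb and Cr planes into one plane of alternating Cb/Cr byte pairs,
+// i.e. the kVDPixType_B8R8 layout reported by GetType().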
+void VDPixmapGen_B8x2_To_B8R8::Init(IVDPixmapGen *srcCb, uint32 srcindexCb, IVDPixmapGen *srcCr, uint32 srcindexCr) {
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcCb->GetWidth(srcindexCb);
+ mHeight = srcCb->GetHeight(srcindexCb);
+
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+}
+
+void VDPixmapGen_B8x2_To_B8R8::Start() {
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 2);
+}
+
+uint32 VDPixmapGen_B8x2_To_B8R8::GetType(uint32 output) const {
+ return (mpSrcCb->GetType(mSrcIndexCb) & ~kVDPixType_Mask) | kVDPixType_B8R8;
+}
+
+void VDPixmapGen_B8x2_To_B8R8::Compute(void *dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *)dst0;
+ const uint8 *VDRESTRICT srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ sint32 w = mWidth;
+ for(sint32 x=0; x<w; ++x) {
+ uint8 cb = srcCb[0];
+ uint8 cr = srcCr[0];
+
+ dst[0] = cb;
+ dst[1] = cr;
+
+ ++srcCb;
+ ++srcCr;
+ dst += 2;
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle_x86.cpp
new file mode 100644
index 000000000..3a87d5a68
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle_x86.cpp
@@ -0,0 +1,400 @@
+#include "uberblit_swizzle_x86.h"
+
+#ifdef VD_COMPILER_MSVC
+ #pragma warning(disable: 4799) // warning C4799: function 'vdasm_extract_8in16_even_MMX' has no EMMS instruction
+#endif
+
+void __declspec(naked) __fastcall vdasm_extract_8in16_even_MMX(void *dst, const void *src, uint32 count) {
+ __asm {
+ mov eax, [esp+4]
+ pcmpeqb mm2, mm2
+ psrlw mm2, 8
+ sub eax, 8
+ jc xtra
+xloop:
+ movq mm0, [edx]
+ movq mm1, [edx+8]
+ pand mm0, mm2
+ pand mm1, mm2
+ packuswb mm0, mm1
+ add edx, 16
+ movq [ecx], mm0
+ add ecx, 8
+ sub eax, 8
+ jns xloop
+xtra:
+ add eax, 8
+ jz fin
+ push ebx
+xtraloop:
+ mov bl, [edx]
+ add edx, 2
+ mov [ecx], bl
+ add ecx, 1
+ sub eax, 1
+ jnz xtraloop
+
+ pop ebx
+fin:
+ ret 4
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_extract_8in16_odd_MMX(void *dst, const void *src, uint32 count) {
+ __asm {
+ mov eax, [esp+4]
+ sub eax, 8
+ jc xtra
+xloop:
+ movq mm0, [edx]
+ movq mm1, [edx+8]
+ psrlw mm0, 8
+ psrlw mm1, 8
+ add edx, 16
+ packuswb mm0, mm1
+ movq [ecx], mm0
+ add ecx, 8
+ sub eax, 8
+ jns xloop
+xtra:
+ add eax, 8
+ jz fin
+ push ebx
+xtraloop:
+ mov bl, [edx+1]
+ add edx, 2
+ mov [ecx], bl
+ add ecx, 1
+ sub eax, 1
+ jnz xtraloop
+
+ pop ebx
+fin:
+ ret 4
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_extract_8in32_MMX(void *dst, const void *src, uint32 count, int byteshift) {
+ __asm {
+ movd mm4, [esp+8]
+ pcmpeqb mm5, mm5
+ pslld mm4, 3
+ mov eax, [esp+4]
+ psrld mm5, 24
+ sub eax, 8
+ jc xtra
+xloop:
+ movq mm0, [edx]
+ movq mm1, [edx+8]
+ psrld mm0, mm4
+ movq mm2, [edx+16]
+ psrld mm1, mm4
+ pand mm0, mm5
+ movq mm3, [edx+24]
+ psrld mm2, mm4
+ pand mm1, mm5
+ packssdw mm0, mm1
+ psrld mm3, mm4
+ pand mm2, mm5
+ pand mm3, mm5
+ add edx, 32
+ packssdw mm2, mm3
+ packuswb mm0, mm2
+ movq [ecx], mm0
+ add ecx, 8
+ sub eax, 8
+ jns xloop
+xtra:
+ add eax, 8
+ jz fin
+ add edx, dword ptr [esp+8]
+ push ebx
+xtraloop:
+ mov bl, [edx]
+ add edx, 4
+ mov [ecx], bl
+ add ecx, 1
+ sub eax, 1
+ jnz xtraloop
+
+ pop ebx
+fin:
+ ret 8
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_swap_8in16_MMX(void *dst, const void *src, uint32 count) {
+ __asm {
+ mov eax, [esp+4]
+ sub eax, 8
+ js xtra
+xloop:
+ movq mm0, [edx]
+ add edx, 8
+ movq mm1, mm0
+ psllw mm0, 8
+ psrlw mm1, 8
+ paddb mm0, mm1
+ movq [ecx], mm0
+ add ecx, 8
+ sub eax, 8
+ jns xloop
+xtra:
+ add eax, 6
+ js nopairs
+ push ebx
+pairloop:
+ mov bl, [edx]
+ mov bh, [edx+1]
+ add edx, 2
+ mov [ecx], bh
+ mov [ecx+1], bl
+ add ecx, 2
+ sub eax, 2
+ jns pairloop
+ pop ebx
+nopairs:
+ add eax, 2
+ jz noodd
+ mov al, [edx]
+ mov [ecx], al
+noodd:
+ ret 4
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_interleave_BGRG_MMX(void *dst, const void *srcR, const void *srcG, const void *srcB, uint32 count) {
+ __asm {
+ push edi
+ push esi
+ push ebx
+ mov esi, [esp+12+12]
+ mov edi, [esp+8+12]
+ mov ebx, [esp+4+12]
+ sub esi, 4
+ jc xtra
+ ; ecx = dst
+ ; edx = srcR
+ ; ebx = srcG
+ ; edi = srcB
+xloop:
+ movd mm0, [edi]
+ movd mm1, [edx]
+ punpcklbw mm0, mm1
+ movq mm1, [ebx]
+ movq mm2, mm0
+ punpcklbw mm0, mm1
+ add edx, 4
+ punpckhbw mm2, mm1
+ add edi, 4
+ movq [ecx], mm0
+ add ebx, 8
+ movq [ecx+8], mm2
+ add ecx, 16
+ sub esi, 4
+ jns xloop
+xtra:
+ add esi, 4
+ jz fin
+xtraloop:
+ mov al, [edi]
+ mov [ecx], al
+ mov al, [ebx]
+ mov [ecx+1], al
+ mov al, [edx]
+ mov [ecx+2], al
+ mov al, [ebx+1]
+ mov [ecx+3], al
+ add ebx, 2
+ add edx, 1
+ add edi, 1
+ add ecx, 4
+ sub esi, 1
+ jnz xtraloop
+fin:
+ pop ebx
+ pop esi
+ pop edi
+ ret 12
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_interleave_GBGR_MMX(void *dst, const void *srcR, const void *srcG, const void *srcB, uint32 count) {
+ __asm {
+ push edi
+ push esi
+ push ebx
+ mov esi, [esp+12+12]
+ mov edi, [esp+8+12]
+ mov ebx, [esp+4+12]
+ sub esi, 4
+ jc xtra
+ ; ecx = dst
+ ; edx = srcR
+ ; ebx = srcG
+ ; edi = srcB
+xloop:
+ movd mm0, [edi]
+ movd mm1, [edx]
+ punpcklbw mm0, mm1
+ movq mm2, [ebx]
+ movq mm1, mm2
+ punpcklbw mm2, mm0
+ add edx, 4
+ punpckhbw mm1, mm0
+ add edi, 4
+ movq [ecx], mm2
+ add ebx, 8
+ movq [ecx+8], mm1
+ add ecx, 16
+ sub esi, 4
+ jns xloop
+xtra:
+ add esi, 4
+ jz fin
+xtraloop:
+ mov al, [ebx]
+ mov [ecx], al
+ mov al, [edi]
+ mov [ecx+1], al
+ mov al, [ebx+1]
+ mov [ecx+2], al
+ mov al, [edx]
+ mov [ecx+3], al
+ add ebx, 2
+ add edx, 1
+ add edi, 1
+ add ecx, 4
+ sub esi, 1
+ jnz xtraloop
+fin:
+ pop ebx
+ pop esi
+ pop edi
+ ret 12
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_interleave_BR_MMX(void *dst, const void *srcB, const void *srcR, uint32 count) {
+ __asm {
+ push edi
+ push esi
+ push ebx
+ mov esi, [esp+8+12]
+ mov ebx, [esp+4+12]
+ sub esi, 8
+ jc xtra
+ ; ecx = dst
+ ; edx = srcB
+		; ebx = srcR
+xloop:
+ movq mm0, [edx]
+ movq mm1, [ebx]
+ movq mm2, mm0
+ punpcklbw mm0, mm1
+ punpckhbw mm2, mm1
+ add edx, 8
+ movq [ecx], mm0
+ add ebx, 8
+ movq [ecx+8], mm2
+ add ecx, 16
+ sub esi, 8
+ jns xloop
+xtra:
+ add esi, 8
+ jz fin
+xtraloop:
+ mov al, [edx]
+ mov [ecx], al
+ mov al, [ebx]
+ mov [ecx+1], al
+ add ebx, 1
+ add edx, 1
+ add ecx, 2
+ sub esi, 1
+ jnz xtraloop
+fin:
+ pop ebx
+ pop esi
+ pop edi
+ ret 8
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGen_8In16_Even_MMX::Compute(void *dst, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_extract_8in16_even_MMX(dst, srcp, mWidth);
+}
+
+void VDPixmapGen_8In16_Odd_MMX::Compute(void *dst, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_extract_8in16_odd_MMX(dst, srcp, mWidth);
+}
+
+void VDPixmapGen_8In32_MMX::Compute(void *dst, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_extract_8in32_MMX(dst, srcp, mWidth, mOffset);
+}
+
+void VDPixmapGen_Swap8In16_MMX::Compute(void *dst, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_swap_8in16_MMX(dst, src, mRowLength);
+}
+
+void VDPixmapGen_B8x2_To_B8R8_MMX::Compute(void *dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *VDRESTRICT)dst0;
+ const uint8 *VDRESTRICT srcCb = (const uint8 *VDRESTRICT)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *VDRESTRICT)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ vdasm_interleave_BR_MMX(dst, srcCb, srcCr, mWidth);
+}
+
+void VDPixmapGen_B8x3_To_G8B8_G8R8_MMX::Compute(void *VDRESTRICT dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *VDRESTRICT)dst0;
+ const uint8 *VDRESTRICT srcY = (const uint8 *VDRESTRICT)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *VDRESTRICT srcCb = (const uint8 *VDRESTRICT)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *VDRESTRICT)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ vdasm_interleave_GBGR_MMX(dst, srcCr, srcY, srcCb, mWidth >> 1);
+
+ if (mWidth & 1) {
+ int w2 = mWidth >> 1;
+ srcY += mWidth;
+ srcCb += w2;
+ srcCr += w2;
+ dst += mWidth * 2;
+
+ dst[-2] = srcY[-1];
+ dst[-1] = srcCb[0];
+ dst[ 0] = 0; // must be zero for QuickTime compatibility
+ dst[ 1] = srcCr[0];
+ }
+}
+
+void VDPixmapGen_B8x3_To_B8G8_R8G8_MMX::Compute(void *VDRESTRICT dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *VDRESTRICT)dst0;
+ const uint8 *VDRESTRICT srcY = (const uint8 * VDRESTRICT)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *VDRESTRICT srcCb = (const uint8 * VDRESTRICT)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 * VDRESTRICT)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ vdasm_interleave_BGRG_MMX(dst, srcCr, srcY, srcCb, mWidth >> 1);
+
+ if (mWidth & 1) {
+ int w2 = mWidth >> 1;
+ srcY += mWidth;
+ srcCb += w2;
+ srcCr += w2;
+ dst += mWidth * 2;
+
+ dst[-2] = srcCb[0];
+ dst[-1] = srcY[-1];
+ dst[ 0] = srcCr[0];
+ dst[ 1] = 0; // must be zero for QuickTime compatibility
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_v210.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_v210.cpp
new file mode 100644
index 000000000..78793f477
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_v210.cpp
@@ -0,0 +1,199 @@
+#include <vd2/system/halffloat.h>
+#include <vd2/system/math.h>
+#include "uberblit_v210.h"
+
+///////////////////////////////////////////////////////////////////////////////
+
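+// Despite the R/G/B naming, the three source planes hold Cr (R), Y (G) and Cb (B) as
+// normalized floats; chroma is 4:2:2, so each 6-pixel group consumes six Y samples but
+// only three Cb and three Cr samples, packed into four dwords as laid out below.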
+void VDPixmapGen_32F_To_V210::Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const float *srcR = (const float *)mpSrcR->GetRow(y, mSrcIndexR);
+ const float *srcG = (const float *)mpSrcG->GetRow(y, mSrcIndexG);
+ const float *srcB = (const float *)mpSrcB->GetRow(y, mSrcIndexB);
+
+ VDCPUCleanupExtensions();
+
+ int w6 = mWidth / 6;
+ for(sint32 i=0; i<w6; ++i) {
+ float r0 = srcR[0];
+ float r1 = srcR[1];
+ float r2 = srcR[2];
+ srcR += 3;
+
+ float b0 = srcB[0];
+ float b1 = srcB[1];
+ float b2 = srcB[2];
+ srcB += 3;
+
+ float g0 = srcG[0];
+ float g1 = srcG[1];
+ float g2 = srcG[2];
+ float g3 = srcG[3];
+ float g4 = srcG[4];
+ float g5 = srcG[5];
+ srcG += 6;
+
+ if (r0 < 0.0f) r0 = 0.0f; else if (r0 > 1.0f) r0 = 1.0f;
+ if (r1 < 0.0f) r1 = 0.0f; else if (r1 > 1.0f) r1 = 1.0f;
+ if (r2 < 0.0f) r2 = 0.0f; else if (r2 > 1.0f) r2 = 1.0f;
+ if (g0 < 0.0f) g0 = 0.0f; else if (g0 > 1.0f) g0 = 1.0f;
+ if (g1 < 0.0f) g1 = 0.0f; else if (g1 > 1.0f) g1 = 1.0f;
+ if (g2 < 0.0f) g2 = 0.0f; else if (g2 > 1.0f) g2 = 1.0f;
+ if (g3 < 0.0f) g3 = 0.0f; else if (g3 > 1.0f) g3 = 1.0f;
+ if (g4 < 0.0f) g4 = 0.0f; else if (g4 > 1.0f) g4 = 1.0f;
+ if (g5 < 0.0f) g5 = 0.0f; else if (g5 > 1.0f) g5 = 1.0f;
+ if (b0 < 0.0f) b0 = 0.0f; else if (b0 > 1.0f) b0 = 1.0f;
+ if (b1 < 0.0f) b1 = 0.0f; else if (b1 > 1.0f) b1 = 1.0f;
+ if (b2 < 0.0f) b2 = 0.0f; else if (b2 > 1.0f) b2 = 1.0f;
+
+ uint32 ir0 = (uint32)VDRoundToIntFast(r0 * 1024.0f);
+ uint32 ir1 = (uint32)VDRoundToIntFast(r1 * 1024.0f);
+ uint32 ir2 = (uint32)VDRoundToIntFast(r2 * 1024.0f);
+ uint32 ib0 = (uint32)VDRoundToIntFast(b0 * 1024.0f);
+ uint32 ib1 = (uint32)VDRoundToIntFast(b1 * 1024.0f);
+ uint32 ib2 = (uint32)VDRoundToIntFast(b2 * 1024.0f);
+ uint32 ig0 = (uint32)VDRoundToIntFast(g0 * 1024.0f);
+ uint32 ig1 = (uint32)VDRoundToIntFast(g1 * 1024.0f);
+ uint32 ig2 = (uint32)VDRoundToIntFast(g2 * 1024.0f);
+ uint32 ig3 = (uint32)VDRoundToIntFast(g3 * 1024.0f);
+ uint32 ig4 = (uint32)VDRoundToIntFast(g4 * 1024.0f);
+ uint32 ig5 = (uint32)VDRoundToIntFast(g5 * 1024.0f);
+
+ // dword 0: XX Cr0 Y0 Cb0
+ // dword 1: XX Y2 Cb1 Y1
+ // dword 2: XX Cb2 Y3 Cr1
+ // dword 3: XX Y5 Cr2 Y4
+ dst[0] = (ir0 << 20) + (ig0 << 10) + ib0;
+ dst[1] = (ig2 << 20) + (ib1 << 10) + ig1;
+ dst[2] = (ib2 << 20) + (ig3 << 10) + ir1;
+ dst[3] = (ig5 << 20) + (ir2 << 10) + ig4;
+
+ dst += 4;
+ }
+
+ int leftovers = mWidth - w6*6;
+ if (leftovers) {
+ float g0 = 0;
+ float g1 = 0;
+ float g2 = 0;
+ float g3 = 0;
+ float g4 = 0;
+ float r0 = 0;
+ float r1 = 0;
+ float r2 = 0;
+ float b0 = 0;
+ float b1 = 0;
+ float b2 = 0;
+
+ switch(leftovers) {
+ case 5: r2 = srcR[2];
+ b2 = srcB[2];
+ g4 = srcG[4];
+ case 4: g3 = srcG[3];
+ case 3: r1 = srcR[1];
+ b1 = srcB[1];
+ g2 = srcG[2];
+ case 2: g1 = srcG[1];
+ case 1: r0 = srcR[0];
+ b0 = srcB[0];
+ g0 = srcG[0];
+ }
+
+ if (r0 < 0.0f) r0 = 0.0f; else if (r0 > 1.0f) r0 = 1.0f;
+ if (r1 < 0.0f) r1 = 0.0f; else if (r1 > 1.0f) r1 = 1.0f;
+ if (r2 < 0.0f) r2 = 0.0f; else if (r2 > 1.0f) r2 = 1.0f;
+ if (g0 < 0.0f) g0 = 0.0f; else if (g0 > 1.0f) g0 = 1.0f;
+ if (g1 < 0.0f) g1 = 0.0f; else if (g1 > 1.0f) g1 = 1.0f;
+ if (g2 < 0.0f) g2 = 0.0f; else if (g2 > 1.0f) g2 = 1.0f;
+ if (g3 < 0.0f) g3 = 0.0f; else if (g3 > 1.0f) g3 = 1.0f;
+ if (g4 < 0.0f) g4 = 0.0f; else if (g4 > 1.0f) g4 = 1.0f;
+ if (b0 < 0.0f) b0 = 0.0f; else if (b0 > 1.0f) b0 = 1.0f;
+ if (b1 < 0.0f) b1 = 0.0f; else if (b1 > 1.0f) b1 = 1.0f;
+ if (b2 < 0.0f) b2 = 0.0f; else if (b2 > 1.0f) b2 = 1.0f;
+
+ uint32 ir0 = (uint32)VDRoundToIntFast(r0 * 1024.0f);
+ uint32 ir1 = (uint32)VDRoundToIntFast(r1 * 1024.0f);
+ uint32 ir2 = (uint32)VDRoundToIntFast(r2 * 1024.0f);
+ uint32 ib0 = (uint32)VDRoundToIntFast(b0 * 1024.0f);
+ uint32 ib1 = (uint32)VDRoundToIntFast(b1 * 1024.0f);
+ uint32 ib2 = (uint32)VDRoundToIntFast(b2 * 1024.0f);
+ uint32 ig0 = (uint32)VDRoundToIntFast(g0 * 1024.0f);
+ uint32 ig1 = (uint32)VDRoundToIntFast(g1 * 1024.0f);
+ uint32 ig2 = (uint32)VDRoundToIntFast(g2 * 1024.0f);
+ uint32 ig3 = (uint32)VDRoundToIntFast(g3 * 1024.0f);
+ uint32 ig4 = (uint32)VDRoundToIntFast(g4 * 1024.0f);
+
+ // dword 0: XX Cr0 Y0 Cb0
+ // dword 1: XX Y2 Cb1 Y1
+ // dword 2: XX Cb2 Y3 Cr1
+ // dword 3: XX Y5 Cr2 Y4
+ dst[0] = (ir0 << 20) + (ig0 << 10) + ib0;
+ dst[1] = (ig2 << 20) + (ib1 << 10) + ig1;
+ dst[2] = (ib2 << 20) + (ig3 << 10) + ir1;
+ dst[3] = (ir2 << 10) + ig4;
+ dst += 4;
+ }
+
+ // QuickTime defines the v210 format and requires zero padding in all unused samples.
+ int w48up = (mWidth + 23) / 24;
+ int w6up = (mWidth + 5) / 6;
+ int zeropad = w48up * 16 - w6up * 4;
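+	// e.g. mWidth = 721: w48up = 31, w6up = 121, so 496 - 484 = 12 trailing dwords are cleared.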
+ memset(dst, 0, zeropad * 4);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
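+// The unpacker emits three planes from a single pass over the packed row, so Start()
+// allocates one window sized to hold all three (Cr, Y, Cb) and GetRow() hands out each
+// plane by offsetting index * mWindowPitch into it.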
+void VDPixmapGen_V210_To_32F::Start() {
+ StartWindow(((mWidth + 5) / 6) * 6 * sizeof(float), 3);
+}
+
+const void *VDPixmapGen_V210_To_32F::GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+}
+
+sint32 VDPixmapGen_V210_To_32F::GetWidth(int index) const {
+ return index == 1 ? mWidth : (mWidth + 1) >> 1;
+}
+
+uint32 VDPixmapGen_V210_To_32F::GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+}
+
+void VDPixmapGen_V210_To_32F::Compute(void *dst0, sint32 y) {
+ float *dstR = (float *)dst0;
+ float *dstG = (float *)((char *)dstR + mWindowPitch);
+ float *dstB = (float *)((char *)dstG + mWindowPitch);
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 w = (mWidth + 5) / 6;
+
+ VDCPUCleanupExtensions();
+
+ // dword 0: XX Cr0 Y0 Cb0
+ // dword 1: XX Y2 Cb1 Y1
+ // dword 2: XX Cb2 Y3 Cr1
+ // dword 3: XX Y5 Cr2 Y4
+
+ for(uint32 i=0; i<w; ++i) {
+ const uint32 w0 = src[0];
+ const uint32 w1 = src[1];
+ const uint32 w2 = src[2];
+ const uint32 w3 = src[3];
+ src += 4;
+
+ dstB[0] = (float)( w0 & 0x3ff) / 1023.0f;
+ dstG[0] = (float)((w0 >> 10) & 0x3ff) / 1023.0f;
+ dstR[0] = (float)((w0 >> 20) & 0x3ff) / 1023.0f;
+ dstG[1] = (float)( w1 & 0x3ff) / 1023.0f;
+ dstB[1] = (float)((w1 >> 10) & 0x3ff) / 1023.0f;
+ dstG[2] = (float)((w1 >> 20) & 0x3ff) / 1023.0f;
+ dstR[1] = (float)( w2 & 0x3ff) / 1023.0f;
+ dstG[3] = (float)((w2 >> 10) & 0x3ff) / 1023.0f;
+ dstB[2] = (float)((w2 >> 20) & 0x3ff) / 1023.0f;
+ dstG[4] = (float)( w3 & 0x3ff) / 1023.0f;
+ dstR[2] = (float)((w3 >> 10) & 0x3ff) / 1023.0f;
+ dstG[5] = (float)((w3 >> 20) & 0x3ff) / 1023.0f;
+
+ dstR += 3;
+ dstG += 6;
+ dstB += 3;
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_ycbcr_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_ycbcr_x86.cpp
new file mode 100644
index 000000000..d34f731f1
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_ycbcr_x86.cpp
@@ -0,0 +1,35 @@
+#include "uberblit_ycbcr_x86.h"
+
+extern "C" void vdasm_pixblt_XRGB8888_to_YUV444Planar_scan_SSE2(void *dstY, void *dstCb, void *dstCr, const void *srcRGB, uint32 count, const void *coeffs);
+
+void VDPixmapGenRGB32ToYCbCr601_SSE2::Compute(void *dst0, sint32 y) {
+ uint8 *dstCb = (uint8 *)dst0;
+ uint8 *dstY = dstCb + mWindowPitch;
+ uint8 *dstCr = dstY + mWindowPitch;
+ const uint8 *srcRGB = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ static const __declspec(align(16)) struct {
+ sint16 rb_to_y[8];
+ sint16 rb_to_cb[8];
+ sint16 rb_to_cr[8];
+ sint16 g_to_y[8];
+ sint16 g_to_cb[8];
+ sint16 g_to_cr[8];
+ sint32 y_bias[4];
+ sint32 c_bias[4];
+ } kCoeffs={
+ // Cb = (28784*r - 24103*g - 4681*b + 8388608 + 32768) >> 16;
+ // Y = (16829*r + 33039*g + 6416*b + 1048576 + 32768) >> 16;
+ // Cr = (-9714*r - 19071*g + 28784*b + 8388608 + 32768) >> 16;
+ { 3208, 8414, 3208, 8414, 3208, 8414, 3208, 8414, }, // rb to y
+ { -2340, 14392, -2340, 14392, -2340, 14392, -2340, 14392, }, // rb to cb
+ { 16519, 0, 16519, 0, 16519, 0, 16519, 0, }, // g to y
+ { -12050, 0, -12050, 0, -12050, 0, -12050, 0, }, // g to cb
+ { 14392, -4857, 14392, -4857, 14392, -4857, 14392, -4857, }, // rb to cr
+ { -9535, 0, -9535, 0, -9535, 0, -9535, 0, }, // g to cr
+ { 0x084000, 0x084000, 0x084000, 0x084000, }, // y bias
+ { 0x404000, 0x404000, 0x404000, 0x404000, }, // c bias
+ };
+
+ vdasm_pixblt_XRGB8888_to_YUV444Planar_scan_SSE2(dstY, dstCb, dstCr, srcRGB, mWidth, &kCoeffs);
+}
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/blitter.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/blitter.h
new file mode 100644
index 000000000..536bc0e7a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/blitter.h
@@ -0,0 +1,19 @@
+#ifndef f_VD2_KASUMI_BLITTER_H
+#define f_VD2_KASUMI_BLITTER_H
+
+#include <vd2/system/vectors.h>
+
+struct VDPixmap;
+struct VDPixmapLayout;
+
+class IVDPixmapBlitter {
+public:
+ virtual ~IVDPixmapBlitter() {}
+ virtual void Blit(const VDPixmap& dst, const VDPixmap& src) = 0;
+ virtual void Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src) = 0;
+};
+
+IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmap& dst, const VDPixmap& src);
+IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmapLayout& dst, const VDPixmapLayout& src);
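+
+// A minimal usage sketch, assuming the caller owns and eventually deletes the returned
+// blitter (dstPixmap and srcPixmap are placeholder VDPixmap values):
+//
+//     IVDPixmapBlitter *blt = VDPixmapCreateBlitter(dstPixmap, srcPixmap);
+//     blt->Blit(dstPixmap, srcPixmap);   // convert/copy one frame
+//     delete blt;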
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixel.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixel.h
new file mode 100644
index 000000000..a2f2e2ead
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixel.h
@@ -0,0 +1,40 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_PIXEL_H
+#define f_VD2_KASUMI_PIXEL_H
+
+#ifndef f_VD2_SYSTEM_VDTYPES_H
+ #include <vd2/system/vdtypes.h>
+#endif
+
+struct VDPixmap;
+
+uint32 VDPixmapSample(const VDPixmap& px, sint32 x, sint32 y);
+uint32 VDPixmapInterpolateSampleRGB24(const VDPixmap& px, sint32 x, sint32 y);
+
+inline uint8 VDPixmapSample8(const void *data, ptrdiff_t pitch, sint32 x, sint32 y) {
+ return ((const uint8 *)data)[pitch*y + x];
+}
+
+uint8 VDPixmapInterpolateSample8(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256);
+uint32 VDConvertYCbCrToRGB(uint8 y, uint8 cb, uint8 cr);
+uint32 VDConvertRGBToYCbCr(uint32 c);
+uint32 VDConvertRGBToYCbCr(uint8 r, uint8 g, uint8 b);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmap.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmap.h
new file mode 100644
index 000000000..a0125b6e3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmap.h
@@ -0,0 +1,76 @@
+#ifndef f_VD2_KASUMI_PIXMAP_H
+#define f_VD2_KASUMI_PIXMAP_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+namespace nsVDPixmap {
+ enum VDPixmapFormat {
+ kPixFormat_Null,
+ kPixFormat_Pal1,
+ kPixFormat_Pal2,
+ kPixFormat_Pal4,
+ kPixFormat_Pal8,
+ kPixFormat_XRGB1555,
+ kPixFormat_RGB565,
+ kPixFormat_RGB888,
+ kPixFormat_XRGB8888,
+ kPixFormat_Y8,
+ kPixFormat_YUV422_UYVY,
+ kPixFormat_YUV422_YUYV,
+ kPixFormat_YUV444_XVYU, // The reason for the strange VYU ordering is to make it easier to convert to UYVY/YUY2.
+ kPixFormat_YUV444_Planar,
+ kPixFormat_YUV422_Planar,
+ kPixFormat_YUV420_Planar,
+ kPixFormat_YUV411_Planar,
+ kPixFormat_YUV410_Planar,
+ kPixFormat_YUV422_Planar_Centered, // MPEG-1/MJPEG chroma alignment
+ kPixFormat_YUV420_Planar_Centered, // MPEG-1/MJPEG chroma alignment
+ kPixFormat_YUV422_Planar_16F,
+ kPixFormat_YUV422_V210,
+ kPixFormat_YUV422_UYVY_709, // Also known as HDYC.
+ kPixFormat_YUV420_NV12,
+ kPixFormat_Max_Standard
+ };
+}
+
+typedef sint32 vdpixpos;
+typedef sint32 vdpixsize;
+typedef ptrdiff_t vdpixoffset;
+
+struct VDPixmap {
+ void *data;
+ const uint32 *palette;
+ vdpixsize w;
+ vdpixsize h;
+ vdpixoffset pitch;
+ sint32 format;
+
+ // Auxiliary planes are always byte-per-pixel.
+
+ void *data2; // Cb (U) for YCbCr
+ vdpixoffset pitch2;
+ void *data3; // Cr (V) for YCbCr
+ vdpixoffset pitch3;
+};
+
+struct VDPixmapLayout {
+ ptrdiff_t data;
+ const uint32 *palette;
+ vdpixsize w;
+ vdpixsize h;
+ vdpixoffset pitch;
+ sint32 format;
+
+ // Auxiliary planes are always byte-per-pixel.
+
+ ptrdiff_t data2; // Cb (U) for YCbCr
+ vdpixoffset pitch2;
+ ptrdiff_t data3; // Cr (V) for YCbCr
+ vdpixoffset pitch3;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmapops.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmapops.h
new file mode 100644
index 000000000..6dce3a858
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmapops.h
@@ -0,0 +1,20 @@
+#ifndef f_VD2_KASUMI_PIXMAPOPS_H
+#define f_VD2_KASUMI_PIXMAPOPS_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/Kasumi/pixmap.h>
+
+bool VDPixmapIsBltPossible(int dst_format, int src_format);
+bool VDPixmapBlt(const VDPixmap& dst, const VDPixmap& src);
+bool VDPixmapBlt(const VDPixmap& dst, vdpixpos x1, vdpixpos y1, const VDPixmap& src, vdpixpos x2, vdpixpos y2, vdpixsize w, vdpixsize h);
+bool VDPixmapStretchBltNearest(const VDPixmap& dst, const VDPixmap& src);
+bool VDPixmapStretchBltNearest(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2);
+bool VDPixmapStretchBltBilinear(const VDPixmap& dst, const VDPixmap& src);
+bool VDPixmapStretchBltBilinear(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2);
+
+bool VDPixmapBltAlphaConst(const VDPixmap& dst, const VDPixmap& src, float alpha);
+
+#endif
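The blit entry points above pair naturally with VDPixmapBuffer from pixmaputils.h below: allocate a destination in the target format and let VDPixmapBlt do the conversion. A short sketch using only the calls declared in these headers.

	#include <vd2/Kasumi/pixmaputils.h>
	#include <vd2/Kasumi/pixmapops.h>

	// Convert an arbitrary source pixmap to 32-bit XRGB. Returns an owning buffer.
	VDPixmapBuffer ConvertToXRGB8888(const VDPixmap& src) {
		VDPixmapBuffer dst(src.w, src.h, nsVDPixmap::kPixFormat_XRGB8888);

		if (VDPixmapIsBltPossible(dst.format, src.format))
			VDPixmapBlt(dst, src);   // format conversion happens inside the blitter

		return dst;
	}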
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmaputils.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmaputils.h
new file mode 100644
index 000000000..0d9e50cfd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmaputils.h
@@ -0,0 +1,171 @@
+#ifndef f_VD2_KASUMI_PIXMAPUTILS_H
+#define f_VD2_KASUMI_PIXMAPUTILS_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/Kasumi/pixmap.h>
+
+struct VDPixmapFormatInfo {
+ const char *name; // debugging name
+ bool qchunky; // quantums are chunky (not 1x1 pixels)
+ int qw, qh; // width, height of a quantum
+ int qwbits, qhbits; // width and height of a quantum as shifts
+ int qsize; // size of a pixel in bytes
+ int auxbufs; // number of auxiliary buffers (0 for chunky formats, usually 2 for planar)
+ int auxwbits, auxhbits; // subsampling factors for auxiliary buffers in shifts
+ int auxsize; // size of an aux sample in bytes
+ int palsize; // entries in palette
+ int subformats; // number of subformats for this format
+};
+
+extern VDPixmapFormatInfo g_vdPixmapFormats[];
+
+inline const VDPixmapFormatInfo& VDPixmapGetInfo(sint32 format) {
+ VDASSERT((uint32)format < nsVDPixmap::kPixFormat_Max_Standard);
+ return g_vdPixmapFormats[(uint32)format < nsVDPixmap::kPixFormat_Max_Standard ? format : 0];
+}
+
+#ifdef _DEBUG
+ bool VDAssertValidPixmap(const VDPixmap& px);
+#else
+ inline bool VDAssertValidPixmap(const VDPixmap& px) { return true; }
+#endif
+
+inline VDPixmap VDPixmapFromLayout(const VDPixmapLayout& layout, void *p) {
+ VDPixmap px;
+
+ px.data = (char *)p + layout.data;
+ px.data2 = (char *)p + layout.data2;
+ px.data3 = (char *)p + layout.data3;
+ px.format = layout.format;
+ px.w = layout.w;
+ px.h = layout.h;
+ px.palette = layout.palette;
+ px.pitch = layout.pitch;
+ px.pitch2 = layout.pitch2;
+ px.pitch3 = layout.pitch3;
+
+ return px;
+}
+
+inline VDPixmapLayout VDPixmapToLayoutFromBase(const VDPixmap& px, void *p) {
+ VDPixmapLayout layout;
+ layout.data = (char *)px.data - (char *)p;
+ layout.data2 = (char *)px.data2 - (char *)p;
+ layout.data3 = (char *)px.data3 - (char *)p;
+ layout.format = px.format;
+ layout.w = px.w;
+ layout.h = px.h;
+ layout.palette = px.palette;
+ layout.pitch = px.pitch;
+ layout.pitch2 = px.pitch2;
+ layout.pitch3 = px.pitch3;
+ return layout;
+}
+
+inline VDPixmapLayout VDPixmapToLayout(const VDPixmap& px, void *&p) {
+ VDPixmapLayout layout;
+ p = px.data;
+ layout.data = 0;
+ layout.data2 = (char *)px.data2 - (char *)px.data;
+ layout.data3 = (char *)px.data3 - (char *)px.data;
+ layout.format = px.format;
+ layout.w = px.w;
+ layout.h = px.h;
+ layout.palette = px.palette;
+ layout.pitch = px.pitch;
+ layout.pitch2 = px.pitch2;
+ layout.pitch3 = px.pitch3;
+ return layout;
+}
+
+uint32 VDPixmapCreateLinearLayout(VDPixmapLayout& layout, int format, vdpixsize w, vdpixsize h, int alignment);
+
+VDPixmap VDPixmapOffset(const VDPixmap& src, vdpixpos x, vdpixpos y);
+VDPixmapLayout VDPixmapLayoutOffset(const VDPixmapLayout& src, vdpixpos x, vdpixpos y);
+
+void VDPixmapFlipV(VDPixmap& layout);
+void VDPixmapLayoutFlipV(VDPixmapLayout& layout);
+
+uint32 VDPixmapLayoutGetMinSize(const VDPixmapLayout& layout);
+
+VDPixmap VDPixmapExtractField(const VDPixmap& src, bool field2);
+
+#ifndef VDPTRSTEP_DECLARED
+ template<class T>
+ inline void vdptrstep(T *&p, ptrdiff_t offset) {
+ p = (T *)((char *)p + offset);
+ }
+#endif
+#ifndef VDPTROFFSET_DECLARED
+ template<class T>
+ inline T *vdptroffset(T *p, ptrdiff_t offset) {
+ return (T *)((char *)p + offset);
+ }
+#endif
+#ifndef VDPTRDIFFABS_DECLARED
+ inline ptrdiff_t vdptrdiffabs(ptrdiff_t x) {
+ return x<0 ? -x : x;
+ }
+#endif
+
+
+typedef void (*VDPixmapBlitterFn)(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+typedef VDPixmapBlitterFn (*tpVDPixBltTable)[nsVDPixmap::kPixFormat_Max_Standard];
+
+tpVDPixBltTable VDGetPixBltTableReference();
+tpVDPixBltTable VDGetPixBltTableX86Scalar();
+tpVDPixBltTable VDGetPixBltTableX86MMX();
+
+
+
+class VDPixmapBuffer : public VDPixmap {
+public:
+ VDPixmapBuffer() : mpBuffer(NULL), mLinearSize(0) { data = NULL; format = 0; }
+ explicit VDPixmapBuffer(const VDPixmap& src);
+ VDPixmapBuffer(const VDPixmapBuffer& src);
+ VDPixmapBuffer(sint32 w, sint32 h, int format) : mpBuffer(NULL), mLinearSize(0) {
+ init(w, h, format);
+ }
+ explicit VDPixmapBuffer(const VDPixmapLayout& layout);
+
+ ~VDPixmapBuffer();
+
+ void clear() {
+ if (mpBuffer) // to reduce debug checks
+ delete[] mpBuffer;
+ mpBuffer = NULL;
+ mLinearSize = 0;
+ format = nsVDPixmap::kPixFormat_Null;
+ }
+
+#ifdef _DEBUG
+ void *base() { return mpBuffer + (-(int)(uintptr)mpBuffer & 15) + 16; }
+ const void *base() const { return mpBuffer + (-(int)(uintptr)mpBuffer & 15) + 16; }
+ size_t size() const { return mLinearSize - 28; }
+
+ void validate();
+#else
+ void *base() { return mpBuffer + (-(int)(uintptr)mpBuffer & 15); }
+ const void *base() const { return mpBuffer + (-(int)(uintptr)mpBuffer & 15); }
+ size_t size() const { return mLinearSize; }
+
+ void validate() {}
+#endif
+
+ void init(sint32 w, sint32 h, int format);
+ void init(const VDPixmapLayout&);
+
+ void assign(const VDPixmap& src);
+
+ void swap(VDPixmapBuffer&);
+
+protected:
+ char *mpBuffer;
+ size_t mLinearSize;
+};
+
+
+#endif
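VDPixmapCreateLinearLayout and VDPixmapFromLayout separate "how the planes are arranged" from "where the memory lives", which is handy when the caller owns the allocation. A sketch of the intended flow; the assumption that the uint32 return value is the number of bytes the layout needs is mine, not stated in the header.

	#include <vector>
	#include <vd2/Kasumi/pixmaputils.h>

	// Build a pixmap over caller-owned storage: compute a linear plane layout,
	// size the buffer accordingly, then bind the layout to the base pointer.
	void AllocatePlanarPixmap(std::vector<char>& storage, VDPixmap& px,
	                          vdpixsize w, vdpixsize h) {
		VDPixmapLayout layout;
		uint32 bytesNeeded = VDPixmapCreateLinearLayout(
			layout, nsVDPixmap::kPixFormat_YUV420_Planar, w, h, 16 /* alignment */);

		storage.resize(bytesNeeded);
		px = VDPixmapFromLayout(layout, &storage[0]);   // offsets become pointers
	}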
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/region.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/region.h
new file mode 100644
index 000000000..aa2963c90
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/region.h
@@ -0,0 +1,92 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_REGION_H
+#define f_VD2_KASUMI_REGION_H
+
+struct VDPixmap;
+
+#include <vd2/system/vectors.h>
+#include <vd2/system/vdstl.h>
+
+class VDPixmapRegion {
+public:
+ void swap(VDPixmapRegion& x);
+
+public:
+ vdfastvector<uint32> mSpans;
+ vdrect32 mBounds;
+};
+
+class VDPixmapPathRasterizer {
+public:
+ VDPixmapPathRasterizer();
+ VDPixmapPathRasterizer(const VDPixmapPathRasterizer&); // no-op
+ ~VDPixmapPathRasterizer();
+
+ VDPixmapPathRasterizer& operator=(const VDPixmapPathRasterizer&); // no-op
+
+ void Clear();
+ void QuadraticBezier(const vdint2 pts[4]);
+ void CubicBezier(const vdint2 pts[4]);
+ void Line(const vdint2& pt1, const vdint2& pt2);
+ void FastLine(int x0, int y0, int x1, int y1);
+
+ void ScanConvert(VDPixmapRegion& region);
+
+protected:
+ void ClearEdgeList();
+ void FreeEdgeLists();
+ void ClearScanBuffer();
+ void ReallocateScanBuffer(int ymin, int ymax);
+
+ struct Edge {
+ Edge *next;
+ int posandflag;
+ };
+
+ enum { kEdgeBlockMax = 1024 };
+
+ struct EdgeBlock {
+ EdgeBlock *next;
+ Edge edges[1024];
+
+ EdgeBlock(EdgeBlock *p) : next(p) {}
+ };
+
+ struct Scan {
+ Edge *chain;
+ uint32 count;
+ };
+
+ EdgeBlock *mpEdgeBlocks;
+ EdgeBlock *mpFreeEdgeBlocks;
+ int mEdgeBlockIdx;
+ Scan *mpScanBuffer;
+ Scan *mpScanBufferBiased;
+ int mScanYMin;
+ int mScanYMax;
+};
+
+bool VDPixmapFillRegion(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color);
+bool VDPixmapFillRegionAntialiased8x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color);
+
+void VDPixmapCreateRoundRegion(VDPixmapRegion& dst, float r);
+void VDPixmapConvolveRegion(VDPixmapRegion& dst, const VDPixmapRegion& r1, const VDPixmapRegion& r2);
+
+#endif
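A rough usage sketch for the rasterizer above: feed it edges, scan-convert them into a VDPixmapRegion, then fill that region into a pixmap. Whether FastLine expects integer pixels or a fixed-point sub-pixel grid is not stated in this header, so treat the coordinates below as illustrative only.

	#include <vd2/Kasumi/region.h>
	#include <vd2/Kasumi/pixmap.h>

	// Rasterize a closed triangle and fill it with a solid color.
	void FillTriangle(const VDPixmap& dst, uint32 color) {
		VDPixmapPathRasterizer rast;

		rast.FastLine(10, 10, 90, 20);   // coordinate units: see note above
		rast.FastLine(90, 20, 40, 80);
		rast.FastLine(40, 80, 10, 10);

		VDPixmapRegion region;
		rast.ScanConvert(region);        // consumes the accumulated edges

		VDPixmapFillRegion(dst, region, 0, 0, color);
	}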
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample.h
new file mode 100644
index 000000000..12c6f01a2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample.h
@@ -0,0 +1,31 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_H
+#define f_VD2_KASUMI_RESAMPLE_H
+
+#include <vd2/system/vectors.h>
+
+struct VDPixmap;
+
+class IVDPixmapResampler {
+public:
+ enum FilterMode {
+ kFilterPoint,
+ kFilterLinear,
+ kFilterCubic,
+ kFilterLanczos3,
+ kFilterCount
+ };
+
+ virtual ~IVDPixmapResampler() {}
+ virtual void SetSplineFactor(double A) = 0;
+ virtual void SetFilters(FilterMode h, FilterMode v, bool interpolationOnly) = 0;
+ virtual bool Init(uint32 dw, uint32 dh, int dstformat, uint32 sw, uint32 sh, int srcformat) = 0;
+ virtual bool Init(const vdrect32f& dstrect, uint32 dw, uint32 dh, int dstformat, const vdrect32f& srcrect, uint32 sw, uint32 sh, int srcformat) = 0;
+ virtual void Shutdown() = 0;
+
+ virtual void Process(const VDPixmap& dst, const VDPixmap& src) = 0;
+};
+
+IVDPixmapResampler *VDCreatePixmapResampler();
+bool VDPixmapResample(const VDPixmap& dst, const VDPixmap& src, IVDPixmapResampler::FilterMode filter);
+
+#endif
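The one-shot helper VDPixmapResample covers the common case; the interface form is meant for repeated use with the same geometry. A sketch of the stateful path, assuming Init/Process/Shutdown are called in that order as the method names suggest.

	#include <vd2/Kasumi/resample.h>
	#include <vd2/Kasumi/pixmap.h>

	// Scale src into dst using a Lanczos3 kernel on both axes.
	bool ScaleLanczos3(const VDPixmap& dst, const VDPixmap& src) {
		IVDPixmapResampler *r = VDCreatePixmapResampler();
		if (!r)
			return false;

		r->SetFilters(IVDPixmapResampler::kFilterLanczos3,
		              IVDPixmapResampler::kFilterLanczos3,
		              false /* not interpolation-only */);

		bool ok = r->Init(dst.w, dst.h, dst.format, src.w, src.h, src.format);
		if (ok)
			r->Process(dst, src);

		r->Shutdown();
		delete r;
		return ok;
	}

For a single resize, VDPixmapResample(dst, src, IVDPixmapResampler::kFilterLanczos3) does the same thing in one call.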
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample_kernels.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample_kernels.h
new file mode 100644
index 000000000..a95e9b028
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample_kernels.h
@@ -0,0 +1,91 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_KERNELS_H
+#define f_VD2_KASUMI_RESAMPLE_KERNELS_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+
+struct VDResamplerAxis {
+ sint32 dx;
+ sint32 u;
+ sint32 dudx;
+ uint32 dx_precopy;
+ uint32 dx_preclip;
+ uint32 dx_active;
+ uint32 dx_postclip;
+ uint32 dx_postcopy;
+ uint32 dx_dualclip;
+
+ void Init(sint32 dudx);
+ void Compute(sint32 count, sint32 u0, sint32 w, sint32 kernel_width);
+};
+
+
+///////////////////////////////////////////////////////////////////////////
+//
+// filter kernels
+//
+///////////////////////////////////////////////////////////////////////////
+
+class IVDResamplerFilter {
+public:
+ virtual ~IVDResamplerFilter() {}
+
+ virtual int GetFilterWidth() const = 0;
+ virtual double EvaluateFilter(double offset) const = 0;
+ virtual void GenerateFilter(float *dst, double offset) const = 0;
+ virtual void GenerateFilterBank(float *dst) const = 0;
+};
+
+class VDResamplerLinearFilter : public IVDResamplerFilter {
+public:
+ VDResamplerLinearFilter(double twofc);
+
+ int GetFilterWidth() const;
+
+ double EvaluateFilter(double offset) const;
+ void GenerateFilter(float *dst, double offset) const;
+ void GenerateFilterBank(float *dst) const;
+
+protected:
+ double mScale;
+ unsigned mTaps;
+};
+
+class VDResamplerCubicFilter : public IVDResamplerFilter {
+public:
+ VDResamplerCubicFilter(double twofc, double A);
+
+ int GetFilterWidth() const;
+
+ double EvaluateFilter(double offset) const;
+ void GenerateFilter(float *dst, double offset) const;
+ void GenerateFilterBank(float *dst) const;
+
+protected:
+ double mScale;
+ double mA0;
+ double mA2;
+ double mA3;
+ double mB0;
+ double mB1;
+ double mB2;
+ double mB3;
+ unsigned mTaps;
+};
+
+class VDResamplerLanczos3Filter : public IVDResamplerFilter {
+public:
+ VDResamplerLanczos3Filter(double twofc);
+
+ int GetFilterWidth() const;
+
+ double EvaluateFilter(double offset) const;
+ void GenerateFilter(float *dst, double offset) const;
+ void GenerateFilterBank(float *dst) const;
+
+protected:
+ double mScale;
+ unsigned mTaps;
+};
+
+#endif
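For orientation only: VDResamplerLanczos3Filter presumably implements the usual three-lobe Lanczos window, but this header declares just the interface, so the formula below is the standard textbook definition rather than anything taken from this commit.

	#include <math.h>

	// Standard Lanczos3 kernel: sinc(x) * sinc(x/3) for |x| < 3, zero elsewhere.
	// Shown only to illustrate what a 3-lobe filter of this family looks like.
	double Lanczos3(double x) {
		if (x == 0.0)
			return 1.0;
		if (fabs(x) >= 3.0)
			return 0.0;

		const double pix = 3.14159265358979323846 * x;
		return 3.0 * sin(pix) * sin(pix / 3.0) / (pix * pix);
	}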
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/tables.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/tables.h
new file mode 100644
index 000000000..972f37036
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/tables.h
@@ -0,0 +1,41 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_TABLES_H
+#define f_VD2_KASUMI_TABLES_H
+
+///////////////////////////////////////////////////////////////////////////////
+// Cubic interpolation tables
+//
+// These tables give coefficients for 1-D cubic interpolation with 8-bit
+// subunit precision. The [0] entry is positioned exactly on top of the
+// second sample, and the [255] entry is 255/256th of the way to the third
+// sample. The cardinal spline constant is -0.75 and the output range is
+// [-0.1875, 1.1875], where the maximum overshoot and undershoot occur at
+// the midpoint.
+//
+// The first and fourth coefficients are always negative; the second and
+// third coefficients are always positive.
+//
+extern "C" const sint32 kVDCubicInterpTableFX14_075[256][4];
+
+#ifdef _M_IX86
+ extern "C" const sint16 kVDCubicInterpTableFX14_075_MMX[256][8];
+#endif
+
+#endif
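Given the description above, one entry of kVDCubicInterpTableFX14_075 holds four 2.14 fixed-point weights for a fractional position between the second and third of four source samples. A hedged sketch of how such a table is typically applied; the 14-bit shift follows from the FX14 naming, and the rounding term is my assumption.

	#include <vd2/Kasumi/tables.h>

	// Interpolate between p1 and p2 of four consecutive 8-bit samples, with
	// 'frac' in [0,255] selecting the sub-sample position (0 == exactly on p1).
	inline int CubicInterp8(int p0, int p1, int p2, int p3, unsigned frac) {
		const sint32 *w = kVDCubicInterpTableFX14_075[frac & 255];

		int v = (w[0]*p0 + w[1]*p1 + w[2]*p2 + w[3]*p3 + 0x2000) >> 14;

		// The -0.75 cardinal spline can overshoot to [-0.1875, 1.1875], so the
		// result has to be clamped back into the 8-bit range.
		if (v < 0)   v = 0;
		if (v > 255) v = 255;
		return v;
	}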
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/text.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/text.h
new file mode 100644
index 000000000..245d38f12
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/text.h
@@ -0,0 +1,62 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_TEXT_H
+#define f_VD2_KASUMI_TEXT_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vectors.h>
+
+class VDPixmapPathRasterizer;
+
+struct VDOutlineFontGlyphInfo {
+ uint16 mPointArrayStart; // start of points (encoded as 8:8)
+ uint16 mCommandArrayStart; // start of commands (encoded as 6:2 RLE).
+ sint16 mAWidth; // advance from start to character cell
+ sint16 mBWidth; // width of character cell
+ sint16 mCWidth; // advance from character cell to end
+};
+
+struct VDOutlineFontInfo {
+ const uint16 *mpPointArray;
+ const uint8 *mpCommandArray;
+ const VDOutlineFontGlyphInfo *mpGlyphArray;
+ int mStartGlyph;
+ int mEndGlyph;
+ int mMinX;
+ int mMinY;
+ int mMaxX;
+ int mMaxY;
+ int mEmSquare;
+ int mAscent;
+ int mDescent;
+ int mLineGap;
+};
+
+struct VDTextLayoutMetrics {
+ vdrect32f mExtents;
+ float mAdvance;
+};
+
+void VDPixmapGetTextExtents(const VDOutlineFontInfo *font, float size, const char *pText, VDTextLayoutMetrics& out_Metrics);
+void VDPixmapConvertTextToPath(VDPixmapPathRasterizer& rast, const VDOutlineFontInfo *font, float size, float x, float y, const char *pText, const float transform[2][2] = NULL);
+
+#endif
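These two calls chain naturally with the rasterizer from region.h: measure first, then convert the glyph outlines to a path and scan-convert it. A sketch, assuming a VDOutlineFontInfo table is available somewhere; this header does not show where the built-in font data lives.

	#include <vd2/Kasumi/text.h>
	#include <vd2/Kasumi/region.h>

	// Turn a string into a fill region at (x, y) using an outline font table.
	void TextToRegion(const VDOutlineFontInfo *font, float size,
	                  float x, float y, const char *text, VDPixmapRegion& out) {
		VDTextLayoutMetrics metrics;
		VDPixmapGetTextExtents(font, size, text, metrics);        // mExtents / mAdvance

		VDPixmapPathRasterizer rast;
		VDPixmapConvertTextToPath(rast, font, size, x, y, text);  // default transform
		rast.ScanConvert(out);
	}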
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/triblt.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/triblt.h
new file mode 100644
index 000000000..4602cd883
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/triblt.h
@@ -0,0 +1,71 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_TRIBLT_H
+#define f_VD2_KASUMI_TRIBLT_H
+
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vector>
+
+struct VDTriBltVertex {
+ float x, y, z, u, v;
+};
+
+struct VDTriColorVertex {
+ float x, y, z, r, g, b, a;
+};
+
+enum VDTriBltFilterMode {
+ kTriBltFilterPoint,
+ kTriBltFilterBilinear,
+ kTriBltFilterTrilinear,
+ kTriBltFilterBicubicMipLinear,
+ kTriBltFilterCount
+};
+
+bool VDPixmapTriFill(VDPixmap& dst, uint32 c,
+ const VDTriBltVertex *pVertices, int nVertices,
+ const int *pIndices, const int nIndices,
+ const float pTransform[16] = NULL);
+
+bool VDPixmapTriFill(VDPixmap& dst,
+ const VDTriColorVertex *pVertices, int nVertices,
+ const int *pIndices, const int nIndices,
+ const float pTransform[16] = NULL);
+
+bool VDPixmapTriBlt(VDPixmap& dst, const VDPixmap *const *pSources, int nMipmaps,
+ const VDTriBltVertex *pVertices, int nVertices,
+ const int *pIndices, const int nIndices,
+ VDTriBltFilterMode filterMode,
+ float mipMapLODBias,
+ const float pTransform[16] = NULL);
+
+class VDPixmapTextureMipmapChain {
+public:
+ VDPixmapTextureMipmapChain(const VDPixmap& src, bool wrap=false, bool cubic = false, int maxlevels = 16);
+
+ const VDPixmap *const *Mips() const { return mMipMaps.data(); }
+ int Levels() const { return mMipMaps.size(); }
+
+protected:
+ std::vector<VDPixmapBuffer> mBuffers;
+ vdfastvector<const VDPixmap *> mMipMaps;
+};
+
+#endif
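VDPixmapTriFill takes an indexed triangle list much like a 3-D API would. A small sketch drawing a flat-colored quad as two triangles; whether x/y are already in destination pixels when no transform is supplied is an assumption.

	#include <vd2/Kasumi/triblt.h>

	// Fill a rectangle as two indexed triangles with a constant color.
	bool FillQuad(VDPixmap& dst, uint32 color,
	              float x0, float y0, float x1, float y1) {
		const VDTriBltVertex verts[4] = {
			{ x0, y0, 0.0f, 0.0f, 0.0f },
			{ x1, y0, 0.0f, 0.0f, 0.0f },
			{ x1, y1, 0.0f, 0.0f, 0.0f },
			{ x0, y1, 0.0f, 0.0f, 0.0f },
		};
		const int indices[6] = { 0, 1, 2,  0, 2, 3 };

		return VDPixmapTriFill(dst, color, verts, 4, indices, 6);   // identity transform
	}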
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/Error.h b/src/thirdparty/VirtualDub/h/vd2/system/Error.h
new file mode 100644
index 000000000..22f15ede3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/Error.h
@@ -0,0 +1,119 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_ERROR_H
+#define f_VD2_ERROR_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+class MyError;
+
+///////////////////////////////////////////////////////////////////////////
+// IVDAsyncErrorCallback
+//
+class IVDAsyncErrorCallback {
+public:
+ virtual bool OnAsyncError(MyError& e) = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// MyError
+//
+class MyError {
+private:
+ const MyError& operator=(const MyError&); // protect against accidents
+
+protected:
+ char *buf;
+
+public:
+ MyError();
+ MyError(const MyError& err);
+ MyError(const char *f, ...);
+ ~MyError();
+ void clear();
+ void assign(const MyError& e);
+ void assign(const char *s);
+ void setf(const char *f, ...);
+ void vsetf(const char *f, va_list val);
+ void post(struct HWND__ *hWndParent, const char *title) const;
+ char *gets() const {
+ return buf;
+ }
+ char *c_str() const {
+ return buf;
+ }
+ bool empty() const { return !buf; }
+ void discard();
+ void swap(MyError& err);
+ void TransferFrom(MyError& err);
+};
+
+class MyICError : public MyError {
+public:
+ MyICError(const char *s, uint32 icErr);
+ MyICError(uint32 icErr, const char *format, ...);
+};
+
+class MyMMIOError : public MyError {
+public:
+ MyMMIOError(const char *s, uint32 icErr);
+};
+
+class MyAVIError : public MyError {
+public:
+ MyAVIError(const char *s, uint32 aviErr);
+};
+
+class MyMemoryError : public MyError {
+public:
+ MyMemoryError();
+};
+
+class MyWin32Error : public MyError {
+public:
+ MyWin32Error(const char *format, uint32 err, ...);
+};
+
+class MyCrashError : public MyError {
+public:
+ MyCrashError(const char *format, uint32 dwExceptionCode);
+};
+
+class MyUserAbortError : public MyError {
+public:
+ MyUserAbortError();
+};
+
+class MyInternalError : public MyError {
+public:
+ MyInternalError(const char *format, ...);
+};
+
+#endif
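MyError is a value type carrying a formatted message; in VirtualDub-derived code it is conventionally thrown and caught by reference, though this header itself only defines the class. A hedged sketch of that pattern.

	#include <vd2/system/Error.h>
	#include <stdio.h>

	void MightFail(bool fail) {
		if (fail)
			throw MyError("operation failed with code %d", 42);   // printf-style ctor
	}

	void Caller() {
		try {
			MightFail(true);
		} catch(const MyError& e) {
			// gets()/c_str() expose the formatted message; empty() checks for one.
			printf("error: %s\n", e.empty() ? "(none)" : e.gets());
		}
	}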
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/Fraction.h b/src/thirdparty/VirtualDub/h/vd2/system/Fraction.h
new file mode 100644
index 000000000..742533635
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/Fraction.h
@@ -0,0 +1,95 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_FRACTION_H
+#define f_VD2_SYSTEM_FRACTION_H
+
+#include <vd2/system/vdtypes.h>
+
+class VDFraction {
+friend VDFraction operator*(unsigned long b, const VDFraction f);
+friend VDFraction operator*(int b, const VDFraction f);
+private:
+ unsigned long hi, lo;
+
+ static VDFraction reduce(uint64 hi, uint64 lo);
+
+public:
+ VDFraction() {}
+ explicit VDFraction(int i) : hi(i), lo(1) {}
+ explicit VDFraction(unsigned long i) : hi(i), lo(1) { }
+ explicit VDFraction(unsigned long i, unsigned long j) : hi(i), lo(j) {}
+ explicit VDFraction(double d);
+
+ bool operator<(VDFraction b) const;
+ bool operator<=(VDFraction b) const;
+ bool operator>(VDFraction b) const;
+ bool operator>=(VDFraction b) const;
+ bool operator==(VDFraction b) const;
+ bool operator!=(VDFraction b) const;
+
+ VDFraction operator*(VDFraction b) const;
+ VDFraction operator/(VDFraction b) const;
+
+ VDFraction operator*(unsigned long b) const;
+ VDFraction operator/(unsigned long b) const;
+
+ VDFraction& operator*=(VDFraction b);
+ VDFraction& operator/=(VDFraction b);
+ VDFraction& operator*=(unsigned long b);
+ VDFraction& operator/=(unsigned long b);
+
+ void Assign(unsigned long n, unsigned long d) {
+ hi = n;
+ lo = d;
+ }
+
+ sint64 scale64t(sint64) const;
+ sint64 scale64r(sint64) const;
+ sint64 scale64u(sint64) const;
+ sint64 scale64it(sint64) const;
+ sint64 scale64ir(sint64) const;
+ sint64 scale64iu(sint64) const;
+
+ double asDouble() const;
+ double AsInverseDouble() const;
+
+ unsigned long roundup32ul() const;
+
+ unsigned long getHi() const { return hi; }
+ unsigned long getLo() const { return lo; }
+
+ VDFraction reduce() const { return reduce(hi, lo); }
+
+ bool Parse(const char *s);
+
+ static inline VDFraction reduce64(sint64 hi, sint64 lo) { return reduce(hi, lo); }
+};
+
+inline VDFraction operator*(unsigned long b, const VDFraction f) { return f*b; }
+
+typedef VDFraction Fraction;
+
+#endif
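VDFraction keeps exact rationals such as NTSC frame rates. The scale64* members apparently scale a 64-bit value by the fraction with different rounding modes (t/r/u and their i-prefixed inverses); that reading is inferred from the names only. A sketch under that assumption:

	#include <vd2/system/Fraction.h>

	// Exact NTSC frame rate kept as 30000/1001 rather than a rounded double.
	sint64 FrameToTime100ns(sint64 frame) {
		VDFraction rate(30000UL, 1001UL);     // getHi()/getLo() == 30000/1001

		// Scale frame*10^7 by the inverse of the rate; scale64it is assumed to
		// be the truncating inverse-scale variant (inferred from the name).
		return rate.scale64it(frame * 10000000);
	}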
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDNamespace.h b/src/thirdparty/VirtualDub/h/vd2/system/VDNamespace.h
new file mode 100644
index 000000000..c0f0d4141
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDNamespace.h
@@ -0,0 +1,157 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+#ifndef f_SYSTEM_VDNAMESPACE_H
+#define f_SYSTEM_VDNAMESPACE_H
+
+#include <vd2/system/list.h>
+
+class VDNamespaceNode;
+class VDNamespaceGroup;
+class VDNamespaceItem;
+class VDNamespace;
+template <class T> class VDNamespace2;
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Node: Any item in the namespace.
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDNamespaceNode {
+public:
+ const char *pszName;
+ VDNamespaceGroup *const pParent;
+
+ VDNamespaceNode(const char *name, VDNamespaceGroup *parent) : pszName(name), pParent(parent) { }
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Group: Holds items.
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDNamespaceGroup : public VDNamespaceNode, public ListNode2<VDNamespaceGroup> {
+public:
+ ListAlloc<VDNamespaceItem> listItems;
+ ListAlloc<VDNamespaceGroup> listGroups;
+
+ const char *namedup(const char *s);
+
+ VDNamespaceGroup(const char *_pszName, VDNamespaceGroup *parent);
+ ~VDNamespaceGroup();
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Item class
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDNamespaceItem : public VDNamespaceNode, public ListNode2<VDNamespaceItem> {
+public:
+ const void *object;
+
+ VDNamespaceItem(const char *_pszName, VDNamespaceGroup *parent, const void *src);
+ ~VDNamespaceItem();
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Namespace class
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDNamespace {
+protected:
+ VDNamespaceGroup root;
+
+ VDNamespaceGroup *_lookupGroup(const char *pszName, bool fCreate, bool fIsFilter);
+ VDNamespaceItem *_findItemByObject(const VDNamespaceGroup *pGroup, const void *pObj);
+ bool _getPathByItem(const VDNamespaceNode *pEntry, char *buf, int maxlen);
+
+public:
+
+ VDNamespace();
+ ~VDNamespace();
+
+ typedef bool (*tGroupEnumerator)(VDNamespace *pThis, const char *pszName, const VDNamespaceGroup *pGroup, void *pvData);
+ typedef bool (*tItemEnumerator)(VDNamespace *pThis, const char *pszName, const void *pItem, void *pvData);
+
+ void clear();
+ void add(const char *pszGroup, const char *pszName, const void *pDef);
+ const void *lookup(const char *pszName);
+
+ bool enumerateGroups(const VDNamespaceGroup *pGroupRoot, tGroupEnumerator pEnum, void *pvData);
+ bool enumerateItems(const VDNamespaceGroup *pGroupRoot, tItemEnumerator pEnum, void *pvData);
+
+ bool getPathByItem(const void *pObj, char *buf, int maxlen);
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Templated Namespace class
+//
+///////////////////////////////////////////////////////////////////////////
+
+template <class T>
+class VDNamespace2 : public VDNamespace {
+public:
+ VDNamespace2() {}
+ ~VDNamespace2() {}
+
+ typedef bool (*tGroupEnumerator)(VDNamespace2<T> *pThis, const char *pszName, const VDNamespaceGroup *pGroup, void *pvData);
+ typedef bool (*tItemEnumerator)(VDNamespace2<T> *pThis, const char *pszName, const T *pItem, void *pvData);
+
+ void add(const char *pszGroup, const char *pszName, const T *pDef) {
+ VDNamespace::add(pszGroup, pszName, pDef);
+ }
+
+ const T *lookup(const char *pszName) {
+ return static_cast<const T *>(VDNamespace::lookup(pszName));
+ }
+
+ bool enumerateGroups(const VDNamespaceGroup *pGroupRoot, tGroupEnumerator pEnum, void *pvData) {
+ for(ListAlloc<VDNamespaceGroup>::fwit it = (pGroupRoot ? pGroupRoot : &root)->listGroups.begin(); it; ++it)
+ if (!pEnum(this, it->pszName, it, pvData))
+ return false;
+
+ return true;
+ }
+
+ bool enumerateItems(const VDNamespaceGroup *pGroupRoot, tItemEnumerator pEnum, void *pvData) {
+ for(ListAlloc<VDNamespaceItem>::fwit it = (pGroupRoot ? pGroupRoot : &root)->listItems.begin(); it; ++it)
+ if (!pEnum(this, it->pszName, static_cast<const T *>(it->object), pvData))
+ return false;
+
+ return true;
+ }
+
+ bool getPathByItem(const T *pObj, char *buf, int maxlen) {
+ return VDNamespace::getPathByItem(pObj, buf, maxlen);
+ }
+};
+
+#endif
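VDNamespace2<T> is a typed wrapper over the string-keyed registry: add() files an object under a group and name, and lookup() retrieves it. A small sketch, assuming lookup() accepts the same name string that add() was given; the exact path syntax is not shown in this header.

	#include <vd2/system/VDNamespace.h>

	struct FilterDef {
		const char *name;
		int version;
	};

	void NamespaceExample() {
		static const FilterDef blur = { "blur", 1 };

		VDNamespace2<FilterDef> registry;
		registry.add("video filters", "blur", &blur);   // group, name, object

		const FilterDef *def = registry.lookup("blur"); // NULL if not registered
		(void)def;
	}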
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDQueue.h b/src/thirdparty/VirtualDub/h/vd2/system/VDQueue.h
new file mode 100644
index 000000000..43367d287
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDQueue.h
@@ -0,0 +1,90 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDQUEUE_H
+#define f_VD2_SYSTEM_VDQUEUE_H
+
+#include <vd2/system/List.h>
+
+template<class T>
+class VDQueueNode : public ListNode2< VDQueueNode<T> > {
+public:
+ T t;
+ VDQueueNode(const T& t2) : t(t2) {}
+};
+
+template<class T>
+class VDQueue {
+public:
+ ListAlloc< VDQueueNode<T> > list;
+
+ VDQueue<T>();
+ ~VDQueue<T>();
+ T Pop();
+ T Peek();
+ void Push(const T&);
+ bool isEmpty() { return list.IsEmpty(); }
+};
+
+template<class T>
+VDQueue<T>::VDQueue<T>() {
+}
+
+template<class T>
+VDQueue<T>::~VDQueue<T>() {
+ while(!list.IsEmpty())
+ delete list.RemoveTail();
+}
+
+template<class T>
+T VDQueue<T>::Peek() {
+ return list.AtHead()->t;
+}
+
+template<class T>
+T VDQueue<T>::Pop() {
+ return list.RemoveHead()->t;
+}
+
+template<class T>
+void VDQueue<T>::Push(const T& t) {
+ list.AddTail(new VDQueueNode<T>(t));
+}
+
+/////////////
+
+template<class T>
+class VDQueueAlloc : public VDQueue<T> {
+public:
+ ~VDQueueAlloc();
+};
+
+template<class T>
+VDQueueAlloc<T>::~VDQueueAlloc() {
+ for(ListAlloc< VDQueueNode<T> >::fwit it = list.begin(); it; ++it)
+ delete &*it;
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDRingBuffer.h b/src/thirdparty/VirtualDub/h/vd2/system/VDRingBuffer.h
new file mode 100644
index 000000000..f0c7806a0
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDRingBuffer.h
@@ -0,0 +1,301 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_SYSTEM_VDRINGBUFFER_H
+#define f_SYSTEM_VDRINGBUFFER_H
+
+#include <string.h>
+#include <utility>
+
+#include <vd2/system/atomic.h>
+
+class VDRingBufferBase {
+public:
+ VDRingBufferBase()
+ : nSize(0)
+ , nReadPoint(0)
+ , nWritePoint(0)
+ {
+ }
+
+ int getSize() const { return nSize; }
+ int getReadOffset() const { return nReadPoint; }
+ int getWriteOffset() const { return nWritePoint; }
+
+protected:
+ int nSize;
+ int nReadPoint;
+ int nWritePoint;
+};
+
+template<class T, class Allocator = std::allocator<T> >
+class VDRingBuffer : public VDRingBufferBase, private Allocator {
+protected:
+ T *pBuffer;
+ VDAtomicInt nLevel;
+
+public:
+ VDRingBuffer();
+ VDRingBuffer(int size);
+ ~VDRingBuffer();
+
+ void Init(int size);
+ void Shutdown();
+
+ int getLevel() const { return nLevel; }
+ int getSpace() const { return nSize - nLevel; }
+ int getWriteSpace() const;
+ T * getWritePtr() const { return pBuffer+nWritePoint; }
+
+ int size() const { return nSize; }
+ bool empty() const { return !nLevel; }
+ bool full() const { return nLevel == nSize; }
+
+ void Flush() { nReadPoint = nWritePoint = nLevel = 0; }
+
+ int Read(T *pBuffer, int bytes);
+ const T *LockRead(int requested, int& actual);
+ const T *LockReadAll(int& actual);
+ const T *LockReadWrapped(int requested, int& actual, int& nReadPoint);
+ const T *LockReadAllWrapped(int& actual, int& nReadPoint);
+ int UnlockRead(int actual);
+
+ int Write(const T *pData, int bytes);
+ T *LockWrite(int requested, int& actual);
+ T *LockWriteAll(int& actual);
+ int UnlockWrite(int actual);
+};
+
+template<class T, class Allocator>
+VDRingBuffer<T, Allocator>::VDRingBuffer(int size)
+ : pBuffer(NULL)
+{
+ Init(size);
+}
+
+template<class T, class Allocator>
+VDRingBuffer<T, Allocator>::VDRingBuffer()
+ : pBuffer(NULL)
+ , nLevel(0)
+{
+}
+
+template<class T, class Allocator>
+VDRingBuffer<T, Allocator>::~VDRingBuffer() {
+ Shutdown();
+}
+
+template<class T, class Allocator>
+void VDRingBuffer<T, Allocator>::Init(int size) {
+ Shutdown();
+ pBuffer = allocate(nSize = size, 0);
+ nLevel = 0;
+ nReadPoint = 0;
+ nWritePoint = 0;
+}
+
+template<class T, class Allocator>
+void VDRingBuffer<T, Allocator>::Shutdown() {
+ if (pBuffer) {
+ deallocate(pBuffer, nSize);
+ pBuffer = NULL;
+ }
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::getWriteSpace() const {
+ volatile int tc = nSize - nWritePoint;
+ volatile int space = nSize - nLevel;
+
+ if (tc > space)
+ tc = space;
+
+ return tc;
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::Read(T *pBuffer, int units) {
+ VDASSERT(units >= 0);
+
+ int actual = 0;
+ const T *pSrc;
+
+ while(units) {
+ int tc;
+
+ pSrc = LockRead(units, tc);
+
+ if (!tc)
+ break;
+
+ memcpy(pBuffer, pSrc, tc * sizeof(T));
+
+ UnlockRead(tc);
+
+ actual += tc;
+ units -= tc;
+ pBuffer += tc;
+ }
+
+ return actual;
+}
+
+template<class T, class Allocator>
+const T *VDRingBuffer<T, Allocator>::LockRead(int requested, int& actual) {
+ VDASSERT(requested >= 0);
+
+ int nLevelNow = nLevel;
+
+ if (requested > nLevelNow)
+ requested = nLevelNow;
+
+ if (requested + nReadPoint > nSize)
+ requested = nSize - nReadPoint;
+
+ actual = requested;
+
+ return pBuffer + nReadPoint;
+}
+
+template<class T, class Allocator>
+const T *VDRingBuffer<T, Allocator>::LockReadAll(int& actual) {
+ int requested = nLevel;
+
+ if (requested + nReadPoint > nSize)
+ requested = nSize - nReadPoint;
+
+ actual = requested;
+
+ return pBuffer + nReadPoint;
+}
+
+template<class T, class Allocator>
+const T *VDRingBuffer<T, Allocator>::LockReadWrapped(int requested, int& actual, int& readpt) {
+ int nLevelNow = nLevel;
+
+ if (requested > nLevelNow)
+ requested = nLevelNow;
+
+ actual = requested;
+ readpt = nReadPoint;
+
+ return pBuffer;
+}
+
+template<class T, class Allocator>
+const T *VDRingBuffer<T, Allocator>::LockReadAllWrapped(int& actual, int& readpt) {
+ int requested = nLevel;
+
+ actual = requested;
+ readpt = nReadPoint;
+
+ return pBuffer;
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::UnlockRead(int actual) {
+ VDASSERT(actual >= 0);
+ VDASSERT(nLevel >= actual);
+
+ int newpt = nReadPoint + actual;
+
+ if (newpt >= nSize)
+ newpt -= nSize;
+
+ nReadPoint = newpt;
+
+ return nLevel.add(-actual);
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::Write(const T *src, int elements) {
+ VDASSERT(elements >= 0);
+
+ int actual = 0;
+ while(elements) {
+ int tc;
+ void *dst = LockWrite(elements, tc);
+
+ if (!tc)
+ break;
+
+ memcpy(dst, src, tc*sizeof(T));
+
+ UnlockWrite(tc);
+
+ actual += tc;
+ elements -= tc;
+ src += tc;
+ }
+
+ return actual;
+}
+
+template<class T, class Allocator>
+T *VDRingBuffer<T, Allocator>::LockWrite(int requested, int& actual) {
+ VDASSERT(requested >= 0);
+ int nLevelNow = nSize - nLevel;
+
+ if (requested > nLevelNow)
+ requested = nLevelNow;
+
+ if (requested + nWritePoint > nSize)
+ requested = nSize - nWritePoint;
+
+ actual = requested;
+
+ return pBuffer + nWritePoint;
+}
+
+template<class T, class Allocator>
+T *VDRingBuffer<T, Allocator>::LockWriteAll(int& actual) {
+ int requested = nSize - nLevel;
+
+ if (requested + nWritePoint > nSize)
+ requested = nSize - nWritePoint;
+
+ actual = requested;
+
+ return pBuffer + nWritePoint;
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::UnlockWrite(int actual) {
+ VDASSERT(actual >= 0);
+ VDASSERT(nLevel + actual <= nSize);
+
+ int newpt = nWritePoint + actual;
+
+ if (newpt >= nSize)
+ newpt = 0;
+
+ nWritePoint = newpt;
+
+ return nLevel.add(actual);
+}
+
+
+
+#endif
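The Lock/Unlock pairs above implement the usual "reserve a contiguous span, fill it, then commit" ring-buffer protocol; Read/Write are the convenience wrappers that loop over the wrap point for you. A short single-threaded sketch using the members as declared:

	#include <vd2/system/VDRingBuffer.h>

	void RingBufferExample() {
		VDRingBuffer<sint16> rb(4096);            // capacity in elements

		// Producer side: reserve as much contiguous space as possible, fill it,
		// then commit only what was actually written.
		int avail;
		sint16 *dst = rb.LockWrite(1024, avail);
		for(int i = 0; i < avail; ++i)
			dst[i] = (sint16)i;
		rb.UnlockWrite(avail);

		// Consumer side: Read() copies out up to the requested element count,
		// handling the wrap point internally.
		sint16 out[256];
		int got = rb.Read(out, 256);
		(void)got;

		rb.Flush();                               // drop anything left
	}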
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDScheduler.h b/src/thirdparty/VirtualDub/h/vd2/system/VDScheduler.h
new file mode 100644
index 000000000..e88fb6c6f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDScheduler.h
@@ -0,0 +1,125 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDSCHEDULER_H
+#define f_VD2_SYSTEM_VDSCHEDULER_H
+
+#include <vd2/system/vdstl.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/error.h>
+
+class VDSchedulerNode;
+class VDSchedulerSuspendNode;
+class VDSignal;
+class IVDAsyncErrorCallback;
+
+class VDScheduler {
+public:
+ VDScheduler();
+ ~VDScheduler();
+
+ void setSignal(VDSignal *);
+ VDSignal *getSignal() { return pWakeupSignal; }
+ void setSchedulerNode(VDSchedulerNode *pSchedulerNode);
+
+ IVDAsyncErrorCallback *getErrorCallback() const { return mpErrorCB; }
+ void setErrorCallback(IVDAsyncErrorCallback *pCB) { mpErrorCB = pCB; }
+
+ bool isShuttingDown() const { return mbExitThreads; }
+
+ void BeginShutdown(); ///< Start signaling scheduling threads to exit.
+
+ bool Run();
+ bool IdleWait(); ///< Wait because no nodes are ready. Returns false if a thread should exit immediately.
+ void Ping(); ///< Restart a scheduler thread. This is required when a scheduler thread leaves.
+ void Lock();
+ void Unlock();
+ void Reschedule(VDSchedulerNode *); ///< Move node to Ready if Waiting.
+ void RescheduleFast(VDSchedulerNode *); ///< Same as Reschedule(), but assumes the scheduler is already locked.
+ void Add(VDSchedulerNode *pNode); ///< Add node to scheduler.
+ void Remove(VDSchedulerNode *pNode); ///< Remove node from scheduler.
+ void DumpStatus();
+
+protected:
+ void Repost(VDSchedulerNode *, bool);
+
+ VDCriticalSection csScheduler;
+ IVDAsyncErrorCallback *mpErrorCB;
+ VDSignal *pWakeupSignal;
+ volatile bool mbExitThreads;
+ VDSchedulerNode *pParentSchedulerNode;
+
+ typedef vdlist<VDSchedulerNode> tNodeList;
+ tNodeList listWaiting, listReady;
+
+ typedef vdlist<VDSchedulerSuspendNode> tSuspendList;
+ tSuspendList listSuspends;
+};
+
+class VDSchedulerNode : public vdlist<VDSchedulerNode>::node {
+friend class VDScheduler;
+public:
+ int nPriority;
+
+ VDSchedulerNode() : nPriority(0) {}
+
+ virtual bool Service()=0;
+
+ virtual void DumpStatus();
+
+ void Reschedule() { pScheduler->Reschedule(this); }
+ void RemoveFromScheduler() { pScheduler->Remove(this); }
+
+protected:
+ VDScheduler *pScheduler;
+ volatile bool bRunning;
+ volatile bool bReschedule;
+ volatile bool bReady;
+ volatile bool bCondemned;
+};
+
+class VDSchedulerSuspendNode : public vdlist<VDSchedulerSuspendNode>::node {
+public:
+ VDSchedulerSuspendNode(VDSchedulerNode *pNode) : mpNode(pNode) {}
+
+ VDSchedulerNode *mpNode;
+ VDSignal mSignal;
+};
+
+class VDSchedulerThread : public VDThread {
+public:
+ VDSchedulerThread();
+ ~VDSchedulerThread();
+
+ bool Start(VDScheduler *pScheduler);
+
+protected:
+ void ThreadRun();
+
+ VDScheduler *mpScheduler;
+ uint32 mAffinity;
+};
+
+#endif
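A node plugs into the scheduler by overriding Service(); the comments on Run/Reschedule above describe the rest of the contract. A sketch of a trivial node driven from the current thread; the meaning of the bool returned by Service() and by Run() is inferred from the names, not stated in this header.

	#include <vd2/system/VDScheduler.h>

	// A node that does one unit of work per Service() call.
	class CounterNode : public VDSchedulerNode {
	public:
		CounterNode() : mCount(0) {}

		bool Service() {
			++mCount;
			return mCount < 100;   // assumed: false once this node has nothing left
		}

	private:
		int mCount;
	};

	void SchedulerExample() {
		VDScheduler scheduler;
		CounterNode node;

		scheduler.Add(&node);

		// Drive the scheduler on this thread until Run() reports no ready work
		// (assumption about its return value; see note above).
		while(scheduler.Run())
			;

		scheduler.Remove(&node);
	}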
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDString.h b/src/thirdparty/VirtualDub/h/vd2/system/VDString.h
new file mode 100644
index 000000000..58955384e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDString.h
@@ -0,0 +1,1134 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDSTRING_H
+#define f_VD2_SYSTEM_VDSTRING_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <functional>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/text.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDStringSpanA {
+public:
+ typedef char value_type;
+ typedef uint32 size_type;
+ typedef ptrdiff_t difference_type;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef value_type * pointer;
+ typedef const value_type * const_pointer;
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+
+ static const size_type npos = (size_type)-1;
+
+ VDStringSpanA()
+ : mpBegin(const_cast<value_type *>(sNull))
+ , mpEnd(const_cast<value_type *>(sNull))
+ {
+ }
+
+ explicit VDStringSpanA(const value_type *s)
+ : mpBegin(const_cast<value_type *>(s))
+ , mpEnd(const_cast<value_type *>(s) + strlen(s))
+ {
+ }
+
+ VDStringSpanA(const value_type *s, const value_type *t)
+ : mpBegin(const_cast<value_type *>(s))
+ , mpEnd(const_cast<value_type *>(t))
+ {
+ }
+
+ // 21.3.2 iterators
+ const_iterator begin() const { return mpBegin; }
+ const_iterator end() const { return mpEnd; }
+
+ // 21.3.3 capacity
+ size_type size() const { return mpEnd - mpBegin; }
+ size_type length() const { return mpEnd - mpBegin; }
+ bool empty() const { return mpBegin == mpEnd; }
+
+ // 21.3.4 element access
+ const_reference operator[](size_type pos) const { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ const_reference at(size_type pos) const { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+
+ const_reference front() const { VDASSERT(mpBegin != mpEnd); return *mpBegin; }
+ const_reference back() const { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+
+ // 21.3.6 string operations
+ const_pointer data() const { return mpBegin; }
+
+ size_type copy(value_type *dst, size_type n, size_type pos = 0) const {
+ size_type len = (size_type)(mpEnd - mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ memcpy(dst, mpBegin + pos, n*sizeof(value_type));
+ return n;
+ }
+
+ size_type find(value_type c, size_type pos = 0) const {
+ VDASSERT(pos <= (size_type)(mpEnd - mpBegin));
+ const void *p = memchr(mpBegin + pos, c, mpEnd - (mpBegin + pos));
+
+ return p ? (const value_type *)p - mpBegin : npos;
+ }
+
+ int compare(const VDStringSpanA& s) const {
+ size_type l1 = mpEnd - mpBegin;
+ size_type l2 = s.mpEnd - s.mpBegin;
+ size_type lm = l1 < l2 ? l1 : l2;
+
+ int r = memcmp(mpBegin, s.mpBegin, lm);
+
+ if (!r)
+ r = (int)mpBegin[lm] - (int)s.mpBegin[lm];
+
+ return r;
+ }
+
+ const VDStringSpanA trim(const value_type *s) const {
+ bool flags[256]={false};
+
+ while(value_type c = *s++)
+ flags[(unsigned char)c] = true;
+
+ const value_type *p = mpBegin;
+ const value_type *q = mpEnd;
+
+ while(p != q && flags[*p])
+ ++p;
+
+ while(p != q && flags[q[-1]])
+ --q;
+
+ return VDStringSpanA(p, q);
+ }
+
+ const VDStringSpanA subspan(size_type pos = 0, size_type n = npos) const {
+
+ size_type len = (size_type)(mpEnd - mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ value_type *p = mpBegin + pos;
+ return VDStringSpanA(p, p+n);
+ }
+
+protected:
+ friend bool operator==(const VDStringSpanA& x, const VDStringSpanA& y);
+ friend bool operator==(const VDStringSpanA& x, const char *y);
+
+ value_type *mpBegin;
+ value_type *mpEnd;
+
+ static const value_type sNull[1];
+};
+
+inline bool operator==(const VDStringSpanA& x, const VDStringSpanA& y) { VDStringSpanA::size_type len = (VDStringSpanA::size_type)(x.mpEnd - x.mpBegin); return len == (VDStringSpanA::size_type)(y.mpEnd - y.mpBegin) && !memcmp(x.mpBegin, y.mpBegin, len*sizeof(char)); }
+inline bool operator==(const VDStringSpanA& x, const char *y) { size_t len = strlen(y); return len == (size_t)(x.mpEnd - x.mpBegin) && !memcmp(x.mpBegin, y, len*sizeof(char)); }
+inline bool operator==(const char *x, const VDStringSpanA& y) { return y == x; }
+
+inline bool operator!=(const VDStringSpanA& x, const VDStringSpanA& y) { return !(x == y); }
+inline bool operator!=(const VDStringSpanA& x, const char *y) { return !(x == y); }
+inline bool operator!=(const char *x, const VDStringSpanA& y) { return !(y == x); }
+
+inline bool operator<(const VDStringSpanA& x, const VDStringSpanA& y) {
+ return x.compare(y) < 0;
+}
+
+inline bool operator>(const VDStringSpanA& x, const VDStringSpanA& y) {
+ return x.compare(y) > 0;
+}
+
+inline bool operator<=(const VDStringSpanA& x, const VDStringSpanA& y) {
+ return x.compare(y) <= 0;
+}
+
+inline bool operator>=(const VDStringSpanA& x, const VDStringSpanA& y) {
+ return x.compare(y) >= 0;
+}
+
+class VDStringRefA : public VDStringSpanA {
+public:
+ typedef VDStringRefA this_type;
+
+ VDStringRefA() {
+ }
+
+ explicit VDStringRefA(const value_type *s)
+ : VDStringSpanA(s)
+ {
+ }
+
+ explicit VDStringRefA(const VDStringSpanA& s)
+ : VDStringSpanA(s)
+ {
+ }
+
+ VDStringRefA(const value_type *s, const value_type *t)
+ : VDStringSpanA(s, t)
+ {
+ }
+
+ this_type& operator=(const value_type *s) {
+ assign(s);
+ return *this;
+ }
+
+ this_type& operator=(const VDStringSpanA& str) {
+ assign(str);
+ return *this;
+ }
+
+ void assign(const value_type *s) {
+ static_cast<VDStringSpanA&>(*this) = VDStringSpanA(s);
+ }
+
+ void assign(const value_type *s, const value_type *t) {
+ static_cast<VDStringSpanA&>(*this) = VDStringSpanA(s, t);
+ }
+
+ void assign(const VDStringSpanA& s) {
+ static_cast<VDStringSpanA&>(*this) = s;
+ }
+
+ bool split(value_type c, VDStringRefA& token) {
+ size_type pos = find(c);
+
+ if (pos == npos)
+ return false;
+
+ token = subspan(0, pos);
+ mpBegin += pos+1;
+ return true;
+ }
+};
+
+class VDStringA : public VDStringSpanA {
+public:
+ typedef VDStringA this_type;
+
+ // 21.3.1 construct/copy/destroy
+
+ VDStringA()
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ }
+
+ VDStringA(const VDStringSpanA& x)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(x.begin(), x.end());
+ }
+
+ VDStringA(const this_type& x)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(x);
+ }
+
+ explicit VDStringA(const value_type *s)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s);
+ }
+
+ explicit VDStringA(size_type n)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ resize(n);
+ }
+
+ VDStringA(const value_type *s, size_type n)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s, n);
+ }
+
+ VDStringA(const value_type *s, const value_type *t)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s, t);
+ }
+
+ ~VDStringA() {
+ if (mpBegin != sNull)
+ delete[] mpBegin;
+ }
+
+ this_type& operator=(const value_type *s) {
+ assign(s);
+ return *this;
+ }
+
+ this_type& operator=(const this_type& str) {
+ assign(str);
+ return *this;
+ }
+
+ this_type& operator=(const VDStringSpanA& str) {
+ assign(str);
+ return *this;
+ }
+
+ // 21.3.2 iterators
+ using VDStringSpanA::begin;
+ using VDStringSpanA::end;
+
+ iterator begin() { return mpBegin; }
+ iterator end() { return mpEnd; }
+
+ // 21.3.3 capacity (COMPLETE)
+ void resize(size_type n) {
+ size_type current = (size_type)(mpEnd - mpBegin);
+
+ if (n < current) {
+ mpEnd = mpBegin + n;
+ mpEnd[0] = 0;
+ } else if (n > current)
+ resize_slow(n, current);
+ }
+
+ void resize(size_type n, value_type v) {
+ size_type current = (size_type)(mpEnd - mpBegin);
+
+ if (n < current) {
+ mpEnd = mpBegin + n;
+ mpEnd[0] = 0;
+ } else if (n > current)
+ resize_slow(n, current, v);
+ }
+
+ size_type capacity() const { return mpEOS - mpBegin; }
+
+ void reserve(size_t n) {
+ size_type current = (size_type)(mpEOS - mpBegin);
+
+ if (n > current)
+ reserve_slow(n, current);
+ }
+
+ void clear() {
+ if (mpEnd != mpBegin) {
+ mpEnd = mpBegin;
+ mpEnd[0] = 0;
+ }
+ }
+
+ // 21.3.4 element access
+ using VDStringSpanA::operator[];
+ using VDStringSpanA::at;
+ using VDStringSpanA::front;
+ using VDStringSpanA::back;
+
+ reference operator[](size_type pos) { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ reference at(size_type pos) { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ reference front() { VDASSERT(mpBegin != mpEnd); return *mpBegin; }
+ reference back() { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+
+ // 21.3.5 modifiers
+ this_type& operator+=(const this_type& str) {
+ return append(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& operator+=(const value_type *s) {
+ return append(s, s+strlen(s));
+ }
+
+ this_type& operator+=(value_type c) {
+ if (mpEnd == mpEOS)
+ push_back_extend();
+
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ return *this;
+ }
+
+ this_type& append(const this_type& str) {
+ return append(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& append(const this_type& str, size_type pos, size_type n) {
+ size_type len = (size_type)(str.mpEnd - str.mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ return append(str.mpBegin + pos, str.mpBegin + pos + n);
+ }
+
+ this_type& append(const value_type *s, size_type n) {
+ return append(s, s+n);
+ }
+
+ this_type& append(const value_type *s) {
+ return append(s, s+strlen(s));
+ }
+
+ this_type& append(const value_type *s, const value_type *t) {
+ if (s != t) {
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ size_type n = (size_type)(t - s);
+
+ if (current_capacity - current_size < n)
+ reserve_amortized_slow(n, current_size, current_capacity);
+
+ memcpy(mpBegin + current_size, s, n*sizeof(value_type));
+ mpEnd += n;
+ *mpEnd = 0;
+ }
+ return *this;
+ }
+
+ void push_back(const value_type c) {
+ if (mpEnd == mpEOS)
+ push_back_extend();
+
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ }
+
+ this_type& assign(const VDStringSpanA& str) {
+ return assign(str.begin(), str.end());
+ }
+
+ this_type& assign(const this_type& str) {
+ return assign(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& assign(const this_type& str, size_type pos, size_type n) {
+ size_type len = (size_type)(str.mpEnd - str.mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ return assign(str.mpBegin + pos, str.mpBegin + pos + n);
+ }
+
+ this_type& assign(const value_type *s, size_type n) {
+ return assign(s, s+n);
+ }
+
+ this_type& assign(const value_type *s) {
+ return assign(s, s+strlen(s));
+ }
+
+ this_type& assign(size_type n, value_type c) {
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+
+ if (current_capacity < n)
+ reserve_slow(n, current_capacity);
+
+ if (mpBegin != sNull) {
+ mpEnd = mpBegin;
+ while(n--)
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ }
+
+ return *this;
+ }
+
+ this_type& assign(const value_type *s, const value_type *t) {
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ size_type n = (size_type)(t - s);
+
+ if (current_capacity < n)
+ reserve_slow(n, current_capacity);
+
+ if (mpBegin != sNull) {
+ memcpy(mpBegin, s, sizeof(value_type)*n);
+ mpEnd = mpBegin + n;
+ *mpEnd = 0;
+ }
+
+ return *this;
+ }
+
+ this_type& insert(iterator it, value_type c) {
+ if (mpEnd == mpEOS) {
+ size_type pos = (size_type)(it - mpBegin);
+ push_back_extend();
+ it = mpBegin + pos;
+ }
+
+ memmove(it + 1, it, (mpEnd - it + 1)*sizeof(value_type));
+ *it = c;
+ ++mpEnd;
+ return *this;
+ }
+
+ this_type& erase(size_type pos = 0, size_type n = npos) {
+ size_type len = (size_type)(mpEnd - mpBegin);
+
+ VDASSERT(pos <= len);
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ if (n) {
+ size_type pos2 = pos + n;
+ memmove(mpBegin + pos, mpBegin + pos2, (len + 1 - n)*sizeof(value_type));
+ mpEnd -= n;
+ }
+
+ return *this;
+ }
+
+ iterator erase(iterator x) {
+ VDASSERT(x != mpEnd);
+
+ memmove(x, x+1, (mpEnd - x)*sizeof(value_type));
+ --mpEnd;
+ return x;
+ }
+
+ iterator erase(iterator first, iterator last) {
+ VDASSERT(last >= first);
+
+ memmove(first, last, ((mpEnd - last) + 1)*sizeof(value_type));
+ mpEnd -= (last - first);
+ return first;
+ }
+
+ this_type& replace(size_type pos, size_type n1, const value_type *s, size_type n2) {
+ size_type len = (size_type)(mpEnd - mpBegin);
+
+ VDASSERT(pos <= len);
+ size_type limit = len - pos;
+ if (n1 > limit)
+ n1 = limit;
+
+ size_type len2 = len - n1 + n2;
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+
+ if (current_capacity < len2)
+ reserve_slow(len2, current_capacity);
+
+ memmove(mpBegin + pos + n2, mpBegin + pos + n1, (limit - n1 + 1) * sizeof(value_type));
+ memcpy(mpBegin + pos, s, n2*sizeof(value_type));
+ mpEnd = mpBegin + len2;
+ return *this;
+ }
+
+ void swap(this_type& x) {
+ value_type *p;
+
+ p = mpBegin; mpBegin = x.mpBegin; x.mpBegin = p;
+ p = mpEnd; mpEnd = x.mpEnd; x.mpEnd = p;
+ p = mpEOS; mpEOS = x.mpEOS; x.mpEOS = p;
+ }
+
+ // 21.3.6 string operations
+ const_pointer c_str() const { return mpBegin; }
+
+ this_type& sprintf(const value_type *format, ...);
+ this_type& append_sprintf(const value_type *format, ...);
+ this_type& append_vsprintf(const value_type *format, va_list val);
+
+protected:
+ void push_back_extend();
+ void resize_slow(size_type n, size_type current_size);
+ void resize_slow(size_type n, size_type current_size, value_type c);
+ void reserve_slow(size_type n, size_type current_capacity);
+ void reserve_amortized_slow(size_type n, size_type current_size, size_type current_capacity);
+
+ char *mpEOS;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+inline VDStringA operator+(const VDStringA& str, const VDStringA& s) {
+ VDStringA result;
+ result.reserve(str.size() + s.size());
+ result.assign(str);
+ result.append(s);
+ return result;
+}
+
+inline VDStringA operator+(const VDStringA& str, const char *s) {
+ VDStringA result;
+ result.reserve(str.size() + strlen(s));
+ result.assign(str);
+ result.append(s);
+ return result;
+}
+
+inline VDStringA operator+(const VDStringA& str, char c) {
+ VDStringA result;
+ result.reserve(str.size() + 1);
+ result.assign(str);
+ result += c;
+ return result;
+}
+
+namespace std {
+ template<>
+ struct less<VDStringA> : binary_function<VDStringA, VDStringA, bool> {
+ bool operator()(const VDStringA& x, const VDStringA& y) const {
+ return x.compare(y) < 0;
+ }
+ };
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDStringSpanW {
+public:
+ typedef wchar_t value_type;
+ typedef uint32 size_type;
+ typedef ptrdiff_t difference_type;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef value_type * pointer;
+ typedef const value_type * const_pointer;
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+
+ static const size_type npos = (size_type)-1;
+
+ VDStringSpanW()
+ : mpBegin(const_cast<value_type *>(sNull))
+ , mpEnd(const_cast<value_type *>(sNull))
+ {
+ }
+
+ explicit VDStringSpanW(const value_type *s)
+ : mpBegin(const_cast<value_type *>(s))
+ , mpEnd(const_cast<value_type *>(s) + wcslen(s))
+ {
+ }
+
+ VDStringSpanW(const value_type *s, const value_type *t)
+ : mpBegin(const_cast<value_type *>(s))
+ , mpEnd(const_cast<value_type *>(t))
+ {
+ }
+
+ // 21.3.2 iterators
+ const_iterator begin() const { return mpBegin; }
+ const_iterator end() const { return mpEnd; }
+
+ // 21.3.3 capacity
+ size_type size() const { return mpEnd - mpBegin; }
+ size_type length() const { return mpEnd - mpBegin; }
+ bool empty() const { return mpBegin == mpEnd; }
+
+ // 21.3.4 element access
+ const_reference operator[](size_type pos) const { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ const_reference at(size_type pos) const { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+
+ const_reference front() const { VDASSERT(mpBegin != mpEnd); return *mpBegin; }
+ const_reference back() const { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+
+ // 21.3.6 string operations
+ const_pointer data() const { return mpBegin; }
+
+ size_type copy(value_type *dst, size_type n, size_type pos = 0) const {
+ size_type len = (size_type)(mpEnd - mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ memcpy(dst, mpBegin + pos, n*sizeof(value_type));
+ return n;
+ }
+
+ size_type find(value_type c, size_type pos = 0) const {
+ VDASSERT(pos <= (size_type)(mpEnd - mpBegin));
+ const void *p = wmemchr(mpBegin + pos, c, mpEnd - (mpBegin + pos));
+
+ return p ? (const value_type *)p - mpBegin : npos;
+ }
+
+ // extensions
+ const VDStringSpanW subspan(size_type pos, size_type n) const {
+ size_type len = (size_type)(mpEnd - mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ value_type *p = mpBegin + pos;
+ return VDStringSpanW(p, p+n);
+ }
+
+protected:
+ friend bool operator==(const VDStringSpanW& x, const VDStringSpanW& y);
+ friend bool operator==(const VDStringSpanW& x, const wchar_t *y);
+
+ value_type *mpBegin;
+ value_type *mpEnd;
+
+ static const value_type sNull[1];
+};
+
+inline bool operator==(const VDStringSpanW& x, const VDStringSpanW& y) { VDStringSpanW::size_type len = (VDStringSpanW::size_type)(x.mpEnd - x.mpBegin); return len == (VDStringSpanW::size_type)(y.mpEnd - y.mpBegin) && !memcmp(x.mpBegin, y.mpBegin, len*sizeof(wchar_t)); }
+inline bool operator==(const VDStringSpanW& x, const wchar_t *y) { size_t len = wcslen(y); return len == (size_t)(x.mpEnd - x.mpBegin) && !memcmp(x.mpBegin, y, len*sizeof(wchar_t)); }
+inline bool operator==(const wchar_t *x, const VDStringSpanW& y) { return y == x; }
+
+inline bool operator!=(const VDStringSpanW& x, const VDStringSpanW& y) { return !(x == y); }
+inline bool operator!=(const VDStringSpanW& x, const wchar_t *y) { return !(x == y); }
+inline bool operator!=(const wchar_t *x, const VDStringSpanW& y) { return !(y == x); }
+
+class VDStringRefW : public VDStringSpanW {
+public:
+ typedef VDStringRefW this_type;
+
+ VDStringRefW() {
+ }
+
+ explicit VDStringRefW(const value_type *s)
+ : VDStringSpanW(s)
+ {
+ }
+
+ explicit VDStringRefW(const VDStringSpanW& s)
+ : VDStringSpanW(s)
+ {
+ }
+
+ VDStringRefW(const value_type *s, const value_type *t)
+ : VDStringSpanW(s, t)
+ {
+ }
+
+ this_type& operator=(const value_type *s) {
+ assign(s);
+ return *this;
+ }
+
+ this_type& operator=(const VDStringSpanW& str) {
+ assign(str);
+ return *this;
+ }
+
+ void assign(const value_type *s) {
+ static_cast<VDStringSpanW&>(*this) = VDStringSpanW(s);
+ }
+
+ void assign(const value_type *s, const value_type *t) {
+ static_cast<VDStringSpanW&>(*this) = VDStringSpanW(s, t);
+ }
+
+ void assign(const VDStringSpanW& s) {
+ static_cast<VDStringSpanW&>(*this) = s;
+ }
+
+ bool split(value_type c, VDStringRefW& token) {
+ size_type pos = find(c);
+
+ if (pos == npos)
+ return false;
+
+ token = subspan(0, pos);
+ mpBegin += pos+1;
+ return true;
+ }
+};
+
+class VDStringW : public VDStringSpanW {
+public:
+ typedef VDStringW this_type;
+
+ // 21.3.1 construct/copy/destroy
+
+ VDStringW()
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ }
+
+ VDStringW(const VDStringSpanW& x)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(x.begin(), x.end());
+ }
+
+ VDStringW(const this_type& x)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(x);
+ }
+
+ explicit VDStringW(const value_type *s)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s);
+ }
+
+ explicit VDStringW(size_type n)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ resize(n);
+ }
+
+ VDStringW(const value_type *s, size_type n)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s, n);
+ }
+
+ VDStringW(const value_type *s, const value_type *t)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s, t);
+ }
+
+ ~VDStringW() {
+ if (mpBegin != sNull)
+ delete[] mpBegin;
+ }
+
+ this_type& operator=(const wchar_t *s) {
+ assign(s);
+ return *this;
+ }
+
+ this_type& operator=(const this_type& str) {
+ assign(str);
+ return *this;
+ }
+
+ // 21.3.2 iterators
+ using VDStringSpanW::begin;
+ using VDStringSpanW::end;
+ iterator begin() { return mpBegin; }
+ iterator end() { return mpEnd; }
+
+ // 21.3.3 capacity (COMPLETE)
+ void resize(size_type n) {
+ size_type current = (size_type)(mpEnd - mpBegin);
+
+ if (n < current) {
+ mpEnd = mpBegin + n;
+ mpEnd[0] = 0;
+ } else if (n > current)
+ resize_slow(n, current);
+ }
+
+ void resize(size_type n, value_type v) {
+ size_type current = (size_type)(mpEnd - mpBegin);
+
+ if (n < current) {
+ mpEnd = mpBegin + n;
+ mpEnd[0] = 0;
+ } else if (n > current)
+ resize_slow(n, current);
+ wmemset(mpBegin, v, n);
+ }
+
+ size_type capacity() const { return mpEOS - mpBegin; }
+
+ void reserve(size_t n) {
+ size_type current = (size_type)(mpEOS - mpBegin);
+
+ if (n > current)
+ reserve_slow(n, current);
+ }
+
+ void clear() {
+ if (mpEnd != mpBegin) {
+ mpEnd = mpBegin;
+ mpEnd[0] = 0;
+ }
+ }
+
+ // 21.3.4 element access
+ using VDStringSpanW::operator[];
+ using VDStringSpanW::at;
+ using VDStringSpanW::front;
+ using VDStringSpanW::back;
+ reference operator[](size_type pos) { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ reference at(size_type pos) { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ reference front() { VDASSERT(mpBegin != mpEnd); return *mpBegin; }
+ reference back() { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+
+ // 21.3.5 modifiers
+ this_type& operator+=(const this_type& str) {
+ return append(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& operator+=(const value_type *s) {
+ return append(s, s+wcslen(s));
+ }
+
+ this_type& operator+=(value_type c) {
+ if (mpEnd == mpEOS)
+ push_back_extend();
+
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ return *this;
+ }
+
+ this_type& append(const this_type& str) {
+ return append(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& append(const this_type& str, size_type pos, size_type n) {
+ size_type len = (size_type)(str.mpEnd - str.mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ return append(str.mpBegin + pos, str.mpBegin + pos + n);
+ }
+
+ this_type& append(const value_type *s, size_type n) {
+ return append(s, s+n);
+ }
+
+ this_type& append(const value_type *s) {
+ return append(s, s+wcslen(s));
+ }
+
+ this_type& append(const value_type *s, const value_type *t) {
+ if (s != t) {
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ size_type n = (size_type)(t - s);
+
+ if (current_capacity - current_size < n)
+ reserve_amortized_slow(n, current_size, current_capacity);
+
+ memcpy(mpBegin + current_size, s, n*sizeof(value_type));
+ mpEnd += n;
+ *mpEnd = 0;
+ }
+ return *this;
+ }
+
+ void push_back(const value_type c) {
+ if (mpEnd == mpEOS)
+ push_back_extend();
+
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ }
+
+ this_type& assign(const this_type& str) {
+ return assign(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& assign(const this_type& str, size_type pos, size_type n) {
+ size_type len = (size_type)(str.mpEnd - str.mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ return assign(str.mpBegin + pos, str.mpBegin + pos + n);
+ }
+
+ this_type& assign(const value_type *s, size_type n) {
+ return assign(s, s+n);
+ }
+
+ this_type& assign(const value_type *s) {
+ return assign(s, s+wcslen(s));
+ }
+
+ this_type& assign(size_type n, value_type c) {
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+
+ if (current_capacity < n)
+ reserve_slow(n, current_capacity);
+
+ if (mpBegin != sNull) {
+ mpEnd = mpBegin;
+ while(n--)
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ }
+
+ return *this;
+ }
+
+ this_type& assign(const value_type *s, const value_type *t) {
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ size_type n = (size_type)(t - s);
+
+ if (current_capacity < n)
+ reserve_slow(n, current_capacity);
+
+ if (mpBegin != sNull) {
+ memcpy(mpBegin, s, sizeof(value_type)*n);
+ mpEnd = mpBegin + n;
+ *mpEnd = 0;
+ }
+
+ return *this;
+ }
+
+ this_type& insert(iterator it, value_type c) {
+ if (mpEnd == mpEOS) {
+ size_type pos = (size_type)(it - mpBegin);
+ push_back_extend();
+ it = mpBegin + pos;
+ }
+
+ memmove(it + 1, it, (mpEnd - it + 1)*sizeof(value_type));
+ *it = c;
+ ++mpEnd;
+ return *this;
+ }
+
+ this_type& erase(size_type pos = 0, size_type n = npos) {
+ size_type len = (size_type)(mpEnd - mpBegin);
+
+ VDASSERT(pos <= len);
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ if (n) {
+ size_type pos2 = pos + n;
+ memmove(mpBegin + pos, mpBegin + pos2, (len + 1 - n)*sizeof(value_type));
+ mpEnd -= n;
+ }
+
+ return *this;
+ }
+
+ iterator erase(iterator x) {
+ VDASSERT(x != mpEnd);
+
+ memmove(x, x+1, (mpEnd - x)*sizeof(value_type));
+ --mpEnd;
+ return x;
+ }
+
+ iterator erase(iterator first, iterator last) {
+ VDASSERT(last >= first);
+
+ memmove(first, last, ((mpEnd - last) + 1)*sizeof(value_type));
+ mpEnd -= (last - first);
+ return first;
+ }
+
+ this_type& replace(size_type pos, size_type n1, const value_type *s, size_type n2) {
+ size_type len = (size_type)(mpEnd - mpBegin);
+
+ VDASSERT(pos <= len);
+ size_type limit = len - pos;
+ if (n1 > limit)
+ n1 = limit;
+
+ size_type len2 = len - n1 + n2;
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+
+ if (current_capacity < len2)
+ reserve_slow(len2, current_capacity);
+
+ memmove(mpBegin + pos + n2, mpBegin + pos + n1, (limit - n1 + 1) * sizeof(value_type));
+ memcpy(mpBegin + pos, s, n2*sizeof(value_type));
+ mpEnd = mpBegin + len2;
+ return *this;
+ }
+
+ void swap(this_type& x) {
+ value_type *p;
+
+ p = mpBegin; mpBegin = x.mpBegin; x.mpBegin = p;
+ p = mpEnd; mpEnd = x.mpEnd; x.mpEnd = p;
+ p = mpEOS; mpEOS = x.mpEOS; x.mpEOS = p;
+ }
+
+ // 21.3.6 string operations
+ const_pointer c_str() const { return mpBegin; }
+
+ this_type& sprintf(const value_type *format, ...);
+ this_type& append_sprintf(const value_type *format, ...);
+ this_type& append_vsprintf(const value_type *format, va_list val);
+
+protected:
+ void push_back_extend();
+ void resize_slow(size_type n, size_type current_size);
+ void reserve_slow(size_type n, size_type current_capacity);
+ void reserve_amortized_slow(size_type n, size_type current_size, size_type current_capacity);
+
+ value_type *mpEOS;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+inline VDStringW operator+(const VDStringW& str, const VDStringW& s) {
+ VDStringW result;
+ result.reserve(str.size() + s.size());
+ result.assign(str);
+ result.append(s);
+ return result;
+}
+
+inline VDStringW operator+(const VDStringW& str, const wchar_t *s) {
+ VDStringW result;
+ result.reserve(str.size() + wcslen(s));
+ result.assign(str);
+ result.append(s);
+ return result;
+}
+
+inline VDStringW operator+(const VDStringW& str, wchar_t c) {
+ VDStringW result;
+ result.reserve(str.size() + 1);
+ result.assign(str);
+ result += c;
+ return result;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+typedef VDStringA VDString;
+
+
+#endif
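A minimal usage sketch of the string classes declared above; this is illustrative only, not part of the imported header, and the include path is an assumption (the hunk header naming this file lies outside this excerpt).

#include <vd2/system/VDString.h>    // assumed path

void StringSketch() {
    VDStringA name("clip");             // narrow string, kept NUL-terminated for c_str()
    name += '_';
    name.append("001");                 // growth is amortized via reserve_amortized_slow()

    VDStringA full = name + ".avi";     // operator+ reserves size() + strlen(s) up front

    VDStringW wide(L"deinterlace");
    wide.insert(wide.begin(), L'x');    // single-character insert shifts the terminator too
    wide.erase(0, 1);                   // ...and erase removes it again

    const char *cs = full.c_str();      // valid even for an empty string (points at sNull)
    (void)cs;
}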
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/atomic.h b/src/thirdparty/VirtualDub/h/vd2/system/atomic.h
new file mode 100644
index 000000000..a7c2eb532
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/atomic.h
@@ -0,0 +1,282 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_ATOMIC_H
+#define f_VD2_SYSTEM_ATOMIC_H
+
+#include <vd2/system/vdtypes.h>
+
+// Intrinsics available in VC6.0
+extern "C" long __cdecl _InterlockedDecrement(volatile long *p);
+extern "C" long __cdecl _InterlockedIncrement(volatile long *p);
+extern "C" long __cdecl _InterlockedCompareExchange(volatile long *p, long n, long p_compare);
+extern "C" long __cdecl _InterlockedExchange(volatile long *p, long n);
+extern "C" long __cdecl _InterlockedExchangeAdd(volatile long *p, long n);
+
+#pragma intrinsic(_InterlockedDecrement)
+#pragma intrinsic(_InterlockedIncrement)
+#pragma intrinsic(_InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchange)
+#pragma intrinsic(_InterlockedExchangeAdd)
+
+// Intrinsics available in VC7.1. Note that the compiler is smart enough to
+// use straight LOCK AND/OR/XOR if the return value is not needed; otherwise
+// it uses a LOCK CMPXCHG loop.
+#if _MSC_VER >= 1310
+ extern "C" long __cdecl _InterlockedAnd(volatile long *p, long n);
+ extern "C" long __cdecl _InterlockedOr(volatile long *p, long n);
+ extern "C" long __cdecl _InterlockedXor(volatile long *p, long n);
+
+ #pragma intrinsic(_InterlockedAnd)
+ #pragma intrinsic(_InterlockedOr)
+ #pragma intrinsic(_InterlockedXor)
+#endif
+
+// Intrinsics available with AMD64
+#ifdef _M_AMD64
+ extern "C" void *__cdecl _InterlockedExchangePointer(void *volatile *pp, void *p);
+ #pragma intrinsic(_InterlockedExchangePointer)
+ extern "C" void *__cdecl _InterlockedCompareExchangePointer(void *volatile *pp, void *p, void *compare);
+ #pragma intrinsic(_InterlockedCompareExchangePointer)
+#endif
+
+inline void *VDAtomicCompareExchangePointer(void *volatile *pp, void *p, void *compare) {
+#ifdef _M_AMD64
+ return _InterlockedCompareExchangePointer(pp, p, compare);
+#else
+ return (void *)(sintptr)_InterlockedCompareExchange((volatile long *)(volatile sintptr *)pp, (long)(sintptr)p, (long)(sintptr)compare);
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////
+/// \class VDAtomicInt
+/// \brief Wrapped integer supporting thread-safe atomic operations.
+///
+/// VDAtomicInt allows integer values shared between threads to be
+/// modified with several common operations in a lock-less manner and
+/// without the need for explicit barriers. This is particularly useful
+/// for thread-safe reference counting.
+///
+class VDAtomicInt {
+protected:
+ volatile int n;
+
+public:
+ VDAtomicInt() {}
+ VDAtomicInt(int v) : n(v) {}
+
+ bool operator!() const { return !n; }
+ bool operator!=(volatile int v) const { return n!=v; }
+ bool operator==(volatile int v) const { return n==v; }
+ bool operator<=(volatile int v) const { return n<=v; }
+ bool operator>=(volatile int v) const { return n>=v; }
+ bool operator<(volatile int v) const { return n<v; }
+ bool operator>(volatile int v) const { return n>v; }
+
+ ///////////////////////////////
+
+ /// Atomically exchanges a value with an integer in memory.
+ static inline int staticExchange(volatile int *dst, int v) {
+ return (int)_InterlockedExchange((volatile long *)dst, v);
+ }
+
+ /// Atomically adds one to an integer in memory.
+ static inline void staticIncrement(volatile int *dst) {
+ _InterlockedExchangeAdd((volatile long *)dst, 1);
+ }
+
+ /// Atomically subtracts one from an integer in memory.
+ static inline void staticDecrement(volatile int *dst) {
+ _InterlockedExchangeAdd((volatile long *)dst, -1);
+ }
+
+ /// Atomically subtracts one from an integer in memory and returns
+ /// true if the result is zero.
+ static inline bool staticDecrementTestZero(volatile int *dst) {
+ return 1 == _InterlockedExchangeAdd((volatile long *)dst, -1);
+ }
+
+ /// Atomically adds a value to an integer in memory and returns the
+ /// result.
+ static inline int staticAdd(volatile int *dst, int v) {
+ return (int)_InterlockedExchangeAdd((volatile long *)dst, v) + v;
+ }
+
+ /// Atomically adds a value to an integer in memory and returns the
+ /// original value from before the add (post-add semantics).
+ static inline int staticExchangeAdd(volatile int *dst, int v) {
+ return _InterlockedExchangeAdd((volatile long *)dst, v);
+ }
+
+ /// Atomically compares an integer in memory to a compare value and
+ /// swaps the memory location with a second value if the compare
+ /// succeeds. The return value is the memory value prior to the swap.
+ static inline int staticCompareExchange(volatile int *dst, int v, int compare) {
+ return _InterlockedCompareExchange((volatile long *)dst, v, compare);
+ }
+
+ ///////////////////////////////
+
+ int operator=(int v) { return n = v; }
+
+ int operator++() { return staticAdd(&n, 1); }
+ int operator--() { return staticAdd(&n, -1); }
+ int operator++(int) { return staticExchangeAdd(&n, 1); }
+ int operator--(int) { return staticExchangeAdd(&n, -1); }
+ int operator+=(int v) { return staticAdd(&n, v); }
+ int operator-=(int v) { return staticAdd(&n, -v); }
+
+#if _MSC_VER >= 1310
+ void operator&=(int v) { _InterlockedAnd((volatile long *)&n, v); } ///< Atomic bitwise AND.
+ void operator|=(int v) { _InterlockedOr((volatile long *)&n, v); } ///< Atomic bitwise OR.
+ void operator^=(int v) { _InterlockedXor((volatile long *)&n, v); } ///< Atomic bitwise XOR.
+#else
+ /// Atomic bitwise AND.
+ void operator&=(int v) {
+ __asm mov eax,v
+ __asm mov ecx,this
+ __asm lock and dword ptr [ecx],eax
+ }
+
+ /// Atomic bitwise OR.
+ void operator|=(int v) {
+ __asm mov eax,v
+ __asm mov ecx,this
+ __asm lock or dword ptr [ecx],eax
+ }
+
+ /// Atomic bitwise XOR.
+ void operator^=(int v) {
+ __asm mov eax,v
+ __asm mov ecx,this
+ __asm lock xor dword ptr [ecx],eax
+ }
+#endif
+
+ operator int() const {
+ return n;
+ }
+
+ /// Atomic exchange.
+ int xchg(int v) {
+ return staticExchange(&n, v);
+ }
+
+ /// Compare/exchange (486+).
+ int compareExchange(int newValue, int oldValue) {
+ return staticCompareExchange(&n, newValue, oldValue);
+ }
+
+ // 486 only, but much nicer. They return the actual result.
+
+ int inc() { return operator++(); } ///< Atomic increment.
+ int dec() { return operator--(); } ///< Atomic decrement.
+ int add(int v) { return operator+=(v); } ///< Atomic add.
+
+ // These return the result before the operation, which is more in line with
+ // what XADD allows us to do.
+
+ int postinc() { return operator++(0); } ///< Atomic post-increment.
+ int postdec() { return operator--(0); } ///< Atomic post-decrement.
+ int postadd(int v) { return staticExchangeAdd(&n, v); } ///< Atomic post-add.
+
+};
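The class comment above calls out thread-safe reference counting; a minimal sketch of that pattern with a hypothetical object type (not part of atomic.h):

class RefCountedThing {
public:
    RefCountedThing() : mRefCount(1) {}

    int AddRef() {
        return ++mRefCount;             // atomic increment, returns the new count
    }

    int Release() {
        int rc = --mRefCount;           // atomic decrement, returns the new count
        if (!rc)
            delete this;                // last reference released
        return rc;
    }

private:
    VDAtomicInt mRefCount;
};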
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDAtomicFloat {
+protected:
+ volatile float n;
+
+public:
+ VDAtomicFloat() {}
+ VDAtomicFloat(float v) : n(v) {}
+
+ bool operator!=(float v) const { return n!=v; }
+ bool operator==(float v) const { return n==v; }
+ bool operator<=(float v) const { return n<=v; }
+ bool operator>=(float v) const { return n>=v; }
+ bool operator<(float v) const { return n<v; }
+ bool operator>(float v) const { return n>v; }
+
+ float operator=(float v) { return n = v; }
+
+ operator float() const {
+ return n;
+ }
+
+ /// Atomic exchange.
+ float xchg(float v) {
+ union { int i; float f; } converter = {VDAtomicInt::staticExchange((volatile int *)&n, *(const int *)&v)};
+
+ return converter.f;
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////
+/// \class VDAtomicPtr
+/// \brief Wrapped pointer supporting thread-safe atomic operations.
+///
+/// VDAtomicPtr allows a shared pointer to be safely manipulated by
+/// multiple threads without locks. Note that atomicity is only guaranteed
+/// for the pointer itself, so any operations on the pointed-to object must
+/// be protected by other means, such as an inner lock or other atomic
+/// operations. An atomic pointer can serve as a single-entry queue.
+///
+template<typename T>
+class VDAtomicPtr {
+protected:
+ T *volatile ptr;
+
+public:
+ VDAtomicPtr() {}
+ VDAtomicPtr(T *p) : ptr(p) { }
+
+ operator T*() const { return ptr; }
+ T* operator->() const { return ptr; }
+
+ T* operator=(T* p) {
+ return ptr = p;
+ }
+
+ /// Atomic pointer exchange.
+ T *xchg(T* p) {
+#ifdef _M_AMD64
+ return ptr == p ? p : (T *)_InterlockedExchangePointer((void *volatile *)&ptr, p);
+#else
+ return ptr == p ? p : (T *)_InterlockedExchange((volatile long *)&ptr, (long)p);
+#endif
+ }
+
+ T *compareExchange(T *newValue, T *oldValue) {
+#ifdef _M_AMD64
+ return (T *)_InterlockedCompareExchangePointer((void *volatile *)&ptr, (void *)newValue, (void *)oldValue);
+#else
+ return (T *)_InterlockedCompareExchange((volatile long *)&ptr, (long)(size_t)newValue, (long)(size_t)oldValue);
+#endif
+ }
+};
+
+#endif
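The VDAtomicPtr comment above mentions that an atomic pointer can serve as a single-entry queue; this is a sketch of that idea with a hypothetical Frame type (not part of the header):

#include <vd2/system/atomic.h>

struct Frame { int data; };             // hypothetical payload

VDAtomicPtr<Frame> g_mailbox(NULL);     // single slot shared by producer and consumer

void PublishFrame(Frame *f) {
    // Swap the new frame in; if the consumer never picked up the previous one,
    // xchg() hands it back so it can be dropped instead of leaked.
    delete g_mailbox.xchg(f);
}

Frame *TakeFrame() {
    // Take whatever is queued, leaving the slot empty (may return NULL).
    return g_mailbox.xchg(NULL);
}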
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/binary.h b/src/thirdparty/VirtualDub/h/vd2/system/binary.h
new file mode 100644
index 000000000..66542a516
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/binary.h
@@ -0,0 +1,184 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_BINARY_H
+#define f_VD2_SYSTEM_BINARY_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+#define VDMAKEFOURCC(byte1, byte2, byte3, byte4) (((uint8)byte1) + (((uint8)byte2) << 8) + (((uint8)byte3) << 16) + (((uint8)byte4) << 24))
+
+#ifdef _MSC_VER
+ unsigned short _byteswap_ushort(unsigned short);
+ unsigned long _byteswap_ulong(unsigned long);
+ unsigned __int64 _byteswap_uint64(unsigned __int64);
+
+ #pragma intrinsic(_byteswap_ushort)
+ #pragma intrinsic(_byteswap_ulong)
+ #pragma intrinsic(_byteswap_uint64)
+
+ inline uint16 VDSwizzleU16(uint16 value) { return (uint16)_byteswap_ushort((unsigned short)value); }
+ inline sint16 VDSwizzleS16(sint16 value) { return (sint16)_byteswap_ushort((unsigned short)value); }
+ inline uint32 VDSwizzleU32(uint32 value) { return (uint32)_byteswap_ulong((unsigned long)value); }
+ inline sint32 VDSwizzleS32(sint32 value) { return (sint32)_byteswap_ulong((unsigned long)value); }
+ inline uint64 VDSwizzleU64(uint64 value) { return (uint64)_byteswap_uint64((unsigned __int64)value); }
+ inline sint64 VDSwizzleS64(sint64 value) { return (sint64)_byteswap_uint64((unsigned __int64)value); }
+#else
+ inline uint16 VDSwizzleU16(uint16 value) {
+ return (uint16)((value >> 8) + (value << 8));
+ }
+
+ inline sint16 VDSwizzleS16(sint16 value) {
+ return (sint16)(((uint16)value >> 8) + ((uint16)value << 8));
+ }
+
+ inline uint32 VDSwizzleU32(uint32 value) {
+ return (value >> 24) + (value << 24) + ((value&0xff00)<<8) + ((value&0xff0000)>>8);
+ }
+
+ inline sint32 VDSwizzleS32(sint32 value) {
+ return (sint32)(((uint32)value >> 24) + ((uint32)value << 24) + (((uint32)value&0xff00)<<8) + (((uint32)value&0xff0000)>>8));
+ }
+
+ inline uint64 VDSwizzleU64(uint64 value) {
+ return ((value & 0xFF00000000000000) >> 56) +
+ ((value & 0x00FF000000000000) >> 40) +
+ ((value & 0x0000FF0000000000) >> 24) +
+ ((value & 0x000000FF00000000) >> 8) +
+ ((value & 0x00000000FF000000) << 8) +
+ ((value & 0x0000000000FF0000) << 24) +
+ ((value & 0x000000000000FF00) << 40) +
+ ((value & 0x00000000000000FF) << 56);
+ }
+
+ inline sint64 VDSwizzleS64(sint64 value) {
+ return (sint64)((((uint64)value & 0xFF00000000000000) >> 56) +
+ (((uint64)value & 0x00FF000000000000) >> 40) +
+ (((uint64)value & 0x0000FF0000000000) >> 24) +
+ (((uint64)value & 0x000000FF00000000) >> 8) +
+ (((uint64)value & 0x00000000FF000000) << 8) +
+ (((uint64)value & 0x0000000000FF0000) << 24) +
+ (((uint64)value & 0x000000000000FF00) << 40) +
+ (((uint64)value & 0x00000000000000FF) << 56));
+ }
+#endif
+
+inline uint16 VDReadUnalignedU16(const void *p) { return *(uint16 *)p; }
+inline sint16 VDReadUnalignedS16(const void *p) { return *(sint16 *)p; }
+inline uint32 VDReadUnalignedU32(const void *p) { return *(uint32 *)p; }
+inline sint32 VDReadUnalignedS32(const void *p) { return *(sint32 *)p; }
+inline uint64 VDReadUnalignedU64(const void *p) { return *(uint64 *)p; }
+inline sint64 VDReadUnalignedS64(const void *p) { return *(sint64 *)p; }
+inline float VDReadUnalignedF(const void *p) { return *(float *)p; }
+inline double VDReadUnalignedD(const void *p) { return *(double *)p; }
+
+inline uint16 VDReadUnalignedLEU16(const void *p) { return *(uint16 *)p; }
+inline sint16 VDReadUnalignedLES16(const void *p) { return *(sint16 *)p; }
+inline uint32 VDReadUnalignedLEU32(const void *p) { return *(uint32 *)p; }
+inline sint32 VDReadUnalignedLES32(const void *p) { return *(sint32 *)p; }
+inline uint64 VDReadUnalignedLEU64(const void *p) { return *(uint64 *)p; }
+inline sint64 VDReadUnalignedLES64(const void *p) { return *(sint64 *)p; }
+inline float VDReadUnalignedLEF(const void *p) { return *(float *)p; }
+inline double VDReadUnalignedLED(const void *p) { return *(double *)p; }
+
+inline uint16 VDReadUnalignedBEU16(const void *p) { return VDSwizzleU16(*(uint16 *)p); }
+inline sint16 VDReadUnalignedBES16(const void *p) { return VDSwizzleS16(*(sint16 *)p); }
+inline uint32 VDReadUnalignedBEU32(const void *p) { return VDSwizzleU32(*(uint32 *)p); }
+inline sint32 VDReadUnalignedBES32(const void *p) { return VDSwizzleS32(*(sint32 *)p); }
+inline uint64 VDReadUnalignedBEU64(const void *p) { return VDSwizzleU64(*(uint64 *)p); }
+inline sint64 VDReadUnalignedBES64(const void *p) { return VDSwizzleS64(*(sint64 *)p); }
+inline float VDReadUnalignedBEF(const void *p) {
+ union {
+ uint32 i;
+ float f;
+ } conv = {VDSwizzleU32(*(const uint32 *)p)};
+ return conv.f;
+}
+inline double VDReadUnalignedBED(const void *p) {
+ union {
+ uint64 i;
+ double d;
+ } conv = {VDSwizzleU64(*(const uint64 *)p)};
+ return conv.d;
+}
+
+inline void VDWriteUnalignedU16 (void *p, uint16 v) { *(uint16 *)p = v; }
+inline void VDWriteUnalignedS16 (void *p, sint16 v) { *(sint16 *)p = v; }
+inline void VDWriteUnalignedU32 (void *p, uint32 v) { *(uint32 *)p = v; }
+inline void VDWriteUnalignedS32 (void *p, sint32 v) { *(sint32 *)p = v; }
+inline void VDWriteUnalignedU64 (void *p, uint64 v) { *(uint64 *)p = v; }
+inline void VDWriteUnalignedS64 (void *p, sint64 v) { *(sint64 *)p = v; }
+inline void VDWriteUnalignedF (void *p, float v) { *(float *)p = v; }
+inline void VDWriteUnalignedD (void *p, double v) { *(double *)p = v; }
+
+inline void VDWriteUnalignedLEU16(void *p, uint16 v) { *(uint16 *)p = v; }
+inline void VDWriteUnalignedLES16(void *p, sint16 v) { *(sint16 *)p = v; }
+inline void VDWriteUnalignedLEU32(void *p, uint32 v) { *(uint32 *)p = v; }
+inline void VDWriteUnalignedLES32(void *p, sint32 v) { *(sint32 *)p = v; }
+inline void VDWriteUnalignedLEU64(void *p, uint64 v) { *(uint64 *)p = v; }
+inline void VDWriteUnalignedLES64(void *p, sint64 v) { *(sint64 *)p = v; }
+inline void VDWriteUnalignedLEF (void *p, float v) { *(float *)p = v; }
+inline void VDWriteUnalignedLED (void *p, double v) { *(double *)p = v; }
+
+inline void VDWriteUnalignedBEU16(void *p, uint16 v) { *(uint16 *)p = VDSwizzleU16(v); }
+inline void VDWriteUnalignedBES16(void *p, sint16 v) { *(sint16 *)p = VDSwizzleS16(v); }
+inline void VDWriteUnalignedBEU32(void *p, uint32 v) { *(uint32 *)p = VDSwizzleU32(v); }
+inline void VDWriteUnalignedBES32(void *p, sint32 v) { *(sint32 *)p = VDSwizzleS32(v); }
+inline void VDWriteUnalignedBEU64(void *p, uint64 v) { *(uint64 *)p = VDSwizzleU64(v); }
+inline void VDWriteUnalignedBES64(void *p, sint64 v) { *(sint64 *)p = VDSwizzleS64(v); }
+inline void VDWriteUnalignedBEF(void *p, float v) {
+ union {
+ float f;
+ uint32 i;
+ } conv = {v};
+ *(uint32 *)p = VDSwizzleU32(conv.i);
+}
+inline void VDWriteUnalignedBED(void *p, double v) {
+ union {
+ double f;
+ uint64 i;
+ } conv = {v};
+ *(uint64 *)p = VDSwizzleU64(conv.i);
+}
+
+#define VDFromLE8(x) (x)
+#define VDFromLE16(x) (x)
+#define VDFromLE32(x) (x)
+#define VDFromBE8(x) VDSwizzleU8(x)
+#define VDFromBE16(x) VDSwizzleU16(x)
+#define VDFromBE32(x) VDSwizzleU32(x)
+
+#define VDToLE8(x) (x)
+#define VDToLE16(x) (x)
+#define VDToLE32(x) (x)
+#define VDToBE8(x) VDSwizzleU8(x)
+#define VDToBE16(x) VDSwizzleU16(x)
+#define VDToBE32(x) VDSwizzleU32(x)
+
+#endif
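A short sketch of the unaligned/byte-order helpers above, e.g. pulling big-endian fields out of a byte stream (illustrative only; the layout is made up):

#include <vd2/system/binary.h>

void ParseChunkHeader(const unsigned char *buf) {
    uint32 size = VDReadUnalignedBEU32(buf);        // big-endian 32-bit length
    uint32 type = VDReadUnalignedU32(buf + 4);      // raw bytes; matches VDMAKEFOURCC packing

    if (type == VDMAKEFOURCC('m', 'o', 'o', 'v')) {
        // handle the chunk
    }

    unsigned char out[4];
    VDWriteUnalignedBEU32(out, size);               // write it back out big-endian
    (void)out;
}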
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/bitmath.h b/src/thirdparty/VirtualDub/h/vd2/system/bitmath.h
new file mode 100644
index 000000000..fc1c185a7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/bitmath.h
@@ -0,0 +1,75 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_BITMATH_H
+#define f_VD2_SYSTEM_BITMATH_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#ifndef f_VD2_SYSTEM_VDTYPES_H
+ #include <vd2/system/vdtypes.h>
+#endif
+
+int VDCountBits(uint32 v);
+int VDFindLowestSetBit(uint32 v);
+int VDFindHighestSetBit(uint32 v);
+uint32 VDCeilToPow2(uint32 v);
+
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef VD_COMPILER_MSVC_VC8
+ #include <intrin.h>
+ #pragma intrinsic(_BitScanForward)
+ #pragma intrinsic(_BitScanReverse)
+
+ inline int VDFindLowestSetBit(uint32 v) {
+ unsigned long index;
+ return _BitScanForward(&index, v) ? index : 32;
+ }
+
+ inline int VDFindHighestSetBit(uint32 v) {
+ unsigned long index;
+ return _BitScanReverse(&index, v) ? index : -1;
+ }
+
+ inline int VDFindLowestSetBitFast(uint32 v) {
+ unsigned long index;
+ _BitScanForward(&index, v);
+ return index;
+ }
+
+ inline int VDFindHighestSetBitFast(uint32 v) {
+ unsigned long index;
+ _BitScanReverse(&index, v);
+ return index;
+ }
+#else
+ #define VDFindLowestSetBitFast VDFindLowestSetBit
+ #define VDFindHighestSetBitFast VDFindHighestSetBit
+#endif
+
+#endif
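A small sketch of how the bit-scan helpers above are typically used (illustrative only):

#include <vd2/system/bitmath.h>

uint32 RoundUpToPow2(uint32 bytes) {
    return VDCeilToPow2(bytes);         // e.g. for texture or ring-buffer sizing
}

int Log2OfPow2(uint32 v) {
    // For a non-zero value, the index of the highest set bit is floor(log2(v)).
    // The *Fast variants skip the zero check, so guard the input first.
    return v ? VDFindHighestSetBitFast(v) : -1;
}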
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/cache.h b/src/thirdparty/VirtualDub/h/vd2/system/cache.h
new file mode 100644
index 000000000..8fbdea7c2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/cache.h
@@ -0,0 +1,325 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2005 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_CACHE_H
+#define f_VD2_SYSTEM_CACHE_H
+
+#include <vd2/system/thread.h>
+#include <vd2/system/vdstl.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+struct vdhashmap_node {
+ vdhashmap_node *mpHashPrev;
+ vdhashmap_node *mpHashNext;
+};
+
+template<class K>
+struct vdhash {
+ size_t operator()(const K key) const {
+ return (size_t)key;
+ }
+};
+
+template<class K, class V, class Hash = vdhash<K>, int N = 256>
+class vdhashmap_iterator {
+public:
+ typedef vdhashmap_node node;
+
+ bool operator==(vdhashmap_iterator& x) const { return mpNode == x.mpNode; }
+ bool operator!=(vdhashmap_iterator& x) const { return mpNode != x.mpNode; }
+
+ V& operator*() const { return *static_cast<V *>((node *)mpNode); }
+ V *operator->() const { return static_cast<V *>((node *)mpNode); }
+
+ vdhashmap_iterator& operator++() {
+ do {
+ mpNode = ((node *)mpNode)->mpHashNext;
+ if (mpNode != mpTableNode)
+ break;
+
+ ++mpTableNode;
+ mpNode = mpTableNode->mpHashNext;
+ } while(mpNode);
+
+ return *this;
+ }
+
+ vdhashmap_iterator operator++(int) {
+ vdhashmap_iterator it(*this);
+ ++*this;
+ return it;
+ }
+
+public:
+ vdhashmap_node *mpNode;
+ vdhashmap_node *mpTableNode;
+};
+
+template<class K, class V, class Hash = vdhash<K>, int N = 256>
+class vdhashmap {
+public:
+ typedef K key_type;
+ typedef V value_type;
+ typedef Hash hash_type;
+ typedef vdhashmap_node node;
+ typedef vdhashmap_iterator<K, V> iterator;
+
+ vdhashmap() {
+ for(int i=0; i<N; ++i)
+ m.mpTable[i].mpHashPrev = m.mpTable[i].mpHashNext = &m.mpTable[i];
+ }
+
+ iterator begin() {
+ int i;
+ // A bucket is empty when its sentinel links back to itself; skip those.
+ for(i=0; i<N && m.mpTable[i].mpHashNext == &m.mpTable[i]; ++i)
+ ;
+ if (i >= N)
+ return end();
+ iterator it = { m.mpTable[i].mpHashNext, &m.mpTable[i] };
+ return it;
+ }
+
+ iterator end() {
+ iterator it = { NULL, NULL };
+ return it;
+ }
+
+ V *operator[](const K& key) {
+ const size_t htidx = m(key) % N;
+
+ node *r = &m.mpTable[htidx];
+ for(node *p = r->mpHashNext; p != r; p = p->mpHashNext) {
+ if (static_cast<V *>(p)->mHashKey == key)
+ return static_cast<V *>(p);
+ }
+
+ return NULL;
+ }
+
+ iterator find(const K& key) {
+ const size_t htidx = m(key) % N;
+
+ node *r = &m.mpTable[htidx];
+ for(node *p = r->mpHashNext; p != r; p = p->mpHashNext) {
+ if (static_cast<V *>(p)->mHashKey == key) {
+ iterator it = { p, &m.mpTable[htidx] };
+ return it;
+ }
+ }
+
+ return end();
+ }
+
+ iterator insert(V *p) {
+ const size_t htidx = m(p->mHashKey) % N;
+
+ node *r = &m.mpTable[htidx];
+ node *n = r->mpHashNext;
+ r->mpHashNext = p;
+ p->mpHashPrev = &m.mpTable[htidx];
+ p->mpHashNext = n;
+ n->mpHashPrev = p;
+
+ iterator it = { p, &m.mpTable[htidx] };
+ return it;
+ }
+
+ void erase(V *x) {
+ node *p = x->mpHashPrev;
+ node *n = x->mpHashNext;
+
+ p->mpHashNext = n;
+ n->mpHashPrev = p;
+ }
+
+ void erase(iterator it) {
+ erase(it.mpNode);
+ }
+
+protected:
+ struct Data : public Hash {
+ vdhashmap_node mpTable[N];
+ } m;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDCachedObject;
+
+class IVDCacheAllocator {
+public:
+ virtual VDCachedObject *OnCacheAllocate() = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+enum VDCacheState {
+ kVDCacheStateFree,
+ kVDCacheStatePending,
+ kVDCacheStateReady,
+ kVDCacheStateActive,
+ kVDCacheStateComplete,
+ kVDCacheStateIdle,
+ kVDCacheStateAborting,
+ kVDCacheStateCount
+};
+
+struct VDCachedObjectNodes : public vdlist_node, public vdhashmap_node {
+ sint64 mHashKey;
+};
+
+class VDCache {
+public:
+ VDCache(IVDCacheAllocator *pAllocator);
+ ~VDCache();
+
+ void Shutdown();
+
+ int GetStateCount(int state);
+
+ void DumpListStatus(int state);
+
+ VDCachedObject *Create(sint64 key, bool& is_new);
+
+ VDCachedObject *Allocate(sint64 key);
+ void Schedule(VDCachedObject *); // Moves a Pending or Active object to Ready.
+ VDCachedObject *GetNextReady(); // Selects a Ready object and moves it to Active.
+ void MarkCompleted(VDCachedObject *); // Marks an object as completed.
+
+public:
+ void NotifyFree(VDCachedObject *pObject);
+
+protected:
+ void Evict(uint32 level);
+
+protected:
+ VDCriticalSection mLock;
+
+ IVDCacheAllocator *mpAllocator;
+ uint32 mObjectCount;
+ uint32 mObjectLimit;
+
+ typedef vdlist<VDCachedObjectNodes> ObjectList;
+ ObjectList mLists[kVDCacheStateCount];
+
+ vdhashmap<sint64, VDCachedObjectNodes> mHash;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDCachedObject : private VDCachedObjectNodes {
+ friend class VDCache;
+public:
+ VDCachedObject();
+ virtual ~VDCachedObject() {}
+
+ int AddRef();
+ int Release();
+
+ void WeakAddRef();
+ void WeakRelease();
+
+protected:
+ virtual void OnCacheEvict() {}
+ virtual void OnCacheAbortPending() {}
+ virtual void DumpStatus() {}
+
+protected:
+ int GetRefCount() const { return mRefCount; }
+ void SetCache(VDCache *pCache);
+
+ VDCacheState GetState() const { return mState; }
+ void SetState(VDCacheState state) { mState = state; }
+
+ sint64 GetCacheKey() const { return mHashKey; }
+
+ virtual bool IsValid() const { return true; }
+
+protected:
+ VDCache *mpCache;
+ VDAtomicInt mRefCount;
+ VDCacheState mState;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDPooledObject;
+
+class IVDPoolAllocator {
+public:
+ virtual VDPooledObject *OnPoolAllocate() = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+enum VDPoolState {
+ kVDPoolStateFree,
+ kVDPoolStateActive,
+ kVDPoolStateCount
+};
+
+struct VDPooledObjectNodes : public vdlist_node {};
+
+class VDPool {
+public:
+ VDPool(IVDPoolAllocator *pAllocator);
+ ~VDPool();
+
+ void Shutdown();
+
+ VDPooledObject *Allocate();
+
+public:
+ void NotifyFree(VDPooledObject *pObject);
+
+protected:
+ VDCriticalSection mLock;
+
+ IVDPoolAllocator *mpAllocator;
+ uint32 mObjectCount;
+ uint32 mObjectLimit;
+
+ typedef vdlist<VDPooledObjectNodes> ObjectList;
+ ObjectList mLists[kVDPoolStateCount];
+};
+
+class VDPooledObject : private VDPooledObjectNodes {
+ friend class VDPool;
+public:
+ VDPooledObject();
+ virtual ~VDPooledObject() {}
+
+ int AddRef();
+ int Release();
+
+protected:
+ int GetRefCount() const { return mRefCount; }
+ void SetPool(VDPool *pPool);
+
+protected:
+ VDPool *mpPool;
+ VDAtomicInt mRefCount;
+};
+
+#endif
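A sketch of the intrusive hash map above: value types embed the node and the key themselves, in the same style as VDCachedObjectNodes (the entry type below is hypothetical):

#include <vd2/system/cache.h>

struct FrameEntry : public vdhashmap_node {
    sint64 mHashKey;                    // key field that vdhashmap expects on its value type
    int    mData;
};

void HashSketch() {
    vdhashmap<sint64, FrameEntry> map;

    FrameEntry e;
    e.mHashKey = 42;
    e.mData = 7;
    map.insert(&e);                     // intrusive: the entry is linked in place, not copied

    FrameEntry *p = map[42];            // operator[] is a lookup, returns NULL when absent
    if (p)
        map.erase(p);                   // unlinks the entry without destroying it
}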
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/cmdline.h b/src/thirdparty/VirtualDub/h/vd2/system/cmdline.h
new file mode 100644
index 000000000..eb1d94480
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/cmdline.h
@@ -0,0 +1,69 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2005 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_CMDLINE_H
+#define f_VD2_SYSTEM_CMDLINE_H
+
+#include <vd2/system/vdstl.h>
+
+class VDCommandLineIterator {
+ friend class VDCommandLine;
+public:
+ VDCommandLineIterator() : mIndex(1) {}
+
+private:
+ int mIndex;
+};
+
+class VDCommandLine {
+public:
+ VDCommandLine();
+ VDCommandLine(const wchar_t *s);
+ ~VDCommandLine();
+
+ void Init(const wchar_t *s);
+
+ uint32 GetCount() const;
+ const wchar_t *operator[](int index) const;
+
+ bool GetNextArgument(VDCommandLineIterator& index, const wchar_t *& token, bool& isSwitch) const;
+ bool GetNextNonSwitchArgument(VDCommandLineIterator& index, const wchar_t *& token) const;
+ bool GetNextSwitchArgument(VDCommandLineIterator& index, const wchar_t *& token) const;
+ bool FindAndRemoveSwitch(const wchar_t *name);
+ bool FindAndRemoveSwitch(const wchar_t *name, const wchar_t *& token);
+
+protected:
+ vdfastvector<wchar_t> mLine;
+
+ struct Token {
+ int mTokenIndex;
+ bool mbIsSwitch;
+ bool mbQuoted;
+ };
+
+ vdfastvector<Token> mTokens;
+};
+
+#endif
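A usage sketch for the command-line parser above; the switch names are made up, and the value-returning FindAndRemoveSwitch overload is assumed to hand back the switch's argument:

#include <vd2/system/cmdline.h>

void ParseArgs(const wchar_t *rawCommandLine) {
    VDCommandLine cmdLine(rawCommandLine);

    bool fullscreen = cmdLine.FindAndRemoveSwitch(L"fullscreen");   // boolean switch

    const wchar_t *preset = NULL;
    if (cmdLine.FindAndRemoveSwitch(L"preset", preset)) {
        // preset now points at the switch's argument (assumed semantics)
    }

    VDCommandLineIterator it;
    const wchar_t *token;
    while(cmdLine.GetNextNonSwitchArgument(it, token)) {
        // token is the next plain argument, e.g. a file name
    }

    (void)fullscreen;
}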
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/cpuaccel.h b/src/thirdparty/VirtualDub/h/vd2/system/cpuaccel.h
new file mode 100644
index 000000000..a15bc8be9
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/cpuaccel.h
@@ -0,0 +1,49 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VIRTUALDUB_CPUACCEL_H
+#define f_VIRTUALDUB_CPUACCEL_H
+
+#define CPUF_SUPPORTS_CPUID (0x00000001L)
+#define CPUF_SUPPORTS_FPU (0x00000002L)
+#define CPUF_SUPPORTS_MMX (0x00000004L)
+#define CPUF_SUPPORTS_INTEGER_SSE (0x00000008L)
+#define CPUF_SUPPORTS_SSE (0x00000010L)
+#define CPUF_SUPPORTS_SSE2 (0x00000020L)
+#define CPUF_SUPPORTS_3DNOW (0x00000040L)
+#define CPUF_SUPPORTS_3DNOW_EXT (0x00000080L)
+#define CPUF_SUPPORTS_SSE3 (0x00000100L)
+#define CPUF_SUPPORTS_SSSE3 (0x00000200L)
+#define CPUF_SUPPORTS_SSE41 (0x00000400L)
+#define CPUF_SUPPORTS_MASK (0x000007FFL)
+
+long CPUCheckForExtensions();
+long CPUEnableExtensions(long lEnableFlags);
+long CPUGetEnabledExtensions();
+void VDCPUCleanupExtensions();
+
+extern "C" bool FPU_enabled, MMX_enabled, SSE_enabled, ISSE_enabled, SSE2_enabled;
+
+#endif
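A sketch of the intended initialization order for the CPU feature flags above; gating an SSE2 code path this way is how, for example, an SSE2 deinterlacer can be enabled only where supported:

#include <vd2/system/cpuaccel.h>

void InitCpuFeatures() {
    long detected = CPUCheckForExtensions();        // probe the CPU once
    long enabled  = CPUEnableExtensions(detected);

    if (enabled & CPUF_SUPPORTS_SSE2) {
        // safe to dispatch to SSE2 code paths (e.g. SSE2 deinterlacing)
    }

    // Later queries can call CPUGetEnabledExtensions() instead of re-probing.
}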
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/debug.h b/src/thirdparty/VirtualDub/h/vd2/system/debug.h
new file mode 100644
index 000000000..a4eb59e60
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/debug.h
@@ -0,0 +1,96 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_DEBUG_H
+#define f_VD2_SYSTEM_DEBUG_H
+
+#include <vd2/system/vdtypes.h>
+
+class IVDExternalCallTrap {
+public:
+ virtual void OnMMXTrap(const wchar_t *context, const char *file, int line) = 0;
+ virtual void OnFPUTrap(const wchar_t *context, const char *file, int line, uint16 fpucw) = 0;
+ virtual void OnSSETrap(const wchar_t *context, const char *file, int line, uint32 mxcsr) = 0;
+};
+
+void VDSetExternalCallTrap(IVDExternalCallTrap *);
+
+bool IsMMXState();
+void ClearMMXState();
+void VDClearEvilCPUStates();
+void VDPreCheckExternalCodeCall(const char *file, int line);
+void VDPostCheckExternalCodeCall(const wchar_t *mpContext, const char *mpFile, int mLine);
+
+struct VDSilentExternalCodeBracket {
+ VDSilentExternalCodeBracket() {
+ VDClearEvilCPUStates();
+ }
+
+ ~VDSilentExternalCodeBracket() {
+ VDClearEvilCPUStates();
+ }
+};
+
+struct VDExternalCodeBracketLocation {
+ VDExternalCodeBracketLocation(const wchar_t *pContext, const char *file, const int line)
+ : mpContext(pContext)
+ , mpFile(file)
+ , mLine(line)
+ {
+ }
+
+ const wchar_t *mpContext;
+ const char *mpFile;
+ const int mLine;
+};
+
+struct VDExternalCodeBracket {
+ VDExternalCodeBracket(const wchar_t *pContext, const char *file, const int line)
+ : mpContext(pContext)
+ , mpFile(file)
+ , mLine(line)
+ {
+ VDPreCheckExternalCodeCall(file, line);
+ }
+
+ VDExternalCodeBracket(const VDExternalCodeBracketLocation& loc)
+ : mpContext(loc.mpContext)
+ , mpFile(loc.mpFile)
+ , mLine(loc.mLine)
+ {
+ }
+
+ ~VDExternalCodeBracket() {
+ VDPostCheckExternalCodeCall(mpContext, mpFile, mLine);
+ }
+
+ operator bool() const { return false; }
+
+ const wchar_t *mpContext;
+ const char *mpFile;
+ const int mLine;
+};
+
+#endif
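A sketch of how the bracket helpers above are meant to wrap calls into external code such as a codec or driver; the callee is hypothetical:

#include <vd2/system/debug.h>

void CallThirdPartyCodec(void (*codecEntryPoint)()) {
    // Checks FPU/MMX/SSE state before the call and again in the destructor,
    // so a clobbering callee can be reported via the installed IVDExternalCallTrap.
    VDExternalCodeBracket bracket(L"third-party codec", __FILE__, __LINE__);
    codecEntryPoint();
}

void CallWithoutChecks(void (*fn)()) {
    VDSilentExternalCodeBracket bracket;        // just clears risky CPU state on entry and exit
    fn();
}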
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/debugx86.h b/src/thirdparty/VirtualDub/h/vd2/system/debugx86.h
new file mode 100644
index 000000000..03a4f29a3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/debugx86.h
@@ -0,0 +1,37 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+enum VDInstructionTypeX86 {
+ kX86InstUnknown,
+ kX86InstP6,
+ kX86InstMMX,
+ kX86InstMMX2,
+ kX86InstSSE,
+ kX86InstSSE2,
+ kX86Inst3DNow
+};
+
+bool VDIsValidCallX86(const char *buf, int len);
+VDInstructionTypeX86 VDGetInstructionTypeX86(const void *p);
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/event.h b/src/thirdparty/VirtualDub/h/vd2/system/event.h
new file mode 100644
index 000000000..a725f8d43
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/event.h
@@ -0,0 +1,201 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2006 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_EVENT_H
+#define f_VD2_SYSTEM_EVENT_H
+
+struct VDDelegateNode {
+ VDDelegateNode *mpNext, *mpPrev;
+};
+
+class VDDelegate;
+
+class VDEventBase {
+protected:
+ VDEventBase();
+ ~VDEventBase();
+
+ void Add(VDDelegate&);
+ void Remove(VDDelegate&);
+ void Raise(void *src, const void *info);
+
+ VDDelegateNode mAnchor;
+};
+
+// Because Visual C++ uses different pointer-to-member representations for
+// different inheritance regimes, we have to include a whole lot of stupid
+// logic to detect and switch code paths based on the inheritance used.
+// We detect the inheritance by the size of the member function pointer.
+//
+// Some have managed to make faster and more compact delegates by hacking
+// into the PMT representation and pre-folding the this pointer adjustment.
+// I'm avoiding this for now because (a) it's even less portable than what
+// we have here, and (b) that fails if the object undergoes a change in
+// virtual table status while the delegate is alive (which is possible
+// during construction/destruction).
+//
+// Note: We can't handle virtual inheritance here because on X64, MSVC uses
+// 16 bytes for both multiple and virtual inheritance cases.
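+//
+// Illustrative usage sketch (not part of the original header; the class and
+// member names are made up). A listener owns a VDDelegate and attaches it to
+// a source's VDEvent; the source later calls Raise(), which invokes the bound
+// member function:
+//
+//   class Source {
+//   public:
+//       VDEvent<Source, int> mProgressEvent;
+//       void Step(int pct) { mProgressEvent.Raise(this, pct); }
+//   };
+//
+//   class Listener {
+//   public:
+//       VDDelegate mDelegate;
+//       void Attach(Source& src) {
+//           src.mProgressEvent += mDelegate(this, &Listener::OnProgress);
+//       }
+//       void OnProgress(Source *sender, const int& pct) { /* react here */ }
+//   };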
+
+#ifdef _MSC_VER
+ class __single_inheritance VDDelegateHolderS;
+ class __multiple_inheritance VDDelegateHolderM;
+#else
+ class VDDelegateHolderS;
+#endif
+
+template<class Source, class ArgType>
+class VDDelegateBinding {
+public:
+ VDDelegate *mpBoundDelegate;
+};
+
+template<class T, class Source, class ArgType>
+struct VDDelegateAdapterS {
+ typedef void (T::*T_Fn)(Source *, const ArgType&);
+ typedef void (T::*T_Fn2)(Source *, ArgType);
+
+ static void Init(VDDelegate& dst, T_Fn fn) {
+ dst.mpCallback = Fn;
+ dst.mpFnS = reinterpret_cast<void(VDDelegateHolderS::*)()>(fn);
+ }
+
+ static void Init(VDDelegate& dst, T_Fn2 fn) {
+ dst.mpCallback = Fn2;
+ dst.mpFnS = reinterpret_cast<void(VDDelegateHolderS::*)()>(fn);
+ }
+
+ static void Fn(void *src, const void *info, VDDelegate& del) {
+ return (((T *)del.mpObj)->*reinterpret_cast<T_Fn>(del.mpFnS))(static_cast<Source *>(src), *static_cast<const ArgType *>(info));
+ }
+
+ static void Fn2(void *src, const void *info, VDDelegate& del) {
+ return (((T *)del.mpObj)->*reinterpret_cast<T_Fn2>(del.mpFnS))(static_cast<Source *>(src), *static_cast<const ArgType *>(info));
+ }
+};
+
+template<int size>
+class VDDelegateAdapter {
+public:
+ template<class T, class Source, class ArgType>
+ struct AdapterLookup {
+ typedef VDDelegateAdapterS<T, Source, ArgType> result;
+ };
+};
+
+#ifdef _MSC_VER
+template<class T, class Source, class ArgType>
+struct VDDelegateAdapterM {
+ typedef void (T::*T_Fn)(Source *, const ArgType&);
+ typedef void (T::*T_Fn2)(Source *, ArgType);
+
+ static void Init(VDDelegate& dst, T_Fn fn) {
+ dst.mpCallback = Fn;
+ dst.mpFnM = reinterpret_cast<void(VDDelegateHolderM::*)()>(fn);
+ }
+
+ static void Init(VDDelegate& dst, T_Fn2 fn) {
+ dst.mpCallback = Fn2;
+ dst.mpFnM = reinterpret_cast<void(VDDelegateHolderM::*)()>(fn);
+ }
+
+ static void Fn(void *src, const void *info, VDDelegate& del) {
+ return (((T *)del.mpObj)->*reinterpret_cast<T_Fn>(del.mpFnM))(static_cast<Source *>(src), *static_cast<const ArgType *>(info));
+ }
+
+ static void Fn2(void *src, const void *info, VDDelegate& del) {
+ return (((T *)del.mpObj)->*reinterpret_cast<T_Fn2>(del.mpFnM))(static_cast<Source *>(src), *static_cast<const ArgType *>(info));
+ }
+};
+
+
+template<>
+class VDDelegateAdapter<sizeof(void (VDDelegateHolderM::*)())> {
+public:
+ template<class T, class Source, class ArgType>
+ struct AdapterLookup {
+ typedef VDDelegateAdapterM<T, Source, ArgType> result;
+ };
+};
+#endif
+
+class VDDelegate : public VDDelegateNode {
+ friend class VDEventBase;
+public:
+ VDDelegate();
+ ~VDDelegate();
+
+ template<class T, class Source, class ArgType>
+ VDDelegateBinding<Source, ArgType> operator()(T *obj, void (T::*fn)(Source *, const ArgType&)) {
+ mpObj = obj;
+
+ VDDelegateAdapter<sizeof fn>::AdapterLookup<T, Source, ArgType>::result::Init(*this, fn);
+
+ VDDelegateBinding<Source, ArgType> binding = {this};
+ return binding;
+ }
+
+ template<class T, class Source, class ArgType>
+ VDDelegateBinding<Source, ArgType> Bind(T *obj, void (T::*fn)(Source *, ArgType)) {
+ mpObj = obj;
+
+ VDDelegateAdapter<sizeof fn>::AdapterLookup<T, Source, ArgType>::result::Init(*this, fn);
+
+ VDDelegateBinding<Source, ArgType> binding = {this};
+ return binding;
+ }
+
+public:
+ void (*mpCallback)(void *src, const void *info, VDDelegate&);
+ void *mpObj;
+
+#ifdef _MSC_VER
+ union {
+ void (VDDelegateHolderS::*mpFnS)();
+ void (VDDelegateHolderM::*mpFnM)();
+ };
+#else
+ class VDDelegateHolderS;
+ void (VDDelegateHolderS::*mpFnS)();
+#endif
+};
+
+template<class Source, class ArgType>
+class VDEvent : public VDEventBase {
+public:
+ void operator+=(const VDDelegateBinding<Source, ArgType>& binding) {
+ Add(*binding.mpBoundDelegate);
+ }
+
+ void operator-=(const VDDelegateBinding<Source, ArgType>& binding) {
+ Remove(*binding.mpBoundDelegate);
+ }
+
+ void Raise(Source *src, const ArgType& args) {
+ VDEventBase::Raise(src, &args);
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/file.h b/src/thirdparty/VirtualDub/h/vd2/system/file.h
new file mode 100644
index 000000000..bfdfab44e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/file.h
@@ -0,0 +1,323 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_FILE_H
+#define f_VD2_SYSTEM_FILE_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <limits.h>
+#include <stdarg.h>
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdalloc.h>
+#include <vd2/system/vdstl.h>
+#include <vector>
+
+#ifdef WIN32
+ typedef void *VDFileHandle; // this needs to match wtypes.h definition for HANDLE
+#else
+ #error No operating system target declared??
+#endif
+
+namespace nsVDFile {
+ enum eSeekMode {
+ kSeekStart=0, kSeekCur, kSeekEnd
+ };
+
+ enum eFlags {
+ kRead = 0x00000001,
+ kWrite = 0x00000002,
+ kReadWrite = kRead | kWrite,
+
+ kDenyNone = 0x00000000,
+ kDenyRead = 0x00000010,
+ kDenyWrite = 0x00000020,
+ kDenyAll = kDenyRead | kDenyWrite,
+
+ kOpenExisting = 0x00000100,
+ kOpenAlways = 0x00000200,
+ kCreateAlways = 0x00000300,
+ kCreateNew = 0x00000400,
+ kTruncateExisting = 0x00000500, // not particularly useful, really
+ kCreationMask = 0x0000FF00,
+
+ kSequential = 0x00010000,
+ kRandomAccess = 0x00020000,
+ kUnbuffered = 0x00040000, // much faster on Win32 thanks to the crappy cache, but possibly bad in Unix?
+ kWriteThrough = 0x00080000,
+
+ kAllFileFlags = 0xFFFFFFFF
+ };
+};
+
+class VDFile {
+protected:
+ VDFileHandle mhFile;
+ vdautoptr2<wchar_t> mpFilename;
+ sint64 mFilePosition;
+
+private:
+ VDFile(const VDFile&);
+ const VDFile& operator=(const VDFile& f);
+
+public:
+ VDFile() : mhFile(NULL) {}
+ VDFile(const char *pszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+ VDFile(const wchar_t *pwszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+ VDFile(VDFileHandle h);
+ ~VDFile();
+
+ // The "NT" functions are non-throwing and return success/failure; the regular functions throw exceptions
+ // when something bad happens.
+
+ void open(const char *pszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+ void open(const wchar_t *pwszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+
+ bool openNT(const wchar_t *pwszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+
+ bool closeNT();
+ void close();
+ bool truncateNT();
+ void truncate();
+
+ // extendValid() pushes the valid threshold of a file out, so that the system allocates
+ // space for a file without ensuring that it is cleared. It is mainly useful for
+ // preallocating a file without waiting for the system to clear all of it. The caveats:
+ //
+ // - only required on NTFS
+ // - requires Windows XP or Windows Server 2003
+ // - does not work on compressed or sparse files
+ //
+ // As such, it shouldn't normally be relied upon, and extendValidNT() should be the call
+ // of choice.
+ //
+ // enableExtendValid() must be called beforehand, as SeManageVolumePrivilege must be
+ // enabled on the process before the file is opened!
+
+ bool extendValidNT(sint64 pos);
+ void extendValid(sint64 pos);
+ static bool enableExtendValid();
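+
+ // Illustrative sketch (not part of the original header; L"output.bin" and
+ // kReservedBytes are hypothetical): preallocating a large output file
+ // without waiting for the OS to zero-fill it. Per the notes above, the
+ // privilege is enabled first and the non-throwing NT variant is preferred:
+ //
+ //   VDFile::enableExtendValid();      // once per process, before opening
+ //   VDFile f(L"output.bin", nsVDFile::kWrite | nsVDFile::kDenyAll | nsVDFile::kCreateAlways);
+ //   f.seek(kReservedBytes);           // move to the desired size
+ //   f.truncate();                     // set EOF at that position
+ //   f.extendValidNT(kReservedBytes);  // best effort; failure can be ignored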
+
+ sint64 size();
+ void read(void *buffer, long length);
+ long readData(void *buffer, long length);
+ void write(const void *buffer, long length);
+ long writeData(const void *buffer, long length);
+ bool seekNT(sint64 newPos, nsVDFile::eSeekMode mode = nsVDFile::kSeekStart);
+ void seek(sint64 newPos, nsVDFile::eSeekMode mode = nsVDFile::kSeekStart);
+ bool skipNT(sint64 delta);
+ void skip(sint64 delta);
+ sint64 tell();
+
+ bool isOpen();
+ VDFileHandle getRawHandle();
+
+ const wchar_t *getFilenameForError() const { return mpFilename; }
+
+ // unbuffered I/O requires aligned buffers ("unbuffers")
+ static void *AllocUnbuffer(size_t nBytes);
+ static void FreeUnbuffer(void *p);
+
+protected:
+ bool open_internal(const char *pszFilename, const wchar_t *pwszFilename, uint32 flags, bool throwOnError);
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T>
+class VDFileUnbufferAllocator {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ template<class U> struct rebind { typedef VDFileUnbufferAllocator<U> other; };
+
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+
+ pointer allocate(size_type n, void *p = 0) { return (pointer)VDFile::AllocUnbuffer(n * sizeof(T)); }
+ void deallocate(pointer p, size_type n) { VDFile::FreeUnbuffer(p); }
+ size_type max_size() const throw() { return MAX_INT; }
+
+ void construct(pointer p, const T& val) { new((void *)p) T(val); }
+ void destroy(pointer p) { ((T*)p)->~T(); }
+
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ char * _Charalloc(size_type n) { return (char *)allocate((n + sizeof(T) - 1) / sizeof(T)); }
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class IVDStream {
+public:
+ virtual const wchar_t *GetNameForError() = 0;
+ virtual sint64 Pos() = 0;
+ virtual void Read(void *buffer, sint32 bytes) = 0;
+ virtual sint32 ReadData(void *buffer, sint32 bytes) = 0;
+ virtual void Write(const void *buffer, sint32 bytes) = 0;
+};
+
+class IVDRandomAccessStream : public IVDStream {
+public:
+ virtual sint64 Length() = 0;
+ virtual void Seek(sint64 offset) = 0;
+};
+
+class VDFileStream : public VDFile, public IVDRandomAccessStream {
+private:
+ VDFileStream(const VDFile&);
+ const VDFileStream& operator=(const VDFileStream& f);
+
+public:
+ VDFileStream() {}
+ VDFileStream(const char *pszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting)
+ : VDFile(pszFileName, flags) {}
+ VDFileStream(const wchar_t *pwszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting)
+ : VDFile(pwszFileName, flags) {}
+ VDFileStream(VDFileHandle h) : VDFile(h) {}
+ ~VDFileStream();
+
+ const wchar_t *GetNameForError();
+ sint64 Pos();
+ void Read(void *buffer, sint32 bytes);
+ sint32 ReadData(void *buffer, sint32 bytes);
+ void Write(const void *buffer, sint32 bytes);
+ sint64 Length();
+ void Seek(sint64 offset);
+};
+
+class VDMemoryStream : public IVDRandomAccessStream {
+public:
+ VDMemoryStream(const void *pSrc, uint32 len);
+
+ const wchar_t *GetNameForError();
+ sint64 Pos();
+ void Read(void *buffer, sint32 bytes);
+ sint32 ReadData(void *buffer, sint32 bytes);
+ void Write(const void *buffer, sint32 bytes);
+ sint64 Length();
+ void Seek(sint64 offset);
+
+protected:
+ const char *mpSrc;
+ const uint32 mLength;
+ uint32 mPos;
+};
+
+class VDBufferedStream : public IVDRandomAccessStream {
+public:
+ VDBufferedStream(IVDRandomAccessStream *pSrc, uint32 bufferSize);
+ ~VDBufferedStream();
+
+ const wchar_t *GetNameForError();
+ sint64 Pos();
+ void Read(void *buffer, sint32 bytes);
+ sint32 ReadData(void *buffer, sint32 bytes);
+ void Write(const void *buffer, sint32 bytes);
+
+ sint64 Length();
+ void Seek(sint64 offset);
+
+ void Skip(sint64 size);
+
+protected:
+ IVDRandomAccessStream *mpSrc;
+ vdblock<char> mBuffer;
+ sint64 mBasePosition;
+ uint32 mBufferOffset;
+ uint32 mBufferValidSize;
+};
+
+class VDTextStream {
+public:
+ VDTextStream(IVDStream *pSrc);
+ ~VDTextStream();
+
+ const char *GetNextLine();
+
+protected:
+ IVDStream *mpSrc;
+ uint32 mBufferPos;
+ uint32 mBufferLimit;
+ enum {
+ kFetchLine,
+ kEatNextIfCR,
+ kEatNextIfLF
+ } mState;
+
+ enum {
+ kFileBufferSize = 4096
+ };
+
+ vdfastvector<char> mLineBuffer;
+ vdblock<char> mFileBuffer;
+};
+
+class VDTextInputFile {
+public:
+ VDTextInputFile(const wchar_t *filename, uint32 flags = nsVDFile::kOpenExisting);
+ ~VDTextInputFile();
+
+ inline const char *GetNextLine() {
+ return mTextStream.GetNextLine();
+ }
+
+protected:
+ VDFileStream mFileStream;
+ VDTextStream mTextStream;
+};
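+
+// Illustrative sketch (not part of the original header; assumes GetNextLine()
+// returns NULL once the file is exhausted): reading a text file line by line.
+//
+//   VDTextInputFile in(L"playlist.txt");      // hypothetical filename
+//   while(const char *line = in.GetNextLine())
+//       ProcessLine(line);                    // hypothetical consumer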
+
+class VDTextOutputStream {
+public:
+ VDTextOutputStream(IVDStream *stream);
+ ~VDTextOutputStream();
+
+ void Flush();
+
+ void Write(const char *s, int len);
+ void PutLine();
+ void PutLine(const char *s);
+ void FormatLine(const char *format, ...);
+
+protected:
+ void FormatLine2(const char *format, va_list val);
+ void PutData(const char *s, int len);
+
+ enum { kBufSize = 4096 };
+
+ int mLevel;
+ IVDStream *mpDst;
+ char mBuf[kBufSize];
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/fileasync.h b/src/thirdparty/VirtualDub/h/vd2/system/fileasync.h
new file mode 100644
index 000000000..7693aa30f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/fileasync.h
@@ -0,0 +1,64 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_FILEASYNC_H
+#define f_VD2_SYSTEM_FILEASYNC_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+class VDRTProfileChannel;
+
+class IVDFileAsync {
+public:
+ enum Mode {
+ kModeSynchronous, ///< Use synchronous I/O.
+ kModeThreaded, ///< Use multithreaded I/O.
+ kModeAsynchronous, ///< Use true asynchronous I/O (Windows NT only).
+ kModeCount
+ };
+
+ virtual ~IVDFileAsync() {}
+ virtual void SetPreemptiveExtend(bool b) = 0;
+ virtual bool IsPreemptiveExtendActive() = 0;
+ virtual bool IsOpen() = 0;
+ virtual void Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize) = 0;
+ virtual void Close() = 0;
+ virtual void FastWrite(const void *pData, uint32 bytes) = 0;
+ virtual void FastWriteEnd() = 0;
+ virtual void Write(sint64 pos, const void *pData, uint32 bytes) = 0;
+ virtual bool Extend(sint64 pos) = 0;
+ virtual void Truncate(sint64 pos) = 0;
+ virtual void SafeTruncateAndClose(sint64 pos) = 0;
+ virtual sint64 GetFastWritePos() = 0;
+ virtual sint64 GetSize() = 0;
+};
+
+IVDFileAsync *VDCreateFileAsync(IVDFileAsync::Mode);
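+
+// Illustrative sketch (not part of the original header; the filename and
+// buffer sizes are arbitrary): sequential writing through the async layer.
+//
+//   IVDFileAsync *out = VDCreateFileAsync(IVDFileAsync::kModeAsynchronous);
+//   out->Open(L"capture.avi", 4, 262144);     // 4 buffers of 256KB each
+//   out->FastWrite(pData, bytes);             // repeated by the producer
+//   out->FastWriteEnd();
+//   out->SafeTruncateAndClose(out->GetFastWritePos());
+//   delete out;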
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/filesys.h b/src/thirdparty/VirtualDub/h/vd2/system/filesys.h
new file mode 100644
index 000000000..4aa830833
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/filesys.h
@@ -0,0 +1,170 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_FILESYS_H
+#define f_VD2_SYSTEM_FILESYS_H
+
+#include <ctype.h>
+#include <vector>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/VDString.h>
+
+// VDFileSplitPath returns a pointer to the first character of the filename,
+// or the beginning of the string if the path only contains one component.
+
+const char *VDFileSplitFirstDir(const char *s);
+const wchar_t *VDFileSplitFirstDir(const wchar_t *s);
+
+static inline char *VDFileSplitFirstDir(char *s) {
+ return const_cast<char *>(VDFileSplitFirstDir(const_cast<const char *>(s)));
+}
+
+static inline wchar_t *VDFileSplitFirstDir(wchar_t *s) {
+ return const_cast<wchar_t *>(VDFileSplitFirstDir(const_cast<const wchar_t *>(s)));
+}
+
+const char *VDFileSplitPath(const char *);
+const wchar_t *VDFileSplitPath(const wchar_t *);
+
+static inline char *VDFileSplitPath(char *s) {
+ return const_cast<char *>(VDFileSplitPath(const_cast<const char *>(s)));
+}
+
+static inline wchar_t *VDFileSplitPath(wchar_t *s) {
+ return const_cast<wchar_t *>(VDFileSplitPath(const_cast<const wchar_t *>(s)));
+}
+
+VDString VDFileSplitPathLeft(const VDString&);
+VDString VDFileSplitPathRight(const VDString&);
+VDStringW VDFileSplitPathLeft(const VDStringW&);
+VDStringW VDFileSplitPathRight(const VDStringW&);
+
+// VDFileSplitRoot returns a pointer to the second component of the filename,
+// or the beginning of the string if there is no second component.
+
+const char *VDFileSplitRoot(const char *);
+const wchar_t *VDFileSplitRoot(const wchar_t *);
+
+static inline char *VDFileSplitRoot(char *s) {
+ return const_cast<char *>(VDFileSplitRoot(const_cast<const char *>(s)));
+}
+
+static inline wchar_t *VDFileSplitRoot(wchar_t *s) {
+ return const_cast<wchar_t *>(VDFileSplitRoot(const_cast<const wchar_t *>(s)));
+}
+
+VDString VDFileSplitRoot(const VDString&);
+VDStringW VDFileSplitRoot(const VDStringW&);
+
+// VDFileSplitExt returns a pointer to the extension, including the period.
+// The ending null terminator is returned if there is no extension.
+
+const char *VDFileSplitExt(const char *);
+const wchar_t *VDFileSplitExt(const wchar_t *);
+
+static inline char *VDFileSplitExt(char *s) {
+ return const_cast<char *>(VDFileSplitExt(const_cast<const char *>(s)));
+}
+
+static inline wchar_t *VDFileSplitExt(wchar_t *s) {
+ return const_cast<wchar_t *>(VDFileSplitExt(const_cast<const wchar_t *>(s)));
+}
+
+VDString VDFileSplitExtLeft(const VDString&);
+VDStringW VDFileSplitExtLeft(const VDStringW&);
+VDString VDFileSplitExtRight(const VDString&);
+VDStringW VDFileSplitExtRight(const VDStringW&);
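+
+// Illustrative examples (not part of the original header); for
+// s = "c:\projects\clip.avi" the helpers above return, roughly:
+//
+//   VDFileSplitPath(s) -> "clip.avi"           (filename component)
+//   VDFileSplitRoot(s) -> "projects\clip.avi"  (everything past the root)
+//   VDFileSplitExt(s)  -> ".avi"               (extension, including the period)
+//
+// The *Left/*Right string overloads split at the same boundaries and return
+// the corresponding half (e.g. directory vs. filename, stem vs. extension).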
+
+/////////////////////////////////////////////////////////////////////////////
+
+/// Perform a case-insensitive wildcard match against a filename; returns
+/// true if the pattern matches, false otherwise. '?' matches any single
+/// character, and '*' matches zero or more characters.
+///
+/// NOTE: This is not guaranteed or intended to perfectly match the
+/// underlying OS wildcard mechanism. In particular, we don't try to
+/// emulate MSDOS or Windows goofiness.
+bool VDFileWildMatch(const char *pattern, const char *path);
+bool VDFileWildMatch(const wchar_t *pattern, const wchar_t *path);
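+
+// Illustrative examples (not part of the original header):
+//
+//   VDFileWildMatch("*.avi",      "Movie.AVI")  -> true   (case-insensitive)
+//   VDFileWildMatch("clip??.wav", "clip01.wav") -> true
+//   VDFileWildMatch("*.avi",      "movie.mp4")  -> false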
+
+/////////////////////////////////////////////////////////////////////////////
+
+sint64 VDGetDiskFreeSpace(const wchar_t *path);
+void VDCreateDirectory(const wchar_t *path);
+
+extern bool (*VDRemoveFile)(const wchar_t *path);
+
+bool VDDoesPathExist(const wchar_t *fileName);
+
+uint64 VDFileGetLastWriteTime(const wchar_t *path);
+VDStringW VDFileGetRootPath(const wchar_t *partialPath);
+VDStringW VDGetFullPath(const wchar_t *partialPath);
+
+VDStringW VDMakePath(const wchar_t *base, const wchar_t *file);
+void VDFileFixDirPath(VDStringW& path);
+VDStringW VDGetLocalModulePath();
+VDStringW VDGetProgramPath();
+
+/////////////////////////////////////////////////////////////////////////////
+
+class VDDirectoryIterator {
+ VDDirectoryIterator(const VDDirectoryIterator&);
+ VDDirectoryIterator& operator=(VDDirectoryIterator&);
+public:
+ VDDirectoryIterator(const wchar_t *path);
+ ~VDDirectoryIterator();
+
+ bool Next();
+
+ bool IsDirectory() const {
+ return mbDirectory;
+ }
+
+ const wchar_t *GetName() const {
+ return mFilename.c_str();
+ }
+
+ const VDStringW GetFullPath() const {
+ return mBasePath + mFilename;
+ }
+
+ const sint64 GetSize() const {
+ return mFileSize;
+ }
+
+protected:
+ void *mpHandle;
+ bool mbSearchComplete;
+
+ VDStringW mSearchPath;
+ VDStringW mBasePath;
+
+ VDStringW mFilename;
+ sint64 mFileSize;
+ bool mbDirectory;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/filewatcher.h b/src/thirdparty/VirtualDub/h/vd2/system/filewatcher.h
new file mode 100644
index 000000000..db1a02312
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/filewatcher.h
@@ -0,0 +1,45 @@
+#ifndef f_VD2_SYSTEM_FILEWATCHER_H
+#define f_VD2_SYSTEM_FILEWATCHER_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/VDString.h>
+
+class VDFunctionThunk;
+
+class IVDFileWatcherCallback {
+public:
+ virtual bool OnFileUpdated(const wchar_t *path) = 0;
+};
+
+class VDFileWatcher {
+public:
+ VDFileWatcher();
+ ~VDFileWatcher();
+
+ bool IsActive() const;
+
+ void Init(const wchar_t *file, IVDFileWatcherCallback *cb);
+ void Shutdown();
+
+ bool Wait(uint32 delay = 0xFFFFFFFFU);
+
+protected:
+ void StaticTimerCallback(void *, unsigned, unsigned, unsigned long);
+
+ void *mChangeHandle;
+ uint64 mLastWriteTime;
+ VDStringW mPath;
+
+ IVDFileWatcherCallback *mpCB;
+
+ bool mbRepeatRequested;
+ bool mbThunksInited;
+ VDFunctionThunk *mpThunk;
+ uint32 mTimerId;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/halffloat.h b/src/thirdparty/VirtualDub/h/vd2/system/halffloat.h
new file mode 100644
index 000000000..e65a4109c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/halffloat.h
@@ -0,0 +1,9 @@
+#ifndef f_VD2_SYSTEM_HALFFLOAT_H
+#define f_VD2_SYSTEM_HALFFLOAT_H
+
+#include <vd2/system/vdtypes.h>
+
+uint16 VDConvertFloatToHalf(const void *f);
+void VDConvertHalfToFloat(uint16 h, void *dst);
+
+#endif // f_VD2_SYSTEM_HALFFLOAT_H
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/hash.h b/src/thirdparty/VirtualDub/h/vd2/system/hash.h
new file mode 100644
index 000000000..d5f3612e1
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/hash.h
@@ -0,0 +1,47 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_HASH_H
+#define f_VD2_SYSTEM_HASH_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#ifndef f_VD2_SYSTEM_VDTYPES_H
+ #include <vd2/system/vdtypes.h>
+#endif
+
+// Case-sensitive string hashes
+
+uint32 VDHashString32(const char *s);
+uint32 VDHashString32(const char *s, uint32 len);
+
+// Case-insensitive, culture-invariant string hashes
+
+uint32 VDHashString32I(const wchar_t *s);
+uint32 VDHashString32I(const wchar_t *s, uint32 len);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/int128.h b/src/thirdparty/VirtualDub/h/vd2/system/int128.h
new file mode 100644
index 000000000..da86c4878
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/int128.h
@@ -0,0 +1,361 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_INT128_H
+#define f_VD2_SYSTEM_INT128_H
+
+#include <vd2/system/vdtypes.h>
+
+struct vdint128;
+struct vduint128;
+
+#ifdef _M_AMD64
+ extern "C" __int64 _mul128(__int64 x, __int64 y, __int64 *hiresult);
+ extern "C" unsigned __int64 _umul128(unsigned __int64 x, unsigned __int64 y, unsigned __int64 *hiresult);
+ extern "C" unsigned __int64 __shiftleft128(unsigned __int64 low, unsigned __int64 high, unsigned char shift);
+ extern "C" unsigned __int64 __shiftright128(unsigned __int64 low, unsigned __int64 high, unsigned char shift);
+
+ #pragma intrinsic(_mul128)
+ #pragma intrinsic(_umul128)
+ #pragma intrinsic(__shiftleft128)
+ #pragma intrinsic(__shiftright128)
+
+ extern "C" {
+ void vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ void vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ void vdasm_uint128_mul(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ }
+#else
+ extern "C" {
+ void __cdecl vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ void __cdecl vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ }
+#endif
+
+struct vdint128 {
+public:
+ union {
+ sint32 d[4];
+ sint64 q[2];
+ };
+
+ vdint128() {}
+
+ vdint128(sint64 x) {
+ q[0] = x;
+ q[1] = x>>63;
+ }
+
+ vdint128(uint64 x) {
+ q[0] = (sint64)x;
+ q[1] = 0;
+ }
+
+ vdint128(int x) {
+ q[0] = x;
+ q[1] = (sint64)x >> 63;
+ }
+
+ vdint128(unsigned int x) {
+ q[0] = x;
+ q[1] = 0;
+ }
+
+ vdint128(unsigned long x) {
+ q[0] = x;
+ q[1] = 0;
+ }
+
+ vdint128(sint64 hi, uint64 lo) {
+ q[0] = lo;
+ q[1] = hi;
+ }
+
+ sint64 getHi() const { return q[1]; }
+ uint64 getLo() const { return q[0]; }
+
+ operator double() const;
+ operator sint64() const {
+ return (sint64)q[0];
+ }
+ operator uint64() const {
+ return (uint64)q[0];
+ }
+
+ bool operator==(const vdint128& x) const {
+ return q[1] == x.q[1] && q[0] == x.q[0];
+ }
+
+ bool operator!=(const vdint128& x) const {
+ return q[1] != x.q[1] || q[0] != x.q[0];
+ }
+
+ bool operator<(const vdint128& x) const {
+ return q[1] < x.q[1] || (q[1] == x.q[1] && (uint64)q[0] < (uint64)x.q[0]);
+ }
+
+ bool operator<=(const vdint128& x) const {
+ return q[1] < x.q[1] || (q[1] == x.q[1] && (uint64)q[0] <= (uint64)x.q[0]);
+ }
+
+ bool operator>(const vdint128& x) const {
+ return q[1] > x.q[1] || (q[1] == x.q[1] && (uint64)q[0] > (uint64)x.q[0]);
+ }
+
+ bool operator>=(const vdint128& x) const {
+ return q[1] > x.q[1] || (q[1] == x.q[1] && (uint64)q[0] >= (uint64)x.q[0]);
+ }
+
+ const vdint128 operator+(const vdint128& x) const {
+ vdint128 t;
+ vdasm_uint128_add((uint64 *)t.q, (const uint64 *)q, (const uint64 *)x.q);
+ return t;
+ }
+
+ const vdint128 operator-(const vdint128& x) const {
+ vdint128 t;
+ vdasm_uint128_sub((uint64 *)t.q, (const uint64 *)q, (const uint64 *)x.q);
+ return t;
+ }
+
+ const vdint128& operator+=(const vdint128& x) {
+ vdasm_uint128_add((uint64 *)q, (const uint64 *)q, (const uint64 *)x.q);
+ return *this;
+ }
+
+ const vdint128& operator-=(const vdint128& x) {
+ vdasm_uint128_sub((uint64 *)q, (const uint64 *)q, (const uint64 *)x.q);
+ return *this;
+ }
+
+ const vdint128 operator*(const vdint128& x) const;
+
+ const vdint128 operator/(int x) const;
+
+ const vdint128 operator-() const {
+ vdint128 t(0);
+ vdasm_uint128_sub((uint64 *)t.q, (const uint64 *)t.q, (const uint64 *)q);
+ return t;
+ }
+
+ const vdint128 abs() const {
+ return q[1] < 0 ? -*this : *this;
+ }
+
+#ifdef _M_AMD64
+ void setSquare(sint64 v) {
+ const vdint128 v128(v);
+ operator=(v128*v128);
+ }
+
+ const vdint128 operator<<(int count) const {
+ vdint128 t;
+
+ if (count >= 64) {
+ t.q[0] = 0;
+ t.q[1] = q[0] << (count-64);
+ } else {
+ t.q[0] = q[0] << count;
+ t.q[1] = __shiftleft128(q[0], q[1], count);
+ }
+
+ return t;
+ }
+
+ const vdint128 operator>>(int count) const {
+ vdint128 t;
+
+ if (count >= 64) {
+ t.q[0] = q[1] >> (count-64);
+ t.q[1] = q[1] >> 63;
+ } else {
+ t.q[0] = __shiftright128(q[0], q[1], count);
+ t.q[1] = q[1] >> count;
+ }
+
+ return t;
+ }
+#else
+ void setSquare(sint64 v);
+
+ const vdint128 operator<<(int v) const;
+ const vdint128 operator>>(int v) const;
+#endif
+};
+
+struct vduint128 {
+public:
+ union {
+ uint32 d[4];
+ uint64 q[2];
+ };
+
+ vduint128() {}
+
+ vduint128(sint64 x) {
+ q[0] = (sint64)x;
+ q[1] = 0;
+ }
+
+ vduint128(uint64 x) {
+ q[0] = x;
+ q[1] = 0;
+ }
+
+ vduint128(int x) {
+ q[0] = (uint64)x;
+ q[1] = 0;
+ }
+
+ vduint128(unsigned x) {
+ q[0] = x;
+ q[1] = 0;
+ }
+
+ vduint128(uint64 hi, uint64 lo) {
+ q[0] = lo;
+ q[1] = hi;
+ }
+
+ uint64 getHi() const { return q[1]; }
+ uint64 getLo() const { return q[0]; }
+
+ operator sint64() const {
+ return (sint64)q[0];
+ }
+
+ operator uint64() const {
+ return (uint64)q[0];
+ }
+
+ bool operator==(const vduint128& x) const {
+ return q[1] == x.q[1] && q[0] == x.q[0];
+ }
+
+ bool operator!=(const vduint128& x) const {
+ return q[1] != x.q[1] || q[0] != x.q[0];
+ }
+
+ bool operator<(const vduint128& x) const {
+ return q[1] < x.q[1] || (q[1] == x.q[1] && q[0] < x.q[0]);
+ }
+
+ bool operator<=(const vduint128& x) const {
+ return q[1] < x.q[1] || (q[1] == x.q[1] && q[0] <= x.q[0]);
+ }
+
+ bool operator>(const vduint128& x) const {
+ return q[1] > x.q[1] || (q[1] == x.q[1] && q[0] > x.q[0]);
+ }
+
+ bool operator>=(const vduint128& x) const {
+ return q[1] > x.q[1] || (q[1] == x.q[1] && q[0] >= x.q[0]);
+ }
+
+ const vduint128 operator+(const vduint128& x) const {
+ vduint128 t;
+ vdasm_uint128_add(t.q, q, x.q);
+ return t;
+ }
+
+ const vduint128 operator-(const vduint128& x) const {
+ vduint128 t;
+ vdasm_uint128_sub(t.q, q, x.q);
+ return t;
+ }
+
+ const vduint128& operator+=(const vduint128& x) {
+ vdasm_uint128_add(q, q, x.q);
+ return *this;
+ }
+
+ const vduint128& operator-=(const vduint128& x) {
+ vdasm_uint128_sub(q, q, x.q);
+ return *this;
+ }
+
+ const vduint128 operator*(const vduint128& x) const;
+
+ const vduint128 operator-() const {
+ vduint128 t(0U);
+ vdasm_uint128_sub((uint64 *)t.q, (const uint64 *)t.q, (const uint64 *)q);
+ return t;
+ }
+
+ vduint128& operator<<=(int count) {
+ return operator=(operator<<(count));
+ }
+
+ vduint128& operator>>=(int count) {
+ return operator=(operator>>(count));
+ }
+
+#ifdef _M_AMD64
+ const vduint128 operator<<(int count) const {
+ vduint128 t;
+
+ if (count >= 64) {
+ t.q[0] = 0;
+ t.q[1] = q[0] << (count-64);
+ } else {
+ t.q[0] = q[0] << count;
+ t.q[1] = __shiftleft128(q[0], q[1], count);
+ }
+
+ return t;
+ }
+
+ const vduint128 operator>>(int count) const {
+ vduint128 t;
+
+ if (count >= 64) {
+ t.q[0] = q[1] >> (count-64);
+ t.q[1] = 0;
+ } else {
+ t.q[0] = __shiftright128(q[0], q[1], count);
+ t.q[1] = q[1] >> count;
+ }
+
+ return t;
+ }
+#else
+ const vduint128 operator<<(int v) const;
+ const vduint128 operator>>(int v) const;
+#endif
+};
+
+#ifdef _M_AMD64
+ inline vduint128 VDUMul64x64To128(uint64 x, uint64 y) {
+ vduint128 result;
+ result.q[0] = _umul128(x, y, &result.q[1]);
+ return result;
+ }
+ uint64 VDUDiv128x64To64(const vduint128& dividend, uint64 divisor, uint64& remainder);
+#else
+ vduint128 VDUMul64x64To128(uint64 x, uint64 y);
+ uint64 VDUDiv128x64To64(const vduint128& dividend, uint64 divisor, uint64& remainder);
+#endif
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/list.h b/src/thirdparty/VirtualDub/h/vd2/system/list.h
new file mode 100644
index 000000000..e2c39b4e5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/list.h
@@ -0,0 +1,275 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_LIST_H
+#define f_LIST_H
+
+class ListNode {
+public:
+ ListNode *next, *prev;
+
+ void Remove() {
+ next->prev = prev;
+ prev->next = next;
+#ifdef _DEBUG
+ prev = next = 0;
+#endif
+ }
+
+ void InsertAfter(ListNode *node) {
+ next = node;
+ prev = node->prev;
+ if (node->prev) node->prev->next = this;
+ node->prev = this;
+ }
+
+ void InsertBefore(ListNode *node) {
+ next = node->next;
+ prev = node;
+ if (node->next) node->next->prev = this;
+ node->next = this;
+ }
+
+ ListNode *NextFromHead() const {
+ return prev;
+ }
+
+ ListNode *NextFromTail() const {
+ return next;
+ }
+};
+
+class List {
+private:
+public:
+ ListNode head, tail;
+
+ // <--- next prev --->
+ //
+ // head <-> node <-> node <-> tail
+
+ List();
+ List(int) {}
+
+ void Init();
+
+ void AddHead(ListNode *node) {
+ node->InsertAfter(&head);
+ }
+
+ void AddTail(ListNode *node) {
+ node->InsertBefore(&tail);
+ }
+
+ ListNode *RemoveHead();
+ ListNode *RemoveTail();
+
+ bool IsEmpty() const {
+ return !head.prev->prev;
+ }
+
+ ListNode *AtHead() const {
+ return head.prev;
+ }
+
+ ListNode *AtTail() const {
+ return tail.next;
+ }
+
+ void Take(List& from);
+ void Swap(List& with);
+};
+
+// Templated classes... templated classes good.
+
+template<class T> class List2;
+
+template<class T>
+class ListNode2 : public ListNode {
+friend List2<T>;
+public:
+ void InsertBefore(ListNode2<T> *node) { ListNode::InsertBefore(node); }
+ void InsertAfter(ListNode2<T> *node) { ListNode::InsertAfter(node); }
+
+ void Remove() { ListNode::Remove(); }
+ T *NextFromHead() const { return static_cast<T *>(static_cast<ListNode2<T>*>(ListNode::NextFromHead())); }
+ T *NextFromTail() const { return static_cast<T *>(static_cast<ListNode2<T>*>(ListNode::NextFromTail())); }
+};
+
+template<class T>
+class List2 : public List {
+public:
+ List2<T>() {}
+
+ // This is a really lame, stupid way to postpone initialization of the
+ // list.
+
+ List2<T>(int v) : List(v) {}
+
+ void AddHead(ListNode2<T> *node) { List::AddHead(node); }
+ void AddTail(ListNode2<T> *node) { List::AddTail(node); }
+ T *RemoveHead() { return static_cast<T *>(static_cast<ListNode2<T>*>(List::RemoveHead())); }
+ T *RemoveTail() { return static_cast<T *>(static_cast<ListNode2<T>*>(List::RemoveTail())); }
+ T *AtHead() const { return static_cast<T *>(static_cast<ListNode2<T>*>(List::AtHead())); }
+ T *AtTail() const { return static_cast<T *>(static_cast<ListNode2<T>*>(List::AtTail())); }
+
+ // I must admit to being pampered by STL (end is different though!!)
+
+ T *begin() const { return AtHead(); }
+ T *end() const { return AtTail(); }
+
+ void take(List2<T>& from) { List::Take(from); }
+
+ class iterator {
+ protected:
+ ListNode2<T> *node;
+ ListNode2<T> *next;
+
+ public:
+ iterator() {}
+ iterator(const iterator& src) throw() : node(src.node), next(src.next) {}
+
+ bool operator!() const throw() { return 0 == next; }
+ T *operator->() const throw() { return (T *)node; }
+ operator bool() const throw() { return 0 != next; }
+ operator T *() const throw() { return (T *)node; }
+ T& operator *() const throw() { return *(T *)node; }
+ };
+
+ // fwit: forward iterator (SAFE if node disappears) -- see the usage sketch after this class
+ // rvit: reverse iterator (SAFE if node disappears)
+
+ class fwit : public iterator {
+ public:
+ fwit() throw() {}
+ fwit(const fwit& src) throw() : iterator(src) {}
+ fwit(ListNode2<T> *start) throw() {
+ node = start;
+ next = start->NextFromHead();
+ }
+
+ const fwit& operator=(ListNode2<T> *start) throw() {
+ node = start;
+ next = start->NextFromHead();
+
+ return *this;
+ }
+
+ fwit& operator++() throw() {
+ node = next;
+ next = node->NextFromHead();
+
+ return *this;
+ }
+
+ const fwit& operator+=(int v) throw() {
+ while(next && v--) {
+ node = next;
+ next = node->NextFromHead();
+ }
+
+ return *this;
+ }
+
+ fwit operator+(int v) const throw() {
+ fwit t(*this);
+
+ t += v;
+
+ return t;
+ }
+
+ // This one's for my sanity.
+
+ void operator++(int) throw() {
+ ++*this;
+ }
+ };
+
+ class rvit : public iterator {
+ public:
+ rvit() throw() {}
+
+ rvit(ListNode2<T> *start) throw() {
+ node = start;
+ next = start->NextFromTail();
+ }
+
+ const rvit& operator=(ListNode2<T> *start) throw() {
+ node = start;
+ next = start->NextFromTail();
+
+ return *this;
+ }
+
+ rvit& operator--() throw() {
+ node = next;
+ next = node->NextFromTail();
+
+ return *this;
+ }
+
+ const rvit& operator-=(int v) throw() {
+ while(next && v--) {
+ node = next;
+ next = node->NextFromTail();
+ }
+
+ return *this;
+ }
+
+ rvit operator-(int v) const throw() {
+ rvit t(*this);
+
+ t -= v;
+
+ return t;
+ }
+
+ // This one's for my sanity.
+
+ void operator--(int) throw() {
+ --*this;
+ }
+ };
+};
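+
+// Illustrative usage sketch (not part of the original header; Job and IsDone()
+// are made up). Because fwit caches the next node up front, the current node
+// may be unlinked and deleted while iterating:
+//
+//   class Job : public ListNode2<Job> { /* ... */ };
+//
+//   List2<Job> jobs;
+//   for(List2<Job>::fwit it(jobs.AtHead()); it; ++it) {
+//       Job& job = *it;
+//       if (job.IsDone()) {
+//           job.Remove();     // safe: the iterator already holds the next node
+//           delete &job;
+//       }
+//   }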
+
+template<class T>
+class ListAlloc : public List2<T> {
+public:
+ ListAlloc<T>() {}
+ ~ListAlloc<T>() {
+ dispose();
+ }
+
+ void dispose() {
+ T *node;
+
+ while(node = RemoveHead())
+ delete node;
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/log.h b/src/thirdparty/VirtualDub/h/vd2/system/log.h
new file mode 100644
index 000000000..b36e36e7e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/log.h
@@ -0,0 +1,70 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_LOG_H
+#define f_VD2_SYSTEM_LOG_H
+
+#include <vd2/system/VDString.h>
+#include <list>
+
+class IVDLogger {
+public:
+ virtual void AddLogEntry(int severity, const VDStringW& s) = 0;
+};
+
+enum {
+ kVDLogInfo, kVDLogMarker, kVDLogWarning, kVDLogError
+};
+
+void VDLog(int severity, const VDStringW& s);
+void VDLogF(int severity, const wchar_t *format, ...);
+void VDAttachLogger(IVDLogger *pLogger, bool bThisThreadOnly, bool bReplayLog);
+void VDDetachLogger(IVDLogger *pLogger);
+
+class VDAutoLogger : public IVDLogger {
+public:
+ struct Entry {
+ int severity;
+ VDStringW text;
+
+ Entry(int sev, const VDStringW& s) : severity(sev), text(s) {}
+ };
+
+ typedef std::list<Entry> tEntries;
+
+ VDAutoLogger(int min_severity);
+ ~VDAutoLogger();
+
+ void AddLogEntry(int severity, const VDStringW& s);
+
+ const tEntries& GetEntries();
+
+protected:
+ tEntries mEntries;
+ const int mMinSeverity;
+ bool mbAttached;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/math.h b/src/thirdparty/VirtualDub/h/vd2/system/math.h
new file mode 100644
index 000000000..aa4d03f77
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/math.h
@@ -0,0 +1,259 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_MATH_H
+#define f_VD2_SYSTEM_MATH_H
+
+#include <math.h>
+#include <vd2/system/vdtypes.h>
+
+// Constants
+namespace nsVDMath {
+ static const float kfPi = 3.1415926535897932384626433832795f;
+ static const double krPi = 3.1415926535897932384626433832795;
+ static const float kfTwoPi = 6.283185307179586476925286766559f;
+ static const double krTwoPi = 6.283185307179586476925286766559;
+ static const float kfLn2 = 0.69314718055994530941723212145818f;
+ static const double krLn2 = 0.69314718055994530941723212145818;
+ static const float kfLn10 = 2.3025850929940456840179914546844f;
+ static const double krLn10 = 2.3025850929940456840179914546844;
+ static const float kfOneOverLn10 = 0.43429448190325182765112891891661f;
+ static const double krOneOverLn10 = 0.43429448190325182765112891891661;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// Integer clamping functions
+//
+#ifdef _M_IX86
+ inline uint32 VDClampToUint32(sint64 v) {
+ union U {
+ __int64 v64;
+ struct {
+ unsigned lo;
+ int hi;
+ } v32;
+ };
+
+ return ((U *)&v)->v32.hi ? ~(((U *)&v)->v32.hi >> 31) : ((U *)&v)->v32.lo;
+ }
+#else
+ inline uint32 VDClampToUint32(sint64 v) {
+ uint32 r = (uint32)v;
+ return r == v ? r : (uint32)~(sint32)(v>>63);
+ }
+#endif
+
+inline sint32 VDClampToSint32(uint32 v) {
+ return (v | ((sint32)v >> 31)) & 0x7FFFFFFF;
+}
+
+inline sint32 VDClampToSint32(sint64 v) {
+ sint32 r = (sint32)v;
+ return r == v ? r : (sint32)(v >> 63) ^ 0x7FFFFFFF;
+}
+
+inline uint16 VDClampToUint16(uint32 v) {
+ if (v > 0xffff)
+ v = 0xffff;
+ return (uint16)v;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Absolute value functions
+inline sint64 VDAbs64(sint64 v) {
+ return v<0 ? -v : v;
+}
+
+inline ptrdiff_t VDAbsPtrdiff(ptrdiff_t v) {
+ return v<0 ? -v : v;
+}
+
+// Rounding functions
+//
+// Round a double to an int or a long. Behavior is not specified at
+// int(y)+0.5, if x is NaN or Inf, or if x is out of range.
+
+int VDRoundToInt(double x);
+long VDRoundToLong(double x);
+sint32 VDRoundToInt32(double x);
+sint64 VDRoundToInt64(double x);
+
+inline sint32 VDRoundToIntFast(float x) {
+ union {
+ float f;
+ sint32 i;
+ } u = {x + 12582912.0f}; // 2^22+2^23
+
+ return (sint32)u.i - 0x4B400000;
+}
+
+inline sint32 VDRoundToIntFastFullRange(double x) {
+ union {
+ double f;
+ sint32 i[2];
+ } u = {x + 6755399441055744.0f}; // 2^51+2^52
+
+ return (sint32)u.i[0];
+}
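+
+// Worked example of the bias trick above (illustrative, not part of the
+// original header): for VDRoundToIntFast(3.6f),
+//
+//   3.6f + 12582912.0f = 12582915.6, which rounds to 12582916.0f because
+//   floats in [2^23, 2^24) are spaced exactly 1.0 apart;
+//   the bit pattern of 12582916.0f is 0x4B400004, and
+//   0x4B400004 - 0x4B400000 = 4, i.e. the result is round(3.6) = 4.
+//
+// Exactly-half inputs follow the current rounding mode (round to nearest-even
+// by default), which is why behavior at .5 is left unspecified above.
+// VDRoundToIntFastFullRange applies the same idea with 2^52+2^51.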
+
+#ifdef _M_AMD64
+ inline sint32 VDFloorToInt(double x) {
+ return (sint32)floor(x);
+ }
+
+ inline sint64 VDFloorToInt64(double x) {
+ return (sint64)floor(x);
+ }
+#else
+ #pragma warning(push)
+ #pragma warning(disable: 4035) // warning C4035: 'VDFloorToInt' : no return value
+ inline sint32 VDFloorToInt(double x) {
+ sint32 temp;
+
+ __asm {
+ fld x
+ fist temp
+ fild temp
+ mov eax, temp
+ fsub
+ fstp temp
+ cmp temp, 80000001h
+ adc eax, -1
+ }
+ }
+ inline sint64 VDFloorToInt64(double x) {
+ sint64 temp;
+ sint32 temp2;
+
+ __asm {
+ fld x
+ fld st(0)
+ fistp qword ptr temp
+ fild qword ptr temp
+ mov eax, dword ptr temp
+ mov edx, dword ptr temp+4
+ fsub
+ fstp dword ptr temp2
+ cmp dword ptr temp2, 80000001h
+ adc eax, -1
+ adc edx, -1
+ }
+ }
+ #pragma warning(pop)
+#endif
+
+#ifdef _M_AMD64
+ inline sint32 VDCeilToInt(double x) {
+ return (sint32)ceil(x);
+ }
+
+ inline sint64 VDCeilToInt64(double x) {
+ return (sint64)ceil(x);
+ }
+#else
+ #pragma warning(push)
+ #pragma warning(disable: 4035) // warning C4035: 'VDCeilToInt' : no return value
+ inline sint32 VDCeilToInt(double x) {
+ sint32 temp;
+
+ __asm {
+ fld x
+ fist temp
+ fild temp
+ mov eax, temp
+ fsubr
+ fstp temp
+ cmp temp, 80000001h
+ sbb eax, -1
+ }
+ }
+
+ inline sint64 VDCeilToInt64(double x) {
+ sint64 temp;
+ sint32 temp2;
+
+ __asm {
+ fld x
+ fld st(0)
+ fistp temp
+ fild temp
+ mov eax, dword ptr temp
+ mov edx, dword ptr temp+4
+ fsubr
+ fstp temp2
+ cmp temp2, 80000001h
+ sbb eax, -1
+ sbb edx, -1
+ }
+ }
+ #pragma warning(pop)
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+inline sint16 VDClampedRoundFixedToInt16Fast(float x) {
+ union {
+ float f;
+ sint32 i;
+ } u = {x + 384.0f}; // 2^7+2^8
+
+ sint32 v = (sint32)u.i - 0x43BF8000;
+
+ if ((uint32)v >= 0x10000)
+ v = ~v >> 31;
+
+ return (sint16)(v - 0x8000);
+}
+
+inline uint8 VDClampedRoundFixedToUint8Fast(float x) {
+ union {
+ float f;
+ sint32 i;
+ } u = {x * 255.0f + 12582912.0f}; // 2^22+2^23
+
+ sint32 v = (sint32)u.i - 0x4B400000;
+
+ if ((uint32)v >= 0xFF)
+ v = ~v >> 31;
+
+ return (uint8)v;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _M_IX86
+ sint64 __stdcall VDFractionScale64(uint64 a, uint32 b, uint32 c, uint32& remainder);
+ uint64 __stdcall VDUMulDiv64x32(uint64 a, uint32 b, uint32 c);
+#else
+ extern "C" sint64 VDFractionScale64(uint64 a, uint64 b, uint64 c, uint32& remainder);
+ extern "C" uint64 VDUMulDiv64x32(uint64 a, uint32 b, uint32 c);
+#endif
+
+sint64 VDMulDiv64(sint64 a, sint64 b, sint64 c);
+
+///////////////////////////////////////////////////////////////////////////
+
+bool VDVerifyFiniteFloats(const float *p, uint32 n);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/memory.h b/src/thirdparty/VirtualDub/h/vd2/system/memory.h
new file mode 100644
index 000000000..56decc401
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/memory.h
@@ -0,0 +1,84 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_MEMORY_H
+#define f_VD2_SYSTEM_MEMORY_H
+
+#include <vd2/system/vdtypes.h>
+
+void *VDAlignedMalloc(size_t n, unsigned alignment);
+void VDAlignedFree(void *p);
+
+template<unsigned alignment>
+struct VDAlignedObject {
+ inline void *operator new(size_t n) { return VDAlignedMalloc(n, alignment); }
+ inline void operator delete(void *p) { VDAlignedFree(p); }
+};
+
+void *VDAlignedVirtualAlloc(size_t n);
+void VDAlignedVirtualFree(void *p);
+
+extern void (__cdecl *VDSwapMemory)(void *p0, void *p1, size_t bytes);
+
+void VDInvertMemory(void *p, unsigned bytes);
+
+bool VDIsValidReadRegion(const void *p, size_t bytes);
+bool VDIsValidWriteRegion(void *p, size_t bytes);
+
+bool VDCompareRect(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, size_t w, size_t h);
+
+const void *VDMemCheck8(const void *src, uint8 value, size_t count);
+
+void VDMemset8(void *dst, uint8 value, size_t count);
+void VDMemset16(void *dst, uint16 value, size_t count);
+void VDMemset24(void *dst, uint32 value, size_t count);
+void VDMemset32(void *dst, uint32 value, size_t count);
+void VDMemset64(void *dst, uint64 value, size_t count);
+void VDMemset128(void *dst, const void *value, size_t count);
+void VDMemsetPointer(void *dst, const void *value, size_t count);
+
+void VDMemset8Rect(void *dst, ptrdiff_t pitch, uint8 value, size_t w, size_t h);
+void VDMemset16Rect(void *dst, ptrdiff_t pitch, uint16 value, size_t w, size_t h);
+void VDMemset24Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h);
+void VDMemset32Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h);
+
+#if defined(_WIN32) && defined(_M_IX86)
+ extern void (__cdecl *VDFastMemcpyPartial)(void *dst, const void *src, size_t bytes);
+ extern void (__cdecl *VDFastMemcpyFinish)();
+ void VDFastMemcpyAutodetect();
+#else
+ void VDFastMemcpyPartial(void *dst, const void *src, size_t bytes);
+ void VDFastMemcpyFinish();
+ void VDFastMemcpyAutodetect();
+#endif
+
+
+void VDMemcpyRect(void *dst, ptrdiff_t dststride, const void *src, ptrdiff_t srcstride, size_t w, size_t h);
+
+/// Copy a region of memory with an access violation guard; used in cases where a sporadic
+/// AV is unavoidable (dynamic Direct3D VB under XP). The regions must not overlap.
+bool VDMemcpyGuarded(void *dst, const void *src, size_t bytes);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/profile.h b/src/thirdparty/VirtualDub/h/vd2/system/profile.h
new file mode 100644
index 000000000..ff4f1b3d7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/profile.h
@@ -0,0 +1,167 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_PROFILE_H
+#define f_VD2_SYSTEM_PROFILE_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/vdstl.h>
+#include <vector>
+
+class VDRTProfiler;
+
+void VDInitProfilingSystem();
+void VDDeinitProfilingSystem();
+VDRTProfiler *VDGetRTProfiler();
+
+//
+// VDRTProfiler Real-time profiler
+//
+// This class forms the base for a very simple real-time profiler: threads
+// record events in channels, and periodically, someone swaps the active
+// recording array with a second array, and draws the sampled events off
+// that array. In VirtualDub, this is done via RTProfileDisplay. Events
+// are sampled via the high-performance counter in Win32, but clients need
+// never know this fact.
+//
+// All methods in VDRTProfiler are thread-safe. However, it is assumed
+// that only one client will be calling Swap() and accessing the Paint
+// channel set. Swap() should be called from rather low-level code as
+// it may introduce deadlocks otherwise.
+//
+// Strings passed to VDRTProfiler must be constant data in the main EXE.
+// No dynamic strings or DLLs. The reason is that there is an
+// indefinite delay between a call to FreeChannel() and the last time
+// data from that channel is displayed.
+//
+// Channels are not restricted to a particular thread; it is permissible
+// to allocate a channel in one thread and use it in another. However,
+// channels must not be simultaneously used by two threads -- that will
+// generate interesting output.
+//
+class VDRTProfiler {
+public:
+ enum CounterType {
+ kCounterTypeUint32,
+ kCounterTypeDouble
+ };
+
+public:
+ VDRTProfiler();
+ ~VDRTProfiler();
+
+ void BeginCollection();
+ void EndCollection();
+ void Swap();
+
+ bool IsEnabled() const { return mbEnableCollection; }
+
+ int AllocChannel(const char *name);
+ void FreeChannel(int ch);
+ void BeginEvent(int channel, uint32 color, const char *name);
+ void EndEvent(int channel);
+
+ void RegisterCounterD(const char *name, const double *val);
+ void RegisterCounterU32(const char *name, const uint32 *val);
+ void RegisterCounter(const char *name, const void *val, CounterType type);
+ void UnregisterCounter(void *p);
+
+public:
+ struct Event {
+ uint64 mStartTime;
+ uint64 mEndTime; // only last 32 bits of counter
+ uint32 mColor;
+ const char *mpName;
+ };
+
+ struct Channel {
+ const char *mpName;
+ bool mbEventPending;
+ vdfastvector<Event> mEventList;
+ };
+
+ struct Counter {
+ const char *mpName;
+ const void *mpData;
+ CounterType mType;
+ union {
+ uint32 u32;
+ double d;
+ } mData, mDataLast;
+ };
+
+ struct CounterByNamePred;
+
+ typedef std::vector<Channel> tChannels;
+ typedef vdfastvector<Counter> Counters;
+
+ VDCriticalSection mLock;
+ tChannels mChannelArray;
+ tChannels mChannelArrayToPaint;
+ Counters mCounterArray;
+ Counters mCounterArrayToPaint;
+ uint64 mPerfFreq;
+ uint64 mSnapshotTime;
+
+ volatile bool mbEnableCollection;
+};
+
+//
+// VDRTProfileChannel
+//
+// This helper simply makes channel acquisition easier. It automatically
+// stubs out if no profiler is available. However, it's still advisable
+// not to call this from your inner loop!
+//
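+// A typical use (sketch; the work function and the color value are arbitrary, and
+// per the notes above the channel name must be a string literal):
+//
+//	VDRTProfileChannel chan("Video decode");
+//	chan.Begin(0xffe0c0, "Decode frame");
+//	DecodeOneFrame();
+//	chan.End();
+//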
+class VDRTProfileChannel {
+public:
+ VDRTProfileChannel(const char *name)
+ : mpProfiler(VDGetRTProfiler())
+ , mProfileChannel(mpProfiler ? mpProfiler->AllocChannel(name) : 0)
+ {
+ }
+ ~VDRTProfileChannel() {
+ if (mpProfiler)
+ mpProfiler->FreeChannel(mProfileChannel);
+ }
+
+ void Begin(uint32 color, const char *name) {
+ if (mpProfiler)
+ mpProfiler->BeginEvent(mProfileChannel, color, name);
+ }
+
+ void End() {
+ if (mpProfiler)
+ mpProfiler->EndEvent(mProfileChannel);
+ }
+
+protected:
+ VDRTProfiler *const mpProfiler;
+ int mProfileChannel;
+};
+
+#endif
+
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/progress.h b/src/thirdparty/VirtualDub/h/vd2/system/progress.h
new file mode 100644
index 000000000..976e3c6e3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/progress.h
@@ -0,0 +1,96 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_SYSTEM_PROGRESS_H
+#define f_SYSTEM_PROGRESS_H
+
+#include <vd2/system/error.h>
+
+class VDAtomicInt;
+class VDSignalPersistent;
+
+class IProgress {
+public:
+ virtual void Error(const char *)=0;
+ virtual void Warning(const char *)=0;
+ virtual bool Query(const char *query, bool fDefault)=0;
+ virtual void ProgressStart(const char *text, const char *caption, const char *progtext, long lMax)=0;
+ virtual void ProgressAdvance(long)=0;
+ virtual void ProgressEnd()=0;
+ virtual void Output(const char *text)=0;
+ virtual VDAtomicInt *ProgressGetAbortFlag()=0;
+ virtual VDSignalPersistent *ProgressGetAbortSignal()=0;
+};
+
+
+void ProgressSetHandler(IProgress *pp);
+IProgress *ProgressGetHandler();
+
+bool ProgressCheckAbort();
+void ProgressSetAbort(bool bNewValue);
+VDSignalPersistent *ProgressGetAbortSignal();
+void ProgressError(const class MyError&);
+void ProgressWarning(const char *format, ...);
+void ProgressOutput(const char *format, ...);
+bool ProgressQuery(bool fDefault, const char *format, ...);
+void ProgressStart(long lMax, const char *caption, const char *progresstext, const char *format, ...);
+void ProgressAdvance(long lNewValue);
+void ProgressEnd();
+
+
+class VDProgress {
+public:
+ VDProgress(long lMax, const char *caption, const char *progresstext, const char *format, ...) {
+ ProgressStart(lMax, caption, progresstext, format);
+ }
+
+ ~VDProgress() {
+ ProgressEnd();
+ }
+
+ void advance(long v) {
+ ProgressAdvance(v);
+ }
+};
+
+class VDProgressAbortable {
+public:
+ VDProgressAbortable(long lMax, const char *caption, const char *progresstext, const char *format, ...) {
+ ProgressStart(lMax, caption, progresstext, format);
+ ProgressSetAbort(false);
+ }
+
+ ~VDProgressAbortable() {
+ ProgressEnd();
+ }
+
+ void advance(long v) {
+ if (ProgressCheckAbort())
+ throw MyUserAbortError();
+ ProgressAdvance(v);
+ }
+};
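+
+// Usage sketch (the work loop is hypothetical). VDProgressAbortable reports through
+// the installed IProgress handler and throws MyUserAbortError on a user abort:
+//
+//	VDProgressAbortable progress(fileCount, "Copying", "files copied", "Copying files");
+//	for(long i = 0; i < fileCount; ++i) {
+//		CopyOneFile(i);
+//		progress.advance(i + 1);
+//	}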
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/protscope.h b/src/thirdparty/VirtualDub/h/vd2/system/protscope.h
new file mode 100644
index 000000000..6c22a54ad
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/protscope.h
@@ -0,0 +1,245 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_PROTSCOPE_H
+#define f_VD2_SYSTEM_PROTSCOPE_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Protected scope macros
+//
+// These macros allow you to define a scope which is known to the crash
+// handler -- that is, if the application crashes within a protected scope
+// the handler will report the scope information in the crash output.
+//
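+// For example (sketch; 'frame' is a hypothetical variable, and the action string is
+// used as a printf-style format when the crash handler writes the scope out):
+//
+//	vdprotected1("decoding frame %d", int, frame) {
+//		DecodeFrame(frame);
+//	}
+//
+// The vdprotected* macros themselves are defined at the end of this header.
+//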
+
+class VDProtectedAutoScope;
+
+typedef VDProtectedAutoScope *(*tpVDGetProtectedScopeLink)();
+typedef void (*tpVDSetProtectedScopeLink)(VDProtectedAutoScope *);
+
+extern tpVDGetProtectedScopeLink g_pVDGetProtectedScopeLink;
+extern tpVDSetProtectedScopeLink g_pVDSetProtectedScopeLink;
+
+// The reason for this function is a bug in the Intel compiler regarding
+// construction optimization -- it stores VDProtectedAutoScope::'vtable'
+// in the vtable slot instead of VDProtectedAutoScope1<T>::'vtable', thus
+// killing the printf()s. "volatile" doesn't work to fix the problem, but
+// calling an opaque global function does. Oh well.
+
+#ifdef __INTEL_COMPILER
+void VDProtectedAutoScopeICLWorkaround();
+#endif
+
+class IVDProtectedScopeOutput {
+public:
+ virtual void write(const char *s) = 0;
+ virtual void writef(const char *s, ...) = 0;
+};
+
+class VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope(const char *file, int line, const char *action) : mpFile(file), mLine(line), mpAction(action), mpLink(g_pVDGetProtectedScopeLink()) {
+ // Note that the assignment to g_protectedScopeLink cannot occur here, as the
+ // derived class has not been constructed yet. Uninitialized objects in
+ // the debugging chain are *bad*.
+ }
+
+ ~VDProtectedAutoScope() {
+ g_pVDSetProtectedScopeLink(mpLink);
+ }
+
+ operator bool() const { return false; }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.write(mpAction);
+ }
+
+ VDProtectedAutoScope *mpLink;
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+};
+
+class VDProtectedAutoScopeData0 {
+public:
+ VDProtectedAutoScopeData0(const char *file, int line, const char *action) : mpFile(file), mLine(line), mpAction(action) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+};
+
+template<class T1>
+class VDProtectedAutoScopeData1 {
+public:
+ VDProtectedAutoScopeData1(const char *file, int line, const char *action, const T1 a1) : mpFile(file), mLine(line), mpAction(action), mArg1(a1) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+ const T1 mArg1;
+};
+
+template<class T1, class T2>
+class VDProtectedAutoScopeData2 {
+public:
+ VDProtectedAutoScopeData2(const char *file, int line, const char *action, const T1 a1, const T2 a2) : mpFile(file), mLine(line), mpAction(action), mArg1(a1), mArg2(a2) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+ const T1 mArg1;
+ const T2 mArg2;
+};
+
+template<class T1, class T2, class T3>
+class VDProtectedAutoScopeData3 {
+public:
+ VDProtectedAutoScopeData3(const char *file, int line, const char *action, const T1 a1, const T2 a2, const T3 a3) : mpFile(file), mLine(line), mpAction(action), mArg1(a1), mArg2(a2), mArg3(a3) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+ const T1 mArg1;
+ const T2 mArg2;
+ const T3 mArg3;
+};
+
+template<class T1, class T2, class T3, class T4>
+class VDProtectedAutoScopeData4 {
+public:
+ VDProtectedAutoScopeData4(const char *file, int line, const char *action, const T1 a1, const T2 a2, const T3 a3, const T4 a4) : mpFile(file), mLine(line), mpAction(action), mArg1(a1), mArg2(a2), mArg3(a3), mArg4(a4) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+ const T1 mArg1;
+ const T2 mArg2;
+ const T3 mArg3;
+ const T4 mArg4;
+};
+
+class VDProtectedAutoScope0 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope0(const VDProtectedAutoScopeData0& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+};
+
+template<class T1>
+class VDProtectedAutoScope1 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope1(const VDProtectedAutoScopeData1<T1>& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction), mArg1(data.mArg1) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.writef(mpAction, mArg1);
+ }
+
+ const T1 mArg1;
+};
+
+template<class T1, class T2>
+class VDProtectedAutoScope2 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope2(const VDProtectedAutoScopeData2<T1,T2>& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction), mArg1(data.mArg1), mArg2(data.mArg2) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.writef(mpAction, mArg1, mArg2);
+ }
+
+ const T1 mArg1;
+ const T2 mArg2;
+};
+
+template<class T1, class T2, class T3>
+class VDProtectedAutoScope3 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope3(const VDProtectedAutoScopeData3<T1,T2,T3>& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction), mArg1(data.mArg1), mArg2(data.mArg2), mArg3(data.mArg3) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.writef(mpAction, mArg1, mArg2, mArg3);
+ }
+
+ const T1 mArg1;
+ const T2 mArg2;
+ const T3 mArg3;
+};
+
+template<class T1, class T2, class T3, class T4>
+class VDProtectedAutoScope4 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope4(const VDProtectedAutoScopeData4<T1,T2,T3,T4>& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction), mArg1(data.mArg1), mArg2(data.mArg2), mArg3(data.mArg3), mArg4(data.mArg4) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.writef(mpAction, mArg1, mArg2, mArg3, mArg4);
+ }
+
+ const T1 mArg1;
+ const T2 mArg2;
+ const T3 mArg3;
+ const T4 mArg4;
+};
+
+
+#define vdprotected(action) vdobjectscope(VDProtectedAutoScope0 autoscope = VDProtectedAutoScopeData0(__FILE__, __LINE__, action))
+#define vdprotected1(actionf, type1, arg1) vdobjectscope(VDProtectedAutoScope1<type1> autoscope = VDProtectedAutoScopeData1<type1>(__FILE__, __LINE__, actionf, arg1))
+
+// @&#(* preprocessor doesn't view template brackets as escaping commas, so we have a slight
+// problem....
+
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1400 || defined(VD_COMPILER_MSVC_VC8_DDK))
+#define vdprotected2(actionf, type1, arg1, type2, arg2) if(VDProtectedAutoScope2<type1, type2> autoscope = VDProtectedAutoScopeData2<type1, type2>(__FILE__, __LINE__, actionf, arg1, arg2)) VDNEVERHERE; else
+#define vdprotected3(actionf, type1, arg1, type2, arg2, type3, arg3) if(VDProtectedAutoScope3<type1, type2, type3> autoscope = VDProtectedAutoScopeData3<type1, type2, type3>(__FILE__, __LINE__, actionf, arg1, arg2, arg3)) VDNEVERHERE; else
+#define vdprotected4(actionf, type1, arg1, type2, arg2, type3, arg3, type4, arg4) if(VDProtectedAutoScope4<type1, type2, type3, type4> autoscope = VDProtectedAutoScopeData4<type1, type2, type3, type4>(__FILE__, __LINE__, actionf, arg1, arg2, arg3, arg4)) VDNEVERHERE; else
+#else
+#define vdprotected2(actionf, type1, arg1, type2, arg2) switch(VDProtectedAutoScope2<type1, type2> autoscope = VDProtectedAutoScopeData2<type1, type2>(__FILE__, __LINE__, actionf, arg1, arg2)) case 0: default:
+#define vdprotected3(actionf, type1, arg1, type2, arg2, type3, arg3) switch(VDProtectedAutoScope3<type1, type2, type3> autoscope = VDProtectedAutoScopeData3<type1, type2, type3>(__FILE__, __LINE__, actionf, arg1, arg2, arg3)) case 0: default:
+#define vdprotected4(actionf, type1, arg1, type2, arg2, type3, arg3, type4, arg4) switch(VDProtectedAutoScope4<type1, type2, type3, type4> autoscope = VDProtectedAutoScopeData4<type1, type2, type3, type4>(__FILE__, __LINE__, actionf, arg1, arg2, arg3, arg4)) case 0: default:
+#endif
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/refcount.h b/src/thirdparty/VirtualDub/h/vd2/system/refcount.h
new file mode 100644
index 000000000..654cbe24c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/refcount.h
@@ -0,0 +1,282 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_REFCOUNT_H
+#define f_VD2_SYSTEM_REFCOUNT_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/atomic.h>
+
+///////////////////////////////////////////////////////////////////////////
+// IVDRefCount
+/// Base interface for reference-counted objects.
+///
+/// Reference counting is a relatively straightforward and simple method
+/// of garbage collection. The rules are:
+///
+/// 1) Objects increment their reference count on an AddRef() and
+/// decrement it on a Release().
+/// 2) Objects destroy themselves when their reference count is dropped
+/// to zero.
+/// 3) Clients create references with AddRef() and destroy them with
+/// Release().
+///
+/// One way to interact with refcounted objects is to call AddRef()
+/// whenever a pointer is created, and Release() when the pointer is
+/// nulled or changed. The vdrefptr<T> template does this automatically.
+/// Reference counting may be "combined" between pointers for optimization
+/// reasons, such that fewer reference counts are outstanding than actual
+/// pointers; this requires weak (non-refcounted) pointers and explicit
+/// refcount management.
+///
+/// Reference counting has two issues:
+///
+/// A) It is expensive. VirtualDub uses it somewhat sparingly.
+///
+/// B) Reference counting cannot cope with cycles. This issue is
+/// avoided by arranging objects in a clearly ordered tree, such that
+/// no class ever holds a pointer to another object of the same class
+/// or to a parent in the reference hierarchy. vdrefptr<T> can
+/// implicitly create cycles if you are not careful.
+///
+/// In VirtualDub, reference counting must be multithread safe, so atomic
+/// increment/decrement should be used. vdrefcounted<T> handles this
+/// automatically for the template type class.
+///
+/// Two final implementation details:
+///
+/// - Little or no code should be executed after the reference count
+/// drops to zero, preferably nothing more than the destructor implicitly
+/// generated by the compiler. The reason is that otherwise there is the
+/// potential for an object to be resurrected past its final release by
+/// temporarily creating a new reference on the object.
+///
+/// - AddRef() and Release() traditionally return the reference count on
+/// the object after increment or decrement, but this is not required.
+/// For Release builds, it is only required that the value for Release()
+/// be zero iff the object is destroyed. (The same applies for AddRef(),
+/// but since the result of AddRef() is always non-zero, the return of
+/// AddRef() is of no use unless it is the actual count.)
+///
+class VDINTERFACE IVDRefCount {
+public:
+ virtual int AddRef()=0;
+ virtual int Release()=0;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// vdrefcounted<T>
+/// Implements thread-safe reference counting on top of a base class.
+///
+/// vdrefcounted<T> is used to either add reference counting to a base
+/// class or to implement it on an interface. Use it by deriving your
+/// class from it.
+///
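+/// A minimal sketch (IVDWidget is a hypothetical interface):
+///
+///	class IVDWidget : public IVDRefCount {
+///	public:
+///		virtual void Render() = 0;
+///	};
+///
+///	class VDWidget : public vdrefcounted<IVDWidget> {
+///	public:
+///		void Render() {}
+///	};
+///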
+template<class T> class vdrefcounted : public T {
+public:
+ vdrefcounted() : mRefCount(0) {}
+ vdrefcounted(const vdrefcounted<T>& src) : mRefCount(0) {} // do not copy the refcount
+ virtual ~vdrefcounted() {}
+
+ vdrefcounted<T>& operator=(const vdrefcounted<T>&) { return *this; } // do not copy the refcount
+
+ inline virtual int AddRef() {
+ return mRefCount.inc();
+ }
+
+ inline virtual int Release() {
+ int rc = --mRefCount;
+
+ if (!rc) {
+ delete this;
+ return 0;
+ }
+
+ VDASSERT(rc > 0);
+
+ return rc;
+ }
+
+protected:
+ VDAtomicInt mRefCount;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// vdrefptr<T>
+/// Reference-counting smart pointer.
+///
+/// Maintains a strong reference on any object that supports AddRef/Release
+/// semantics. This includes any interface including IVDRefCount,
+/// IVDRefUnknown, or the IUnknown interface in Microsoft COM. Because
+/// references are automatically traded as necessary, smart pointers are
+/// very useful for maintaining exception safety.
+///
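+/// A short sketch, reusing the hypothetical IVDWidget/VDWidget example above:
+///
+///	vdrefptr<IVDWidget> widget(new VDWidget);	// refcount becomes 1
+///	vdrefptr<IVDWidget> alias(widget);		// refcount becomes 2
+///	alias.clear();					// back to 1
+///	// the last reference is released when 'widget' goes out of scope
+///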
+template<class T> class vdrefptr {
+protected:
+ T *ptr;
+
+public:
+ typedef vdrefptr<T> self_type;
+ typedef T element_type;
+
+ /// Creates a new smart pointer and obtains a new reference on the
+ /// specified object.
+ explicit vdrefptr(T *p = 0) : ptr(p) {
+ if (p)
+ p->AddRef();
+ }
+
+ /// Clones a smart pointer, duplicating any held reference.
+ vdrefptr(const self_type& src) {
+ ptr = src.ptr;
+ if (ptr)
+ ptr->AddRef();
+ }
+
+ /// Destroys the smart pointer, releasing any held reference.
+ ~vdrefptr() {
+ if (ptr)
+ ptr->Release();
+ }
+
+ /// Assigns a new object to a smart pointer. Any old object is released
+ /// and the new object is addrefed.
+ inline self_type& operator=(T *src) {
+ if (src)
+ src->AddRef();
+ if (ptr)
+ ptr->Release();
+ ptr = src;
+ return *this;
+ }
+
+ /// Assigns a new object to a smart pointer. Any old object is released
+ /// and the new object is addrefed.
+ inline self_type& operator=(const vdrefptr& src) {
+ if (src.ptr)
+ src.ptr->AddRef();
+ if (ptr)
+ ptr->Release();
+ ptr = src.ptr;
+ return *this;
+ }
+
+ operator T*() const { return ptr; }
+ T& operator*() const { return *ptr; }
+ T *operator->() const { return ptr; }
+
+ /// Removes any old reference and returns a double-pointer to the nulled
+ /// internal pointer. This is useful for passing to IUnknown-derived
+ /// interfaces that accept (T **) parameters, like QueryInterface().
+ T** operator~() {
+ if (ptr) {
+ ptr->Release();
+ ptr = NULL;
+ }
+ return &ptr;
+ }
+
+ /// Removes any held reference.
+ inline void clear() {
+ if (ptr)
+ ptr->Release();
+ ptr = NULL;
+ }
+
+ /// Removes any existing reference and moves a reference from another
+ /// smart pointer. The source pointer is cleared afterward.
+ inline void from(vdrefptr& src) {
+ if (ptr)
+ ptr->Release();
+ ptr = src.ptr;
+ src.ptr = NULL;
+ }
+
+ /// Removes any existing reference and accepts a reference to a new
+ /// object without actually obtaining one. This is useful if someone
+ /// has already addrefed an object for you.
+ inline void set(T* src) {
+ if (ptr)
+ ptr->Release();
+
+ ptr = src;
+ }
+
+ /// Returns the held reference and clears the smart pointer without
+ /// releasing the reference. This is useful for holding onto a reference
+ /// in an exception-safe manner up until the last moment.
+ inline T *release() {
+ T *p = ptr;
+ ptr = NULL;
+ return p;
+ }
+
+ /// Swaps the references between two smart pointers.
+ void swap(vdrefptr& r) {
+ T *p = ptr;
+ ptr = r.ptr;
+ r.ptr = p;
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T, class U>
+bool VDRefCountObjectFactory(U **pp) {
+ T *p = new_nothrow T;
+ if (!p)
+ return false;
+
+ *pp = static_cast<U *>(p);
+ p->AddRef();
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+struct vdsaferelease_t {};
+extern vdsaferelease_t vdsaferelease;
+
+template<class T>
+inline vdsaferelease_t& operator<<=(vdsaferelease_t& x, T *& p) {
+ if (p) {
+ p->Release();
+ p = 0;
+ }
+
+ return x;
+}
+
+template<class T>
+inline vdsaferelease_t& operator,(vdsaferelease_t& x, T *& p) {
+ if (p) {
+ p->Release();
+ p = 0;
+ }
+
+ return x;
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/registry.h b/src/thirdparty/VirtualDub/h/vd2/system/registry.h
new file mode 100644
index 000000000..c9ee119da
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/registry.h
@@ -0,0 +1,84 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_REGISTRY_H
+#define f_VD2_SYSTEM_REGISTRY_H
+
+#include <vd2/system/VDString.h>
+
+class VDRegistryKey {
+private:
+ void *pHandle;
+
+public:
+ VDRegistryKey(const char *pszKey, bool global = false, bool write = true);
+ ~VDRegistryKey();
+
+ void *getRawHandle() const { return pHandle; }
+
+ bool isReady() const { return pHandle != 0; }
+
+ bool setBool(const char *pszName, bool) const;
+ bool setInt(const char *pszName, int) const;
+ bool setString(const char *pszName, const char *pszString) const;
+ bool setString(const char *pszName, const wchar_t *pszString) const;
+ bool setBinary(const char *pszName, const char *data, int len) const;
+
+ bool getBool(const char *pszName, bool def=false) const;
+ int getInt(const char *pszName, int def=0) const;
+ int getEnumInt(const char *pszName, int maxVal, int def=0) const;
+ bool getString(const char *pszName, VDStringA& s) const;
+ bool getString(const char *pszName, VDStringW& s) const;
+
+ int getBinaryLength(const char *pszName) const;
+ bool getBinary(const char *pszName, char *buf, int maxlen) const;
+
+ bool removeValue(const char *);
+};
+
+class VDRegistryValueIterator {
+public:
+ VDRegistryValueIterator(const VDRegistryKey& key);
+
+ const char *Next();
+
+protected:
+ void *mpHandle;
+ uint32 mIndex;
+ char mName[256];
+};
+
+class VDRegistryAppKey : public VDRegistryKey {
+private:
+ static VDString s_appbase;
+
+public:
+ VDRegistryAppKey();
+ VDRegistryAppKey(const char *pszKey, bool write = true);
+
+ static void setDefaultKey(const char *pszAppName);
+};
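+
+// Usage sketch (the key path and value names are examples only, not paths the
+// application actually uses):
+//
+//	VDRegistryAppKey::setDefaultKey("Software\\ExampleApp\\");
+//
+//	VDRegistryAppKey prefs("Preferences");			// writable by default
+//	prefs.setInt("Volume", 75);
+//
+//	VDRegistryAppKey prefsRead("Preferences", false);	// read-only
+//	int volume = prefsRead.getInt("Volume", 100);		// 100 if the value is missing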
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/source/bitmath.cpp b/src/thirdparty/VirtualDub/h/vd2/system/source/bitmath.cpp
new file mode 100644
index 000000000..d8eaf47ae
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/source/bitmath.cpp
@@ -0,0 +1,67 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/bitmath.h>
+
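+// Parallel (SWAR) population count: bits are summed in 2-, 4-, then 8-bit groups,
+// and the final multiply accumulates the per-byte counts into the top byte.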
+int VDCountBits(uint32 v) {
+ v -= (v >> 1) & 0x55555555;
+ v = ((v & 0xcccccccc) >> 2) + (v & 0x33333333);
+ v = (v + (v >> 4)) & 0x0f0f0f0f;
+ return (v * 0x01010101) >> 24;
+}
+
+#ifndef VD_COMPILER_MSVC_VC8
+
+ int VDFindLowestSetBit(uint32 v) {
+ for(int i=0; i<32; ++i) {
+ if (v & 1)
+ return i;
+ v >>= 1;
+ }
+
+ return 32;
+ }
+
+ int VDFindHighestSetBit(uint32 v) {
+ for(int i=31; i>=0; --i) {
+ if ((sint32)v < 0)
+ return i;
+ v += v;
+ }
+ return -1;
+ }
+
+#endif
+
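+// Rounds v up to the nearest power of two (v is returned unchanged if it is already
+// a power of two), e.g. 5 -> 8, 8 -> 8: the loop clears the lowest set bit of 2*v-1
+// until only the highest bit remains.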
+uint32 VDCeilToPow2(uint32 v) {
+ v += v;
+ --v;
+
+ while(uint32 x = v & (v - 1))
+ v = x;
+
+ return v;
+}
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/strutil.h b/src/thirdparty/VirtualDub/h/vd2/system/strutil.h
new file mode 100644
index 000000000..2f1fdf84f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/strutil.h
@@ -0,0 +1,44 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+#ifndef f_VD2_SYSTEM_STRUTIL_H
+#define f_VD2_SYSTEM_STRUTIL_H
+
+#include <string.h>
+#include <vd2/system/vdtypes.h>
+
+char *strncpyz(char *strDest, const char *strSource, size_t count);
+wchar_t *wcsncpyz(wchar_t *strDest, const wchar_t *strSource, size_t count);
+const char *strskipspace(const char *s) throw();
+
+inline char *strskipspace(char *s) throw() {
+ return const_cast<char *>(strskipspace(s));
+}
+
+size_t vdstrlcpy(char *dst, const char *src, size_t sizeChars);
+size_t vdwcslcpy(wchar_t *dst, const wchar_t *src, size_t sizeChars);
+
+size_t vdstrlcat(char *dst, const char *src, size_t sizeChars);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/text.h b/src/thirdparty/VirtualDub/h/vd2/system/text.h
new file mode 100644
index 000000000..bc8ea93f3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/text.h
@@ -0,0 +1,60 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_TEXT_H
+#define f_VD2_SYSTEM_TEXT_H
+
+#include <ctype.h>
+#include <stdarg.h>
+
+class VDStringA;
+class VDStringW;
+
+// The max_dst value must include space for the terminating NULL. The return
+// value is the number of characters produced, not counting the null terminator.
+
+int VDTextWToA(char *dst, int max_dst, const wchar_t *src, int max_src = -1);
+int VDTextAToW(wchar_t *dst, int max_dst, const char *src, int max_src = -1);
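+
+// For example (sketch): converting a wide string into a fixed-size buffer.
+//
+//	char buf[128];
+//	int chars = VDTextWToA(buf, 128, L"some wide text");	// excludes the terminator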
+
+VDStringA VDTextWToA(const wchar_t *src, int length = -1);
+VDStringA VDTextWToA(const VDStringW& sw);
+VDStringW VDTextAToW(const char *src, int length = -1);
+VDStringW VDTextAToW(const VDStringA& sw);
+
+VDStringA VDTextWToU8(const VDStringW& s);
+VDStringA VDTextWToU8(const wchar_t *s, int length);
+VDStringW VDTextU8ToW(const VDStringA& s);
+VDStringW VDTextU8ToW(const char *s, int length);
+
+// The terminating NULL character is not included in these.
+
+int VDTextWToALength(const wchar_t *s, int length=-1);
+int VDTextAToWLength(const char *s, int length=-1);
+
+VDStringW VDaswprintf(const wchar_t *format, int args, const void *const *argv);
+VDStringW VDvswprintf(const wchar_t *format, int args, va_list val);
+VDStringW VDswprintf(const wchar_t *format, int args, ...);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/thread.h b/src/thirdparty/VirtualDub/h/vd2/system/thread.h
new file mode 100644
index 000000000..6cf1fc7a0
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/thread.h
@@ -0,0 +1,269 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_THREAD_H
+#define f_VD2_SYSTEM_THREAD_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/atomic.h>
+
+typedef void *VDThreadHandle;
+typedef uint32 VDThreadID;
+typedef uint32 VDThreadId;
+typedef uint32 VDProcessId;
+
+struct _RTL_CRITICAL_SECTION;
+
+extern "C" void __declspec(dllimport) __stdcall InitializeCriticalSection(_RTL_CRITICAL_SECTION *lpCriticalSection);
+extern "C" void __declspec(dllimport) __stdcall LeaveCriticalSection(_RTL_CRITICAL_SECTION *lpCriticalSection);
+extern "C" void __declspec(dllimport) __stdcall EnterCriticalSection(_RTL_CRITICAL_SECTION *lpCriticalSection);
+extern "C" void __declspec(dllimport) __stdcall DeleteCriticalSection(_RTL_CRITICAL_SECTION *lpCriticalSection);
+extern "C" unsigned long __declspec(dllimport) __stdcall WaitForSingleObject(void *hHandle, unsigned long dwMilliseconds);
+extern "C" int __declspec(dllimport) __stdcall ReleaseSemaphore(void *hSemaphore, long lReleaseCount, long *lpPreviousCount);
+
+VDThreadID VDGetCurrentThreadID();
+VDProcessId VDGetCurrentProcessId();
+
+void VDSetThreadDebugName(VDThreadID tid, const char *name);
+void VDThreadSleep(int milliseconds);
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDThread
+//
+// VDThread is a quick way to portably create threads -- to use it,
+// derive a subclass from it that implements the ThreadRun() function.
+//
+// Win32 notes:
+//
+// The thread startup code will attempt to notify the VC++ debugger of
+// the debug name of the thread. Only the first 9 characters are used
+// by Visual C 6.0; Visual Studio .NET will accept a few dozen.
+//
+// VDThread objects must not be WaitThread()ed or destructed from a
+// DllMain() function, TLS callback for an executable, or static
+// destructor unless the thread has been detached from the object.
+// The reason is that Win32 serializes calls to DllMain() functions.
+// If you attempt to do so, you will cause a deadlock when Win32
+// attempts to fire thread detach notifications.
+//
+///////////////////////////////////////////////////////////////////////////
+
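+// A minimal sketch (the worker class and its work loop are hypothetical):
+//
+//	class VDExampleWorker : public VDThread {
+//	public:
+//		VDExampleWorker() : VDThread("ExampleWorker") {}
+//		void ThreadRun() { /* ... do the work ... */ }
+//	};
+//
+//	VDExampleWorker worker;
+//	worker.ThreadStart();
+//	...
+//	worker.ThreadWait();		// join before the object is destroyed
+//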
+class VDThread {
+public:
+ VDThread(const char *pszDebugName = NULL); // NOTE: pszDebugName must have static duration
+ ~VDThread() throw();
+
+ // external functions
+
+ bool ThreadStart(); // start thread
+ void ThreadDetach(); // detach thread (wait() won't be called)
+ void ThreadWait(); // wait for thread to finish
+
+ bool isThreadActive();
+
+ bool isThreadAttached() const { // NOTE: Will return true if thread started, even if thread has since exited
+ return mhThread != 0;
+ }
+
+ VDThreadHandle getThreadHandle() const { // get handle to thread (Win32: HANDLE)
+ return mhThread;
+ }
+
+ VDThreadID getThreadID() const { // get ID of thread (Win32: DWORD)
+ return mThreadID;
+ }
+
+ void *ThreadLocation() const; // retrieve current EIP of thread (use only for debug purposes -- may not return reliable information on syscall, etc.)
+
+ // thread-local functions
+
+ virtual void ThreadRun() = 0; // thread, come to life
+ void ThreadFinish(); // exit thread
+
+private:
+ static unsigned __stdcall StaticThreadStart(void *pThis);
+
+ const char *mpszDebugName;
+ VDThreadHandle mhThread;
+ VDThreadID mThreadID;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDCriticalSection {
+private:
+ struct CritSec { // This is a clone of CRITICAL_SECTION.
+ void *DebugInfo;
+ sint32 LockCount;
+ sint32 RecursionCount;
+ void *OwningThread;
+ void *LockSemaphore;
+ uint32 SpinCount;
+ } csect;
+
+ VDCriticalSection(const VDCriticalSection&);
+ const VDCriticalSection& operator=(const VDCriticalSection&);
+ static void StructCheck();
+public:
+ class AutoLock {
+ private:
+ VDCriticalSection& cs;
+ public:
+ AutoLock(VDCriticalSection& csect) : cs(csect) { cs.Lock(); }
+ ~AutoLock() { cs.Unlock(); }
+
+ inline operator bool() const { return false; }
+ };
+
+ VDCriticalSection() {
+ InitializeCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ ~VDCriticalSection() {
+ DeleteCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ void operator++() {
+ EnterCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ void operator--() {
+ LeaveCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ void Lock() {
+ EnterCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ void Unlock() {
+ LeaveCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+};
+
+// 'vdsynchronized' keyword
+//
+// The vdsynchronized(lock) keyword emulates Java's 'synchronized' keyword, which
+// protects the following statement or block from race conditions by obtaining a
+// lock during its execution:
+//
+// vdsynchronized(list_lock) {
+// mList.pop_back();
+// if (mList.empty())
+// return false;
+// }
+//
+// The construct is exception safe and will release the lock even if a return,
+// continue, break, or thrown exception exits the block. However, hardware
+// exceptions (access violations) may not work due to synchronous model
+// exception handling.
+//
+// There are two Visual C++ bugs we need to work around here (both are in VC6 and VC7).
+//
+// 1) Declaring an object with a non-trivial destructor in a switch() condition
+// causes a C1001 INTERNAL COMPILER ERROR.
+//
+// 2) Using __LINE__ in a macro expanded in a function with Edit and Continue (/ZI)
+// breaks the preprocessor (KB article Q199057). Shame, too, because without it
+// all the autolocks look the same.
+
+#define vdsynchronized2(lock) if(VDCriticalSection::AutoLock vd__lock=(lock))VDNEVERHERE;else
+#define vdsynchronized1(lock) vdsynchronized2(lock)
+#define vdsynchronized(lock) vdsynchronized1(lock)
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDSignalBase {
+protected:
+ void *hEvent;
+
+public:
+ ~VDSignalBase();
+
+ void signal();
+ bool check();
+ void wait();
+ int wait(VDSignalBase *second);
+ int wait(VDSignalBase *second, VDSignalBase *third);
+ static int waitMultiple(const VDSignalBase **signals, int count);
+ void *getHandle() { return hEvent; }
+
+ void operator()() { signal(); }
+};
+
+class VDSignal : public VDSignalBase {
+ VDSignal(const VDSignal&);
+ VDSignal& operator=(const VDSignal&);
+public:
+ VDSignal();
+};
+
+class VDSignalPersistent : public VDSignalBase {
+ VDSignalPersistent(const VDSignalPersistent&);
+ VDSignalPersistent& operator=(const VDSignalPersistent&);
+public:
+ VDSignalPersistent();
+
+ void unsignal();
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDSemaphore {
+public:
+ VDSemaphore(int initial);
+ ~VDSemaphore();
+
+ void *GetHandle() const {
+ return mKernelSema;
+ }
+
+ void Reset(int count);
+
+ void Wait() {
+ WaitForSingleObject(mKernelSema, 0xFFFFFFFFU);
+ }
+
+ bool Wait(int timeout) {
+ return 0 == WaitForSingleObject(mKernelSema, timeout);
+ }
+
+ bool TryWait() {
+ return 0 == WaitForSingleObject(mKernelSema, 0);
+ }
+
+ void Post() {
+ ReleaseSemaphore(mKernelSema, 1, NULL);
+ }
+
+private:
+ void *mKernelSema;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/thunk.h b/src/thirdparty/VirtualDub/h/vd2/system/thunk.h
new file mode 100644
index 000000000..cf92407ac
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/thunk.h
@@ -0,0 +1,76 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_THUNK_H
+#define f_VD2_SYSTEM_THUNK_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+bool VDInitThunkAllocator();
+void VDShutdownThunkAllocator();
+
+void *VDAllocateThunkMemory(size_t len);
+void VDFreeThunkMemory(void *p, size_t len);
+void VDSetThunkMemory(void *p, const void *src, size_t len);
+void VDFlushThunkMemory(void *p, size_t len);
+
+class VDFunctionThunk;
+
+VDFunctionThunk *VDCreateFunctionThunkFromMethod(void *method, void *pThis, size_t argbytes, bool stdcall_thunk);
+void VDDestroyFunctionThunk(VDFunctionThunk *pFnThunk);
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<typename T> struct VDMetaSizeofArg { enum { value = (sizeof(T) + sizeof(void *) - 1) & ~(sizeof(void *) - 1) }; };
+
+// This doesn't work for references. Sadly, these seem to get stripped during template matching.
+template<class T, class R>
+char (&VDMetaGetMethodArgBytes(R (T::*method)()))[1];
+
+template<class T, class R, class A1>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1)))[1 + VDMetaSizeofArg<A1>::value];
+
+template<class T, class R, class A1, class A2>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1, A2)))[1 + VDMetaSizeofArg<A1>::value + VDMetaSizeofArg<A2>::value];
+
+template<class T, class R, class A1, class A2, class A3>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1, A2, A3)))[1 + VDMetaSizeofArg<A1>::value + VDMetaSizeofArg<A2>::value + VDMetaSizeofArg<A3>::value];
+
+template<class T, class R, class A1, class A2, class A3, class A4>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1, A2, A3, A4)))[1 + VDMetaSizeofArg<A1>::value + VDMetaSizeofArg<A2>::value + VDMetaSizeofArg<A3>::value + VDMetaSizeofArg<A4>::value];
+
+template<class T, class R, class A1, class A2, class A3, class A4, class A5>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1, A2, A3, A4, A5)))[1 + VDMetaSizeofArg<A1>::value + VDMetaSizeofArg<A2>::value + VDMetaSizeofArg<A3>::value + VDMetaSizeofArg<A4>::value + VDMetaSizeofArg<A5>::value];
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T, class T_Method>
+VDFunctionThunk *VDCreateFunctionThunkFromMethod(T *pThis, T_Method method, bool stdcall_thunk) {
+ return VDCreateFunctionThunkFromMethod(*(void **)&method, pThis, sizeof VDMetaGetMethodArgBytes(method) - 1, stdcall_thunk);
+}
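+
+// Usage sketch (MyWindow and OnTimer are hypothetical): the thunk lets a non-static
+// member function stand in where a plain __stdcall callback is expected.
+//
+//	VDFunctionThunk *thunk = VDCreateFunctionThunkFromMethod(this, &MyWindow::OnTimer, true);
+//	...
+//	VDDestroyFunctionThunk(thunk);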
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/time.h b/src/thirdparty/VirtualDub/h/vd2/system/time.h
new file mode 100644
index 000000000..e2da3ce4a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/time.h
@@ -0,0 +1,118 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_TIME_H
+#define f_VD2_SYSTEM_TIME_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/atomic.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/win32/miniwindows.h>
+
+class VDFunctionThunk;
+
+// VDGetCurrentTick: Retrieve current process timer, in milliseconds. Should only
+// be used for spacing out updates/checks, and not for precision timing. Approximate
+// resolution is 55ms under Win9x and 10-15ms under WinNT. The advantage of this
+// call is that it is usually extremely fast (just reading from the PEB).
+uint32 VDGetCurrentTick();
+
+// VDGetPreciseTick: Retrieves high-performance timer (QueryPerformanceCounter in
+// Win32). This is very precise, often <1us, but often suffers from various bugs
+// that make it undesirable for high-accuracy requirements. On x64 Windows it
+// can run at 1/2 speed when CPU throttling is enabled, and on some older buggy
+// chipsets it can skip around occasionally.
+uint64 VDGetPreciseTick();
+uint64 VDGetPreciseTicksPerSecondI();
+double VDGetPreciseTicksPerSecond();
+double VDGetPreciseSecondsPerTick();
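+
+// Timing sketch: measuring an operation in seconds with the precise tick counter.
+//
+//	uint64 start = VDGetPreciseTick();
+//	DoWork();		// hypothetical
+//	double seconds = (double)(VDGetPreciseTick() - start) * VDGetPreciseSecondsPerTick();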
+
+// VDGetAccurateTick: Reads a timer with good precision and accuracy, in
+// milliseconds. On Win9x, it has 1ms precision; on WinNT, it may have anywhere
+// from 1ms to 10-15ms, although 1ms can be forced with timeBeginPeriod().
+uint32 VDGetAccurateTick();
+
+// VDCallbackTimer is an abstraction of the Windows multimedia timer. As such, it
+// is rather expensive to instantiate, and should only be used for critical timing
+// needs... such as multimedia. Basically, there should only really be one or two
+// of these running. Win32 typically implements these as separate threads
+// triggered off a timer, so despite the outdated documentation -- which still hasn't
+// been updated from Windows 3.1 -- you can call almost any function from the
+// callback. Execution time in the callback delays other timers, however, so the
+// callback should still execute as quickly as possible.
+
+class VDINTERFACE IVDTimerCallback {
+public:
+ virtual void TimerCallback() = 0;
+};
+
+class VDCallbackTimer : private VDThread {
+public:
+ VDCallbackTimer();
+ ~VDCallbackTimer();
+
+ bool Init(IVDTimerCallback *pCB, uint32 period_ms);
+ bool Init2(IVDTimerCallback *pCB, uint32 period_100ns);
+ bool Init3(IVDTimerCallback *pCB, uint32 period_100ns, uint32 accuracy_100ns, bool precise);
+ void Shutdown();
+
+ void SetRateDelta(int delta_100ns);
+ void AdjustRate(int adjustment_100ns);
+
+ bool IsTimerRunning() const;
+
+private:
+ void ThreadRun();
+
+ IVDTimerCallback *mpCB;
+ unsigned mTimerAccuracy;
+ uint32 mTimerPeriod;
+ VDAtomicInt mTimerPeriodDelta;
+ VDAtomicInt mTimerPeriodAdjustment;
+
+ VDSignal msigExit;
+
+ volatile bool mbExit; // this doesn't really need to be atomic -- think about it
+ bool mbPrecise;
+};
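+
+// Usage sketch (the callback body is hypothetical): a 10 ms periodic callback.
+//
+//	class VDExampleClock : public IVDTimerCallback {
+//	public:
+//		void TimerCallback() { /* runs on the timer thread once per period */ }
+//	};
+//
+//	VDExampleClock clock;
+//	VDCallbackTimer timer;
+//	timer.Init(&clock, 10);		// period in milliseconds
+//	...
+//	timer.Shutdown();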
+
+
+class VDLazyTimer {
+public:
+ VDLazyTimer();
+ ~VDLazyTimer();
+
+ void SetOneShot(IVDTimerCallback *pCB, uint32 delay);
+ void Stop();
+
+protected:
+ void StaticTimeCallback(VDZHWND hwnd, VDZUINT msg, VDZUINT_PTR id, VDZDWORD time);
+
+ uint32 mTimerId;
+ VDFunctionThunk *mpThunk;
+ IVDTimerCallback *mpCB;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/tls.h b/src/thirdparty/VirtualDub/h/vd2/system/tls.h
new file mode 100644
index 000000000..2cd2ecc70
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/tls.h
@@ -0,0 +1,38 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_SYSTEM_TLS_H
+#define f_SYSTEM_TLS_H
+
+#include <ctype.h>
+
+void VDInitThreadData(const char *pszThreadName);
+void VDDeinitThreadData();
+
+typedef void (*VDThreadInitHook)(bool init, const char *threadName);
+
+void VDSetThreadInitHook(VDThreadInitHook pHook);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/unknown.h b/src/thirdparty/VirtualDub/h/vd2/system/unknown.h
new file mode 100644
index 000000000..1a3efb71b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/unknown.h
@@ -0,0 +1,77 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_UNKNOWN_H
+#define f_VD2_SYSTEM_UNKNOWN_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+///////////////////////////////////////////////////////////////////////////
+// IVDUnknown
+/// Base interface for runtime type discovery.
+class IVDUnknown {
+public:
+ /// Attempt to cast to another type. Returns NULL if interface is unsupported.
+ virtual void *AsInterface(uint32 id) = 0;
+
+ inline const void *AsInterface(uint32 id) const {
+ return const_cast<IVDUnknown *>(this)->AsInterface(id);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////
+// IVDRefUnknown
+/// Base interface for runtime type discovery with reference counting.
+class IVDRefUnknown : public IVDUnknown {
+public:
+ virtual int AddRef() = 0; ///< Add strong reference to object. Returns new reference count (debug builds only).
+	virtual int Release() = 0;	///< Remove strong reference from object, and destroy it if the refcount drops to zero. Returns zero if object was destroyed.
+};
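+
+// A minimal usage sketch of the reference-counting contract (pObj is assumed
+// to point to a heap-allocated object implementing IVDRefUnknown):
+//
+//		pObj->AddRef();				// take an additional strong reference
+//		...use the object...
+//		if (!pObj->Release())		// drop the reference; zero means the object
+//			pObj = NULL;			// destroyed itself, so forget the pointer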
+
+template<class T>
+inline uint32 vdpoly_id_from_ptr(T *p) {
+ return T::kTypeID;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// vdpoly_cast
+/// Performs a runtime polymorphic cast on an IUnknown-based object.
+///
+/// \param pUnk Pointer to cast. May be NULL.
+///
+/// Attempts to cast a pointer to a different type using the
+/// \c AsInterface() method. The destination type must support the
+/// \c kTypeID convention for returning the type ID.
+///
+template<class T>
+T vdpoly_cast(IVDUnknown *pUnk) {
+ return pUnk ? (T)pUnk->AsInterface(vdpoly_id_from_ptr(T(NULL))) : NULL;
+}
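+
+// A minimal usage sketch (IVDExampleSink and its kTypeID value are
+// hypothetical and not part of this header; a concrete implementation would
+// return 'this' from AsInterface() when passed kTypeID):
+//
+//		class IVDExampleSink : public IVDRefUnknown {
+//		public:
+//			enum { kTypeID = 'xsnk' };
+//		};
+//
+//		void UseSink(IVDUnknown *pUnk) {
+//			IVDExampleSink *pSink = vdpoly_cast<IVDExampleSink *>(pUnk);
+//			if (pSink) {
+//				// pUnk supports the interface; pSink is safe to use.
+//			}
+//		}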
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vdalloc.h b/src/thirdparty/VirtualDub/h/vd2/system/vdalloc.h
new file mode 100644
index 000000000..2c9fa2efd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vdalloc.h
@@ -0,0 +1,123 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDALLOC_H
+#define f_VD2_SYSTEM_VDALLOC_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <stdlib.h>
+
+// Why don't I use STL auto_ptr? Two reasons. First, auto_ptr has
+// the overhead of an ownership flag, and second, auto_ptr can't
+// be used with malloc() blocks. So think of these as auto_ptr
+// objects, but not quite....
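+//
+// A minimal usage sketch (MyFilter is a hypothetical client type):
+//
+//		vdautoptr<MyFilter> filter(new MyFilter);	// released with delete
+//		vdautoblockptr buf(malloc(4096));			// released with free()
+//		filter->Run();
+//		MyFilter *raw = filter.release();			// caller takes ownership back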
+
+#pragma warning(push)
+#pragma warning(disable: 4284) // operator-> must return pointer to UDT
+
+class vdautoblockptr {
+protected:
+ void *ptr;
+
+public:
+ explicit vdautoblockptr(void *p = 0) : ptr(p) {}
+ ~vdautoblockptr() { free(ptr); }
+
+ vdautoblockptr& operator=(void *src) { free(ptr); ptr = src; return *this; }
+
+ operator void*() const { return ptr; }
+
+	vdautoblockptr& from(vdautoblockptr& src) { free(ptr); ptr=src.ptr; src.ptr=0; return *this; }
+ void *get() const { return ptr; }
+ void *release() { void *v = ptr; ptr = NULL; return v; }
+};
+
+template<class T> class vdautoptr2 {
+protected:
+ T *ptr;
+
+public:
+ explicit vdautoptr2(T *p = 0) : ptr(p) {}
+ ~vdautoptr2() { free((void *)ptr); }
+
+ vdautoptr2<T>& operator=(T *src) { free((void *)ptr); ptr = src; return *this; }
+
+ operator T*() const { return ptr; }
+ T& operator*() const { return *ptr; }
+ T *operator->() const { return ptr; }
+
+	vdautoptr2<T>& from(vdautoptr2<T>& src) { free((void *)ptr); ptr=src.ptr; src.ptr=0; return *this; }
+ T *get() const { return ptr; }
+ T *release() { T *v = ptr; ptr = NULL; return v; }
+};
+
+template<class T> class vdautoptr {
+protected:
+ T *ptr;
+
+public:
+ explicit vdautoptr(T *p = 0) : ptr(p) {}
+ ~vdautoptr() { delete ptr; }
+
+ vdautoptr<T>& operator=(T *src) { delete ptr; ptr = src; return *this; }
+
+ operator T*() const { return ptr; }
+ T& operator*() const { return *ptr; }
+ T *operator->() const { return ptr; }
+
+	vdautoptr<T>& from(vdautoptr<T>& src) { delete ptr; ptr=src.ptr; src.ptr=0; return *this; }
+ T *get() const { return ptr; }
+ T *release() { T *v = ptr; ptr = NULL; return v; }
+
+ void swap(vdautoptr<T>& other) {
+ T *p = other.ptr;
+ other.ptr = ptr;
+ ptr = p;
+ }
+};
+
+template<class T> class vdautoarrayptr {
+protected:
+ T *ptr;
+
+public:
+ explicit vdautoarrayptr(T *p = 0) : ptr(p) {}
+ ~vdautoarrayptr() { delete[] ptr; }
+
+ vdautoarrayptr<T>& operator=(T *src) { delete[] ptr; ptr = src; return *this; }
+
+ T& operator[](int offset) const { return ptr[offset]; }
+
+	vdautoarrayptr<T>& from(vdautoarrayptr<T>& src) { delete[] ptr; ptr=src.ptr; src.ptr=0; return *this; }
+ T *get() const { return ptr; }
+ T *release() { T *v = ptr; ptr = NULL; return v; }
+};
+
+#pragma warning(pop)
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vdstl.h b/src/thirdparty/VirtualDub/h/vd2/system/vdstl.h
new file mode 100644
index 000000000..aeaaf15d6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vdstl.h
@@ -0,0 +1,1610 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef VD2_SYSTEM_VDSTL_H
+#define VD2_SYSTEM_VDSTL_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdexcept>
+#include <iterator>
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/memory.h>
+
+///////////////////////////////////////////////////////////////////////////
+//
+// glue
+//
+///////////////////////////////////////////////////////////////////////////
+
+template<class Category, class T, class Distance = ptrdiff_t, class Pointer = T*, class Reference = T&>
+struct vditerator {
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1310 || (defined(VD_COMPILER_MSVC_VC8_PSDK) || defined(VD_COMPILER_MSVC_VC8_DDK)))
+ typedef std::iterator<Category, T, Distance> type;
+#else
+ typedef std::iterator<Category, T, Distance, Pointer, Reference> type;
+#endif
+};
+
+template<class Iterator, class T>
+struct vdreverse_iterator {
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1310 || (defined(VD_COMPILER_MSVC_VC8_PSDK) || defined(VD_COMPILER_MSVC_VC8_DDK)))
+ typedef std::reverse_iterator<Iterator, T> type;
+#else
+ typedef std::reverse_iterator<Iterator> type;
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+class vdallocator_base {
+protected:
+ void VDNORETURN throw_oom();
+};
+
+template<class T>
+class vdallocator : public vdallocator_base {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ template<class U> struct rebind { typedef vdallocator<U> other; };
+
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+
+ pointer allocate(size_type n, void *p_close = 0) {
+ pointer p = (pointer)malloc(n*sizeof(T));
+
+ if (!p)
+ throw_oom();
+
+ return p;
+ }
+
+ void deallocate(pointer p, size_type n) {
+ free(p);
+ }
+
+ size_type max_size() const throw() { return ((~(size_type)0) >> 1) / sizeof(T); }
+
+ void construct(pointer p, const T& val) { new((void *)p) T(val); }
+ void destroy(pointer p) { ((T*)p)->~T(); }
+
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ char * _Charalloc(size_type n) { return rebind<char>::other::allocate(n); }
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T, unsigned kDeadZone = 16>
+class vddebug_alloc {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ template<class U> struct rebind { typedef vddebug_alloc<U, kDeadZone> other; };
+
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+
+ pointer allocate(size_type n, void *p_close = 0) {
+ pointer p = (pointer)VDAlignedMalloc(n*sizeof(T) + 2*kDeadZone, 16);
+
+ if (!p)
+ return p;
+
+ memset((char *)p, 0xa9, kDeadZone);
+ memset((char *)p + kDeadZone + n*sizeof(T), 0xa9, kDeadZone);
+
+ return (pointer)((char *)p + kDeadZone);
+ }
+
+ void deallocate(pointer p, size_type n) {
+ char *p1 = (char *)p - kDeadZone;
+ char *p2 = (char *)p + n*sizeof(T);
+
+ for(uint32 i=0; i<kDeadZone; ++i) {
+ VDASSERT(p1[i] == (char)0xa9);
+ VDASSERT(p2[i] == (char)0xa9);
+ }
+
+ VDAlignedFree(p1);
+ }
+
+	size_type		max_size() const throw()		{ return INT_MAX - 2*kDeadZone; }
+
+ void construct(pointer p, const T& val) { new((void *)p) T(val); }
+ void destroy(pointer p) { ((T*)p)->~T(); }
+
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ char * _Charalloc(size_type n) { return rebind<char>::other::allocate(n); }
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T, unsigned kAlignment = 16>
+class vdaligned_alloc {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ vdaligned_alloc() {}
+
+ template<class U, unsigned kAlignment2>
+ vdaligned_alloc(const vdaligned_alloc<U, kAlignment2>&) {}
+
+ template<class U> struct rebind { typedef vdaligned_alloc<U, kAlignment> other; };
+
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+
+ pointer allocate(size_type n, void *p = 0) { return (pointer)VDAlignedMalloc(n*sizeof(T), kAlignment); }
+ void deallocate(pointer p, size_type n) { VDAlignedFree(p); }
+ size_type max_size() const throw() { return INT_MAX; }
+
+ void construct(pointer p, const T& val) { new((void *)p) T(val); }
+ void destroy(pointer p) { ((T*)p)->~T(); }
+
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ char * _Charalloc(size_type n) { return rebind<char>::other::allocate(n); }
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// vdblock
+//
+// vdblock<T> is similar to vector<T>, except:
+//
+// 1) May only be used with POD types.
+// 2) No construction or destruction of elements is performed.
+// 3) Capacity is always equal to size, and reallocation is performed
+// whenever the size changes.
+// 4) Contents are undefined after a reallocation.
+// 5) No insertion or deletion operations are provided.
+//
+///////////////////////////////////////////////////////////////////////////
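+
+// A minimal usage sketch:
+//
+//		vdblock<uint32> table(256);			// size == capacity == 256, contents undefined
+//		for(uint32 i=0; i<table.size(); ++i)
+//			table[i] = i;
+//		table.resize(512);					// reallocates; previous contents are lost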
+
+template<class T, class A = vdallocator<T> >
+class vdblock : protected A {
+public:
+ typedef T value_type;
+ typedef typename A::pointer pointer;
+ typedef typename A::const_pointer const_pointer;
+ typedef typename A::reference reference;
+ typedef typename A::const_reference const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+ typedef typename vdreverse_iterator<iterator, T>::type reverse_iterator;
+ typedef typename vdreverse_iterator<const_iterator, const T>::type const_reverse_iterator;
+
+ vdblock(const A& alloc = A()) : A(alloc), mpBlock(NULL), mSize(0) {}
+ vdblock(size_type s, const A& alloc = A()) : A(alloc), mpBlock(A::allocate(s, 0)), mSize(s) {}
+ ~vdblock() {
+ if (mpBlock)
+ A::deallocate(mpBlock, mSize);
+ }
+
+ reference operator[](size_type n) { return mpBlock[n]; }
+ const_reference operator[](size_type n) const { return mpBlock[n]; }
+	reference			at(size_type n)			{ if (n >= mSize) throw std::length_error("vdblock::at"); return mpBlock[n]; }
+	const_reference		at(size_type n) const	{ if (n >= mSize) throw std::length_error("vdblock::at"); return mpBlock[n]; }
+ reference front() { return *mpBlock; }
+ const_reference front() const { return *mpBlock; }
+ reference back() { return mpBlock[mSize-1]; }
+ const_reference back() const { return mpBlock[mSize-1]; }
+
+ const_pointer data() const { return mpBlock; }
+ pointer data() { return mpBlock; }
+
+ const_iterator begin() const { return mpBlock; }
+ iterator begin() { return mpBlock; }
+ const_iterator end() const { return mpBlock + mSize; }
+ iterator end() { return mpBlock + mSize; }
+
+ const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+ reverse_iterator rbegin() { return reverse_iterator(end()); }
+ const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+ reverse_iterator rend() { return reverse_iterator(begin()); }
+
+ bool empty() const { return !mSize; }
+ size_type size() const { return mSize; }
+ size_type capacity() const { return mSize; }
+
+ void clear() {
+ if (mpBlock)
+ A::deallocate(mpBlock, mSize);
+ mpBlock = NULL;
+ mSize = 0;
+ }
+
+ void resize(size_type s) {
+ if (s != mSize) {
+ if (mpBlock) {
+ A::deallocate(mpBlock, mSize);
+ mpBlock = NULL;
+ }
+ mSize = s;
+ if (s)
+ mpBlock = A::allocate(mSize, 0);
+ }
+ }
+
+ void resize(size_type s, const T& value) {
+ if (s != mSize) {
+ if (mpBlock) {
+ A::deallocate(mpBlock, mSize);
+ mpBlock = NULL;
+ }
+ mSize = s;
+ if (s) {
+ mpBlock = A::allocate(mSize, 0);
+ std::fill(mpBlock, mpBlock+s, value);
+ }
+ }
+ }
+
+ void swap(vdblock& x) {
+ std::swap(mpBlock, x.mpBlock);
+ std::swap(mSize, x.mSize);
+ }
+
+protected:
+ typename A::pointer mpBlock;
+ typename A::size_type mSize;
+
+ union PODType {
+ T x;
+ };
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// vdstructex
+//
+// vdstructex describes an extensible format structure, such as
+// BITMAPINFOHEADER or WAVEFORMATEX, without the pain-in-the-butt
+// casting normally associated with one.
+//
+///////////////////////////////////////////////////////////////////////////
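+
+// A minimal usage sketch (cbExtra and ConsumeWaveFormat are hypothetical):
+//
+//		vdstructex<WAVEFORMATEX> wfex(sizeof(WAVEFORMATEX) + cbExtra);
+//		wfex->wFormatTag = WAVE_FORMAT_PCM;
+//		wfex->cbSize = (WORD)cbExtra;
+//		ConsumeWaveFormat(wfex.data(), (DWORD)wfex.size());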
+
+template<class T>
+class vdstructex {
+public:
+ typedef size_t size_type;
+ typedef T value_type;
+
+ vdstructex() : mpMemory(NULL), mSize(0) {}
+
+ explicit vdstructex(size_t len) : mpMemory(NULL), mSize(0) {
+ resize(len);
+ }
+
+ vdstructex(const T *pStruct, size_t len) : mSize(len), mpMemory((T*)malloc(len)) {
+ memcpy(mpMemory, pStruct, len);
+ }
+
+ vdstructex(const vdstructex<T>& src) : mSize(src.mSize), mpMemory((T*)malloc(src.mSize)) {
+ memcpy(mpMemory, src.mpMemory, mSize);
+ }
+
+ ~vdstructex() {
+ free(mpMemory);
+ }
+
+ bool empty() const { return !mpMemory; }
+ size_type size() const { return mSize; }
+ T* data() const { return mpMemory; }
+
+ T& operator *() const { return *(T *)mpMemory; }
+ T* operator->() const { return (T *)mpMemory; }
+
+ bool operator==(const vdstructex& x) const {
+ return mSize == x.mSize && (!mSize || !memcmp(mpMemory, x.mpMemory, mSize));
+ }
+
+ bool operator!=(const vdstructex& x) const {
+ return mSize != x.mSize || (mSize && memcmp(mpMemory, x.mpMemory, mSize));
+ }
+
+ vdstructex<T>& operator=(const vdstructex<T>& src) {
+ assign(src.mpMemory, src.mSize);
+ return *this;
+ }
+
+ void assign(const T *pStruct, size_type len) {
+ if (mSize != len)
+ resize(len);
+
+ memcpy(mpMemory, pStruct, len);
+ }
+
+ void clear() {
+ free(mpMemory);
+ mpMemory = NULL;
+ mSize = 0;
+ }
+
+ void resize(size_type len) {
+ if (mSize != len)
+ mpMemory = (T *)realloc(mpMemory, mSize = len);
+ }
+
+protected:
+ size_type mSize;
+ T *mpMemory;
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// vdlist
+//
+// vdlist<T> is similar to list<T*>, except:
+//
+// 1) The node structure must be embedded as a superclass of T.
+// Thus, the client is in full control of allocation.
+// 2) Node pointers may be converted back into iterators in O(1).
+//
+///////////////////////////////////////////////////////////////////////////
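+
+// A minimal usage sketch (MyTask is a hypothetical client type; the node is
+// embedded in the object, so insertion performs no allocation):
+//
+//		class MyTask : public vdlist_node { /*...*/ };
+//
+//		vdlist<MyTask> queue;
+//		MyTask a, b;
+//		queue.push_back(&a);
+//		queue.push_back(&b);
+//		vdlist<MyTask>::iterator it = queue.fast_find(&b);	// O(1) pointer -> iterator
+//		queue.erase(it);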
+
+struct vdlist_node {
+ vdlist_node *mListNodeNext, *mListNodePrev;
+};
+
+template<class T, class T_Nonconst>
+class vdlist_iterator : public vditerator<std::bidirectional_iterator_tag, T, ptrdiff_t>::type {
+public:
+ vdlist_iterator() {}
+ vdlist_iterator(T *p) : mp(p) {}
+ vdlist_iterator(const vdlist_iterator<T_Nonconst, T_Nonconst>& src) : mp(src.mp) {}
+
+ T* operator *() const {
+ return static_cast<T*>(mp);
+ }
+
+ bool operator==(const vdlist_iterator<T, T_Nonconst>& x) const {
+ return mp == x.mp;
+ }
+
+ bool operator!=(const vdlist_iterator<T, T_Nonconst>& x) const {
+ return mp != x.mp;
+ }
+
+ vdlist_iterator& operator++() {
+ mp = mp->mListNodeNext;
+ return *this;
+ }
+
+ vdlist_iterator& operator--() {
+ mp = mp->mListNodePrev;
+ return *this;
+ }
+
+	vdlist_iterator operator++(int) {
+		vdlist_iterator tmp(*this);
+		mp = mp->mListNodeNext;
+		return tmp;
+	}
+
+	vdlist_iterator operator--(int) {
+		vdlist_iterator tmp(*this);
+		mp = mp->mListNodePrev;
+		return tmp;
+	}
+
+ vdlist_node *mp;
+};
+
+class vdlist_base {
+public:
+ typedef vdlist_node node;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+
+ bool empty() const {
+ return mAnchor.mListNodeNext == &mAnchor;
+ }
+
+ size_type size() const {
+		node *p = mAnchor.mListNodeNext;
+ size_type s = 0;
+
+ if (p != &mAnchor)
+ do {
+ ++s;
+ p = p->mListNodeNext;
+ } while(p != &mAnchor);
+
+ return s;
+ }
+
+ void clear() {
+ mAnchor.mListNodePrev = &mAnchor;
+ mAnchor.mListNodeNext = &mAnchor;
+ }
+
+ void pop_front() {
+ mAnchor.mListNodeNext = mAnchor.mListNodeNext->mListNodeNext;
+ mAnchor.mListNodeNext->mListNodePrev = &mAnchor;
+ }
+
+ void pop_back() {
+ mAnchor.mListNodePrev = mAnchor.mListNodePrev->mListNodePrev;
+ mAnchor.mListNodePrev->mListNodeNext = &mAnchor;
+ }
+
+ static void unlink(vdlist_node& node) {
+ vdlist_node& n1 = *node.mListNodePrev;
+ vdlist_node& n2 = *node.mListNodeNext;
+
+ n1.mListNodeNext = &n2;
+ n2.mListNodePrev = &n1;
+ }
+
+protected:
+ node mAnchor;
+};
+
+template<class T>
+class vdlist : public vdlist_base {
+public:
+ typedef T* value_type;
+ typedef T** pointer;
+ typedef const T** const_pointer;
+ typedef T*& reference;
+ typedef const T*& const_reference;
+ typedef vdlist_iterator<T, T> iterator;
+ typedef vdlist_iterator<const T, T> const_iterator;
+ typedef typename vdreverse_iterator<iterator, T>::type reverse_iterator;
+ typedef typename vdreverse_iterator<const_iterator, const T>::type const_reverse_iterator;
+
+ vdlist() {
+ mAnchor.mListNodePrev = &mAnchor;
+ mAnchor.mListNodeNext = &mAnchor;
+ }
+
+ iterator begin() {
+ iterator it;
+ it.mp = mAnchor.mListNodeNext;
+ return it;
+ }
+
+ const_iterator begin() const {
+ const_iterator it;
+ it.mp = mAnchor.mListNodeNext;
+ return it;
+ }
+
+ iterator end() {
+ iterator it;
+ it.mp = &mAnchor;
+ return it;
+ }
+
+ const_iterator end() const {
+ const_iterator it;
+ it.mp = &mAnchor;
+ return it;
+ }
+
+	reverse_iterator rbegin() {
+		return reverse_iterator(end());
+	}
+
+	const_reverse_iterator rbegin() const {
+		return const_reverse_iterator(end());
+	}
+
+	reverse_iterator rend() {
+		return reverse_iterator(begin());
+	}
+
+	const_reverse_iterator rend() const {
+		return const_reverse_iterator(begin());
+	}
+
+ const value_type front() const {
+ return static_cast<T *>(mAnchor.mListNodeNext);
+ }
+
+ const value_type back() const {
+ return static_cast<T *>(mAnchor.mListNodePrev);
+ }
+
+ iterator find(T *p) {
+ iterator it;
+ it.mp = mAnchor.mListNodeNext;
+
+ if (it.mp != &mAnchor)
+ do {
+ if (it.mp == static_cast<node *>(p))
+ break;
+
+ it.mp = it.mp->mListNodeNext;
+ } while(it.mp != &mAnchor);
+
+ return it;
+ }
+
+ const_iterator find(T *p) const {
+ const_iterator it;
+ it.mp = mAnchor.mListNodeNext;
+
+ if (it.mp != &mAnchor)
+ do {
+ if (it.mp == static_cast<node *>(p))
+ break;
+
+ it.mp = it.mp->mListNodeNext;
+ } while(it.mp != &mAnchor);
+
+ return it;
+ }
+
+ iterator fast_find(T *p) {
+ iterator it(p);
+ return it;
+ }
+
+ const_iterator fast_find(T *p) const {
+		const_iterator it(p);
+		return it;
+	}
+
+ void push_front(T *p) {
+ node& n = *p;
+ n.mListNodePrev = &mAnchor;
+ n.mListNodeNext = mAnchor.mListNodeNext;
+ n.mListNodeNext->mListNodePrev = &n;
+ mAnchor.mListNodeNext = &n;
+ }
+
+ void push_back(T *p) {
+ node& n = *p;
+ n.mListNodeNext = &mAnchor;
+ n.mListNodePrev = mAnchor.mListNodePrev;
+ n.mListNodePrev->mListNodeNext = &n;
+ mAnchor.mListNodePrev = &n;
+ }
+
+ iterator erase(T *p) {
+ return erase(fast_find(p));
+ }
+
+ iterator erase(iterator it) {
+ node& n = *it.mp;
+
+ n.mListNodePrev->mListNodeNext = n.mListNodeNext;
+ n.mListNodeNext->mListNodePrev = n.mListNodePrev;
+
+ it.mp = n.mListNodeNext;
+ return it;
+ }
+
+ iterator erase(iterator i1, iterator i2) {
+ node& np = *i1.mp->mListNodePrev;
+ node& nn = *i2.mp;
+
+ np.mListNodeNext = &nn;
+ nn.mListNodePrev = &np;
+
+ return i2;
+ }
+
+ void insert(iterator dst, T *src) {
+ node& ns = *src;
+ node& nd = *dst.mp;
+
+ ns.mListNodeNext = &nd;
+ ns.mListNodePrev = nd.mListNodePrev;
+ nd.mListNodePrev->mListNodeNext = &ns;
+ nd.mListNodePrev = &ns;
+ }
+
+ void insert(iterator dst, iterator i1, iterator i2) {
+ if (i1 != i2) {
+ node& np = *dst.mp->mListNodePrev;
+ node& nn = *dst.mp;
+ node& n1 = *i1.mp;
+ node& n2 = *i2.mp->mListNodePrev;
+
+ np.mListNodeNext = &n1;
+ n1.mListNodePrev = &np;
+ n2.mListNodeNext = &nn;
+ nn.mListNodePrev = &n2;
+ }
+ }
+
+ void splice(iterator dst, vdlist<T>& srclist) {
+ insert(dst, srclist.begin(), srclist.end());
+ srclist.clear();
+ }
+
+ void splice(iterator dst, vdlist<T>& srclist, iterator src) {
+ T *v = *src;
+ srclist.erase(src);
+ insert(dst, v);
+ }
+
+ void splice(iterator dst, vdlist<T>& srclist, iterator i1, iterator i2) {
+ if (dst.mp != i1.mp && dst.mp != i2.mp) {
+ srclist.erase(i1, i2);
+ insert(dst, i1, i2);
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) && defined(_MSC_VER)
+ #define VD_ACCELERATE_TEMPLATES
+#endif
+
+#ifndef VDTINLINE
+ #ifdef VD_ACCELERATE_TEMPLATES
+ #ifndef VDTEXTERN
+ #define VDTEXTERN extern
+ #endif
+
+ #define VDTINLINE
+ #else
+ #define VDTINLINE inline
+ #endif
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T>
+class vdspan {
+public:
+ typedef T value_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+ typedef typename vdreverse_iterator<iterator, T>::type reverse_iterator;
+ typedef typename vdreverse_iterator<const_iterator, const T>::type const_reverse_iterator;
+
+ VDTINLINE vdspan();
+ VDTINLINE vdspan(T *p1, T *p2);
+ VDTINLINE vdspan(T *p1, size_type len);
+
+public:
+ VDTINLINE bool empty() const;
+ VDTINLINE size_type size() const;
+
+ VDTINLINE pointer data();
+ VDTINLINE const_pointer data() const;
+
+ VDTINLINE iterator begin();
+ VDTINLINE const_iterator begin() const;
+ VDTINLINE iterator end();
+ VDTINLINE const_iterator end() const;
+
+ VDTINLINE reverse_iterator rbegin();
+ VDTINLINE const_reverse_iterator rbegin() const;
+ VDTINLINE reverse_iterator rend();
+ VDTINLINE const_reverse_iterator rend() const;
+
+ VDTINLINE reference front();
+ VDTINLINE const_reference front() const;
+ VDTINLINE reference back();
+ VDTINLINE const_reference back() const;
+
+ VDTINLINE reference operator[](size_type n);
+ VDTINLINE const_reference operator[](size_type n) const;
+
+protected:
+ T *mpBegin;
+ T *mpEnd;
+};
+
+#ifdef VD_ACCELERATE_TEMPLATES
+ #pragma warning(push)
+ #pragma warning(disable: 4231) // warning C4231: nonstandard extension used : 'extern' before template explicit instantiation
+ VDTEXTERN template vdspan<char>;
+ VDTEXTERN template vdspan<uint8>;
+ VDTEXTERN template vdspan<uint16>;
+ VDTEXTERN template vdspan<uint32>;
+ VDTEXTERN template vdspan<uint64>;
+ VDTEXTERN template vdspan<sint8>;
+ VDTEXTERN template vdspan<sint16>;
+ VDTEXTERN template vdspan<sint32>;
+ VDTEXTERN template vdspan<sint64>;
+ VDTEXTERN template vdspan<float>;
+ VDTEXTERN template vdspan<double>;
+ VDTEXTERN template vdspan<wchar_t>;
+ #pragma warning(pop)
+#endif
+
+template<class T> VDTINLINE vdspan<T>::vdspan() : mpBegin(NULL), mpEnd(NULL) {}
+template<class T> VDTINLINE vdspan<T>::vdspan(T *p1, T *p2) : mpBegin(p1), mpEnd(p2) {}
+template<class T> VDTINLINE vdspan<T>::vdspan(T *p, size_type len) : mpBegin(p), mpEnd(p+len) {}
+template<class T> VDTINLINE bool vdspan<T>::empty() const { return mpBegin == mpEnd; }
+template<class T> VDTINLINE typename vdspan<T>::size_type vdspan<T>::size() const { return size_type(mpEnd - mpBegin); }
+template<class T> VDTINLINE typename vdspan<T>::pointer vdspan<T>::data() { return mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::const_pointer vdspan<T>::data() const { return mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::iterator vdspan<T>::begin() { return mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::const_iterator vdspan<T>::begin() const { return mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::iterator vdspan<T>::end() { return mpEnd; }
+template<class T> VDTINLINE typename vdspan<T>::const_iterator vdspan<T>::end() const { return mpEnd; }
+template<class T> VDTINLINE typename vdspan<T>::reverse_iterator vdspan<T>::rbegin() { return reverse_iterator(mpEnd); }
+template<class T> VDTINLINE typename vdspan<T>::const_reverse_iterator vdspan<T>::rbegin() const { return const_reverse_iterator(mpEnd); }
+template<class T> VDTINLINE typename vdspan<T>::reverse_iterator vdspan<T>::rend() { return reverse_iterator(mpBegin); }
+template<class T> VDTINLINE typename vdspan<T>::const_reverse_iterator vdspan<T>::rend() const { return const_reverse_iterator(mpBegin); }
+template<class T> VDTINLINE typename vdspan<T>::reference vdspan<T>::front() { return *mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::const_reference vdspan<T>::front() const { return *mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::reference vdspan<T>::back() { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+template<class T> VDTINLINE typename vdspan<T>::const_reference vdspan<T>::back() const { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+template<class T> VDTINLINE typename vdspan<T>::reference vdspan<T>::operator[](size_type n) { VDASSERT(n < size_type(mpEnd - mpBegin)); return mpBegin[n]; }
+template<class T> VDTINLINE typename vdspan<T>::const_reference vdspan<T>::operator[](size_type n) const { VDASSERT(n < size_type(mpEnd - mpBegin)); return mpBegin[n]; }
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T>
+bool operator==(const vdspan<T>& x, const vdspan<T>& y) {
+ uint32 len = x.size();
+ if (len != y.size())
+ return false;
+
+ const T *px = x.data();
+ const T *py = y.data();
+
+ for(uint32 i=0; i<len; ++i) {
+ if (px[i] != py[i])
+ return false;
+ }
+
+ return true;
+}
+
+template<class T>
+inline bool operator!=(const vdspan<T>& x, const vdspan<T>& y) { return !(x == y); }
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T, class S, class A = vdallocator<T> >
+class vdfastvector_base : public vdspan<T> {
+public:
+ ~vdfastvector_base() {
+ if (static_cast<const S&>(m).is_deallocatable_storage(mpBegin))
+ m.deallocate(mpBegin, m.eos - mpBegin);
+ }
+
+ size_type capacity() const { return size_type(m.eos - mpBegin); }
+
+public:
+ T *alloc(size_type n) {
+ size_type offset = (size_type)(mpEnd - mpBegin);
+ resize(offset + n);
+ return mpBegin + offset;
+ }
+
+ void assign(const T *p1, const T *p2) {
+ resize(p2 - p1);
+ memcpy(mpBegin, p1, (char *)p2 - (char *)p1);
+ }
+
+ void clear() {
+ mpEnd = mpBegin;
+ }
+
+ iterator erase(iterator it) {
+ VDASSERT(it - mpBegin < mpEnd - mpBegin);
+
+ memmove(it, it+1, (char *)mpEnd - (char *)(it+1));
+
+ --mpEnd;
+
+ return it;
+ }
+
+ iterator erase(iterator it1, iterator it2) {
+ VDASSERT(it1 - mpBegin <= mpEnd - mpBegin);
+ VDASSERT(it2 - mpBegin <= mpEnd - mpBegin);
+ VDASSERT(it1 <= it2);
+
+ memmove(it1, it2, (char *)mpEnd - (char *)it2);
+
+ mpEnd -= (it2 - it1);
+
+ return it1;
+ }
+
+ iterator insert(iterator it, const T& value) {
+ const T temp(value); // copy in case value is inside container.
+
+ if (mpEnd == m.eos) {
+ difference_type delta = it - mpBegin;
+ _reserve_always_add_one();
+ it = mpBegin + delta;
+ }
+
+ memmove(it+1, it, sizeof(T) * (mpEnd - it));
+ *it = temp;
+ ++mpEnd;
+ VDASSERT(mpEnd <= m.eos);
+
+ return it;
+ }
+
+ iterator insert(iterator it, size_type n, const T& value) {
+ const T temp(value); // copy in case value is inside container.
+
+ ptrdiff_t bytesToInsert = n * sizeof(T);
+
+ if ((char *)m.eos - (char *)mpEnd < bytesToInsert) {
+ difference_type delta = it - mpBegin;
+ _reserve_always_add(bytesToInsert);
+ it = mpBegin + delta;
+ }
+
+ memmove((char *)it + bytesToInsert, it, (char *)mpEnd - (char *)it);
+ for(size_t i=0; i<n; ++i)
+ *it++ = temp;
+ mpEnd += n;
+ VDASSERT(mpEnd <= m.eos);
+ return it;
+ }
+
+ iterator insert(iterator it, const T *p1, const T *p2) {
+ ptrdiff_t elementsToCopy = p2 - p1;
+ ptrdiff_t bytesToCopy = (char *)p2 - (char *)p1;
+
+ if ((char *)m.eos - (char *)mpEnd < bytesToCopy) {
+ difference_type delta = it - mpBegin;
+ _reserve_always_add(bytesToCopy);
+ it = mpBegin + delta;
+ }
+
+ memmove((char *)it + bytesToCopy, it, (char *)mpEnd - (char *)it);
+ memcpy(it, p1, bytesToCopy);
+ mpEnd += elementsToCopy;
+ VDASSERT(mpEnd <= m.eos);
+ return it;
+ }
+
+ reference push_back() {
+ if (mpEnd == m.eos)
+ _reserve_always_add_one();
+
+ return *mpEnd++;
+ }
+
+ void push_back(const T& value) {
+ const T temp(value); // copy in case value is inside container.
+
+ if (mpEnd == m.eos)
+ _reserve_always_add_one();
+
+ *mpEnd++ = temp;
+ }
+
+ void pop_back() {
+ VDASSERT(mpBegin != mpEnd);
+ --mpEnd;
+ }
+
+ void resize(size_type n) {
+ if (n*sizeof(T) > size_type((char *)m.eos - (char *)mpBegin))
+ _reserve_always_amortized(n);
+
+ mpEnd = mpBegin + n;
+ }
+
+ void resize(size_type n, const T& value) {
+ const T temp(value);
+
+ if (n*sizeof(T) > size_type((char *)m.eos - (char *)mpBegin)) {
+ _reserve_always_amortized(n);
+ }
+
+ const iterator newEnd(mpBegin + n);
+ if (newEnd > mpEnd)
+ std::fill(mpEnd, newEnd, temp);
+ mpEnd = newEnd;
+ }
+
+ void reserve(size_type n) {
+ if (n*sizeof(T) > size_type((char *)m.eos - (char *)mpBegin))
+ _reserve_always(n);
+ }
+
+protected:
+#ifdef _MSC_VER
+ __declspec(noinline)
+#endif
+ void _reserve_always_add_one() {
+ _reserve_always((m.eos - mpBegin) * 2 + 1);
+ }
+
+#ifdef _MSC_VER
+ __declspec(noinline)
+#endif
+ void _reserve_always_add(size_type n) {
+ _reserve_always((m.eos - mpBegin) * 2 + n);
+ }
+
+#ifdef _MSC_VER
+ __declspec(noinline)
+#endif
+ void _reserve_always(size_type n) {
+ size_type oldSize = mpEnd - mpBegin;
+ T *oldStorage = mpBegin;
+ T *newStorage = m.allocate(n, NULL);
+
+ memcpy(newStorage, mpBegin, (char *)mpEnd - (char *)mpBegin);
+ if (static_cast<const S&>(m).is_deallocatable_storage(oldStorage))
+ m.deallocate(oldStorage, m.eos - mpBegin);
+ mpBegin = newStorage;
+ mpEnd = newStorage + oldSize;
+ m.eos = newStorage + n;
+ }
+
+#ifdef _MSC_VER
+ __declspec(noinline)
+#endif
+ void _reserve_always_amortized(size_type n) {
+ size_type nextCapacity = (size_type)((m.eos - mpBegin)*2);
+
+ if (nextCapacity < n)
+ nextCapacity = n;
+
+ _reserve_always(nextCapacity);
+ }
+
+ struct : A, S {
+ T *eos;
+ } m;
+
+ union TrivialObjectConstraint {
+ T m;
+ };
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+struct vdfastvector_storage {
+ bool is_deallocatable_storage(void *p) const {
+ return p != 0;
+ }
+};
+
+template<class T, class A = vdallocator<T> >
+class vdfastvector : public vdfastvector_base<T, vdfastvector_storage, A> {
+public:
+ vdfastvector() {
+ m.eos = NULL;
+ }
+
+ vdfastvector(size_type len) {
+ mpBegin = m.allocate(len, NULL);
+ mpEnd = mpBegin + len;
+ m.eos = mpEnd;
+ }
+
+ vdfastvector(size_type len, const T& fill) {
+ mpBegin = m.allocate(len, NULL);
+ mpEnd = mpBegin + len;
+ m.eos = mpEnd;
+
+ std::fill(mpBegin, mpEnd, fill);
+ }
+
+ vdfastvector(const vdfastvector& x) {
+ size_type n = x.mpEnd - x.mpBegin;
+ mpBegin = m.allocate(n, NULL);
+ mpEnd = mpBegin + n;
+ m.eos = mpEnd;
+ memcpy(mpBegin, x.mpBegin, sizeof(T) * n);
+ }
+
+ vdfastvector(const value_type *p, const value_type *q) {
+ m.eos = NULL;
+
+ assign(p, q);
+ }
+
+ vdfastvector& operator=(const vdfastvector& x) {
+ if (this != &x)
+ assign(x.mpBegin, x.mpEnd);
+
+ return *this;
+ }
+
+ void swap(vdfastvector& x) {
+ T *p;
+
+ p = mpBegin; mpBegin = x.mpBegin; x.mpBegin = p;
+ p = mpEnd; mpEnd = x.mpEnd; x.mpEnd = p;
+ p = m.eos; m.eos = x.m.eos; x.m.eos = p;
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T, size_t N>
+struct vdfastfixedvector_storage {
+ T mArray[N];
+
+ bool is_deallocatable_storage(void *p) const {
+ return p != mArray;
+ }
+};
+
+template<class T, size_t N, class A = vdallocator<T> >
+class vdfastfixedvector : public vdfastvector_base<T, vdfastfixedvector_storage<T, N>, A> {
+public:
+ vdfastfixedvector() {
+ mpBegin = m.mArray;
+ mpEnd = m.mArray;
+ m.eos = m.mArray + N;
+ }
+
+ vdfastfixedvector(size_type len) {
+ if (len <= N) {
+ mpBegin = m.mArray;
+ mpEnd = m.mArray + len;
+ m.eos = m.mArray + N;
+ } else {
+ mpBegin = m.allocate(len, NULL);
+ mpEnd = mpBegin + len;
+ m.eos = mpEnd;
+ }
+ }
+
+ vdfastfixedvector(size_type len, const T& fill) {
+ mpBegin = m.allocate(len, NULL);
+ mpEnd = mpBegin + len;
+ m.eos = mpEnd;
+
+ std::fill(mpBegin, mpEnd, fill);
+ }
+
+ vdfastfixedvector(const vdfastfixedvector& x) {
+ size_type n = x.mpEnd - x.mpBegin;
+
+ if (n <= N) {
+ mpBegin = m.mArray;
+ mpEnd = m.mArray + n;
+ m.eos = m.mArray + N;
+ } else {
+ mpBegin = m.allocate(n, NULL);
+ mpEnd = mpBegin + n;
+ m.eos = mpEnd;
+ }
+
+ memcpy(mpBegin, x.mpBegin, sizeof(T) * n);
+ }
+
+ vdfastfixedvector(const value_type *p, const value_type *q) {
+ mpBegin = m.mArray;
+ mpEnd = m.mArray;
+ m.eos = m.mArray + N;
+
+ assign(p, q);
+ }
+
+ vdfastfixedvector& operator=(const vdfastfixedvector& x) {
+ if (this != &x)
+ assign(x.mpBegin, x.mpEnd);
+
+ return *this;
+ }
+
+ void swap(vdfastfixedvector& x) {
+ size_t this_bytes = (char *)mpEnd - (char *)mpBegin;
+ size_t other_bytes = (char *)x.mpEnd - (char *)x.mpBegin;
+
+ T *p;
+
+ if (mpBegin == m.mArray) {
+ if (x.mpBegin == x.m.mArray) {
+ if (this_bytes < other_bytes) {
+ VDSwapMemory(m.mArray, x.m.mArray, this_bytes);
+ memcpy((char *)m.mArray + this_bytes, (char *)x.m.mArray + this_bytes, other_bytes - this_bytes);
+ } else {
+ VDSwapMemory(m.mArray, x.m.mArray, other_bytes);
+ memcpy((char *)m.mArray + other_bytes, (char *)x.m.mArray + other_bytes, this_bytes - other_bytes);
+ }
+
+ mpEnd = (T *)((char *)mpBegin + other_bytes);
+ x.mpEnd = (T *)((char *)x.mpBegin + this_bytes);
+ } else {
+ memcpy(x.m.mArray, mpBegin, this_bytes);
+
+ mpBegin = x.mpBegin;
+ mpEnd = x.mpEnd;
+ m.eos = x.m.eos;
+
+ x.mpBegin = x.m.mArray;
+ x.mpEnd = (T *)((char *)x.m.mArray + this_bytes);
+ x.m.eos = x.m.mArray + N;
+ }
+ } else {
+ if (x.mpBegin == x.m.mArray) {
+ memcpy(x.m.mArray, mpBegin, other_bytes);
+
+ x.mpBegin = mpBegin;
+ x.mpEnd = mpEnd;
+ x.m.eos = m.eos;
+
+ mpBegin = m.mArray;
+ mpEnd = (T *)((char *)m.mArray + other_bytes);
+ m.eos = m.mArray + N;
+ } else {
+ p = mpBegin; mpBegin = x.mpBegin; x.mpBegin = p;
+ p = mpEnd; mpEnd = x.mpEnd; x.mpEnd = p;
+ p = m.eos; m.eos = x.m.eos; x.m.eos = p;
+ }
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T>
+struct vdfastdeque_block {
+ enum {
+ kBlockSize = 32,
+ kBlockSizeBits = 5
+ };
+
+ T data[kBlockSize];
+};
+
+template<class T, class T_Base>
+class vdfastdeque_iterator {
+public:
+ vdfastdeque_iterator(const vdfastdeque_iterator<T_Base, T_Base>&);
+ vdfastdeque_iterator(vdfastdeque_block<T_Base> **pMapEntry, uint32 index);
+
+ T& operator *() const;
+ T& operator ->() const;
+ vdfastdeque_iterator& operator++();
+ vdfastdeque_iterator operator++(int);
+ vdfastdeque_iterator& operator--();
+ vdfastdeque_iterator operator--(int);
+
+public:
+ vdfastdeque_block<T_Base> **mpMap;
+ vdfastdeque_block<T_Base> *mpBlock;
+ uint32 mIndex;
+};
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base>::vdfastdeque_iterator(const vdfastdeque_iterator<T_Base, T_Base>& x)
+ : mpMap(x.mpMap)
+ , mpBlock(x.mpBlock)
+ , mIndex(x.mIndex)
+{
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base>::vdfastdeque_iterator(vdfastdeque_block<T_Base> **pMapEntry, uint32 index)
+ : mpMap(pMapEntry)
+ , mpBlock(mpMap ? *mpMap : NULL)
+ , mIndex(index)
+{
+}
+
+template<class T, class T_Base>
+T& vdfastdeque_iterator<T, T_Base>::operator *() const {
+ return mpBlock->data[mIndex];
+}
+
+template<class T, class T_Base>
+T& vdfastdeque_iterator<T, T_Base>::operator ->() const {
+ return mpBlock->data[mIndex];
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base>& vdfastdeque_iterator<T, T_Base>::operator++() {
+ if (++mIndex >= vdfastdeque_block<T>::kBlockSize) {
+ mIndex = 0;
+ mpBlock = *++mpMap;
+ }
+ return *this;
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base> vdfastdeque_iterator<T, T_Base>::operator++(int) {
+ vdfastdeque_iterator r(*this);
+ operator++();
+ return r;
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base>& vdfastdeque_iterator<T, T_Base>::operator--() {
+ if (mIndex-- == 0) {
+		mIndex = vdfastdeque_block<T_Base>::kBlockSize - 1;
+ mpBlock = *--mpMap;
+ }
+ return *this;
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base> vdfastdeque_iterator<T, T_Base>::operator--(int) {
+ vdfastdeque_iterator r(*this);
+ operator--();
+ return r;
+}
+
+template<class T, class U, class T_Base>
+bool operator==(const vdfastdeque_iterator<T, T_Base>& x,const vdfastdeque_iterator<U, T_Base>& y) {
+ return x.mpBlock == y.mpBlock && x.mIndex == y.mIndex;
+}
+
+template<class T, class U, class T_Base>
+bool operator!=(const vdfastdeque_iterator<T, T_Base>& x,const vdfastdeque_iterator<U, T_Base>& y) {
+ return x.mpBlock != y.mpBlock || x.mIndex != y.mIndex;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T, class A = vdallocator<T> >
+class vdfastdeque {
+public:
+ typedef typename A::reference reference;
+ typedef typename A::const_reference const_reference;
+ typedef typename A::pointer pointer;
+ typedef typename A::const_pointer const_pointer;
+ typedef T value_type;
+ typedef A allocator_type;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef vdfastdeque_iterator<T, T> iterator;
+ typedef vdfastdeque_iterator<const T, T> const_iterator;
+ typedef typename vdreverse_iterator<iterator, T>::type reverse_iterator;
+ typedef typename vdreverse_iterator<const_iterator, const T>::type const_reverse_iterator;
+
+ vdfastdeque();
+ ~vdfastdeque();
+
+ bool empty() const;
+ size_type size() const;
+
+ reference front();
+ const_reference front() const;
+ reference back();
+ const_reference back() const;
+
+ iterator begin();
+ const_iterator begin() const;
+ iterator end();
+ const_iterator end() const;
+
+ reference operator[](size_type n);
+ const_reference operator[](size_type n) const;
+
+ void clear();
+
+ reference push_back();
+ void push_back(const_reference x);
+
+ void pop_front();
+ void pop_back();
+
+ void swap(vdfastdeque& x);
+
+protected:
+ void push_back_extend();
+ void validate();
+
+ typedef vdfastdeque_block<T> Block;
+
+ enum {
+ kBlockSize = Block::kBlockSize,
+ kBlockSizeBits = Block::kBlockSizeBits
+ };
+
+ struct M1 : public A::rebind<Block *>::other {
+ Block **mapStartAlloc; // start of map
+ Block **mapStartCommit; // start of range of allocated blocks
+ Block **mapStart; // start of range of active blocks
+ Block **mapEnd; // end of range of active blocks
+ Block **mapEndCommit; // end of range of allocated blocks
+ Block **mapEndAlloc; // end of map
+ } m;
+
+ struct M2 : public A::rebind<Block>::other {
+ int startIndex;
+ int endIndex;
+ } mTails;
+
+ union TrivialObjectConstraint {
+ T obj;
+ };
+};
+
+template<class T, class A>
+vdfastdeque<T, A>::vdfastdeque() {
+ m.mapStartAlloc = NULL;
+ m.mapStartCommit = NULL;
+ m.mapStart = NULL;
+ m.mapEnd = NULL;
+ m.mapEndCommit = NULL;
+ m.mapEndAlloc = NULL;
+ mTails.startIndex = 0;
+ mTails.endIndex = kBlockSize - 1;
+}
+
+template<class T, class A>
+vdfastdeque<T,A>::~vdfastdeque() {
+ while(m.mapStartCommit != m.mapEndCommit) {
+ mTails.deallocate(*m.mapStartCommit++, 1);
+ }
+
+ if (m.mapStartAlloc)
+ m.deallocate(m.mapStartAlloc, m.mapEndAlloc - m.mapStartAlloc);
+}
+
+template<class T, class A>
+bool vdfastdeque<T,A>::empty() const {
+ return size() == 0;
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::size_type vdfastdeque<T,A>::size() const {
+ if (m.mapEnd == m.mapStart)
+ return 0;
+
+ return kBlockSize * ((m.mapEnd - m.mapStart) - 1) + (mTails.endIndex + 1) - mTails.startIndex;
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::reference vdfastdeque<T,A>::front() {
+ VDASSERT(m.mapStart != m.mapEnd);
+ return (*m.mapStart)->data[mTails.startIndex];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_reference vdfastdeque<T,A>::front() const {
+ VDASSERT(m.mapStart != m.mapEnd);
+ return (*m.mapStart)->data[mTails.startIndex];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::reference vdfastdeque<T,A>::back() {
+ VDASSERT(m.mapStart != m.mapEnd);
+ return m.mapEnd[-1]->data[mTails.endIndex];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_reference vdfastdeque<T,A>::back() const {
+ VDASSERT(m.mapStart != m.mapEnd);
+ return m.mapEnd[-1]->data[mTails.endIndex];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::iterator vdfastdeque<T,A>::begin() {
+ return iterator(m.mapStart, mTails.startIndex);
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_iterator vdfastdeque<T,A>::begin() const {
+ return const_iterator(m.mapStart, mTails.startIndex);
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::iterator vdfastdeque<T,A>::end() {
+ if (mTails.endIndex == kBlockSize - 1)
+ return iterator(m.mapEnd, 0);
+ else
+ return iterator(m.mapEnd - 1, mTails.endIndex + 1);
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_iterator vdfastdeque<T,A>::end() const {
+ if (mTails.endIndex == kBlockSize - 1)
+ return const_iterator(m.mapEnd, 0);
+ else
+ return const_iterator(m.mapEnd - 1, mTails.endIndex + 1);
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::reference vdfastdeque<T,A>::operator[](size_type n) {
+ n += mTails.startIndex;
+ return m.mapStart[n >> kBlockSizeBits]->data[n & (kBlockSize - 1)];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_reference vdfastdeque<T,A>::operator[](size_type n) const {
+ n += mTails.startIndex;
+ return m.mapStart[n >> kBlockSizeBits]->data[n & (kBlockSize - 1)];
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::clear() {
+ m.mapEnd = m.mapStart;
+ mTails.startIndex = 0;
+ mTails.endIndex = kBlockSize - 1;
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::reference vdfastdeque<T,A>::push_back() {
+ if (mTails.endIndex >= kBlockSize - 1) {
+ push_back_extend();
+
+ mTails.endIndex = -1;
+ }
+
+ ++mTails.endIndex;
+
+ VDASSERT(m.mapEnd[-1]);
+ reference r = m.mapEnd[-1]->data[mTails.endIndex];
+ return r;
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::push_back(const_reference x) {
+ const T x2(x);
+ push_back() = x2;
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::pop_front() {
+ if (++mTails.startIndex >= kBlockSize) {
+ VDASSERT(m.mapEnd != m.mapStart);
+ mTails.startIndex = 0;
+ ++m.mapStart;
+ }
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::pop_back() {
+ if (--mTails.endIndex < 0) {
+ VDASSERT(m.mapEnd != m.mapStart);
+ mTails.endIndex = kBlockSize - 1;
+ --m.mapEnd;
+ }
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::swap(vdfastdeque& x) {
+ std::swap(m.mapStartAlloc, x.m.mapStartAlloc);
+ std::swap(m.mapStartCommit, x.m.mapStartCommit);
+ std::swap(m.mapStart, x.m.mapStart);
+ std::swap(m.mapEnd, x.m.mapEnd);
+ std::swap(m.mapEndCommit, x.m.mapEndCommit);
+ std::swap(m.mapEndAlloc, x.m.mapEndAlloc);
+ std::swap(mTails.startIndex, x.mTails.startIndex);
+ std::swap(mTails.endIndex, x.mTails.endIndex);
+}
+
+/////////////////////////////////
+
+template<class T, class A>
+void vdfastdeque<T,A>::push_back_extend() {
+ validate();
+
+ // check if we need to extend the map itself
+ if (m.mapEnd == m.mapEndAlloc) {
+ // can we just shift the map?
+ size_type currentMapSize = m.mapEndAlloc - m.mapStartAlloc;
+ size_type freeAtStart = m.mapStartCommit - m.mapStartAlloc;
+
+ if (freeAtStart >= 2 && (freeAtStart + freeAtStart) >= currentMapSize) {
+ size_type shiftDistance = freeAtStart >> 1;
+
+ VDASSERT(!m.mapStartAlloc[0]);
+ memmove(m.mapStartAlloc, m.mapStartAlloc + shiftDistance, sizeof(Block *) * (currentMapSize - shiftDistance));
+ memset(m.mapStartAlloc + (currentMapSize - shiftDistance), 0, shiftDistance * sizeof(Block *));
+
+ // relocate pointers
+ m.mapEndCommit -= shiftDistance;
+ m.mapEnd -= shiftDistance;
+ m.mapStart -= shiftDistance;
+ m.mapStartCommit -= shiftDistance;
+ validate();
+ } else {
+ size_type newMapSize = currentMapSize*2+1;
+
+ Block **newMap = m.allocate(newMapSize);
+
+ memcpy(newMap, m.mapStartAlloc, currentMapSize * sizeof(Block *));
+ memset(newMap + currentMapSize, 0, (newMapSize - currentMapSize) * sizeof(Block *));
+
+ // relocate pointers
+ m.mapEndAlloc = newMap + newMapSize;
+ m.mapEndCommit = newMap + (m.mapEndCommit - m.mapStartAlloc);
+ m.mapEnd = newMap + (m.mapEnd - m.mapStartAlloc);
+ m.mapStart = newMap + (m.mapStart - m.mapStartAlloc);
+ m.mapStartCommit = newMap + (m.mapStartCommit - m.mapStartAlloc);
+
+ m.deallocate(m.mapStartAlloc, currentMapSize);
+ m.mapStartAlloc = newMap;
+ validate();
+ }
+ }
+
+ VDASSERT(m.mapEnd != m.mapEndAlloc);
+
+ // check if we already have a block we can use
+ if (*m.mapEnd) {
+ ++m.mapEnd;
+ validate();
+ return;
+ }
+
+ // check if we can steal a block from the beginning
+ if (m.mapStartCommit != m.mapStart) {
+ VDASSERT(*m.mapStartCommit);
+ if (m.mapStartCommit != m.mapEnd) {
+ *m.mapEnd = *m.mapStartCommit;
+ *m.mapStartCommit = NULL;
+ ++m.mapStartCommit;
+ }
+ ++m.mapEnd;
+ m.mapEndCommit = m.mapEnd;
+ validate();
+ return;
+ }
+
+ // allocate a new block
+ *m.mapEnd = mTails.allocate(1);
+ ++m.mapEnd;
+ m.mapEndCommit = m.mapEnd;
+ validate();
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::validate() {
+ VDASSERT(m.mapStartAlloc <= m.mapStartCommit);
+ VDASSERT(m.mapStartCommit <= m.mapStart);
+ VDASSERT(m.mapStart <= m.mapEnd);
+ VDASSERT(m.mapEnd <= m.mapEndCommit);
+ VDASSERT(m.mapEndCommit <= m.mapEndAlloc);
+
+ VDASSERT(m.mapStartAlloc == m.mapStartCommit || !*m.mapStartAlloc);
+ VDASSERT(m.mapStartCommit == m.mapEndCommit || m.mapStartCommit[0]);
+ VDASSERT(m.mapStart == m.mapEnd || (m.mapStart[0] && m.mapEnd[-1]));
+ VDASSERT(m.mapEndCommit == m.mapEndAlloc || !m.mapEndCommit[0]);
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vdtypes.h b/src/thirdparty/VirtualDub/h/vd2/system/vdtypes.h
new file mode 100644
index 000000000..0a5a63e50
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vdtypes.h
@@ -0,0 +1,415 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDTYPES_H
+#define f_VD2_SYSTEM_VDTYPES_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <algorithm>
+#include <stdio.h>
+#include <stdarg.h>
+#include <new>
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// compiler detection
+//
+///////////////////////////////////////////////////////////////////////////
+
+#ifndef VD_COMPILER_DETECTED
+ #define VD_COMPILER_DETECTED
+
+ #ifdef _MSC_VER
+ #define VD_COMPILER_MSVC _MSC_VER
+
+ #if _MSC_VER >= 1400
+ #define VD_COMPILER_MSVC_VC8 1
+
+ #if _MSC_FULL_VER == 140040310
+ #define VD_COMPILER_MSVC_VC8_PSDK 1
+ #elif _MSC_FULL_VER == 14002207
+ #define VD_COMPILER_MSVC_VC8_DDK 1
+ #endif
+
+ #elif _MSC_VER >= 1310
+ #define VD_COMPILER_MSVC_VC71 1
+ #elif _MSC_VER >= 1300
+ #define VD_COMPILER_MSVC_VC7 1
+ #elif _MSC_VER >= 1200
+ #define VD_COMPILER_MSVC_VC6 1
+ #endif
+
+ #endif
+#endif
+
+#ifndef VD_CPU_DETECTED
+ #define VD_CPU_DETECTED
+
+ #ifdef _M_AMD64
+ #define VD_CPU_AMD64 1
+ #elif _M_IX86
+ #define VD_CPU_X86 1
+ #endif
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// types
+//
+///////////////////////////////////////////////////////////////////////////
+
+#ifndef VD_STANDARD_TYPES_DECLARED
+ #if defined(_MSC_VER)
+ typedef signed __int64 sint64;
+ typedef unsigned __int64 uint64;
+ #elif defined(__GNUC__)
+ typedef signed long long sint64;
+ typedef unsigned long long uint64;
+ #endif
+ typedef signed int sint32;
+ typedef unsigned int uint32;
+ typedef signed short sint16;
+ typedef unsigned short uint16;
+ typedef signed char sint8;
+ typedef unsigned char uint8;
+
+ typedef sint64 int64;
+ typedef sint32 int32;
+ typedef sint16 int16;
+ typedef sint8 int8;
+
+ #ifdef _M_AMD64
+ typedef sint64 sintptr;
+ typedef uint64 uintptr;
+ #else
+ #if _MSC_VER >= 1310
+ typedef __w64 sint32 sintptr;
+ typedef __w64 uint32 uintptr;
+ #else
+ typedef sint32 sintptr;
+ typedef uint32 uintptr;
+ #endif
+ #endif
+#endif
+
+#if defined(_MSC_VER)
+ #define VD64(x) x##i64
+#elif defined(__GNUC__)
+ #define VD64(x) x##ll
+#else
+ #error Please add an entry for your compiler for 64-bit constant literals.
+#endif
+
+
+#define VDAPIENTRY __cdecl
+
+typedef int64 VDTime;
+typedef int64 VDPosition;
+typedef struct __VDGUIHandle *VDGUIHandle;
+
+// enforce wchar_t under Visual C++
+
+#if defined(_MSC_VER) && !defined(_WCHAR_T_DEFINED)
+ #include <ctype.h>
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// allocation
+//
+///////////////////////////////////////////////////////////////////////////
+
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1300 || (defined(VD_COMPILER_MSVC_VC8_PSDK) || defined(VD_COMPILER_MSVC_VC8_DDK)))
+#define new_nothrow new
+#else
+#define new_nothrow new(std::nothrow)
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// STL fixes
+//
+///////////////////////////////////////////////////////////////////////////
+
+#if defined(VD_COMPILER_MSVC_VC6) || defined(VD_COMPILER_MSVC_VC8_DDK) || defined(VD_COMPILER_MSVC_VC8_PSDK)
+ // The VC6 STL was deliberately borked to avoid conflicting with
+ // Windows min/max macros. We work around this bogosity here. Note
+ // that NOMINMAX must be defined for these to compile properly. Also,
+ // there is a bug in the VC6 compiler that sometimes causes long
+ // lvalues to "promote" to int, causing ambiguous override errors.
+ // To avoid this, always explicitly declare which type you are using,
+ // i.e. min<int>(x,0). None of this is a problem with VC7 or later.
+ namespace std {
+ template<class T>
+ inline const T& min(const T& x, const T& y) {
+ return _cpp_min(x, y);
+ }
+
+ template<class T>
+ inline const T& max(const T& x, const T& y) {
+ return _cpp_max(x, y);
+ }
+ };
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// compiler fixes
+//
+///////////////////////////////////////////////////////////////////////////
+
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1400 || (defined(VD_COMPILER_MSVC_VC8_PSDK) || defined(VD_COMPILER_MSVC_VC8_DDK)))
+ inline int vswprintf(wchar_t *dst, size_t bufsize, const wchar_t *format, va_list val) {
+ return _vsnwprintf(dst, bufsize, format, val);
+ }
+
+ inline int swprintf(wchar_t *dst, size_t bufsize, const wchar_t *format, ...) {
+ va_list val;
+
+ va_start(val, format);
+ int r = vswprintf(dst, bufsize, format, val);
+ va_end(val);
+
+ return r;
+ }
+
+ #define _strdup strdup
+ #define _stricmp stricmp
+ #define _strnicmp strnicmp
+ #define _wcsdup wcsdup
+ #define _wcsicmp wcsicmp
+ #define _wcsnicmp wcsnicmp
+#endif
+
+#if defined(VD_COMPILER_MSVC) && VD_COMPILER_MSVC < 1400
+ #define vdfor if(0);else for
+#else
+ #define vdfor for
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// attribute support
+//
+///////////////////////////////////////////////////////////////////////////
+
+#if defined(VD_COMPILER_MSVC)
+ #define VDINTERFACE __declspec(novtable)
+ #define VDNORETURN __declspec(noreturn)
+ #define VDPUREFUNC
+ #if VD_COMPILER_MSVC >= 1400
+ #define VDRESTRICT __restrict
+ #else
+ #define VDRESTRICT
+ #endif
+#elif defined(__GNUC__)
+ #define VDINTERFACE
+ #define VDNORETURN __attribute__((noreturn))
+ #define VDPUREFUNC __attribute__((pure))
+ #define VDRESTRICT
+#else
+ #define VDINTERFACE
+ #define VDNORETURN
+ #define VDPUREFUNC
+ #define VDRESTRICT
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// debug support
+//
+///////////////////////////////////////////////////////////////////////////
+
+enum VDAssertResult {
+ kVDAssertBreak,
+ kVDAssertContinue,
+ kVDAssertIgnore
+};
+
+extern VDAssertResult VDAssert(const char *exp, const char *file, int line);
+extern VDAssertResult VDAssertPtr(const char *exp, const char *file, int line);
+extern void VDDebugPrint(const char *format, ...);
+
+#if defined(_MSC_VER)
+ #if _MSC_VER >= 1300
+ #define VDBREAK __debugbreak()
+ #else
+ #define VDBREAK __asm { int 3 }
+ #endif
+#elif defined(__GNUC__)
+ #define VDBREAK __asm__ volatile ("int3" : : )
+#else
+ #define VDBREAK *(volatile char *)0 = *(volatile char *)0
+#endif
+
+
+#ifdef _DEBUG
+
+ namespace {
+ template<int line>
+ struct VDAssertHelper {
+ VDAssertHelper(const char *exp, const char *file) {
+ if (!sbAssertDisabled)
+ switch(VDAssert(exp, file, line)) {
+ case kVDAssertBreak:
+ VDBREAK;
+ break;
+ case kVDAssertIgnore:
+ sbAssertDisabled = true;
+ break;
+ }
+ }
+
+ static bool sbAssertDisabled;
+ };
+
+ template<int lineno>
+ bool VDAssertHelper<lineno>::sbAssertDisabled;
+ }
+
+ #define VDASSERT(exp) if (static bool active = true) if (exp); else switch(VDAssert (#exp, __FILE__, __LINE__)) { case kVDAssertBreak: VDBREAK; break; case kVDAssertIgnore: active = false; } else ((void)0)
+ #define VDASSERTPTR(exp) if (static bool active = true) if (exp); else switch(VDAssertPtr(#exp, __FILE__, __LINE__)) { case kVDAssertBreak: VDBREAK; break; case kVDAssertIgnore: active = false; } else ((void)0)
+ #define VDVERIFY(exp) if (exp); else if (static bool active = true) switch(VDAssert (#exp, __FILE__, __LINE__)) { case kVDAssertBreak: VDBREAK; break; case kVDAssertIgnore: active = false; } else ((void)0)
+ #define VDVERIFYPTR(exp) if (exp); else if (static bool active = true) switch(VDAssertPtr(#exp, __FILE__, __LINE__)) { case kVDAssertBreak: VDBREAK; break; case kVDAssertIgnore: active = false; } else ((void)0)
+ #define VDASSERTCT(exp) (void)sizeof(int[(exp)?1:-1])
+
+ #define VDINLINEASSERT(exp) ((exp)||(VDAssertHelper<__LINE__>(#exp, __FILE__),false))
+ #define VDINLINEASSERTFALSE(exp) ((exp)&&(VDAssertHelper<__LINE__>("!("#exp")", __FILE__),true))
+
+ #define NEVER_HERE do { if (VDAssert( "[never here]", __FILE__, __LINE__ )) VDBREAK; __assume(false); } while(false)
+ #define VDNEVERHERE do { if (VDAssert( "[never here]", __FILE__, __LINE__ )) VDBREAK; __assume(false); } while(false)
+
+ #define VDDEBUG VDDebugPrint
+
+#else
+
+ #if defined(_MSC_VER)
+ #ifndef _M_AMD64
+ #define VDASSERT(exp) __assume(!!(exp))
+ #define VDASSERTPTR(exp) __assume(!!(exp))
+ #else
+ #define VDASSERT(exp) __noop(exp)
+ #define VDASSERTPTR(exp) __noop(exp)
+ #endif
+ #elif defined(__GNUC__)
+ #define VDASSERT(exp) __builtin_expect(0 != (exp), 1)
+ #define VDASSERTPTR(exp) __builtin_expect(0 != (exp), 1)
+ #endif
+
+ #define VDVERIFY(exp) (exp)
+ #define VDVERIFYPTR(exp) (exp)
+ #define VDASSERTCT(exp)
+
+ #define VDINLINEASSERT(exp) (exp)
+ #define VDINLINEASSERTFALSE(exp) (exp)
+
+ #if defined(VD_COMPILER_MSVC)
+ #define NEVER_HERE __assume(false)
+ #define VDNEVERHERE __assume(false)
+ #else
+ #define NEVER_HERE VDASSERT(false)
+ #define VDNEVERHERE VDASSERT(false)
+ #endif
+
+ extern int VDDEBUG_Helper(const char *, ...);
+ #define VDDEBUG (void)sizeof VDDEBUG_Helper
+
+#endif
+
+#define VDDEBUG2 VDDebugPrint
+
+// TODO macros
+//
+// These produce a diagnostic during compilation that indicates a TODO for
+// later:
+//
+//		#pragma message(vdpragma_TODO0 "Fix this.")
+//		#pragma vdpragma_TODO("Fix this.")
+
+#define vdpragma_TODO2(x) #x
+#define vdpragma_TODO1(x) vdpragma_TODO2(x)
+#define vdpragma_TODO0 __FILE__ "(" vdpragma_TODO1(__LINE__) ") : TODO: "
+
+#ifdef _MSC_VER
+#define vdpragma_TODO(x) message(vdpragma_TODO0 x)
+#else
+#define vdpragma_TODO(x)
+#endif
+
+// BS macros
+//
+// These tag code that is not meant to go into a final build.
+
+#define vdpragma_BS2(x) #x
+#define vdpragma_BS1(x) vdpragma_BS2(x)
+#define vdpragma_BS0 __FILE__ "(" vdpragma_BS1(__LINE__) ") : BS: "
+
+#ifdef _MSC_VER
+#define vdpragma_BS(x) message(vdpragma_BS0 x)
+#else
+#define vdpragma_BS(x)
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Object scope macros
+//
+// vdobjectscope() allows you to define a construct where an object is
+// constructed and live only within the controlled statement. This is
+// used for vdsynchronized (thread.h) and protected scopes below.
+// It relies on a strange quirk of C++ regarding initialized objects
+// in the condition of a selection statement and also horribly abuses
+// the switch statement, generating rather good code in release builds.
+// The catch is that the controlled object must implement a conversion to
+// bool returning false and must only be initialized with one argument (C
+// syntax).
+//
+// Unfortunately, handy as this macro is, it is also damned good at
+// breaking compilers. For a start, declaring an object with a non-
+// trivial destructor in a switch() kills both VC6 and VC7 with a C1001.
+// The bug is fixed in VC8 (MSC 14.00).
+//
+// A somewhat safer alternative is the for() statement, along the lines
+// of:
+//
+// switch(bool v=false) case 0: default: for(object_def; !v; v=true)
+//
+// This avoids the conversion operator but unfortunately usually generates
+// an actual loop in the output.
+
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1400 || defined(VD_COMPILER_MSVC_VC8_DDK))
+#define vdobjectscope(object_def) if(object_def) VDNEVERHERE; else
+#else
+#define vdobjectscope(object_def) switch(object_def) case 0: default:
+#endif
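+
+// Illustrative usage sketch (not part of the original header): a guard type
+// for use with vdobjectscope() only needs a single-argument constructor and a
+// conversion to bool returning false, along the lines of:
+//
+//		struct ScopedGuard {
+//			ScopedGuard(Lock& l) : mLock(l) { mLock.Enter(); }
+//			~ScopedGuard() { mLock.Leave(); }
+//			operator bool() const { return false; }
+//			Lock& mLock;
+//		};
+//
+//		#define guarded(lock) vdobjectscope(ScopedGuard guard_ = (lock))
+//
+//		guarded(gLock) {
+//			// guard_ holds gLock only within this controlled statement
+//		}
+//
+// Lock/Enter/Leave above are hypothetical names; vdsynchronized in thread.h is
+// the real user of this macro.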
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vectors.h b/src/thirdparty/VirtualDub/h/vd2/system/vectors.h
new file mode 100644
index 000000000..6dcbe65fa
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vectors.h
@@ -0,0 +1,568 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VECTORS_H
+#define f_VD2_SYSTEM_VECTORS_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+#include <math.h>
+#include <limits>
+
+#ifndef VDFORCEINLINE
+ #define VDFORCEINLINE __forceinline
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+
+bool VDSolveLinearEquation(double *src, int n, ptrdiff_t stride_elements, double *b, double tolerance = 1e-5);
+
+///////////////////////////////////////////////////////////////////////////
+
+#include <vd2/system/vectors_float.h>
+#include <vd2/system/vectors_int.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdfloat2x2 {
+public:
+ enum zero_type { zero };
+ enum identity_type { identity };
+
+ typedef float value_type;
+ typedef vdfloat2 vector_type;
+ typedef vdfloat2c vector_ctor_type;
+ typedef vdfloat2x2 self_type;
+
+ vdfloat2x2() {}
+ vdfloat2x2(zero_type) { m[0] = m[1] = vector_ctor_type(0, 0); }
+ vdfloat2x2(identity_type) {
+ m[0] = vector_ctor_type(1, 0);
+ m[1] = vector_ctor_type(0, 1);
+ }
+
+ vector_type& operator[](int k) { return m[k]; }
+ const vector_type& operator[](int k) const { return m[k]; }
+
+ self_type operator*(const self_type& v) const {
+ self_type result;
+
+#define DO(i,j) result.m[i].v[j] = m[i].v[0]*v.m[0].v[j] + m[i].v[1]*v.m[1].v[j]
+ DO(0,0);
+ DO(0,1);
+ DO(1,0);
+ DO(1,1);
+#undef DO
+
+ return result;
+ }
+
+ vector_type operator*(const vector_type& r) const {
+ return vector_ctor_type(
+ m[0].v[0]*r.v[0] + m[0].v[1]*r.v[1],
+ m[1].v[0]*r.v[0] + m[1].v[1]*r.v[1]);
+ }
+
+ self_type transpose() const {
+ self_type res;
+
+ res.m[0].v[0] = m[0].v[0];
+ res.m[0].v[1] = m[1].v[0];
+ res.m[1].v[0] = m[0].v[1];
+ res.m[1].v[1] = m[1].v[1];
+
+ return res;
+ }
+
+ self_type adjunct() const {
+ self_type res;
+
+ res.m[0].set(m[1].v[1], -m[0].v[1]);
+		res.m[1].set(-m[1].v[0], m[0].v[0]);	// adjugate of [[a,b],[c,d]] is [[d,-b],[-c,a]]
+
+ return res;
+ }
+
+ value_type det() const {
+ return m[0].v[0]*m[1].v[1] - m[1].v[0]*m[0].v[1];
+ }
+
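+	// ~M is the inverse: the adjugate divided by the determinant.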
+ self_type operator~() const {
+ return adjunct() / det();
+ }
+
+ self_type& operator*=(const value_type factor) {
+ m[0] *= factor;
+ m[1] *= factor;
+
+ return *this;
+ }
+
+ self_type& operator/=(const value_type factor) {
+ return operator*=(value_type(1)/factor);
+ }
+
+ self_type operator*(const value_type factor) const {
+ return self_type(*this) *= factor;
+ }
+
+ self_type operator/(const value_type factor) const {
+ return self_type(*this) /= factor;
+ }
+
+ vector_type m[2];
+};
+
+class vdfloat3x3 {
+public:
+ enum zero_type { zero };
+ enum identity_type { identity };
+ enum rotation_x_type { rotation_x };
+ enum rotation_y_type { rotation_y };
+ enum rotation_z_type { rotation_z };
+
+ typedef float value_type;
+ typedef vdfloat3 vector_type;
+ typedef vdfloat3c vector_ctor_type;
+ typedef vdfloat3x3 self_type;
+
+ vdfloat3x3() {}
+ vdfloat3x3(zero_type) { m[0] = m[1] = m[2] = vector_ctor_type(0, 0, 0); }
+ vdfloat3x3(identity_type) {
+ m[0].set(1, 0, 0);
+ m[1].set(0, 1, 0);
+ m[2].set(0, 0, 1);
+ }
+ vdfloat3x3(rotation_x_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( 1, 0, 0);
+ m[1].set( 0, c,-s);
+ m[2].set( 0, s, c);
+ }
+
+ vdfloat3x3(rotation_y_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( c, 0, s);
+ m[1].set( 0, 1, 0);
+ m[2].set(-s, 0, c);
+ }
+ vdfloat3x3(rotation_z_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( c,-s, 0);
+ m[1].set( s, c, 0);
+ m[2].set( 0, 0, 1);
+ }
+
+ vector_type& operator[](int k) { return m[k]; }
+ const vector_type& operator[](int k) const { return m[k]; }
+
+ self_type operator*(const self_type& v) const {
+ self_type result;
+
+#define DO(i,j) result.m[i].v[j] = m[i].v[0]*v.m[0].v[j] + m[i].v[1]*v.m[1].v[j] + m[i].v[2]*v.m[2].v[j]
+ DO(0,0);
+ DO(0,1);
+ DO(0,2);
+ DO(1,0);
+ DO(1,1);
+ DO(1,2);
+ DO(2,0);
+ DO(2,1);
+ DO(2,2);
+#undef DO
+
+ return result;
+ }
+
+ vector_type operator*(const vector_type& r) const {
+ return vector_ctor_type(
+ m[0].v[0]*r.v[0] + m[0].v[1]*r.v[1] + m[0].v[2]*r.v[2],
+ m[1].v[0]*r.v[0] + m[1].v[1]*r.v[1] + m[1].v[2]*r.v[2],
+ m[2].v[0]*r.v[0] + m[2].v[1]*r.v[1] + m[2].v[2]*r.v[2]);
+ }
+
+ self_type transpose() const {
+ self_type res;
+
+ res.m[0].v[0] = m[0].v[0];
+ res.m[0].v[1] = m[1].v[0];
+ res.m[0].v[2] = m[2].v[0];
+ res.m[1].v[0] = m[0].v[1];
+ res.m[1].v[1] = m[1].v[1];
+ res.m[1].v[2] = m[2].v[1];
+ res.m[2].v[0] = m[0].v[2];
+ res.m[2].v[1] = m[1].v[2];
+ res.m[2].v[2] = m[2].v[2];
+
+ return res;
+ }
+
+ self_type adjunct() const {
+ using namespace nsVDMath;
+
+ self_type res;
+
+ res.m[0] = cross(m[1], m[2]);
+ res.m[1] = cross(m[2], m[0]);
+ res.m[2] = cross(m[0], m[1]);
+
+ return res.transpose();
+ }
+
+ value_type det() const {
+ return + m[0].v[0] * m[1].v[1] * m[2].v[2]
+ + m[1].v[0] * m[2].v[1] * m[0].v[2]
+ + m[2].v[0] * m[0].v[1] * m[1].v[2]
+ - m[0].v[0] * m[2].v[1] * m[1].v[2]
+ - m[1].v[0] * m[0].v[1] * m[2].v[2]
+ - m[2].v[0] * m[1].v[1] * m[0].v[2];
+ }
+
+ self_type operator~() const {
+ return adjunct() / det();
+ }
+
+ self_type& operator*=(const value_type factor) {
+ m[0] *= factor;
+ m[1] *= factor;
+ m[2] *= factor;
+
+ return *this;
+ }
+
+ self_type& operator/=(const value_type factor) {
+ return operator*=(value_type(1)/factor);
+ }
+
+ self_type operator*(const value_type factor) const {
+ return self_type(*this) *= factor;
+ }
+
+ self_type operator/(const value_type factor) const {
+ return self_type(*this) /= factor;
+ }
+
+ vector_type m[3];
+};
+
+class vdfloat4x4 {
+public:
+ enum zero_type { zero };
+ enum identity_type { identity };
+ enum rotation_x_type { rotation_x };
+ enum rotation_y_type { rotation_y };
+ enum rotation_z_type { rotation_z };
+
+ typedef float value_type;
+ typedef vdfloat4 vector_type;
+ typedef vdfloat4c vector_ctor_type;
+
+ vdfloat4x4() {}
+ vdfloat4x4(const vdfloat3x3& v) {
+ m[0].set(v.m[0].x, v.m[0].y, v.m[0].z, 0.0f);
+ m[1].set(v.m[1].x, v.m[1].y, v.m[1].z, 0.0f);
+ m[2].set(v.m[2].x, v.m[2].y, v.m[2].z, 0.0f);
+ m[3].set(0, 0, 0, 1);
+ }
+
+ vdfloat4x4(zero_type) {
+ m[0].setzero();
+ m[1].setzero();
+ m[2].setzero();
+ m[3].setzero();
+ }
+
+ vdfloat4x4(identity_type) {
+ m[0].set(1, 0, 0, 0);
+ m[1].set(0, 1, 0, 0);
+ m[2].set(0, 0, 1, 0);
+ m[3].set(0, 0, 0, 1);
+ }
+ vdfloat4x4(rotation_x_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( 1, 0, 0, 0);
+ m[1].set( 0, c,-s, 0);
+ m[2].set( 0, s, c, 0);
+ m[3].set( 0, 0, 0, 1);
+ }
+ vdfloat4x4(rotation_y_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( c, 0, s, 0);
+ m[1].set( 0, 1, 0, 0);
+ m[2].set(-s, 0, c, 0);
+ m[3].set( 0, 0, 0, 1);
+ }
+ vdfloat4x4(rotation_z_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( c,-s, 0, 0);
+ m[1].set( s, c, 0, 0);
+ m[2].set( 0, 0, 1, 0);
+ m[3].set( 0, 0, 0, 1);
+ }
+
+ const value_type *data() const { return &m[0][0]; }
+
+ vector_type& operator[](int n) { return m[n]; }
+ const vector_type& operator[](int n) const { return m[n]; }
+
+ vdfloat4x4 operator*(const vdfloat4x4& v) const {
+ vdfloat4x4 result;
+
+#define DO(i,j) result.m[i].v[j] = m[i].v[0]*v.m[0].v[j] + m[i].v[1]*v.m[1].v[j] + m[i].v[2]*v.m[2].v[j] + m[i].v[3]*v.m[3].v[j]
+ DO(0,0);
+ DO(0,1);
+ DO(0,2);
+ DO(0,3);
+ DO(1,0);
+ DO(1,1);
+ DO(1,2);
+ DO(1,3);
+ DO(2,0);
+ DO(2,1);
+ DO(2,2);
+ DO(2,3);
+ DO(3,0);
+ DO(3,1);
+ DO(3,2);
+ DO(3,3);
+#undef DO
+
+ return result;
+ }
+
+ vdfloat4x4& operator*=(const vdfloat4x4& v) {
+ return operator=(operator*(v));
+ }
+
+ vector_type operator*(const vdfloat3& r) const {
+ return vector_ctor_type(
+ m[0].v[0]*r.v[0] + m[0].v[1]*r.v[1] + m[0].v[2]*r.v[2] + m[0].v[3],
+ m[1].v[0]*r.v[0] + m[1].v[1]*r.v[1] + m[1].v[2]*r.v[2] + m[1].v[3],
+ m[2].v[0]*r.v[0] + m[2].v[1]*r.v[1] + m[2].v[2]*r.v[2] + m[2].v[3],
+ m[3].v[0]*r.v[0] + m[3].v[1]*r.v[1] + m[3].v[2]*r.v[2] + m[3].v[3]);
+ }
+
+ vector_type operator*(const vector_type& r) const {
+ return vector_ctor_type(
+ m[0].v[0]*r.v[0] + m[0].v[1]*r.v[1] + m[0].v[2]*r.v[2] + m[0].v[3]*r.v[3],
+ m[1].v[0]*r.v[0] + m[1].v[1]*r.v[1] + m[1].v[2]*r.v[2] + m[1].v[3]*r.v[3],
+ m[2].v[0]*r.v[0] + m[2].v[1]*r.v[1] + m[2].v[2]*r.v[2] + m[2].v[3]*r.v[3],
+ m[3].v[0]*r.v[0] + m[3].v[1]*r.v[1] + m[3].v[2]*r.v[2] + m[3].v[3]*r.v[3]);
+ }
+
+ vector_type m[4];
+};
+
+template<class T>
+struct VDSize {
+ typedef T value_type;
+
+	T w, h;
+
+ VDSize() {}
+	VDSize(T _w, T _h) : w(_w), h(_h) {}
+
+ bool operator==(const VDSize& s) const { return w==s.w && h==s.h; }
+ bool operator!=(const VDSize& s) const { return w!=s.w || h!=s.h; }
+
+ VDSize& operator+=(const VDSize& s) {
+ w += s.w;
+ h += s.h;
+ return *this;
+ }
+
+ T area() const { return w*h; }
+
+ void include(const VDSize& s) {
+ if (w < s.w)
+ w = s.w;
+ if (h < s.h)
+ h = s.h;
+ }
+};
+
+template<class T>
+class VDRect {
+public:
+ typedef T value_type;
+
+ VDRect();
+ VDRect(T left_, T top_, T right_, T bottom_);
+
+ bool empty() const;
+ bool valid() const;
+
+ void clear();
+ void invalidate();
+ void set(T l, T t, T r, T b);
+
+ void add(T x, T y);
+ void add(const VDRect& r);
+ void translate(T x, T y);
+ void scale(T x, T y);
+	void transform(T scaleX, T scaleY, T offsetX, T offsetY);
+
+ bool operator==(const VDRect& r) const;
+ bool operator!=(const VDRect& r) const;
+
+ T width() const;
+ T height() const;
+ T area() const;
+ VDSize<T> size() const;
+
+public:
+ T left, top, right, bottom;
+};
+
+template<class T>
+VDRect<T>::VDRect() {
+}
+
+template<class T>
+VDRect<T>::VDRect(T left_, T top_, T right_, T bottom_)
+ : left(left_)
+ , top(top_)
+ , right(right_)
+ , bottom(bottom_)
+{
+}
+
+template<class T>
+bool VDRect<T>::empty() const {
+ return left >= right || top >= bottom;
+}
+
+template<class T>
+bool VDRect<T>::valid() const {
+ return left <= right;
+}
+
+template<class T>
+void VDRect<T>::clear() {
+ left = top = right = bottom = 0;
+}
+
+template<class T>
+void VDRect<T>::invalidate() {
+ left = top = (std::numeric_limits<T>::max)();
+ right = bottom = std::numeric_limits<T>::is_signed ? -(std::numeric_limits<T>::max)() : T(0);
+}
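+
+// invalidate() stores inverted extents (left/top at the maximum representable
+// value, right/bottom at the lowest usable value), so a following sequence of
+// add() calls accumulates the bounding box of everything added.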
+
+template<class T>
+void VDRect<T>::set(T l, T t, T r, T b) {
+ left = l;
+ top = t;
+ right = r;
+ bottom = b;
+}
+
+template<class T>
+void VDRect<T>::add(T x, T y) {
+ if (left > x)
+ left = x;
+ if (top > y)
+ top = y;
+ if (right < x)
+ right = x;
+ if (bottom < y)
+ bottom = y;
+}
+
+template<class T>
+void VDRect<T>::add(const VDRect& src) {
+ if (left > src.left)
+ left = src.left;
+ if (top > src.top)
+ top = src.top;
+ if (right < src.right)
+ right = src.right;
+ if (bottom < src.bottom)
+ bottom = src.bottom;
+}
+
+template<class T>
+void VDRect<T>::translate(T x, T y) {
+ left += x;
+ top += y;
+ right += x;
+ bottom += y;
+}
+
+template<class T>
+void VDRect<T>::scale(T x, T y) {
+ left *= x;
+ top *= y;
+ right *= x;
+ bottom *= y;
+}
+
+template<class T>
+void VDRect<T>::transform(T scaleX, T scaleY, T offsetX, T offsetY) {
+ left = left * scaleX + offsetX;
+ top = top * scaleY + offsetY;
+ right = right * scaleX + offsetX;
+ bottom = bottom * scaleY + offsetY;
+}
+
+template<class T>
+bool VDRect<T>::operator==(const VDRect& r) const { return left==r.left && top==r.top && right==r.right && bottom==r.bottom; }
+
+template<class T>
+bool VDRect<T>::operator!=(const VDRect& r) const { return left!=r.left || top!=r.top || right!=r.right || bottom!=r.bottom; }
+
+template<class T>
+T VDRect<T>::width() const { return right-left; }
+
+template<class T>
+T VDRect<T>::height() const { return bottom-top; }
+
+template<class T>
+T VDRect<T>::area() const { return (right-left)*(bottom-top); }
+
+template<class T>
+VDSize<T> VDRect<T>::size() const { return VDSize<T>(right-left, bottom-top); }
+
+///////////////////////////////////////////////////////////////////////////////
+typedef VDSize<sint32> vdsize32;
+typedef VDSize<float> vdsize32f;
+typedef VDRect<sint32> vdrect32;
+typedef VDRect<float> vdrect32f;
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vectors_float.h b/src/thirdparty/VirtualDub/h/vd2/system/vectors_float.h
new file mode 100644
index 000000000..3be7fb4ac
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vectors_float.h
@@ -0,0 +1,207 @@
+class vdfloat2 {
+public:
+ typedef vdfloat2 self_type;
+ typedef float value_type;
+
+ void set(float x2, float y2) { x=x2; y=y2; }
+
+ float& operator[](int k) { return v[k]; }
+ const float& operator[](int k) const { return v[k]; }
+
+ float lensq() const { return x*x + y*y; }
+
+ self_type operator-() const { self_type a = {-x, -y}; return a; }
+
+ self_type operator+(const self_type& r) const { self_type a = {x+r.x, y+r.y}; return a; }
+ self_type operator-(const self_type& r) const { self_type a = {x-r.x, y-r.y}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; return *this; }
+
+	self_type operator*(const float s) const { self_type a = {x*s, y*s}; return a; }
+ self_type& operator*=(const float s) { x*=s; y*=s; return *this; }
+
+ self_type operator/(const float s) const { const float inv(float(1)/s); self_type a = {x*inv, y*inv}; return a; }
+ self_type& operator/=(const float s) { const float inv(float(1)/s); x*=inv; y*=inv; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; return *this; }
+
+ self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; return *this; }
+
+ union {
+ struct {
+ float x;
+ float y;
+ };
+ float v[2];
+ };
+};
+
+VDFORCEINLINE vdfloat2 operator*(const float s, const vdfloat2& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdfloat3 {
+public:
+ typedef vdfloat3 self_type;
+ typedef float value_type;
+
+ void set(float x2, float y2, float z2) { x=x2; y=y2; z=z2; }
+
+ float& operator[](int k) { return v[k]; }
+ const float& operator[](int k) const { return v[k]; }
+
+ float lensq() const { return x*x + y*y + z*z; }
+
+ vdfloat2 project() const { const float inv(float(1)/z); const vdfloat2 a = {x*inv, y*inv}; return a; }
+ vdfloat2 as2d() const { const vdfloat2 a = {x, y}; return a; }
+
+ self_type operator-() const { const self_type a = {-x, -y, -z}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y, z+r.z}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y, z-r.z}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; z+=r.z; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; z-=r.z; return *this; }
+
+ self_type operator*(const float s) const { const self_type a = {x*s, y*s, z*s}; return a; }
+ self_type& operator*=(const float s) { x*=s; y*=s; z*=s; return *this; }
+
+ self_type operator/(const float s) const { const float inv(float(1)/s); const self_type a = {x*inv, y*inv, z*inv}; return a; }
+ self_type& operator/=(const float s) { const float inv(float(1)/s); x*=inv; y*=inv; z*=inv; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y, z*r.z}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; z*=r.z; return *this; }
+
+ self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y, z/r.z}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; z/=r.z; return *this; }
+
+ union {
+ struct {
+ float x;
+ float y;
+ float z;
+ };
+ float v[3];
+ };
+};
+
+VDFORCEINLINE vdfloat3 operator*(const float s, const vdfloat3& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdfloat4 {
+public:
+ typedef vdfloat4 self_type;
+ typedef float value_type;
+
+ void setzero() { x=y=z=w = 0; }
+ void set(float x2, float y2, float z2, float w2) { x=x2; y=y2; z=z2; w=w2; }
+
+ float& operator[](int i) { return v[i]; }
+ const float& operator[](int i) const { return v[i]; }
+
+ float lensq() const { return x*x + y*y + z*z + w*w; }
+
+ vdfloat3 project() const { const float inv(float(1)/w); const vdfloat3 a = {x*inv, y*inv, z*inv}; return a; }
+
+ self_type operator-() const { const self_type a = {-x, -y, -z, -w}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y, z+r.z, w+r.w}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y, z-r.z, w-r.w}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; z+=r.z; w+=r.w; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; z-=r.z; w-=r.w; return *this; }
+
+ self_type operator*(const float factor) const { const self_type a = {x*factor, y*factor, z*factor, w*factor}; return a; }
+ self_type operator/(const float factor) const { const float inv(float(1) / factor); const self_type a = {x*inv, y*inv, z*inv, w*inv}; return a; }
+
+ self_type& operator*=(const float factor) { x *= factor; y *= factor; z *= factor; w *= factor; return *this; }
+ self_type& operator/=(const float factor) { const float inv(float(1) / factor); x *= inv; y *= inv; z *= inv; w *= inv; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y, z*r.z, w*r.w}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; z*=r.z; w*=r.w; return *this; }
+
+	self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y, z/r.z, w/r.w}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; z/=r.z; w/=r.w; return *this; }
+
+ union {
+ struct {
+ float x;
+ float y;
+ float z;
+ float w;
+ };
+ float v[4];
+ };
+};
+
+VDFORCEINLINE vdfloat4 operator*(const float s, const vdfloat4& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdfloat2c : public vdfloat2 {
+public:
+ VDFORCEINLINE vdfloat2c(float x2, float y2) {x=x2; y=y2;}
+ VDFORCEINLINE vdfloat2c(const float src[2]) {x=src[0]; y=src[1];}
+};
+
+class vdfloat3c : public vdfloat3 {
+public:
+ VDFORCEINLINE vdfloat3c(float x2, float y2, float z2) { x=x2; y=y2; z=z2; }
+ VDFORCEINLINE vdfloat3c(const float src[3]) { x=src[0]; y=src[1]; z=src[2]; }
+};
+
+class vdfloat4c : public vdfloat4 {
+public:
+ VDFORCEINLINE vdfloat4c(float x2, float y2, float z2, float w2) { x=x2; y=y2; z=z2; w=w2; }
+ VDFORCEINLINE vdfloat4c(const float src[4]) { x=src[0]; y=src[1]; z=src[2]; w=src[3]; }
+};
+
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace nsVDMath {
+ VDFORCEINLINE float length(const vdfloat2& a) {
+ return sqrtf(a.x*a.x + a.y*a.y);
+ }
+
+ VDFORCEINLINE float length(const vdfloat3& a) {
+ return sqrtf(a.x*a.x + a.y*a.y + a.z*a.z);
+ }
+
+ VDFORCEINLINE float length(const vdfloat4& a) {
+ return sqrtf(a.x*a.x + a.y*a.y + a.z*a.z + a.w*a.w);
+ }
+
+ VDFORCEINLINE vdfloat2 normalize(const vdfloat2& a) {
+ return a / length(a);
+ }
+
+ VDFORCEINLINE vdfloat3 normalize(const vdfloat3& a) {
+ return a / length(a);
+ }
+
+ VDFORCEINLINE vdfloat4 normalize(const vdfloat4& a) {
+ return a / length(a);
+ }
+
+ VDFORCEINLINE float dot(const vdfloat2& a, const vdfloat2& b) {
+ return a.x*b.x + a.y*b.y;
+ }
+
+ VDFORCEINLINE float dot(const vdfloat3& a, const vdfloat3& b) {
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+ }
+
+ VDFORCEINLINE float dot(const vdfloat4& a, const vdfloat4& b) {
+ return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
+ }
+
+ VDFORCEINLINE vdfloat3 cross(const vdfloat3& a, const vdfloat3& b) {
+ const vdfloat3 r = {a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x};
+ return r;
+ }
+};
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vectors_int.h b/src/thirdparty/VirtualDub/h/vd2/system/vectors_int.h
new file mode 100644
index 000000000..78c796761
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vectors_int.h
@@ -0,0 +1,183 @@
+class vdint2 {
+public:
+ typedef vdint2 self_type;
+ typedef int value_type;
+
+ void set(int x2, int y2) { x=x2; y=y2; }
+
+ int& operator[](int k) { return v[k]; }
+ const int& operator[](int k) const { return v[k]; }
+
+ int lensq() const { return x*x + y*y; }
+ int len() const { return (int)sqrtf((float)(x*x + y*y)); }
+ self_type normalized() const { return *this / len(); }
+
+ self_type operator-() const { const self_type a = {-x, -y}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; return *this; }
+
+	self_type operator*(const int s) const { const self_type a = {x*s, y*s}; return a; }
+ self_type& operator*=(const int s) { x*=s; y*=s; return *this; }
+
+ self_type operator/(const int s) const { const self_type a = {x/s, y/s}; return a; }
+ self_type& operator/=(const int s) { x/=s; y/=s; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; return *this; }
+
+ self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; return *this; }
+
+ union {
+ struct {
+ int x;
+ int y;
+ };
+ int v[2];
+ };
+};
+
+VDFORCEINLINE vdint2 operator*(const int s, const vdint2& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdint3 {
+public:
+ typedef vdint3 self_type;
+ typedef int value_type;
+
+ int& operator[](int k) { return v[k]; }
+ const int& operator[](int k) const { return v[k]; }
+
+ int lensq() const { return x*x + y*y + z*z; }
+ int len() const { return (int)sqrtf((float)(x*x + y*y + z*z)); }
+ self_type normalized() const { return *this / len(); }
+
+	vdint2 project() const { const vdint2 a = {x/z, y/z}; return a; }
+ vdint2 as2d() const { const vdint2 a = {x, y}; return a; }
+
+ self_type operator-() const { const self_type a = {-x, -y, -z}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y, z+r.z}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y, z-r.z}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; z+=r.z; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; z-=r.z; return *this; }
+
+ self_type operator*(const int s) const { const self_type a = {x*s, y*s, z*s}; return a; }
+ self_type& operator*=(const int s) { x*=s; y*=s; z*=s; return *this; }
+
+ self_type operator/(const int s) const { const self_type a = {x/s, y/s, z/s}; return a; }
+ self_type& operator/=(const int s) { x /= s; y /= s; z /= s; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y, z*r.z}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; z*=r.z; return *this; }
+
+ self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y, z/r.z}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; z/=r.z; return *this; }
+
+ union {
+ struct {
+ int x;
+ int y;
+ int z;
+ };
+ int v[3];
+ };
+};
+
+VDFORCEINLINE vdint3 operator*(const int s, const vdint3& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdint4 {
+public:
+ typedef vdint4 self_type;
+ typedef int value_type;
+
+ int& operator[](int i) { return v[i]; }
+ const int& operator[](int i) const { return v[i]; }
+
+ int lensq() const { return x*x + y*y + z*z + w*w; }
+ int len() const { return (int)sqrtf((float)(x*x + y*y + z*z + w*w)); }
+ self_type normalized() const { return *this / len(); }
+
+	vdint3 project() const { const vdint3 a = {x/w, y/w, z/w}; return a; }
+
+ self_type operator-() const { const self_type a = {-x, -y, -z, -w}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y, z+r.z, w+r.w}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y, z-r.z, w-r.w}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; z+=r.z; w+=r.w; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; z-=r.z; w-=r.w; return *this; }
+
+ self_type operator*(const int factor) const { const self_type a = {x*factor, y*factor, z*factor, w*factor}; return a; }
+ self_type operator/(const int factor) const { const self_type a = {x/factor, y/factor, z/factor, w/factor}; return a; }
+
+ self_type& operator*=(const int factor) { x *= factor; y *= factor; z *= factor; w *= factor; return *this; }
+ self_type& operator/=(const int factor) { x /= factor; y /= factor; z /= factor; w /= factor; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y, z*r.z, w*r.w}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; z*=r.z; w*=r.w; return *this; }
+
+	self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y, z/r.z, w/r.w}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; z/=r.z; w/=r.w; return *this; }
+
+ union {
+ struct {
+ int x;
+ int y;
+ int z;
+ int w;
+ };
+ int v[4];
+ };
+};
+
+VDFORCEINLINE vdint4 operator*(const int s, const vdint4& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdint2c : public vdint2 {
+public:
+ VDFORCEINLINE vdint2c(int x2, int y2) {x=x2; y=y2;}
+ VDFORCEINLINE vdint2c(const int src[2]) {x=src[0]; y=src[1];}
+};
+
+class vdint3c : public vdint3 {
+public:
+ VDFORCEINLINE vdint3c(int x2, int y2, int z2) { x=x2; y=y2; z=z2; }
+ VDFORCEINLINE vdint3c(const int src[3]) { x=src[0]; y=src[1]; z=src[2]; }
+};
+
+class vdint4c : public vdint4 {
+public:
+ VDFORCEINLINE vdint4c(int x2, int y2, int z2, int w2) { x=x2; y=y2; z=z2; w=w2; }
+ VDFORCEINLINE vdint4c(const int src[4]) { x=src[0]; y=src[1]; z=src[2]; w=src[3]; }
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace nsVDMath {
+ VDFORCEINLINE int dot(const vdint2& a, const vdint2& b) {
+ return a.x*b.x + a.y*b.y;
+ }
+
+ VDFORCEINLINE int dot(const vdint3& a, const vdint3& b) {
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+ }
+
+ VDFORCEINLINE int dot(const vdint4& a, const vdint4& b) {
+ return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
+ }
+
+ VDFORCEINLINE vdint3 cross(const vdint3& a, const vdint3& b) {
+ const vdint3 r = {a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x};
+ return r;
+ }
+};
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/w32assist.h b/src/thirdparty/VirtualDub/h/vd2/system/w32assist.h
new file mode 100644
index 000000000..e47e20f52
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/w32assist.h
@@ -0,0 +1,95 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_W32ASSIST_H
+#define f_VD2_SYSTEM_W32ASSIST_H
+
+#include <windows.h>
+
+#include <vd2/system/VDString.h>
+
+inline bool VDIsWindowsNT() {
+#ifdef _M_AMD64
+ return true;
+#else
+ static bool is_nt = !(GetVersion() & 0x80000000);
+
+ return is_nt;
+#endif
+}
+
+// useful constants missing from the Platform SDK
+
+enum {
+#ifdef _M_AMD64
+ MENUITEMINFO_SIZE_VERSION_400A = sizeof(MENUITEMINFOA),
+ MENUITEMINFO_SIZE_VERSION_400W = sizeof(MENUITEMINFOW)
+#else
+ MENUITEMINFO_SIZE_VERSION_400A = (offsetof(MENUITEMINFOA, cch) + sizeof(UINT)),
+ MENUITEMINFO_SIZE_VERSION_400W = (offsetof(MENUITEMINFOW, cch) + sizeof(UINT))
+#endif
+};
+
+// helper functions
+
+bool VDIsForegroundTaskW32();
+
+LPVOID VDConvertThreadToFiberW32(LPVOID parm);
+void VDSwitchToFiberW32(LPVOID fiber);
+
+int VDGetSizeOfBitmapHeaderW32(const BITMAPINFOHEADER *pHdr);
+void VDSetWindowTextW32(HWND hwnd, const wchar_t *s);
+void VDSetWindowTextFW32(HWND hwnd, const wchar_t *format, ...);
+VDStringW VDGetWindowTextW32(HWND hwnd);
+void VDAppendMenuW32(HMENU hmenu, UINT flags, UINT id, const wchar_t *text);
+void VDCheckMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked);
+void VDCheckRadioMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked);
+void VDEnableMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked);
+VDStringW VDGetMenuItemTextByCommandW32(HMENU hmenu, UINT cmd);
+void VDSetMenuItemTextByCommandW32(HMENU hmenu, UINT cmd, const wchar_t *text);
+
+LRESULT VDDualCallWindowProcW32(WNDPROC wp, HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam);
+LRESULT VDDualDefWindowProcW32(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam);
+
+EXECUTION_STATE VDSetThreadExecutionStateW32(EXECUTION_STATE esFlags);
+
+bool VDSetFilePointerW32(HANDLE h, sint64 pos, DWORD dwMoveMethod);
+bool VDGetFileSizeW32(HANDLE h, sint64& size);
+
+#if !defined(_MSC_VER) || _MSC_VER < 1300
+ HMODULE VDGetLocalModuleHandleW32();
+#else
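+	// __ImageBase is a pseudo-symbol the MSVC linker places at the very start
+	// of the module image (its IMAGE_DOS_HEADER), so its address doubles as
+	// the module's HMODULE without a GetModuleHandle() call.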
+ extern "C" IMAGE_DOS_HEADER __ImageBase;
+ inline HMODULE VDGetLocalModuleHandleW32() {
+ return (HINSTANCE)&__ImageBase;
+ }
+#endif
+
+bool VDDrawTextW32(HDC hdc, const wchar_t *s, int nCount, LPRECT lpRect, UINT uFormat);
+
+bool VDPatchModuleImportTableW32(HMODULE hmod, const char *srcModule, const char *name, void *pCompareValue, void *pNewValue, void *volatile *ppOldValue);
+bool VDPatchModuleExportTableW32(HMODULE hmod, const char *name, void *pCompareValue, void *pNewValue, void *volatile *ppOldValue);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/win32/miniwindows.h b/src/thirdparty/VirtualDub/h/vd2/system/win32/miniwindows.h
new file mode 100644
index 000000000..be4ee5695
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/win32/miniwindows.h
@@ -0,0 +1,53 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_WIN32_MINIWINDOWS_H
+#define f_VD2_SYSTEM_WIN32_MINIWINDOWS_H
+
+#define VDZCALLBACK __stdcall
+
+#ifndef _WIN64
+ typedef __w64 int VDZINT_PTR;
+ typedef __w64 unsigned VDZUINT_PTR;
+ typedef __w64 long VDZLONG_PTR;
+#else
+ typedef __int64 VDZINT_PTR;
+ typedef unsigned __int64 VDZUINT_PTR;
+ typedef __int64 VDZLONG_PTR;
+#endif
+
+typedef struct HWND__ *VDZHWND;
+typedef unsigned VDZUINT;
+typedef unsigned short VDZWORD;
+typedef unsigned long VDZDWORD;
+typedef VDZUINT_PTR VDZWPARAM;
+typedef VDZLONG_PTR VDZLPARAM;
+typedef VDZLONG_PTR VDZLRESULT;
+typedef struct HDROP__ *VDZHDROP;
+typedef struct HACCEL__ *VDZHACCEL;
+
+typedef VDZWORD VDZATOM;
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/zip.h b/src/thirdparty/VirtualDub/h/vd2/system/zip.h
new file mode 100644
index 000000000..06b864ccf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/zip.h
@@ -0,0 +1,220 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_ZIP_H
+#define f_ZIP_H
+
+// Rest in peace, Phil Katz.
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/file.h>
+#include <vd2/system/VDString.h>
+#include <string.h>
+#include <vector>
+
+class VDDeflateBitReader {
+public:
+ VDDeflateBitReader() : mpSrc(0), mBufferPt(0), accum(0), bits(0) {}
+
+ void init(IVDStream *pSrc, uint64 limit) {
+ mpSrc = pSrc;
+ mBytesLeft = limit;
+ refill();
+ consume(0);
+ }
+
+ IVDStream *stream() const {
+ return mpSrc;
+ }
+
+ unsigned long peek() const {
+ return accum;
+ }
+
+ bool consume(unsigned n) {
+// printf("%08lx/%d\n", accum << ((-bits)&7), bits);
+ bits -= n;
+
+ if ((int)bits < 0)
+ return false;
+
+ accum >>= n;
+
+ while(bits <= 24 && (mBufferPt || refill())) {
+ accum += mBuffer[kBufferSize + mBufferPt++] << bits;
+ bits += 8;
+ }
+
+ return true;
+ }
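+
+	// consume() keeps the little-endian bit accumulator topped up with at
+	// least 25 bits while input remains (whole bytes are appended above the
+	// bits already held), so peek()/getbits() can serve any DEFLATE code
+	// length without touching the source stream directly.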
+
+ bool refill();
+
+ bool getbit() {
+ unsigned rv = accum;
+
+ consume(1);
+
+ return (rv&1) != 0;
+ }
+
+ unsigned getbits(unsigned n) {
+ unsigned rv = accum & ((1<<n)-1);
+
+ consume(n);
+
+ return rv;
+ }
+
+ bool empty() const {
+ return bits != 0;
+ }
+
+ unsigned avail() const {
+ return bits;
+ }
+
+ unsigned bitsleft() const {
+ return bits + (mBytesLeftLimited<<3);
+ }
+
+ unsigned bytesleft() const {
+ return (bits>>3) + mBytesLeftLimited;
+ }
+
+ void align() {
+ consume(bits&7);
+ }
+
+ void readbytes(void *dst, unsigned len);
+
+protected:
+ enum { kBigAvailThreshold = 16777216 };
+ enum { kBufferSize = 256 };
+
+ unsigned long accum;
+ unsigned bits;
+ int mBufferPt; // counts from -256 to 0
+ uint64 mBytesLeft;
+ unsigned mBytesLeftLimited;
+
+ IVDStream *mpSrc;
+ uint8 mBuffer[kBufferSize];
+};
+
+class VDCRCChecker {
+public:
+ enum {
+ kCRC32 = 0xEDB88320 // CRC-32 used by PKZIP, PNG (x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1)
+ };
+
+ VDCRCChecker() {}
+ VDCRCChecker(uint32 crc) { Init(crc); }
+
+ void Init(uint32 crc);
+ void Process(const void *src, sint32 len);
+
+ uint32 CRC() const { return ~mValue; }
+ uint32 CRC(uint32 crc, const void *src, sint32 len);
+
+protected:
+ uint32 mValue;
+ uint32 mTable[256];
+};
+
+class VDZipStream : public IVDStream {
+public:
+ VDZipStream();
+ VDZipStream(IVDStream *pSrc, uint64 limit, bool bStored);
+ ~VDZipStream();
+
+ void Init(IVDStream *pSrc, uint64 limit, bool bStored);
+ void EnableCRC(uint32 crc = VDCRCChecker::kCRC32) { mCRCChecker.Init(crc); mbCRCEnabled = true; }
+ uint32 CRC() { return mCRCChecker.CRC(); }
+
+ const wchar_t *GetNameForError();
+
+ sint64 Pos();
+ void Read(void *buffer, sint32 bytes);
+ sint32 ReadData(void *buffer, sint32 bytes);
+ void Write(const void *buffer, sint32 bytes);
+
+protected:
+ bool ParseBlockHeader();
+ bool Inflate();
+
+ VDDeflateBitReader mBits; // critical -- make this first!
+ uint32 mReadPt, mWritePt, mBufferLevel;
+
+ enum {
+ kNoBlock,
+ kStoredBlock,
+ kDeflatedBlock
+ } mBlockType;
+
+ uint32 mStoredBytesLeft;
+ bool mbNoMoreBlocks;
+ bool mbCRCEnabled;
+
+ sint64 mPos;
+ uint8 mBuffer[65536];
+
+ uint16 mCodeDecode[32768];
+ uint8 mCodeLengths[288 + 32];
+ uint16 mDistDecode[32768];
+
+ VDCRCChecker mCRCChecker;
+};
+
+class VDZipArchive {
+public:
+ struct FileInfo {
+ VDString mFileName;
+ uint32 mCompressedSize;
+ uint32 mUncompressedSize;
+ uint32 mCRC32;
+ bool mbPacked;
+ };
+
+ VDZipArchive();
+ ~VDZipArchive();
+
+ void Init(IVDRandomAccessStream *pSrc);
+
+ sint32 GetFileCount();
+ const FileInfo& GetFileInfo(sint32 idx);
+ IVDStream *OpenRawStream(sint32 idx);
+
+protected:
+ struct FileInfoInternal : public FileInfo {
+ uint32 mDataStart;
+ };
+
+ std::vector<FileInfoInternal> mDirectory;
+ IVDRandomAccessStream *mpStream;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/system/h/stdafx.h b/src/thirdparty/VirtualDub/system/h/stdafx.h
new file mode 100644
index 000000000..21373ed9f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/h/stdafx.h
@@ -0,0 +1,12 @@
+#define _WIN32_WINNT 0x0400
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/atomic.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/error.h>
+#include <windows.h>
+#include <process.h>
+#include <intrin.h>
+#include <string.h>
+#include <stdarg.h>
+#include <math.h>
+#include <ctype.h>
diff --git a/src/thirdparty/VirtualDub/system/source/Error.cpp b/src/thirdparty/VirtualDub/system/source/Error.cpp
new file mode 100644
index 000000000..727354c96
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/Error.cpp
@@ -0,0 +1,340 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <stdio.h>
+#include <stdarg.h>
+#include <crtdbg.h>
+#include <windows.h>
+#include <vfw.h>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/Error.h>
+#include <vd2/system/log.h>
+
+MyError::MyError() {
+ buf = NULL;
+}
+
+MyError::MyError(const MyError& err) {
+ buf = _strdup(err.buf);
+}
+
+MyError::MyError(const char *f, ...)
+ : buf(NULL)
+{
+ va_list val;
+
+ va_start(val, f);
+ vsetf(f, val);
+ va_end(val);
+}
+
+MyError::~MyError() {
+ free(buf);
+}
+
+void MyError::clear() {
+ if (buf) // we do this check because debug free() always does a heapchk even if buf==NULL
+ free(buf);
+ buf = NULL;
+}
+
+void MyError::assign(const MyError& e) {
+ if (buf)
+ free(buf);
+ buf = _strdup(e.buf);
+}
+
+void MyError::assign(const char *s) {
+ if (buf)
+ free(buf);
+ buf = _strdup(s);
+}
+
+void MyError::setf(const char *f, ...) {
+ va_list val;
+
+ va_start(val, f);
+ vsetf(f,val);
+ va_end(val);
+}
+
+void MyError::vsetf(const char *f, va_list val) {
+ for(int size = 1024; size <= 32768; size += size) {
+ free(buf);
+ buf = NULL;
+
+ buf = (char *)malloc(size);
+ if (!buf)
+ return;
+
+ if ((unsigned)_vsnprintf(buf, size, f, val) < (unsigned)size)
+ return;
+ }
+
+ free(buf);
+ buf = NULL;
+}
+
+void MyError::post(HWND hWndParent, const char *title) const {
+ if (!buf || !*buf)
+ return;
+
+ VDDEBUG("*** %s: %s\n", title, buf);
+ VDLog(kVDLogError, VDswprintf(L"Error: %hs", 1, &buf));
+
+ MessageBox(hWndParent, buf, title, MB_OK | MB_ICONERROR | MB_SETFOREGROUND);
+}
+
+void MyError::discard() {
+ free(buf);
+ buf = NULL;
+}
+
+void MyError::swap(MyError& err) {
+ char *s = err.buf;
+ err.buf = buf;
+ buf = s;
+}
+
+void MyError::TransferFrom(MyError& err) {
+ if (buf)
+ free(buf);
+
+ buf = err.buf;
+ err.buf = NULL;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+static const char *GetVCMErrorString(uint32 icErr) {
+ const char *err = "(unknown)";
+
+ // Does anyone have the *real* text strings for this?
+
+ switch(icErr) {
+ case ICERR_OK: err = "The operation completed successfully."; break; // sorry, couldn't resist....
+ case ICERR_UNSUPPORTED: err = "The operation is not supported."; break;
+ case ICERR_BADFORMAT: err = "The source image format is not acceptable."; break;
+ case ICERR_MEMORY: err = "Not enough memory."; break;
+ case ICERR_INTERNAL: err = "An internal error occurred."; break;
+ case ICERR_BADFLAGS: err = "An invalid flag was specified."; break;
+ case ICERR_BADPARAM: err = "An invalid parameter was specified."; break;
+ case ICERR_BADSIZE: err = "An invalid size was specified."; break;
+ case ICERR_BADHANDLE: err = "The handle is invalid."; break;
+ case ICERR_CANTUPDATE: err = "Cannot update the destination image."; break;
+ case ICERR_ABORT: err = "The operation was aborted by the user."; break;
+ case ICERR_ERROR: err = "An unknown error occurred (may be corrupt data)."; break;
+ case ICERR_BADBITDEPTH: err = "The source color depth is not acceptable."; break;
+ case ICERR_BADIMAGESIZE: err = "The source image size is not acceptable."; break;
+ default:
+ if (icErr <= ICERR_CUSTOM) err = "A codec-specific error occurred.";
+ break;
+ }
+
+ return err;
+}
+
+MyICError::MyICError(const char *s, uint32 icErr) {
+ setf("%s error: %s (error code %ld)", s, GetVCMErrorString(icErr), icErr);
+}
+
+MyICError::MyICError(uint32 icErr, const char *format, ...) {
+ char tmpbuf[1024];
+
+ va_list val;
+ va_start(val, format);
+ tmpbuf[(sizeof tmpbuf) - 1] = 0;
+ _vsnprintf(tmpbuf, (sizeof tmpbuf) - 1, format, val);
+ va_end(val);
+
+ setf(tmpbuf, GetVCMErrorString(icErr));
+}
+
+MyMMIOError::MyMMIOError(const char *s, uint32 mmioerr) {
+ const char *err = "(Unknown)";
+
+ switch(mmioerr) {
+ case MMIOERR_FILENOTFOUND: err = "file not found"; break;
+ case MMIOERR_OUTOFMEMORY: err = "out of memory"; break;
+ case MMIOERR_CANNOTOPEN: err = "couldn't open"; break;
+ case MMIOERR_CANNOTCLOSE: err = "couldn't close"; break;
+ case MMIOERR_CANNOTREAD: err = "couldn't read"; break;
+ case MMIOERR_CANNOTWRITE: err = "couldn't write"; break;
+ case MMIOERR_CANNOTSEEK: err = "couldn't seek"; break;
+ case MMIOERR_CANNOTEXPAND: err = "couldn't expand"; break;
+ case MMIOERR_CHUNKNOTFOUND: err = "chunk not found"; break;
+ case MMIOERR_UNBUFFERED: err = "unbuffered"; break;
+ case MMIOERR_PATHNOTFOUND: err = "path not found"; break;
+ case MMIOERR_ACCESSDENIED: err = "access denied"; break;
+ case MMIOERR_SHARINGVIOLATION: err = "sharing violation"; break;
+ case MMIOERR_NETWORKERROR: err = "network error"; break;
+ case MMIOERR_TOOMANYOPENFILES: err = "too many open files"; break;
+ case MMIOERR_INVALIDFILE: err = "invalid file"; break;
+ }
+
+ setf("%s error: %s (%ld)", s, err, mmioerr);
+}
+
+MyAVIError::MyAVIError(const char *s, uint32 avierr) {
+ const char *err = "(Unknown)";
+
+ switch(avierr) {
+ case AVIERR_UNSUPPORTED: err = "unsupported"; break;
+ case AVIERR_BADFORMAT: err = "bad format"; break;
+ case AVIERR_MEMORY: err = "out of memory"; break;
+ case AVIERR_INTERNAL: err = "internal error"; break;
+ case AVIERR_BADFLAGS: err = "bad flags"; break;
+ case AVIERR_BADPARAM: err = "bad parameters"; break;
+ case AVIERR_BADSIZE: err = "bad size"; break;
+ case AVIERR_BADHANDLE: err = "bad AVIFile handle"; break;
+ case AVIERR_FILEREAD: err = "file read error"; break;
+ case AVIERR_FILEWRITE: err = "file write error"; break;
+ case AVIERR_FILEOPEN: err = "file open error"; break;
+ case AVIERR_COMPRESSOR: err = "compressor error"; break;
+ case AVIERR_NOCOMPRESSOR: err = "compressor not available"; break;
+ case AVIERR_READONLY: err = "file marked read-only"; break;
+ case AVIERR_NODATA: err = "no data (?)"; break;
+ case AVIERR_BUFFERTOOSMALL: err = "buffer too small"; break;
+ case AVIERR_CANTCOMPRESS: err = "can't compress (?)"; break;
+ case AVIERR_USERABORT: err = "aborted by user"; break;
+ case AVIERR_ERROR: err = "error (?)"; break;
+ }
+
+ setf("%s error: %s (%08lx)", s, err, avierr);
+}
+
+MyMemoryError::MyMemoryError() {
+ setf("Out of memory");
+}
+
+MyWin32Error::MyWin32Error(const char *format, uint32 err, ...) {
+ char szError[1024];
+ char szTemp[1024];
+ va_list val;
+
+ va_start(val, err);
+ szError[(sizeof szError)-1] = 0;
+ _vsnprintf(szError, (sizeof szError)-1, format, val);
+ va_end(val);
+
+ // Determine the position of the last %s, and escape everything else. This doesn't
+ // track escaped % signs properly, but it works for the strings that we receive (and at
+ // worst just produces a funny message).
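+	// For example (assuming the caller writes the system-message slot as
+	// "%%s" so it survives the first _vsnprintf pass), a message expanded to
+	//		Cannot read "sales 50%.csv": %s
+	// becomes
+	//		Cannot read "sales 50%%.csv": %s
+	// so that the setf() call at the end substitutes only the FormatMessage()
+	// text for the final %s.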
+ const char *keep = strstr(szError, "%s");
+ if (keep) {
+ for(;;) {
+ const char *test = strstr(keep + 1, "%s");
+
+ if (!test)
+ break;
+
+ keep = test;
+ }
+ }
+
+ char *t = szTemp;
+ char *end = szTemp + (sizeof szTemp) - 1;
+ const char *s = szError;
+
+ while(char c = *s++) {
+ if (c == '%') {
+ // We allow one %s to go through. Everything else gets escaped.
+ if (s-1 != keep) {
+ if (t >= end)
+ break;
+
+ *t++ = '%';
+ }
+ }
+
+ if (t >= end)
+ break;
+
+ *t++ = c;
+ }
+
+ *t = 0;
+
+ if (!FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+ 0,
+ err,
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+ szError,
+ sizeof szError,
+ NULL))
+ {
+ szError[0] = 0;
+ }
+
+ if (szError[0]) {
+ long l = strlen(szError);
+
+ if (l>1 && szError[l-2] == '\r')
+ szError[l-2] = 0;
+ else if (szError[l-1] == '\n')
+ szError[l-1] = 0;
+ }
+
+ setf(szTemp, szError);
+}
+
+MyCrashError::MyCrashError(const char *format, uint32 dwExceptionCode) {
+ const char *s = "(Unknown Exception)";
+
+ switch(dwExceptionCode) {
+ case EXCEPTION_ACCESS_VIOLATION:
+ s = "Access Violation";
+ break;
+ case EXCEPTION_PRIV_INSTRUCTION:
+ s = "Privileged Instruction";
+ break;
+ case EXCEPTION_INT_DIVIDE_BY_ZERO:
+ s = "Integer Divide By Zero";
+ break;
+ case EXCEPTION_BREAKPOINT:
+ s = "User Breakpoint";
+ break;
+ }
+
+ setf(format, s);
+}
+
+MyUserAbortError::MyUserAbortError() {
+ buf = _strdup("");
+}
+
+MyInternalError::MyInternalError(const char *format, ...) {
+ char buf[1024];
+ va_list val;
+
+ va_start(val, format);
+ _vsnprintf(buf, (sizeof buf) - 1, format, val);
+ buf[1023] = 0;
+ va_end(val);
+
+ setf("Internal error: %s", buf);
+}
diff --git a/src/thirdparty/VirtualDub/system/source/Fraction.cpp b/src/thirdparty/VirtualDub/system/source/Fraction.cpp
new file mode 100644
index 000000000..ab6693d01
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/Fraction.cpp
@@ -0,0 +1,327 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2006 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <math.h>
+
+#include <vd2/system/fraction.h>
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/math.h>
+
+VDFraction::VDFraction(double d) {
+ int xp;
+ double mant = frexp(d, &xp);
+
+ if (xp >= 33) {
+ hi = 0xFFFFFFFF;
+ lo = 1;
+ } else if (xp < -31) {
+ hi = 0;
+ lo = 1;
+ } else if (xp >= 0) {
+ *this = reduce((uint64)(0.5 + ldexp(mant, 62)), 1i64<<(62-xp));
+ } else {
+ // This is not quite accurate for very tiny numbers.
+ VDFraction t(1.0 / d);
+ lo = t.hi;
+ hi = t.lo;
+ }
+}
+
+VDFraction VDFraction::reduce(uint64 hi, uint64 lo) {
+
+ // Check for undefined.
+
+ if (!lo)
+ return VDFraction(0,0);
+
+ // Check for zero.
+
+ if (!hi) {
+ return VDFraction(0,1);
+ }
+
+ // Check for infinity.
+
+ if (!((uint64)lo>>32) && (uint64)hi > ((uint64)lo<<32)-lo)
+ return VDFraction(0xFFFFFFFFUL, 1);
+
+ // Algorithm from Wikipedia, Continued Fractions:
+ uint64 n0 = 0;
+ uint64 d0 = 1;
+ uint32 n1 = 1;
+ uint32 d1 = 0;
+ uint64 fp = 0;
+
+ uint32 n_best;
+ uint32 d_best;
+
+ for(;;) {
+ uint64 a = hi/lo; // next continued fraction term
+ uint64 f = hi%lo; // remainder
+
+ uint64 n2 = n0 + n1*a; // next convergent numerator
+ uint64 d2 = d0 + d1*a; // next convergent denominator
+
+ uint32 n_overflow = (uint32)(n2 >> 32);
+ uint32 d_overflow = (uint32)(d2 >> 32);
+
+ if (n_overflow | d_overflow) {
+ uint64 a2 = a;
+
+ // reduce last component until numerator and denominator are within range
+ if (n_overflow)
+ a2 = (0xFFFFFFFF - n0) / n1;
+
+ if (d_overflow) {
+ uint64 a3 = (0xFFFFFFFF - d0) / d1;
+ if (a2 > a3)
+ a2 = a3;
+ }
+
+ // check if new term is better
+ // 1/2a_k admissibility test
+ if (a2*2 < a || (a2*2 == a && d0*fp <= f*d1))
+ return VDFraction((uint32)n_best, (uint32)d_best);
+
+ return VDFraction((uint32)(n0 + n1*a2), (uint32)(d0 + d1*a2));
+ }
+
+ n_best = (uint32)n2;
+ d_best = (uint32)d2;
+
+ // if fraction is exact, we're done.
+ if (!f)
+ return VDFraction((uint32)n_best, (uint32)d_best);
+
+ n0 = n1;
+ n1 = (uint32)n2;
+ d0 = d1;
+ d1 = (uint32)d2;
+ fp = f;
+
+ hi = lo;
+ lo = f;
+ }
+}
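+
+// Worked example of the loop above (illustrative): reduce(23976, 1000)
+// produces the continued-fraction terms 23, 1, 40, 1, 2 and the convergents
+// 23/1, 24/1, 983/41, 1007/42, 2997/125; the remainder reaches zero on the
+// last term, so the exact lowest-terms result 2997/125 (= 23.976) is
+// returned. When a convergent would overflow 32 bits, the final term is
+// trimmed instead and the "1/2 a_k" admissibility test chooses between the
+// trimmed convergent and the previous best approximation.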
+
+// With a = aH/aL and b = bH/bL (denominators positive):
+// a (cond) b
+// a-b (cond) 0
+// aH*bL - aL*bH (cond) 0
+// aH*bL (cond) aL*bH
+
+bool VDFraction::operator==(VDFraction b) const {
+ return (uint64)hi * b.lo == (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator!=(VDFraction b) const {
+ return (uint64)hi * b.lo != (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator< (VDFraction b) const {
+ return (uint64)hi * b.lo < (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator<=(VDFraction b) const {
+ return (uint64)hi * b.lo <= (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator> (VDFraction b) const {
+ return (uint64)hi * b.lo > (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator>=(VDFraction b) const {
+ return (uint64)hi * b.lo >= (uint64)lo * b.hi;
+}
+
+VDFraction VDFraction::operator*(VDFraction b) const {
+ return reduce((uint64)hi * b.hi, (uint64)lo * b.lo);
+}
+
+VDFraction VDFraction::operator/(VDFraction b) const {
+ return reduce((uint64)hi * b.lo, (uint64)lo * b.hi);
+}
+
+VDFraction VDFraction::operator*(unsigned long b) const {
+ return reduce((uint64)hi * b, lo);
+}
+
+VDFraction VDFraction::operator/(unsigned long b) const {
+ return reduce(hi, (uint64)lo * b);
+}
+
+VDFraction& VDFraction::operator*=(VDFraction b) {
+ return *this = reduce((uint64)hi * b.hi, (uint64)lo * b.lo);
+}
+
+VDFraction& VDFraction::operator/=(VDFraction b) {
+ return *this = reduce((uint64)hi * b.lo, (uint64)lo * b.hi);
+}
+
+VDFraction& VDFraction::operator*=(unsigned long b) {
+ return *this = reduce((uint64)hi * b, lo);
+}
+
+VDFraction& VDFraction::operator/=(unsigned long b) {
+ return *this = reduce(hi, (uint64)lo * b);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+sint64 VDFraction::scale64t(sint64 v) const {
+ uint32 r;
+ return v<0 ? -VDFractionScale64(-v, hi, lo, r) : VDFractionScale64(v, hi, lo, r);
+}
+
+sint64 VDFraction::scale64u(sint64 v) const {
+ uint32 r;
+ if (v<0) {
+ v = -VDFractionScale64(-v, hi, lo, r);
+ return v;
+ } else {
+ v = +VDFractionScale64(+v, hi, lo, r);
+ return v + (r > 0);
+ }
+}
+
+sint64 VDFraction::scale64r(sint64 v) const {
+ uint32 r;
+ if (v<0) {
+ v = -VDFractionScale64(-v, hi, lo, r);
+ return v - (r >= (lo>>1) + (lo&1));
+ } else {
+ v = +VDFractionScale64(+v, hi, lo, r);
+ return v + (r >= (lo>>1) + (lo&1));
+ }
+}
+
+sint64 VDFraction::scale64it(sint64 v) const {
+ uint32 r;
+ return v<0 ? -VDFractionScale64(-v, lo, hi, r) : +VDFractionScale64(+v, lo, hi, r);
+}
+
+sint64 VDFraction::scale64ir(sint64 v) const {
+ uint32 r;
+ if (v<0) {
+ v = -VDFractionScale64(-v, lo, hi, r);
+ return v - (r >= (hi>>1) + (hi&1));
+ } else {
+ v = +VDFractionScale64(+v, lo, hi, r);
+ return v + (r >= (hi>>1) + (hi&1));
+ }
+}
+
+sint64 VDFraction::scale64iu(sint64 v) const {
+ uint32 r;
+ if (v<0) {
+ v = -VDFractionScale64(-v, lo, hi, r);
+ return v;
+ } else {
+ v = +VDFractionScale64(+v, lo, hi, r);
+ return v + (r > 0);
+ }
+}
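+
+// The scale64 helpers above all evaluate v * hi / lo (the i* variants invert
+// the fraction and evaluate v * lo / hi) through VDFractionScale64, which
+// returns the truncated quotient of a 128-bit intermediate product and the
+// remainder in r. The suffix picks the rounding mode: t truncates toward
+// zero, u rounds up on any nonzero remainder, and r rounds to nearest using
+// a half-divisor threshold, (lo>>1)+(lo&1) above. Illustrative check with
+// VDFraction(30000, 1001): scale64t(1) == 29, scale64r(1) == 30, and
+// scale64r(1001) == 30000 exactly.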
+
+///////////////////////////////////////////////////////////////////////////
+
+double VDFraction::asDouble() const {
+ return (double)hi / (double)lo;
+}
+
+double VDFraction::AsInverseDouble() const {
+ return (double)lo / (double)hi;
+}
+
+unsigned long VDFraction::roundup32ul() const {
+ return (hi + (lo-1)) / lo;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+bool VDFraction::Parse(const char *s) {
+ char c;
+
+ // skip whitespace
+ while((c = *s) && (c == ' ' || c == '\t'))
+ ++s;
+
+ // accumulate integer digits
+ uint64 x = 0;
+ uint64 y = 1;
+
+ while(c = *s) {
+ uint32 offset = (uint32)c - '0';
+
+ if (offset >= 10)
+ break;
+
+ x = (x * 10) + offset;
+
+ // check for overflow
+ if (x >> 32)
+ return false;
+
+ ++s;
+ }
+
+ if (c == '.') {
+ ++s;
+
+ while(c = *s) {
+ uint32 offset = (uint32)c - '0';
+
+ if (offset >= 10)
+ break;
+
+ if (x >= 100000000000000000 ||
+ y >= 100000000000000000) {
+ if (offset >= 5)
+ ++x;
+ while((c = *s) && (unsigned)(c - '0') < 10)
+ ++s;
+ break;
+ }
+
+ x = (x * 10) + offset;
+ y *= 10;
+ ++s;
+ }
+ }
+
+ while(c == ' ' || c == '\t')
+ c = *++s;
+
+ // check for trailing garbage
+ if (c)
+ return false;
+
+ // check for overflow
+ if (!(y >> 32) && ((uint64)(uint32)y << 32) <= x)
+ return false;
+
+ // reduce fraction and return success
+ *this = reduce(x, y);
+ return true;
+}
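+
+// Parse() accepts an optionally fractional decimal string and converts it to
+// numerator x over power-of-ten denominator y before reducing. Illustrative
+// usage:
+//
+//     VDFraction fr;
+//     if (fr.Parse("23.976")) {
+//         // fr now holds 23976/1000 in lowest terms, i.e. 2997/125
+//     }
+//
+// Leading and trailing whitespace is tolerated; trailing garbage or a value
+// of 2^32 or more makes Parse() return false.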
diff --git a/src/thirdparty/VirtualDub/system/source/VDNamespace.cpp b/src/thirdparty/VirtualDub/system/source/VDNamespace.cpp
new file mode 100644
index 000000000..8ba706fa3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/VDNamespace.cpp
@@ -0,0 +1,254 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <string.h>
+#include <ctype.h>
+#include <crtdbg.h>
+
+#include <vd2/system/list.h>
+#include <vd2/system/VDNamespace.h>
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Group
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDNamespaceGroup::VDNamespaceGroup(const char *_pszName, VDNamespaceGroup *parent)
+: VDNamespaceNode(namedup(_pszName),parent)
+{
+ const char *t = strchr(_pszName,'/');
+
+ if (t) {
+
+ } else
+ strcpy((char *)pszName, _pszName);
+}
+
+VDNamespaceGroup::~VDNamespaceGroup() {
+ delete[] (char *)pszName;
+}
+
+const char *VDNamespaceGroup::namedup(const char *s) {
+ const char *t = strchr(s,'/');
+ char *mem;
+
+ if (t) {
+ mem = new char[(t-s)+1];
+
+ memcpy(mem, s, (t-s));
+ mem[t-s] = 0;
+
+ return mem;
+ } else {
+ mem = new char[strlen(s)+1];
+
+ return strcpy(mem, s);
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Item
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDNamespaceItem::VDNamespaceItem(const char *_pszName, VDNamespaceGroup *parent, const void *src)
+: VDNamespaceNode(_pszName,parent), object(src)
+{}
+
+VDNamespaceItem::~VDNamespaceItem() {}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDNamespace
+//
+///////////////////////////////////////////////////////////////////////////
+
+bool VDNamespaceCompare(const char *psz1, const char *psz2) {
+ char c, d;
+
+ while((!!(c=toupper(*psz1++)) & !!(d=toupper(*psz2++))) && c!='/' && d!='/' && c==d)
+ ;
+
+ if (c=='/') c=0;
+ if (d=='/') d=0;
+
+ return c==d;
+}
+
+VDNamespace::VDNamespace() : root("", NULL) {
+}
+
+VDNamespace::~VDNamespace() {
+}
+
+VDNamespaceGroup *VDNamespace::_lookupGroup(const char *pszName, bool fCreate, bool fIsFilter) {
+ const char *pszNameLimit = pszName;
+ const char *slash = NULL;
+ VDNamespaceGroup *pGroup = &root, *pGroupNext;
+
+ while(*pszNameLimit) {
+ if (*pszNameLimit++ == '/')
+ slash = pszNameLimit - 1;
+ }
+
+ if (fIsFilter)
+ pszNameLimit = slash;
+
+ while(pszName < pszNameLimit) {
+ VDNamespaceGroup *pGroupParent = pGroup;
+
+ pGroup = pGroup->listGroups.AtHead();
+
+ while(pGroupNext = pGroup->NextFromHead()) {
+ if (VDNamespaceCompare(pszName, pGroup->pszName))
+ break;
+
+ pGroup = pGroupNext;
+ }
+
+ if (!pGroupNext && fCreate) {
+ pGroupNext = pGroup = new VDNamespaceGroup(pszName, pGroupParent);
+
+ pGroupParent->listGroups.AddTail(pGroup);
+ }
+
+ // group not found?
+
+ if (!pGroupNext) {
+ return NULL;
+ }
+
+ // advance to next slash
+
+ while(*pszName && *pszName++!='/')
+ ;
+ }
+
+ return pGroup;
+}
+
+void VDNamespace::clear() {
+ root.listGroups.dispose();
+ root.listItems.dispose();
+}
+
+void VDNamespace::add(const char *pszGroup, const char *pszName, const void *pDef) {
+ VDNamespaceGroup *pGroup = _lookupGroup(pszGroup, true, false);
+
+ pGroup->listItems.AddTail(new VDNamespaceItem(pszName, pGroup, pDef));
+}
+
+const void *VDNamespace::lookup(const char *pszName) {
+ VDNamespaceGroup *pGroup = _lookupGroup(pszName, false, true);
+
+ if (!pGroup)
+ return NULL;
+
+ const char *pszNameBase = pszName;
+
+ while(*pszName++)
+ if (pszName[-1]=='/')
+ pszNameBase = pszName;
+
+ for(ListAlloc<VDNamespaceItem >::fwit it = pGroup->listItems.begin(); it; ++it)
+ if (!_stricmp(it->pszName, pszNameBase))
+ return it->object;
+
+ return NULL;
+}
+
+bool VDNamespace::enumerateGroups(const VDNamespaceGroup *pGroupRoot, tGroupEnumerator pEnum, void *pvData) {
+ VDNamespaceGroup *pGroup, *pGroupNext;
+
+ pGroup = (pGroupRoot ? pGroupRoot : &root)->listGroups.AtHead();
+ while(pGroupNext = pGroup->NextFromHead()) {
+ if (!pEnum(this, pGroup->pszName, pGroup, pvData))
+ return false;
+
+ pGroup = pGroupNext;
+ }
+
+ return true;
+}
+
+bool VDNamespace::enumerateItems(const VDNamespaceGroup *pGroupRoot, tItemEnumerator pEnum, void *pvData) {
+ VDNamespaceItem *pEntry, *pEntryNext;
+
+ pEntry = pGroupRoot->listItems.AtHead();
+ while(pEntryNext = pEntry->NextFromHead()) {
+ if (!pEnum(this, pEntry->pszName, pEntry->object, pvData))
+ return false;
+
+ pEntry = pEntryNext;
+ }
+
+ return true;
+}
+
+VDNamespaceItem *VDNamespace::_findItemByObject(const VDNamespaceGroup *pGroup, const void *pObj) {
+ for(ListAlloc<VDNamespaceItem>::fwit it=pGroup->listItems.begin(); it; ++it) {
+ if (it->object == pObj) {
+ return it;
+ }
+ }
+
+ for(ListAlloc<VDNamespaceGroup>::fwit it2=pGroup->listGroups.begin(); it2; ++it2) {
+ VDNamespaceItem *v;
+
+ if (v = _findItemByObject(it2, pObj))
+ return v;
+ }
+
+ return NULL;
+}
+
+bool VDNamespace::_getPathByItem(const VDNamespaceNode *pEntry, char *buf, int maxlen) {
+ if (!pEntry)
+ return false;
+
+ if (maxlen < (int)strlen(pEntry->pszName)+2)
+ return false;
+
+ if (pEntry->pParent && pEntry->pParent->pParent) {
+ if (!_getPathByItem(pEntry->pParent, buf, maxlen))
+ return false;
+
+ while(*buf)
+ ++buf, --maxlen;
+
+ *buf++ = '/';
+ }
+
+ strcpy(buf, pEntry->pszName);
+
+ return true;
+}
+
+bool VDNamespace::getPathByItem(const void *pObj, char *buf, int maxlen) {
+ return _getPathByItem(_findItemByObject(&root, pObj), buf, maxlen);
+}
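+
+// VDNamespace keeps a tree of groups addressed by '/'-separated paths with
+// case-insensitive component matching. Illustrative usage sketch (the group
+// and item names below are arbitrary examples):
+//
+//     VDNamespace ns;
+//     ns.add("video/filters", "resize", pResizeDef);     // pResizeDef: any pointer
+//     const void *p = ns.lookup("video/filters/resize"); // returns pResizeDef
+//
+//     char path[256];
+//     ns.getPathByItem(pResizeDef, path, sizeof path);   // -> "video/filters/resize"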
diff --git a/src/thirdparty/VirtualDub/system/source/VDScheduler.cpp b/src/thirdparty/VirtualDub/system/source/VDScheduler.cpp
new file mode 100644
index 000000000..cdfc97269
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/VDScheduler.cpp
@@ -0,0 +1,261 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/VDScheduler.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/error.h>
+#include <windows.h>
+
+VDScheduler::VDScheduler()
+ : mpErrorCB(NULL)
+ , pWakeupSignal(NULL)
+ , pParentSchedulerNode(NULL)
+ , mbExitThreads(false)
+{
+}
+
+VDScheduler::~VDScheduler() {
+}
+
+void VDScheduler::setSignal(VDSignal *pSignal) {
+ pWakeupSignal = pSignal;
+}
+
+void VDScheduler::setSchedulerNode(VDSchedulerNode *pSchedulerNode) {
+ pParentSchedulerNode = pSchedulerNode;
+}
+
+void VDScheduler::BeginShutdown() {
+ mbExitThreads = true;
+ Ping();
+}
+
+void VDScheduler::Repost(VDSchedulerNode *pNode, bool bReschedule) {
+ vdsynchronized(csScheduler) {
+ if (pNode->bCondemned) {
+ tSuspendList::iterator it(listSuspends.begin()), itEnd(listSuspends.end());
+
+ while(it!=itEnd) {
+ VDSchedulerSuspendNode *pSuspendNode = *it;
+
+ if (pSuspendNode->mpNode == pNode) {
+ it = listSuspends.erase(it);
+ pSuspendNode->mSignal.signal();
+ } else
+ ++it;
+ }
+ } else {
+ pNode->bRunning = false;
+ if (bReschedule || pNode->bReschedule) {
+ pNode->bReschedule = false;
+ pNode->bReady = true;
+ listReady.push_back(pNode);
+ } else
+ listWaiting.push_back(pNode);
+ }
+ }
+}
+
+bool VDScheduler::Run() {
+ VDSchedulerNode *pNode = NULL;
+ vdsynchronized(csScheduler) {
+ if (!listReady.empty()) {
+ pNode = listReady.front();
+ listReady.pop_front();
+ pNode->bRunning = true;
+ pNode->bReady = false;
+ }
+ }
+
+ if (!pNode)
+ return false;
+
+ bool bReschedule;
+ try {
+ bReschedule = pNode->Service();
+ } catch(MyError& e) {
+ Repost(pNode, false);
+
+ vdsynchronized(csScheduler) {
+ if (mpErrorCB) {
+ if (!mpErrorCB->OnAsyncError(e))
+ throw;
+ }
+ }
+
+ return true;
+ } catch(...) {
+ Repost(pNode, false);
+ throw;
+ }
+
+ Repost(pNode, bReschedule);
+
+ return true;
+}
+
+bool VDScheduler::IdleWait() {
+ if (mbExitThreads)
+ return false;
+
+ if (pWakeupSignal) {
+#if 0
+ while(WAIT_TIMEOUT == WaitForSingleObject(pWakeupSignal->getHandle(), 1000))
+ DumpStatus();
+#else
+ pWakeupSignal->wait();
+#endif
+ }
+
+ return true;
+}
+
+void VDScheduler::Ping() {
+ if (pWakeupSignal)
+ pWakeupSignal->signal();
+}
+
+void VDScheduler::Lock() {
+ ++csScheduler;
+}
+
+void VDScheduler::Unlock() {
+ --csScheduler;
+}
+
+void VDScheduler::Reschedule(VDSchedulerNode *pNode) {
+ VDCriticalSection::AutoLock lock(csScheduler);
+
+ RescheduleFast(pNode);
+}
+
+void VDScheduler::RescheduleFast(VDSchedulerNode *pNode) {
+ if (pNode->bReady)
+ return;
+
+ pNode->bReady = true;
+
+ if (pNode->bRunning)
+ pNode->bReschedule = true;
+ else {
+ if (pWakeupSignal)
+ pWakeupSignal->signal();
+
+ if (pParentSchedulerNode)
+ pParentSchedulerNode->Reschedule();
+
+ listWaiting.erase(pNode);
+ listReady.push_back(pNode);
+ }
+}
+
+void VDScheduler::Add(VDSchedulerNode *pNode) {
+ VDASSERT(pNode);
+
+ pNode->pScheduler = this;
+ pNode->bRunning = false;
+ pNode->bReschedule = false;
+ pNode->bReady = true;
+ pNode->bCondemned = false;
+
+ vdsynchronized(csScheduler) {
+ tNodeList::iterator it(listReady.begin()), itEnd(listReady.end());
+
+ while(it != itEnd && (*it)->nPriority <= pNode->nPriority)
+ ++it;
+
+ listReady.insert(it, pNode);
+ }
+
+ if (pWakeupSignal)
+ pWakeupSignal->signal();
+
+ if (pParentSchedulerNode)
+ pParentSchedulerNode->Reschedule();
+}
+
+void VDScheduler::Remove(VDSchedulerNode *pNode) {
+ VDASSERT(pNode);
+
+ VDSchedulerSuspendNode suspendNode(pNode);
+ bool running = false;
+
+ vdsynchronized(csScheduler) {
+ pNode->bCondemned = true;
+ if (pNode->bRunning) {
+ running = true;
+ listSuspends.push_back(&suspendNode);
+ } else if (pNode->bReady)
+ listReady.erase(pNode);
+ else
+ listWaiting.erase(pNode);
+ }
+
+ if (running)
+ suspendNode.mSignal.wait();
+}
+
+void VDScheduler::DumpStatus() {
+ vdsynchronized(csScheduler) {
+ VDDEBUG2("\n Waiting nodes:\n");
+ for(tNodeList::iterator it(listWaiting.begin()), itEnd(listWaiting.end()); it!=itEnd; ++it)
+ (*it)->DumpStatus();
+ VDDEBUG2("\n Ready nodes:\n");
+ for(tNodeList::iterator it2(listReady.begin()), it2End(listReady.end()); it2!=it2End; ++it2)
+ (*it2)->DumpStatus();
+ }
+}
+
+void VDSchedulerNode::DumpStatus() {
+ VDDEBUG2(" anonymous %p\n", this);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDSchedulerThread::VDSchedulerThread()
+ : mpScheduler(NULL)
+{
+}
+
+VDSchedulerThread::~VDSchedulerThread() {
+}
+
+bool VDSchedulerThread::Start(VDScheduler *pScheduler) {
+ mpScheduler = pScheduler;
+ return VDThread::ThreadStart();
+}
+
+void VDSchedulerThread::ThreadRun() {
+ VDScheduler& scheduler = *mpScheduler;
+
+ do {
+ while(scheduler.Run())
+ ;
+ } while(scheduler.IdleWait());
+
+ scheduler.Ping();
+}
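+
+// A VDSchedulerNode supplies work through Service(): the scheduler keeps the
+// node on the ready list while Service() returns true, parks it on the
+// waiting list otherwise, and Reschedule() moves it back when new work
+// arrives. Illustrative usage sketch (MyNode and DoOneUnitOfWork are
+// hypothetical):
+//
+//     class MyNode : public VDSchedulerNode {
+//     public:
+//         bool Service() { return DoOneUnitOfWork(); }
+//     };
+//
+//     VDSignal wakeup;
+//     VDScheduler sched;
+//     sched.setSignal(&wakeup);
+//     MyNode node;
+//     sched.Add(&node);          // node is immediately ready
+//     VDSchedulerThread worker;
+//     worker.Start(&sched);      // worker loops Run()/IdleWait()
+//     ...
+//     sched.Remove(&node);       // blocks until the node is not running
+//     sched.BeginShutdown();     // makes IdleWait() return false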
diff --git a/src/thirdparty/VirtualDub/system/source/VDString.cpp b/src/thirdparty/VirtualDub/system/source/VDString.cpp
new file mode 100644
index 000000000..5877fadb5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/VDString.cpp
@@ -0,0 +1,209 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/VDString.h>
+#include <vd2/system/vdstl.h>
+
+const VDStringSpanA::value_type VDStringSpanA::sNull[1] = {0};
+
+void VDStringA::push_back_extend() {
+ VDASSERT(mpEOS == mpEnd);
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+
+ reserve_slow(current_size * 2 + 1, current_size);
+}
+
+void VDStringA::resize_slow(size_type n, size_type current_size) {
+ resize_slow(n, current_size, 0);
+}
+
+void VDStringA::resize_slow(size_type n, size_type current_size, value_type c) {
+ VDASSERT(n > current_size);
+
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ if (n > current_capacity)
+ reserve_slow(n, current_capacity);
+
+ memset(mpBegin + current_size, c, n - current_size);
+ mpEnd = mpBegin + n;
+ *mpEnd = 0;
+}
+
+void VDStringA::reserve_slow(size_type n, size_type current_capacity) {
+ VDASSERT(n > current_capacity);
+
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+ value_type *s = new value_type[n + 1];
+ memcpy(s, mpBegin, (current_size + 1) * sizeof(value_type));
+ if (mpBegin != sNull)
+ delete[] mpBegin;
+
+ mpBegin = s;
+ mpEnd = s + current_size;
+ mpEOS = s + n;
+}
+
+void VDStringA::reserve_amortized_slow(size_type n, size_type current_size, size_type current_capacity) {
+ n += current_size;
+
+ size_type doublesize = current_size * 2;
+ if (n < doublesize)
+ n = doublesize;
+
+ reserve_slow(n, current_capacity);
+}
+
+VDStringA& VDStringA::sprintf(const value_type *format, ...) {
+ clear();
+ va_list val;
+ va_start(val, format);
+ append_vsprintf(format, val);
+ va_end(val);
+ return *this;
+}
+
+VDStringA& VDStringA::append_sprintf(const value_type *format, ...) {
+ va_list val;
+ va_start(val, format);
+ append_vsprintf(format, val);
+ va_end(val);
+ return *this;
+}
+
+VDStringA& VDStringA::append_vsprintf(const value_type *format, va_list val) {
+ char buf[2048];
+
+ int len = _vsnprintf(buf, 2048, format, val);
+ if (len >= 0)
+ append(buf, buf+len);
+ else {
+ int len;
+
+ vdfastvector<char> tmp;
+ for(int siz = 8192; siz <= 65536; siz += siz) {
+ tmp.resize(siz);
+
+ char *tmpp = tmp.data();
+ len = _vsnprintf(tmp.data(), siz, format, val);
+ if (len >= 0) {
+ append(tmpp, tmpp+len);
+ break;
+ }
+ }
+ }
+
+ return *this;
+}
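+
+// append_vsprintf() formats into a 2K stack buffer first and only falls back
+// to a growing heap buffer (8K up to 64K) when _vsnprintf signals truncation
+// with a negative return. Illustrative use of the sprintf family (cur and
+// total are arbitrary unsigned integers):
+//
+//     VDStringA s;
+//     s.sprintf("frame %u of %u", cur, total);            // replaces contents
+//     s.append_sprintf(" (%.1f%%)", 100.0 * cur / total); // appends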
+
+///////////////////////////////////////////////////////////////////////////////
+
+const VDStringSpanW::value_type VDStringSpanW::sNull[1] = {0};
+
+void VDStringW::push_back_extend() {
+ VDASSERT(mpEOS == mpEnd);
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+
+ reserve_slow(current_size * 2 + 1, current_size);
+}
+
+void VDStringW::resize_slow(size_type n, size_type current_size) {
+ VDASSERT(n > current_size);
+
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ if (n > current_capacity)
+ reserve_slow(n, current_capacity);
+
+ mpEnd = mpBegin + n;
+ *mpEnd = 0;
+}
+
+void VDStringW::reserve_slow(size_type n, size_type current_capacity) {
+ VDASSERT(current_capacity == (size_type)(mpEOS - mpBegin));
+ VDASSERT(n > current_capacity);
+
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+ value_type *s = new value_type[n + 1];
+ memcpy(s, mpBegin, (current_size + 1) * sizeof(value_type));
+ if (mpBegin != sNull)
+ delete[] mpBegin;
+
+ mpBegin = s;
+ mpEnd = s + current_size;
+ mpEOS = s + n;
+}
+
+void VDStringW::reserve_amortized_slow(size_type n, size_type current_size, size_type current_capacity) {
+ n += current_size;
+
+ size_type doublesize = current_size * 2;
+ if (n < doublesize)
+ n = doublesize;
+
+ reserve_slow(n, current_capacity);
+}
+
+VDStringW& VDStringW::sprintf(const value_type *format, ...) {
+ clear();
+ va_list val;
+ va_start(val, format);
+ append_vsprintf(format, val);
+ va_end(val);
+ return *this;
+}
+
+VDStringW& VDStringW::append_sprintf(const value_type *format, ...) {
+ va_list val;
+ va_start(val, format);
+ append_vsprintf(format, val);
+ va_end(val);
+ return *this;
+}
+
+VDStringW& VDStringW::append_vsprintf(const value_type *format, va_list val) {
+ wchar_t buf[1024];
+
+ int len = vswprintf(buf, 1024, format, val);
+ if (len >= 0)
+ append(buf, buf+len);
+ else {
+ int len;
+
+ vdfastvector<wchar_t> tmp;
+ for(int siz = 4096; siz <= 65536; siz += siz) {
+ tmp.resize(siz);
+
+ wchar_t *tmpp = tmp.data();
+ len = vswprintf(tmpp, siz, format, val);
+ if (len >= 0) {
+ append(tmpp, tmpp+len);
+ break;
+ }
+ }
+ }
+
+ return *this;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/a64_fraction.asm b/src/thirdparty/VirtualDub/system/source/a64_fraction.asm
new file mode 100644
index 000000000..43b0baddf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a64_fraction.asm
@@ -0,0 +1,58 @@
+; VirtualDub - Video processing and capture application
+; System library component
+; Copyright (C) 1998-2006 Avery Lee, All Rights Reserved.
+;
+; Beginning with 1.6.0, the VirtualDub system library is licensed
+; differently than the remainder of VirtualDub. This particular file is
+; thus licensed as follows (the "zlib" license):
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any
+; damages arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must
+; not claim that you wrote the original software. If you use this
+; software in a product, an acknowledgment in the product
+; documentation would be appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must
+; not be misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source
+; distribution.
+
+ segment .text
+
+;--------------------------------------------------------------------------
+; VDFractionScale64(
+; [rcx] uint64 a,
+; [rdx] uint64 b,
+; [r8] uint64 c,
+; [r9] uint32& remainder);
+;
+;
+ global VDFractionScale64
+VDFractionScale64:
+ mov rax, rcx
+ mul rdx
+ div r8
+ mov [r9], edx
+ ret
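+
+; Note: mul rdx leaves the full 128-bit product a*b in rdx:rax, and div r8
+; then produces the truncated quotient in rax (the return value) with the
+; remainder in rdx, whose low 32 bits are stored through the uint32&
+; argument. The quotient is assumed to fit in 64 bits; otherwise div faults.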
+
+;--------------------------------------------------------------------------
+; VDUMulDiv64x32(
+; [rcx] uint64 a,
+; [rdx] uint64 b,
+; [r8] uint64 c);
+;
+;
+ global VDUMulDiv64x32
+VDUMulDiv64x32:
+ mov rax, rcx
+ mul rdx
+ div r8
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/system/source/a64_int128.asm b/src/thirdparty/VirtualDub/system/source/a64_int128.asm
new file mode 100644
index 000000000..706e298f6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a64_int128.asm
@@ -0,0 +1,73 @@
+; VirtualDub - Video processing and capture application
+; System library component
+; Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+;
+; Beginning with 1.6.0, the VirtualDub system library is licensed
+; differently than the remainder of VirtualDub. This particular file is
+; thus licensed as follows (the "zlib" license):
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any
+; damages arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must
+; not claim that you wrote the original software. If you use this
+; software in a product, an acknowledgment in the product
+; documentation would be appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must
+; not be misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source
+; distribution.
+
+ segment .text
+
+ global vdasm_uint128_add
+vdasm_uint128_add:
+ mov rax, [rdx]
+ add rax, [r8]
+ mov [rcx], rax
+ mov rax, [rdx+8]
+ adc rax, [r8+8]
+ mov [rcx+8], rax
+ ret
+
+ global vdasm_uint128_sub
+vdasm_uint128_sub:
+ mov rax, [rdx]
+ sub rax, [r8]
+ mov [rcx], rax
+ mov rax, [rdx+8]
+ sbb rax, [r8+8]
+ mov [rcx+8], rax
+ ret
+
+proc_frame vdasm_uint128_mul
+ mov [rsp+8], rbx
+ [savereg rbx, 8]
+ mov [rsp+16], rsi
+ [savereg rsi, 16]
+end_prolog
+
+ mov rbx, rdx ;rbx = src1
+ mov rax, [rdx] ;rax = src1a
+ mov rsi, [r8] ;rsi = src2a
+ mul rsi ;rdx:rax = src1a*src2a
+ mov [rcx], rax ;write low result
+ mov r9, rdx ;r9 = (src1a*src2a).hi
+ mov rax, [rbx+8] ;rax = src1b
+ mul rsi ;rdx:rax = src1b*src2a
+ add r9, rax ;r9 = (src1a*src2a).hi + (src1b*src2a).lo
+ mov rax, [rbx] ;rax = src1a
+ mul qword [r8+8] ;rdx:rax = src1a*src2b
+ add rax, r9 ;rax = (src1a*src2b).lo + (src1b*src2a).lo + (src1a*src2a).hi
+ mov [rcx+8], rax ;write high result
+ mov rsi, [rsp+16]
+ mov rbx, [rsp+8]
+ ret
+endproc_frame
+
+ end
diff --git a/src/thirdparty/VirtualDub/system/source/a64_thunk.asm b/src/thirdparty/VirtualDub/system/source/a64_thunk.asm
new file mode 100644
index 000000000..b9e09e1e8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a64_thunk.asm
@@ -0,0 +1,58 @@
+ segment .text
+
+ global VDMethodToFunctionThunk64
+proc_frame VDMethodToFunctionThunk64
+ ;prolog
+ db 48h ;emit REX prefix -- first instruction must be two bytes for hot patching
+ push rbp
+ [pushreg rbp]
+
+ mov rbp, rsp ;create stack pointer
+ [setframe rbp, 0]
+
+ mov [rbp+16], rcx ;save arg1
+ [savereg rcx, 0]
+
+ mov [rbp+24], rdx ;save arg2
+ [savereg rcx, 8]
+
+ mov [rbp+32], r8 ;save arg3
+ [savereg rcx, 16]
+
+ mov [rbp+40], r9 ;save arg4
+ [savereg rcx, 24]
+
+end_prolog
+
+ ;re-copy arguments 4 and up
+ mov ecx, [rax+24]
+ or ecx, ecx
+ jz .argsdone
+ lea rdx, [rcx+48-8]
+.argsloop:
+ push qword [rsp+rdx]
+ sub ecx, 8
+ jnz .argsloop
+.argsdone:
+
+ ;load 'this' pointer
+ mov rcx, [rax+16]
+
+ ;reload arguments 1-3
+ mov rdx, [rbp+16]
+ mov r8, [rbp+24]
+ mov r9, [rbp+32]
+
+ ;reserve argument 1-4 space on stack
+ sub rsp, 32
+
+ ;call function
+ call qword [rax+8]
+
+ ;epilog
+ lea rsp, [rbp] ;pop off stack frame and any additional arg space
+ pop rbp ;restore base pointer
+ ret ;all done
+endproc_frame
+
+ end
diff --git a/src/thirdparty/VirtualDub/system/source/a_memory.asm b/src/thirdparty/VirtualDub/system/source/a_memory.asm
new file mode 100644
index 000000000..e4b6cac8b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a_memory.asm
@@ -0,0 +1,135 @@
+; VirtualDub - Video processing and capture application
+; System library component
+; Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+;
+; Beginning with 1.6.0, the VirtualDub system library is licensed
+; differently than the remainder of VirtualDub. This particular file is
+; thus licensed as follows (the "zlib" license):
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any
+; damages arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must
+; not claim that you wrote the original software. If you use this
+; software in a product, an acknowledgment in the product
+; documentation would be appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must
+; not be misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source
+; distribution.
+
+ segment .text
+
+ global _VDFastMemcpyPartialScalarAligned8
+_VDFastMemcpyPartialScalarAligned8:
+ mov eax, [esp+12]
+ mov edx, [esp+4]
+ mov ecx, [esp+8]
+ add ecx, eax
+ add edx, eax
+ neg eax
+ jz .nobytes
+ add eax, 8
+ jz .doodd
+ jmp short .xloop
+ align 16
+.xloop:
+ fild qword [ecx+eax-8]
+ fild qword [ecx+eax]
+ fxch
+ fistp qword [edx+eax-8]
+ fistp qword [edx+eax]
+ add eax,16
+ jnc .xloop
+ jnz .nobytes
+.doodd:
+ fild qword [ecx-8]
+ fistp qword [edx-8]
+.nobytes:
+ ret
+
+ global _VDFastMemcpyPartialMMX
+_VDFastMemcpyPartialMMX:
+ push edi
+ push esi
+
+ mov edi, [esp+4+8]
+ mov esi, [esp+8+8]
+ mov ecx, [esp+12+8]
+ mov edx, ecx
+ shr ecx, 2
+ and edx, 3
+ rep movsd
+ mov ecx, edx
+ rep movsb
+ pop esi
+ pop edi
+ ret
+
+ global _VDFastMemcpyPartialMMX2
+_VDFastMemcpyPartialMMX2:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebx, [esp+4+16]
+ mov edx, [esp+8+16]
+ mov eax, [esp+12+16]
+ neg eax
+ add eax, 63
+ jbe .skipblastloop
+.blastloop:
+ movq mm0, [edx]
+ movq mm1, [edx+8]
+ movq mm2, [edx+16]
+ movq mm3, [edx+24]
+ movq mm4, [edx+32]
+ movq mm5, [edx+40]
+ movq mm6, [edx+48]
+ movq mm7, [edx+56]
+ movntq [ebx], mm0
+ movntq [ebx+8], mm1
+ movntq [ebx+16], mm2
+ movntq [ebx+24], mm3
+ movntq [ebx+32], mm4
+ movntq [ebx+40], mm5
+ movntq [ebx+48], mm6
+ movntq [ebx+56], mm7
+ add ebx, 64
+ add edx, 64
+ add eax, 64
+ jnc .blastloop
+.skipblastloop:
+ sub eax, 63-7
+ jns .noextras
+.quadloop:
+ movq mm0, [edx]
+ movntq [ebx], mm0
+ add edx, 8
+ add ebx, 8
+ add eax, 8
+ jnc .quadloop
+.noextras:
+ sub eax, 7
+ jz .nooddballs
+ mov ecx, eax
+ neg ecx
+ mov esi, edx
+ mov edi, ebx
+ rep movsb
+.nooddballs:
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ end
+
diff --git a/src/thirdparty/VirtualDub/system/source/a_thunk.asm b/src/thirdparty/VirtualDub/system/source/a_thunk.asm
new file mode 100644
index 000000000..5dcdecbbe
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a_thunk.asm
@@ -0,0 +1,63 @@
+ segment .text
+
+ align 16
+ global _VDMethodToFunctionThunk32
+_VDMethodToFunctionThunk32:
+ pop eax ;get return address in thunk
+
+ ;re-copy arguments
+ movzx ecx, byte [eax+1]
+ mov edx, ecx
+argsloop:
+ push dword [esp+edx]
+ sub ecx, 4
+ jnz argsloop
+
+ push eax ;replace thunk return address
+
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ align 16
+ global _VDMethodToFunctionThunk32_4
+_VDMethodToFunctionThunk32_4:
+ pop eax ;get return address in thunk
+ push dword [esp+4] ;replicate 1st argument
+ push eax ;replace thunk return address
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ align 16
+ global _VDMethodToFunctionThunk32_8
+_VDMethodToFunctionThunk32_8:
+ pop eax ;get return address in thunk
+ push dword [esp+8] ;replicate 2nd argument
+ push dword [esp+8] ;replicate 1st argument
+ push eax ;replace thunk return address
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ align 16
+ global _VDMethodToFunctionThunk32_12
+_VDMethodToFunctionThunk32_12:
+ pop eax ;get return address in thunk
+ push dword [esp+12] ;replicate 3rd argument
+ push dword [esp+12] ;replicate 2nd argument
+ push dword [esp+12] ;replicate 1st argument
+ push eax ;replace thunk return address
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ align 16
+ global _VDMethodToFunctionThunk32_16
+_VDMethodToFunctionThunk32_16:
+ pop eax ;get return address in thunk
+ push dword [esp+16] ;replicate 4th argument
+ push dword [esp+16] ;replicate 3rd argument
+ push dword [esp+16] ;replicate 2nd argument
+ push dword [esp+16] ;replicate 1st argument
+ push eax ;replace thunk return address
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ end
diff --git a/src/thirdparty/VirtualDub/system/source/cache.cpp b/src/thirdparty/VirtualDub/system/source/cache.cpp
new file mode 100644
index 000000000..5da77d089
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/cache.cpp
@@ -0,0 +1,422 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2005 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/cache.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+VDCache::VDCache(IVDCacheAllocator *pAllocator)
+ : mpAllocator(pAllocator)
+ , mObjectCount(0)
+ , mObjectLimit(16)
+{
+}
+
+VDCache::~VDCache() {
+ Shutdown();
+}
+
+void VDCache::Shutdown() {
+ for(int i=0; i<kVDCacheStateCount; ++i) {
+ ObjectList& ol = mLists[i];
+
+ while(!ol.empty()) {
+ VDCachedObject *pObject = static_cast<VDCachedObject *>(ol.back());
+ ol.pop_back();
+
+ pObject->OnCacheEvict();
+ pObject->SetCache(NULL); // will release object
+
+ if (i != kVDCacheStateFree) {
+ VDASSERT((int)--mObjectCount >= 0);
+ }
+ }
+ }
+}
+
+int VDCache::GetStateCount(int state) {
+ vdsynchronized(mLock) {
+ return mLists[state].size();
+ }
+}
+
+void VDCache::DumpListStatus(int state) {
+ vdsynchronized(mLock) {
+ ObjectList& ol = mLists[state];
+
+ for(ObjectList::iterator it(ol.begin()), itEnd(ol.end()); it!=itEnd; ++it) {
+ VDCachedObject *pObj = static_cast<VDCachedObject *>(*it);
+
+ pObj->DumpStatus();
+ }
+ }
+}
+
+VDCachedObject *VDCache::Allocate(sint64 key) {
+ VDCachedObject *pObj = NULL;
+
+ vdsynchronized(mLock) {
+ if (mObjectCount >= mObjectLimit - 1)
+ Evict(mObjectLimit - 1);
+
+ ObjectList& fl = mLists[kVDCacheStateFree];
+ ObjectList& pl = mLists[kVDCacheStatePending];
+
+ if (fl.empty()) {
+ VDCachedObject *pNewObject = mpAllocator->OnCacheAllocate();
+
+ pNewObject->SetCache(this);
+ pNewObject->SetState(kVDCacheStateFree);
+
+ fl.push_front(pNewObject);
+ }
+
+ ++mObjectCount;
+
+ pObj = static_cast<VDCachedObject *>(fl.front());
+ VDASSERT(pObj->GetState() == kVDCacheStateFree);
+ pObj->AddRef();
+ pObj->SetState(kVDCacheStatePending);
+ pObj->mHashKey = key;
+ pl.splice(pl.begin(), fl, fl.fast_find(pObj));
+ mHash.insert(pObj);
+ }
+
+ return pObj;
+}
+
+VDCachedObject *VDCache::Create(sint64 key, bool& is_new) {
+ // The pending, ready, active, and complete lists are eligible for lookup.
+ // The free and aborted lists are not.
+
+ VDCachedObject *pObj = NULL;
+
+ is_new = false;
+
+ vdsynchronized(mLock) {
+ pObj = static_cast<VDCachedObject *>(mHash[key]);
+
+ if (pObj) {
+ pObj->AddRef();
+
+ VDASSERT(pObj->GetState() != kVDCacheStateFree);
+
+ if (pObj->GetState() == kVDCacheStateIdle) {
+ pObj->SetState(kVDCacheStateComplete);
+
+ ObjectList& il = mLists[kVDCacheStateIdle];
+ ObjectList& cl = mLists[kVDCacheStateComplete];
+
+ cl.splice(cl.begin(), il, il.fast_find(pObj));
+ }
+ }
+
+ if (!pObj) {
+ is_new = true;
+ pObj = Allocate(key);
+ }
+ }
+
+ return pObj;
+}
+
+void VDCache::Evict(uint32 level) {
+ if (mObjectCount <= level)
+ return;
+
+ int maxfree = mObjectCount - level;
+
+ ObjectList& il = mLists[kVDCacheStateIdle];
+ ObjectList& al = mLists[kVDCacheStateAborting];
+
+ while(maxfree-- > 0 && mObjectCount >= level && !il.empty()) {
+ VDCachedObject *pObject = static_cast<VDCachedObject *>(il.back());
+ VDASSERT(pObject->GetState() == kVDCacheStateIdle);
+
+ pObject->SetState(kVDCacheStateAborting);
+ al.splice(al.begin(), il, pObject);
+
+ pObject->WeakAddRef();
+
+ mLock.Unlock();
+
+ pObject->OnCacheEvict();
+ pObject->WeakRelease(); // Will move to free list.
+
+ mLock.Lock();
+ }
+}
+
+void VDCache::NotifyFree(VDCachedObject *pObject) {
+ vdsynchronized(mLock) {
+ int rc = pObject->GetRefCount();
+
+ // This check is required because it is possible for a call to
+ // Allocate() to sneak in before we acquire the lock.
+ if (rc < 0x10000) {
+ VDCacheState oldState = pObject->GetState();
+ VDCacheState newState = oldState;
+
+ if (rc & 0xfffe)
+ newState = kVDCacheStateAborting;
+ else if (pObject->IsValid())
+ newState = kVDCacheStateIdle;
+ else {
+ VDVERIFY((int)--mObjectCount >= 0);
+ newState = kVDCacheStateFree;
+ mHash.erase(pObject);
+ }
+
+ if (newState != oldState) {
+ pObject->SetState(newState);
+
+ ObjectList& nl = mLists[newState];
+ ObjectList& ol = mLists[oldState];
+ nl.splice(nl.begin(), ol, ol.fast_find(pObject));
+ }
+
+ if (oldState == kVDCacheStatePending || oldState == kVDCacheStateReady)
+ pObject->OnCacheAbortPending();
+ }
+ }
+}
+
+void VDCache::Schedule(VDCachedObject *pObject) {
+ vdsynchronized(mLock) {
+ VDCacheState oldState = pObject->GetState();
+
+ VDASSERT(oldState == kVDCacheStatePending || oldState == kVDCacheStateActive);
+
+ ObjectList& ol = mLists[oldState];
+ ObjectList& nl = mLists[kVDCacheStateReady];
+
+ nl.splice(nl.back(), ol, ol.fast_find(pObject));
+ pObject->SetState(kVDCacheStateReady);
+ }
+}
+
+VDCachedObject *VDCache::GetNextReady() {
+ VDCachedObject *pObject = NULL;
+
+ vdsynchronized(mLock) {
+ ObjectList& rl = mLists[kVDCacheStateReady];
+ ObjectList& al = mLists[kVDCacheStateActive];
+
+ if (!rl.empty()) {
+ pObject = static_cast<VDCachedObject *>(rl.front());
+ VDASSERT(pObject->GetState() == kVDCacheStateReady);
+
+ al.splice(al.end(), rl, rl.begin());
+
+ pObject->SetState(kVDCacheStateActive);
+ pObject->AddRef();
+ }
+ }
+
+ return pObject;
+}
+
+void VDCache::MarkCompleted(VDCachedObject *pObject) {
+ vdsynchronized(mLock) {
+ VDCacheState oldState = pObject->GetState();
+ VDASSERT(oldState == kVDCacheStatePending || oldState == kVDCacheStateActive);
+
+ ObjectList& al = mLists[oldState];
+ ObjectList& cl = mLists[kVDCacheStateComplete];
+
+ if (!al.empty()) {
+ cl.splice(cl.end(), al, al.fast_find(pObject));
+
+ pObject = static_cast<VDCachedObject *>(cl.back());
+ pObject->SetState(kVDCacheStateComplete);
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDCachedObject::VDCachedObject()
+ : mRefCount(0)
+ , mpCache(NULL)
+{
+}
+
+int VDCachedObject::AddRef() {
+ int rv = (mRefCount += 0x10000);
+
+ return rv >> 16;
+}
+
+int VDCachedObject::Release() {
+ int rv = (mRefCount -= 0x10000);
+
+ VDASSERT(rv >= 0);
+
+ if (rv < 0x10000) {
+ if (!rv)
+ delete this;
+ else if (mpCache)
+ mpCache->NotifyFree(this);
+ }
+
+ return rv >> 16;
+}
+
+void VDCachedObject::WeakAddRef() {
+ mRefCount += 2;
+}
+
+void VDCachedObject::WeakRelease() {
+ int rv = (mRefCount -= 2);
+
+ VDASSERT((rv & 0xffff) < 0x8000);
+
+ if (rv < 2) {
+ if (!rv)
+ delete this;
+ else
+ mpCache->NotifyFree(this);
+ }
+}
+
+void VDCachedObject::SetCache(VDCache *pCache) {
+ mpCache = pCache;
+ if (pCache)
+ ++mRefCount;
+ else {
+ if (!--mRefCount)
+ delete this;
+ }
+}
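+
+// Reference counting scheme used above: strong references are counted in
+// steps of 0x10000, weak references in steps of 2, and the owning cache adds
+// 1 through SetCache(). Release() therefore only frees or recycles an object
+// once the count drops below 0x10000 (no strong references left), and
+// NotifyFree() then inspects the remaining weak bits (rc & 0xfffe) to choose
+// between the aborting, idle and free lists.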
+
+///////////////////////////////////////////////////////////////////////////
+
+VDPool::VDPool(IVDPoolAllocator *pAllocator)
+ : mpAllocator(pAllocator)
+ , mObjectCount(0)
+ , mObjectLimit(16)
+{
+}
+
+VDPool::~VDPool() {
+ Shutdown();
+}
+
+void VDPool::Shutdown() {
+ for(int i=0; i<kVDPoolStateCount; ++i) {
+ ObjectList& ol = mLists[i];
+
+ while(!ol.empty()) {
+ VDPooledObject *pObject = static_cast<VDPooledObject *>(ol.back());
+ ol.pop_back();
+
+ pObject->SetPool(NULL); // will release object
+
+ VDASSERT((int)--mObjectCount >= 0);
+ }
+ }
+}
+
+VDPooledObject *VDPool::Allocate() {
+ VDPooledObject *pObj = NULL;
+
+ vdsynchronized(mLock) {
+ ObjectList& fl = mLists[kVDPoolStateFree];
+ ObjectList& pl = mLists[kVDPoolStateActive];
+
+ if (fl.empty()) {
+ VDPooledObject *pNewObject = mpAllocator->OnPoolAllocate();
+
+ pNewObject->SetPool(this);
+
+ fl.push_front(pNewObject);
+ ++mObjectCount;
+ }
+
+ pObj = static_cast<VDPooledObject *>(fl.front());
+ pObj->AddRef();
+ pl.splice(pl.begin(), fl, fl.fast_find(pObj));
+ }
+
+ return pObj;
+}
+
+void VDPool::NotifyFree(VDPooledObject *pObject) {
+ vdsynchronized(mLock) {
+ // This check is required because it is possible for a call to
+ // Allocate() to sneak in before we acquire the lock.
+
+ if (pObject->GetRefCount() < 2) {
+ VDPoolState oldState = kVDPoolStateActive;
+ VDPoolState newState = kVDPoolStateFree;
+
+ mLists[kVDPoolStateActive].erase(pObject);
+
+ if (mObjectCount > mObjectLimit) {
+ delete pObject;
+ --mObjectCount;
+ } else
+ mLists[kVDPoolStateFree].push_back(pObject);
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDPooledObject::VDPooledObject()
+ : mRefCount(0)
+ , mpPool(NULL)
+{
+}
+
+int VDPooledObject::AddRef() {
+ return (mRefCount += 2) >> 1;
+}
+
+int VDPooledObject::Release() {
+ int rv = (mRefCount -= 2);
+
+ VDASSERT(rv >= 0);
+
+ if (rv < 2) {
+ if (!rv)
+ delete this;
+ else if (mpPool)
+ mpPool->NotifyFree(this);
+ }
+
+ return rv >> 1;
+}
+
+void VDPooledObject::SetPool(VDPool *pPool) {
+ mpPool = pPool;
+ if (pPool)
+ ++mRefCount;
+ else {
+ if (!--mRefCount)
+ delete this;
+ }
+}
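+
+// VDPooledObject uses the same packed-counter idea with only two tiers:
+// client references count in steps of 2 and the owning pool holds the low
+// bit through SetPool(). When the last client reference goes away the object
+// is recycled onto the free list, or deleted outright if the pool already
+// holds more than mObjectLimit objects.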
diff --git a/src/thirdparty/VirtualDub/system/source/cmdline.cpp b/src/thirdparty/VirtualDub/system/source/cmdline.cpp
new file mode 100644
index 000000000..2bd1cbe42
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/cmdline.cpp
@@ -0,0 +1,178 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2005 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/cmdline.h>
+
+VDCommandLine::VDCommandLine() {
+}
+
+VDCommandLine::VDCommandLine(const wchar_t *s) {
+ Init(s);
+}
+
+VDCommandLine::~VDCommandLine() {
+}
+
+void VDCommandLine::Init(const wchar_t *s) {
+ for(;;) {
+ while(iswspace(*s))
+ ++s;
+
+ if (!*s)
+ break;
+
+ Token te = { (int)mLine.size(), *s == L'/', *s == L'"' };
+
+ if (te.mbIsSwitch) {
+ mLine.push_back(L'/');
+ ++s;
+ }
+
+ mTokens.push_back(te);
+
+ // special case for /?
+ if (te.mbIsSwitch && *s == L'?') {
+ mLine.push_back(L'?');
+ ++s;
+ }
+
+ while(*s && *s != L' ' && *s != L'/') {
+ if (te.mbIsSwitch) {
+ if (!isalnum((unsigned char)*s))
+ break;
+
+ mLine.push_back(*s++);
+ } else if (*s == L'"') {
+ ++s;
+ while(*s && *s != L'"')
+ mLine.push_back(*s++);
+
+ if (*s) {
+ ++s;
+
+ if (*s == ',') {
+ ++s;
+ break;
+ }
+ }
+ } else
+ mLine.push_back(*s++);
+ }
+
+ mLine.push_back(0);
+ }
+}
+
+uint32 VDCommandLine::GetCount() const {
+ return mTokens.size();
+}
+
+const wchar_t *VDCommandLine::operator[](int index) const {
+ return (uint32)index < mTokens.size() ? mLine.data() + mTokens[index].mTokenIndex : NULL;
+}
+
+bool VDCommandLine::GetNextArgument(VDCommandLineIterator& it, const wchar_t *& token, bool& isSwitch) const {
+ int count = (int)mTokens.size();
+
+ if (it.mIndex >= count)
+ return false;
+
+ token = mLine.data() + mTokens[it.mIndex].mTokenIndex;
+ isSwitch = mTokens[it.mIndex].mbIsSwitch;
+
+ ++it.mIndex;
+ return true;
+}
+
+bool VDCommandLine::GetNextNonSwitchArgument(VDCommandLineIterator& it, const wchar_t *& token) const {
+ int count = (int)mTokens.size();
+
+ if (it.mIndex >= count)
+ return false;
+
+ if (mTokens[it.mIndex].mbIsSwitch)
+ return false;
+
+ token = mLine.data() + mTokens[it.mIndex++].mTokenIndex;
+ return true;
+}
+
+bool VDCommandLine::GetNextSwitchArgument(VDCommandLineIterator& it, const wchar_t *& token) const {
+ int count = (int)mTokens.size();
+
+ if (it.mIndex >= count)
+ return false;
+
+ if (!mTokens[it.mIndex].mbIsSwitch)
+ return false;
+
+ token = mLine.data() + mTokens[it.mIndex++].mTokenIndex;
+ return true;
+}
+
+bool VDCommandLine::FindAndRemoveSwitch(const wchar_t *name) {
+ int count = (int)mTokens.size();
+
+ for(int i=1; i<count; ++i) {
+ if (mTokens[i].mbIsSwitch && !_wcsicmp(name, mLine.data() + mTokens[i].mTokenIndex + 1)) {
+ mTokens.erase(mTokens.begin() + i);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool VDCommandLine::FindAndRemoveSwitch(const wchar_t *name, const wchar_t *& token) {
+ int count = (int)mTokens.size();
+ size_t namelen = wcslen(name);
+
+ for(int i=1; i<count; ++i) {
+ if (!mTokens[i].mbIsSwitch)
+ continue;
+
+ const wchar_t *s = mLine.data() + mTokens[i].mTokenIndex + 1;
+
+ if (!_wcsnicmp(name, s, namelen)) {
+ token = s+namelen;
+
+ switch(*token) {
+ case L':':
+ ++token;
+ break;
+ case 0:
+ break;
+ default:
+ continue;
+ }
+
+ mTokens.erase(mTokens.begin() + i);
+ return true;
+ }
+ }
+
+ return false;
+}
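+
+// VDCommandLine splits a command line into switches (leading '/') and plain
+// or quoted arguments; token 0 is the program name and is skipped by the
+// switch lookups. Illustrative usage sketch (switch names are arbitrary):
+//
+//     VDCommandLine cmd(L"prog.exe /fullscreen \"C:\\clips\\in.avi\"");
+//     bool fs = cmd.FindAndRemoveSwitch(L"fullscreen");   // true, switch removed
+//     const wchar_t *path = cmd[1];                       // the quoted path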
diff --git a/src/thirdparty/VirtualDub/system/source/cpuaccel.cpp b/src/thirdparty/VirtualDub/system/source/cpuaccel.cpp
new file mode 100644
index 000000000..eb326e9ae
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/cpuaccel.cpp
@@ -0,0 +1,251 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <wtypes.h>
+#include <winnt.h>
+#include <intrin.h>
+#include <vd2/system/cpuaccel.h>
+
+static long g_lCPUExtensionsEnabled;
+static long g_lCPUExtensionsAvailable;
+
+extern "C" {
+ bool FPU_enabled, MMX_enabled, SSE_enabled, ISSE_enabled, SSE2_enabled;
+};
+
+
+#ifdef _M_AMD64
+
+ long CPUCheckForExtensions() {
+ long flags = CPUF_SUPPORTS_FPU;
+
+ // This code used to use IsProcessorFeaturePresent(), but this function is somewhat
+ // suboptimal in Win64 -- for one thing, it doesn't return true for MMX, at least
+ // on Vista 64.
+
+ // check CPUID function 1 feature bits for MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1
+ int cpuInfo[4];
+ __cpuid(cpuInfo, 1);
+
+ if (cpuInfo[3] & (1 << 23))
+ flags |= CPUF_SUPPORTS_MMX;
+
+ if (cpuInfo[3] & (1 << 25))
+ flags |= CPUF_SUPPORTS_SSE | CPUF_SUPPORTS_INTEGER_SSE;
+
+ if (cpuInfo[3] & (1 << 26))
+ flags |= CPUF_SUPPORTS_SSE2;
+
+ if (cpuInfo[2] & 0x00000001)
+ flags |= CPUF_SUPPORTS_SSE3;
+
+ if (cpuInfo[2] & 0x00000200)
+ flags |= CPUF_SUPPORTS_SSSE3;
+
+ if (cpuInfo[2] & 0x00080000)
+ flags |= CPUF_SUPPORTS_SSE41;
+
+ // check for 3DNow!, 3DNow! extensions
+ __cpuid(cpuInfo, 0x80000000);
+ if (cpuInfo[0] >= 0x80000001) {
+ __cpuid(cpuInfo, 0x80000001);
+
+ if (cpuInfo[3] & (1 << 31))
+ flags |= CPUF_SUPPORTS_3DNOW;
+
+ if (cpuInfo[3] & (1 << 30))
+ flags |= CPUF_SUPPORTS_3DNOW_EXT;
+
+ if (cpuInfo[3] & (1 << 22))
+ flags |= CPUF_SUPPORTS_INTEGER_SSE;
+ }
+
+ return flags;
+ }
+
+#else
+
+ // This is ridiculous.
+
+ static long CPUCheckForSSESupport() {
+ __try {
+ // __asm andps xmm0,xmm0
+
+ __asm _emit 0x0f
+ __asm _emit 0x54
+ __asm _emit 0xc0
+
+ } __except(EXCEPTION_EXECUTE_HANDLER) {
+ if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
+ g_lCPUExtensionsAvailable &= ~(CPUF_SUPPORTS_SSE|CPUF_SUPPORTS_SSE2|CPUF_SUPPORTS_SSE3|CPUF_SUPPORTS_SSSE3);
+ }
+
+ return g_lCPUExtensionsAvailable;
+ }
+
+ long __declspec(naked) CPUCheckForExtensions() {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ xor ebp,ebp ;cpu flags - if we don't have CPUID, we probably
+ ;won't want to try FPU optimizations.
+
+ ;check for CPUID.
+
+ pushfd ;flags -> EAX
+ pop eax
+ or eax,00200000h ;set the ID bit
+ push eax ;EAX -> flags
+ popfd
+ pushfd ;flags -> EAX
+ pop eax
+ and eax,00200000h ;ID bit set?
+ jz done ;nope...
+
+ ;CPUID exists, check for features register.
+
+ mov ebp,00000003h
+ xor eax,eax
+ cpuid
+ or eax,eax
+ jz done ;no features register?!?
+
+ ;features register exists, look for MMX, SSE, SSE2.
+
+ mov eax,1
+ cpuid
+ mov ebx,edx
+ and ebx,00800000h ;MMX is bit 23 of EDX
+ shr ebx,21
+ or ebp,ebx ;set bit 2 if MMX exists
+
+ mov ebx,edx
+ and edx,02000000h ;SSE is bit 25 of EDX
+ shr edx,25
+ neg edx
+ and edx,00000018h ;set bits 3 and 4 if SSE exists
+ or ebp,edx
+
+ and ebx,04000000h ;SSE2 is bit 26 of EDX
+ shr ebx,21
+ and ebx,00000020h ;set bit 5
+ or ebp,ebx
+
+ test ecx, 1 ;SSE3 is bit 0 of ECX
+ jz no_sse3
+ or ebp, 100h
+no_sse3:
+
+ test ecx, 200h ;SSSE3 is bit 9 of ECX
+ jz no_ssse3
+ or ebp, 200h
+no_ssse3:
+
+ test ecx, 80000h ;SSE4_1 is bit 19 of ECX
+ jz no_sse4_1
+ or ebp, 400h
+no_sse4_1:
+
+ ;check for vendor feature register (K6/Athlon).
+
+ mov eax,80000000h
+ cpuid
+ mov ecx,80000001h
+ cmp eax,ecx
+ jb done
+
+ ;vendor feature register exists, look for 3DNow! and Athlon extensions
+
+ mov eax,ecx
+ cpuid
+
+ mov eax,edx
+ and edx,80000000h ;3DNow! is bit 31
+ shr edx,25
+ or ebp,edx ;set bit 6
+
+ mov edx,eax
+ and eax,40000000h ;3DNow!2 is bit 30
+ shr eax,23
+ or ebp,eax ;set bit 7
+
+ and edx,00400000h ;AMD MMX extensions (integer SSE) is bit 22
+ shr edx,19
+ or ebp,edx
+
+ done:
+ mov eax,ebp
+ mov g_lCPUExtensionsAvailable, ebp
+
+ ;Full SSE and SSE-2 require OS support for the xmm* registers.
+
+ test eax,00000030h
+ jz nocheck
+ call CPUCheckForSSESupport
+ nocheck:
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+ }
+
+#endif
+
+long CPUEnableExtensions(long lEnableFlags) {
+ g_lCPUExtensionsEnabled = lEnableFlags;
+
+ MMX_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_MMX);
+ FPU_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_FPU);
+ SSE_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_SSE);
+ ISSE_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_INTEGER_SSE);
+ SSE2_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_SSE2);
+
+ return g_lCPUExtensionsEnabled;
+}
+
+long CPUGetAvailableExtensions() {
+ return g_lCPUExtensionsAvailable;
+}
+
+long CPUGetEnabledExtensions() {
+ return g_lCPUExtensionsEnabled;
+}
+
+void VDCPUCleanupExtensions() {
+#ifndef _M_AMD64
+ if (ISSE_enabled)
+ __asm sfence
+ if (MMX_enabled)
+ __asm emms
+#else
+ _mm_sfence();
+#endif
+}
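+
+// Typical startup sequence (illustrative): detect the available extensions
+// once, then enable some or all of them; the *_enabled globals and
+// CPUGetEnabledExtensions() drive runtime dispatch elsewhere.
+//
+//     CPUEnableExtensions(CPUCheckForExtensions());   // or a masked subset
+//     if (SSE2_enabled) {
+//         // ... run an SSE2/MMX code path ...
+//         VDCPUCleanupExtensions();                    // sfence/emms afterwards
+//     }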
diff --git a/src/thirdparty/VirtualDub/system/source/debug.cpp b/src/thirdparty/VirtualDub/system/source/debug.cpp
new file mode 100644
index 000000000..9bb6a3dc6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/debug.cpp
@@ -0,0 +1,290 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <stdio.h>
+
+#include <windows.h>
+#include <intrin.h>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/debug.h>
+#include <vd2/system/thread.h>
+
+#ifdef _DEBUG
+
+class VDSafeMessageBoxThreadW32 : public VDThread {
+public:
+ VDSafeMessageBoxThreadW32(HWND hwndParent, const char *pszText, const char *pszCaption, DWORD dwFlags)
+ : mhwndParent(hwndParent)
+ , mpszText(pszText)
+ , mpszCaption(pszCaption)
+ , mdwFlags(dwFlags)
+ {
+ }
+
+ DWORD GetResult() const { return mdwResult; }
+
+protected:
+ void ThreadRun() {
+ mdwResult = MessageBox(mhwndParent, mpszText, mpszCaption, mdwFlags);
+ }
+
+ HWND mhwndParent;
+ const char *const mpszText;
+ const char *const mpszCaption;
+ const DWORD mdwFlags;
+ DWORD mdwResult;
+};
+
+UINT VDSafeMessageBoxW32(HWND hwndParent, const char *pszText, const char *pszCaption, DWORD dwFlags) {
+ VDSafeMessageBoxThreadW32 mbox(hwndParent, pszText, pszCaption, dwFlags);
+
+ mbox.ThreadStart();
+ mbox.ThreadWait();
+ return mbox.GetResult();
+}
+
+VDAssertResult VDAssert(const char *exp, const char *file, int line) {
+ DWORD dwOldError = GetLastError();
+ char szText[1024];
+
+ VDDEBUG("%s(%d): Assert failed: %s\n", file, line, exp);
+
+ wsprintf(szText,
+ "Assert failed in module %s, line %d:\n"
+ "\n"
+ "\t%s\n"
+ "\n"
+ "Break into debugger?", file, line, exp);
+
+ UINT result = VDSafeMessageBoxW32(NULL, szText, "Assert failure", MB_ABORTRETRYIGNORE|MB_ICONWARNING|MB_TASKMODAL);
+
+ SetLastError(dwOldError);
+
+ switch(result) {
+ case IDABORT:
+ ::Sleep(250); // Pause for a moment so the VC6 debugger doesn't freeze.
+ return kVDAssertBreak;
+ case IDRETRY:
+ return kVDAssertContinue;
+ default:
+ VDNEVERHERE;
+ case IDIGNORE:
+ return kVDAssertIgnore;
+ }
+}
+
+VDAssertResult VDAssertPtr(const char *exp, const char *file, int line) {
+ DWORD dwOldError = GetLastError();
+ char szText[1024];
+
+ VDDEBUG("%s(%d): Assert failed: %s is not a valid pointer\n", file, line, exp);
+
+ wsprintf(szText,
+ "Assert failed in module %s, line %d:\n"
+ "\n"
+ "\t(%s) not a valid pointer\n"
+ "\n"
+ "Break into debugger?", file, line, exp);
+
+ UINT result = VDSafeMessageBoxW32(NULL, szText, "Assert failure", MB_ABORTRETRYIGNORE|MB_ICONWARNING|MB_TASKMODAL);
+
+ SetLastError(dwOldError);
+
+ switch(result) {
+ case IDABORT:
+ return kVDAssertBreak;
+ case IDRETRY:
+ return kVDAssertContinue;
+ default:
+ VDNEVERHERE;
+ case IDIGNORE:
+ return kVDAssertIgnore;
+ }
+}
+
+#endif
+
+void VDProtectedAutoScopeICLWorkaround() {}
+
+void VDDebugPrint(const char *format, ...) {
+ char buf[4096];
+
+ va_list val;
+ va_start(val, format);
+ _vsnprintf(buf, sizeof buf, format, val);
+ va_end(val);
+ Sleep(0);
+ OutputDebugString(buf);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
+ IVDExternalCallTrap *g_pExCallTrap;
+}
+
+void VDSetExternalCallTrap(IVDExternalCallTrap *trap) {
+ g_pExCallTrap = trap;
+}
+
+#if defined(WIN32) && defined(_M_IX86)
+ namespace {
+ bool IsFPUStateOK(unsigned& ctlword) {
+ ctlword = 0;
+
+ __asm mov eax, ctlword
+ __asm fnstcw [eax]
+
+ ctlword &= 0x0f3f;
+
+ return ctlword == 0x023f;
+ }
+
+ void ResetFPUState() {
+ static const unsigned ctlword = 0x027f;
+
+ __asm fnclex
+ __asm fldcw ctlword
+ }
+
+ bool IsSSEStateOK(uint32& ctlword) {
+ ctlword = _mm_getcsr();
+
+ // Intel C/C++ flips FTZ and DAZ. :(
+ return (ctlword & 0x7f80) == 0x1f80;
+ }
+
+ void ResetSSEState() {
+ _mm_setcsr(0x1f80);
+ }
+ }
+
+ bool IsMMXState() {
+ char buf[28];
+ unsigned short tagword;
+
+ __asm fnstenv buf // this resets the FPU control word somehow!?
+
+ tagword = *(unsigned short *)(buf + 8);
+
+ return (tagword != 0xffff);
+ }
+ void ClearMMXState() {
+ if (MMX_enabled)
+ __asm emms
+ else {
+ __asm {
+ ffree st(0)
+ ffree st(1)
+ ffree st(2)
+ ffree st(3)
+ ffree st(4)
+ ffree st(5)
+ ffree st(6)
+ ffree st(7)
+ }
+ }
+ }
+
+ void VDClearEvilCPUStates() {
+ ResetFPUState();
+ ClearMMXState();
+ }
+
+ void VDPreCheckExternalCodeCall(const char *file, int line) {
+ unsigned fpucw;
+ uint32 mxcsr;
+ bool bFPUStateBad = !IsFPUStateOK(fpucw);
+ bool bSSEStateBad = SSE_enabled && !IsSSEStateOK(mxcsr);
+ bool bMMXStateBad = IsMMXState();
+
+ if (bMMXStateBad || bFPUStateBad || bSSEStateBad) {
+ ClearMMXState();
+ ResetFPUState();
+ if (SSE_enabled)
+ ResetSSEState();
+ }
+
+ if (g_pExCallTrap) {
+ if (bMMXStateBad)
+ g_pExCallTrap->OnMMXTrap(NULL, file, line);
+
+ if (bFPUStateBad)
+ g_pExCallTrap->OnFPUTrap(NULL, file, line, fpucw);
+
+ if (bSSEStateBad)
+ g_pExCallTrap->OnSSETrap(NULL, file, line, mxcsr);
+ }
+ }
+
+ void VDPostCheckExternalCodeCall(const wchar_t *mpContext, const char *mpFile, int mLine) {
+ unsigned fpucw;
+ uint32 mxcsr;
+ bool bFPUStateBad = !IsFPUStateOK(fpucw);
+ bool bSSEStateBad = SSE_enabled && !IsSSEStateOK(mxcsr);
+ bool bMMXStateBad = IsMMXState();
+ bool bBadState = bMMXStateBad || bFPUStateBad || bSSEStateBad;
+
+ if (bBadState) {
+ ClearMMXState();
+ ResetFPUState();
+ if (SSE_enabled)
+ ResetSSEState();
+ }
+
+ if (g_pExCallTrap) {
+ if (bMMXStateBad)
+ g_pExCallTrap->OnMMXTrap(mpContext, mpFile, mLine);
+
+ if (bFPUStateBad)
+ g_pExCallTrap->OnFPUTrap(mpContext, mpFile, mLine, fpucw);
+
+ if (bSSEStateBad)
+ g_pExCallTrap->OnSSETrap(mpContext, mpFile, mLine, mxcsr);
+ }
+ }
+
+#else
+
+ bool IsMMXState() {
+ return false;
+ }
+
+ void ClearMMXState() {
+ }
+
+ void VDClearEvilCPUStates() {
+ }
+
+ void VDPreCheckExternalCodeCall(const char *file, int line) {
+ }
+
+ void VDPostCheckExternalCodeCall(const wchar_t *mpContext, const char *mpFile, int mLine) {
+ }
+
+#endif
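
The pre/post check functions above detect when external code (codecs, drivers) leaves the FPU, MMX, or SSE state dirty and then repair it. A minimal RAII sketch in the same spirit, not part of the commit: it snapshots MXCSR around a call into foreign code and puts it back if the callee changed it, whereas the commit's own ResetSSEState() forces the canonical 0x1f80 value instead. The class name is illustrative.

#include <xmmintrin.h>

class ScopedMxcsrGuardSketch {
public:
	ScopedMxcsrGuardSketch() : mSavedCsr(_mm_getcsr()) {}

	~ScopedMxcsrGuardSketch() {
		// Restore MXCSR if the guarded call changed rounding, FTZ/DAZ, or
		// left exception flags set; the commit resets to 0x1f80 instead.
		if (_mm_getcsr() != mSavedCsr)
			_mm_setcsr(mSavedCsr);
	}

private:
	unsigned int mSavedCsr;
};

// Usage: construct the guard, call the external code, and let the destructor
// clean up even if the call throws.
//   ScopedMxcsrGuardSketch guard;
//   pCodec->Decode(frame);		// hypothetical external call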
diff --git a/src/thirdparty/VirtualDub/system/source/debugx86.cpp b/src/thirdparty/VirtualDub/system/source/debugx86.cpp
new file mode 100644
index 000000000..bbbd5e180
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/debugx86.cpp
@@ -0,0 +1,154 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/debugx86.h>
+
+bool VDIsValidCallX86(const char *buf, int len) {
+ // Permissible CALL sequences that we care about:
+ //
+ // E8 xx xx xx xx CALL near relative
+ // FF (group 2) CALL near absolute indirect
+ //
+ // Minimum sequence is 2 bytes (call eax).
+ // Maximum sequence is 7 bytes (call dword ptr [eax+disp32]).
+
+ if (len >= 5 && buf[-5] == (char)0xE8)
+ return true;
+
+ // FF 14 xx CALL [reg32+reg32*scale]
+
+ if (len >= 3 && buf[-3] == (char)0xFF && buf[-2]==0x14)
+ return true;
+
+ // FF 15 xx xx xx xx CALL disp32
+
+ if (len >= 6 && buf[-6] == (char)0xFF && buf[-5]==0x15)
+ return true;
+
+ // FF 00-3F(!14/15) CALL [reg32]
+
+ if (len >= 2 && buf[-2] == (char)0xFF && (unsigned char)buf[-1] < 0x40)
+ return true;
+
+ // FF D0-D7 CALL reg32
+
+ if (len >= 2 && buf[-2] == (char)0xFF && (buf[-1]&0xF8) == 0xD0)
+ return true;
+
+ // FF 50-57 xx CALL [reg32+reg32*scale+disp8]
+
+ if (len >= 3 && buf[-3] == (char)0xFF && (buf[-2]&0xF8) == 0x50)
+ return true;
+
+ // FF 90-97 xx xx xx xx xx CALL [reg32+reg32*scale+disp32]
+
+ if (len >= 7 && buf[-7] == (char)0xFF && (buf[-6]&0xF8) == 0x90)
+ return true;
+
+ return false;
+}
+
+VDInstructionTypeX86 VDGetInstructionTypeX86(const void *p) {
+ struct local {
+ static bool RangeHitTest(const uint8 *range, uint8 c) {
+ while(*range) {
+ if (c>=range[0] && c<=range[1])
+ return true;
+ range += 2;
+ }
+
+ return false;
+ }
+ };
+
+ VDInstructionTypeX86 type = kX86InstUnknown;
+
+ __try {
+ unsigned char buf[8];
+
+ memcpy(buf, p, 8);
+
+ if (buf[0] == 0x0f && buf[1] == 0x0f)
+ type = kX86Inst3DNow; // Conveniently, all 3DNow! instructions begin 0F 0F
+ else if ((buf[0] == 0xdb || buf[0] == 0xdf) && (buf[1]>=0xe8 && buf[1]<=0xf7))
+ type = kX86InstP6; // DB/DF E8-F7: FCOMI/FCOMIP/FUCOMI/FUCOMIP (P6)
+ else if ((buf[0]&0xfe)==0xda && (buf[1]&0xe0)==0xc0)
+ type = kX86InstP6; // DA/DB C0-DF: FCMOVcc (P6)
+ else if (buf[0] == 0x0f && (buf[1]&0xf0)==0x40)
+ type = kX86InstP6; // 0F 40-4F: CMOVcc (P6)
+ else {
+ const unsigned char *s = buf;
+ bool bWide = false;
+ bool bRepF2 = false;
+ bool bRepF3 = false;
+
+ // At this point we're down to MMX, SSE, SSE2 -- which makes things simpler
+ // as we must see F2 0F, F3 0F, or 0F next. MMX ops use 0F exclusively,
+			// scalar SSE ops use F3, and SSE2 ops use F2 or F3. If we see 66 on an
+ // MMX or SSE op it's automatically SSE2 as it's either a 128-bit MMX op
+ // or a double-precision version of an SSE one.
+
+ if (*s == 0x66) { // 66h override used by SSE2 and is supposed to be ahead of F2/F3 in encodings
+ ++s;
+ bWide = true;
+ }
+
+ if (*s == 0xf2) {
+ ++s;
+ bRepF2 = true;
+ }
+
+ if (*s == 0xf3) {
+ ++s;
+ bRepF3 = true;
+ }
+
+ if (*s++ == 0x0f) {
+ // SSE - 1x, 28-2F, 5x, C2, AE
+ // MMX2 - 70, C4-C6, D7, DA, DE, E0, E3, E4, E7, EA, EE, F6, F7
+ // MMX - 6x, 7x, Dx, Ex, and Fx except for MMX2
+ // SSE2 - C3, SSE ops with 66 or F2, MMX/MMX2 ops with 66/F2/F3
+
+ static const uint8 sse_ranges[]={0x10,0x1f,0x28,0x2f,0x50,0x5f,0xc2,0xc2,0xae,0xae,0};
+ static const uint8 sse2_ranges[]={0xc3,0xc3,0};
+ static const uint8 mmx2_ranges[]={0x70,0x70,0xc4,0xc6,0xd7,0xd7,0xda,0xda,0xde,0xde,0xe0,0xe0,0xe3,0xe4,0xe7,0xe7,0xea,0xea,0xee,0xee,0xf6,0xf7,0};
+ static const uint8 mmx_ranges[]={0x60,0x7f,0xd0,0xff,0};
+
+ if (local::RangeHitTest(sse_ranges, *s))
+ type = (bWide||bRepF2) ? kX86InstSSE2 : kX86InstSSE;
+ else if (local::RangeHitTest(sse2_ranges, *s))
+ type = kX86InstSSE2;
+ else if (local::RangeHitTest(mmx2_ranges, *s))
+ type = (bWide||bRepF2||bRepF3) ? kX86InstSSE2 : kX86InstMMX2;
+ else if (local::RangeHitTest(mmx_ranges, *s))
+ type = (bWide||bRepF2||bRepF3) ? kX86InstSSE2 : kX86InstMMX;
+ }
+ }
+ } __except(1) {
+ }
+ return type;
+}
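
VDGetInstructionTypeX86() reads up to eight bytes at an arbitrary address under __try/__except, so it can safely be pointed at a fault address. A hedged usage sketch, not part of the commit: a structured-exception filter that reports which instruction-set extension the faulting instruction belongs to. The function names are illustrative; the enum values and helpers come from vd2/system/debugx86.h as shown above, and installing the filter via SetUnhandledExceptionFilter() is left to the caller.

#include <windows.h>
#include <vd2/system/debugx86.h>

static const char *DescribeInstructionSketch(const void *addr) {
	switch(VDGetInstructionTypeX86(addr)) {
	case kX86Inst3DNow:	return "3DNow! instruction";
	case kX86InstMMX:	return "MMX instruction";
	case kX86InstMMX2:	return "MMX2 (integer SSE) instruction";
	case kX86InstSSE:	return "SSE instruction";
	case kX86InstSSE2:	return "SSE2 instruction";
	case kX86InstP6:	return "P6 (CMOV/FCOMI) instruction";
	default:		return "unrecognized instruction";
	}
}

static LONG WINAPI CrashFilterSketch(EXCEPTION_POINTERS *ep) {
	// Classify the instruction that raised the exception, e.g. to flag an
	// illegal-instruction fault caused by running SSE2 code on an older CPU.
	OutputDebugStringA(DescribeInstructionSketch(ep->ExceptionRecord->ExceptionAddress));
	return EXCEPTION_EXECUTE_HANDLER;
}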
diff --git a/src/thirdparty/VirtualDub/system/source/event.cpp b/src/thirdparty/VirtualDub/system/source/event.cpp
new file mode 100644
index 000000000..368f03cb1
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/event.cpp
@@ -0,0 +1,81 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2006 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/event.h>
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDDelegate::VDDelegate() {
+ mpPrev = mpNext = this;
+}
+
+VDDelegate::~VDDelegate() {
+ VDDelegateNode *next = mpNext;
+ VDDelegateNode *prev = mpPrev;
+ prev->mpNext = next;
+ next->mpPrev = prev;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDEventBase::VDEventBase() {
+ mAnchor.mpPrev = mAnchor.mpNext = &mAnchor;
+}
+
+VDEventBase::~VDEventBase() {
+ while(mAnchor.mpPrev != &mAnchor)
+ Remove(static_cast<VDDelegate&>(*mAnchor.mpPrev));
+}
+
+void VDEventBase::Add(VDDelegate& dbase) {
+ VDDelegateNode *next = mAnchor.mpNext;
+
+ VDASSERT(dbase.mpPrev == &dbase);
+
+ mAnchor.mpNext = &dbase;
+ dbase.mpPrev = &mAnchor;
+ dbase.mpNext = next;
+ next->mpPrev = &dbase;
+}
+
+void VDEventBase::Remove(VDDelegate& dbase) {
+ VDASSERT(dbase.mpPrev != &dbase);
+
+ VDDelegateNode *next = dbase.mpNext;
+ VDDelegateNode *prev = dbase.mpPrev;
+ prev->mpNext = next;
+ next->mpPrev = prev;
+ dbase.mpPrev = dbase.mpNext = &dbase;
+}
+
+void VDEventBase::Raise(void *src, const void *info) {
+ for(VDDelegateNode *node = mAnchor.mpNext; node != &mAnchor; node = node->mpNext) {
+ VDDelegate& dbase = static_cast<VDDelegate&>(*node);
+
+ dbase.mpCallback(src, info, dbase);
+ }
+}
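
VDDelegate and VDEventBase implement the listener list as an intrusive, circular, doubly linked list with the anchor acting as sentinel: a detached node points at itself, so the destructor and Remove() can unlink unconditionally. A reduced sketch of that pattern, not part of the commit and with illustrative names:

struct IntrusiveNodeSketch {
	IntrusiveNodeSketch *prev, *next;

	IntrusiveNodeSketch() { prev = next = this; }	// detached node points at itself
	~IntrusiveNodeSketch() { Unlink(); }		// always safe, even if never linked

	void InsertAfter(IntrusiveNodeSketch& anchor) {
		next = anchor.next;
		prev = &anchor;
		anchor.next->prev = this;
		anchor.next = this;
	}

	void Unlink() {
		prev->next = next;
		next->prev = prev;
		prev = next = this;			// restore the detached invariant
	}
};

Raise() above walks this list from the anchor and invokes each delegate's stored callback; because nodes unlink themselves on destruction, the event never holds a pointer to a destroyed listener.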
diff --git a/src/thirdparty/VirtualDub/system/source/file.cpp b/src/thirdparty/VirtualDub/system/source/file.cpp
new file mode 100644
index 000000000..11ab82eeb
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/file.cpp
@@ -0,0 +1,795 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <windows.h>
+
+#include <vd2/system/error.h>
+#include <vd2/system/filesys.h>
+#include <vd2/system/VDString.h>
+#include <vd2/system/file.h>
+
+namespace {
+ bool IsWindowsNT() {
+ static bool sbIsNT = (LONG)GetVersion()>=0;
+ return sbIsNT;
+ }
+
+ bool IsHardDrivePath(const wchar_t *path) {
+ const VDStringW rootPath(VDFileGetRootPath(path));
+
+ UINT type = GetDriveTypeW(rootPath.c_str());
+
+ return type == DRIVE_FIXED || type == DRIVE_UNKNOWN || type == DRIVE_REMOVABLE;
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// VDFile
+//
+///////////////////////////////////////////////////////////////////////////////
+
+using namespace nsVDFile;
+
+VDFile::VDFile(const char *pszFileName, uint32 flags)
+ : mhFile(NULL)
+{
+ open_internal(pszFileName, NULL, flags, true);
+}
+
+VDFile::VDFile(const wchar_t *pwszFileName, uint32 flags)
+ : mhFile(NULL)
+{
+ open_internal(NULL, pwszFileName, flags, true);
+}
+
+VDFile::VDFile(HANDLE h)
+ : mhFile(h)
+{
+ LONG lo, hi = 0;
+
+ lo = SetFilePointer(h, 0, &hi, FILE_CURRENT);
+
+ mFilePosition = (uint32)lo + ((uint64)(uint32)hi << 32);
+}
+
+VDFile::~VDFile() {
+ closeNT();
+}
+
+void VDFile::open(const char *pszFilename, uint32 flags) {
+ open_internal(pszFilename, NULL, flags, true);
+}
+
+void VDFile::open(const wchar_t *pwszFilename, uint32 flags) {
+ open_internal(NULL, pwszFilename, flags, true);
+}
+
+bool VDFile::openNT(const wchar_t *pwszFilename, uint32 flags) {
+ return open_internal(NULL, pwszFilename, flags, false);
+}
+
+bool VDFile::open_internal(const char *pszFilename, const wchar_t *pwszFilename, uint32 flags, bool throwOnError) {
+ close();
+
+ mpFilename = _wcsdup(VDFileSplitPath(pszFilename ? VDTextAToW(pszFilename).c_str() : pwszFilename));
+ if (!mpFilename) {
+ if (!throwOnError)
+ return false;
+ throw MyMemoryError();
+ }
+
+ // At least one of the read/write flags must be set.
+ VDASSERT(flags & (kRead | kWrite));
+
+ DWORD dwDesiredAccess = 0;
+
+ if (flags & kRead) dwDesiredAccess = GENERIC_READ;
+ if (flags & kWrite) dwDesiredAccess |= GENERIC_WRITE;
+
+ // Win32 docs are screwed here -- FILE_SHARE_xxx is the inverse of a deny flag.
+
+ DWORD dwShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+ if (flags & kDenyRead) dwShareMode = FILE_SHARE_WRITE;
+ if (flags & kDenyWrite) dwShareMode &= ~FILE_SHARE_WRITE;
+
+ // One of the creation flags must be set.
+ VDASSERT(flags & kCreationMask);
+
+ DWORD dwCreationDisposition;
+
+ uint32 creationType = flags & kCreationMask;
+
+ switch(creationType) {
+ case kOpenExisting: dwCreationDisposition = OPEN_EXISTING; break;
+ case kOpenAlways: dwCreationDisposition = OPEN_ALWAYS; break;
+ case kCreateAlways: dwCreationDisposition = CREATE_ALWAYS; break;
+ case kCreateNew: dwCreationDisposition = CREATE_NEW; break;
+ case kTruncateExisting: dwCreationDisposition = TRUNCATE_EXISTING; break;
+ default:
+ VDNEVERHERE;
+ return false;
+ }
+
+ VDASSERT((flags & (kSequential | kRandomAccess)) != (kSequential | kRandomAccess));
+
+ DWORD dwAttributes = FILE_ATTRIBUTE_NORMAL;
+
+ if (flags & kSequential) dwAttributes |= FILE_FLAG_SEQUENTIAL_SCAN;
+ if (flags & kRandomAccess) dwAttributes |= FILE_FLAG_RANDOM_ACCESS;
+ if (flags & kWriteThrough) dwAttributes |= FILE_FLAG_WRITE_THROUGH;
+ if (flags & kUnbuffered) dwAttributes |= FILE_FLAG_NO_BUFFERING;
+
+ VDStringA tempFilenameA;
+ VDStringW tempFilenameW;
+
+ if (IsWindowsNT()) {
+ if (pszFilename) {
+ tempFilenameW = VDTextAToW(pszFilename);
+ pwszFilename = tempFilenameW.c_str();
+ pszFilename = NULL;
+ }
+ } else {
+ if (pwszFilename) {
+ tempFilenameA = VDTextWToA(pwszFilename);
+ pszFilename = tempFilenameA.c_str();
+ pwszFilename = NULL;
+ }
+ }
+
+ if (pszFilename)
+ mhFile = CreateFileA(pszFilename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwAttributes, NULL);
+ else {
+ if (!IsHardDrivePath(pwszFilename))
+			dwAttributes &= ~FILE_FLAG_NO_BUFFERING;	// unbuffered I/O only helps on local fixed disks
+
+ mhFile = CreateFileW(pwszFilename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwAttributes, NULL);
+ }
+
+ DWORD err = GetLastError();
+
+ // If we failed and FILE_FLAG_NO_BUFFERING was set, strip it and try again.
+ // VPC and Novell shares sometimes do this....
+ if (mhFile == INVALID_HANDLE_VALUE && err != ERROR_FILE_NOT_FOUND && err != ERROR_PATH_NOT_FOUND) {
+ if (dwAttributes & FILE_FLAG_NO_BUFFERING) {
+ dwAttributes &= ~FILE_FLAG_NO_BUFFERING;
+ dwAttributes |= FILE_FLAG_WRITE_THROUGH;
+
+ if (pszFilename)
+ mhFile = CreateFileA(pszFilename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwAttributes, NULL);
+ else
+ mhFile = CreateFileW(pwszFilename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwAttributes, NULL);
+
+ err = GetLastError();
+ }
+ }
+
+ // INVALID_HANDLE_VALUE isn't NULL. *sigh*
+
+ if (mhFile == INVALID_HANDLE_VALUE) {
+ mhFile = NULL;
+
+ if (!throwOnError)
+ return false;
+
+ throw MyWin32Error("Cannot open file \"%ls\":\n%%s", err, mpFilename.get());
+ }
+
+ mFilePosition = 0;
+ return true;
+}
+
+bool VDFile::closeNT() {
+ if (mhFile) {
+ HANDLE h = mhFile;
+ mhFile = NULL;
+ if (!CloseHandle(h))
+ return false;
+ }
+
+ return true;
+}
+
+void VDFile::close() {
+ if (!closeNT())
+ throw MyWin32Error("Cannot complete file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::truncateNT() {
+ return 0 != SetEndOfFile(mhFile);
+}
+
+void VDFile::truncate() {
+ if (!truncateNT())
+ throw MyWin32Error("Cannot truncate file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::extendValidNT(sint64 pos) {
+ if (GetVersion() & 0x80000000)
+ return true; // No need, Windows 95/98/ME do this automatically anyway.
+
+ // The SetFileValidData() API is only available on XP and Server 2003.
+
+ typedef BOOL (APIENTRY *tpSetFileValidData)(HANDLE hFile, LONGLONG ValidDataLength); // Windows XP, Server 2003
+ static tpSetFileValidData pSetFileValidData = (tpSetFileValidData)GetProcAddress(GetModuleHandle("kernel32"), "SetFileValidData");
+
+ if (!pSetFileValidData) {
+ SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
+ return false;
+ }
+
+ return 0 != pSetFileValidData(mhFile, pos);
+}
+
+void VDFile::extendValid(sint64 pos) {
+ if (!extendValidNT(pos))
+ throw MyWin32Error("Cannot extend file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::enableExtendValid() {
+ if (GetVersion() & 0x80000000)
+ return true; // Not Windows NT, no privileges involved
+
+ // SetFileValidData() requires the SE_MANAGE_VOLUME_NAME privilege, so we must enable it
+ // on the process token. We don't attempt to strip the privilege afterward as that would
+ // introduce race conditions.
+ bool bSuccessful = false;
+ DWORD err = 0;
+
+ SetLastError(0);
+
+ HANDLE h;
+ if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES|TOKEN_QUERY, &h)) {
+ LUID luid;
+
+ if (LookupPrivilegeValue(NULL, SE_MANAGE_VOLUME_NAME, &luid)) {
+ TOKEN_PRIVILEGES tp;
+ tp.PrivilegeCount = 1;
+ tp.Privileges[0].Luid = luid;
+ tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+ if (AdjustTokenPrivileges(h, FALSE, &tp, 0, NULL, NULL))
+ bSuccessful = true;
+ else
+ err = GetLastError();
+ }
+
+ CloseHandle(h);
+ }
+
+ if (!bSuccessful && err)
+ SetLastError(err);
+
+ return bSuccessful;
+}
+
+long VDFile::readData(void *buffer, long length) {
+ DWORD dwActual;
+
+ if (!ReadFile(mhFile, buffer, (DWORD)length, &dwActual, NULL))
+ throw MyWin32Error("Cannot read from file \"%ls\": %%s", GetLastError(), mpFilename.get());
+
+ mFilePosition += dwActual;
+
+ return dwActual;
+}
+
+void VDFile::read(void *buffer, long length) {
+ if (length != readData(buffer, length))
+ throw MyWin32Error("Cannot read from file \"%ls\": Premature end of file.", GetLastError(), mpFilename.get());
+}
+
+long VDFile::writeData(const void *buffer, long length) {
+ DWORD dwActual;
+ bool success = false;
+
+ if (!WriteFile(mhFile, buffer, (DWORD)length, &dwActual, NULL) || dwActual != (DWORD)length)
+ goto found_error;
+
+ mFilePosition += dwActual;
+
+ return dwActual;
+
+found_error:
+ throw MyWin32Error("Cannot write to file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+void VDFile::write(const void *buffer, long length) {
+ if (length != writeData(buffer, length))
+ throw MyWin32Error("Cannot write to file \"%ls\": Unable to write all data.", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::seekNT(sint64 newPos, eSeekMode mode) {
+ DWORD dwMode;
+
+ switch(mode) {
+ case kSeekStart:
+ dwMode = FILE_BEGIN;
+ break;
+ case kSeekCur:
+ dwMode = FILE_CURRENT;
+ break;
+ case kSeekEnd:
+ dwMode = FILE_END;
+ break;
+ default:
+ VDNEVERHERE;
+ return false;
+ }
+
+ union {
+ sint64 pos;
+ LONG l[2];
+ } u = { newPos };
+
+ u.l[0] = SetFilePointer(mhFile, u.l[0], &u.l[1], dwMode);
+
+ if (u.l[0] == -1 && GetLastError() != NO_ERROR)
+ return false;
+
+ mFilePosition = u.pos;
+ return true;
+}
+
+void VDFile::seek(sint64 newPos, eSeekMode mode) {
+ if (!seekNT(newPos, mode))
+ throw MyWin32Error("Cannot seek within file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::skipNT(sint64 delta) {
+ if (!delta)
+ return true;
+
+ char buf[1024];
+
+ if (delta <= sizeof buf) {
+ return (long)delta == readData(buf, (long)delta);
+ } else
+ return seekNT(delta, kSeekCur);
+}
+
+void VDFile::skip(sint64 delta) {
+ if (!delta)
+ return;
+
+ char buf[1024];
+
+ if (delta > 0 && delta <= sizeof buf) {
+ if ((long)delta != readData(buf, (long)delta))
+ throw MyWin32Error("Cannot seek within file \"%ls\": %%s", GetLastError(), mpFilename.get());
+ } else
+ seek(delta, kSeekCur);
+}
+
+sint64 VDFile::size() {
+ union {
+ uint64 siz;
+ DWORD l[2];
+ } u;
+
+ u.l[0] = GetFileSize(mhFile, &u.l[1]);
+
+ DWORD err;
+
+ if (u.l[0] == (DWORD)-1L && (err = GetLastError()) != NO_ERROR)
+ throw MyWin32Error("Cannot retrieve size of file \"%ls\": %%s", GetLastError(), mpFilename.get());
+
+ return (sint64)u.siz;
+}
+
+sint64 VDFile::tell() {
+ return mFilePosition;
+}
+
+bool VDFile::isOpen() {
+ return mhFile != 0;
+}
+
+VDFileHandle VDFile::getRawHandle() {
+ return mhFile;
+}
+
+void *VDFile::AllocUnbuffer(size_t nBytes) {
+ return VirtualAlloc(NULL, nBytes, MEM_COMMIT, PAGE_READWRITE);
+}
+
+void VDFile::FreeUnbuffer(void *p) {
+ VirtualFree(p, 0, MEM_RELEASE);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDFileStream::~VDFileStream() {
+}
+
+const wchar_t *VDFileStream::GetNameForError() {
+ return getFilenameForError();
+}
+
+sint64 VDFileStream::Pos() {
+ return tell();
+}
+
+void VDFileStream::Read(void *buffer, sint32 bytes) {
+ read(buffer, bytes);
+}
+
+sint32 VDFileStream::ReadData(void *buffer, sint32 bytes) {
+ return readData(buffer, bytes);
+}
+
+void VDFileStream::Write(const void *buffer, sint32 bytes) {
+ write(buffer, bytes);
+}
+
+sint64 VDFileStream::Length() {
+ return size();
+}
+
+void VDFileStream::Seek(sint64 offset) {
+ seek(offset);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDMemoryStream::VDMemoryStream(const void *pSrc, uint32 len)
+ : mpSrc((const char *)pSrc)
+ , mPos(0)
+ , mLength(len)
+{
+}
+
+const wchar_t *VDMemoryStream::GetNameForError() {
+ return L"memory stream";
+}
+
+sint64 VDMemoryStream::Pos() {
+ return mPos;
+}
+
+void VDMemoryStream::Read(void *buffer, sint32 bytes) {
+ if (bytes != ReadData(buffer, bytes))
+ throw MyError("Attempt to read beyond stream.");
+}
+
+sint32 VDMemoryStream::ReadData(void *buffer, sint32 bytes) {
+ if (bytes <= 0)
+ return 0;
+
+ if (bytes + mPos > mLength)
+ bytes = mLength - mPos;
+
+ if (bytes > 0) {
+ memcpy(buffer, mpSrc+mPos, bytes);
+ mPos += bytes;
+ }
+
+ return bytes;
+}
+
+void VDMemoryStream::Write(const void *buffer, sint32 bytes) {
+ throw MyError("Memory streams are read-only.");
+}
+
+sint64 VDMemoryStream::Length() {
+ return mLength;
+}
+
+void VDMemoryStream::Seek(sint64 offset) {
+ if (offset < 0 || offset > mLength)
+ throw MyError("Invalid seek position");
+
+ mPos = (uint32)offset;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDBufferedStream::VDBufferedStream(IVDRandomAccessStream *pSrc, uint32 bufferSize)
+ : mpSrc(pSrc)
+ , mBuffer(bufferSize)
+ , mBasePosition(0)
+ , mBufferOffset(0)
+ , mBufferValidSize(0)
+{
+}
+
+VDBufferedStream::~VDBufferedStream() {
+}
+
+const wchar_t *VDBufferedStream::GetNameForError() {
+ return mpSrc->GetNameForError();
+}
+
+sint64 VDBufferedStream::Pos() {
+ return mBasePosition + mBufferOffset;
+}
+
+void VDBufferedStream::Read(void *buffer, sint32 bytes) {
+ if (bytes != ReadData(buffer, bytes))
+ throw MyError("Cannot read %d bytes at location %08llx from %ls", bytes, mBasePosition + mBufferOffset, mpSrc->GetNameForError());
+}
+
+sint32 VDBufferedStream::ReadData(void *buffer, sint32 bytes) {
+ if (bytes <= 0)
+ return 0;
+
+ uint32 actual = 0;
+ for(;;) {
+ uint32 tc = mBufferValidSize - mBufferOffset;
+
+ if (tc > (uint32)bytes)
+ tc = (uint32)bytes;
+
+ if (tc) {
+ if (buffer) {
+ memcpy(buffer, mBuffer.data() + mBufferOffset, tc);
+ buffer = (char *)buffer + tc;
+ }
+
+ mBufferOffset += tc;
+ bytes -= tc;
+ actual += tc;
+
+ if (!bytes)
+ break;
+ }
+
+ // At this point, the buffer is empty.
+ if (mBufferValidSize) {
+ VDASSERT(mBufferOffset >= mBufferValidSize);
+
+ mBasePosition += mBufferValidSize;
+ mBufferOffset = 0;
+ mBufferValidSize = 0;
+ }
+
+ // If the remaining read is large, issue it directly to the underlying stream.
+ if (buffer && (uint32)bytes >= mBuffer.size() * 2) {
+ sint32 localActual = mpSrc->ReadData(buffer, bytes);
+ mBasePosition += localActual;
+ actual += localActual;
+ break;
+ }
+
+ // Refill the buffer.
+ mBufferValidSize = mpSrc->ReadData(mBuffer.data(), mBuffer.size());
+ mBufferOffset = 0;
+ if (!mBufferValidSize)
+ break;
+ }
+
+ return actual;
+}
+
+void VDBufferedStream::Write(const void *buffer, sint32 bytes) {
+ throw MyError("Buffered streams are read-only.");
+}
+
+sint64 VDBufferedStream::Length() {
+ return mpSrc->Length();
+}
+
+void VDBufferedStream::Seek(sint64 offset) {
+ // check if an in-buffer skip is possible
+ sint64 relativeOffset = offset - mBasePosition;
+ if (relativeOffset >= 0 && relativeOffset <= (sint64)mBufferValidSize) {
+ mBufferOffset = (uint32)relativeOffset;
+ return;
+ }
+
+ // flush buffer
+ mBufferOffset = 0;
+ mBufferValidSize = 0;
+
+ // issue seek
+ mpSrc->Seek(offset);
+ mBasePosition = offset;
+}
+
+void VDBufferedStream::Skip(sint64 size) {
+ sint64 targetPos = mBasePosition + mBufferOffset + size;
+ sint64 bufferEnd = mBasePosition + mBufferValidSize;
+
+ // check if we can do a buffered skip
+ if (targetPos >= bufferEnd && targetPos < bufferEnd + (sint64)mBuffer.size()) {
+ Read(NULL, (sint32)size);
+ return;
+ }
+
+ // issue a seek
+ Seek(targetPos);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDTextStream::VDTextStream(IVDStream *pSrc)
+ : mpSrc(pSrc)
+ , mBufferPos(0)
+ , mBufferLimit(0)
+ , mState(kFetchLine)
+ , mFileBuffer(kFileBufferSize)
+{
+}
+
+VDTextStream::~VDTextStream() {
+}
+
+const char *VDTextStream::GetNextLine() {
+ if (!mpSrc)
+ return NULL;
+
+ mLineBuffer.clear();
+
+ for(;;) {
+ if (mBufferPos >= mBufferLimit) {
+ mBufferPos = 0;
+ mBufferLimit = mpSrc->ReadData(mFileBuffer.data(), mFileBuffer.size());
+
+ if (!mBufferLimit) {
+ mpSrc = NULL;
+
+ if (mLineBuffer.empty())
+ return NULL;
+
+ mLineBuffer.push_back(0);
+
+ return mLineBuffer.data();
+ }
+ }
+
+ switch(mState) {
+
+ case kEatNextIfCR:
+ mState = kFetchLine;
+ if (mFileBuffer[mBufferPos] == '\r')
+ ++mBufferPos;
+ continue;
+
+ case kEatNextIfLF:
+ mState = kFetchLine;
+ if (mFileBuffer[mBufferPos] == '\n')
+ ++mBufferPos;
+ continue;
+
+ case kFetchLine:
+ uint32 base = mBufferPos;
+
+ do {
+ const char c = mFileBuffer[mBufferPos++];
+
+ if (c == '\r') {
+ mState = kEatNextIfLF;
+ mLineBuffer.insert(mLineBuffer.end(), mFileBuffer.begin() + base, mFileBuffer.begin() + (mBufferPos-1));
+ mLineBuffer.push_back(0);
+ return mLineBuffer.data();
+ }
+ if (c == '\n') {
+ mState = kEatNextIfCR;
+ mLineBuffer.insert(mLineBuffer.end(), mFileBuffer.begin() + base, mFileBuffer.begin() + (mBufferPos-1));
+ mLineBuffer.push_back(0);
+ return mLineBuffer.data();
+ }
+ } while(mBufferPos < mBufferLimit);
+ mLineBuffer.insert(mLineBuffer.end(), mFileBuffer.begin() + base, mFileBuffer.begin() + mBufferLimit);
+ break;
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDTextInputFile::VDTextInputFile(const wchar_t *filename, uint32 flags)
+ : mFileStream(filename, flags | nsVDFile::kRead)
+ , mTextStream(&mFileStream)
+{
+}
+
+VDTextInputFile::~VDTextInputFile() {
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDTextOutputStream::VDTextOutputStream(IVDStream *stream)
+ : mpDst(stream)
+ , mLevel(0)
+{
+}
+
+VDTextOutputStream::~VDTextOutputStream() {
+ try {
+ Flush();
+ } catch(const MyError&) {
+ // ignore errors in destructor
+ }
+}
+
+void VDTextOutputStream::Flush() {
+ if (mLevel) {
+ mpDst->Write(mBuf, mLevel);
+ mLevel = 0;
+ }
+}
+
+void VDTextOutputStream::Write(const char *s, int len) {
+ PutData(s, len);
+}
+
+void VDTextOutputStream::PutLine() {
+ PutData("\r\n", 2);
+}
+
+void VDTextOutputStream::PutLine(const char *s) {
+ PutData(s, strlen(s));
+ PutData("\r\n", 2);
+}
+
+void VDTextOutputStream::FormatLine(const char *format, ...) {
+ va_list val;
+
+ va_start(val, format);
+
+ int rv = -1;
+ if (mLevel < kBufSize-4)
+ rv = _vsnprintf(mBuf+mLevel, kBufSize-mLevel, format, val);
+
+ if (rv >= 0)
+ mLevel += rv;
+ else
+ FormatLine2(format, val);
+
+ PutData("\r\n", 2);
+ va_end(val);
+}
+
+void VDTextOutputStream::FormatLine2(const char *format, va_list val) {
+ char buf[3072];
+
+ int rv = _vsnprintf(buf, 3072, format, val);
+ if (rv > 0)
+ PutData(buf, rv);
+}
+
+void VDTextOutputStream::PutData(const char *s, int len) {
+ while(len > 0) {
+ int left = kBufSize - mLevel;
+ if (!left) {
+ mpDst->Write(mBuf, kBufSize);
+ mLevel = 0;
+ left = kBufSize;
+ }
+
+ int tc = len;
+
+ if (tc > left)
+ tc = left;
+
+ memcpy(mBuf + mLevel, s, tc);
+
+ s += tc;
+ len -= tc;
+ mLevel += tc;
+ }
+}
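
VDTextStream turns any IVDStream into a line reader, handling CR, LF, CR LF, and LF CR endings with the small state machine above. A hedged usage sketch, not part of the commit: it pairs VDTextStream with the read-only VDMemoryStream defined in this same file (whose method set matches the stream interface VDTextStream expects) to split an in-memory buffer into lines. The function name is illustrative.

#include <stdio.h>
#include <vd2/system/file.h>

static void DumpLinesSketch(const char *text, uint32 len) {
	VDMemoryStream mem(text, len);		// read-only stream over the buffer
	VDTextStream lines(&mem);

	// GetNextLine() returns each line NUL-terminated and stripped of its
	// line ending, then NULL once the stream is exhausted.
	while(const char *line = lines.GetNextLine())
		puts(line);
}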
diff --git a/src/thirdparty/VirtualDub/system/source/fileasync.cpp b/src/thirdparty/VirtualDub/system/source/fileasync.cpp
new file mode 100644
index 000000000..18f97afc8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/fileasync.cpp
@@ -0,0 +1,832 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <windows.h>
+#include <malloc.h>
+#include <vd2/system/error.h>
+#include <vd2/system/file.h>
+#include <vd2/system/fileasync.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/VDString.h>
+#include <vd2/system/VDRingBuffer.h>
+#include <vd2/system/w32assist.h>
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDFileAsync - Windows 9x implementation
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDFileAsync9x : public IVDFileAsync, protected VDThread {
+public:
+ VDFileAsync9x(bool useFastMode);
+ ~VDFileAsync9x();
+
+ void SetPreemptiveExtend(bool b) { mbPreemptiveExtend = b; }
+ bool IsPreemptiveExtendActive() { return mbPreemptiveExtend; }
+
+ bool IsOpen() { return mhFileSlow != INVALID_HANDLE_VALUE; }
+
+ void Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize);
+ void Close();
+ void FastWrite(const void *pData, uint32 bytes);
+ void FastWriteEnd();
+ void Write(sint64 pos, const void *pData, uint32 bytes);
+ bool Extend(sint64 pos);
+ void Truncate(sint64 pos);
+ void SafeTruncateAndClose(sint64 pos);
+ sint64 GetSize();
+ sint64 GetFastWritePos() { return mClientFastPointer; }
+
+protected:
+ void WriteZero(sint64 pos, uint32 bytes);
+ void Seek(sint64 pos);
+ bool SeekNT(sint64 pos);
+ void ThrowError();
+ void ThreadRun();
+
+ HANDLE mhFileSlow;
+ HANDLE mhFileFast;
+ uint32 mBlockSize;
+ uint32 mBlockCount;
+ uint32 mSectorSize;
+ sint64 mClientFastPointer;
+
+ const bool mbUseFastMode;
+
+ volatile bool mbPreemptiveExtend;
+
+ enum {
+ kStateNormal,
+ kStateFlush,
+ kStateAbort
+ };
+ VDAtomicInt mState;
+
+ VDSignal mReadOccurred;
+ VDSignal mWriteOccurred;
+
+ VDRingBuffer<char, VDFileUnbufferAllocator<char> > mBuffer;
+
+ VDStringA mFilename;
+ VDAtomicPtr<MyError> mpError;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+VDFileAsync9x::VDFileAsync9x(bool useFastMode)
+ : mhFileSlow(INVALID_HANDLE_VALUE)
+ , mhFileFast(INVALID_HANDLE_VALUE)
+ , mClientFastPointer(0)
+ , mbUseFastMode(useFastMode)
+ , mbPreemptiveExtend(false)
+ , mpError(NULL)
+{
+}
+
+VDFileAsync9x::~VDFileAsync9x() {
+ Close();
+}
+
+void VDFileAsync9x::Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize) {
+ try {
+ mFilename = VDTextWToA(pszFilename);
+
+ mhFileSlow = CreateFile(mFilename.c_str(), GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_WRITE_THROUGH, NULL);
+ if (mhFileSlow == INVALID_HANDLE_VALUE)
+ throw MyWin32Error("Unable to open file \"%s\" for write: %%s", GetLastError(), mFilename.c_str());
+
+ if (mbUseFastMode)
+ mhFileFast = CreateFile(mFilename.c_str(), GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_NO_BUFFERING, NULL);
+
+ mSectorSize = 4096; // guess for now... proper way would be GetVolumeMountPoint() followed by GetDiskFreeSpace().
+
+ mBlockSize = bufferSize;
+ mBlockCount = count;
+ mBuffer.Init(count * bufferSize);
+
+ mState = kStateNormal;
+ } catch(const MyError&) {
+ Close();
+ throw;
+ }
+
+ ThreadStart();
+}
+
+void VDFileAsync9x::Close() {
+ mState = kStateAbort;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ if (mhFileSlow != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileSlow);
+ mhFileSlow = INVALID_HANDLE_VALUE;
+ }
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileFast);
+ mhFileFast = INVALID_HANDLE_VALUE;
+ }
+}
+
+void VDFileAsync9x::FastWrite(const void *pData, uint32 bytes) {
+ if (mhFileFast == INVALID_HANDLE_VALUE) {
+ if (pData)
+ Write(mClientFastPointer, pData, bytes);
+ else
+ WriteZero(mClientFastPointer, bytes);
+ } else {
+ if (mpError)
+ ThrowError();
+
+ uint32 bytesLeft = bytes;
+ while(bytesLeft) {
+ int actual;
+ void *p = mBuffer.LockWrite(bytesLeft, actual);
+
+ if (!actual) {
+ mReadOccurred.wait();
+ if (mpError)
+ ThrowError();
+ continue;
+ }
+
+ if (pData) {
+ memcpy(p, pData, actual);
+ pData = (const char *)pData + actual;
+ } else {
+ memset(p, 0, actual);
+ }
+ mBuffer.UnlockWrite(actual);
+ mWriteOccurred.signal();
+ bytesLeft -= actual;
+ }
+ }
+
+ mClientFastPointer += bytes;
+}
+
+void VDFileAsync9x::FastWriteEnd() {
+ FastWrite(NULL, mSectorSize - 1);
+
+ mState = kStateFlush;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ if (mpError)
+ ThrowError();
+}
+
+void VDFileAsync9x::Write(sint64 pos, const void *p, uint32 bytes) {
+ Seek(pos);
+
+ DWORD dwActual;
+ if (!WriteFile(mhFileSlow, p, bytes, &dwActual, NULL) || dwActual != bytes)
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s\n", GetLastError(), mFilename.c_str());
+}
+
+void VDFileAsync9x::WriteZero(sint64 pos, uint32 bytes) {
+ uint32 bufsize = bytes > 2048 ? 2048 : bytes;
+ void *p = _alloca(bufsize);
+ memset(p, 0, bufsize);
+
+ while(bytes > 0) {
+ uint32 tc = bytes > 2048 ? 2048 : bytes;
+
+ Write(pos, p, tc);
+ pos += tc;
+ bytes -= tc;
+ }
+}
+
+bool VDFileAsync9x::Extend(sint64 pos) {
+ return SeekNT(pos) && SetEndOfFile(mhFileSlow);
+}
+
+void VDFileAsync9x::Truncate(sint64 pos) {
+ Seek(pos);
+ if (!SetEndOfFile(mhFileSlow))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+void VDFileAsync9x::SafeTruncateAndClose(sint64 pos) {
+ if (mhFileSlow != INVALID_HANDLE_VALUE) {
+ FastWrite(NULL, mSectorSize - 1);
+
+ mState = kStateFlush;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ Extend(pos);
+ Close();
+ }
+}
+
+sint64 VDFileAsync9x::GetSize() {
+ DWORD dwSizeHigh;
+ DWORD dwSizeLow = GetFileSize(mhFileSlow, &dwSizeHigh);
+
+ if (dwSizeLow == (DWORD)-1 && GetLastError() != NO_ERROR)
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ return dwSizeLow + ((sint64)dwSizeHigh << 32);
+}
+
+void VDFileAsync9x::Seek(sint64 pos) {
+ if (!SeekNT(pos))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+bool VDFileAsync9x::SeekNT(sint64 pos) {
+ LONG posHi = (LONG)(pos >> 32);
+ DWORD result = SetFilePointer(mhFileSlow, (LONG)pos, &posHi, FILE_BEGIN);
+
+ if (result == INVALID_SET_FILE_POINTER) {
+ DWORD dwError = GetLastError();
+
+ if (dwError != NO_ERROR)
+ return false;
+ }
+
+ return true;
+}
+
+void VDFileAsync9x::ThrowError() {
+ MyError *e = mpError.xchg(NULL);
+
+ if (e) {
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileFast);
+ mhFileFast = INVALID_HANDLE_VALUE;
+ }
+
+ MyError tmp;
+ tmp.TransferFrom(*e);
+ delete e;
+ throw tmp;
+ }
+}
+
+void VDFileAsync9x::ThreadRun() {
+ bool bPreemptiveExtend = mbPreemptiveExtend;
+ sint64 currentSize;
+ sint64 pos = 0;
+ uint32 bufferSize = mBlockCount * mBlockSize;
+ HANDLE hFile = mhFileFast != INVALID_HANDLE_VALUE ? mhFileFast : mhFileSlow;
+
+ try {
+ if (!VDGetFileSizeW32(hFile, currentSize))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ for(;;) {
+ int state = mState;
+
+ if (state == kStateAbort)
+ break;
+
+ int actual;
+ const void *p = mBuffer.LockRead(mBlockSize, actual);
+
+ if ((uint32)actual < mBlockSize) {
+ if (state == kStateNormal) {
+ mWriteOccurred.wait();
+ continue;
+ }
+
+ VDASSERT(state == kStateFlush);
+
+ actual &= ~(mSectorSize-1);
+ if (!actual)
+ break;
+ } else {
+ if (bPreemptiveExtend) {
+ sint64 checkpt = pos + mBlockSize + bufferSize;
+
+ if (checkpt > currentSize) {
+ currentSize += bufferSize;
+ if (currentSize < checkpt)
+ currentSize = checkpt;
+
+ if (!VDSetFilePointerW32(hFile, currentSize, FILE_BEGIN)
+ || !SetEndOfFile(hFile))
+ mbPreemptiveExtend = bPreemptiveExtend = false;
+
+ if (!VDSetFilePointerW32(hFile, pos, FILE_BEGIN))
+ throw MyWin32Error("Seek error occurred on file \"%s\": %%s\n", GetLastError(), mFilename.c_str());
+ }
+ }
+ }
+
+ DWORD dwActual;
+ if (!WriteFile(hFile, p, actual, &dwActual, NULL) || dwActual != actual) {
+ DWORD dwError = GetLastError();
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s\n", dwError, mFilename.c_str());
+ }
+
+ pos += actual;
+
+ mBuffer.UnlockRead(actual);
+
+ mReadOccurred.signal();
+ }
+ } catch(MyError& e) {
+ MyError *p = new MyError;
+
+ p->TransferFrom(e);
+ delete mpError.xchg(p);
+ mReadOccurred.signal();
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDFileAsync - Windows NT implementation
+//
+///////////////////////////////////////////////////////////////////////////
+
+struct VDFileAsyncNTBuffer : public OVERLAPPED {
+ bool mbActive;
+ bool mbPending;
+ uint32 mLength;
+
+ VDFileAsyncNTBuffer() : mbActive(false), mbPending(false) { hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); }
+ ~VDFileAsyncNTBuffer() { if (hEvent) CloseHandle(hEvent); }
+};
+
+class VDFileAsyncNT : public IVDFileAsync, private VDThread {
+public:
+ VDFileAsyncNT();
+ ~VDFileAsyncNT();
+
+ void SetPreemptiveExtend(bool b) { mbPreemptiveExtend = b; }
+ bool IsPreemptiveExtendActive() { return mbPreemptiveExtend; }
+
+ bool IsOpen() { return mhFileSlow != INVALID_HANDLE_VALUE; }
+
+ void Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize);
+ void Close();
+ void FastWrite(const void *pData, uint32 bytes);
+ void FastWriteEnd();
+ void Write(sint64 pos, const void *pData, uint32 bytes);
+ bool Extend(sint64 pos);
+ void Truncate(sint64 pos);
+ void SafeTruncateAndClose(sint64 pos);
+ sint64 GetSize();
+ sint64 GetFastWritePos() { return mClientFastPointer; }
+
+protected:
+ void WriteZero(sint64 pos, uint32 bytes);
+ void Seek(sint64 pos);
+ bool SeekNT(sint64 pos);
+ void ThrowError();
+ void ThreadRun();
+
+ HANDLE mhFileSlow;
+ HANDLE mhFileFast;
+ uint32 mBlockSize;
+ uint32 mBlockCount;
+ uint32 mBufferSize;
+ uint32 mSectorSize;
+
+ enum {
+ kStateNormal,
+ kStateFlush,
+ kStateAbort
+ };
+ VDAtomicInt mState;
+
+ VDSignal mReadOccurred;
+ VDSignal mWriteOccurred;
+
+ uint32 mWriteOffset;
+ VDAtomicInt mBufferLevel;
+ sint64 mClientFastPointer;
+ sint64 mFastPointer;
+
+ volatile bool mbPreemptiveExtend;
+
+ vdautoarrayptr<VDFileAsyncNTBuffer> mpBlocks;
+
+ vdblock<char, VDFileUnbufferAllocator<char> > mBuffer;
+
+ VDAtomicPtr<MyError> mpError;
+ VDStringA mFilename;
+};
+
+VDFileAsyncNT::VDFileAsyncNT()
+ : mhFileSlow(INVALID_HANDLE_VALUE)
+ , mhFileFast(INVALID_HANDLE_VALUE)
+ , mFastPointer(0)
+ , mClientFastPointer(0)
+ , mbPreemptiveExtend(false)
+ , mpError(NULL)
+{
+}
+
+VDFileAsyncNT::~VDFileAsyncNT() {
+ Close();
+}
+
+void VDFileAsyncNT::Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize) {
+ try {
+ mFilename = VDTextWToA(pszFilename);
+
+ mhFileSlow = CreateFileW(pszFilename, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+ if (mhFileSlow == INVALID_HANDLE_VALUE)
+ throw MyWin32Error("Unable to open file \"%s\" for write: %%s", GetLastError(), mFilename.c_str());
+
+ mhFileFast = CreateFileW(pszFilename, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_NO_BUFFERING | FILE_FLAG_OVERLAPPED, NULL);
+ if (mhFileFast == INVALID_HANDLE_VALUE)
+ mhFileFast = CreateFileW(pszFilename, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_WRITE_THROUGH | FILE_FLAG_OVERLAPPED, NULL);
+
+ mSectorSize = 4096; // guess for now... proper way would be GetVolumeMountPoint() followed by GetDiskFreeSpace().
+
+ mBlockSize = bufferSize;
+ mBlockCount = count;
+ mBufferSize = mBlockSize * mBlockCount;
+
+ mWriteOffset = 0;
+ mBufferLevel = 0;
+
+ mState = kStateNormal;
+
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ mpBlocks = new VDFileAsyncNTBuffer[count];
+ mBuffer.resize(count * bufferSize);
+ ThreadStart();
+ }
+ } catch(const MyError&) {
+ Close();
+ throw;
+ }
+}
+
+void VDFileAsyncNT::Close() {
+ mState = kStateAbort;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ if (mpError) {
+ delete mpError;
+ mpError = NULL;
+ }
+
+ if (mhFileSlow != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileSlow);
+ mhFileSlow = INVALID_HANDLE_VALUE;
+ }
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileFast);
+ mhFileFast = INVALID_HANDLE_VALUE;
+ }
+
+ mpBlocks = NULL;
+}
+
+void VDFileAsyncNT::FastWrite(const void *pData, uint32 bytes) {
+ if (mhFileFast == INVALID_HANDLE_VALUE) {
+ if (pData)
+ Write(mClientFastPointer, pData, bytes);
+ else
+ WriteZero(mClientFastPointer, bytes);
+ } else {
+ if (mpError)
+ ThrowError();
+
+ uint32 bytesLeft = bytes;
+ while(bytesLeft) {
+ uint32 actual = mBufferSize - mBufferLevel;
+
+ if (actual > bytesLeft)
+ actual = bytesLeft;
+
+ if (mWriteOffset + actual > mBufferSize)
+ actual = mBufferSize - mWriteOffset;
+
+ if (!actual) {
+ mReadOccurred.wait();
+ if (mpError)
+ ThrowError();
+ continue;
+ }
+
+ if (pData) {
+ memcpy(&mBuffer[mWriteOffset], pData, actual);
+ pData = (const char *)pData + actual;
+ } else {
+ memset(&mBuffer[mWriteOffset], 0, actual);
+ }
+
+ uint32 oldWriteOffset = mWriteOffset;
+ mWriteOffset += actual;
+ if (mWriteOffset >= mBufferSize)
+ mWriteOffset = 0;
+ mBufferLevel += actual;
+
+ // only bother signaling if the write offset crossed a block boundary
+ if (oldWriteOffset % mBlockSize + actual >= mBlockSize) {
+ mWriteOccurred.signal();
+ if (mpError)
+ ThrowError();
+ }
+
+ bytesLeft -= actual;
+ }
+ }
+
+ mClientFastPointer += bytes;
+}
+
+void VDFileAsyncNT::FastWriteEnd() {
+ FastWrite(NULL, mSectorSize - 1);
+ mState = kStateFlush;
+ mWriteOccurred.signal();
+ ThreadWait();
+ if (mpError)
+ ThrowError();
+}
+
+void VDFileAsyncNT::Write(sint64 pos, const void *p, uint32 bytes) {
+ Seek(pos);
+
+ DWORD dwActual;
+ if (!WriteFile(mhFileSlow, p, bytes, &dwActual, NULL) || dwActual != bytes)
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+void VDFileAsyncNT::WriteZero(sint64 pos, uint32 bytes) {
+ uint32 bufsize = bytes > 2048 ? 2048 : bytes;
+ void *p = _alloca(bufsize);
+ memset(p, 0, bufsize);
+
+ while(bytes > 0) {
+ uint32 tc = bytes > 2048 ? 2048 : bytes;
+
+ Write(pos, p, tc);
+ pos += tc;
+ bytes -= tc;
+ }
+}
+
+bool VDFileAsyncNT::Extend(sint64 pos) {
+ return SeekNT(pos) && SetEndOfFile(mhFileSlow);
+}
+
+void VDFileAsyncNT::Truncate(sint64 pos) {
+ Seek(pos);
+ if (!SetEndOfFile(mhFileSlow))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+void VDFileAsyncNT::SafeTruncateAndClose(sint64 pos) {
+ if (isThreadAttached()) {
+ mState = kStateAbort;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ if (mpError) {
+ delete mpError;
+ mpError = NULL;
+ }
+ }
+
+ if (mhFileSlow != INVALID_HANDLE_VALUE) {
+ Extend(pos);
+ Close();
+ }
+}
+
+sint64 VDFileAsyncNT::GetSize() {
+ DWORD dwSizeHigh;
+ DWORD dwSizeLow = GetFileSize(mhFileSlow, &dwSizeHigh);
+
+ if (dwSizeLow == (DWORD)-1 && GetLastError() != NO_ERROR)
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ return dwSizeLow + ((sint64)dwSizeHigh << 32);
+}
+
+void VDFileAsyncNT::Seek(sint64 pos) {
+ if (!SeekNT(pos))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+bool VDFileAsyncNT::SeekNT(sint64 pos) {
+ LONG posHi = (LONG)(pos >> 32);
+ DWORD result = SetFilePointer(mhFileSlow, (LONG)pos, &posHi, FILE_BEGIN);
+
+ if (result == INVALID_SET_FILE_POINTER) {
+ DWORD dwError = GetLastError();
+
+ if (dwError != NO_ERROR)
+ return false;
+ }
+
+ return true;
+}
+
+void VDFileAsyncNT::ThrowError() {
+ MyError *e = mpError.xchg(NULL);
+
+ if (e) {
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileFast);
+ mhFileFast = INVALID_HANDLE_VALUE;
+ }
+
+ MyError tmp;
+ tmp.TransferFrom(*e);
+ delete e;
+ throw tmp;
+ }
+}
+
+void VDFileAsyncNT::ThreadRun() {
+ int requestHead = 0;
+ int requestTail = 0;
+ int requestCount = mBlockCount;
+ uint32 pendingLevel = 0;
+ uint32 readOffset = 0;
+ bool bPreemptiveExtend = mbPreemptiveExtend;
+ sint64 currentSize;
+
+ try {
+ if (!VDGetFileSizeW32(mhFileFast, currentSize))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ for(;;) {
+ int state = mState;
+
+ if (state == kStateAbort) {
+ typedef BOOL (WINAPI *tpCancelIo)(HANDLE);
+ static const tpCancelIo pCancelIo = (tpCancelIo)GetProcAddress(GetModuleHandle("kernel32"), "CancelIo");
+ pCancelIo(mhFileFast);
+ break;
+ }
+
+ uint32 actual = mBufferLevel - pendingLevel;
+ VDASSERT((int)actual >= 0);
+ if (readOffset + actual > mBufferSize)
+ actual = mBufferSize - readOffset;
+
+ if (actual < mBlockSize) {
+ if (state == kStateNormal || actual < mSectorSize) {
+ // check for blocks that have completed
+ bool blocksCompleted = false;
+ for(;;) {
+ VDFileAsyncNTBuffer& buf = mpBlocks[requestTail];
+
+ if (!buf.mbActive) {
+ if (state == kStateFlush)
+ goto all_done;
+
+ if (!blocksCompleted) {
+ // wait for further writes
+ mWriteOccurred.wait();
+ }
+ break;
+ }
+
+ if (buf.mbPending) {
+ HANDLE h[2] = {buf.hEvent, mWriteOccurred.getHandle()};
+ DWORD waitResult = WaitForMultipleObjects(2, h, FALSE, INFINITE);
+
+ if (waitResult == WAIT_OBJECT_0+1) // write pending
+ break;
+
+ DWORD dwActual;
+ if (!GetOverlappedResult(mhFileFast, &buf, &dwActual, TRUE))
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+ }
+
+ buf.mbActive = false;
+
+ blocksCompleted = true;
+
+ if (++requestTail >= requestCount)
+ requestTail = 0;
+
+ mBufferLevel -= buf.mLength;
+ pendingLevel -= buf.mLength;
+ VDASSERT((int)mBufferLevel >= 0);
+ VDASSERT((int)pendingLevel >= 0);
+
+ mReadOccurred.signal();
+
+ }
+
+ if (state == kStateNormal)
+ continue;
+ }
+
+ VDASSERT(state == kStateFlush);
+
+ actual &= ~(mSectorSize-1);
+
+ VDASSERT(actual > 0);
+ } else {
+ actual = mBlockSize;
+
+ if (bPreemptiveExtend) {
+ sint64 checkpt = mFastPointer + mBlockSize + mBufferSize;
+
+ if (checkpt > currentSize) {
+ currentSize += mBufferSize;
+ if (currentSize < checkpt)
+ currentSize = checkpt;
+
+ if (!VDSetFilePointerW32(mhFileFast, currentSize, FILE_BEGIN)
+ || !SetEndOfFile(mhFileFast))
+ mbPreemptiveExtend = bPreemptiveExtend = false;
+ }
+ }
+ }
+
+ // Issue a write to OS
+ VDFileAsyncNTBuffer& buf = mpBlocks[requestHead];
+
+ VDASSERT(!buf.mbActive);
+
+ DWORD dwActual;
+
+ buf.Offset = (DWORD)mFastPointer;
+ buf.OffsetHigh = (DWORD)((uint64)mFastPointer >> 32);
+ buf.Internal = 0;
+ buf.InternalHigh = 0;
+ buf.mLength = actual;
+ buf.mbPending = false;
+
+ if (!WriteFile(mhFileFast, &mBuffer[readOffset], actual, &dwActual, &buf)) {
+ if (GetLastError() != ERROR_IO_PENDING)
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ buf.mbPending = true;
+ }
+
+ buf.mbActive = true;
+
+ pendingLevel += actual;
+ VDASSERT(pendingLevel <= (uint32)mBufferLevel);
+
+ readOffset += actual;
+ VDASSERT(readOffset <= mBufferSize);
+ if (readOffset >= mBufferSize)
+ readOffset = 0;
+
+ mFastPointer += actual;
+
+ if (++requestHead >= requestCount)
+ requestHead = 0;
+ }
+all_done:
+ ;
+
+ } catch(MyError& e) {
+ MyError *p = new MyError;
+
+ p->TransferFrom(e);
+ delete mpError.xchg(p);
+ mReadOccurred.signal();
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+IVDFileAsync *VDCreateFileAsync(IVDFileAsync::Mode mode) {
+ switch(mode) {
+
+ case IVDFileAsync::kModeAsynchronous:
+ if (VDIsWindowsNT())
+ return new VDFileAsyncNT;
+ // Can't do async I/O. Fall-through to 9x method.
+ case IVDFileAsync::kModeThreaded:
+ return new VDFileAsync9x(true);
+
+ default:
+ return new VDFileAsync9x(false);
+ }
+}
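
VDFileAsyncNT keeps a ring of VDFileAsyncNTBuffer blocks and issues overlapped, unbuffered writes from a worker thread, falling back to the slow handle when the fast one cannot be opened; VDFileAsync9x does the same job with a background thread and write-through I/O. A hedged usage sketch of the interface, not part of the commit: the buffer geometry (16 blocks of 64 KiB) is arbitrary, the function name is illustrative, and it assumes fileasync.h declares a virtual destructor on IVDFileAsync so the factory result can be deleted directly. Open/FastWrite may throw MyError on I/O failure.

#include <vd2/system/fileasync.h>

static void WriteFileAsyncSketch(const wchar_t *path, const void *data, uint32 bytes) {
	IVDFileAsync *out = VDCreateFileAsync(IVDFileAsync::kModeAsynchronous);

	out->SetPreemptiveExtend(true);		// pre-extend to reduce fragmentation
	out->Open(path, 16, 65536);		// 16 buffers of 64 KiB each

	out->FastWrite(data, bytes);		// queue into the ring; blocks when full
	out->FastWriteEnd();			// pad with zeros and drain the writer thread
	out->Truncate(bytes);			// trim the zero padding back off
	out->Close();

	delete out;				// assumes a virtual destructor in fileasync.h
}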
diff --git a/src/thirdparty/VirtualDub/system/source/filesys.cpp b/src/thirdparty/VirtualDub/system/source/filesys.cpp
new file mode 100644
index 000000000..a85c0f5c7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/filesys.cpp
@@ -0,0 +1,663 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <ctype.h>
+#include <string.h>
+
+#include <vd2/system/VDString.h>
+#include <vd2/system/filesys.h>
+#include <vd2/system/Error.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/w32assist.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T, class U>
+static inline T splitimpL(const T& string, const U *s) {
+ const U *p = string.c_str();
+ return T(p, s - p);
+}
+
+template<class T, class U>
+static inline T splitimpR(const T& string, const U *s) {
+ const U *p = string.c_str();
+ return T(s);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+const char *VDFileSplitFirstDir(const char *s) {
+ const char *start = s;
+
+ while(*s++)
+ if (s[-1] == ':' || s[-1] == '\\' || s[-1] == '/')
+ return s;
+
+ return start;
+}
+
+const wchar_t *VDFileSplitFirstDir(const wchar_t *s) {
+ const wchar_t *start = s;
+
+ while(*s++)
+ if (s[-1] == L':' || s[-1] == L'\\' || s[-1] == L'/')
+ return s;
+
+ return start;
+}
+
+const char *VDFileSplitPath(const char *s) {
+ const char *lastsep = s;
+
+ while(*s++)
+ if (s[-1] == ':' || s[-1] == '\\' || s[-1] == '/')
+ lastsep = s;
+
+ return lastsep;
+}
+
+const wchar_t *VDFileSplitPath(const wchar_t *s) {
+ const wchar_t *lastsep = s;
+
+ while(*s++)
+ if (s[-1] == L':' || s[-1] == L'\\' || s[-1] == L'/')
+ lastsep = s;
+
+ return lastsep;
+}
+
+VDString VDFileSplitPathLeft (const VDString& s) { return splitimpL(s, VDFileSplitPath(s.c_str())); }
+VDStringW VDFileSplitPathLeft (const VDStringW& s) { return splitimpL(s, VDFileSplitPath(s.c_str())); }
+VDString VDFileSplitPathRight(const VDString& s) { return splitimpR(s, VDFileSplitPath(s.c_str())); }
+VDStringW VDFileSplitPathRight(const VDStringW& s) { return splitimpR(s, VDFileSplitPath(s.c_str())); }
+
+const char *VDFileSplitRoot(const char *s) {
+ // Test for a UNC path.
+ if (s[0] == '\\' && s[1] == '\\') {
+ // For these, we scan for the fourth backslash.
+ s += 2;
+ for(int i=0; i<2; ++i) {
+ while(*s && *s != '\\')
+ ++s;
+ if (*s == '\\')
+ ++s;
+ }
+ return s;
+ }
+
+ const char *const t = s;
+
+ while(*s && *s != ':' && *s != '/' && *s != '\\')
+ ++s;
+
+ return *s ? *s == ':' && (s[1]=='/' || s[1]=='\\') ? s+2 : s+1 : t;
+}
+
+const wchar_t *VDFileSplitRoot(const wchar_t *s) {
+ // Test for a UNC path.
+ if (s[0] == '\\' && s[1] == '\\') {
+ // For these, we scan for the fourth backslash.
+ s += 2;
+ for(int i=0; i<2; ++i) {
+ while(*s && *s != '\\')
+ ++s;
+ if (*s == '\\')
+ ++s;
+ }
+ return s;
+ }
+
+ const wchar_t *const t = s;
+
+ while(*s && *s != L':' && *s != L'/' && *s != L'\\')
+ ++s;
+
+ return *s ? *s == L':' && (s[1]==L'/' || s[1]==L'\\') ? s+2 : s+1 : t;
+}
+
+VDString VDFileSplitRoot(const VDString& s) { return splitimpL(s, VDFileSplitRoot(s.c_str())); }
+VDStringW VDFileSplitRoot(const VDStringW& s) { return splitimpL(s, VDFileSplitRoot(s.c_str())); }
+
+const char *VDFileSplitExt(const char *s) {
+ const char *t = s;
+
+ while(*t)
+ ++t;
+
+ const char *const end = t;
+
+ while(t>s) {
+ --t;
+
+ if (*t == '.')
+ return t;
+
+ if (*t == ':' || *t == '\\' || *t == '/')
+ break;
+ }
+
+	return end;
+}
+
+const wchar_t *VDFileSplitExt(const wchar_t *s) {
+ const wchar_t *t = s;
+
+ while(*t)
+ ++t;
+
+ const wchar_t *const end = t;
+
+ while(t>s) {
+ --t;
+
+ if (*t == L'.')
+ return t;
+
+ if (*t == L':' || *t == L'\\' || *t == L'/')
+ break;
+ }
+
+ return end;
+}
+
+VDString VDFileSplitExtLeft (const VDString& s) { return splitimpL(s, VDFileSplitExt(s.c_str())); }
+VDStringW VDFileSplitExtLeft (const VDStringW& s) { return splitimpL(s, VDFileSplitExt(s.c_str())); }
+VDString VDFileSplitExtRight(const VDString& s) { return splitimpR(s, VDFileSplitExt(s.c_str())); }
+VDStringW VDFileSplitExtRight(const VDStringW& s) { return splitimpR(s, VDFileSplitExt(s.c_str())); }
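+// Illustrative examples (editorial note, derived from the splitters above; not
+// part of the original source): the split point is the last '.' in the final
+// path component, so
+//   VDFileSplitExtLeft(VDString("clip.avi"))        == "clip"
+//   VDFileSplitExtRight(VDString("clip.avi"))       == ".avi"
+//   VDFileSplitExtRight(VDString("archive.tar.gz")) == ".gz"
+// and a dotless name yields the whole string on the left and "" on the right.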
+
+/////////////////////////////////////////////////////////////////////////////
+
+bool VDFileWildMatch(const char *pattern, const char *path) {
+	// What we do here is split the pattern into segments that are bracketed
+	// by sequences of asterisks. The trick is that the first match for a
+	// segment is as good as the best possible match, so we can continue. So
+	// we just take one segment at a time and walk it forward until we find
+	// the first match or we fail.
+ //
+ // Time complexity is O(NM), where N=length of string and M=length of
+ // the pattern. In practice, it's rather fast.
+
+ bool star = false;
+ int i = 0;
+ for(;;) {
+ char c = (char)tolower((unsigned char)pattern[i]);
+ if (c == '*') {
+ star = true;
+ pattern += i+1;
+ if (!*pattern)
+ return true;
+ path += i;
+ i = 0;
+ continue;
+ }
+
+ char d = (char)tolower((unsigned char)path[i]);
+ ++i;
+
+ if (c == '?') { // ? matches any character but null.
+ if (!d)
+ return false;
+ } else if (c != d) { // Literal character must match itself.
+ // If we're at the end of the string or there is no
+ // previous asterisk (anchored search), there's no other
+ // match to find.
+ if (!star || !d || !i)
+ return false;
+
+ // Restart segment search at next position in path.
+ ++path;
+ i = 0;
+ continue;
+ }
+
+ if (!c)
+ return true;
+ }
+}
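+// Illustrative matches (editorial sketch, not part of the original source);
+// comparison is case-insensitive via tolower():
+//   VDFileWildMatch("*.av?", "clip.avi")  -> true   ('*' spans "clip", '?' eats 'i')
+//   VDFileWildMatch("a*b*c", "axxbyyc")   -> true
+//   VDFileWildMatch("a?c",   "ac")        -> false  ('?' must consume one character)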
+
+bool VDFileWildMatch(const wchar_t *pattern, const wchar_t *path) {
+	// What we do here is split the pattern into segments that are bracketed
+	// by sequences of asterisks. The trick is that the first match for a
+	// segment is as good as the best possible match, so we can continue. So
+	// we just take one segment at a time and walk it forward until we find
+	// the first match or we fail.
+ //
+ // Time complexity is O(NM), where N=length of string and M=length of
+ // the pattern. In practice, it's rather fast.
+
+ bool star = false;
+ int i = 0;
+ for(;;) {
+ wchar_t c = towlower(pattern[i]);
+ if (c == L'*') {
+ star = true;
+ pattern += i+1;
+ if (!*pattern)
+ return true;
+ path += i;
+ i = 0;
+ continue;
+ }
+
+ wchar_t d = towlower(path[i]);
+ ++i;
+
+ if (c == L'?') { // ? matches any character but null.
+ if (!d)
+ return false;
+ } else if (c != d) { // Literal character must match itself.
+ // If we're at the end of the string or there is no
+ // previous asterisk (anchored search), there's no other
+ // match to find.
+ if (!star || !d || !i)
+ return false;
+
+ // Restart segment search at next position in path.
+ ++path;
+ i = 0;
+ continue;
+ }
+
+ if (!c)
+ return true;
+ }
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+#include <windows.h>
+#include <vd2/system/w32assist.h>
+
+sint64 VDGetDiskFreeSpace(const wchar_t *path) {
+ typedef BOOL (WINAPI *tpGetDiskFreeSpaceExA)(LPCSTR lpDirectoryName, PULARGE_INTEGER lpFreeBytesAvailable, PULARGE_INTEGER lpTotalNumberOfBytes, PULARGE_INTEGER lpTotalNumberOfFreeBytes);
+ typedef BOOL (WINAPI *tpGetDiskFreeSpaceExW)(LPCWSTR lpDirectoryName, PULARGE_INTEGER lpFreeBytesAvailable, PULARGE_INTEGER lpTotalNumberOfBytes, PULARGE_INTEGER lpTotalNumberOfFreeBytes);
+
+ static bool sbChecked = false;
+ static tpGetDiskFreeSpaceExA spGetDiskFreeSpaceExA;
+ static tpGetDiskFreeSpaceExW spGetDiskFreeSpaceExW;
+
+ if (!sbChecked) {
+ HMODULE hmodKernel = GetModuleHandle("kernel32.dll");
+ spGetDiskFreeSpaceExA = (tpGetDiskFreeSpaceExA)GetProcAddress(hmodKernel, "GetDiskFreeSpaceExA");
+ spGetDiskFreeSpaceExW = (tpGetDiskFreeSpaceExW)GetProcAddress(hmodKernel, "GetDiskFreeSpaceExW");
+
+ sbChecked = true;
+ }
+
+ if (spGetDiskFreeSpaceExA) {
+ BOOL success;
+ uint64 freeClient, totalBytes, totalFreeBytes;
+ VDStringW directoryName(path);
+
+ if (!directoryName.empty()) {
+ wchar_t c = directoryName[directoryName.length()-1];
+
+ if (c != L'/' && c != L'\\')
+ directoryName += L'\\';
+ }
+
+ if ((LONG)GetVersion() < 0)
+ success = spGetDiskFreeSpaceExA(VDTextWToA(directoryName).c_str(), (PULARGE_INTEGER)&freeClient, (PULARGE_INTEGER)&totalBytes, (PULARGE_INTEGER)&totalFreeBytes);
+ else
+ success = spGetDiskFreeSpaceExW(directoryName.c_str(), (PULARGE_INTEGER)&freeClient, (PULARGE_INTEGER)&totalBytes, (PULARGE_INTEGER)&totalFreeBytes);
+
+ return success ? (sint64)freeClient : -1;
+ } else {
+ DWORD sectorsPerCluster, bytesPerSector, freeClusters, totalClusters;
+ BOOL success;
+
+ VDStringW rootPath(VDFileGetRootPath(path));
+
+ if ((LONG)GetVersion() < 0)
+ success = GetDiskFreeSpaceA(rootPath.empty() ? NULL : VDTextWToA(rootPath).c_str(), &sectorsPerCluster, &bytesPerSector, &freeClusters, &totalClusters);
+ else
+ success = GetDiskFreeSpaceW(rootPath.empty() ? NULL : rootPath.c_str(), &sectorsPerCluster, &bytesPerSector, &freeClusters, &totalClusters);
+
+ return success ? (sint64)((uint64)sectorsPerCluster * bytesPerSector * freeClusters) : -1;
+ }
+}
+
+bool VDDoesPathExist(const wchar_t *fileName) {
+ bool bExists;
+
+ if (!(GetVersion() & 0x80000000)) {
+ bExists = ((DWORD)-1 != GetFileAttributesW(fileName));
+ } else {
+ bExists = ((DWORD)-1 != GetFileAttributesA(VDTextWToA(fileName).c_str()));
+ }
+
+ return bExists;
+}
+
+void VDCreateDirectory(const wchar_t *path) {
+ // can't create dir with trailing slash
+ VDStringW::size_type l(wcslen(path));
+
+ if (l) {
+ const wchar_t c = path[l-1];
+
+ if (c == L'/' || c == L'\\') {
+ VDCreateDirectory(VDStringW(path, l-1).c_str());
+ return;
+ }
+ }
+
+ BOOL succeeded;
+
+ if (!(GetVersion() & 0x80000000)) {
+ succeeded = CreateDirectoryW(path, NULL);
+ } else {
+ succeeded = CreateDirectoryA(VDTextWToA(path).c_str(), NULL);
+ }
+
+ if (!succeeded)
+ throw MyWin32Error("Cannot create directory: %%s", GetLastError());
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+bool VDDeletePathAutodetect(const wchar_t *path);
+bool (*VDRemoveFile)(const wchar_t *path) = VDDeletePathAutodetect;
+
+namespace {
+ typedef BOOL (APIENTRY *tpDeleteFileW)(LPCWSTR path);
+ tpDeleteFileW spDeleteFileW;
+}
+
+bool VDDeleteFile9x(const wchar_t *path) {
+ return !!DeleteFileA(VDTextWToA(path).c_str());
+}
+
+bool VDDeleteFileNT(const wchar_t *path) {
+ return !!spDeleteFileW(path);
+}
+
+bool VDDeletePathAutodetect(const wchar_t *path) {
+ if (VDIsWindowsNT()) {
+ spDeleteFileW = (tpDeleteFileW)GetProcAddress(GetModuleHandle("kernel32"), "DeleteFileW");
+ VDRemoveFile = VDDeleteFileNT;
+ } else
+ VDRemoveFile = VDDeleteFile9x;
+
+ return VDRemoveFile(path);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
+ typedef BOOL (WINAPI *tpGetVolumePathNameW)(LPCWSTR lpszPathName, LPWSTR lpszVolumePathName, DWORD cchBufferLength);
+ typedef BOOL (WINAPI *tpGetFullPathNameW)(LPCWSTR lpFileName, DWORD nBufferLength, LPWSTR lpBuffer, LPWSTR *lpFilePart);
+}
+
+uint64 VDFileGetLastWriteTime(const wchar_t *path) {
+ if (VDIsWindowsNT()) {
+ WIN32_FIND_DATAW fdw;
+ HANDLE h = FindFirstFileW(path, &fdw);
+ if (h == INVALID_HANDLE_VALUE)
+ return 0;
+
+ FindClose(h);
+
+ return ((uint64)fdw.ftLastWriteTime.dwHighDateTime << 32) + fdw.ftLastWriteTime.dwLowDateTime;
+ } else {
+ WIN32_FIND_DATAA fda;
+ HANDLE h = FindFirstFileA(VDTextWToA(path).c_str(), &fda);
+ if (h == INVALID_HANDLE_VALUE)
+ return 0;
+
+ FindClose(h);
+
+ return ((uint64)fda.ftLastWriteTime.dwHighDateTime << 32) + fda.ftLastWriteTime.dwLowDateTime;
+ }
+}
+
+VDStringW VDFileGetRootPath(const wchar_t *path) {
+ static tpGetVolumePathNameW spGetVolumePathNameW = (tpGetVolumePathNameW)GetProcAddress(GetModuleHandle("kernel32.dll"), "GetVolumePathNameW");
+ static tpGetFullPathNameW spGetFullPathNameW = (tpGetFullPathNameW)GetProcAddress(GetModuleHandle("kernel32.dll"), "GetFullPathNameW");
+
+ VDStringW fullPath(VDGetFullPath(path));
+
+ // Windows 2000/XP path
+ if (spGetVolumePathNameW) {
+ vdblock<wchar_t> buf(std::max<size_t>(fullPath.size() + 1, MAX_PATH));
+
+ if (spGetVolumePathNameW(path, buf.data(), buf.size()))
+ return VDStringW(buf.data());
+ }
+
+ // Windows 95/98/ME/NT4 path
+ const wchar_t *s = fullPath.c_str();
+ VDStringW root(s, VDFileSplitRoot(s) - s);
+ VDFileFixDirPath(root);
+ return root;
+}
+
+VDStringW VDGetFullPath(const wchar_t *partialPath) {
+ static tpGetFullPathNameW spGetFullPathNameW = (tpGetFullPathNameW)GetProcAddress(GetModuleHandle("kernel32.dll"), "GetFullPathNameW");
+
+ union {
+ char a[MAX_PATH];
+ wchar_t w[MAX_PATH];
+ } tmpBuf;
+
+ if (spGetFullPathNameW && !(GetVersion() & 0x80000000)) {
+ LPWSTR p;
+
+ tmpBuf.w[0] = 0;
+ DWORD count = spGetFullPathNameW(partialPath, MAX_PATH, tmpBuf.w, &p);
+
+ if (count < MAX_PATH)
+ return VDStringW(tmpBuf.w);
+
+ VDStringW tmp(count);
+
+ DWORD newCount = spGetFullPathNameW(partialPath, count, (wchar_t *)tmp.data(), &p);
+ if (newCount < count)
+ return tmp;
+
+ return VDStringW(partialPath);
+ } else {
+ LPSTR p;
+ VDStringA pathA(VDTextWToA(partialPath));
+
+ tmpBuf.a[0] = 0;
+ DWORD count = GetFullPathNameA(pathA.c_str(), MAX_PATH, tmpBuf.a, &p);
+
+ if (count < MAX_PATH)
+ return VDStringW(VDTextAToW(tmpBuf.a));
+
+ VDStringA tmpA(count);
+
+ DWORD newCount = GetFullPathNameA(pathA.c_str(), count, (char *)tmpA.data(), &p);
+ if (newCount < count)
+ return VDTextAToW(tmpA);
+
+ return VDStringW(partialPath);
+ }
+}
+
+VDStringW VDMakePath(const wchar_t *base, const wchar_t *file) {
+ if (!*base)
+ return VDStringW(file);
+
+ VDStringW result(base);
+
+ const wchar_t c = result[result.size() - 1];
+
+ if (c != L'/' && c != L'\\' && c != L':')
+ result += L'\\';
+
+ result.append(file);
+
+ return result;
+}
+
+void VDFileFixDirPath(VDStringW& path) {
+ if (!path.empty()) {
+ wchar_t c = path[path.size()-1];
+
+ if (c != L'/' && c != L'\\' && c != L':')
+ path += L'\\';
+ }
+}
+
+namespace {
+ VDStringW VDGetModulePathW32(HINSTANCE hInst) {
+ union {
+ wchar_t w[MAX_PATH];
+ char a[MAX_PATH];
+ } buf;
+
+ VDStringW wstr;
+
+ if (VDIsWindowsNT()) {
+ wcscpy(buf.w, L".");
+ if (GetModuleFileNameW(hInst, buf.w, MAX_PATH))
+ *VDFileSplitPath(buf.w) = 0;
+ wstr = buf.w;
+ } else {
+ strcpy(buf.a, ".");
+ if (GetModuleFileNameA(hInst, buf.a, MAX_PATH))
+ *VDFileSplitPath(buf.a) = 0;
+ wstr = VDTextAToW(buf.a, -1);
+ }
+
+ VDStringW wstr2(VDGetFullPath(wstr.c_str()));
+
+ return wstr2;
+ }
+}
+
+VDStringW VDGetLocalModulePath() {
+ return VDGetModulePathW32(VDGetLocalModuleHandleW32());
+}
+
+VDStringW VDGetProgramPath() {
+ return VDGetModulePathW32(NULL);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDDirectoryIterator::VDDirectoryIterator(const wchar_t *path)
+ : mSearchPath(path)
+ , mpHandle(NULL)
+ , mbSearchComplete(false)
+{
+ mBasePath = VDFileSplitPathLeft(mSearchPath);
+ VDFileFixDirPath(mBasePath);
+}
+
+VDDirectoryIterator::~VDDirectoryIterator() {
+ if (mpHandle)
+ FindClose((HANDLE)mpHandle);
+}
+
+bool VDDirectoryIterator::Next() {
+ if (mbSearchComplete)
+ return false;
+
+ union {
+ WIN32_FIND_DATAA a;
+ WIN32_FIND_DATAW w;
+ } wfd;
+
+ if (GetVersion() & 0x80000000) {
+ if (mpHandle)
+ mbSearchComplete = !FindNextFileA((HANDLE)mpHandle, &wfd.a);
+ else {
+ mpHandle = FindFirstFileA(VDTextWToA(mSearchPath).c_str(), &wfd.a);
+ mbSearchComplete = (INVALID_HANDLE_VALUE == mpHandle);
+ }
+ if (mbSearchComplete)
+ return false;
+
+ mbDirectory = (wfd.a.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+ mFilename = VDTextAToW(wfd.a.cFileName);
+		mFileSize = wfd.a.nFileSizeLow + ((sint64)wfd.a.nFileSizeHigh << 32);
+ } else {
+ if (mpHandle)
+ mbSearchComplete = !FindNextFileW((HANDLE)mpHandle, &wfd.w);
+ else {
+ mpHandle = FindFirstFileW(mSearchPath.c_str(), &wfd.w);
+ mbSearchComplete = (INVALID_HANDLE_VALUE == mpHandle);
+ }
+ if (mbSearchComplete)
+ return false;
+
+ mbDirectory = (wfd.w.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+ mFilename = wfd.w.cFileName;
+ mFileSize = wfd.w.nFileSizeLow + ((sint64)wfd.w.nFileSizeHigh << 32);
+ }
+
+ return true;
+}
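+// Illustrative usage (editorial note): construct the iterator with a wildcard
+// search path and pump Next(); each successful call fills the filename,
+// directory-flag and size members exposed through <vd2/system/filesys.h>.
+//
+//   VDDirectoryIterator it(L"C:\\clips\\*.avi");   // hypothetical search path
+//   while(it.Next()) {
+//       // examine the current entry via the iterator's accessors
+//   }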
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _DEBUG
+
+struct VDSystemFilesysTestObject {
+ VDSystemFilesysTestObject() {
+#define TEST(fn, x, y1, y2) VDASSERT(!strcmp(fn(x), y2)); VDASSERT(!wcscmp(fn(L##x), L##y2)); VDASSERT(fn##Left(VDStringA(x))==y1); VDASSERT(fn##Right(VDStringA(x))==y2); VDASSERT(fn##Left(VDStringW(L##x))==L##y1); VDASSERT(fn##Right(VDStringW(L##x))==L##y2)
+ TEST(VDFileSplitPath, "", "", "");
+ TEST(VDFileSplitPath, "x", "", "x");
+ TEST(VDFileSplitPath, "x\\y", "x\\", "y");
+ TEST(VDFileSplitPath, "x\\y\\z", "x\\y\\", "z");
+ TEST(VDFileSplitPath, "x\\", "x\\", "");
+ TEST(VDFileSplitPath, "x\\y\\z\\", "x\\y\\z\\", "");
+ TEST(VDFileSplitPath, "c:", "c:", "");
+ TEST(VDFileSplitPath, "c:x", "c:", "x");
+ TEST(VDFileSplitPath, "c:\\", "c:\\", "");
+ TEST(VDFileSplitPath, "c:\\x", "c:\\", "x");
+ TEST(VDFileSplitPath, "c:\\x\\", "c:\\x\\", "");
+ TEST(VDFileSplitPath, "c:\\x\\", "c:\\x\\", "");
+ TEST(VDFileSplitPath, "c:x\\y", "c:x\\", "y");
+ TEST(VDFileSplitPath, "\\\\server\\share\\", "\\\\server\\share\\", "");
+ TEST(VDFileSplitPath, "\\\\server\\share\\x", "\\\\server\\share\\", "x");
+#undef TEST
+#define TEST(fn, x, y1, y2) VDASSERT(!strcmp(fn(x), y2)); VDASSERT(!wcscmp(fn(L##x), L##y2)); VDASSERT(fn(VDStringA(x))==y1); VDASSERT(fn(VDStringW(L##x))==L##y1)
+ TEST(VDFileSplitRoot, "", "", "");
+ TEST(VDFileSplitRoot, "c:", "c:", "");
+ TEST(VDFileSplitRoot, "c:x", "c:", "x");
+ TEST(VDFileSplitRoot, "c:x\\", "c:", "x\\");
+ TEST(VDFileSplitRoot, "c:x\\y", "c:", "x\\y");
+ TEST(VDFileSplitRoot, "c:\\", "c:\\", "");
+ TEST(VDFileSplitRoot, "c:\\x", "c:\\", "x");
+ TEST(VDFileSplitRoot, "c:\\x\\", "c:\\", "x\\");
+ TEST(VDFileSplitRoot, "\\", "\\", "");
+ TEST(VDFileSplitRoot, "\\x", "\\", "x");
+ TEST(VDFileSplitRoot, "\\x\\", "\\", "x\\");
+ TEST(VDFileSplitRoot, "\\x\\y", "\\", "x\\y");
+ TEST(VDFileSplitRoot, "\\\\server\\share", "\\\\server\\share", "");
+ TEST(VDFileSplitRoot, "\\\\server\\share\\", "\\\\server\\share\\", "");
+ TEST(VDFileSplitRoot, "\\\\server\\share\\x", "\\\\server\\share\\", "x");
+ TEST(VDFileSplitRoot, "\\\\server\\share\\x\\", "\\\\server\\share\\", "x\\");
+ TEST(VDFileSplitRoot, "\\\\server\\share\\x\\y", "\\\\server\\share\\", "x\\y");
+#undef TEST
+ }
+} g_VDSystemFilesysTestObject;
+
+#endif
diff --git a/src/thirdparty/VirtualDub/system/source/filewatcher.cpp b/src/thirdparty/VirtualDub/system/source/filewatcher.cpp
new file mode 100644
index 000000000..3d32150fd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/filewatcher.cpp
@@ -0,0 +1,117 @@
+#include "stdafx.h"
+#include <windows.h>
+#include <vd2/system/filesys.h>
+#include <vd2/system/filewatcher.h>
+#include <vd2/system/thunk.h>
+#include <vd2/system/w32assist.h>
+
+VDFileWatcher::VDFileWatcher()
+ : mChangeHandle(INVALID_HANDLE_VALUE)
+ , mLastWriteTime(0)
+ , mbRepeatRequested(false)
+ , mbThunksInited(false)
+ , mpThunk(NULL)
+ , mTimerId(0)
+{
+}
+
+VDFileWatcher::~VDFileWatcher() {
+ Shutdown();
+}
+
+bool VDFileWatcher::IsActive() const {
+ return mChangeHandle != INVALID_HANDLE_VALUE;
+}
+
+void VDFileWatcher::Init(const wchar_t *file, IVDFileWatcherCallback *callback) {
+ Shutdown();
+
+ const wchar_t *pathEnd = VDFileSplitPath(file);
+
+ VDStringW basePath(file, pathEnd);
+
+ if (basePath.empty())
+ basePath = L".";
+
+ if (VDIsWindowsNT())
+ mChangeHandle = FindFirstChangeNotificationW(basePath.c_str(), FALSE, FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE);
+ else
+ mChangeHandle = FindFirstChangeNotificationA(VDTextWToA(basePath).c_str(), FALSE, FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE);
+
+ if (mChangeHandle == INVALID_HANDLE_VALUE)
+ throw MyError("Unable to monitor file: %ls", file);
+
+ mPath = file;
+ mLastWriteTime = VDFileGetLastWriteTime(mPath.c_str());
+ mpCB = callback;
+ mbRepeatRequested = false;
+
+ if (callback) {
+ if (!mbThunksInited)
+ mbThunksInited = VDInitThunkAllocator();
+
+ if (mbThunksInited) {
+ mpThunk = VDCreateFunctionThunkFromMethod(this, &VDFileWatcher::StaticTimerCallback, true);
+
+ if (mpThunk) {
+ mTimerId = SetTimer(NULL, 0, 1000, (TIMERPROC)mpThunk);
+ }
+ }
+ }
+}
+
+void VDFileWatcher::Shutdown() {
+ if (mChangeHandle != INVALID_HANDLE_VALUE) {
+ FindCloseChangeNotification(mChangeHandle);
+ mChangeHandle = INVALID_HANDLE_VALUE;
+ }
+
+ if (mTimerId) {
+ KillTimer(NULL, mTimerId);
+ mTimerId = 0;
+ }
+
+ if (mpThunk) {
+ VDDestroyFunctionThunk(mpThunk);
+ mpThunk = NULL;
+ }
+
+ if (mbThunksInited) {
+ mbThunksInited = false;
+
+ VDShutdownThunkAllocator();
+ }
+}
+
+bool VDFileWatcher::Wait(uint32 delay) {
+ if (mChangeHandle == INVALID_HANDLE_VALUE)
+ return false;
+
+ if (WAIT_OBJECT_0 != WaitForSingleObject(mChangeHandle, delay))
+ return false;
+
+ FindNextChangeNotification(mChangeHandle);
+
+ uint64 t = VDFileGetLastWriteTime(mPath.c_str());
+
+ if (mLastWriteTime == t)
+ return false;
+
+ mLastWriteTime = t;
+ return true;
+}
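+// Illustrative usage (editorial note): with a null callback the watcher is
+// polled manually; Wait() returns true once the file's last-write time changes.
+//
+//   VDFileWatcher watcher;
+//   watcher.Init(L"C:\\path\\to\\script.sub", NULL);   // hypothetical path
+//   if (watcher.Wait(0)) {
+//       // the watched file was modified externally
+//   }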
+
+void VDFileWatcher::StaticTimerCallback(void *, unsigned, unsigned, unsigned long) {
+ if (mbRepeatRequested) {
+ if (mpCB)
+ mbRepeatRequested = !mpCB->OnFileUpdated(mPath.c_str());
+ else
+ mbRepeatRequested = false;
+ return;
+ }
+
+ if (Wait(0)) {
+ if (mpCB)
+ mbRepeatRequested = !mpCB->OnFileUpdated(mPath.c_str());
+ }
+}
diff --git a/src/thirdparty/VirtualDub/system/source/halffloat.cpp b/src/thirdparty/VirtualDub/system/source/halffloat.cpp
new file mode 100644
index 000000000..9875a3003
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/halffloat.cpp
@@ -0,0 +1,79 @@
+#include "stdafx.h"
+#include <vd2/system/halffloat.h>
+
+uint16 VDConvertFloatToHalf(const void *f) {
+ uint32 v = *(const uint32 *)f;
+
+ uint32 sign = (v >> 16) & 0x8000;
+ sint32 exmant = v & 0x7fffffff;
+
+ if (exmant > 0x7f800000) {
+ // convert NaNs directly
+ exmant = (exmant & 0x00400000) + 0x47a00000;
+ } else if (exmant > 0x47800000) {
+ // clamp large numbers to infinity
+ exmant = 0x47800000;
+ } else if (exmant < 0x33800000) {
+ // clamp very tiny numbers to zero
+ exmant = 0x38000000;
+ } else if (exmant < 0x38800000) {
+ // normalized finite converting to denormal
+ uint32 ex = exmant & 0x7f800000;
+ uint32 mant = (exmant & 0x007fffff) | 0x800000;
+ uint32 sticky = 0;
+
+ while(ex < 0x38800000) {
+ ex += 0x00800000;
+ sticky |= mant;
+ mant >>= 1;
+ }
+
+ // round to nearest even
+ sticky |= mant >> 13;
+
+ // round up with sticky bits
+ mant += (sticky & 1);
+
+ // round up with round bit
+ mant += 0x0fff;
+
+ exmant = ex + mant - 0x800000;
+ } else {
+ // round normal numbers using round to nearest even
+ exmant |= (exmant & 0x00002000) >> 13;
+ exmant += 0x00000fff;
+ }
+
+ // shift and rebias exponent
+ exmant -= 0x38000000;
+ exmant >>= 13;
+
+ return (uint16)(sign + exmant);
+}
+
+void VDConvertHalfToFloat(uint16 h, void *dst) {
+ uint32 sign = ((uint32)h << 16) & 0x80000000;
+ uint32 exmant = (uint32)h & 0x7fff;
+ uint32 v = 0;
+
+ if (exmant >= 0x7c00) {
+ // infinity or NaN
+ v = (exmant << 13) + 0x70000000;
+ } else if (exmant >= 0x0400) {
+ // normalized finite
+ v = (exmant << 13) + 0x38000000;
+ } else if (exmant) {
+ // denormal
+ uint32 ex32 = 0x38000000;
+ uint32 mant32 = (exmant & 0x3ff) << 13;
+
+ while(!(mant32 & 0x800000)) {
+ mant32 <<= 1;
+ ex32 -= 0x800000;
+ }
+
+ v = ex32 + mant32;
+ }
+
+ *(uint32 *)dst = v + sign;
+}
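+// Round-trip examples (editorial note, standard IEEE binary16 encodings):
+//   1.0f <-> 0x3C00        -2.5f <-> 0xC100        65504.0f <-> 0x7BFF
+// Larger magnitudes clamp to infinity (0x7C00 / 0xFC00) on the float-to-half path.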
diff --git a/src/thirdparty/VirtualDub/system/source/hash.cpp b/src/thirdparty/VirtualDub/system/source/hash.cpp
new file mode 100644
index 000000000..8962a511d
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/hash.cpp
@@ -0,0 +1,98 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/hash.h>
+#include <vd2/system/binary.h>
+
+// Based on: SuperFastHash by Paul Hsieh
+// http://www.azillionmonkeys.com/qed/hash.html
+
+uint32 VDHashString32(const char *s) {
+ uint32 len = (uint32)strlen(s);
+
+ return VDHashString32(s, len);
+}
+
+uint32 VDHashString32(const char *s, uint32 len) {
+ uint32 hash = len;
+
+ uint32 rem = len & 3;
+ len >>= 2;
+
+ uint32 tmp;
+ for(uint32 i=0; i<len; ++i) {
+ hash += VDReadUnalignedU16(s);
+ tmp = (VDReadUnalignedU16(s + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ s += 4;
+ hash += hash >> 11;
+ }
+
+ switch(rem) {
+ case 3:
+ hash += VDReadUnalignedU16(s);
+ hash ^= hash << 16;
+ hash ^= (uint32)(uint8)s[2] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += VDReadUnalignedU16(s);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += (uint8)s[0];
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ break;
+ }
+
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+
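+// Editorial note: the case-insensitive variants below use a simple FNV-style
+// multiplicative hash (offset basis 2166136261, prime 16777619) over
+// towlower()'ed characters, rather than the SuperFastHash routine above.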
+uint32 VDHashString32I(const wchar_t *s) {
+ uint32 len = (uint32)wcslen(s);
+
+ return VDHashString32I(s, len);
+}
+
+uint32 VDHashString32I(const wchar_t *s, uint32 len) {
+ uint32 hash = 2166136261;
+
+ for(uint32 i=0; i<len; ++i) {
+ hash *= 16777619;
+ hash ^= (uint32)towlower(*s++);
+ }
+
+ return hash;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/int128.cpp b/src/thirdparty/VirtualDub/system/source/int128.cpp
new file mode 100644
index 000000000..fbc8ece86
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/int128.cpp
@@ -0,0 +1,478 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <math.h>
+
+#include <vd2/system/int128.h>
+
+#ifndef _M_AMD64
+ void __declspec(naked) __cdecl vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
+ __asm {
+ push ebx
+
+ mov ebx, [esp+16]
+ mov ecx, [esp+12]
+ mov edx, [esp+8]
+
+ mov eax, [ecx+0]
+ add eax, [ebx+0]
+ mov [edx+0],eax
+ mov eax, [ecx+4]
+ adc eax, [ebx+4]
+ mov [edx+4],eax
+ mov eax, [ecx+8]
+ adc eax, [ebx+8]
+ mov [edx+8],eax
+ mov eax, [ecx+12]
+ adc eax, [ebx+12]
+ mov [edx+12],eax
+
+ pop ebx
+ ret
+ }
+ }
+
+ void __declspec(naked) __cdecl vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
+ __asm {
+ push ebx
+
+ mov ebx, [esp+16]
+ mov ecx, [esp+12]
+ mov edx, [esp+8]
+
+ mov eax, [ecx+0]
+ sub eax, [ebx+0]
+ mov [edx+0],eax
+ mov eax, [ecx+4]
+ sbb eax, [ebx+4]
+ mov [edx+4],eax
+ mov eax, [ecx+8]
+ sbb eax, [ebx+8]
+ mov [edx+8],eax
+ mov eax, [ecx+12]
+ sbb eax, [ebx+12]
+ mov [edx+12],eax
+
+ pop ebx
+ ret
+ }
+ }
+
+ void __declspec(naked) vdint128::setSquare(sint64 v) {
+ __asm {
+ push edi
+ push esi
+ push ebx
+ mov eax, [esp+20]
+ cdq
+ mov esi, eax
+ mov eax, [esp+16]
+ xor eax, edx
+ xor esi, edx
+ sub eax, edx
+ sbb esi, edx
+ mov ebx, eax
+ mul eax
+ mov [ecx], eax
+ mov edi, edx
+ mov eax, ebx
+ mul esi
+ mov ebx, 0
+ add eax, eax
+ adc edx, edx
+ add eax, edi
+ adc edx, 0
+ mov edi, edx
+ adc ebx, 0
+ mov [ecx+4], eax
+ mov eax, esi
+ mul esi
+ add eax, edi
+ adc edx, ebx
+ mov [ecx+8], eax
+ mov [ecx+12], edx
+ pop ebx
+ pop esi
+ pop edi
+ ret 8
+ }
+ }
+
+ const vdint128 __declspec(naked) vdint128::operator<<(int v) const {
+ __asm {
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,ecx
+ mov edx,[esp+20]
+
+ mov ecx,[esp+24]
+ cmp ecx,128
+ jae zeroit
+
+ mov eax,[esi+12]
+ mov ebx,[esi+8]
+ mov edi,[esi+4]
+ mov ebp,[esi]
+
+ dwordloop:
+ cmp ecx,32
+ jb bits
+
+ mov eax,ebx
+ mov ebx,edi
+ mov edi,ebp
+ xor ebp,ebp
+ sub ecx,32
+ jmp short dwordloop
+
+ bits:
+ shld eax,ebx,cl
+ shld ebx,edi,cl
+ mov [edx+12],eax
+ mov [edx+8],ebx
+ shld edi,ebp,cl
+
+ shl ebp,cl
+ mov [edx+4],edi
+ mov [edx],ebp
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+
+ zeroit:
+ xor eax,eax
+ mov [edx+0],eax
+ mov [edx+4],eax
+ mov [edx+8],eax
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+ }
+ }
+
+ const vdint128 __declspec(naked) vdint128::operator>>(int v) const {
+ __asm {
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,ecx
+ mov edx,[esp+20]
+
+ mov eax,[esi+12]
+ mov ecx,[esp+24]
+ cmp ecx,127
+ jae clearit
+
+ mov ebx,[esi+8]
+ mov edi,[esi+4]
+ mov ebp,[esi]
+
+ dwordloop:
+ cmp ecx,32
+ jb bits
+
+ mov ebp,edi
+ mov edi,ebx
+ mov ebx,eax
+ sar eax,31
+ sub ecx,32
+ jmp short dwordloop
+
+ bits:
+ shrd ebp,edi,cl
+ shrd edi,ebx,cl
+ mov [edx],ebp
+ mov [edx+4],edi
+ shrd ebx,eax,cl
+
+ sar eax,cl
+ mov [edx+8],ebx
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+
+ clearit:
+ sar eax, 31
+ mov [edx+0],eax
+ mov [edx+4],eax
+ mov [edx+8],eax
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+ }
+ }
+
+ const vduint128 __declspec(naked) vduint128::operator<<(int v) const {
+ __asm {
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,ecx
+ mov edx,[esp+20]
+
+ mov ecx,[esp+24]
+ cmp ecx,128
+ jae zeroit
+
+ mov eax,[esi+12]
+ mov ebx,[esi+8]
+ mov edi,[esi+4]
+ mov ebp,[esi]
+
+ dwordloop:
+ cmp ecx,32
+ jb bits
+
+ mov eax,ebx
+ mov ebx,edi
+ mov edi,ebp
+ xor ebp,ebp
+ sub ecx,32
+ jmp short dwordloop
+
+ bits:
+ shld eax,ebx,cl
+ shld ebx,edi,cl
+ mov [edx+12],eax
+ mov [edx+8],ebx
+ shld edi,ebp,cl
+
+ shl ebp,cl
+ mov [edx+4],edi
+ mov [edx],ebp
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+
+ zeroit:
+ xor eax,eax
+ mov [edx+0],eax
+ mov [edx+4],eax
+ mov [edx+8],eax
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+ }
+ }
+
+ const vduint128 __declspec(naked) vduint128::operator>>(int v) const {
+ __asm {
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,ecx
+ mov edx,[esp+20]
+
+ mov eax,[esi+12]
+ mov ecx,[esp+24]
+ cmp ecx,127
+ jae clearit
+
+ mov ebx,[esi+8]
+ mov edi,[esi+4]
+ mov ebp,[esi]
+
+ dwordloop:
+ cmp ecx,32
+ jb bits
+
+ mov ebp,edi
+ mov edi,ebx
+ mov ebx,eax
+ xor eax,eax
+ sub ecx,32
+ jmp short dwordloop
+
+ bits:
+ shrd ebp,edi,cl
+ shrd edi,ebx,cl
+ mov [edx],ebp
+ mov [edx+4],edi
+ shrd ebx,eax,cl
+
+ shr eax,cl
+ mov [edx+8],ebx
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+
+ clearit:
+ sar eax, 31
+ mov [edx+0],eax
+ mov [edx+4],eax
+ mov [edx+8],eax
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+ }
+ }
+#endif
+
+const vdint128 vdint128::operator*(const vdint128& x) const {
+ vdint128 X = x.abs();
+ vdint128 Y = abs();
+
+ vduint128 bd(VDUMul64x64To128(X.q[0], Y.q[0]));
+
+ bd.q[1] += X.q[0]*Y.q[1] + X.q[1]*Y.q[0];
+
+ return (q[1]^x.q[1])<0 ? -(const vdint128&)bd : (const vdint128&)bd;
+}
+
+const vdint128 vdint128::operator/(int x) const {
+ vdint128 r;
+ sint64 accum;
+
+ r.d[3] = d[3] / x;
+
+ accum = ((sint64)(d[3] % x) << 32) + d[2];
+ r.d[2] = (sint32)(accum / x);
+
+ accum = ((accum % x) << 32) + d[1];
+ r.d[1] = (sint32)(accum / x);
+
+ accum = ((accum % x) << 32) + d[0];
+ r.d[0] = (sint32)(accum / x);
+
+ return r;
+}
+
+vdint128::operator double() const {
+ return (double)(unsigned long)q[0]
+ + ldexp((double)(unsigned long)((unsigned __int64)q[0]>>32), 32)
+ + ldexp((double)q[1], 64);
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+const vduint128 vduint128::operator*(const vduint128& x) const {
+ vduint128 result(VDUMul64x64To128(q[0], x.q[0]));
+
+ result.q[1] += q[0]*x.q[1] + q[1]*x.q[0];
+
+ return result;
+}
+
+#ifdef _M_IX86
+ vduint128 __declspec(naked) __cdecl VDUMul64x64To128(uint64 x, uint64 y) {
+ __asm {
+ mov ecx,[esp+4]
+
+ mov eax,[esp+8]
+ mul dword ptr [esp+16] ;EDX:EAX = BD
+ mov [ecx+0],eax
+ mov [ecx+4],edx
+
+ mov eax,[esp+12]
+ mul dword ptr [esp+20] ;EDX:EAX = AC
+ mov [ecx+8],eax
+ mov [ecx+12],edx
+
+ mov eax,[esp+8]
+ mul dword ptr [esp+20] ;EDX:EAX = BC
+ add [ecx+4],eax
+ adc [ecx+8],edx
+ adc dword ptr [ecx+12], 0
+
+ mov eax,[esp+12]
+ mul dword ptr [esp+16] ;EDX:EAX = AD
+ add [ecx+4],eax
+ adc [ecx+8],edx
+ adc dword ptr [ecx+12], 0
+
+ mov eax, ecx
+ ret
+ }
+ }
+#endif
+
+uint64 VDUDiv128x64To64(const vduint128& dividend, uint64 divisor, uint64& remainder) {
+ vduint128 temp(dividend);
+ vduint128 divisor2(divisor);
+
+ divisor2 <<= 63;
+
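+	// Editorial note: plain restoring shift-subtract long division; each of the
+	// 64 iterations produces one quotient bit, and the remainder ends up in the
+	// upper 64 bits of the working value (temp.q[1]).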
+ uint64 result = 0;
+ for(int i=0; i<64; ++i) {
+ result += result;
+ if (temp >= divisor2) {
+ temp -= divisor2;
+ ++result;
+ }
+ temp += temp;
+ }
+
+ remainder = temp.q[1];
+
+ return result;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/list.cpp b/src/thirdparty/VirtualDub/system/source/list.cpp
new file mode 100644
index 000000000..bf443b6a6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/list.cpp
@@ -0,0 +1,97 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+///////////////////////////////////////////////////////////////////////////
+//
+// For those of you who say this looks familiar... it should. This is
+// the same linked-list style that the Amiga Exec uses, with dummy head
+// and tail nodes. It's really a very convenient way to implement
+// doubly-linked lists.
+//
+
+#include "stdafx.h"
+#include <algorithm>
+#include <vd2/system/list.h>
+
+List::List() {
+ Init();
+}
+
+void List::Init() {
+ head.next = tail.prev = 0;
+ head.prev = &tail;
+ tail.next = &head;
+}
+
+ListNode *List::RemoveHead() {
+ if (head.prev->prev) {
+ ListNode *t = head.prev;
+
+ head.prev->Remove();
+ return t;
+ }
+
+ return 0;
+}
+
+ListNode *List::RemoveTail() {
+ if (tail.next->next) {
+ ListNode *t = tail.next;
+
+ tail.next->Remove();
+ return t;
+ }
+
+ return 0;
+}
+
+void List::Take(List &from) {
+ if (from.IsEmpty())
+ return;
+
+ head.prev = from.head.prev;
+ tail.next = from.tail.next;
+ head.prev->next = &head;
+ tail.next->prev = &tail;
+
+ from.Init();
+}
+
+void List::Swap(List &dst) {
+ if (IsEmpty())
+ Take(dst);
+ else if (dst.IsEmpty())
+ dst.Take(*this);
+ else {
+ std::swap(head.prev, dst.head.prev);
+ std::swap(tail.next, dst.tail.next);
+
+ head.prev->next = &head;
+ tail.next->prev = &tail;
+
+ dst.head.prev->next = &dst.head;
+ dst.tail.next->prev = &dst.tail;
+ }
+}
diff --git a/src/thirdparty/VirtualDub/system/source/log.cpp b/src/thirdparty/VirtualDub/system/source/log.cpp
new file mode 100644
index 000000000..fce3df920
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/log.cpp
@@ -0,0 +1,171 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdtypes.h>
+#include <list>
+#include <utility>
+#include <vd2/system/log.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/VDString.h>
+
+namespace {
+ wchar_t g_log[16384]; // 32K log
+ int g_logHead, g_logTail;
+ VDCriticalSection g_csLog;
+
+ typedef std::list<std::pair<IVDLogger *, VDThreadID> > tVDLoggers;
+ tVDLoggers g_loggers;
+}
+
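+// Editorial note on the layout: g_log is a 16384-slot wchar_t ring buffer. Each
+// entry is stored as [severity][message characters][NUL]; g_logHead and
+// g_logTail are indices masked to 16383, and old entries are dropped from the
+// head until a new entry fits.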
+void VDLog(int severity, const VDStringW& s) {
+ int strSize = s.size() + 1;
+
+ if (strSize >= 16384) {
+ VDASSERT(false);
+ return;
+ }
+
+ vdsynchronized(g_csLog) {
+ for(;;) {
+ int currentSize = (g_logTail - g_logHead) & 16383;
+
+ if (currentSize + strSize < 16384) // NOTE: This means that the last byte in the ring buffer can never be used.
+ break;
+
+ while(g_log[g_logHead++ & 16383])
+ ;
+
+ g_logHead &= 16383;
+ }
+
+ const wchar_t *ps = s.data();
+
+ g_log[g_logTail++] = severity;
+
+ for(int i=1; i<strSize; ++i)
+ g_log[g_logTail++ & 16383] = *ps++;
+
+ g_log[g_logTail++ & 16383] = 0;
+
+ g_logTail &= 16383;
+
+ VDThreadID currentThread = VDGetCurrentThreadID();
+ for(tVDLoggers::const_iterator it(g_loggers.begin()), itEnd(g_loggers.end()); it!=itEnd; ++it) {
+ if (!(*it).second || currentThread == (*it).second)
+ (*it).first->AddLogEntry(severity, s);
+ }
+ }
+}
+
+void VDLogF(int severity, const wchar_t *format, ...) {
+ va_list val;
+ va_start(val, format);
+ VDStringW s;
+ s.append_vsprintf(format, val);
+ va_end(val);
+
+ VDLog(severity, s);
+}
+
+void VDAttachLogger(IVDLogger *pLogger, bool bThisThreadOnly, bool bReplayLog) {
+ vdsynchronized(g_csLog) {
+ g_loggers.push_back(tVDLoggers::value_type(pLogger, bThisThreadOnly ? VDGetCurrentThreadID() : 0));
+
+ if (bReplayLog) {
+ int idx = g_logHead;
+
+ while(idx != g_logTail) {
+ int severity = g_log[idx++];
+ int headidx = idx;
+
+ idx &= 16383;
+
+ for(;;) {
+ wchar_t c = g_log[idx];
+
+ idx = (idx+1) & 16383;
+
+ if (!c)
+ break;
+ }
+
+ if (idx > headidx) {
+ pLogger->AddLogEntry(severity, VDStringW(g_log + headidx, idx-headidx-1));
+ } else {
+ VDStringW t(idx+16383-headidx);
+
+ std::copy(g_log + headidx, g_log + 16384, const_cast<wchar_t *>(t.data()));
+ std::copy(g_log, g_log + idx - 1, const_cast<wchar_t *>(t.data() + (16384 - headidx)));
+ pLogger->AddLogEntry(severity, t);
+ }
+ }
+ }
+ }
+}
+
+void VDDetachLogger(IVDLogger *pLogger) {
+ vdsynchronized(g_csLog) {
+ for(tVDLoggers::iterator it(g_loggers.begin()), itEnd(g_loggers.end()); it!=itEnd; ++it) {
+ if (pLogger == (*it).first) {
+ g_loggers.erase(it);
+ break;
+ }
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// autologger
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDAutoLogger::VDAutoLogger(int min_severity)
+ : mbAttached(true)
+ , mMinSeverity(min_severity)
+{
+ VDAttachLogger(this, false, false);
+}
+
+VDAutoLogger::~VDAutoLogger() {
+ if (mbAttached)
+ VDDetachLogger(this);
+}
+
+void VDAutoLogger::AddLogEntry(int severity, const VDStringW& s) {
+ if (severity >= mMinSeverity)
+ mEntries.push_back(Entry(severity, s));
+}
+
+const VDAutoLogger::tEntries& VDAutoLogger::GetEntries() {
+ if (mbAttached) {
+ VDDetachLogger(this);
+ mbAttached = false;
+ }
+
+ return mEntries;
+}
+
diff --git a/src/thirdparty/VirtualDub/system/source/math.cpp b/src/thirdparty/VirtualDub/system/source/math.cpp
new file mode 100644
index 000000000..5368b13dc
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/math.cpp
@@ -0,0 +1,146 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <math.h>
+#include <vd2/system/math.h>
+#include <vd2/system/int128.h>
+
+int VDRoundToInt(double x) {
+ return (int)floor(x + 0.5);
+}
+
+long VDRoundToLong(double x) {
+ return (long)floor(x + 0.5);
+}
+
+sint32 VDRoundToInt32(double x) {
+ return (sint32)floor(x + 0.5);
+}
+
+sint64 VDRoundToInt64(double x) {
+ return (sint64)floor(x + 0.5);
+}
+
+#ifdef _M_IX86
+ sint64 __declspec(naked) __stdcall VDFractionScale64(uint64 a, uint32 b, uint32 c, uint32& remainder) {
+ __asm {
+ push edi
+ push ebx
+ mov edi, [esp+12+8] ;edi = b
+ mov eax, [esp+4+8] ;eax = a[lo]
+ mul edi ;edx:eax = a[lo]*b
+ mov ecx, eax ;ecx = (a*b)[lo]
+ mov eax, [esp+8+8] ;eax = a[hi]
+ mov ebx, edx ;ebx = (a*b)[mid]
+ mul edi ;edx:eax = a[hi]*b
+ add eax, ebx
+ mov ebx, [esp+16+8] ;ebx = c
+ adc edx, 0
+ div ebx ;eax = (a*b)/c [hi], edx = (a[hi]*b)%c
+ mov edi, eax ;edi = (a[hi]*b)/c
+ mov eax, ecx ;eax = (a*b)[lo]
+ mov ecx, [esp+20+8]
+ div ebx ;eax = (a*b)/c [lo], edx = (a*b)%c
+ mov [ecx], edx
+ mov edx, edi
+ pop ebx
+ pop edi
+ ret 20
+ }
+ }
+
+ uint64 __declspec(naked) __stdcall VDUMulDiv64x32(uint64 a, uint32 b, uint32 c) {
+ __asm {
+ mov eax, [esp+4] ;eax = a0
+ mul dword ptr [esp+12] ;edx:eax = a0*b
+ mov dword ptr [esp+4], eax ;tmp = a0*b[0:31]
+ mov ecx, edx ;ecx = a0*b[32:63]
+ mov eax, [esp+8] ;eax = a1
+ mul dword ptr [esp+12] ;edx:eax = a1*b
+ add eax, ecx ;edx:eax += a0*b[32:95]
+ adc edx, 0 ;(cont.)
+ cmp edx, [esp+16] ;test if a*b[64:95] >= c; equiv to a*b >= (c<<64)
+ jae invalid ;abort if so (overflow)
+ div dword ptr [esp+16] ;edx,eax = ((a*b)[32:95]/c, (a*b)[32:95]%c)
+ mov ecx, eax
+ mov eax, [esp+4]
+ div dword ptr [esp+16]
+ mov edx, ecx
+ ret 16
+invalid:
+ mov eax, -1 ;return FFFFFFFF'FFFFFFFF
+ mov edx, -1
+ ret 16
+ }
+ }
+#endif
+
+sint64 VDMulDiv64(sint64 a, sint64 b, sint64 c) {
+ bool flip = false;
+
+ if (a < 0) {
+ a = -a;
+ flip = true;
+ }
+
+ if (b < 0) {
+ b = -b;
+ flip = !flip;
+ }
+
+ if (c < 0) {
+ c = -c;
+ flip = !flip;
+ }
+
+ uint64 rem;
+ uint64 v = VDUDiv128x64To64(VDUMul64x64To128((uint64)a, (uint64)b), (uint64)c, rem);
+
+ if ((rem+rem) >= (uint64)c)
+ ++v;
+
+ return flip ? -(sint64)v : (sint64)v;
+}
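+// Worked examples (editorial note): VDMulDiv64(5, 7, 3) == 12 and
+// VDMulDiv64(-5, 7, 3) == -12, since 35/3 rounds to nearest; the 128-bit
+// intermediate keeps a*b from overflowing before the division.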
+
+bool VDVerifyFiniteFloats(const float *p0, uint32 n) {
+ const uint32 *p = (const uint32 *)p0;
+
+ while(n--) {
+ uint32 v = *p++;
+
+ // 00000000 zero
+ // 00000001-007FFFFF denormal
+ // 00800000-7F7FFFFF finite
+ // 7F800000 infinity
+ // 7F800001-7FBFFFFF SNaN
+ // 7FC00000-7FFFFFFF QNaN
+
+ if ((v & 0x7FFFFFFF) >= 0x7F800000)
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/memory.cpp b/src/thirdparty/VirtualDub/system/source/memory.cpp
new file mode 100644
index 000000000..3e03b5d34
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/memory.cpp
@@ -0,0 +1,456 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <malloc.h>
+#include <windows.h>
+#include <vd2/system/atomic.h>
+#include <vd2/system/memory.h>
+#include <vd2/system/cpuaccel.h>
+
+void *VDAlignedMalloc(size_t n, unsigned alignment) {
+ return _aligned_malloc(n, alignment);
+}
+
+void VDAlignedFree(void *p) {
+ _aligned_free(p);
+}
+
+void *VDAlignedVirtualAlloc(size_t n) {
+ return VirtualAlloc(NULL, n, MEM_COMMIT, PAGE_READWRITE);
+}
+
+void VDAlignedVirtualFree(void *p) {
+ VirtualFree(p, 0, MEM_RELEASE);
+}
+
+void VDSwapMemoryScalar(void *p0, void *p1, size_t bytes) {
+ uint32 *dst0 = (uint32 *)p0;
+ uint32 *dst1 = (uint32 *)p1;
+
+ while(bytes >= 4) {
+ uint32 a = *dst0;
+ uint32 b = *dst1;
+
+ *dst0++ = b;
+ *dst1++ = a;
+
+ bytes -= 4;
+ }
+
+ char *dstb0 = (char *)dst0;
+ char *dstb1 = (char *)dst1;
+
+ while(bytes--) {
+ char a = *dstb0;
+ char b = *dstb1;
+
+ *dstb0++ = b;
+ *dstb1++ = a;
+ }
+}
+
+#if defined(VD_CPU_AMD64) || defined(VD_CPU_X86)
+ void VDSwapMemorySSE(void *p0, void *p1, size_t bytes) {
+ if (((uint32)(size_t)p0 | (uint32)(size_t)p1) & 15)
+ return VDSwapMemoryScalar(p0, p1, bytes);
+
+ __m128 *pv0 = (__m128 *)p0;
+ __m128 *pv1 = (__m128 *)p1;
+
+ size_t veccount = bytes >> 4;
+ if (veccount) {
+ do {
+ __m128 v0 = *pv0;
+ __m128 v1 = *pv1;
+
+ *pv0++ = v1;
+ *pv1++ = v0;
+ } while(--veccount);
+ }
+
+ uint32 left = bytes & 15;
+ if (left) {
+ uint8 *pb0 = (uint8 *)pv0;
+ uint8 *pb1 = (uint8 *)pv1;
+ do {
+ uint8 b0 = *pb0;
+ uint8 b1 = *pb1;
+
+ *pb0++ = b1;
+ *pb1++ = b0;
+ } while(--left);
+ }
+ }
+#endif
+
+void (__cdecl *VDSwapMemory)(void *p0, void *p1, size_t bytes) = VDSwapMemoryScalar;
+
+void VDInvertMemory(void *p, unsigned bytes) {
+ char *dst = (char *)p;
+
+ if (!bytes)
+ return;
+
+ while((int)dst & 3) {
+ *dst = ~*dst;
+ ++dst;
+
+ if (!--bytes)
+ return;
+ }
+
+ unsigned lcount = bytes >> 2;
+
+ if (lcount)
+ do {
+ *(long *)dst = ~*(long *)dst;
+ dst += 4;
+ } while(--lcount);
+
+ bytes &= 3;
+
+ while(bytes--) {
+ *dst = ~*dst;
+ ++dst;
+ }
+}
+
+namespace {
+ uintptr VDGetSystemPageSizeW32() {
+ SYSTEM_INFO sysInfo;
+ GetSystemInfo(&sysInfo);
+
+ return sysInfo.dwPageSize;
+ }
+
+ uintptr VDGetSystemPageSize() {
+ static uintptr pageSize = VDGetSystemPageSizeW32();
+
+ return pageSize;
+ }
+}
+
+bool VDIsValidReadRegion(const void *p0, size_t bytes) {
+ if (!bytes)
+ return true;
+
+ if (!p0)
+ return false;
+
+ uintptr pageSize = VDGetSystemPageSize();
+ uintptr p = (uintptr)p0;
+ uintptr pLimit = p + (bytes-1);
+
+ __try {
+ for(;;) {
+ *(volatile char *)p;
+
+ if (pLimit - p < pageSize)
+ break;
+
+ p += pageSize;
+ }
+ } __except(1) {
+ return false;
+ }
+
+ return true;
+}
+
+bool VDIsValidWriteRegion(void *p0, size_t bytes) {
+ if (!bytes)
+ return true;
+
+ if (!p0)
+ return false;
+
+ // Note: Unlike IsValidWritePtr(), this is threadsafe.
+
+ uintptr pageSize = VDGetSystemPageSize();
+ uintptr p = (uintptr)p0;
+ uintptr pLimit = p + (bytes-1);
+ p &= ~(uintptr)3;
+
+ __try {
+ for(;;) {
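+			// Editorial note: comparing and exchanging the same value (0xa5) forces
+			// a write-access probe on the page without changing its contents.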
+ VDAtomicInt::staticCompareExchange((volatile int *)p, 0xa5, 0xa5);
+
+ if (pLimit - p < pageSize)
+ break;
+
+ p += pageSize;
+ }
+ } __except(1) {
+ return false;
+ }
+
+ return true;
+}
+
+bool VDCompareRect(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, size_t w, size_t h) {
+ if (!w || !h)
+ return false;
+
+ do {
+ if (memcmp(dst, src, w))
+ return true;
+
+ dst = (char *)dst + dstpitch;
+ src = (const char *)src + srcpitch;
+ } while(--h);
+
+ return false;
+}
+
+const void *VDMemCheck8(const void *src, uint8 value, size_t count) {
+ if (count) {
+ const uint8 *src8 = (const uint8 *)src;
+
+ do {
+ if (*src8 != value)
+ return src8;
+
+ ++src8;
+ } while(--count);
+ }
+
+ return NULL;
+}
+
+void VDMemset8(void *dst, uint8 value, size_t count) {
+ if (count) {
+ uint8 *dst2 = (uint8 *)dst;
+
+ do {
+ *dst2++ = value;
+ } while(--count);
+ }
+}
+
+void VDMemset16(void *dst, uint16 value, size_t count) {
+ if (count) {
+ uint16 *dst2 = (uint16 *)dst;
+
+ do {
+ *dst2++ = value;
+ } while(--count);
+ }
+}
+
+void VDMemset24(void *dst, uint32 value, size_t count) {
+ if (count) {
+ uint8 *dst2 = (uint8 *)dst;
+ uint8 c0 = (uint8)value;
+ uint8 c1 = (uint8)(value >> 8);
+ uint8 c2 = (uint8)(value >> 16);
+
+ do {
+ *dst2++ = c0;
+ *dst2++ = c1;
+ *dst2++ = c2;
+ } while(--count);
+ }
+}
+
+void VDMemset32(void *dst, uint32 value, size_t count) {
+ if (count) {
+ uint32 *dst2 = (uint32 *)dst;
+
+ do {
+ *dst2++ = value;
+ } while(--count);
+ }
+}
+
+void VDMemset64(void *dst, uint64 value, size_t count) {
+ if (count) {
+ uint64 *dst2 = (uint64 *)dst;
+
+ do {
+ *dst2++ = value;
+ } while(--count);
+ }
+}
+
+void VDMemset128(void *dst, const void *src0, size_t count) {
+ if (count) {
+ const uint32 *src = (const uint32 *)src0;
+ uint32 a0 = src[0];
+ uint32 a1 = src[1];
+ uint32 a2 = src[2];
+ uint32 a3 = src[3];
+
+ uint32 *dst2 = (uint32 *)dst;
+
+ do {
+ dst2[0] = a0;
+ dst2[1] = a1;
+ dst2[2] = a2;
+ dst2[3] = a3;
+ dst2 += 4;
+ } while(--count);
+ }
+}
+
+void VDMemsetPointer(void *dst, const void *value, size_t count) {
+#if defined(_M_IX86)
+ VDMemset32(dst, (uint32)(size_t)value, count);
+#elif defined(_M_AMD64)
+ VDMemset64(dst, (uint64)(size_t)value, count);
+#else
+ #error Unknown pointer size
+#endif
+}
+
+void VDMemset8Rect(void *dst, ptrdiff_t pitch, uint8 value, size_t w, size_t h) {
+ if (w>0 && h>0) {
+ do {
+ memset(dst, value, w);
+ dst = (char *)dst + pitch;
+ } while(--h);
+ }
+}
+
+void VDMemset16Rect(void *dst, ptrdiff_t pitch, uint16 value, size_t w, size_t h) {
+ if (w>0 && h>0) {
+ do {
+ VDMemset16(dst, value, w);
+ dst = (char *)dst + pitch;
+ } while(--h);
+ }
+}
+
+void VDMemset24Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h) {
+ if (w>0 && h>0) {
+ do {
+ VDMemset24(dst, value, w);
+ dst = (char *)dst + pitch;
+ } while(--h);
+ }
+}
+
+void VDMemset32Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h) {
+ if (w>0 && h>0) {
+ do {
+ VDMemset32(dst, value, w);
+ dst = (char *)dst + pitch;
+ } while(--h);
+ }
+}
+
+#if defined(_WIN32) && defined(_M_IX86)
+ extern "C" void __cdecl VDFastMemcpyPartialScalarAligned8(void *dst, const void *src, size_t bytes);
+ extern "C" void __cdecl VDFastMemcpyPartialMMX(void *dst, const void *src, size_t bytes);
+ extern "C" void __cdecl VDFastMemcpyPartialMMX2(void *dst, const void *src, size_t bytes);
+
+ void VDFastMemcpyPartialScalar(void *dst, const void *src, size_t bytes) {
+ if (!(((int)dst | (int)src | bytes) & 7))
+ VDFastMemcpyPartialScalarAligned8(dst, src, bytes);
+ else
+ memcpy(dst, src, bytes);
+ }
+
+ void VDFastMemcpyFinishScalar() {
+ }
+
+ void __cdecl VDFastMemcpyFinishMMX() {
+ __asm emms
+ }
+
+ void __cdecl VDFastMemcpyFinishMMX2() {
+ __asm emms
+ __asm sfence
+ }
+
+ void (__cdecl *VDFastMemcpyPartial)(void *dst, const void *src, size_t bytes) = VDFastMemcpyPartialScalar;
+ void (__cdecl *VDFastMemcpyFinish)() = VDFastMemcpyFinishScalar;
+
+ void VDFastMemcpyAutodetect() {
+ long exts = CPUGetEnabledExtensions();
+
+ if (exts & CPUF_SUPPORTS_SSE) {
+ VDFastMemcpyPartial = VDFastMemcpyPartialMMX2;
+ VDFastMemcpyFinish = VDFastMemcpyFinishMMX2;
+ VDSwapMemory = VDSwapMemorySSE;
+ } else if (exts & CPUF_SUPPORTS_INTEGER_SSE) {
+ VDFastMemcpyPartial = VDFastMemcpyPartialMMX2;
+ VDFastMemcpyFinish = VDFastMemcpyFinishMMX2;
+ VDSwapMemory = VDSwapMemoryScalar;
+ } else if (exts & CPUF_SUPPORTS_MMX) {
+ VDFastMemcpyPartial = VDFastMemcpyPartialMMX;
+ VDFastMemcpyFinish = VDFastMemcpyFinishMMX;
+ VDSwapMemory = VDSwapMemoryScalar;
+ } else {
+ VDFastMemcpyPartial = VDFastMemcpyPartialScalar;
+ VDFastMemcpyFinish = VDFastMemcpyFinishScalar;
+ VDSwapMemory = VDSwapMemoryScalar;
+ }
+ }
+
+#else
+ void VDFastMemcpyPartial(void *dst, const void *src, size_t bytes) {
+ memcpy(dst, src, bytes);
+ }
+
+ void VDFastMemcpyFinish() {
+ }
+
+ void VDFastMemcpyAutodetect() {
+ }
+#endif
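+// Illustrative usage (editorial note, hypothetical buffer names): pick the
+// fastest copy routine once at startup, then batch row copies and flush the
+// CPU state afterwards; the Finish call executes emms/sfence when an MMX/SSE
+// path was selected.
+//
+//   VDFastMemcpyAutodetect();
+//   for(size_t y = 0; y < rows; ++y)
+//       VDFastMemcpyPartial(dstRows[y], srcRows[y], rowBytes);
+//   VDFastMemcpyFinish();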
+
+void VDMemcpyRect(void *dst, ptrdiff_t dststride, const void *src, ptrdiff_t srcstride, size_t w, size_t h) {
+ if (w <= 0 || h <= 0)
+ return;
+
+ if (w == srcstride && w == dststride)
+ VDFastMemcpyPartial(dst, src, w*h);
+ // MPC custom code (begin)
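+	// Editorial note: matching negative strides describe a contiguous bottom-up
+	// image, so it can likewise be copied as one block starting at its last
+	// (lowest-addressed) row.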
+ else if (w == -srcstride && w == -dststride)
+ VDFastMemcpyPartial((char *)dst + dststride * (h - 1), (char *)src + srcstride * (h - 1), w*h);
+ // MPC custom code (end)
+ else {
+ char *dst2 = (char *)dst;
+ const char *src2 = (const char *)src;
+
+ do {
+ VDFastMemcpyPartial(dst2, src2, w);
+ dst2 += dststride;
+ src2 += srcstride;
+ } while(--h);
+ }
+ VDFastMemcpyFinish();
+}
+
+bool VDMemcpyGuarded(void *dst, const void *src, size_t bytes) {
+ __try {
+ memcpy(dst, src, bytes);
+ } __except(GetExceptionCode() == STATUS_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/profile.cpp b/src/thirdparty/VirtualDub/system/source/profile.cpp
new file mode 100644
index 000000000..3c91adb07
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/profile.cpp
@@ -0,0 +1,234 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <windows.h>
+#include <vd2/system/profile.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+VDRTProfiler *g_pCentralProfiler;
+
+void VDInitProfilingSystem() {
+ if (!g_pCentralProfiler)
+ g_pCentralProfiler = new VDRTProfiler;
+}
+
+void VDDeinitProfilingSystem() {
+ delete g_pCentralProfiler;
+ g_pCentralProfiler = 0;
+}
+
+VDRTProfiler *VDGetRTProfiler() {
+ return g_pCentralProfiler;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDRTProfiler::VDRTProfiler()
+ : mbEnableCollection(false)
+{
+ LARGE_INTEGER freq;
+ QueryPerformanceFrequency(&freq);
+ mPerfFreq = freq.QuadPart;
+}
+
+VDRTProfiler::~VDRTProfiler() {
+}
+
+void VDRTProfiler::BeginCollection() {
+ mbEnableCollection = true;
+}
+
+void VDRTProfiler::EndCollection() {
+ mbEnableCollection = false;
+}
+
+void VDRTProfiler::Swap() {
+ vdsynchronized(mLock) {
+ LARGE_INTEGER tim;
+ QueryPerformanceCounter(&tim);
+
+ mSnapshotTime = tim.QuadPart;
+
+ // update channels
+ uint32 channelCount = mChannelArray.size();
+ mChannelArrayToPaint.resize(channelCount);
+
+ for(uint32 i=0; i<channelCount; ++i) {
+ Channel& src = mChannelArray[i];
+ Channel& dst = mChannelArrayToPaint[i];
+
+ dst.mpName = src.mpName;
+
+ dst.mEventList.clear();
+ dst.mEventList.swap(src.mEventList);
+ if (src.mbEventPending) {
+ src.mEventList.push_back(dst.mEventList.back());
+ src.mEventList.back().mEndTime = mSnapshotTime;
+ }
+ }
+
+ // update counters
+ Counters::iterator itC(mCounterArray.begin()), itCEnd(mCounterArray.end());
+ for(; itC != itCEnd; ++itC) {
+ Counter& ctr = *itC;
+
+ ctr.mDataLast = ctr.mData;
+
+ switch(ctr.mType) {
+ case kCounterTypeUint32:
+ ctr.mData.u32 = *(const uint32 *)ctr.mpData;
+ break;
+ case kCounterTypeDouble:
+ ctr.mData.d = *(const double *)ctr.mpData;
+ break;
+ }
+ }
+
+ mCounterArrayToPaint = mCounterArray;
+ }
+}
+
+int VDRTProfiler::AllocChannel(const char *name) {
+ uint32 i;
+
+ vdsynchronized(mLock) {
+ const uint32 nChannels = mChannelArray.size();
+
+ for(i=0; i<nChannels; ++i)
+ if (!mChannelArray[i].mpName)
+ break;
+
+ if (mChannelArray.size() <= i)
+ mChannelArray.resize(i + 1);
+
+ mChannelArray[i].mpName = name;
+ mChannelArray[i].mbEventPending = false;
+ }
+
+ return (int)i;
+}
+
+void VDRTProfiler::FreeChannel(int ch) {
+ vdsynchronized(mLock) {
+ mChannelArray[ch].mpName = 0;
+ mChannelArray[ch].mEventList.clear();
+ }
+}
+
+void VDRTProfiler::BeginEvent(int channel, uint32 color, const char *name) {
+ if (mbEnableCollection) {
+ LARGE_INTEGER tim;
+ QueryPerformanceCounter(&tim);
+ vdsynchronized(mLock) {
+ Channel& chan = mChannelArray[channel];
+
+ if (!chan.mbEventPending) {
+ chan.mbEventPending = true;
+ chan.mEventList.push_back(Event());
+ Event& ev = chan.mEventList.back();
+ ev.mpName = name;
+ ev.mColor = color;
+ ev.mStartTime = tim.QuadPart;
+ ev.mEndTime = tim.QuadPart;
+ }
+ }
+ }
+}
+
+void VDRTProfiler::EndEvent(int channel) {
+ if (mbEnableCollection) {
+ LARGE_INTEGER tim;
+
+ QueryPerformanceCounter(&tim);
+ vdsynchronized(mLock) {
+ Channel& chan = mChannelArray[channel];
+
+ if (chan.mbEventPending) {
+ chan.mEventList.back().mEndTime = tim.QuadPart;
+ chan.mbEventPending = false;
+ }
+ }
+ }
+}
+
+void VDRTProfiler::RegisterCounterU32(const char *name, const uint32 *val) {
+ RegisterCounter(name, val, kCounterTypeUint32);
+}
+
+void VDRTProfiler::RegisterCounterD(const char *name, const double *val) {
+ RegisterCounter(name, val, kCounterTypeDouble);
+}
+
+struct VDRTProfiler::CounterByNamePred {
+ bool operator()(const char *name1, const char *name2) const {
+ return strcmp(name1, name2) < 0;
+ }
+
+ bool operator()(const char *name1, const Counter& ctr) const {
+ return strcmp(name1, ctr.mpName) < 0;
+ }
+
+ bool operator()(const Counter& ctr, const char *name2) const {
+ return strcmp(ctr.mpName, name2) < 0;
+ }
+
+ bool operator()(const Counter& ctr1, const Counter& ctr2) const {
+ return strcmp(ctr1.mpName, ctr2.mpName) < 0;
+ }
+};
+
+void VDRTProfiler::RegisterCounter(const char *name, const void *val, CounterType type) {
+ VDASSERT(val);
+
+ vdsynchronized(mLock) {
+ Counters::iterator itBegin(mCounterArray.begin());
+ Counters::iterator itEnd(mCounterArray.end());
+ Counters::iterator it(std::upper_bound(itBegin, itEnd, name, CounterByNamePred()));
+
+ it = mCounterArray.insert(it, Counter());
+ Counter& ctr = *it;
+
+ memset(&ctr.mData, 0, sizeof ctr.mData);
+ memset(&ctr.mDataLast, 0, sizeof ctr.mDataLast);
+ ctr.mpData = val;
+ ctr.mpName = name;
+ ctr.mType = type;
+ }
+}
+
+void VDRTProfiler::UnregisterCounter(void *p) {
+ vdsynchronized(mLock) {
+ Counters::iterator it(mCounterArray.begin()), itEnd(mCounterArray.end());
+ for(; it!=itEnd; ++it) {
+ const Counter& counter = *it;
+ if (counter.mpData == p) {
+ mCounterArray.erase(it);
+ return;
+ }
+ }
+ }
+}
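
A short usage sketch of the realtime profiler API above (illustrative only; declarations come from <vd2/system/profile.h> as included by this file):

    #include <vd2/system/profile.h>

    void DecodeOneFrame(); // placeholder for real work

    void ProfiledDecode() {
        VDInitProfilingSystem();                        // creates the central profiler on first use
        VDRTProfiler *prof = VDGetRTProfiler();

        int chan = prof->AllocChannel("Video decode");  // claims a free channel slot
        prof->BeginCollection();

        prof->BeginEvent(chan, 0xFFE0E0, "decode");     // color value; exact interpretation is assumed here
        DecodeOneFrame();
        prof->EndEvent(chan);

        prof->Swap();                                   // publishes collected events for painting
        prof->EndCollection();
        prof->FreeChannel(chan);
    }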
diff --git a/src/thirdparty/VirtualDub/system/source/progress.cpp b/src/thirdparty/VirtualDub/system/source/progress.cpp
new file mode 100644
index 000000000..1ac26a0f6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/progress.cpp
@@ -0,0 +1,35 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <vd2/system/tls.h>
+#include <vd2/system/progress.h>
+#include <vd2/system/error.h>
+#include <vd2/system/atomic.h>
+#include <vd2/system/thread.h>
+
diff --git a/src/thirdparty/VirtualDub/system/source/protscope.cpp b/src/thirdparty/VirtualDub/system/source/protscope.cpp
new file mode 100644
index 000000000..612082824
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/protscope.cpp
@@ -0,0 +1,37 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/protscope.h>
+
+VDProtectedAutoScope *VDGetProtectedScopeLinkNull() {
+ return NULL;
+}
+
+void VDSetProtectedScopeLinkNull(VDProtectedAutoScope *) {
+}
+
+tpVDGetProtectedScopeLink g_pVDGetProtectedScopeLink = VDGetProtectedScopeLinkNull;
+tpVDSetProtectedScopeLink g_pVDSetProtectedScopeLink = VDSetProtectedScopeLinkNull;
diff --git a/src/thirdparty/VirtualDub/system/source/refcount.cpp b/src/thirdparty/VirtualDub/system/source/refcount.cpp
new file mode 100644
index 000000000..f0d82760a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/refcount.cpp
@@ -0,0 +1,29 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2009 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/refcount.h>
+
+vdsaferelease_t vdsaferelease;
diff --git a/src/thirdparty/VirtualDub/system/source/registry.cpp b/src/thirdparty/VirtualDub/system/source/registry.cpp
new file mode 100644
index 000000000..18506e7f6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/registry.cpp
@@ -0,0 +1,243 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <windows.h>
+
+#include <vd2/system/VDString.h>
+#include <vd2/system/registry.h>
+
+VDRegistryKey::VDRegistryKey(const char *keyName, bool global, bool write) {
+ const HKEY rootKey = global ? HKEY_LOCAL_MACHINE : HKEY_CURRENT_USER;
+
+ if (write) {
+ if (RegCreateKeyEx(rootKey, keyName, 0, NULL, REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, (PHKEY)&pHandle, NULL))
+ pHandle = NULL;
+ } else {
+ if (RegOpenKeyEx(rootKey, keyName, 0, KEY_READ, (PHKEY)&pHandle))
+ pHandle = NULL;
+ }
+}
+
+VDRegistryKey::~VDRegistryKey() {
+ if (pHandle)
+ RegCloseKey((HKEY)pHandle);
+}
+
+bool VDRegistryKey::setBool(const char *pszName, bool v) const {
+ if (pHandle) {
+ DWORD dw = v;
+
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_DWORD, (const BYTE *)&dw, sizeof dw))
+ return true;
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::setInt(const char *pszName, int i) const {
+ if (pHandle) {
+ DWORD dw = i;
+
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_DWORD, (const BYTE *)&dw, sizeof dw))
+ return true;
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::setString(const char *pszName, const char *pszString) const {
+ if (pHandle) {
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_SZ, (const BYTE *)pszString, strlen(pszString)))
+ return true;
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::setString(const char *pszName, const wchar_t *pszString) const {
+ if (pHandle) {
+ if (GetVersion() & 0x80000000) {
+ VDStringA s(VDTextWToA(pszString));
+
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_SZ, (const BYTE *)s.data(), s.size()))
+ return true;
+ } else {
+ if (RegSetValueExW((HKEY)pHandle, VDTextAToW(pszName).c_str(), 0, REG_SZ, (const BYTE *)pszString, sizeof(wchar_t) * wcslen(pszString)))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::setBinary(const char *pszName, const char *data, int len) const {
+ if (pHandle) {
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_BINARY, (const BYTE *)data, len))
+ return true;
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::getBool(const char *pszName, bool def) const {
+ DWORD type, v, s=sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, (BYTE *)&v, &s)
+ || type != REG_DWORD)
+ return def;
+
+ return v != 0;
+}
+
+int VDRegistryKey::getInt(const char *pszName, int def) const {
+ DWORD type, v, s=sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, (BYTE *)&v, &s)
+ || type != REG_DWORD)
+ return def;
+
+ return (int)v;
+}
+
+int VDRegistryKey::getEnumInt(const char *pszName, int maxVal, int def) const {
+ int v = getInt(pszName, def);
+
+ if (v<0 || v>=maxVal)
+ v = def;
+
+ return v;
+}
+
+bool VDRegistryKey::getString(const char *pszName, VDStringA& str) const {
+ DWORD type, s = sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, NULL, &s) || type != REG_SZ)
+ return false;
+
+ str.resize(s);
+ if (RegQueryValueEx((HKEY)pHandle, pszName, 0, NULL, (BYTE *)str.data(), &s))
+ return false;
+
+ if (!s)
+ str.clear();
+ else
+ str.resize(strlen(str.c_str())); // Trim off pesky terminating NULLs.
+
+ return true;
+}
+
+bool VDRegistryKey::getString(const char *pszName, VDStringW& str) const {
+ if (!pHandle)
+ return false;
+
+ if (GetVersion() & 0x80000000) {
+ VDStringA v;
+ if (!getString(pszName, v))
+ return false;
+ str = VDTextAToW(v);
+ return true;
+ }
+
+ const VDStringW wsName(VDTextAToW(pszName));
+ DWORD type, s = sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueExW((HKEY)pHandle, wsName.c_str(), 0, &type, NULL, &s) || type != REG_SZ)
+ return false;
+
+ if (s <= 0)
+ str.clear();
+ else {
+ str.resize((s + sizeof(wchar_t) - 1) / sizeof(wchar_t));
+
+ if (RegQueryValueExW((HKEY)pHandle, wsName.c_str(), 0, NULL, (BYTE *)&str[0], &s))
+ return false;
+
+ str.resize(wcslen(str.c_str())); // Trim off pesky terminating NULLs.
+ }
+
+ return true;
+}
+
+int VDRegistryKey::getBinaryLength(const char *pszName) const {
+ DWORD type, s = sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, NULL, &s)
+ || type != REG_BINARY)
+ return -1;
+
+ return s;
+}
+
+bool VDRegistryKey::getBinary(const char *pszName, char *buf, int maxlen) const {
+ DWORD type, s = maxlen;
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, (BYTE *)buf, &s) || maxlen < (int)s || type != REG_BINARY)
+ return false;
+
+ return true;
+}
+
+bool VDRegistryKey::removeValue(const char *name) {
+ if (!pHandle || RegDeleteValue((HKEY)pHandle, name))
+ return false;
+
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDRegistryValueIterator::VDRegistryValueIterator(const VDRegistryKey& key)
+ : mpHandle(key.getRawHandle())
+ , mIndex(0)
+{
+}
+
+const char *VDRegistryValueIterator::Next() {
+ DWORD len = sizeof(mName)/sizeof(mName[0]);
+ LONG error = RegEnumValueA((HKEY)mpHandle, mIndex, mName, &len, NULL, NULL, NULL, NULL);
+
+ if (error)
+ return NULL;
+
+ ++mIndex;
+ return mName;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDString VDRegistryAppKey::s_appbase;
+
+VDRegistryAppKey::VDRegistryAppKey() : VDRegistryKey(s_appbase.c_str()) {
+}
+
+VDRegistryAppKey::VDRegistryAppKey(const char *pszKey, bool write)
+ : VDRegistryKey((s_appbase + pszKey).c_str(), false, write)
+{
+}
+
+void VDRegistryAppKey::setDefaultKey(const char *pszAppName) {
+ s_appbase = pszAppName;
+}
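
A usage sketch of VDRegistryKey above (illustrative only; the key path and value names are hypothetical):

    #include <vd2/system/VDString.h>
    #include <vd2/system/registry.h>

    void SaveAndLoadSettings() {
        // Write a few values under HKEY_CURRENT_USER (global = false), opened for writing.
        VDRegistryKey wkey("Software\\Example\\App\\Video", false, true);
        wkey.setInt("Brightness", 128);
        wkey.setBool("Deinterlace", true);
        wkey.setString("Renderer", "EVR");

        // Read them back through a read-only key; the second argument is the fallback default.
        VDRegistryKey rkey("Software\\Example\\App\\Video", false, false);
        int brightness = rkey.getInt("Brightness", 100);
        VDStringA renderer;
        rkey.getString("Renderer", renderer);

        // Enumerate value names under the key.
        VDRegistryValueIterator it(rkey);
        while(const char *name = it.Next()) {
            // ...
        }
        (void)brightness;
    }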
diff --git a/src/thirdparty/VirtualDub/system/source/stdaccel.cpp b/src/thirdparty/VirtualDub/system/source/stdaccel.cpp
new file mode 100644
index 000000000..4cbfdcd18
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/stdaccel.cpp
@@ -0,0 +1,42 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#define VDTEXTERN
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+
+template vdspan<char>;
+template vdspan<uint8>;
+template vdspan<uint16>;
+template vdspan<uint32>;
+template vdspan<uint64>;
+template vdspan<sint8>;
+template vdspan<sint16>;
+template vdspan<sint32>;
+template vdspan<sint64>;
+template vdspan<float>;
+template vdspan<double>;
+template vdspan<wchar_t>;
diff --git a/src/thirdparty/VirtualDub/system/source/stdafx.cpp b/src/thirdparty/VirtualDub/system/source/stdafx.cpp
new file mode 100644
index 000000000..acf0b47e4
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/stdafx.cpp
@@ -0,0 +1,46 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include <stdafx.h>
+
+#ifdef _MSC_VER
+ #pragma hdrstop
+#endif
+
+// compiler/setup checks
+
+#if defined(_MSC_VER)
+ #if _MSC_VER < 1300
+ #include <windows.h>
+
+ #line 1 " \n \n \n***** You do not have the correct version of the Microsoft Platform SDK installed *****\nPlease see Docs\\index.html for details.\n \n \n"
+ namespace { const DWORD PlatformSDKTest = INVALID_SET_FILE_POINTER; }
+ #line 1 ""
+
+ #line 1 " \n \n \n***** You do not have the Visual C++ Processor Pack installed *****\nPlease see Docs\\index.html for details.\n \n \n"
+ namespace { void VCPPCheck() { __asm { sfence } } }
+ #line 1 ""
+ #endif
+#endif
diff --git a/src/thirdparty/VirtualDub/system/source/strutil.cpp b/src/thirdparty/VirtualDub/system/source/strutil.cpp
new file mode 100644
index 000000000..2d9becc85
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/strutil.cpp
@@ -0,0 +1,99 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <string.h>
+#include <ctype.h>
+
+#include <vd2/system/strutil.h>
+
+char *strncpyz(char *strDest, const char *strSource, size_t count) {
+ char *s;
+
+ s = strncpy(strDest, strSource, count);
+ strDest[count-1] = 0;
+
+ return s;
+}
+
+wchar_t *wcsncpyz(wchar_t *strDest, const wchar_t *strSource, size_t count) {
+ wchar_t *s;
+
+ s = wcsncpy(strDest, strSource, count);
+ strDest[count-1] = 0;
+
+ return s;
+}
+
+const char *strskipspace(const char *s) {
+ while(isspace((unsigned char)*s++))
+ ;
+
+ return s-1;
+}
+
+size_t vdstrlcpy(char *dst, const char *src, size_t size) {
+ size_t len = strlen(src);
+
+ if (size) {
+ if (size > len)
+ size = len;
+ else
+ size = size - 1; // truncating copy: leave room for the terminator
+
+ memcpy(dst, src, size);
+ dst[size] = 0;
+ }
+ return len;
+}
+
+size_t vdwcslcpy(wchar_t *dst, const wchar_t *src, size_t size) {
+ size_t len = wcslen(src);
+
+ if (size) {
+ if (size > len)
+ size = len;
+ else
+ size = size - 1; // truncating copy: leave room for the terminator
+
+ memcpy(dst, src, size * sizeof(wchar_t));
+ dst[size] = 0;
+ }
+ return len;
+}
+
+size_t vdstrlcat(char *dst, const char *src, size_t size) {
+ size_t dlen = strlen(dst);
+ size_t slen = strlen(src);
+
+ if (dlen < size) {
+ size_t maxappend = size - dlen - 1;
+ if (maxappend > slen)
+ maxappend = slen;
+
+ if (maxappend) {
+ memcpy(dst + dlen, src, maxappend);
+ dst[dlen+maxappend] = 0;
+ }
+ }
+
+ return dlen+slen;
+}
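
A usage sketch of the bounded string helpers above (illustrative only; declarations come from <vd2/system/strutil.h> as included by this file):

    #include <stdio.h>
    #include <vd2/system/strutil.h>

    void CopyExamples() {
        char buf[8];

        // strlcpy-style copy: the return value is the full source length,
        // so a result >= sizeof buf means the copy was truncated.
        size_t needed = vdstrlcpy(buf, "this string is too long", sizeof buf);
        if (needed >= sizeof buf)
            printf("truncated; %u bytes would be needed\n", (unsigned)(needed + 1));

        // Bounded append in the same style.
        char path[64] = "C:\\Temp";
        vdstrlcat(path, "\\output.avi", sizeof path);

        // Skip leading whitespace without modifying the string.
        const char *p = strskipspace("   trimmed");
        (void)p;
    }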
diff --git a/src/thirdparty/VirtualDub/system/source/text.cpp b/src/thirdparty/VirtualDub/system/source/text.cpp
new file mode 100644
index 000000000..64f263d88
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/text.cpp
@@ -0,0 +1,652 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vector>
+#include <algorithm>
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include <windows.h>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/text.h>
+#include <vd2/system/tls.h>
+#include <vd2/system/VDString.h>
+
+int VDTextWToA(char *dst, int max_dst, const wchar_t *src, int max_src) {
+ VDASSERTPTR(dst);
+ VDASSERTPTR(src);
+ VDASSERT(max_dst>0);
+
+ *dst = 0;
+
+ int len = WideCharToMultiByte(CP_ACP, 0, src, max_src, dst, max_dst, NULL, NULL);
+
+ // drop the null terminator from the count when the source length was
+ // not provided (max_src < 0 implies the source is null-terminated)
+ return max_src<0 && len>0 ? len-1 : len;
+}
+
+int VDTextAToW(wchar_t *dst, int max_dst, const char *src, int max_src) {
+ VDASSERTPTR(dst);
+ VDASSERTPTR(src);
+ VDASSERT(max_dst>0);
+
+ *dst = 0;
+
+ int len = MultiByteToWideChar(CP_ACP, 0, src, max_src, dst, max_dst);
+
+ // drop the null terminator from the count when the source length was
+ // not provided (max_src < 0 implies the source is null-terminated)
+ return max_src<0 && len>0 ? len-1 : len;
+}
+
+VDStringA VDTextWToA(const VDStringW& sw) {
+ return VDTextWToA(sw.data(), sw.length());
+}
+
+VDStringA VDTextWToA(const wchar_t *src, int srclen) {
+ VDStringA s;
+
+ if (src) {
+ int l = VDTextWToALength(src, srclen);
+
+ if (l) {
+ s.resize(l);
+ VDTextWToA((char *)s.data(), l+1, src, srclen);
+ }
+ }
+
+ return s;
+}
+
+VDStringW VDTextAToW(const VDStringA& s) {
+ return VDTextAToW(s.data(), s.length());
+}
+
+VDStringW VDTextAToW(const char *src, int srclen) {
+ VDStringW sw;
+
+ if (src) {
+ int l = VDTextAToWLength(src, srclen);
+
+ if (l) {
+ sw.resize(l);
+ VDTextAToW(&sw[0], sw.length()+1, src, srclen);
+ }
+ }
+
+ return sw;
+}
+
+int VDTextWToALength(const wchar_t *s, int length) {
+ SetLastError(0);
+ int rv = WideCharToMultiByte(CP_ACP, 0, s, length, NULL, 0, NULL, 0);
+
+ if (length < 0 && rv>0)
+ --rv;
+
+ return rv;
+}
+
+int VDTextAToWLength(const char *s, int length) {
+ SetLastError(0);
+ int rv = MultiByteToWideChar(CP_ACP, 0, s, length, NULL, 0);
+
+ if (length < 0 && rv > 0)
+ --rv;
+
+ return rv;
+}
+
+namespace {
+ // UTF8:
+ // 000000000xxxxxxx -> 0xxxxxxx
+ // 00000yyyyyxxxxxx -> 110yyyyy 10xxxxxx
+ // zzzzyyyyyyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
+ // uuuuuzzzzyyyyyyxxxxxx -> 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
+ // (UTF16) -> 110110wwwwzzzzyy (uuuuu = wwww+1)
+ // 110111yyyyxxxxxx
+ int VDGetCharLengthInUTF8(wchar_t c) {
+ if (c < 0x0080) // 7 bits
+ return 1;
+ else if (c < 0x0800) // 11 bits
+ return 2;
+ else if (c < 0x10000) // 16 bits
+ return 3;
+ else if (c < 0x200000) // 21 bits
+ return 4;
+ else {
+ VDASSERT(false);
+ return 1; // Uh oh. Well, we're screwed.
+ }
+ }
+
+ bool VDIsUnicodeSurrogateFirst(wchar_t c) {
+ return (c >= 0xD800 && c < 0xDC00);
+ }
+
+ bool VDIsUnicodeSurrogateSecond(wchar_t c) {
+ return (c >= 0xDC00 && c < 0xE000);
+ }
+};
+
+VDStringA VDTextWToU8(const VDStringW& s) {
+ return VDTextWToU8(s.data(), s.length());
+}
+
+VDStringA VDTextWToU8(const wchar_t *s, int length) {
+ vdfastvector<char> temp;
+
+ if (length<0) {
+ const wchar_t *t = s;
+ do {
+ ++length;
+ } while(*t++);
+ }
+
+ while(length--) {
+ uint32 c = *s++;
+
+ if (VDIsUnicodeSurrogateFirst(c)) {
+ if (!length || !VDIsUnicodeSurrogateSecond(*s)) {
+ VDASSERT(false);
+ c = '?';
+ } else {
+ c = 0x10000 + ((c & 0x3ff)<<10) + (*s++ & 0x3ff);
+ --length;
+ }
+ }
+
+ if (c < 0x0080) {
+ temp.push_back((char)c);
+ } else {
+ if (c < 0x0800)
+ temp.push_back((char)(0xc0 + (c>>6)));
+ else {
+ if (c < 0x10000)
+ temp.push_back((char)(0xe0 + (c>>12)));
+ else {
+ temp.push_back((char)(0xf0 + ((c>>18) & 0x07)));
+ temp.push_back((char)(0x80 + ((c>>12) & 0x3f)));
+ }
+ temp.push_back((char)(0x80 + ((c>>6) & 0x3f)));
+ }
+ temp.push_back((char)(0x80 + (c & 0x3f)));
+ }
+ }
+
+ VDStringA a(temp.data(), temp.size());
+
+ return a;
+}
+
+VDStringW VDTextU8ToW(const VDStringA& s) {
+ return VDTextU8ToW(s.data(), s.length());
+}
+
+VDStringW VDTextU8ToW(const char *s, int length) {
+ vdfastvector<wchar_t> temp;
+
+ if (length<0) {
+ const char *t = s;
+ VDASSERT(length == -1);
+ do {
+ ++length;
+ } while(*t++);
+ }
+
+ while(length--) {
+ unsigned char c = (char)*s++;
+ uint32 wc = c; // we reconstruct UTF-32 first and then split to UTF-16 if necessary
+
+ if (c >= 0x80) {
+ int required_extra = 0;
+
+ if (c < 0xc0 || c >= 0xf7) {
+ VDASSERT(false);
+ break;
+ }
+
+ while(c >= 0xc0) {
+ c <<= 1;
+ ++required_extra;
+ }
+
+ wc = (c&0x3f) >> required_extra;
+
+ do {
+ char d;
+
+ if (!length-- || (((d=*s++)&0xc0)!=0x80))
+ goto bad_sequence_exit;
+
+ wc = (wc<<6) + (d&0x3f);
+ } while(--required_extra);
+ }
+
+ // Two cases here. If we are using UTF-16, surrogates need to be split in half. If we are using
+ // UTF-32, surrogates need to be combined.
+
+ if (sizeof(wchar_t) > 2) {
+ if (VDIsUnicodeSurrogateSecond(wc)) {
+ if (temp.empty() || !VDIsUnicodeSurrogateFirst(temp.back())) {
+ VDASSERT(false);
+ break;
+ }
+
+ temp.back() = 0x10000 + ((temp.back()&0x3ff) << 10) + (wc & 0x3ff);
+ continue;
+ }
+ } else {
+ if (wc >= 0x10000) {
+ wc -= 0x10000;
+ temp.push_back(0xD800 + ((wc & 0x3ff) >> 10));
+ wc = 0xDC00 + (wc&0x3ff);
+ }
+ }
+ temp.push_back(wc);
+ }
+bad_sequence_exit:
+
+ VDStringW w(temp.data(), temp.size());
+
+ return w;
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VirtualDub's very own printf() functions.
+//
+// VD[v|a]swprintf() differs from wsprintf() in the following ways:
+//
+// * The output is a string.
+// * All parameters must be passed by pointer instead of by value.
+// * The 'll' modifier permits long long / __int64 integers.
+// * [n] allows picking parameters out of order.
+// * %lc/%ls forces Unicode; %hc/%hs forces ANSI.
+
+VDStringW VDaswprintf(const wchar_t *format, int args, const void *const *argv) {
+ const void *const *argv0 = argv;
+ vdfastfixedvector<wchar_t, 256> out;
+ wchar_t c;
+
+ VDStringW tempConv;
+
+ while(c = *format) {
+ if (c != L'%') {
+ const wchar_t *s = format;
+
+ while(*s && *s != L'%')
+ ++s;
+
+ int len = s - format;
+ int clen = out.size();
+
+ out.resize(clen + len);
+
+ std::copy(format, s, &out[clen]);
+
+ format = s;
+ } else {
+ ++format;
+
+ // check for %%
+
+ if (*format == L'%') {
+ ++format;
+ out.push_back(L'%');
+ continue;
+ }
+
+ // Check for a renumbering identifier.
+
+ if (*format == L'[') {
+ ++format;
+
+ int newid = wcstol(format, const_cast<wchar_t **>(&format), 0);
+
+ VDASSERT(newid >= 0 && newid < args);
+
+ argv = argv0 + newid;
+
+ VDVERIFY(*format++ == L']');
+ }
+
+ // process flags
+
+ struct {
+ bool bLeftAlign:1, // pad right with spaces (priority over zero pad)
+ bZeroPad:1, // pad left with zeroes
+ bPositiveSign:1, // prefix with + or -; priority over bPositiveBlank
+ bPositiveBlank:1, // prefix with space for nonnegative
+ bPrefix:1; // prefix with 0, 0x, 0X, or force decimal point
+ } flags={false};
+ int width = 0;
+ int precision = -1;
+
+ for(;;) {
+ c = *format;
+
+ if (c == L'0')
+ flags.bZeroPad = true;
+ else if (c == L' ')
+ flags.bPositiveBlank = true;
+ else if (c == L'#')
+ flags.bPrefix = true;
+ else if (c == L'-')
+ flags.bLeftAlign = true;
+ else if (c == L'+')
+ flags.bPositiveSign = true;
+ else
+ break;
+
+ ++format;
+ }
+
+ // process width
+
+ c = *format;
+ if (c == L'*') {
+ ++format;
+ width = *(int *)*argv++;
+ } else if (iswdigit(c))
+ width = (int)wcstol(format, const_cast<wchar_t **>(&format), 0);
+
+ // process precision
+
+ if (*format == L'.') {
+ c = *++format;
+
+ if (c == L'*') {
+ ++format;
+ precision = *(int *)*argv++;
+ } else if (iswdigit(c))
+ precision = (int)wcstol(format, const_cast<wchar_t **>(&format), 0);
+ }
+
+ // process flags
+
+ enum { kDefault, kLong, kLongLong, kShort } size = kDefault;
+
+ c = *format;
+
+ if (c == L'l') {
+ ++format;
+ size = kLong;
+
+ if (*format == L'l') {
+ ++format;
+ size = kLongLong;
+ }
+
+ } else if (c == L'h') {
+ ++format;
+ size = kShort;
+ }
+
+ // process format character
+
+ wchar_t xf[32], buf[32], *pxf = xf, *pbuf0 = buf, *pbuf = buf;
+ int zero_pad = 0;
+
+ switch(*format++) {
+ case L'd':
+ case L'i':
+ case L'o':
+ case L'u':
+ case L'x':
+ case L'X':
+ *pxf++ = '%';
+ if (flags.bPrefix)
+ *pxf++ = '#';
+ if (flags.bPositiveBlank)
+ *pxf++ = ' ';
+ if (flags.bPositiveSign)
+ *pxf++ = '+';
+
+ switch(size) {
+ case kShort:
+ *pxf++ = 'h';
+ *pxf++ = format[-1];
+ *pxf = 0;
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], xf, *(const short *)*argv++);
+ break;
+ case kDefault:
+ *pxf++ = format[-1];
+ *pxf = 0;
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], xf, *(const int *)*argv++);
+ break;
+ case kLong:
+ *pxf++ = 'l';
+ *pxf++ = format[-1];
+ *pxf = 0;
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], xf, *(const long *)*argv++);
+ break;
+ case kLongLong:
+#if defined(_MSC_VER)
+ *pxf++ = 'I';
+ *pxf++ = '6';
+ *pxf++ = '4';
+#elif defined(__GNUC__)
+ *pxf++ = 'l';
+ *pxf++ = 'l';
+#else
+#error Please insert the appropriate 64-bit printf format for your platform.
+#endif
+ *pxf++ = format[-1];
+ *pxf = 0;
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], xf, *(const int64 *)*argv++);
+ break;
+ default:
+ VDNEVERHERE;
+ }
+
+ if (pbuf - pbuf0 < precision)
+ zero_pad = precision - (pbuf - pbuf0);
+
+ break;
+
+ case L'c':
+ if (size == kShort) {
+ char buf[2] = {*(const char *)*argv++, 0};
+ pbuf += VDTextAToW(pbuf, 4, buf);
+ } else
+ *pbuf++ = *(const wchar_t *)*argv++;
+ break;
+
+ case L's':
+ if (size == kShort) {
+ const char *s = *(const char *const *)*argv++;
+ int maxsrc = strlen(s);
+
+ if (precision >= 0 && precision < maxsrc)
+ maxsrc = precision;
+
+ tempConv = VDTextAToW(s, maxsrc);
+ pbuf0 = const_cast<wchar_t *>(tempConv.c_str());
+
+ pbuf = pbuf0 + tempConv.size();
+ } else {
+ pbuf = pbuf0 = *(wchar_t *const *)*argv++;
+
+ while(*pbuf && precision) {
+ ++pbuf;
+ --precision;
+ }
+ }
+ break;
+
+ case L'e':
+ case L'E':
+ case L'f':
+ case L'F':
+ case L'g':
+ case L'G':
+ // We place an artificial limit of 256 characters on the precision value.
+ {
+ if (precision > 256)
+ precision = 256;
+
+ tempConv.resize(256);
+ pbuf0 = pbuf = const_cast<wchar_t *>(tempConv.data());
+
+ *pxf++ = '%';
+ if (flags.bPrefix)
+ *pxf++ = '#';
+ if (flags.bPositiveBlank)
+ *pxf++ = ' ';
+ if (flags.bPositiveSign)
+ *pxf++ = '+';
+ if (precision>=0) {
+ *pxf++ = '.';
+ *pxf++ = '*';
+ }
+ *pxf++ = format[-1];
+ *pxf = 0;
+
+ if (precision >= 0)
+ pbuf += swprintf(pbuf, 256, xf, precision, *(const double *)*argv++);
+ else
+ pbuf += swprintf(pbuf, 256, xf, *(const double *)*argv++);
+ }
+ break;
+
+ case L'n': // no flags honored; precision ignored
+ *(int *)(*argv++) = out.size();
+ continue;
+ case L'p': // no flags honored; precision ignored
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], L"%p", *(void *const *)*argv++);
+ break;
+
+ case L'z':
+ switch(*format++) {
+ case L's':
+ {
+ int64 value;
+
+ switch(size) {
+ case kShort: value = *(const short *)*argv++; break;
+ case kDefault: value = *(const int *)*argv++; break;
+ case kLong: value = *(const long *)*argv++; break;
+ case kLongLong: value = *(const int64 *)*argv++; break;
+ break;
+ default:
+ VDNEVERHERE;
+ }
+
+ if (value < 0)
+ *pbuf++ = L'-';
+ else if (flags.bPositiveSign)
+ *pbuf++ = L'+';
+ else if (flags.bPositiveBlank)
+ *pbuf++ = L' ';
+
+ if (value < (VD64(10) << 10))
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d bytes", (int)value);
+ else if (value < (VD64(10) << 20))
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d KB", (int)((sint32)value >> 10));
+ else if (value < (VD64(10) << 30))
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d MB", (int)((sint32)value >> 20));
+ else if (value < (VD64(10) << 40))
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d GB", (int)(value >> 30));
+ else
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d TB", (int)(value >> 40));
+ }
+
+ break;
+ }
+ break;
+
+ }
+
+ int string_width = (pbuf - pbuf0) + zero_pad;
+ int string_delta = width - string_width;
+
+ if (!flags.bLeftAlign && string_delta > 0) {
+ int siz = out.size();
+ out.resize(siz + string_delta, flags.bZeroPad ? L'0' : L' ');
+ }
+
+ if (zero_pad) {
+ int siz = out.size();
+ out.resize(siz + zero_pad);
+ std::fill(&out[siz], &out[siz+zero_pad], L'0');
+ }
+
+ if (pbuf != pbuf0) {
+ int siz = out.size();
+ out.resize(siz + (pbuf - pbuf0));
+
+ std::copy(pbuf0, pbuf, &out[siz]);
+ }
+
+ if (flags.bLeftAlign && string_delta > 0) {
+ int siz = out.size();
+ out.resize(siz + string_delta);
+ std::fill(&out[siz], &out[siz+string_delta], L' ');
+ }
+ }
+ }
+
+ out.push_back(0);
+
+ return VDStringW(out.data());
+}
+
+VDStringW VDvswprintf(const wchar_t *format, int args, va_list val) {
+ if (args < 16) {
+ const void *argv[16];
+
+ for(int i=0; i<args; ++i)
+ argv[i] = va_arg(val, const void *);
+
+ va_end(val);
+
+ return VDaswprintf(format, args, argv);
+ } else {
+ vdblock<const void *> argv(args);
+
+ for(int i=0; i<args; ++i)
+ argv[i] = va_arg(val, const void *);
+
+ va_end(val);
+
+ return VDaswprintf(format, args, argv.data());
+ }
+}
+
+VDStringW VDswprintf(const wchar_t *format, int args, ...) {
+ va_list val;
+
+ va_start(val, args);
+ VDStringW r = VDvswprintf(format, args, val);
+ va_end(val);
+
+ return r;
+}
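
A usage sketch of the conversion and formatting routines above (illustrative only). Note the calling convention documented in the comment block: the argument count is passed explicitly and every argument is passed by pointer.

    #include <vd2/system/text.h>
    #include <vd2/system/VDString.h>

    VDStringW FormatStatus(int frame, double fps, const wchar_t *codec) {
        // %d and %.2f read through int*/double*; %ls reads the wchar_t* through a wchar_t**.
        return VDswprintf(L"frame %d at %.2f fps (%ls)", 3, &frame, &fps, &codec);
    }

    VDStringW RoundTripUtf8(const wchar_t *text) {
        // UTF-16 -> UTF-8 -> UTF-16 using the converters above.
        VDStringA utf8 = VDTextWToU8(VDStringW(text));
        return VDTextU8ToW(utf8);
    }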
diff --git a/src/thirdparty/VirtualDub/system/source/thread.cpp b/src/thirdparty/VirtualDub/system/source/thread.cpp
new file mode 100644
index 000000000..910678bc4
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/thread.cpp
@@ -0,0 +1,274 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <process.h>
+
+#include <windows.h>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/tls.h>
+#include <vd2/system/protscope.h>
+
+namespace {
+ //
+ // This apparently came from a talk by one of the Visual Studio
+ // developers, i.e. I didn't write it.
+ //
+ #define MS_VC_EXCEPTION 0x406d1388
+
+ typedef struct tagTHREADNAME_INFO
+ {
+ DWORD dwType; // must be 0x1000
+ LPCSTR szName; // pointer to name (in same addr space)
+ DWORD dwThreadID; // thread ID (-1 = caller thread)
+ DWORD dwFlags; // reserved for future use, must be zero
+ } THREADNAME_INFO;
+}
+
+VDThreadID VDGetCurrentThreadID() {
+ return (VDThreadID)GetCurrentThreadId();
+}
+
+VDProcessId VDGetCurrentProcessId() {
+ return (VDProcessId)GetCurrentProcessId();
+}
+
+void VDSetThreadDebugName(VDThreadID tid, const char *name) {
+ THREADNAME_INFO info;
+ info.dwType = 0x1000;
+ info.szName = name;
+ info.dwThreadID = tid;
+ info.dwFlags = 0;
+
+ __try {
+ RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(DWORD), (ULONG_PTR *)&info);
+ } __except (EXCEPTION_CONTINUE_EXECUTION) {
+ }
+}
+
+void VDThreadSleep(int milliseconds) {
+ if (milliseconds > 0)
+ ::Sleep(milliseconds);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDThread::VDThread(const char *pszDebugName)
+ : mpszDebugName(pszDebugName)
+ , mhThread(0)
+ , mThreadID(0)
+{
+}
+
+VDThread::~VDThread() throw() {
+ if (isThreadAttached())
+ ThreadWait();
+}
+
+bool VDThread::ThreadStart() {
+ VDASSERT(!isThreadAttached());
+
+ if (!isThreadAttached())
+ mhThread = (void *)_beginthreadex(NULL, 0, StaticThreadStart, this, 0, &mThreadID);
+
+ return mhThread != 0;
+}
+
+void VDThread::ThreadDetach() {
+ if (isThreadAttached()) {
+ CloseHandle((HANDLE)mhThread);
+ mhThread = NULL;
+ mThreadID = 0;
+ }
+}
+
+void VDThread::ThreadWait() {
+ if (isThreadAttached()) {
+ WaitForSingleObject((HANDLE)mhThread, INFINITE);
+ ThreadDetach();
+ mThreadID = 0;
+ }
+}
+
+bool VDThread::isThreadActive() {
+ if (isThreadAttached()) {
+ if (WAIT_TIMEOUT == WaitForSingleObject((HANDLE)mhThread, 0))
+ return true;
+
+ ThreadDetach();
+ mThreadID = 0;
+ }
+ return false;
+}
+
+void VDThread::ThreadFinish() {
+ _endthreadex(0);
+}
+
+void *VDThread::ThreadLocation() const {
+ if (!isThreadAttached())
+ return NULL;
+
+ CONTEXT ctx;
+
+ ctx.ContextFlags = CONTEXT_CONTROL;
+
+ SuspendThread(mhThread);
+ GetThreadContext(mhThread, &ctx);
+ ResumeThread(mhThread);
+
+#ifdef _M_AMD64
+ return (void *)ctx.Rip;
+#else
+ return (void *)ctx.Eip;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+unsigned __stdcall VDThread::StaticThreadStart(void *pThisAsVoid) {
+ VDThread *pThis = static_cast<VDThread *>(pThisAsVoid);
+
+ // We cannot use mThreadID here because it might already have been
+ // invalidated by a detach in the main thread.
+ if (pThis->mpszDebugName)
+ VDSetThreadDebugName(GetCurrentThreadId(), pThis->mpszDebugName);
+
+ VDInitThreadData(pThis->mpszDebugName);
+
+ vdprotected1("running thread \"%.64s\"", const char *, pThis->mpszDebugName) {
+ pThis->ThreadRun();
+ }
+
+ // NOTE: Do not put anything referencing this here, since our object
+ // may have been destroyed by the threaded code.
+
+ VDDeinitThreadData();
+
+ return 0;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+void VDCriticalSection::StructCheck() {
+ VDASSERTCT(sizeof(CritSec) == sizeof(CRITICAL_SECTION));
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDSignal::VDSignal() {
+ hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+}
+
+VDSignalPersistent::VDSignalPersistent() {
+ hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+}
+
+VDSignalBase::~VDSignalBase() {
+ CloseHandle(hEvent);
+}
+
+void VDSignalBase::signal() {
+ SetEvent(hEvent);
+}
+
+void VDSignalBase::wait() {
+ WaitForSingleObject(hEvent, INFINITE);
+}
+
+bool VDSignalBase::check() {
+ return WAIT_OBJECT_0 == WaitForSingleObject(hEvent, 0);
+}
+
+int VDSignalBase::wait(VDSignalBase *second) {
+ HANDLE hArray[16];
+ DWORD dwRet;
+
+ hArray[0] = hEvent;
+ hArray[1] = second->hEvent;
+
+ dwRet = WaitForMultipleObjects(2, hArray, FALSE, INFINITE);
+
+ return dwRet == WAIT_FAILED ? -1 : dwRet - WAIT_OBJECT_0;
+}
+
+int VDSignalBase::wait(VDSignalBase *second, VDSignalBase *third) {
+ HANDLE hArray[3];
+ DWORD dwRet;
+
+ hArray[0] = hEvent;
+ hArray[1] = second->hEvent;
+ hArray[2] = third->hEvent;
+
+ dwRet = WaitForMultipleObjects(3, hArray, FALSE, INFINITE);
+
+ return dwRet == WAIT_FAILED ? -1 : dwRet - WAIT_OBJECT_0;
+}
+
+int VDSignalBase::waitMultiple(const VDSignalBase **signals, int count) {
+ VDASSERT(count <= 16);
+
+ HANDLE handles[16];
+ int active = 0;
+
+ for(int i=0; i<count; ++i) {
+ HANDLE h = signals[i]->hEvent;
+
+ if (h)
+ handles[active++] = h;
+ }
+
+ if (!active)
+ return -1;
+
+ DWORD dwRet = WaitForMultipleObjects(active, handles, FALSE, INFINITE);
+
+ return dwRet == WAIT_FAILED ? -1 : dwRet - WAIT_OBJECT_0;
+}
+
+void VDSignalPersistent::unsignal() {
+ ResetEvent(hEvent);
+}
+
+VDSemaphore::VDSemaphore(int initial)
+ : mKernelSema(CreateSemaphore(NULL, initial, 0x0fffffff, NULL))
+{
+}
+
+VDSemaphore::~VDSemaphore() {
+ if (mKernelSema)
+ CloseHandle(mKernelSema);
+}
+
+void VDSemaphore::Reset(int count) {
+ // reset semaphore to zero
+ while(WAIT_OBJECT_0 == WaitForSingleObject(mKernelSema, 0))
+ ;
+
+ if (count)
+ ReleaseSemaphore(mKernelSema, count, NULL);
+}
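
A usage sketch of the threading primitives above (illustrative only): a worker derived from VDThread that sleeps on a VDSignal until work is kicked or shutdown is requested.

    #include <vd2/system/thread.h>

    class RenderWorker : public VDThread {
    public:
        RenderWorker() : VDThread("RenderWorker"), mbQuit(false) {}
        ~RenderWorker() { Stop(); }

        void Start() { ThreadStart(); }
        void Kick()  { mSignal.signal(); }
        void Stop()  {
            mbQuit = true;
            mSignal.signal();
            ThreadWait();           // joins and detaches the worker thread
        }

    protected:
        void ThreadRun() {          // runs on the worker thread (see StaticThreadStart above)
            while(!mbQuit) {
                mSignal.wait();     // auto-reset event created by the VDSignal constructor
                if (mbQuit)
                    break;
                // ... perform one unit of work ...
            }
        }

    private:
        VDSignal mSignal;
        volatile bool mbQuit;       // simplified flag; real code might prefer VDAtomicInt
    };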
diff --git a/src/thirdparty/VirtualDub/system/source/thunk.cpp b/src/thirdparty/VirtualDub/system/source/thunk.cpp
new file mode 100644
index 000000000..b39089116
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/thunk.cpp
@@ -0,0 +1,306 @@
+#include "stdafx.h"
+#include <windows.h>
+#include <map>
+#include <vd2/system/atomic.h>
+#include <vd2/system/refcount.h>
+#include <vd2/system/thunk.h>
+#include <vd2/system/binary.h>
+
+class IVDJITAllocator {};
+
+class VDJITAllocator : public vdrefcounted<IVDJITAllocator> {
+public:
+ VDJITAllocator();
+ ~VDJITAllocator();
+
+ void *Allocate(size_t len);
+ void Free(void *p, size_t len);
+
+ void EndUpdate(void *p, size_t len);
+
+protected:
+ typedef std::map<void *, size_t> FreeChunks;
+ FreeChunks mFreeChunks;
+ FreeChunks::iterator mNextChunk;
+
+ typedef std::map<void *, size_t> Allocations;
+ Allocations mAllocations;
+
+ uintptr mAllocationGranularity;
+};
+
+VDJITAllocator::VDJITAllocator()
+ : mNextChunk(mFreeChunks.end())
+{
+ SYSTEM_INFO si;
+ GetSystemInfo(&si);
+
+ mAllocationGranularity = si.dwAllocationGranularity;
+}
+
+VDJITAllocator::~VDJITAllocator() {
+ for(Allocations::iterator it(mAllocations.begin()), itEnd(mAllocations.end()); it!=itEnd; ++it) {
+ VirtualFree(it->first, 0, MEM_RELEASE);
+ }
+}
+
+void *VDJITAllocator::Allocate(size_t len) {
+ len = (len + 15) & ~(size_t)15;
+
+ FreeChunks::iterator itMark(mNextChunk), itEnd(mFreeChunks.end()), it(itMark);
+
+ if (it == itEnd)
+ it = mFreeChunks.begin();
+
+ for(;;) {
+ for(; it!=itEnd; ++it) {
+ if (it->second >= len) {
+ it->second -= len;
+
+ void *p = (char *)it->first + it->second;
+
+ if (!it->second) {
+ if (mNextChunk == it)
+ ++mNextChunk;
+
+ mFreeChunks.erase(it);
+ }
+
+ return p;
+ }
+ }
+
+ if (itEnd == itMark)
+ break;
+
+ it = mFreeChunks.begin();
+ itEnd = itMark;
+ }
+
+ size_t alloclen = (len + mAllocationGranularity - 1) & ~(mAllocationGranularity - 1);
+
+ void *p = VirtualAlloc(NULL, alloclen, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+ if (p) {
+ try {
+ Allocations::iterator itA(mAllocations.insert(Allocations::value_type(p, alloclen)).first);
+
+ try {
+ if (len < alloclen)
+ mFreeChunks.insert(FreeChunks::value_type((char *)p + len, alloclen - len));
+
+ } catch(...) {
+ mAllocations.erase(itA);
+ throw;
+ }
+ } catch(...) {
+ VirtualFree(p, 0, MEM_RELEASE);
+ p = NULL;
+ }
+ }
+
+ return p;
+}
+
+void VDJITAllocator::Free(void *p, size_t len) {
+ VDASSERT(p);
+ VDASSERT(len < 0x10000);
+
+ FreeChunks::iterator cur(mFreeChunks.lower_bound(p));
+ if (cur != mFreeChunks.end() && (char *)p + len == cur->first) {
+ len += cur->second;
+ if (mNextChunk == cur)
+ ++mNextChunk;
+ cur = mFreeChunks.erase(cur);
+ }
+
+ if (cur != mFreeChunks.begin()) {
+ FreeChunks::iterator prev(cur);
+
+ --prev;
+ if ((char *)prev->first + prev->second == p) {
+ p = prev->first;
+ len += prev->second;
+ if (mNextChunk == prev)
+ ++mNextChunk;
+ mFreeChunks.erase(prev);
+ }
+ }
+
+ uintptr start = (size_t)p;
+ uintptr end = start + len;
+
+ if (!((start | end) & (mAllocationGranularity - 1))) {
+ Allocations::iterator it(mAllocations.find(p));
+
+ if (it != mAllocations.end()) {
+ VirtualFree((void *)start, 0, MEM_RELEASE);
+ mAllocations.erase(it);
+ return;
+ }
+ }
+
+ mFreeChunks.insert(FreeChunks::value_type((void *)start, end-start));
+}
+
+void VDJITAllocator::EndUpdate(void *p, size_t len) {
+ FlushInstructionCache(GetCurrentProcess(), p, len);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDJITAllocator *g_pVDJITAllocator;
+VDAtomicInt g_VDJITAllocatorLock;
+
+bool VDInitThunkAllocator() {
+ bool success = true;
+
+ while(g_VDJITAllocatorLock.xchg(1))
+ ::Sleep(1);
+
+ if (!g_pVDJITAllocator) {
+ g_pVDJITAllocator = new_nothrow VDJITAllocator;
+ if (!g_pVDJITAllocator)
+ success = false;
+ }
+
+ if (success)
+ g_pVDJITAllocator->AddRef();
+
+ VDVERIFY(1 == g_VDJITAllocatorLock.xchg(0));
+
+ return success;
+}
+
+void VDShutdownThunkAllocator() {
+ while(g_VDJITAllocatorLock.xchg(1))
+ ::Sleep(1);
+
+ VDASSERT(g_pVDJITAllocator);
+
+ if (!g_pVDJITAllocator->Release())
+ g_pVDJITAllocator = NULL;
+
+ VDVERIFY(1 == g_VDJITAllocatorLock.xchg(0));
+}
+
+void *VDAllocateThunkMemory(size_t len) {
+ return g_pVDJITAllocator->Allocate(len);
+}
+
+void VDFreeThunkMemory(void *p, size_t len) {
+ g_pVDJITAllocator->Free(p, len);
+}
+
+void VDSetThunkMemory(void *p, const void *src, size_t len) {
+ memcpy(p, src, len);
+ g_pVDJITAllocator->EndUpdate(p, len);
+}
+
+void VDFlushThunkMemory(void *p, size_t len) {
+ g_pVDJITAllocator->EndUpdate(p, len);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _M_AMD64
+ extern "C" void VDMethodToFunctionThunk64();
+#else
+ extern "C" void VDMethodToFunctionThunk32();
+ extern "C" void VDMethodToFunctionThunk32_4();
+ extern "C" void VDMethodToFunctionThunk32_8();
+ extern "C" void VDMethodToFunctionThunk32_12();
+ extern "C" void VDMethodToFunctionThunk32_16();
+#endif
+
+VDFunctionThunk *VDCreateFunctionThunkFromMethod(void *method, void *pThis, size_t argbytes, bool stdcall_thunk) {
+#if defined(_M_IX86)
+ void *pThunk = VDAllocateThunkMemory(16);
+
+ if (!pThunk)
+ return NULL;
+
+ if (stdcall_thunk || !argbytes) { // thiscall -> stdcall (easy case)
+ uint8 thunkbytes[16]={
+ 0xB9, 0x00, 0x00, 0x00, 0x00, // mov ecx, this
+ 0xE9, 0x00, 0x00, 0x00, 0x00 // jmp fn
+ };
+
+
+ VDWriteUnalignedLEU32(thunkbytes+1, (uint32)(uintptr)pThis);
+ VDWriteUnalignedLEU32(thunkbytes+6, (uint32)method - ((uint32)pThunk + 10));
+
+ VDSetThunkMemory(pThunk, thunkbytes, 15);
+ } else { // thiscall -> cdecl (hard case)
+ uint8 thunkbytes[16]={
+ 0xE8, 0x00, 0x00, 0x00, 0x00, // call VDFunctionThunk32
+ 0xC3, // ret
+ argbytes, // db argbytes
+ 0, // db 0
+ 0x00, 0x00, 0x00, 0x00, // dd method
+ 0x00, 0x00, 0x00, 0x00, // dd this
+ };
+
+ void *adapter;
+
+ switch(argbytes) {
+ case 4: adapter = VDMethodToFunctionThunk32_4; break;
+ case 8: adapter = VDMethodToFunctionThunk32_8; break;
+ case 12: adapter = VDMethodToFunctionThunk32_12; break;
+ case 16: adapter = VDMethodToFunctionThunk32_16; break;
+ default: adapter = VDMethodToFunctionThunk32; break;
+ }
+
+ VDWriteUnalignedLEU32(thunkbytes+1, (uint32)(uintptr)adapter - ((uint32)pThunk + 5));
+ VDWriteUnalignedLEU32(thunkbytes+8, (uint32)(uintptr)method);
+ VDWriteUnalignedLEU32(thunkbytes+12, (uint32)(uintptr)pThis);
+
+ VDSetThunkMemory(pThunk, thunkbytes, 16);
+ }
+
+ return (VDFunctionThunk *)pThunk;
+#elif defined(_M_AMD64)
+ void *pThunk = VDAllocateThunkMemory(44);
+ if (!pThunk)
+ return NULL;
+
+ uint8 thunkbytes[44]={
+ 0x48, 0x8D, 0x04, 0x25, 0x10, 0x00, 0x00, // lea rax, [eip+16]
+ 0x00,
+ 0xFF, 0x24, 0x25, 0x08, 0x00, 0x00, 0x00, // jmp qword ptr [rip+8]
+ 0x90, // nop
+ 0, 0, 0, 0, 0, 0, 0, 0, // dq VDFunctionThunk64
+ 0, 0, 0, 0, 0, 0, 0, 0, // dq method
+ 0, 0, 0, 0, 0, 0, 0, 0, // dq this
+ 0, 0, 0, 0 // dd argspillbytes
+ };
+
+ VDWriteUnalignedLEU64(thunkbytes+16, (uint64)(uintptr)VDMethodToFunctionThunk64);
+ VDWriteUnalignedLEU64(thunkbytes+24, (uint64)(uintptr)method);
+ VDWriteUnalignedLEU64(thunkbytes+32, (uint64)(uintptr)pThis);
+
+ // The stack must be aligned to a 16 byte boundary when the CALL
+ // instruction occurs. On entry to VDFunctionThunk64(), the stack is misaligned
+ // to 16n+8. Therefore, the number of argbytes must be 16m+8 and the number of
+ // argspillbytes must be 16m+8-24.
+ VDWriteUnalignedLEU32(thunkbytes+40, argbytes < 32 ? 0 : ((argbytes - 16 + 15) & ~15));
+
+ VDSetThunkMemory(pThunk, thunkbytes, 44);
+
+ return (VDFunctionThunk *)pThunk;
+#else
+ return NULL;
+#endif
+}
+
+void VDDestroyFunctionThunk(VDFunctionThunk *pFnThunk) {
+ // validate thunk
+#if defined(_M_IX86)
+ VDASSERT(((const uint8 *)pFnThunk)[0] == 0xB9 || ((const uint8 *)pFnThunk)[0] == 0xE8);
+ VDFreeThunkMemory(pFnThunk, 16);
+#elif defined(_M_AMD64)
+ VDFreeThunkMemory(pFnThunk, 44);
+#else
+ VDASSERT(false);
+#endif
+
+}
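
The raw entry point above takes the target method as a void pointer plus an explicit argument-byte count; callers normally go through a type-safe wrapper in <vd2/system/thunk.h> (that is how the VDLazyTimer constructor in time.cpp below invokes it). A hedged sketch assuming that wrapper:

    // Sketch: exposing a member function to Win32 as a flat TIMERPROC-style callback.
    #include <windows.h>
    #include <vd2/system/thunk.h>

    class Poller {
    public:
        Poller() : mpThunk(NULL) {
            VDInitThunkAllocator();
            // Assumed template overload, mirroring the VDLazyTimer call below: binds
            // 'this' so the timer can reach OnTimer() through a plain function pointer.
            mpThunk = VDCreateFunctionThunkFromMethod(this, &Poller::OnTimer, true);
        }
        ~Poller() {
            if (mpThunk)
                VDDestroyFunctionThunk(mpThunk);
            VDShutdownThunkAllocator();
        }
        void Arm(UINT ms) { SetTimer(NULL, 0, ms, (TIMERPROC)mpThunk); }

    private:
        void OnTimer(HWND, UINT, UINT_PTR, DWORD) { /* periodic work */ }
        VDFunctionThunk *mpThunk;
    };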
diff --git a/src/thirdparty/VirtualDub/system/source/time.cpp b/src/thirdparty/VirtualDub/system/source/time.cpp
new file mode 100644
index 000000000..ae0c3e4bf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/time.cpp
@@ -0,0 +1,270 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <new>
+
+#include <windows.h>
+#include <mmsystem.h>
+
+#include <vd2/system/time.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/thunk.h>
+
+#ifdef _MSC_VER
+ #pragma comment(lib, "winmm")
+#endif
+
+uint32 VDGetCurrentTick() {
+ return (uint32)GetTickCount();
+}
+
+uint64 VDGetPreciseTick() {
+ LARGE_INTEGER li;
+ QueryPerformanceCounter(&li);
+ return li.QuadPart;
+}
+
+namespace {
+ uint64 VDGetPreciseTicksPerSecondNowI() {
+ LARGE_INTEGER freq;
+ QueryPerformanceFrequency(&freq);
+ return freq.QuadPart;
+ }
+
+ double VDGetPreciseTicksPerSecondNow() {
+ LARGE_INTEGER freq;
+ QueryPerformanceFrequency(&freq);
+ return (double)freq.QuadPart;
+ }
+}
+
+uint64 VDGetPreciseTicksPerSecondI() {
+ static uint64 ticksPerSecond = VDGetPreciseTicksPerSecondNowI();
+
+ return ticksPerSecond;
+}
+
+double VDGetPreciseTicksPerSecond() {
+ static double ticksPerSecond = VDGetPreciseTicksPerSecondNow();
+
+ return ticksPerSecond;
+}
+
+double VDGetPreciseSecondsPerTick() {
+ static double secondsPerTick = 1.0 / VDGetPreciseTicksPerSecondNow();
+
+ return secondsPerTick;
+}
+
+uint32 VDGetAccurateTick() {
+ return timeGetTime();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+VDCallbackTimer::VDCallbackTimer()
+ : mTimerAccuracy(0)
+{
+}
+
+VDCallbackTimer::~VDCallbackTimer() {
+ Shutdown();
+}
+
+bool VDCallbackTimer::Init(IVDTimerCallback *pCB, uint32 period_ms) {
+ return Init2(pCB, period_ms * 10000);
+}
+
+bool VDCallbackTimer::Init2(IVDTimerCallback *pCB, uint32 period_100ns) {
+ return Init3(pCB, period_100ns, period_100ns >> 1, true);
+}
+
+bool VDCallbackTimer::Init3(IVDTimerCallback *pCB, uint32 period_100ns, uint32 accuracy_100ns, bool precise) {
+ Shutdown();
+
+ mpCB = pCB;
+ mbExit = false;
+ mbPrecise = precise;
+
+ UINT accuracy = accuracy_100ns / 10000;
+ if (accuracy > 10)
+ accuracy = 10;
+
+ TIMECAPS tc;
+ if (TIMERR_NOERROR == timeGetDevCaps(&tc, sizeof tc)) {
+ if (accuracy < tc.wPeriodMin)
+ accuracy = tc.wPeriodMin;
+ if (accuracy > tc.wPeriodMax)
+ accuracy = tc.wPeriodMax;
+ }
+
+ if (TIMERR_NOERROR == timeBeginPeriod(accuracy)) {
+ mTimerAccuracy = accuracy;
+ mTimerPeriod = period_100ns;
+ mTimerPeriodAdjustment = 0;
+ mTimerPeriodDelta = 0;
+
+ if (ThreadStart())
+ return true;
+ }
+
+ Shutdown();
+
+ return false;
+}
+
+void VDCallbackTimer::Shutdown() {
+ if (isThreadActive()) {
+ mbExit = true;
+ msigExit.signal();
+ ThreadWait();
+ }
+
+ if (mTimerAccuracy) {
+ timeEndPeriod(mTimerAccuracy);
+ mTimerAccuracy = 0;
+ }
+}
+
+void VDCallbackTimer::SetRateDelta(int delta_100ns) {
+ mTimerPeriodDelta = delta_100ns;
+}
+
+void VDCallbackTimer::AdjustRate(int adjustment_100ns) {
+ mTimerPeriodAdjustment += adjustment_100ns;
+}
+
+bool VDCallbackTimer::IsTimerRunning() const {
+ return const_cast<VDCallbackTimer *>(this)->isThreadActive();
+}
+
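+// The timer period is tracked in 100ns units and split into a millisecond part
+// (periodHi) and a sub-millisecond remainder (periodLo). The remainder is
+// accumulated in nextTimeLo and carried into nextTimeHi once it reaches 10000
+// (one full millisecond), which keeps long-term drift bounded even though the
+// wait itself only has millisecond resolution.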
+void VDCallbackTimer::ThreadRun() {
+ uint32 timerPeriod = mTimerPeriod;
+ uint32 periodHi = timerPeriod / 10000;
+ uint32 periodLo = timerPeriod % 10000;
+ uint32 nextTimeHi = VDGetAccurateTick() + periodHi;
+ uint32 nextTimeLo = periodLo;
+
+ uint32 maxDelay = mTimerPeriod / 2000;
+
+ SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
+
+ HANDLE hExit = msigExit.getHandle();
+
+ if (!mbPrecise) {
+ while(!mbExit) {
+ DWORD res = ::WaitForSingleObject(hExit, periodHi);
+
+ if (res != WAIT_TIMEOUT)
+ break;
+
+ mpCB->TimerCallback();
+ }
+ } else {
+ while(!mbExit) {
+ uint32 currentTime = VDGetAccurateTick();
+ sint32 delta = nextTimeHi - currentTime;
+
+ if (delta > 0) {
+ // safety guard against the clock going nuts
+ DWORD res;
+ if ((uint32)delta > maxDelay)
+ res = ::WaitForSingleObject(hExit, maxDelay);
+ else
+ res = ::WaitForSingleObject(hExit, nextTimeHi - currentTime);
+
+ if (res != WAIT_TIMEOUT)
+ break;
+ }
+
+ if ((uint32)abs(delta) > maxDelay) {
+ nextTimeHi = currentTime + periodHi;
+ nextTimeLo = periodLo;
+ } else {
+ nextTimeLo += periodLo;
+ nextTimeHi += periodHi;
+ if (nextTimeLo >= 10000) {
+ nextTimeLo -= 10000;
+ ++nextTimeHi;
+ }
+ }
+
+ mpCB->TimerCallback();
+
+ int adjust = mTimerPeriodAdjustment.xchg(0);
+ int perdelta = mTimerPeriodDelta;
+
+ if (adjust || perdelta) {
+ timerPeriod += adjust;
+ periodHi = (timerPeriod+perdelta) / 10000;
+ periodLo = (timerPeriod+perdelta) % 10000;
+ }
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
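+// VDLazyTimer wraps a one-shot Win32 SetTimer(). A TIMERPROC cannot carry a
+// 'this' pointer, so the constructor builds a call thunk that forwards the
+// callback to StaticTimeCallback() on this instance.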
+VDLazyTimer::VDLazyTimer()
+ : mTimerId(NULL)
+ , mpCB(NULL)
+{
+ if (!VDInitThunkAllocator())
+ throw MyError("Unable to initialize thunk allocator.");
+
+ mpThunk = VDCreateFunctionThunkFromMethod(this, &VDLazyTimer::StaticTimeCallback, true);
+ if (!mpThunk) {
+ VDShutdownThunkAllocator();
+ throw MyError("Unable to create timer thunk.");
+ }
+}
+
+VDLazyTimer::~VDLazyTimer() {
+ Stop();
+
+ VDDestroyFunctionThunk(mpThunk);
+ VDShutdownThunkAllocator();
+}
+
+void VDLazyTimer::SetOneShot(IVDTimerCallback *pCB, uint32 delay) {
+ Stop();
+
+ mpCB = pCB;
+ mTimerId = SetTimer(NULL, 0, delay, (TIMERPROC)mpThunk);
+}
+
+void VDLazyTimer::Stop() {
+ if (mTimerId) {
+ KillTimer(NULL, mTimerId);
+ mTimerId = 0;
+ }
+}
+
+void VDLazyTimer::StaticTimeCallback(VDZHWND hwnd, VDZUINT msg, VDZUINT_PTR id, VDZDWORD time) {
+ Stop();
+
+ if (mpCB)
+ mpCB->TimerCallback();
+}
diff --git a/src/thirdparty/VirtualDub/system/source/tls.cpp b/src/thirdparty/VirtualDub/system/source/tls.cpp
new file mode 100644
index 000000000..71044d1e5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/tls.cpp
@@ -0,0 +1,43 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/tls.h>
+
+VDThreadInitHook g_pInitHook;
+
+void VDInitThreadData(const char *pszThreadName) {
+ if (g_pInitHook)
+ g_pInitHook(true, pszThreadName);
+}
+
+void VDDeinitThreadData() {
+ if (g_pInitHook)
+ g_pInitHook(false, NULL);
+}
+
+void VDSetThreadInitHook(VDThreadInitHook pHook) {
+ g_pInitHook = pHook;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/vdstl.cpp b/src/thirdparty/VirtualDub/system/source/vdstl.cpp
new file mode 100644
index 000000000..cabfee02f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/vdstl.cpp
@@ -0,0 +1,32 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2008 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/error.h>
+#include <vd2/system/vdstl.h>
+
+void VDNORETURN vdallocator_base::throw_oom() {
+ throw MyMemoryError();
+}
diff --git a/src/thirdparty/VirtualDub/system/source/vectors.cpp b/src/thirdparty/VirtualDub/system/source/vectors.cpp
new file mode 100644
index 000000000..c54885c45
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/vectors.cpp
@@ -0,0 +1,77 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdstl.h>
+#include <vd2/system/vectors.h>
+
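+// Solves the n x n system src*x = b in place with Gaussian elimination and
+// partial pivoting; the solution is written back into b, and false is returned
+// if a pivot falls below 'tolerance'. Illustrative call with rows stored
+// contiguously (stride_elements == n):
+//
+//     double m[4] = { 2, 1,
+//                     1, 3 };
+//     double b[2] = { 3, 5 };
+//     VDSolveLinearEquation(m, 2, 2, b, 1e-12);   // b becomes { 0.8, 1.4 }
+//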
+bool VDSolveLinearEquation(double *src, int n, ptrdiff_t stride_elements, double *b, double tolerance) {
+ vdfastvector<double *> array(n);
+ double **m = &array[0];
+ int i, j, k;
+
+ for(i=0; i<n; ++i) {
+ m[i] = src;
+ src += stride_elements;
+ }
+
+ // factor U
+ for(i=0; i<n; ++i) {
+ int best = i;
+
+ for(j=i+1; j<n; ++j) {
+ if (fabs(m[best][i]) < fabs(m[j][i]))
+ best = j;
+ }
+
+ std::swap(m[i], m[best]);
+ std::swap(b[i], b[best]);
+
+ if (fabs(m[i][i]) < tolerance)
+ return false;
+
+ double f = 1.0 / m[i][i];
+
+ m[i][i] = 1.0;
+
+ for(j=i+1; j<n; ++j)
+ m[i][j] *= f;
+
+ b[i] *= f;
+
+ for(j=i+1; j<n; ++j) {
+ b[j] -= b[i] * m[j][i];
+ for(k=n-1; k>=i; --k)
+ m[j][k] -= m[i][k] * m[j][i];
+ }
+ }
+
+ // factor L
+ for(i=n-1; i>=0; --i)
+ for(j=i-1; j>=0; --j)
+ b[j] -= b[i] * m[j][i];
+
+ return true;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/w32assist.cpp b/src/thirdparty/VirtualDub/system/source/w32assist.cpp
new file mode 100644
index 000000000..1faf527ed
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/w32assist.cpp
@@ -0,0 +1,580 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/w32assist.h>
+#include <vd2/system/text.h>
+#include <vd2/system/vdstl.h>
+
+bool VDIsForegroundTaskW32() {
+ HWND hwndFore = GetForegroundWindow();
+
+ if (!hwndFore)
+ return false;
+
+ DWORD dwProcessId = 0;
+ GetWindowThreadProcessId(hwndFore, &dwProcessId);
+
+ return dwProcessId == GetCurrentProcessId();
+}
+
+LPVOID VDConvertThreadToFiberW32(LPVOID parm) {
+ typedef LPVOID (WINAPI *tpConvertThreadToFiber)(LPVOID p);
+ static tpConvertThreadToFiber ctof = (tpConvertThreadToFiber)GetProcAddress(GetModuleHandle("kernel32"), "ConvertThreadToFiber");
+
+ if (!ctof)
+ return NULL;
+
+ return ctof(parm);
+}
+
+void VDSwitchToFiberW32(LPVOID fiber) {
+ typedef void (WINAPI *tpSwitchToFiber)(LPVOID p);
+ static tpSwitchToFiber stof = (tpSwitchToFiber)GetProcAddress(GetModuleHandle("kernel32"), "SwitchToFiber");
+
+ if (stof)
+ stof(fiber);
+}
+
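+// Computes the number of bytes occupied by a BITMAPINFOHEADER plus any palette
+// or bitfield masks that follow it. Illustrative arithmetic: an 8bpp BI_RGB
+// header with biClrUsed == 0 implies a full 256-entry palette, giving
+// 40 + 256*4 = 1064 bytes.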
+int VDGetSizeOfBitmapHeaderW32(const BITMAPINFOHEADER *pHdr) {
+ int palents = 0;
+
+ if ((pHdr->biCompression == BI_RGB || pHdr->biCompression == BI_RLE4 || pHdr->biCompression == BI_RLE8) && pHdr->biBitCount <= 8) {
+ palents = pHdr->biClrUsed;
+ if (!palents)
+ palents = 1 << pHdr->biBitCount;
+ }
+ int size = pHdr->biSize + palents * sizeof(RGBQUAD);
+
+ if (pHdr->biSize < sizeof(BITMAPV4HEADER) && pHdr->biCompression == BI_BITFIELDS)
+ size += sizeof(DWORD) * 3;
+
+ return size;
+}
+
+void VDSetWindowTextW32(HWND hwnd, const wchar_t *s) {
+ if (VDIsWindowsNT()) {
+ SetWindowTextW(hwnd, s);
+ } else {
+ SetWindowTextA(hwnd, VDTextWToA(s).c_str());
+ }
+}
+
+void VDSetWindowTextFW32(HWND hwnd, const wchar_t *format, ...) {
+ va_list val;
+
+ va_start(val, format);
+ {
+ wchar_t buf[512];
+ int r = vswprintf(buf, 512, format, val);
+
+ if ((unsigned)r < 512) {
+ VDSetWindowTextW32(hwnd, buf);
+ va_end(val);
+ return;
+ }
+ }
+
+ VDStringW s;
+ s.append_vsprintf(format, val);
+ VDSetWindowTextW32(hwnd, s.c_str());
+
+ va_end(val);
+}
+
+VDStringW VDGetWindowTextW32(HWND hwnd) {
+ union {
+ wchar_t w[256];
+ char a[512];
+ } buf;
+
+ if (VDIsWindowsNT()) {
+ int len = GetWindowTextLengthW(hwnd);
+
+ if (len > 255) {
+ vdblock<wchar_t> tmp(len + 1);
+ len = GetWindowTextW(hwnd, tmp.data(), tmp.size());
+
+ VDStringW text(tmp.data(), len);
+ return text;
+ } else if (len > 0) {
+ len = GetWindowTextW(hwnd, buf.w, 256);
+
+ VDStringW text(buf.w, len);
+ return text;
+ }
+ } else {
+ int len = GetWindowTextLengthA(hwnd);
+
+ if (len > 511) {
+ vdblock<char> tmp(len + 1);
+ len = GetWindowTextA(hwnd, tmp.data(), tmp.size());
+
+ VDStringW text(VDTextAToW(tmp.data(), len));
+ return text;
+ } else if (len > 0) {
+ len = GetWindowTextA(hwnd, buf.a, 512);
+
+ VDStringW text(VDTextAToW(buf.a, len));
+ return text;
+ }
+ }
+
+ return VDStringW();
+}
+
+void VDAppendMenuW32(HMENU hmenu, UINT flags, UINT id, const wchar_t *text){
+ if (VDIsWindowsNT()) {
+ AppendMenuW(hmenu, flags, id, text);
+ } else {
+ AppendMenuA(hmenu, flags, id, VDTextWToA(text).c_str());
+ }
+}
+
+void VDCheckMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked) {
+ CheckMenuItem(hmenu, cmd, checked ? MF_BYCOMMAND|MF_CHECKED : MF_BYCOMMAND|MF_UNCHECKED);
+}
+
+void VDCheckRadioMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked) {
+ MENUITEMINFOA mii;
+
+ mii.cbSize = sizeof(MENUITEMINFOA);
+ mii.fMask = MIIM_FTYPE | MIIM_STATE;
+ if (GetMenuItemInfo(hmenu, cmd, FALSE, &mii)) {
+ mii.fType |= MFT_RADIOCHECK;
+ mii.fState &= ~MFS_CHECKED;
+ if (checked)
+ mii.fState |= MFS_CHECKED;
+ SetMenuItemInfo(hmenu, cmd, FALSE, &mii);
+ }
+}
+
+void VDEnableMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool enabled) {
+ EnableMenuItem(hmenu, cmd, enabled ? MF_BYCOMMAND|MF_ENABLED : MF_BYCOMMAND|MF_GRAYED);
+}
+
+VDStringW VDGetMenuItemTextByCommandW32(HMENU hmenu, UINT cmd) {
+ VDStringW s;
+
+ if (VDIsWindowsNT()) {
+ MENUITEMINFOW mmiW;
+ vdfastfixedvector<wchar_t, 256> bufW;
+
+ mmiW.cbSize = MENUITEMINFO_SIZE_VERSION_400W;
+ mmiW.fMask = MIIM_TYPE;
+ mmiW.fType = MFT_STRING;
+ mmiW.dwTypeData = NULL;
+ mmiW.cch = 0; // required to avoid crash on NT4
+
+ if (GetMenuItemInfoW(hmenu, cmd, FALSE, &mmiW)) {
+ bufW.resize(mmiW.cch + 1, 0);
+ ++mmiW.cch;
+ mmiW.dwTypeData = bufW.data();
+
+ if (GetMenuItemInfoW(hmenu, cmd, FALSE, &mmiW))
+ s = bufW.data();
+ }
+ } else {
+ MENUITEMINFOA mmiA;
+ vdfastfixedvector<char, 256> bufA;
+
+ mmiA.cbSize = MENUITEMINFO_SIZE_VERSION_400A;
+ mmiA.fMask = MIIM_TYPE;
+ mmiA.fType = MFT_STRING;
+ mmiA.dwTypeData = NULL;
+
+ if (GetMenuItemInfoA(hmenu, cmd, FALSE, &mmiA)) {
+ bufA.resize(mmiA.cch + 1, 0);
+ ++mmiA.cch;
+ mmiA.dwTypeData = bufA.data();
+
+ if (GetMenuItemInfoA(hmenu, cmd, FALSE, &mmiA))
+ s = VDTextAToW(bufA.data());
+ }
+ }
+
+ return s;
+}
+
+void VDSetMenuItemTextByCommandW32(HMENU hmenu, UINT cmd, const wchar_t *text) {
+ if (VDIsWindowsNT()) {
+ MENUITEMINFOW mmiW;
+
+ mmiW.cbSize = MENUITEMINFO_SIZE_VERSION_400W;
+ mmiW.fMask = MIIM_TYPE;
+ mmiW.fType = MFT_STRING;
+ mmiW.dwTypeData = (LPWSTR)text;
+
+ SetMenuItemInfoW(hmenu, cmd, FALSE, &mmiW);
+ } else {
+ MENUITEMINFOA mmiA;
+ VDStringA textA(VDTextWToA(text));
+
+ mmiA.cbSize = MENUITEMINFO_SIZE_VERSION_400A;
+ mmiA.fMask = MIIM_TYPE;
+ mmiA.fType = MFT_STRING;
+ mmiA.dwTypeData = (LPSTR)textA.c_str();
+
+ SetMenuItemInfoA(hmenu, cmd, FALSE, &mmiA);
+ }
+}
+
+LRESULT VDDualCallWindowProcW32(WNDPROC wp, HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) {
+ return (IsWindowUnicode(hwnd) ? CallWindowProcW : CallWindowProcA)(wp, hwnd, msg, wParam, lParam);
+}
+
+LRESULT VDDualDefWindowProcW32(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) {
+ return IsWindowUnicode(hwnd) ? DefWindowProcW(hwnd, msg, wParam, lParam) : DefWindowProcA(hwnd, msg, wParam, lParam);
+}
+
+EXECUTION_STATE VDSetThreadExecutionStateW32(EXECUTION_STATE esFlags) {
+ EXECUTION_STATE es = 0;
+
+ // SetThreadExecutionState(): requires Windows 98+/2000+.
+ typedef EXECUTION_STATE (WINAPI *tSetThreadExecutionState)(EXECUTION_STATE);
+ static tSetThreadExecutionState pFunc = (tSetThreadExecutionState)GetProcAddress(GetModuleHandle("kernel32"), "SetThreadExecutionState");
+
+ if (pFunc)
+ es = pFunc(esFlags);
+
+ return es;
+}
+
+bool VDSetFilePointerW32(HANDLE h, sint64 pos, DWORD dwMoveMethod) {
+ LONG posHi = (LONG)(pos >> 32);
+ DWORD result = SetFilePointer(h, (LONG)pos, &posHi, dwMoveMethod);
+
+ if (result != INVALID_SET_FILE_POINTER)
+ return true;
+
+ DWORD dwError = GetLastError();
+
+ return (dwError == NO_ERROR);
+}
+
+bool VDGetFileSizeW32(HANDLE h, sint64& size) {
+ DWORD dwSizeHigh;
+ DWORD dwSizeLow = GetFileSize(h, &dwSizeHigh);
+
+ if (dwSizeLow == (DWORD)-1 && GetLastError() != NO_ERROR)
+ return false;
+
+ size = dwSizeLow + ((sint64)dwSizeHigh << 32);
+ return true;
+}
+
+#if !defined(_MSC_VER) || _MSC_VER < 1300
+HMODULE VDGetLocalModuleHandleW32() {
+ MEMORY_BASIC_INFORMATION meminfo;
+ static HMODULE shmod = (VirtualQuery(&VDGetLocalModuleHandleW32, &meminfo, sizeof meminfo), (HMODULE)meminfo.AllocationBase);
+
+ return shmod;
+}
+#endif
+
+bool VDDrawTextW32(HDC hdc, const wchar_t *s, int nCount, LPRECT lpRect, UINT uFormat) {
+ RECT r;
+ if (VDIsWindowsNT()) {
+ // If multiline and vcentered (not normally supported...)
+ if (!((uFormat ^ DT_VCENTER) & (DT_VCENTER|DT_SINGLELINE))) {
+ uFormat &= ~DT_VCENTER;
+
+ r = *lpRect;
+ if (!DrawTextW(hdc, s, nCount, &r, uFormat | DT_CALCRECT))
+ return false;
+
+ int dx = ((lpRect->right - lpRect->left) - (r.right - r.left)) >> 1;
+ int dy = ((lpRect->bottom - lpRect->top) - (r.bottom - r.top)) >> 1;
+
+ r.left += dx;
+ r.right += dx;
+ r.top += dy;
+ r.bottom += dy;
+ lpRect = &r;
+ }
+
+ return !!DrawTextW(hdc, s, nCount, lpRect, uFormat);
+ } else {
+ VDStringA strA(VDTextWToA(s, nCount));
+
+ // If multiline and vcentered (not normally supported...)
+ if (!((uFormat ^ DT_VCENTER) & (DT_VCENTER|DT_SINGLELINE))) {
+ uFormat &= ~DT_VCENTER;
+
+ r = *lpRect;
+ if (!DrawTextA(hdc, strA.data(), strA.size(), &r, uFormat | DT_CALCRECT))
+ return false;
+
+ int dx = ((lpRect->right - lpRect->left) - (r.right - r.left)) >> 1;
+ int dy = ((lpRect->bottom - lpRect->top) - (r.bottom - r.top)) >> 1;
+
+ r.left += dx;
+ r.right += dx;
+ r.top += dy;
+ r.bottom += dy;
+ lpRect = &r;
+ }
+
+ return !!DrawTextA(hdc, strA.data(), strA.size(), lpRect, uFormat);
+ }
+}
+
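+// Patches a single import address table entry of hmod: the import descriptors
+// are scanned for srcModule, that module's thunks are scanned for 'name', and
+// the matching IAT slot is rewritten to pNewValue -- optionally only when it
+// still holds pCompareValue, with the previous value returned via ppOldValue.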
+bool VDPatchModuleImportTableW32(HMODULE hmod, const char *srcModule, const char *name, void *pCompareValue, void *pNewValue, void *volatile *ppOldValue) {
+ char *pBase = (char *)hmod;
+
+ __try {
+ // The PE header offset is stored at hmod+0x3c. Read the signature there,
+ // then step into the optional header, which holds the data directories.
+
+ const uint32 peoffset = ((const long *)pBase)[15];
+ const uint32 signature = *(uint32 *)(pBase + peoffset);
+
+ if (signature != IMAGE_NT_SIGNATURE)
+ return false;
+
+ const IMAGE_FILE_HEADER *pHeader = (const IMAGE_FILE_HEADER *)(pBase + peoffset + 4);
+
+ // Verify the PE optional structure.
+
+ if (pHeader->SizeOfOptionalHeader < 104)
+ return false;
+
+ // Find import header.
+
+ const IMAGE_IMPORT_DESCRIPTOR *pImportDir;
+ int nImports;
+
+ switch(*(short *)((char *)pHeader + IMAGE_SIZEOF_FILE_HEADER)) {
+
+#ifdef _M_AMD64
+ case IMAGE_NT_OPTIONAL_HDR64_MAGIC:
+ {
+ const IMAGE_OPTIONAL_HEADER64 *pOpt = (IMAGE_OPTIONAL_HEADER64 *)((const char *)pHeader + sizeof(IMAGE_FILE_HEADER));
+
+ if (pOpt->NumberOfRvaAndSizes < 2)
+ return false;
+
+ pImportDir = (const IMAGE_IMPORT_DESCRIPTOR *)(pBase + pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
+ nImports = pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size / sizeof(IMAGE_IMPORT_DESCRIPTOR);
+ }
+ break;
+#else
+ case IMAGE_NT_OPTIONAL_HDR32_MAGIC:
+ {
+ const IMAGE_OPTIONAL_HEADER32 *pOpt = (IMAGE_OPTIONAL_HEADER32 *)((const char *)pHeader + sizeof(IMAGE_FILE_HEADER));
+
+ if (pOpt->NumberOfRvaAndSizes < 2)
+ return false;
+
+ pImportDir = (const IMAGE_IMPORT_DESCRIPTOR *)(pBase + pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
+ nImports = pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size / sizeof(IMAGE_IMPORT_DESCRIPTOR);
+ }
+ break;
+#endif
+
+ default: // unrecognized optional header magic
+ return false;
+ }
+
+ // Hmmm... no imports?
+
+ if ((const char *)pImportDir == pBase)
+ return false;
+
+ // Scan down the import descriptors, looking for the requested source module.
+
+ int i;
+
+ for(i=0; i<nImports; ++i) {
+ if (!_stricmp(pBase + pImportDir[i].Name, srcModule))
+ break;
+ }
+
+ if (i >= nImports)
+ return false;
+
+ // Found it. Scan that module's import thunks until we find the requested function.
+
+ const long *pImports = (const long *)(pBase + pImportDir[i].OriginalFirstThunk);
+ void * volatile *pVector = (void * volatile *)(pBase + pImportDir[i].FirstThunk);
+
+ while(*pImports) {
+ if (*pImports >= 0) {
+ const char *pName = pBase + *pImports + 2;
+
+ if (!strcmp(pName, name)) {
+
+ // Found it! Reset the protection.
+
+ DWORD dwOldProtect;
+
+ if (VirtualProtect((void *)pVector, sizeof(void *), PAGE_EXECUTE_READWRITE, &dwOldProtect)) {
+ if (ppOldValue) {
+ for(;;) {
+ void *old = *pVector;
+ if (pCompareValue && pCompareValue != old)
+ return false;
+
+ *ppOldValue = old;
+ if (old == VDAtomicCompareExchangePointer(pVector, pNewValue, old))
+ break;
+ }
+ } else {
+ *pVector = pNewValue;
+ }
+
+ VirtualProtect((void *)pVector, sizeof(void *), dwOldProtect, &dwOldProtect);
+
+ return true;
+ }
+
+ break;
+ }
+ }
+
+ ++pImports;
+ ++pVector;
+ }
+ } __except(1) {
+ }
+
+ return false;
+}
+
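+// Export-side counterpart: the export directory's name table is searched for
+// 'name' and the matching entry in the export address table is redirected to
+// pNewValue, expressed as an RVA relative to the module base.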
+bool VDPatchModuleExportTableW32(HMODULE hmod, const char *name, void *pCompareValue, void *pNewValue, void *volatile *ppOldValue) {
+ char *pBase = (char *)hmod;
+
+ __try {
+ // The PE header offset is stored at hmod+0x3c. Read the signature there,
+ // then step into the optional header, which holds the data directories.
+
+ const uint32 peoffset = ((const long *)pBase)[15];
+ const uint32 signature = *(uint32 *)(pBase + peoffset);
+
+ if (signature != IMAGE_NT_SIGNATURE)
+ return false;
+
+ const IMAGE_FILE_HEADER *pHeader = (const IMAGE_FILE_HEADER *)(pBase + peoffset + 4);
+
+ // Verify the PE optional structure.
+
+ if (pHeader->SizeOfOptionalHeader < 104)
+ return false;
+
+ // Find export directory.
+
+ const IMAGE_EXPORT_DIRECTORY *pExportDir;
+
+ switch(*(short *)((char *)pHeader + IMAGE_SIZEOF_FILE_HEADER)) {
+
+#ifdef _M_AMD64
+ case IMAGE_NT_OPTIONAL_HDR64_MAGIC:
+ {
+ const IMAGE_OPTIONAL_HEADER64 *pOpt = (IMAGE_OPTIONAL_HEADER64 *)((const char *)pHeader + sizeof(IMAGE_FILE_HEADER));
+
+ if (pOpt->NumberOfRvaAndSizes < 1)
+ return false;
+
+ DWORD exportDirRVA = pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
+
+ if (!exportDirRVA)
+ return false;
+
+ pExportDir = (const IMAGE_EXPORT_DIRECTORY *)(pBase + exportDirRVA);
+ }
+ break;
+#else
+ case IMAGE_NT_OPTIONAL_HDR32_MAGIC:
+ {
+ const IMAGE_OPTIONAL_HEADER32 *pOpt = (IMAGE_OPTIONAL_HEADER32 *)((const char *)pHeader + sizeof(IMAGE_FILE_HEADER));
+
+ if (pOpt->NumberOfRvaAndSizes < 1)
+ return false;
+
+ DWORD exportDirRVA = pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
+
+ if (!exportDirRVA)
+ return false;
+
+ pExportDir = (const IMAGE_EXPORT_DIRECTORY *)(pBase + exportDirRVA);
+ }
+ break;
+#endif
+
+ default: // unrecognized optional header magic
+ return false;
+ }
+
+ // Scan for the export name.
+ DWORD nameCount = pExportDir->NumberOfNames;
+ const DWORD *nameRVAs = (const DWORD *)(pBase + pExportDir->AddressOfNames);
+ const WORD *nameOrdinals = (const WORD *)(pBase + pExportDir->AddressOfNameOrdinals);
+ DWORD *functionTable = (DWORD *)(pBase + pExportDir->AddressOfFunctions);
+
+ for(DWORD i=0; i<nameCount; ++i) {
+ DWORD nameRVA = nameRVAs[i];
+ const char *pName = (const char *)(pBase + nameRVA);
+
+ // compare names
+ if (!strcmp(pName, name)) {
+
+ // name matches -- look up the function entry
+ WORD ordinal = nameOrdinals[i];
+ DWORD *pRVA = &functionTable[ordinal];
+
+ // Reset the protection.
+
+ DWORD newRVA = (DWORD)pNewValue - (DWORD)pBase;
+
+ DWORD dwOldProtect;
+ if (VirtualProtect((void *)pRVA, sizeof(DWORD), PAGE_EXECUTE_READWRITE, &dwOldProtect)) {
+ if (ppOldValue) {
+ for(;;) {
+ DWORD oldRVA = *pRVA;
+ void *old = pBase + oldRVA;
+ if (pCompareValue && pCompareValue != old)
+ return false;
+
+ *ppOldValue = pBase + oldRVA;
+ if (oldRVA == VDAtomicInt::staticCompareExchange((volatile int *)pRVA, newRVA, oldRVA))
+ break;
+ }
+ } else {
+ *pRVA = newRVA;
+ }
+
+ VirtualProtect((void *)pRVA, sizeof(DWORD), dwOldProtect, &dwOldProtect);
+
+ return true;
+ }
+
+ break;
+ }
+ }
+ } __except(1) {
+ }
+
+ return false;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/zip.cpp b/src/thirdparty/VirtualDub/system/source/zip.cpp
new file mode 100644
index 000000000..8ea2ce7bf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/zip.cpp
@@ -0,0 +1,603 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/zip.h>
+#include <vd2/system/error.h>
+
+bool VDDeflateBitReader::refill() {
+ sint32 tc = mBytesLeft > kBufferSize ? kBufferSize : (sint32)mBytesLeft;
+
+ if (!tc)
+ return false;
+
+ mpSrc->Read(mBuffer+kBufferSize-tc, tc); // might throw
+
+ mBufferPt = -tc;
+
+ mBytesLeftLimited = mBytesLeft > kBigAvailThreshold ? kBigAvailThreshold : (unsigned)mBytesLeft;
+ mBytesLeft -= tc;
+
+ return true;
+}
+
+void VDDeflateBitReader::readbytes(void *dst, unsigned len) {
+ // LAME: OPTIMIZE LATER
+ uint8 *dst2 = (uint8 *)dst;
+ while(len-->0)
+ *dst2++ = getbits(8);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+void VDCRCChecker::Init(uint32 crc) {
+ mValue = 0xFFFFFFFF;
+
+ for(int i=0; i<256; ++i) {
+ unsigned v = i;
+ for(int j=0; j<8; ++j)
+ v = (v>>1) ^ (crc & -(sint32)(v&1));
+
+ mTable[i] = v;
+ }
+}
+
+void VDCRCChecker::Process(const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ uint32 v = mValue;
+
+ // This code is from the PNG spec.
+ if (count > 0)
+ do {
+ v = mTable[(uint8)v ^ *src++] ^ (v >> 8);
+ } while(--count);
+
+ mValue = v;
+}
+
+uint32 VDCRCChecker::CRC(uint32 crc, const void *src, sint32 len) {
+ Init(crc);
+ Process(src, len);
+ return CRC();
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDZipStream::VDZipStream()
+ : mPos(0)
+ , mbCRCEnabled(false)
+{
+}
+
+VDZipStream::VDZipStream(IVDStream *pSrc, uint64 limit, bool bStored)
+ : mPos(0)
+ , mbCRCEnabled(false)
+{
+ Init(pSrc, limit, bStored);
+}
+
+VDZipStream::~VDZipStream() {
+}
+
+
+void VDZipStream::Init(IVDStream *pSrc, uint64 limit, bool bStored) {
+ mBits.init(pSrc, limit);
+ mBlockType = kNoBlock;
+ mReadPt = mWritePt = mBufferLevel = 0;
+ mStoredBytesLeft = 0;
+ mbNoMoreBlocks = false;
+
+ if (bStored) {
+ mStoredBytesLeft = (uint32)limit;
+ mbNoMoreBlocks = true;
+ mBlockType = kStoredBlock;
+ }
+}
+
+const wchar_t *VDZipStream::GetNameForError() {
+ return mBits.stream()->GetNameForError();
+}
+
+sint64 VDZipStream::Pos() {
+ return mPos;
+}
+
+void VDZipStream::Read(void *buffer, sint32 bytes) {
+ if (bytes != ReadData(buffer, bytes))
+ throw MyError("Read error on compressed data");
+}
+
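+// Decompressed data is staged in the 64K ring buffer mBuffer: mReadPt chases
+// mWritePt and mBufferLevel counts the bytes pending. ReadData() drains the
+// ring, calling Inflate() to refill it when it runs dry, and feeds any newly
+// produced bytes to the CRC checker when CRC checking is enabled.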
+sint32 VDZipStream::ReadData(void *dst0, sint32 bytes) {
+ sint32 actual = 0;
+
+ uint8 *dst = (uint8 *)dst0;
+
+ while(bytes > 0) {
+ if (mBufferLevel > 0) {
+ unsigned tc = std::min<unsigned>(mBufferLevel, bytes);
+ unsigned bp = 65536 - mReadPt;
+
+ if (bp < tc) {
+ memcpy(dst, mBuffer+mReadPt, bp);
+ memcpy(dst+bp, mBuffer, tc-bp);
+ mReadPt = tc-bp;
+ } else {
+ memcpy(dst, mBuffer+mReadPt, tc);
+ mReadPt += tc;
+ }
+ mBufferLevel -= tc;
+ dst += tc;
+ bytes -= tc;
+ actual += tc;
+ } else {
+ uint32 origWritePt = mWritePt;
+ uint32 origBufferLevel = mBufferLevel;
+
+ if (!Inflate())
+ break;
+
+ if (mbCRCEnabled && mBufferLevel != origBufferLevel) {
+ if (mWritePt <= origWritePt) {
+ mCRCChecker.Process(mBuffer+origWritePt, 65536 - origWritePt);
+ mCRCChecker.Process(mBuffer, mWritePt);
+ } else {
+ mCRCChecker.Process(mBuffer+origWritePt, mWritePt - origWritePt);
+ }
+ }
+ }
+ }
+
+ mPos += actual;
+ return actual;
+}
+
+void VDZipStream::Write(const void *buffer, sint32 bytes) {
+ throw MyError("Zip streams are read-only.");
+}
+
+bool VDZipStream::Inflate() {
+ if (mBlockType == kNoBlock)
+ if (mbNoMoreBlocks || !ParseBlockHeader())
+ return false;
+
+ if (mBlockType == kStoredBlock) {
+ while(mBufferLevel < 65536) {
+ if (mStoredBytesLeft <= 0) {
+ mBlockType = kNoBlock;
+ break;
+ }
+ uint32 tc = std::min<uint32>(65536 - mWritePt, std::min<uint32>(65536 - mBufferLevel, mStoredBytesLeft));
+
+ mBits.readbytes(mBuffer + mWritePt, tc);
+
+ mWritePt = (mWritePt + tc) & 65535;
+ mStoredBytesLeft -= tc;
+ mBufferLevel += tc;
+ }
+ } else {
+ static const unsigned len_tbl[32]={
+ 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,
+ 131,163,195,227,258
+ };
+
+ static const unsigned char len_bits_tbl[32]={
+ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
+ };
+
+ static const unsigned char dist_bits_tbl[]={
+ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
+ };
+
+ static const unsigned dist_tbl[]={
+ 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,
+ 6145,8193,12289,16385,24577
+ };
+
+ while(mBufferLevel < 65024) {
+ unsigned code, bits;
+
+ code = mCodeDecode[mBits.peek() & 0x7fff];
+ bits = mCodeLengths[code];
+
+ if (!mBits.consume(bits))
+ return false;
+
+ if (code == 256) {
+ mBlockType = kNoBlock;
+ break;
+ } else if (code >= 257) {
+ unsigned dist, len;
+
+ code -= 257;
+
+ len = len_tbl[code] + mBits.getbits(len_bits_tbl[code]);
+
+ if (len < 3)
+ return false; // can happen with a bad static block
+
+ code = mDistDecode[mBits.peek() & 0x7fff];
+ bits = mCodeLengths[code + 288];
+
+ if (!mBits.consume(bits))
+ return false;
+
+ dist = dist_tbl[code] + mBits.getbits(dist_bits_tbl[code]);
+
+ unsigned copysrc = (mWritePt - dist) & 65535;
+
+ mBufferLevel += len;
+
+ // NOTE: This can be a self-replicating copy. It must be ascending and it must
+ // be by bytes.
+// printf("%08lx: distance %04x count %d\n", mWritePt, dist, len);
+ do {
+ mBuffer[mWritePt++] = mBuffer[copysrc++];
+ mWritePt &= 65535;
+ copysrc &= 65535;
+ } while(--len);
+ } else {
+// printf("%08lx: literal %02x\n", mWritePt, code);
+ mBuffer[mWritePt++] = code;
+ mWritePt &= 65535;
+ ++mBufferLevel;
+ }
+ }
+ }
+
+ return true;
+}
+
+namespace {
+ static unsigned revword8(unsigned x) {
+ x = (unsigned char )((x << 4) + (x >> 4));
+ x = (unsigned char)((x << 4) + (x >> 4));
+ return ((x << 1) & 0xaa) + ((x >> 1) & 0x55);
+ }
+
+ static unsigned revword15(unsigned x) {
+ x = ((x << 8) & 0xff00) + ((x >> 8) & 0x00ff);
+ x = ((x << 4) & 0xf0f0) + ((x >> 4) & 0x0f0f);
+ x = ((x << 2) & 0xcccc) + ((x >> 2) & 0x3333);
+ return (x & 0x5555) + ((x >> 2) & 0x2aaa);
+ }
+
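+ // The decode tables map the next 8 (or 15) input bits directly to a symbol.
+ // Canonical Huffman codes are assigned in order of increasing length, and
+ // because DEFLATE transmits codes LSB-first the running code is incremented
+ // on its bit-reversed value -- that is what revword8()/revword15() provide.
+ // Each code of length i fills every table slot whose low i bits match it;
+ // a false return means the lengths did not form a complete code.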
+ static bool InflateExpandTable256(unsigned char *dst, unsigned char *lens, unsigned codes) {
+ unsigned k;
+ unsigned ki;
+ unsigned base=0;
+
+ for(unsigned i=1; i<16; ++i) {
+ ki = 1<<i;
+
+ for(unsigned j=0; j<codes; ++j) {
+ if (lens[j] == i) {
+ for(k=base; k<0x100; k+=ki)
+ dst[k] = j;
+
+ base = revword8((revword8(base)+(0x100 >> i)) & 0xff);
+ }
+ }
+ }
+
+ return !base;
+ }
+
+ static bool InflateExpandTable32K(unsigned short *dst, unsigned char *lens, unsigned codes) {
+ unsigned k;
+ unsigned ki;
+ unsigned base=0;
+
+ for(int i=1; i<16; ++i) {
+ ki = 1<<i;
+
+ for(unsigned j=0; j<codes; ++j) {
+ if (lens[j] == i) {
+ for(k=base; k<0x8000; k+=ki)
+ dst[k] = j;
+
+ base = revword15(revword15(base)+(0x8000 >> i));
+ }
+ }
+ }
+
+ return !base;
+ }
+}
+
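+// Reads one DEFLATE block header (RFC 1951): a final-block bit followed by a
+// 2-bit block type -- 0 = stored, 1 = fixed Huffman trees, 2 = dynamic trees.
+// For dynamic blocks the code-length alphabet is decoded first and then used
+// to expand the literal/length and distance tables consumed by Inflate().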
+bool VDZipStream::ParseBlockHeader() {
+ unsigned char ltbl_lengths[20];
+ unsigned char ltbl_decode[256];
+
+ if (mBits.getbit())
+ mbNoMoreBlocks = true;
+
+ unsigned type = mBits.getbits(2);
+
+ switch(type) {
+ case 0: // stored
+ {
+ mBits.align();
+ if (mBits.avail() < 32)
+ return false;
+
+ mStoredBytesLeft = mBits.getbits(16);
+
+ uint32 invCount = mBits.getbits(16);
+
+ if ((uint16)~invCount != mStoredBytesLeft)
+ return false;
+
+ if (mBits.bytesleft() < mStoredBytesLeft)
+ return false;
+
+ mBlockType = kStoredBlock;
+ }
+ break;
+ case 1: // static trees
+ {
+ int i;
+
+ for(i=0; i<144; ++i) mCodeLengths[i] = 8;
+ for( ; i<256; ++i) mCodeLengths[i] = 9;
+ for( ; i<280; ++i) mCodeLengths[i] = 7;
+ for( ; i<288; ++i) mCodeLengths[i] = 8;
+ for(i=0; i< 32; ++i) mCodeLengths[i+288] = 5;
+
+ if (!InflateExpandTable32K(mCodeDecode, mCodeLengths, 288)) {
+ VDASSERT(false); // code table bad
+ return false;
+ }
+ if (!InflateExpandTable32K(mDistDecode, mCodeLengths+288, 32)) {
+ VDASSERT(false); // distance table bad
+ return false;
+ }
+
+ mBlockType = kDeflatedBlock;
+ }
+ break;
+ case 2: // dynamic trees
+ {
+ if (mBits.avail() < 16)
+ return false;
+
+ const unsigned code_count = mBits.getbits(5) + 257;
+ const unsigned dist_count = mBits.getbits(5) + 1;
+ const unsigned total_count = code_count + dist_count;
+ const unsigned ltbl_count = mBits.getbits(4) + 4;
+
+ // decompress length table tree
+
+ if (mBits.bitsleft() < 3*ltbl_count)
+ return false;
+
+ memset(ltbl_lengths, 0, sizeof ltbl_lengths);
+
+ static const unsigned char hclen_tbl[]={
+ 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
+ };
+
+ for(unsigned i=0; i<ltbl_count; ++i) {
+ ltbl_lengths[hclen_tbl[i]] = mBits.getbits(3);
+ }
+
+ if (!InflateExpandTable256(ltbl_decode, ltbl_lengths, 20)) {
+ VDASSERT(false); // tree table bad
+ return false;
+ }
+
+ // decompress length table
+
+ unsigned j=0;
+ unsigned last = 0;
+ while(j < total_count) {
+ unsigned k = ltbl_decode[0xff & mBits.peek()];
+ unsigned run = 1;
+
+ if (!mBits.consume(ltbl_lengths[k]))
+ return false;
+
+ switch(k) {
+ case 16: // last run of 3-6
+ if (mBits.avail() < 2)
+ return false;
+ run = mBits.getbits(2) + 3;
+ break;
+ case 17: // zero run of 3-10
+ if (mBits.avail() < 3)
+ return false;
+ run = mBits.getbits(3) + 3;
+ last = 0;
+ break;
+ case 18: // zero run of 11-138
+ if (mBits.avail() < 7)
+ return false;
+ run = mBits.getbits(7) + 11;
+ last = 0;
+ break;
+ default:
+ last = k;
+ }
+
+ if (run+j > total_count) {
+ VDASSERT(false); // tree table bad
+ return false;
+ }
+
+ do {
+ mCodeLengths[j++] = last;
+ } while(--run);
+ }
+
+ memmove(mCodeLengths + 288, mCodeLengths + code_count, dist_count);
+
+ if (!InflateExpandTable32K(mCodeDecode, mCodeLengths, code_count)) {
+ VDASSERT(false); // code table bad
+ return false;
+ }
+ if (!InflateExpandTable32K(mDistDecode, mCodeLengths+288, dist_count)) {
+ VDASSERT(false); // data table bad
+ return false;
+ }
+ mBlockType = kDeflatedBlock;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+#pragma pack(push, 2)
+
+namespace {
+ enum {
+ kZipMethodStore = 0,
+ kZipMethodDeflate = 8
+ };
+
+ struct ZipFileHeader {
+ enum { kSignature = 0x04034b50 };
+ uint32 signature;
+ uint16 version_required;
+ uint16 flags;
+ uint16 method;
+ uint16 mod_time;
+ uint16 mod_date;
+ uint32 crc32;
+ uint32 compressed_size;
+ uint32 uncompressed_size;
+ uint16 filename_len;
+ uint16 extrafield_len;
+ };
+
+ struct ZipFileEntry {
+ enum { kSignature = 0x02014b50 };
+ uint32 signature;
+ uint16 version_create;
+ uint16 version_required;
+ uint16 flags;
+ uint16 method;
+ uint16 mod_time;
+ uint16 mod_date;
+ uint32 crc32;
+ uint32 compressed_size;
+ uint32 uncompressed_size;
+ uint16 filename_len;
+ uint16 extrafield_len;
+ uint16 comment_len;
+ uint16 diskno;
+ uint16 internal_attrib;
+ uint32 external_attrib;
+ uint32 reloff_localhdr;
+ };
+
+ struct ZipCentralDir {
+ enum { kSignature = 0x06054b50 };
+
+ uint32 signature;
+ uint16 diskno;
+ uint16 diskno_dir;
+ uint16 dirents;
+ uint16 dirents_total;
+ uint32 dirsize;
+ uint32 diroffset;
+ uint16 comment_len;
+ };
+}
+
+#pragma pack(pop)
+
+VDZipArchive::VDZipArchive() {
+}
+
+VDZipArchive::~VDZipArchive() {
+}
+
+void VDZipArchive::Init(IVDRandomAccessStream *pSrc) {
+ mpStream = pSrc;
+
+ // This seek is wrong for files with zip comments, but we aren't creating
+ // a general purpose Unzip utility anyway.
+ mpStream->Seek(mpStream->Length() - sizeof(ZipCentralDir));
+
+ ZipCentralDir cdirhdr;
+
+ mpStream->Read(&cdirhdr, sizeof cdirhdr);
+ if (cdirhdr.signature != ZipCentralDir::kSignature)
+ throw MyError("Zip file has missing or bad central directory");
+
+ mDirectory.resize(cdirhdr.dirents_total);
+
+ mpStream->Seek(cdirhdr.diroffset);
+
+ for(int i=0; i<cdirhdr.dirents_total; ++i) {
+ FileInfoInternal& fii = mDirectory[i];
+ ZipFileEntry ent;
+
+ mpStream->Read(&ent, sizeof ent);
+ if (ent.signature != ZipFileEntry::kSignature)
+ throw MyError("Zip directory is bad");
+
+ if (ent.method != kZipMethodStore && ent.method != kZipMethodDeflate)
+ throw MyError("Unsupported compression method in zip archive");
+
+ fii.mDataStart = ent.reloff_localhdr;
+ fii.mCompressedSize = ent.compressed_size;
+ fii.mUncompressedSize = ent.uncompressed_size;
+ fii.mCRC32 = ent.crc32;
+ fii.mbPacked = ent.method == kZipMethodDeflate;
+ fii.mFileName.resize(ent.filename_len);
+
+ mpStream->Read(&*fii.mFileName.begin(), ent.filename_len);
+
+ mpStream->Seek(mpStream->Pos() + ent.extrafield_len + ent.comment_len);
+ }
+}
+
+sint32 VDZipArchive::GetFileCount() {
+ return mDirectory.size();
+}
+
+const VDZipArchive::FileInfo& VDZipArchive::GetFileInfo(sint32 idx) {
+ VDASSERT((size_t)idx < mDirectory.size());
+ return mDirectory[idx];
+}
+
+IVDStream *VDZipArchive::OpenRawStream(sint32 idx) {
+ const FileInfoInternal& fi = mDirectory[idx];
+
+ mpStream->Seek(fi.mDataStart);
+
+ ZipFileHeader hdr;
+ mpStream->Read(&hdr, sizeof hdr);
+
+ if (hdr.signature != ZipFileHeader::kSignature)
+ throw MyError("Bad header for file in zip archive");
+
+ mpStream->Seek(fi.mDataStart + sizeof(hdr) + hdr.filename_len + hdr.extrafield_len);
+
+ return mpStream;
+}
diff --git a/src/thirdparty/VirtualDub/system/system.vcproj b/src/thirdparty/VirtualDub/system/system.vcproj
new file mode 100644
index 000000000..2744ccea9
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/system.vcproj
@@ -0,0 +1,1906 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9,00"
+ Name="system"
+ ProjectGUID="{C2082189-3ECB-4079-91FA-89D3C8A305C0}"
+ RootNamespace="system"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ <Platform
+ Name="x64"
+ />
+ </Platforms>
+ <ToolFiles>
+ <ToolFile
+ RelativePath="..\..\..\YASM.rules"
+ />
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="1"
+ InlineFunctionExpansion="2"
+ AdditionalIncludeDirectories="..\h,.\h"
+ PreprocessorDefinitions="NDEBUG;_LIB;WIN32;NOMINMAX;WIN32_LEAN_AND_MEAN"
+ StringPooling="true"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="true"
+ UsePrecompiledHeader="2"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267;4996"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Release/system.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="1"
+ InlineFunctionExpansion="2"
+ AdditionalIncludeDirectories="..\h,.\h"
+ PreprocessorDefinitions="NDEBUG;_LIB;WIN32;NOMINMAX;WIN32_LEAN_AND_MEAN"
+ StringPooling="true"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="true"
+ UsePrecompiledHeader="2"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267;4996"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Release/system.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="../h,.\h"
+ PreprocessorDefinitions="_DEBUG;_LIB;WIN32;NOMINMAX;WIN32_LEAN_AND_MEAN"
+ StringPooling="true"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ UsePrecompiledHeader="2"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267;4996"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Debug/system.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="../h,.\h"
+ PreprocessorDefinitions="_DEBUG;_LIB;WIN32;NOMINMAX;WIN32_LEAN_AND_MEAN"
+ StringPooling="true"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ UsePrecompiledHeader="2"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267;4996"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Debug/system.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+ >
+ <File
+ RelativePath="..\h\vd2\system\source\bitmath.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\cache.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\cmdline.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\cpuaccel.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\debug.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\debugx86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\Error.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\event.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\file.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\fileasync.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\filesys.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\filewatcher.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\Fraction.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\halffloat.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\hash.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\int128.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\list.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\log.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\math.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\memory.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\profile.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\progress.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\protscope.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\refcount.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\registry.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\strutil.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\text.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\thread.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\thunk.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\time.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\tls.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\VDNamespace.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\VDScheduler.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\vdstl.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\VDString.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\vectors.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\w32assist.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\zip.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl"
+ >
+ <File
+ RelativePath="..\h\vd2\system\atomic.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\binary.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\bitmath.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\cache.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\cmdline.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\cpuaccel.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\debug.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\debugx86.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\Error.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\event.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\file.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\fileasync.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\filesys.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\filewatcher.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\Fraction.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\halffloat.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\hash.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\int128.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\list.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\log.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\math.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\memory.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\profile.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\progress.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\protscope.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\refcount.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\registry.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\strutil.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\text.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\thread.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\thunk.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\time.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\tls.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\unknown.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\vdalloc.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDNamespace.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDQueue.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDRingBuffer.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDScheduler.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\vdstl.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDString.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\vdtypes.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\vectors.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\vectors_float.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\vectors_int.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\w32assist.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\zip.h"
+ >
+ </File>
+ <Filter
+ Name="win32"
+ >
+ <File
+ RelativePath="..\h\vd2\system\win32\miniwindows.h"
+ >
+ </File>
+ </Filter>
+ </Filter>
+ <Filter
+ Name="Assembly Files (x86)"
+ Filter="asm"
+ >
+ <File
+ RelativePath="source\a_memory.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_thunk.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Assembly Files (AMD64)"
+ >
+ <File
+ RelativePath="source\a64_fraction.asm"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a64_int128.asm"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a64_thunk.asm"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Precompiled Header Support"
+ >
+ <File
+ RelativePath=".\source\stdaccel.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="3"
+ InlineFunctionExpansion="0"
+ EnableIntrinsicFunctions="false"
+ BasicRuntimeChecks="0"
+ UsePrecompiledHeader="0"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="3"
+ BasicRuntimeChecks="0"
+ UsePrecompiledHeader="0"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\stdafx.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="h\stdafx.h"
+ >
+ </File>
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>