github.com/mpc-hc/mpc-hc.git
author    kinddragon <kinddragon@users.sourceforge.net>  2010-05-21 04:53:52 +0400
committer kinddragon <kinddragon@users.sourceforge.net>  2010-05-21 04:53:52 +0400
commit    37f62abd654047d060c86d6c76cd2f6862f89b94 (patch)
tree      83eb125bd86f8a685928e290e2ec929ce633bc53 /src
parent    dae6425e0c23576dac77c3afae1dc6de22f983d5 (diff)
DSUtil now uses the new VirtualDub libraries (SSE2 deinterlacing for the MPEG2 decoder).
AudioSwitcher: rare memory corruption fixed.

git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1907 10f7b99b-c216-0410-bff0-8a66a9350fd8
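The commit replaces DSUtil's hand-rolled colour-space and deinterlacing code with the VirtualDub "Kasumi" and "system" libraries imported under src/thirdparty/VirtualDub. As a hedged illustration only (not taken from the patch), this is roughly how a filter could call the two public deinterlacing entry points whose declarations appear in the src/DSUtil/vd.h diff below; all names and the geometry here are hypothetical:

    #include "vd.h"  // DeinterlaceBlend / DeinterlaceBob declarations

    // Sketch: deinterlace one RGB32 plane, either by blending or by bobbing one field.
    void DeinterlaceRGB32Frame(BYTE* dst, BYTE* src, DWORD width, DWORD height,
                               DWORD dstpitch, DWORD srcpitch, bool blend, bool topfield)
    {
        DWORD rowbytes = width * 4; // the 'rowbytes' argument is in bytes, not pixels
        if (blend)
            DeinterlaceBlend(dst, src, rowbytes, height, dstpitch, srcpitch);
        else
            DeinterlaceBob(dst, src, rowbytes, height, dstpitch, srcpitch, topfield);
    }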
Diffstat (limited to 'src')
-rw-r--r--  src/DSUtil/deinterlace.cpp | 526
-rw-r--r--  src/DSUtil/dsutil.vcproj | 132
-rw-r--r--  src/DSUtil/vd.cpp | 970
-rw-r--r--  src/DSUtil/vd.h | 18
-rw-r--r--  src/DSUtil/vd_asm.cpp | 290
-rw-r--r--  src/DSUtil/vd_asm.h | 11
-rw-r--r--  src/YASM.rules | 19
-rw-r--r--  src/common.vsprops | 1
-rw-r--r--  src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp | 10
-rw-r--r--  src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp | 5
-rw-r--r--  src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp | 5
-rw-r--r--  src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp | 1
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/Kasumi.vcproj | 1527
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/bitutils.h | 26
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/blt_setup.h | 62
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils.h | 23
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils_x86.h | 35
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/resample_stages.h | 80
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/resample_stages_reference.h | 156
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x64.h | 26
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x86.h | 193
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit.h | 83
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_16f.h | 39
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_base.h | 129
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_fill.h | 55
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_gen.h | 167
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_input.h | 69
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_pal.h | 148
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample.h | 83
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special.h | 81
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special_x86.h | 26
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb.h | 552
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb_x86.h | 114
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle.h | 343
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle_x86.h | 71
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_v210.h | 72
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr.h | 584
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr_x86.h | 27
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a64_resample.asm | 64620
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb.asm | 812
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb2yuv_mmx.asm | 652
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb_mmx.asm | 806
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_bltyuv2rgb_sse2.asm | 161
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_resample_mmx.asm | 1559
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_resample_sse41.asm | 358
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_spanutils_isse.asm | 193
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_mmx.asm | 326
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_point.asm | 96
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_triblt.inc | 24
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_triblt_mmx.asm | 425
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_triblt_scalar.asm | 36
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/a_triblt_sse2.asm | 197
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/alphablt.cpp | 76
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt.cpp | 273
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference.cpp | 259
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_pal.cpp | 545
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_rgb.cpp | 310
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv.cpp | 1590
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv2yuv.cpp | 260
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuvrev.cpp | 530
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_setup.cpp | 17
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils.cpp | 365
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils_x86.cpp | 170
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_uberblit.cpp | 19
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/blt_x86.cpp | 144
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/pixel.cpp | 667
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/pixmaputils.cpp | 519
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/region.cpp | 1334
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample.cpp | 348
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_kernels.cpp | 255
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_stages.cpp | 149
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_stages_reference.cpp | 425
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x64.cpp | 26
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x86.cpp | 1277
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/stretchblt_reference.cpp | 816
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/tables.cpp | 204
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/triblt.cpp | 1717
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit.cpp | 903
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_16f.cpp | 40
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_gen.cpp | 1597
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample.cpp | 623
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special.cpp | 186
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special_x86.cpp | 35
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle.cpp | 89
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle_x86.cpp | 400
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_v210.cpp | 199
-rw-r--r--  src/thirdparty/VirtualDub/Kasumi/source/uberblit_ycbcr_x86.cpp | 35
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/blitter.h | 19
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/pixel.h | 40
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmap.h | 76
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmapops.h | 20
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmaputils.h | 171
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/region.h | 92
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/resample.h | 31
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/resample_kernels.h | 91
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/tables.h | 41
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/text.h | 62
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/Kasumi/triblt.h | 71
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/Error.h | 119
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/Fraction.h | 95
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDNamespace.h | 157
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDQueue.h | 90
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDRingBuffer.h | 301
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDScheduler.h | 125
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/VDString.h | 1134
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/atomic.h | 282
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/binary.h | 184
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/bitmath.h | 75
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/cache.h | 325
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/cmdline.h | 69
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/cpuaccel.h | 49
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/debug.h | 96
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/debugx86.h | 37
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/event.h | 201
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/file.h | 323
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/fileasync.h | 64
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/filesys.h | 170
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/filewatcher.h | 45
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/halffloat.h | 9
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/hash.h | 47
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/int128.h | 361
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/list.h | 275
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/log.h | 70
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/math.h | 259
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/memory.h | 84
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/profile.h | 167
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/progress.h | 96
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/protscope.h | 245
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/refcount.h | 282
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/registry.h | 84
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/source/bitmath.cpp | 67
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/strutil.h | 44
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/text.h | 60
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/thread.h | 269
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/thunk.h | 76
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/time.h | 118
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/tls.h | 38
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/unknown.h | 77
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vdalloc.h | 123
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vdstl.h | 1610
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vdtypes.h | 415
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vectors.h | 568
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vectors_float.h | 207
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/vectors_int.h | 183
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/w32assist.h | 95
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/win32/miniwindows.h | 53
-rw-r--r--  src/thirdparty/VirtualDub/h/vd2/system/zip.h | 220
-rw-r--r--  src/thirdparty/VirtualDub/system/h/stdafx.h | 12
-rw-r--r--  src/thirdparty/VirtualDub/system/source/Error.cpp | 340
-rw-r--r--  src/thirdparty/VirtualDub/system/source/Fraction.cpp | 327
-rw-r--r--  src/thirdparty/VirtualDub/system/source/VDNamespace.cpp | 254
-rw-r--r--  src/thirdparty/VirtualDub/system/source/VDScheduler.cpp | 261
-rw-r--r--  src/thirdparty/VirtualDub/system/source/VDString.cpp | 209
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a64_fraction.asm | 58
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a64_int128.asm | 73
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a64_thunk.asm | 58
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a_memory.asm | 135
-rw-r--r--  src/thirdparty/VirtualDub/system/source/a_thunk.asm | 63
-rw-r--r--  src/thirdparty/VirtualDub/system/source/cache.cpp | 422
-rw-r--r--  src/thirdparty/VirtualDub/system/source/cmdline.cpp | 178
-rw-r--r--  src/thirdparty/VirtualDub/system/source/cpuaccel.cpp | 251
-rw-r--r--  src/thirdparty/VirtualDub/system/source/debug.cpp | 290
-rw-r--r--  src/thirdparty/VirtualDub/system/source/debugx86.cpp | 154
-rw-r--r--  src/thirdparty/VirtualDub/system/source/event.cpp | 81
-rw-r--r--  src/thirdparty/VirtualDub/system/source/file.cpp | 795
-rw-r--r--  src/thirdparty/VirtualDub/system/source/fileasync.cpp | 832
-rw-r--r--  src/thirdparty/VirtualDub/system/source/filesys.cpp | 663
-rw-r--r--  src/thirdparty/VirtualDub/system/source/filewatcher.cpp | 117
-rw-r--r--  src/thirdparty/VirtualDub/system/source/halffloat.cpp | 79
-rw-r--r--  src/thirdparty/VirtualDub/system/source/hash.cpp | 98
-rw-r--r--  src/thirdparty/VirtualDub/system/source/int128.cpp | 478
-rw-r--r--  src/thirdparty/VirtualDub/system/source/list.cpp | 97
-rw-r--r--  src/thirdparty/VirtualDub/system/source/log.cpp | 171
-rw-r--r--  src/thirdparty/VirtualDub/system/source/math.cpp | 146
-rw-r--r--  src/thirdparty/VirtualDub/system/source/memory.cpp | 456
-rw-r--r--  src/thirdparty/VirtualDub/system/source/profile.cpp | 234
-rw-r--r--  src/thirdparty/VirtualDub/system/source/progress.cpp | 35
-rw-r--r--  src/thirdparty/VirtualDub/system/source/protscope.cpp | 37
-rw-r--r--  src/thirdparty/VirtualDub/system/source/refcount.cpp | 29
-rw-r--r--  src/thirdparty/VirtualDub/system/source/registry.cpp | 243
-rw-r--r--  src/thirdparty/VirtualDub/system/source/stdaccel.cpp | 42
-rw-r--r--  src/thirdparty/VirtualDub/system/source/stdafx.cpp | 46
-rw-r--r--  src/thirdparty/VirtualDub/system/source/strutil.cpp | 99
-rw-r--r--  src/thirdparty/VirtualDub/system/source/text.cpp | 652
-rw-r--r--  src/thirdparty/VirtualDub/system/source/thread.cpp | 274
-rw-r--r--  src/thirdparty/VirtualDub/system/source/thunk.cpp | 306
-rw-r--r--  src/thirdparty/VirtualDub/system/source/time.cpp | 270
-rw-r--r--  src/thirdparty/VirtualDub/system/source/tls.cpp | 43
-rw-r--r--  src/thirdparty/VirtualDub/system/source/vdstl.cpp | 32
-rw-r--r--  src/thirdparty/VirtualDub/system/source/vectors.cpp | 77
-rw-r--r--  src/thirdparty/VirtualDub/system/source/w32assist.cpp | 580
-rw-r--r--  src/thirdparty/VirtualDub/system/source/zip.cpp | 603
-rw-r--r--  src/thirdparty/VirtualDub/system/system.vcproj | 1906
193 files changed, 51711 insertions, 1178 deletions
diff --git a/src/DSUtil/deinterlace.cpp b/src/DSUtil/deinterlace.cpp
new file mode 100644
index 000000000..a66915dfd
--- /dev/null
+++ b/src/DSUtil/deinterlace.cpp
@@ -0,0 +1,526 @@
+// VirtualDub - Video processing and capture application
+// Copyright (C) 1998-2001 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include "stdafx.h"
+#include <emmintrin.h>
+#include <vd2/system/cpuaccel.h>
+
+#define uint8 unsigned char
+#define uint32 unsigned int
+#define uint64 unsigned __int64
+
+#ifdef _M_IX86
+#define VD_CPU_X86
+#endif
+
+#ifdef _M_X64
+#define VD_CPU_AMD64
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+
+#pragma warning(disable: 4799) // warning C4799: function has no EMMS instruction
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _M_IX86
+static void __declspec(naked) asm_blend_row_clipped(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+xloop:
+ mov ecx,[esi]
+ mov eax,0fefefefeh
+
+ mov ebx,[esi+edx]
+ and eax,ecx
+
+ shr eax,1
+ and ebx,0fefefefeh
+
+ shr ebx,1
+ add esi,4
+
+ add eax,ebx
+ dec ebp
+
+ mov [edi+esi-4],eax
+ jnz xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+static void __declspec(naked) asm_blend_row(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+xloop:
+ mov ecx,[esi]
+ mov eax,0fcfcfcfch
+
+ mov ebx,[esi+edx]
+ and eax,ecx
+
+ shr ebx,1
+ mov ecx,[esi+edx*2]
+
+ shr ecx,2
+ and ebx,07f7f7f7fh
+
+ shr eax,2
+ and ecx,03f3f3f3fh
+
+ add eax,ebx
+ add esi,4
+
+ add eax,ecx
+ dec ebp
+
+ mov [edi+esi-4],eax
+ jnz xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+static void __declspec(naked) asm_blend_row_MMX(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ static const __declspec(align(8)) __int64 mask0 = 0xfcfcfcfcfcfcfcfci64;
+ static const __declspec(align(8)) __int64 mask1 = 0x7f7f7f7f7f7f7f7fi64;
+ static const __declspec(align(8)) __int64 mask2 = 0x3f3f3f3f3f3f3f3fi64;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+ movq mm5,mask0
+ movq mm6,mask1
+ movq mm7,mask2
+ inc ebp
+ shr ebp,1
+xloop:
+ movq mm2,[esi]
+ movq mm0,mm5
+
+ movq mm1,[esi+edx]
+ pand mm0,mm2
+
+ psrlq mm1,1
+ movq mm2,[esi+edx*2]
+
+ psrlq mm2,2
+ pand mm1,mm6
+
+ psrlq mm0,2
+ pand mm2,mm7
+
+ paddb mm0,mm1
+ add esi,8
+
+ paddb mm0,mm2
+ dec ebp
+
+ movq [edi+esi-8],mm0
+ jne xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+
+static void __declspec(naked) asm_blend_row_ISSE(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edi,[esp+20]
+ mov esi,[esp+24]
+ sub edi,esi
+ mov ebp,[esp+28]
+ mov edx,[esp+32]
+
+ inc ebp
+ shr ebp,1
+ pcmpeqb mm7, mm7
+
+ align 16
+xloop:
+ movq mm0, [esi]
+ movq mm2, mm7
+ pxor mm0, mm7
+
+ pxor mm2, [esi+edx*2]
+ pavgb mm0, mm2
+ pxor mm0, mm7
+
+ pavgb mm0, [esi+edx]
+ add esi,8
+
+ movq [edi+esi-8],mm0
+ dec ebp
+ jne xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ };
+}
+#else
+static void asm_blend_row_clipped(void *dst0, const void *src0, uint32 w, ptrdiff_t srcpitch) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+ const uint32 *src2 = (const uint32 *)((const char *)src + srcpitch);
+
+ do {
+ const uint32 x = *src++;
+ const uint32 y = *src2++;
+
+ *dst++ = (x|y) - (((x^y)&0xfefefefe)>>1);
+ } while(--w);
+}
+
+static void asm_blend_row(void *dst0, const void *src0, uint32 w, ptrdiff_t srcpitch) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+ const uint32 *src2 = (const uint32 *)((const char *)src + srcpitch);
+ const uint32 *src3 = (const uint32 *)((const char *)src2 + srcpitch);
+
+ do {
+ const uint32 a = *src++;
+ const uint32 b = *src2++;
+ const uint32 c = *src3++;
+ const uint32 hi = (a & 0xfcfcfc) + 2*(b & 0xfcfcfc) + (c & 0xfcfcfc);
+ const uint32 lo = (a & 0x030303) + 2*(b & 0x030303) + (c & 0x030303) + 0x020202;
+
+ *dst++ = (hi + (lo & 0x0c0c0c))>>2;
+ } while(--w);
+}
+#endif
+
+#if defined(VD_CPU_X86) || defined(VD_CPU_AMD64)
+ static void asm_blend_row_SSE2(void *dst, const void *src, uint32 w, ptrdiff_t srcpitch) {
+ __m128i zero = _mm_setzero_si128();
+ __m128i inv = _mm_cmpeq_epi8(zero, zero);
+
+ w = (w + 3) >> 2;
+
+ const __m128i *src1 = (const __m128i *)src;
+ const __m128i *src2 = (const __m128i *)((const char *)src + srcpitch);
+ const __m128i *src3 = (const __m128i *)((const char *)src + srcpitch*2);
+ __m128i *dstrow = (__m128i *)dst;
+ do {
+ __m128i a = *src1++;
+ __m128i b = *src2++;
+ __m128i c = *src3++;
+
+ *dstrow++ = _mm_avg_epu8(_mm_xor_si128(_mm_avg_epu8(_mm_xor_si128(a, inv), _mm_xor_si128(c, inv)), inv), b);
+ } while(--w);
+ }
+
+#endif
+
+namespace {
+
+ void Average_scalar(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ uint32 w4 = w16 << 2;
+ do {
+ uint32 *dstv = (uint32 *)dst;
+ uint32 *src1v = (uint32 *)src1;
+ uint32 *src2v = (uint32 *)src2;
+
+ for(uint32 i=0; i<w4; ++i) {
+ uint32 a = src1v[i];
+ uint32 b = src2v[i];
+
+ dstv[i] = (a|b) - (((a^b) & 0xfefefefe) >> 1);
+ }
+
+ dst = (char *)dst + dstPitch;
+ src1 = (char *)src1 + srcPitch;
+ src2 = (char *)src2 + srcPitch;
+ } while(--h);
+ }
+
+#if defined(VD_CPU_X86)
+ void __declspec(naked) __cdecl Average_MMX(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ static const __declspec(align(8)) uint64 x7fb = 0x7f7f7f7f7f7f7f7f;
+ static const __declspec(align(8)) uint64 xfeb = 0xfefefefefefefefe;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+24+16]
+ mov eax, [esp+4+16]
+ shl esi, 4
+ mov ecx, [esp+12+16]
+ mov edx, [esp+16+16]
+ mov ebp, [esp+20+16]
+ mov edi, [esp+8+16]
+ sub edi, esi
+ sub ebp, esi
+
+ movq mm6, x7fb
+ movq mm7, xfeb
+
+ mov esi, [esp+28+16]
+yloop:
+ mov ebx, [esp+24+16]
+mainRowLoop:
+ movq mm0, [ecx]
+ movq mm3, [ecx + 8]
+ movq mm1, mm0
+ movq mm2, [edx]
+ movq mm4, mm3
+ movq mm5, [edx + 8]
+ por mm1, mm2
+ pxor mm0, mm2
+ por mm4, mm5
+ pxor mm3, mm5
+ psrlq mm0, 1
+ pand mm3, mm7
+ pand mm0, mm6
+ psrlq mm3, 1
+ psubb mm1, mm0
+ psubb mm4, mm3
+ add ecx, 16
+ movq [eax], mm1
+ movq [eax+8], mm4
+ add edx, 16
+ add eax, 16
+ dec ebx
+ jne mainRowLoop
+
+ add eax, edi
+ add ecx, ebp
+ add edx, ebp
+ dec esi
+ jne yloop
+
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+ }
+
+ void __declspec(naked) __cdecl Average_ISSE(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ static const __declspec(align(8)) uint64 x7fb = 0x7f7f7f7f7f7f7f7f;
+ static const __declspec(align(8)) uint64 xfeb = 0xfefefefefefefefe;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+24+16]
+ mov eax, [esp+4+16]
+ shl esi, 4
+ mov ecx, [esp+12+16]
+ mov edx, [esp+16+16]
+ mov ebp, [esp+20+16]
+ mov edi, [esp+8+16]
+ sub edi, esi
+ sub ebp, esi
+
+ movq mm6, x7fb
+ movq mm7, xfeb
+
+ mov esi, [esp+28+16]
+yloop:
+ mov ebx, [esp+24+16]
+mainRowLoop:
+ movq mm0, [ecx]
+ movq mm1, [ecx + 8]
+ movq mm2, [edx]
+ movq mm3, [edx + 8]
+ pavgb mm0, mm2
+ pavgb mm1, mm3
+ movq [eax], mm0
+ add ecx, 16
+ add edx, 16
+ movq [eax+8], mm1
+ add eax, 16
+ dec ebx
+ jne mainRowLoop
+
+ add eax, edi
+ add ecx, ebp
+ add edx, ebp
+ dec esi
+ jne yloop
+
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+ }
+#endif
+
+#if defined(VD_CPU_X86) || defined(VD_CPU_AMD64)
+ void Average_SSE2(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h) {
+ do {
+ __m128i *dstv = (__m128i *)dst;
+ __m128i *src1v = (__m128i *)src1;
+ __m128i *src2v = (__m128i *)src2;
+
+ for(uint32 i=0; i<w16; ++i)
+ dstv[i] = _mm_avg_epu8(src1v[i], src2v[i]);
+
+ dst = (char *)dst + dstPitch;
+ src1 = (char *)src1 + srcPitch;
+ src2 = (char *)src2 + srcPitch;
+ } while(--h);
+ }
+#endif
+
+ void InterpPlane_Bob(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, uint32 w, uint32 h, bool interpField2) {
+ void (*blend_func)(void *dst, ptrdiff_t dstPitch, const void *src1, const void *src2, ptrdiff_t srcPitch, uint32 w16, uint32 h);
+#if defined(VD_CPU_X86)
+ if (SSE2_enabled)
+ blend_func = Average_SSE2;
+ else if (ISSE_enabled)
+ blend_func = Average_ISSE;
+ else if (MMX_enabled)
+ blend_func = Average_MMX;
+ else
+ blend_func = Average_scalar;
+#else
+ blend_func = Average_SSE2;
+#endif
+
+ w = (w + 3) >> 2;
+
+ int y0 = interpField2 ? 1 : 2;
+
+ if (!interpField2)
+ memcpy(dst, src, w * 4);
+
+ if (h > y0) {
+ ASSERT(((UINT_PTR)dst & 0xF) == 0);
+ ASSERT((dstpitch & 0xF) == 0);
+ ASSERT(((UINT_PTR)src & 0xF) == 0);
+ ASSERT((srcpitch*(y0 - 1) & 0xF) == 0);
+ blend_func((char *)dst + dstpitch*y0,
+ dstpitch*2,
+ (const char *)src + srcpitch*(y0 - 1),
+ (const char *)src + srcpitch*(y0 + 1),
+ srcpitch*2,
+ (w + 3) >> 2,
+ (h - y0) >> 1);
+ }
+
+ if (interpField2)
+ memcpy((char *)dst + dstpitch*(h - 1), (const char *)src + srcpitch*(h - 1), w*4);
+
+#ifdef _M_IX86
+ if (MMX_enabled)
+ __asm emms
+#endif
+ }
+
+ void BlendPlane(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, uint32 w, uint32 h) {
+ void (*blend_func)(void *, const void *, uint32, ptrdiff_t);
+#if defined(VD_CPU_X86)
+ if (SSE2_enabled)
+ blend_func = asm_blend_row_SSE2;
+ else
+ blend_func = ISSE_enabled ? asm_blend_row_ISSE : MMX_enabled ? asm_blend_row_MMX : asm_blend_row;
+#else
+ blend_func = asm_blend_row_SSE2;
+#endif
+
+ w = (w + 3) >> 2;
+
+ asm_blend_row_clipped(dst, src, w, srcpitch);
+ if (h-=2)
+ do {
+ dst = ((char *)dst + dstpitch);
+
+ blend_func(dst, src, w, srcpitch);
+
+ src = ((char *)src + srcpitch);
+ } while(--h);
+
+ asm_blend_row_clipped((char *)dst + dstpitch, src, w, srcpitch);
+
+#ifdef _M_IX86
+ if (MMX_enabled)
+ __asm emms
+#endif
+ }
+}
+
+void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD w, DWORD h, DWORD dstpitch, DWORD srcpitch)
+{
+ BlendPlane(dst, dstpitch, src, srcpitch, w, h);
+}
+
+void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD w, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield)
+{
+ topfield = !topfield;
+
+ InterpPlane_Bob(dst, dstpitch, src, srcpitch, w, h, topfield);
+}
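The blend kernel above weights three consecutive source rows as (1, 2, 1)/4. SSE2 has no single "average of three" instruction, so asm_blend_row_SSE2 builds the result from two pavgb (round-up average) operations, using the complement trick ~avg_ceil(~a, ~c) to get a floor average of the outer rows before averaging with the middle row. The following stand-alone check is illustrative only, not part of the patch; it models that per-byte arithmetic in scalar C++ and verifies it equals the rounded (a + 2*b + c + 2) / 4:

    #include <cassert>
    #include <cstdint>

    // Model of _mm_avg_epu8 on a single byte: average rounded up.
    static uint8_t avg_ceil(uint8_t x, uint8_t y) { return (uint8_t)((x + y + 1) >> 1); }

    // One byte of the SSE2 blend: avg(b, ~avg(~a, ~c)); the inner complemented
    // average is a floor average of a and c.
    static uint8_t blend_sse2_model(uint8_t a, uint8_t b, uint8_t c) {
        uint8_t ac = (uint8_t)~avg_ceil((uint8_t)~a, (uint8_t)~c); // floor((a + c) / 2)
        return avg_ceil(ac, b);
    }

    int main() {
        for (int a = 0; a < 256; ++a)
            for (int b = 0; b < 256; ++b)
                for (int c = 0; c < 256; ++c)
                    assert(blend_sse2_model((uint8_t)a, (uint8_t)b, (uint8_t)c)
                           == ((a + 2 * b + c + 2) >> 2));
        return 0;
    }

Note that the plain C fallback asm_blend_row above truncates instead of rounding, so its output can be one LSB lower than the SIMD paths.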
diff --git a/src/DSUtil/dsutil.vcproj b/src/DSUtil/dsutil.vcproj
index 49162423b..69a625c18 100644
--- a/src/DSUtil/dsutil.vcproj
+++ b/src/DSUtil/dsutil.vcproj
@@ -44,7 +44,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;"
/>
<Tool
@@ -105,7 +105,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="_WIN64;_DEBUG;_LIB;"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="3"
@@ -167,7 +167,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
BufferSecurityCheck="true"
EnableEnhancedInstructionSet="1"
@@ -233,7 +233,7 @@
<Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
+ AdditionalIncludeDirectories="..\..\include;..\filters\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;;..\thirdparty\VirtualDub\h"
PreprocessorDefinitions="_WIN64;NDEBUG;_LIB"
BufferSecurityCheck="true"
EnableEnhancedInstructionSet="0"
@@ -278,130 +278,8 @@
Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm"
>
<File
- RelativePath=".\a_yuv2rgb.asm"
+ RelativePath=".\deinterlace.cpp"
>
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\a_yuvtable.asm"
- >
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="yasm -X vc -g cv8 -f &quot;$(PlatformName)&quot; -o &quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- </File>
- <File
- RelativePath=".\convert_a.asm"
- >
- <FileConfiguration
- Name="Debug Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release Unicode|x64"
- ExcludedFromBuild="true"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="ml64 /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
- Outputs="$(OutDir)\$(InputName).obj"
- />
- </FileConfiguration>
</File>
<File
RelativePath=".\DSMPropertyBag.cpp"
diff --git a/src/DSUtil/vd.cpp b/src/DSUtil/vd.cpp
index 063fbfe06..0d7f77aaa 100644
--- a/src/DSUtil/vd.cpp
+++ b/src/DSUtil/vd.cpp
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,8 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - The core assembly function of CCpuID is from DVD2AVI
+// - VDPixmapBlt is from VirtualDub
// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
@@ -27,300 +27,272 @@
#include "vd_asm.h"
#include <intrin.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/memory.h>
+
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+
#pragma warning(disable : 4799) // no emms... blahblahblah
+void VDCPUTest() {
+ SYSTEM_INFO si;
+
+ long lEnableFlags = CPUCheckForExtensions();
+
+ GetSystemInfo(&si);
+
+ if (si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_INTEL)
+ if (si.wProcessorLevel < 4)
+ lEnableFlags &= ~CPUF_SUPPORTS_FPU; // Not strictly true, but very slow anyway
+
+ // Enable FPU support...
+
+ CPUEnableExtensions(lEnableFlags);
+
+ VDFastMemcpyAutodetect();
+}
+
CCpuID g_cpuid;
CCpuID::CCpuID()
-{
- int CPUInfo[4] = {-1};
- __cpuid(CPUInfo, 1);
- int t = CPUInfo[3];
-
- int mflags = 0;
- mflags |= ((t&0x00800000)!=0) ? mmx : 0; // STD MMX
- mflags |= ((t&0x02000000)!=0) ? ssemmx+ssefpu : 0; // STD SSE
- mflags |= ((t&0x04000000)!=0) ? sse2 : 0; // SSE2
+{
+ VDCPUTest();
- t = CPUInfo[2];
- mflags |= ((t&0x00000001)!=0) ? sse3 : 0; // SSE3
+ long lEnableFlags = CPUGetEnabledExtensions();
- // 3dnow
- __cpuid(CPUInfo, 0x80000001);
- t = CPUInfo[3];
- mflags |= ((t&0x80000000)!=0) ? _3dnow : 0; // 3D NOW
- mflags |= ((t&0x00400000)!=0) ? ssemmx : 0; // SSE MMX
+ int flags = 0;
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_MMX) ? mmx : 0; // STD MMX
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_INTEGER_SSE) ? ssemmx : 0; // SSE MMX
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_SSE) ? ssefpu : 0; // STD SSE
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_SSE2) ? sse2 : 0; // SSE2
+ flags |= !!(lEnableFlags & CPUF_SUPPORTS_3DNOW) ? _3dnow : 0; // 3DNow
// result
- m_flags = (flag_t)mflags;
+ m_flags = (flag_t)flags;
}
-void memcpy_accel(void* dst, const void* src, size_t len)
+bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::ssefpu) && len >= 128
- && !((DWORD)src&15) && !((DWORD)dst&15))
- {
- __asm
- {
- mov esi, dword ptr [src]
- mov edi, dword ptr [dst]
- mov ecx, len
- shr ecx, 7
- memcpy_accel_sse_loop:
- prefetchnta [esi+16*8]
- movaps xmm0, [esi]
- movaps xmm1, [esi+16*1]
- movaps xmm2, [esi+16*2]
- movaps xmm3, [esi+16*3]
- movaps xmm4, [esi+16*4]
- movaps xmm5, [esi+16*5]
- movaps xmm6, [esi+16*6]
- movaps xmm7, [esi+16*7]
- movntps [edi], xmm0
- movntps [edi+16*1], xmm1
- movntps [edi+16*2], xmm2
- movntps [edi+16*3], xmm3
- movntps [edi+16*4], xmm4
- movntps [edi+16*5], xmm5
- movntps [edi+16*6], xmm6
- movntps [edi+16*7], xmm7
- add esi, 128
- add edi, 128
- dec ecx
- jne memcpy_accel_sse_loop
- mov ecx, len
- and ecx, 127
- cmp ecx, 0
- je memcpy_accel_sse_end
- memcpy_accel_sse_loop2:
- mov dl, byte ptr[esi]
- mov byte ptr[edi], dl
- inc esi
- inc edi
- dec ecx
- jne memcpy_accel_sse_loop2
- memcpy_accel_sse_end:
- emms
- sfence
- }
- }
- else if((g_cpuid.m_flags & CCpuID::mmx) && len >= 64
- && !((DWORD)src&7) && !((DWORD)dst&7))
- {
- __asm
- {
- mov esi, dword ptr [src]
- mov edi, dword ptr [dst]
- mov ecx, len
- shr ecx, 6
- memcpy_accel_mmx_loop:
- movq mm0, qword ptr [esi]
- movq mm1, qword ptr [esi+8*1]
- movq mm2, qword ptr [esi+8*2]
- movq mm3, qword ptr [esi+8*3]
- movq mm4, qword ptr [esi+8*4]
- movq mm5, qword ptr [esi+8*5]
- movq mm6, qword ptr [esi+8*6]
- movq mm7, qword ptr [esi+8*7]
- movq qword ptr [edi], mm0
- movq qword ptr [edi+8*1], mm1
- movq qword ptr [edi+8*2], mm2
- movq qword ptr [edi+8*3], mm3
- movq qword ptr [edi+8*4], mm4
- movq qword ptr [edi+8*5], mm5
- movq qword ptr [edi+8*6], mm6
- movq qword ptr [edi+8*7], mm7
- add esi, 64
- add edi, 64
- loop memcpy_accel_mmx_loop
- mov ecx, len
- and ecx, 63
- cmp ecx, 0
- je memcpy_accel_mmx_end
- memcpy_accel_mmx_loop2:
- mov dl, byte ptr [esi]
- mov byte ptr [edi], dl
- inc esi
- inc edi
- dec ecx
- jne memcpy_accel_mmx_loop2
- memcpy_accel_mmx_end:
- emms
- }
- }
- else
-#endif
- {
- memcpy(dst, src, len);
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch / 2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch / 2;
+
+ VDPixmap dstpxm = {0};
+
+ dstpxm.data = dsty;
+ dstpxm.pitch = dstpitch;
+ dstpxm.w = w;
+ dstpxm.h = h;
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ dstpxm.data2 = dstu;
+ dstpxm.pitch2 = dstpitch / 2;
+ dstpxm.data3 = dstv;
+ dstpxm.pitch3 = dstpitch / 2;
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void yuvtoyuy2row_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width)
+bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch)
{
- WORD* dstw = (WORD*)dst;
- for(; width > 1; width -= 2)
- {
- *dstw++ = (*srcu++<<8)|*srcy++;
- *dstw++ = (*srcv++<<8)|*srcy++;
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = src;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ VDPixmap dstpxm = {
+ dst,
+ NULL,
+ w,
+ h,
+ dstpitch
+ };
+
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void yuvtoyuy2row_avg_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width, DWORD pitchuv)
+bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
- WORD* dstw = (WORD*)dst;
- for(; width > 1; width -= 2, srcu++, srcv++)
- {
- *dstw++ = (((srcu[0]+srcu[pitchuv])>>1)<<8)|*srcy++;
- *dstw++ = (((srcv[0]+srcv[pitchuv])>>1)<<8)|*srcy++;
- }
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch/2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch/2;
+
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
+
+ switch(dbpp) {
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ // TODO: check correct conversion work (555->565) when dpp == 16
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void asm_blend_row_clipped_c(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
+bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
- BYTE* src2 = src + srcpitch;
- do
+ if(srcpitch == 0) srcpitch = w;
+
+#ifndef _WIN64
+ if((g_cpuid.m_flags & CCpuID::sse2)
+ && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
+ && !((DWORD_PTR)dst&15) && !(dstpitch&15))
{
- *dst++ = (*src++ + *src2++ + 1) >> 1;
- } while(w--);
+ if(w<=0 || h<=0 || (w&1) || (h&1))
+ return(false);
+
+ yv12_yuy2_sse2(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
+ return(true);
+ }
+#endif
+
+ VDPixmap srcbm = {0};
+
+ srcbm.data = srcy;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV420_Planar;
+ srcbm.data2 = srcu;
+ srcbm.pitch2 = srcpitch/2;
+ srcbm.data3 = srcv;
+ srcbm.pitch3 = srcpitch/2;
+
+ VDPixmap dstpxm = {
+ dst,
+ NULL,
+ w,
+ h,
+ dstpitch
+ };
+
+ dstpxm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-static void asm_blend_row_c(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
+bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp)
{
- BYTE* src2 = src + srcpitch;
- BYTE* src3 = src2 + srcpitch;
- do
- {
- *dst++ = (*src++ + (*src2++ << 1) + *src3++ + 2) >> 2;
- } while(w--);
+ VDPixmap srcbm = {
+ (char *)src + srcpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -srcpitch
+ };
+
+ switch(dbpp) {
+ case 8: srcbm.format = nsVDPixmap::kPixFormat_Pal8; break;
+ case 16: srcbm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: srcbm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: srcbm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
+
+ switch(dbpp) {
+ case 8: dstpxm.format = nsVDPixmap::kPixFormat_Pal8; break;
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
+ }
+
+ return VDPixmapBlt(dstpxm, srcbm);
}
-bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
+bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch)
{
- if((w&1))
- return(false);
-
- if(w > 0 && w == srcpitch && w == dstpitch)
- {
- memcpy_accel(dsty, srcy, h*srcpitch);
- memcpy_accel(dstu, srcu, h/2*srcpitch/2);
- memcpy_accel(dstv, srcv, h/2*srcpitch/2);
- }
- else
- {
- int pitch = min(abs(srcpitch), abs(dstpitch));
-
- for(ptrdiff_t y = 0; y < h; y++, srcy += srcpitch, dsty += dstpitch)
- memcpy_accel(dsty, srcy, pitch);
+ if(srcpitch == 0) srcpitch = w;
- srcpitch >>= 1;
- dstpitch >>= 1;
+ VDPixmap srcbm = {0};
- pitch = min(abs(srcpitch), abs(dstpitch));
+ srcbm.data = src;
+ srcbm.pitch = srcpitch;
+ srcbm.w = w;
+ srcbm.h = h;
+ srcbm.format = nsVDPixmap::kPixFormat_YUV422_YUYV;
- for(ptrdiff_t y = 0; y < h; y+=2, srcu += srcpitch, dstu += dstpitch)
- memcpy_accel(dstu, srcu, pitch);
+ VDPixmap dstpxm = {
+ (char *)dst + dstpitch * (h - 1),
+ NULL,
+ w,
+ h,
+ -dstpitch
+ };
- for(ptrdiff_t y = 0; y < h; y+=2, srcv += srcpitch, dstv += dstpitch)
- memcpy_accel(dstv, srcv, pitch);
+ switch(dbpp) {
+ case 16: dstpxm.format = nsVDPixmap::kPixFormat_XRGB1555; break;
+ case 24: dstpxm.format = nsVDPixmap::kPixFormat_RGB888; break;
+ case 32: dstpxm.format = nsVDPixmap::kPixFormat_XRGB8888; break;
+ default:
+ VDASSERT(false);
}
- return(true);
+ return VDPixmapBlt(dstpxm, srcbm);
}
-bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch)
+static void yuvtoyuy2row_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width)
{
- if(w > 0 && w == srcpitch && w == dstpitch)
- {
- memcpy_accel(dst, src, h*srcpitch);
- }
- else
+ WORD* dstw = (WORD*)dst;
+ for(; width > 1; width -= 2)
{
- int pitch = min(abs(srcpitch), abs(dstpitch));
-
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- memcpy_accel(dst, src, pitch);
+ *dstw++ = (*srcu++<<8)|*srcy++;
+ *dstw++ = (*srcv++<<8)|*srcy++;
}
-
- return(true);
}
-#ifndef _WIN64
-extern "C" void asm_YUVtoRGB32_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB32_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row_MMX(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB32_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB24_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-extern "C" void asm_YUVtoRGB16_row_ISSE(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width);
-#endif
-
-bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
+static void yuvtoyuy2row_avg_c(BYTE* dst, BYTE* srcy, BYTE* srcu, BYTE* srcv, DWORD width, DWORD pitchuv)
{
- if(w<=0 || h<=0 || (w&1) || (h&1))
- return(false);
-
-#ifndef _WIN64
- void (*asm_YUVtoRGB_row)(void* ARGB1, void* ARGB2, BYTE* Y1, BYTE* Y2, BYTE* U, BYTE* V, long width) = NULL;;
-
- if((g_cpuid.m_flags & CCpuID::ssefpu) && !(w&7))
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row/*_ISSE*/; break; // TODO: fix _ISSE (555->565)
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row_ISSE; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row_ISSE; break;
- }
- }
- else if((g_cpuid.m_flags & CCpuID::mmx) && !(w&7))
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row/*_MMX*/; break; // TODO: fix _MMX (555->565)
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row_MMX; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row_MMX; break;
- }
- }
- else
- {
- switch(dbpp)
- {
- case 16: asm_YUVtoRGB_row = asm_YUVtoRGB16_row; break;
- case 24: asm_YUVtoRGB_row = asm_YUVtoRGB24_row; break;
- case 32: asm_YUVtoRGB_row = asm_YUVtoRGB32_row; break;
- }
- }
-
- if(!asm_YUVtoRGB_row)
- return(false);
-
- do
+ WORD* dstw = (WORD*)dst;
+ for(; width > 1; width -= 2, srcu++, srcv++)
{
- asm_YUVtoRGB_row(dst + dstpitch, dst, srcy + srcpitch, srcy, srcu, srcv, w/2);
-
- dst += 2*dstpitch;
- srcy += srcpitch*2;
- srcu += srcpitch/2;
- srcv += srcpitch/2;
+ *dstw++ = (((srcu[0]+srcu[pitchuv])>>1)<<8)|*srcy++;
+ *dstw++ = (((srcv[0]+srcv[pitchuv])>>1)<<8)|*srcy++;
}
- while(h -= 2);
-
- if(g_cpuid.m_flags & CCpuID::mmx)
- __asm emms
-
- if(g_cpuid.m_flags & CCpuID::ssefpu)
- __asm sfence
-
- return(true);
-#else
- ASSERT(FALSE);
- return(false);
-#endif
}
-bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch, bool fInterlaced)
+bool BitBltFromI420ToYUY2Interlaced(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch)
{
if(w<=0 || h<=0 || (w&1) || (h&1))
return(false);
@@ -332,16 +304,15 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
#ifndef _WIN64
if((g_cpuid.m_flags & CCpuID::sse2)
- && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
- && !((DWORD_PTR)dst&15) && !(dstpitch&15))
+ && !((DWORD_PTR)srcy&15) && !((DWORD_PTR)srcu&15) && !((DWORD_PTR)srcv&15) && !(srcpitch&31)
+ && !((DWORD_PTR)dst&15) && !(dstpitch&15))
{
- if(!fInterlaced) yv12_yuy2_sse2(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
- else yv12_yuy2_sse2_interlaced(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
+ yv12_yuy2_sse2_interlaced(srcy, srcu, srcv, srcpitch/2, w/2, h, dst, dstpitch);
return(true);
}
else
{
- ASSERT(!fInterlaced);
+ ASSERT(FALSE);
}
if((g_cpuid.m_flags & CCpuID::mmx) && !(w&7))
@@ -359,15 +330,16 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
if(!yuvtoyuy2row)
return(false);
+ int halfsrcpitch = srcpitch/2;
do
{
yuvtoyuy2row(dst, srcy, srcu, srcv, w);
- yuvtoyuy2row_avg(dst + dstpitch, srcy + srcpitch, srcu, srcv, w, srcpitch/2);
+ yuvtoyuy2row_avg(dst + dstpitch, srcy + srcpitch, srcu, srcv, w, halfsrcpitch);
dst += 2*dstpitch;
- srcy += srcpitch*2;
- srcu += srcpitch/2;
- srcv += srcpitch/2;
+ srcy += halfsrcpitch;
+ srcu += halfsrcpitch;
+ srcv += halfsrcpitch;
}
while((h -= 2) > 2);
@@ -381,481 +353,3 @@ bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYT
return(true);
}
-
-bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp)
-{
- if(dbpp == sbpp)
- {
- int rowbytes = w*dbpp>>3;
-
- if(rowbytes > 0 && rowbytes == srcpitch && rowbytes == dstpitch)
- {
- memcpy_accel(dst, src, h*rowbytes);
- }
- else
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- memcpy_accel(dst, src, rowbytes);
- }
-
- return(true);
- }
-
- if(sbpp != 16 && sbpp != 24 && sbpp != 32
- || dbpp != 16 && dbpp != 24 && dbpp != 32)
- return(false);
-
- if(dbpp == 16)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 24)
- {
- BYTE* s = (BYTE*)src;
- WORD* d = (WORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=3, d++)
- *d = (WORD)(((*((DWORD*)s)>>8)&0xf800)|((*((DWORD*)s)>>5)&0x07e0)|((*((DWORD*)s)>>3)&0x1f));
- }
- else if(sbpp == 32)
- {
- DWORD* s = (DWORD*)src;
- WORD* d = (WORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d++)
- *d = (WORD)(((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x1f));
- }
- }
- }
- else if(dbpp == 24)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 16)
- {
- WORD* s = (WORD*)src;
- BYTE* d = (BYTE*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d+=3)
- { // not tested, r-g-b might be in reverse
- d[0] = (*s&0x001f)<<3;
- d[1] = (*s&0x07e0)<<5;
- d[2] = (*s&0xf800)<<8;
- }
- }
- else if(sbpp == 32)
- {
- BYTE* s = (BYTE*)src;
- BYTE* d = (BYTE*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=4, d+=3)
- {d[0] = s[0]; d[1] = s[1]; d[2] = s[2];}
- }
- }
- }
- else if(dbpp == 32)
- {
- for(ptrdiff_t y = 0; y < h; y++, src += srcpitch, dst += dstpitch)
- {
- if(sbpp == 16)
- {
- WORD* s = (WORD*)src;
- DWORD* d = (DWORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s++, d++)
- *d = ((*s&0xf800)<<8)|((*s&0x07e0)<<5)|((*s&0x001f)<<3);
- }
- else if(sbpp == 24)
- {
- BYTE* s = (BYTE*)src;
- DWORD* d = (DWORD*)dst;
- for(ptrdiff_t x = 0; x < w; x++, s+=3, d++)
- *d = *((DWORD*)s)&0xffffff;
- }
- }
- }
-
- return(true);
-}
-
-void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch)
-{
- void (*blend_row_clipped)(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch) = NULL;
- void (*blend_row)(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch) = NULL;
-
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::sse2) && !((DWORD)src&0xf) && !((DWORD)dst&0xf) && !(srcpitch&0xf))
- {
- blend_row_clipped = asm_blend_row_clipped_SSE2;
- blend_row = asm_blend_row_SSE2;
- }
- else if(g_cpuid.m_flags & CCpuID::mmx)
- {
- blend_row_clipped = asm_blend_row_clipped_MMX;
- blend_row = asm_blend_row_MMX;
- }
- else
-#endif
- {
- blend_row_clipped = asm_blend_row_clipped_c;
- blend_row = asm_blend_row_c;
- }
-
- if(!blend_row_clipped)
- return;
-
- blend_row_clipped(dst, src, rowbytes, srcpitch);
-
- if((h -= 2) > 0) do
- {
- dst += dstpitch;
- blend_row(dst, src, rowbytes, srcpitch);
- src += srcpitch;
- }
- while(--h);
-
- blend_row_clipped(dst + dstpitch, src, rowbytes, srcpitch);
-
-#ifndef _WIN64
- if(g_cpuid.m_flags & CCpuID::mmx)
- __asm emms
-#endif
-}
-
-void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield)
-{
- if(topfield)
- {
- BitBltFromRGBToRGB(rowbytes, h/2, dst, dstpitch*2, 8, src, srcpitch*2, 8);
- AvgLines8(dst, h, dstpitch);
- }
- else
- {
- BitBltFromRGBToRGB(rowbytes, h/2, dst + dstpitch, dstpitch*2, 8, src + srcpitch, srcpitch*2, 8);
- AvgLines8(dst + dstpitch, h-1, dstpitch);
- }
-}
-
-void AvgLines8(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- BYTE* tmp = s;
-
-#ifndef _WIN64
- if((g_cpuid.m_flags & CCpuID::sse2) && !((DWORD)tmp&0xf) && !((DWORD)pitch&0xf))
- {
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 4
-
-AvgLines8_sse2_loop:
- movdqa xmm0, [esi]
- pavgb xmm0, [esi+ebx*2]
- movdqa [esi+ebx], xmm0
- add esi, 16
-
- dec ecx
- jnz AvgLines8_sse2_loop
-
- mov tmp, esi
- }
-
- for(ptrdiff_t i = pitch&7; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- else if(g_cpuid.m_flags & CCpuID::mmx)
- {
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- pxor mm7, mm7
-AvgLines8_mmx_loop:
- movq mm0, [esi]
- movq mm1, mm0
-
- punpcklbw mm0, mm7
- punpckhbw mm1, mm7
-
- movq mm2, [esi+ebx*2]
- movq mm3, mm2
-
- punpcklbw mm2, mm7
- punpckhbw mm3, mm7
-
- paddw mm0, mm2
- psrlw mm0, 1
-
- paddw mm1, mm3
- psrlw mm1, 1
-
- packuswb mm0, mm1
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines8_mmx_loop
-
- mov tmp, esi
- }
-
- for(ptrdiff_t i = pitch&7; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- else
-#endif
- {
- for(ptrdiff_t i = pitch; i--; tmp++)
- {
- tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
- }
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-void AvgLines555(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- unsigned __int64 __0x03e003e003e003e0 = 0x03e003e003e003e0;
- unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- BYTE* tmp = s;
-
-#ifndef _WIN64
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- movq mm6, __0x03e003e003e003e0
- movq mm7, __0x001f001f001f001f
-
-AvgLines555_loop:
- movq mm0, [esi]
- movq mm1, mm0
- movq mm2, mm0
-
- psrlw mm0, 10 // red1 bits: mm0 = 001f001f001f001f
- pand mm1, mm6 // green1 bits: mm1 = 03e003e003e003e0
- pand mm2, mm7 // blue1 bits: mm2 = 001f001f001f001f
-
- movq mm3, [esi+ebx*2]
- movq mm4, mm3
- movq mm5, mm3
-
- psrlw mm3, 10 // red2 bits: mm3 = 001f001f001f001f
- pand mm4, mm6 // green2 bits: mm4 = 03e003e003e003e0
- pand mm5, mm7 // blue2 bits: mm5 = 001f001f001f001f
-
- paddw mm0, mm3
- psrlw mm0, 1 // (red1+red2)/2
- psllw mm0, 10 // red bits at 7c007c007c007c00
-
- paddw mm1, mm4
- psrlw mm1, 1 // (green1+green2)/2
- pand mm1, mm6 // green bits at 03e003e003e003e0
-
- paddw mm2, mm5
- psrlw mm2, 1 // (blue1+blue2)/2
- // blue bits at 001f001f001f001f (no need to pand, lower bits were discareded)
-
- por mm0, mm1
- por mm0, mm2
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines555_loop
-
- mov tmp, esi
- }
-#endif
-
- for(ptrdiff_t i = (pitch&7)>>1; i--; tmp++)
- {
- tmp[pitch] =
- ((((*tmp&0x7c00) + (tmp[pitch<<1]&0x7c00)) >> 1)&0x7c00)|
- ((((*tmp&0x03e0) + (tmp[pitch<<1]&0x03e0)) >> 1)&0x03e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-void AvgLines565(BYTE* dst, DWORD h, DWORD pitch)
-{
- if(h <= 1)
- return;
-
- unsigned __int64 __0x07e007e007e007e0 = 0x07e007e007e007e0;
- unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;
-
- BYTE* s = dst;
- BYTE* d = dst + (h-2)*pitch;
-
- for(; s < d; s += pitch*2)
- {
- WORD* tmp = (WORD*)s;
-
-#ifndef _WIN64
- __asm
- {
- mov esi, tmp
- mov ebx, pitch
-
- mov ecx, ebx
- shr ecx, 3
-
- movq mm6, __0x07e007e007e007e0
- movq mm7, __0x001f001f001f001f
-
-AvgLines565_loop:
- movq mm0, [esi]
- movq mm1, mm0
- movq mm2, mm0
-
- psrlw mm0, 11 // red1 bits: mm0 = 001f001f001f001f
- pand mm1, mm6 // green1 bits: mm1 = 07e007e007e007e0
- pand mm2, mm7 // blue1 bits: mm2 = 001f001f001f001f
-
- movq mm3, [esi+ebx*2]
- movq mm4, mm3
- movq mm5, mm3
-
- psrlw mm3, 11 // red2 bits: mm3 = 001f001f001f001f
- pand mm4, mm6 // green2 bits: mm4 = 07e007e007e007e0
- pand mm5, mm7 // blue2 bits: mm5 = 001f001f001f001f
-
- paddw mm0, mm3
- psrlw mm0, 1 // (red1+red2)/2
- psllw mm0, 11 // red bits at f800f800f800f800
-
- paddw mm1, mm4
- psrlw mm1, 1 // (green1+green2)/2
- pand mm1, mm6 // green bits at 03e003e003e003e0
-
- paddw mm2, mm5
- psrlw mm2, 1 // (blue1+blue2)/2
- // blue bits at 001f001f001f001f (no need to pand, lower bits were discareded)
-
- por mm0, mm1
- por mm0, mm2
-
- movq [esi+ebx], mm0
-
- lea esi, [esi+8]
-
- dec ecx
- jnz AvgLines565_loop
-
- mov tmp, esi
- }
-#else
- for(ptrdiff_t wd=(pitch>>3);wd--;tmp++)
- {
- tmp[0] =
- ((((*tmp&0xf800) + (tmp[pitch<<1]&0xf800)) >> 1)&0xf800)|
- ((((*tmp&0x07e0) + (tmp[pitch<<1]&0x07e0)) >> 1)&0x07e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
-#endif
-
- for(ptrdiff_t i = (pitch&7)>>1; i--; tmp++)
- {
- tmp[pitch] =
- ((((*tmp&0xf800) + (tmp[pitch<<1]&0xf800)) >> 1)&0xf800)|
- ((((*tmp&0x07e0) + (tmp[pitch<<1]&0x07e0)) >> 1)&0x07e0)|
- ((((*tmp&0x001f) + (tmp[pitch<<1]&0x001f)) >> 1)&0x001f);
- }
- }
-
- if(!(h&1) && h >= 2)
- {
- dst += (h-2)*pitch;
- memcpy_accel(dst + pitch, dst, pitch);
- }
-
-#ifndef _WIN64
- __asm emms;
-#endif
-}
-
-#ifndef _WIN64
-extern "C" void mmx_YUY2toRGB24(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709);
-extern "C" void mmx_YUY2toRGB32(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709);
-#endif
-
-bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch)
-{
- void (* YUY2toRGB)(const BYTE* src, BYTE* dst, const BYTE* src_end, int src_pitch, int row_size, bool rec709) = NULL;
-
-#ifndef _WIN64
- if(g_cpuid.m_flags & CCpuID::mmx)
- {
- YUY2toRGB =
- dbpp == 32 ? mmx_YUY2toRGB32 :
- dbpp == 24 ? mmx_YUY2toRGB24 :
- // dbpp == 16 ? mmx_YUY2toRGB16 : // TODO
- NULL;
- }
- else
-#endif
- {
- ASSERT(FALSE);
- // TODO
- }
-
- if(!YUY2toRGB)
- return(false);
-
- YUY2toRGB(src, dst, src + h*srcpitch, srcpitch, w, false);
-
- return(true);
-}
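After this rewrite, every BitBltFrom*To* wrapper in vd.cpp simply describes the two surfaces as VDPixmap structures and hands the conversion to Kasumi's VDPixmapBlt; bottom-up RGB output is handled by pointing data at the last scanline and negating the pitch. A minimal sketch of that wrapping step, assuming the VirtualDub headers added by this commit and using the same field names as the patch (the helper name is made up):

    #include <vd2/Kasumi/pixmap.h>

    // Wrap a bottom-up 32-bit RGB buffer so that row 0 of the pixmap is the top
    // scanline, the same way the rewritten BitBltFrom*ToRGB helpers above do it.
    static VDPixmap WrapBottomUpRGB32(unsigned char* bits, int w, int h, int pitch)
    {
        VDPixmap pxm = {0};
        pxm.data   = bits + pitch * (h - 1); // start at the last stored row...
        pxm.pitch  = -pitch;                 // ...and step backwards through memory
        pxm.w      = w;
        pxm.h      = h;
        pxm.format = nsVDPixmap::kPixFormat_XRGB8888;
        return pxm;
    }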
diff --git a/src/DSUtil/vd.h b/src/DSUtil/vd.h
index a69e406c0..0db586cec 100644
--- a/src/DSUtil/vd.h
+++ b/src/DSUtil/vd.h
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,25 +17,22 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - BitBltFromYUY2ToRGB is from AviSynth 2.52
+// - VDPixmapBlt is from VirtualDub
+// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
#pragma once
-class CCpuID {public: CCpuID(); enum flag_t {mmx=1, ssemmx=2, ssefpu=4, sse2=8, _3dnow=16, sse3=32} m_flags;};
+class CCpuID {public: CCpuID(); enum flag_t {mmx=1, ssemmx=2, ssefpu=4, sse2=8, _3dnow=16} m_flags;};
extern CCpuID g_cpuid;
extern bool BitBltFromI420ToI420(int w, int h, BYTE* dsty, BYTE* dstu, BYTE* dstv, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
-extern bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch, bool fInterlaced = false);
+extern bool BitBltFromI420ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
+extern bool BitBltFromI420ToYUY2Interlaced(int w, int h, BYTE* dst, int dstpitch, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch);
extern bool BitBltFromI420ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* srcy, BYTE* srcu, BYTE* srcv, int srcpitch /* TODO: , bool fInterlaced = false */);
extern bool BitBltFromYUY2ToYUY2(int w, int h, BYTE* dst, int dstpitch, BYTE* src, int srcpitch);
extern bool BitBltFromYUY2ToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch);
extern bool BitBltFromRGBToRGB(int w, int h, BYTE* dst, int dstpitch, int dbpp, BYTE* src, int srcpitch, int sbpp);
extern void DeinterlaceBlend(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch);
-extern void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield);
-
-extern void AvgLines8(BYTE* dst, DWORD h, DWORD pitch);
-extern void AvgLines555(BYTE* dst, DWORD h, DWORD pitch);
-extern void AvgLines565(BYTE* dst, DWORD h, DWORD pitch); \ No newline at end of file
+extern void DeinterlaceBob(BYTE* dst, BYTE* src, DWORD rowbytes, DWORD h, DWORD dstpitch, DWORD srcpitch, bool topfield); \ No newline at end of file
diff --git a/src/DSUtil/vd_asm.cpp b/src/DSUtil/vd_asm.cpp
index 851449089..3fc521844 100644
--- a/src/DSUtil/vd_asm.cpp
+++ b/src/DSUtil/vd_asm.cpp
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,7 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
+// - VDPixmapBlt is from VirtualDub
// - sse2 yv12 to yuy2 conversion by Haali
// (- vd.cpp/h should be renamed to something more sensible already :)
@@ -428,289 +429,4 @@ last4:
ret
};
}
-
-void __declspec(naked) asm_blend_row_clipped_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- static const __int64 _x0001000100010001 = 0x0001000100010001;
-
- __asm {
- push ebp
- push edi
- push esi
- push ebx
-
- mov edi,[esp+20]
- mov esi,[esp+24]
- sub edi,esi
- mov ebp,[esp+28]
- mov edx,[esp+32]
-
- shr ebp, 3
-
- movq mm6, _x0001000100010001
- pxor mm7, mm7
-
-xloop:
- movq mm0, [esi]
- movq mm3, mm0
- punpcklbw mm0, mm7
- punpckhbw mm3, mm7
-
- movq mm1, [esi+edx]
- movq mm4, mm1
- punpcklbw mm1, mm7
- punpckhbw mm4, mm7
-
- paddw mm1, mm0
- paddw mm1, mm6
- psrlw mm1, 1
-
- paddw mm4, mm3
- paddw mm4, mm6
- psrlw mm4, 1
-
- add esi, 8
- packuswb mm1, mm4
- movq [edi+esi-8], mm1
-
- dec ebp
- jne xloop
-
- pop ebx
- pop esi
- pop edi
- pop ebp
- ret
- };
-}
-
-void __declspec(naked) asm_blend_row_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- static const __int64 mask0 = 0xfcfcfcfcfcfcfcfci64;
- static const __int64 mask1 = 0x7f7f7f7f7f7f7f7fi64;
- static const __int64 mask2 = 0x3f3f3f3f3f3f3f3fi64;
- static const __int64 _x0002000200020002 = 0x0002000200020002;
-
- __asm {
- push ebp
- push edi
- push esi
- push ebx
-
- mov edi, [esp+20]
- mov esi, [esp+24]
- sub edi, esi
- mov ebp, [esp+28]
- mov edx, [esp+32]
-
- shr ebp, 3
-
- movq mm6, _x0002000200020002
- pxor mm7, mm7
-
-xloop:
- movq mm0, [esi]
- movq mm3, mm0
- punpcklbw mm0, mm7
- punpckhbw mm3, mm7
-
- movq mm1, [esi+edx]
- movq mm4, mm1
- punpcklbw mm1, mm7
- punpckhbw mm4, mm7
-
- movq mm2, [esi+edx*2]
- movq mm5, mm2
- punpcklbw mm2, mm7
- punpckhbw mm5, mm7
-
- psllw mm1, 1
- paddw mm1, mm0
- paddw mm1, mm2
- paddw mm1, mm6
- psrlw mm1, 2
-
- psllw mm4, 1
- paddw mm4, mm3
- paddw mm4, mm5
- paddw mm4, mm6
- psrlw mm4, 2
-
- add esi, 8
- packuswb mm1, mm4
- movq [edi+esi-8], mm1
-
- dec ebp
- jne xloop
-
- // sadly the original code makes a lot of visible banding artifacts on yuv
- // (it seems those shiftings without rounding introduce too much error)
-/*
- mov edi,[esp+20]
- mov esi,[esp+24]
- sub edi,esi
- mov ebp,[esp+28]
- mov edx,[esp+32]
-
- movq mm5,mask0
- movq mm6,mask1
- movq mm7,mask2
- shr ebp,1
- jz oddpart
-
-xloop:
- movq mm2,[esi]
- movq mm0,mm5
-
- movq mm1,[esi+edx]
- pand mm0,mm2
-
- psrlq mm1,1
- movq mm2,[esi+edx*2]
-
- psrlq mm2,2
- pand mm1,mm6
-
- psrlq mm0,2
- pand mm2,mm7
-
- paddb mm0,mm1
- add esi,8
-
- paddb mm0,mm2
- dec ebp
-
- movq [edi+esi-8],mm0
- jne xloop
-
-oddpart:
- test byte ptr [esp+28],1
- jz nooddpart
-
- mov ecx,[esi]
- mov eax,0fcfcfcfch
- mov ebx,[esi+edx]
- and eax,ecx
- shr ebx,1
- mov ecx,[esi+edx*2]
- shr ecx,2
- and ebx,07f7f7f7fh
- shr eax,2
- and ecx,03f3f3f3fh
- add eax,ebx
- add eax,ecx
- mov [edi+esi],eax
-
-nooddpart:
-*/
- pop ebx
- pop esi
- pop edi
- pop ebp
- ret
- };
-}
-
-__declspec(align(16)) static BYTE const_1_16_bytes[] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
-
-void asm_blend_row_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- __asm
- {
- mov edx, srcpitch
- mov esi, src
- mov edi, dst
- sub edi, esi
- mov ecx, w
- mov ebx, ecx
- shr ecx, 4
- and ebx, 15
-
- movdqa xmm7, [const_1_16_bytes]
-
-asm_blend_row_SSE2_loop:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi+edx]
- movdqa xmm2, [esi+edx*2]
- pavgb xmm0, xmm1
- pavgb xmm2, xmm1
- psubusb xmm0, xmm7
- pavgb xmm0, xmm2
- movdqa [esi+edi], xmm0
- add esi, 16
- dec ecx
- jnz asm_blend_row_SSE2_loop
-
- test ebx,15
- jz asm_blend_row_SSE2_end
-
- mov ecx, ebx
- xor ax, ax
- xor bx, bx
- xor dx, dx
-asm_blend_row_SSE2_loop2:
- mov al, [esi]
- mov bl, [esi+edx]
- mov dl, [esi+edx*2]
- add ax, bx
- inc ax
- shr ax, 1
- add dx, bx
- inc dx
- shr dx, 1
- add ax, dx
- shr ax, 1
- mov [esi+edi], al
- inc esi
- dec ecx
- jnz asm_blend_row_SSE2_loop2
-
-asm_blend_row_SSE2_end:
- }
-}
-
-void asm_blend_row_clipped_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch)
-{
- __asm
- {
- mov edx, srcpitch
- mov esi, src
- mov edi, dst
- sub edi, esi
- mov ecx, w
- mov ebx, ecx
- shr ecx, 4
- and ebx, 15
-
- movdqa xmm7, [const_1_16_bytes]
-
-asm_blend_row_clipped_SSE2_loop:
- movdqa xmm0, [esi]
- movdqa xmm1, [esi+edx]
- pavgb xmm0, xmm1
- movdqa [esi+edi], xmm0
- add esi, 16
- dec ecx
- jnz asm_blend_row_clipped_SSE2_loop
-
- test ebx,15
- jz asm_blend_row_clipped_SSE2_end
-
- mov ecx, ebx
- xor ax, ax
- xor bx, bx
-asm_blend_row_clipped_SSE2_loop2:
- mov al, [esi]
- mov bl, [esi+edx]
- add ax, bx
- inc ax
- shr ax, 1
- mov [esi+edi], al
- inc esi
- dec ecx
- jnz asm_blend_row_clipped_SSE2_loop2
-
-asm_blend_row_clipped_SSE2_end:
- }
-}
#endif
diff --git a/src/DSUtil/vd_asm.h b/src/DSUtil/vd_asm.h
index c1c78f39b..7c1f2f134 100644
--- a/src/DSUtil/vd_asm.h
+++ b/src/DSUtil/vd_asm.h
@@ -1,5 +1,6 @@
// VirtualDub - Video processing and capture application
-// Copyright (C) 1998-2001 Avery Lee
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,8 +17,7 @@
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//
// Notes:
-// - BitBltFromI420ToRGB is from VirtualDub
-// - BitBltFromYUY2ToRGB is from AviSynth 2.52
+// - VDPixmapBlt is from VirtualDub
// (- vd.cpp/h should be renamed to something more sensible already :)
#pragma once
@@ -31,9 +31,4 @@ void yv12_yuy2_row_sse2_linear();
void yv12_yuy2_row_sse2_linear_interlaced();
void yv12_yuy2_sse2(const BYTE *Y, const BYTE *U, const BYTE *V, int halfstride, unsigned halfwidth, unsigned height, BYTE *YUY2, int d_stride);
void yv12_yuy2_sse2_interlaced(const BYTE *Y, const BYTE *U, const BYTE *V, int halfstride, unsigned halfwidth, unsigned height, BYTE *YUY2, int d_stride);
-
-void asm_blend_row_clipped_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_MMX(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
-void asm_blend_row_clipped_SSE2(BYTE* dst, BYTE* src, DWORD w, DWORD srcpitch);
#endif
diff --git a/src/YASM.rules b/src/YASM.rules
new file mode 100644
index 000000000..e212a4f17
--- /dev/null
+++ b/src/YASM.rules
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="utf-8"?>
+<VisualStudioToolFile
+ Name="YASM"
+ Version="8.00"
+ >
+ <Rules>
+ <CustomBuildRule
+ Name="YASM"
+ DisplayName="YASM"
+ CommandLine="yasm -X vc -g cv8 -f $(PlatformName) -o &quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+ Outputs="$(IntDir)\$(InputName).obj"
+			FileExtensions="*.asm;*.asm64"
+ ExecutionDescription="Assembling: $(InputFileName)"
+ >
+ <Properties>
+ </Properties>
+ </CustomBuildRule>
+ </Rules>
+</VisualStudioToolFile>
diff --git a/src/common.vsprops b/src/common.vsprops
index b3e6a9f51..dcd9ad94c 100644
--- a/src/common.vsprops
+++ b/src/common.vsprops
@@ -8,6 +8,7 @@
>
<Tool
Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="&quot;$(SolutionDir)src\DSUtil\&quot;"
PreprocessorDefinitions="WINVER=0x0600"
EnableFunctionLevelLinking="true"
WarningLevel="3"
diff --git a/src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp b/src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp
index 49630a357..8ab629875 100644
--- a/src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp
+++ b/src/filters/renderer/VideoRenderers/DX9AllocatorPresenter.cpp
@@ -1888,11 +1888,11 @@ HRESULT CDX9AllocatorPresenter::AlphaBlt(RECT* pSrc, RECT* pDst, IDirect3DTextur
{(float)dst.right, (float)dst.bottom, 0.5f, 2.0f, (float)src.right / w, (float)src.bottom / h},
};
/*
- for(int i = 0; i < countof(pVertices); i++)
- {
- pVertices[i].x -= 0.5;
- pVertices[i].y -= 0.5;
- }
+ for(int i = 0; i < countof(pVertices); i++)
+ {
+ pVertices[i].x -= 0.5;
+ pVertices[i].y -= 0.5;
+ }
*/
hr = m_pD3DDev->SetTexture(0, pTexture);
diff --git a/src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp b/src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp
index 687785c43..b6844870d 100644
--- a/src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp
+++ b/src/filters/switcher/AudioSwitcher/AudioSwitcher.cpp
@@ -268,12 +268,13 @@ HRESULT CAudioSwitcherFilter::Transform(IMediaSample* pIn, IMediaSample* pOut)
if(FAILED(hr = pOut->GetPointer(&pDataOut))) return hr;
if(!pDataIn || !pDataOut || len < 0 || lenout < 0) return S_FALSE;
- // len = 0 doesn't mean it's failed, return S_OK otherwise might skrew the sound
+ // len = 0 doesn't mean it's failed, return S_OK otherwise might screw the sound
if(len == 0) {pOut->SetActualDataLength(0); return S_OK;}
if(m_fCustomChannelMapping)
{
- if(m_chs[wfe->nChannels-1].GetCount() > 0)
+ size_t channelsCount = m_chs[wfe->nChannels-1].GetCount();
+ if(channelsCount > 0 && wfeout->nChannels <= channelsCount)
{
for(int i = 0; i < wfeout->nChannels; i++)
{
diff --git a/src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp b/src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp
index ac9f2a811..f24d3be27 100644
--- a/src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp
+++ b/src/filters/transform/BaseVideoFilter/BaseVideoFilter.cpp
@@ -319,7 +319,10 @@ HRESULT CBaseVideoFilter::CopyBuffer(BYTE* pOut, BYTE** ppIn, int w, int h, int
if(bihOut.biCompression == '2YUY')
{
- BitBltFromI420ToYUY2(w, h, pOut, bihOut.biWidth*2, pIn, pInU, pInV, pitchIn, fInterlaced);
+ if (!fInterlaced)
+ BitBltFromI420ToYUY2(w, h, pOut, bihOut.biWidth*2, pIn, pInU, pInV, pitchIn);
+ else
+ BitBltFromI420ToYUY2Interlaced(w, h, pOut, bihOut.biWidth*2, pIn, pInU, pInV, pitchIn);
}
else if(bihOut.biCompression == '024I' || bihOut.biCompression == 'VUYI' || bihOut.biCompression == '21VY')
{
diff --git a/src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp b/src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp
index bb3aceb00..6251a2bf5 100644
--- a/src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp
+++ b/src/filters/transform/Mpeg2DecFilter/libmpeg2.cpp
@@ -1314,6 +1314,7 @@ int CMpeg2Dec::sequence_ext()
if(!(buffer[1] & 8))
{
sequence->flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE;
+ sequence->width = (sequence->width + 31) & ~31;
sequence->height = (sequence->height + 31) & ~31;
}
diff --git a/src/thirdparty/VirtualDub/Kasumi/Kasumi.vcproj b/src/thirdparty/VirtualDub/Kasumi/Kasumi.vcproj
new file mode 100644
index 000000000..40e1e5220
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/Kasumi.vcproj
@@ -0,0 +1,1527 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9,00"
+ Name="Kasumi"
+ ProjectGUID="{0D252872-7542-4232-8D02-53F9182AEE15}"
+ RootNamespace="Kasumi"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ <Platform
+ Name="x64"
+ />
+ </Platforms>
+ <ToolFiles>
+ <ToolFile
+ RelativePath="..\..\..\YASM.rules"
+ />
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="h;..\h"
+ PreprocessorDefinitions="NDEBUG;WIN32;_WINDOWS;WIN32_LEAN_AND_MEAN;NOMINMAX"
+ StringPooling="true"
+ MinimalRebuild="true"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="true"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Release/Kasumi.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="2"
+ InlineFunctionExpansion="1"
+ AdditionalIncludeDirectories="h;..\h"
+ PreprocessorDefinitions="NDEBUG;WIN32;_WINDOWS;WIN32_LEAN_AND_MEAN;NOMINMAX"
+ StringPooling="true"
+ MinimalRebuild="true"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="true"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Release/Kasumi.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="h;..\h"
+ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS;WIN32_LEAN_AND_MEAN;NOMINMAX"
+ StringPooling="true"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Debug/Kasumi.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="h;..\h"
+ PreprocessorDefinitions="_DEBUG;WIN32;_WINDOWS;WIN32_LEAN_AND_MEAN;NOMINMAX"
+ StringPooling="true"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Debug/Kasumi.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+ >
+ <File
+ RelativePath=".\source\alphablt.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\blt.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_pal.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_rgb.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_yuv.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_yuv2yuv.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_reference_yuvrev.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\blt_setup.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\blt_spanutils.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\blt_uberblit.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\pixel.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\pixmaputils.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\region.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\resample.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\resample_kernels.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\resample_stages.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\resample_stages_reference.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\stretchblt_reference.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\tables.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\triblt.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\uberblit.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_16f.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_gen.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_resample.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_resample_special.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_swizzle.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_swizzle_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\uberblit_v210.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\uberblit_ycbcr_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl"
+ >
+ <File
+ RelativePath="h\bitutils.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\blt_setup.h"
+ >
+ </File>
+ <File
+ RelativePath="h\blt_spanutils.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\blt_spanutils_x86.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\pixel.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\pixmap.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\pixmapops.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\pixmaputils.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\region.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\resample.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\resample_stages.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\resample_stages_reference.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\resample_stages_x64.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\resample_stages_x86.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\tables.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\text.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\triblt.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_16f.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_base.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_fill.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_gen.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_input.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_pal.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_resample.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_resample_special.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_resample_special_x86.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_rgb.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_rgb_x86.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_swizzle.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_swizzle_x86.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_v210.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_ycbcr.h"
+ >
+ </File>
+ <File
+ RelativePath=".\h\uberblit_ycbcr_x86.h"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Assembly files (x86)"
+ Filter="asm"
+ >
+ <File
+ RelativePath="source\a_bltrgb.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_bltrgb2yuv_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_bltrgb_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_bltyuv2rgb_sse2.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_resample_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_resample_sse41.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_spanutils_isse.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_stretchrgb_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_stretchrgb_point.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_triblt_mmx.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a_triblt_scalar.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_triblt_sse2.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Source Files (x86)"
+ >
+ <File
+ RelativePath=".\source\blt_spanutils_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\blt_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\resample_stages_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\uberblit_resample_special_x86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Assembly files (AMD64)"
+ Filter=".asm64"
+ >
+ <File
+ RelativePath="source\a64_resample.asm64"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Source Files (x64)"
+ >
+ <File
+ RelativePath=".\source\resample_stages_x64.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Interface Header Files"
+ >
+ <File
+ RelativePath="..\h\vd2\Kasumi\blitter.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\Kasumi\resample_kernels.h"
+ >
+ </File>
+ </Filter>
+ <File
+ RelativePath="source\a_triblt.inc"
+ >
+ </File>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/bitutils.h b/src/thirdparty/VirtualDub/Kasumi/h/bitutils.h
new file mode 100644
index 000000000..8cba85ffd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/bitutils.h
@@ -0,0 +1,26 @@
+#ifndef f_VD2_KASUMI_BITUTILS_H
+#define f_VD2_KASUMI_BITUTILS_H
+
+#include <vd2/system/vdtypes.h>
+
+namespace nsVDPixmapBitUtils {
+ inline uint32 avg_8888_11(uint32 x, uint32 y) {
+ return (x|y) - (((x^y)&0xfefefefe)>>1);
+ }
+
+ inline uint32 avg_8888_121(uint32 x, uint32 y, uint32 z) {
+ return avg_8888_11(avg_8888_11(x,z), y);
+ }
+
+ inline uint32 avg_0808_14641(uint32 a, uint32 b, uint32 c, uint32 d, uint32 e) {
+ a &= 0xff00ff;
+ b &= 0xff00ff;
+ c &= 0xff00ff;
+ d &= 0xff00ff;
+ e &= 0xff00ff;
+
+ return (((a+e) + 4*(b+d) + 6*c + 0x080008)&0x0ff00ff0)>>4;
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/blt_setup.h b/src/thirdparty/VirtualDub/Kasumi/h/blt_setup.h
new file mode 100644
index 000000000..19b7bc62c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/blt_setup.h
@@ -0,0 +1,62 @@
+#ifndef f_VD2_KASUMI_BLT_SETUP_H
+#define f_VD2_KASUMI_BLT_SETUP_H
+
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+typedef void (*VDPixmapPalettedBlitterFn)(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal);
+typedef void (*VDPixmapChunkyBlitterFn)(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+
+void VDPixmapBltDirectPalettedConversion(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h, VDPixmapPalettedBlitterFn pBlitter);
+
+template<VDPixmapPalettedBlitterFn palettedBlitter>
+void VDPixmapBlitterPalettedAdapter(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+{
+ if (dst.format == nsVDPixmap::kPixFormat_XRGB8888)
+ palettedBlitter(dst.data, dst.pitch, src.data, src.pitch, w, h, src.palette);
+ else
+ VDPixmapBltDirectPalettedConversion(dst, src, w, h, palettedBlitter);
+}
+
+template<VDPixmapChunkyBlitterFn chunkyBlitter>
+void VDPixmapBlitterChunkyAdapter(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+{
+ chunkyBlitter(dst.data, dst.pitch, src.data, src.pitch, w, h);
+}
+
+struct VDPixmapFormatSubset {
+public:
+ VDPixmapFormatSubset() : mFormatCount(0) {}
+
+ VDPixmapFormatSubset& operator=(int format) {
+ mFormatCount = 0;
+ mFormats[mFormatCount++] = format;
+ return *this;
+ }
+
+ VDPixmapFormatSubset& operator,(int format) {
+ VDASSERT(mFormatCount < nsVDPixmap::kPixFormat_Max_Standard);
+ mFormats[mFormatCount++] = format;
+ return *this;
+ }
+
+ int mFormatCount;
+ int mFormats[nsVDPixmap::kPixFormat_Max_Standard];
+};
+
+class VDPixmapBlitterTable {
+public:
+ void Clear();
+ void AddBlitter(int srcFormat, int dstFormat, VDPixmapBlitterFn blitter);
+ void AddBlitter(const VDPixmapFormatSubset& srcFormats, VDPixmapFormatSubset& dstFormats, VDPixmapBlitterFn blitter);
+
+ VDPixmapBlitterFn mTable[nsVDPixmap::kPixFormat_Max_Standard][nsVDPixmap::kPixFormat_Max_Standard];
+};
+
+inline void VDPixmapBlitterTable::AddBlitter(int srcFormat, int dstFormat, VDPixmapBlitterFn blitter) {
+ mTable[srcFormat][dstFormat] = blitter;
+}
+
+
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils.h b/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils.h
new file mode 100644
index 000000000..ef723b3f8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils.h
@@ -0,0 +1,23 @@
+#ifndef f_VD2_KASUMI_BLT_SPANUTILS_H
+#define f_VD2_KASUMI_BLT_SPANUTILS_H
+
+#include <vd2/system/vdtypes.h>
+
+namespace nsVDPixmapSpanUtils {
+ void horiz_expand2x_centered (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_expand2x_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_expand4x_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_compress2x_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_compress2x_centered (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_compress4x_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_compress4x_centered (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_realign_to_centered (uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_realign_to_coaligned (uint8 *dst, const uint8 *src, sint32 w);
+ void vert_expand2x_centered (uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+ void vert_expand4x_centered (uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+ void vert_compress2x_centered_fast (uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase);
+ void vert_compress2x_centered (uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase);
+ void vert_compress4x_centered(uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase);
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils_x86.h
new file mode 100644
index 000000000..c697485a2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/blt_spanutils_x86.h
@@ -0,0 +1,35 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_BLT_SPANUTILS_X86_H
+#define f_VD2_KASUMI_BLT_SPANUTILS_X86_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+namespace nsVDPixmapSpanUtils {
+ void horiz_expand2x_coaligned_ISSE(uint8 *dst, const uint8 *src, sint32 w);
+ void horiz_expand4x_coaligned_MMX(uint8 *dst, const uint8 *src, sint32 w);
+ void vert_expand2x_centered_ISSE(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+ void vert_expand4x_centered_ISSE(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/resample_stages.h b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages.h
new file mode 100644
index 000000000..588fda9ad
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages.h
@@ -0,0 +1,80 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_STAGES_H
+#define f_VD2_KASUMI_RESAMPLE_STAGES_H
+
+#include <vd2/Kasumi/pixmap.h>
+
+class IVDResamplerFilter;
+struct VDResamplerAxis;
+
+class VDSteppedAllocator {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+
+ VDSteppedAllocator(size_t initialSize = 1024);
+ ~VDSteppedAllocator();
+
+ void clear();
+ void *allocate(size_type n);
+
+protected:
+ struct Block {
+ Block *next;
+ };
+
+ Block *mpHead;
+ char *mpAllocNext;
+ size_t mAllocLeft;
+ size_t mAllocNext;
+ size_t mAllocInit;
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (common)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class IVDResamplerStage {
+public:
+ virtual ~IVDResamplerStage() {}
+
+#if 0
+ void *operator new(size_t n, VDSteppedAllocator& a) {
+ return a.allocate(n);
+ }
+
+ void operator delete(void *p, VDSteppedAllocator& a) {
+ }
+
+private:
+ // these should NEVER be called
+ void operator delete(void *p) {}
+#endif
+};
+
+class IVDResamplerSeparableRowStage2 {
+public:
+ virtual void Init(const VDResamplerAxis& axis, uint32 srcw) = 0;
+ virtual void Process(void *dst, const void *src, uint32 w) = 0;
+};
+
+class IVDResamplerSeparableRowStage : public IVDResamplerStage {
+public:
+ virtual IVDResamplerSeparableRowStage2 *AsRowStage2() { return NULL; }
+ virtual void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) = 0;
+ virtual int GetWindowSize() const = 0;
+};
+
+class IVDResamplerSeparableColStage : public IVDResamplerStage {
+public:
+ virtual int GetWindowSize() const = 0;
+ virtual void Process(void *dst, const void *const *src, uint32 w, sint32 phase) = 0;
+};
+
+void VDResamplerGenerateTable(sint32 *dst, const IVDResamplerFilter& filter);
+void VDResamplerGenerateTableF(float *dst, const IVDResamplerFilter& filter);
+void VDResamplerGenerateTable2(sint32 *dst, const IVDResamplerFilter& filter, sint32 count, sint32 u, sint32 dudx);
+void VDResamplerSwizzleTable(sint32 *dst, unsigned pairs);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_reference.h b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_reference.h
new file mode 100644
index 000000000..296882ceb
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_reference.h
@@ -0,0 +1,156 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_STAGES_REFERENCE_H
+#define f_VD2_KASUMI_RESAMPLE_STAGES_REFERENCE_H
+
+#include <vd2/system/vdstl.h>
+#include "resample_stages.h"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (portable)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerRowStageSeparablePoint8 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparablePoint16 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparablePoint32 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparableLinear8 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ virtual void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf : public VDResamplerRowStageSeparableLinear8 {
+public:
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerRowStageSeparableLinear32 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerColStageSeparableLinear8 : public IVDResamplerSeparableColStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+};
+
+class VDResamplerColStageSeparableLinear32 : public IVDResamplerSeparableColStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+};
+
+class VDResamplerRowStageSeparableTable8 : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerRowStageSeparableTable8(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerRowStageSeparableTable32 : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerRowStageSeparableTable32(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerRowStageSeparableTable32F : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerRowStageSeparableTable32F(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<float, vdaligned_alloc<float> > mFilterBank;
+};
+
+class VDResamplerRowStageSeparableTable32Fx4 : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerRowStageSeparableTable32Fx4(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<float, vdaligned_alloc<float> > mFilterBank;
+};
+
+class VDResamplerColStageSeparableTable8 : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerColStageSeparableTable8(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *const *src0, uint32 w, sint32 phase);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerColStageSeparableTable32 : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerColStageSeparableTable32(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *const *src0, uint32 w, sint32 phase);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerColStageSeparableTable32F : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerColStageSeparableTable32F(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *const *src0, uint32 w, sint32 phase);
+
+protected:
+ vdblock<float, vdaligned_alloc<float> > mFilterBank;
+};
+
+class VDResamplerColStageSeparableTable32Fx4 : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerColStageSeparableTable32Fx4(const IVDResamplerFilter& filter);
+
+ int GetWindowSize() const;
+
+ void Process(void *dst0, const void *const *src0, uint32 w, sint32 phase);
+
+protected:
+ vdblock<float, vdaligned_alloc<float> > mFilterBank;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x64.h b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x64.h
new file mode 100644
index 000000000..fd719f732
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x64.h
@@ -0,0 +1,26 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_STAGES_X64_H
+#define f_VD2_KASUMI_RESAMPLE_STAGES_X64_H
+
+#include "resample_stages_reference.h"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE2, AMD64)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparableTableRowStageSSE2 : public VDResamplerRowStageSeparableTable32 {
+public:
+ VDResamplerSeparableTableRowStageSSE2(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableTableColStageSSE2 : public VDResamplerColStageSeparableTable32 {
+public:
+ VDResamplerSeparableTableColStageSSE2(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x86.h
new file mode 100644
index 000000000..41e16b23d
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/resample_stages_x86.h
@@ -0,0 +1,193 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_STAGES_X86
+#define f_VD2_KASUMI_RESAMPLE_STAGES_X86
+
+#include "resample_stages_reference.h"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (scalar, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparablePointRowStageX86 : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (MMX, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparablePointRowStageMMX : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableLinearRowStageMMX : public IVDResamplerSeparableRowStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableLinearColStageMMX : public IVDResamplerSeparableColStage {
+public:
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+};
+
+class VDResamplerSeparableCubicRowStageMMX : public IVDResamplerSeparableRowStage {
+public:
+ VDResamplerSeparableCubicRowStageMMX(double A);
+
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerSeparableCubicColStageMMX : public IVDResamplerSeparableColStage {
+public:
+ VDResamplerSeparableCubicColStageMMX(double A);
+
+ int GetWindowSize() const;
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+
+protected:
+ vdblock<sint32, vdaligned_alloc<sint32> > mFilterBank;
+};
+
+class VDResamplerSeparableTableRowStage8MMX : public VDResamplerRowStageSeparableTable32, public IVDResamplerSeparableRowStage2 {
+public:
+ VDResamplerSeparableTableRowStage8MMX(const IVDResamplerFilter& filter);
+
+ IVDResamplerSeparableRowStage2 *AsRowStage2() { return this; }
+
+ void Init(const VDResamplerAxis& axis, uint32 srcw);
+ void Process(void *dst, const void *src, uint32 w);
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ void RedoRowFilters(const VDResamplerAxis& axis, uint32 w, uint32 srcw);
+
+ int mAlignedKernelWidth;
+ int mAlignedKernelSize;
+ ptrdiff_t mRowKernelSize;
+ uint32 mLastSrcWidth;
+ uint32 mLastDstWidth;
+ sint32 mLastU;
+ sint32 mLastDUDX;
+
+ bool mbQuadOptimizationEnabled[4];
+ int mKernelSizeByOffset[4];
+ ptrdiff_t mTailOffset[4];
+
+ vdfastvector<sint16, vdaligned_alloc<sint16> > mRowKernels;
+};
+
+class VDResamplerSeparableTableRowStageMMX : public VDResamplerRowStageSeparableTable32 {
+public:
+ VDResamplerSeparableTableRowStageMMX(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableTableColStage8MMX : public VDResamplerColStageSeparableTable8 {
+public:
+ VDResamplerSeparableTableColStage8MMX(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+class VDResamplerSeparableTableColStageMMX : public VDResamplerColStageSeparableTable32 {
+public:
+ VDResamplerSeparableTableColStageMMX(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (ISSE, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf_ISSE : public VDResamplerRowStageSeparableLinear8 {
+public:
+ void Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx);
+};
+
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE2, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparableCubicColStageSSE2 : public VDResamplerSeparableCubicColStageMMX {
+public:
+ VDResamplerSeparableCubicColStageSSE2(double A);
+
+ void Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase);
+};
+
+class VDResamplerSeparableTableRowStageSSE2 : public VDResamplerSeparableTableRowStageMMX {
+public:
+ VDResamplerSeparableTableRowStageSSE2(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+};
+
+class VDResamplerSeparableTableColStageSSE2 : public VDResamplerSeparableTableColStageMMX {
+public:
+ VDResamplerSeparableTableColStageSSE2(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE4.1, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDResamplerSeparableTableRowStage8SSE41 : public VDResamplerRowStageSeparableTable32, public IVDResamplerSeparableRowStage2 {
+public:
+ VDResamplerSeparableTableRowStage8SSE41(const IVDResamplerFilter& filter);
+
+ IVDResamplerSeparableRowStage2 *AsRowStage2() { return this; }
+
+ void Init(const VDResamplerAxis& axis, uint32 srcw);
+ void Process(void *dst, const void *src, uint32 w);
+ void Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx);
+
+protected:
+ void RedoRowFilters(const VDResamplerAxis& axis, uint32 w, uint32 srcw);
+
+ int mAlignedKernelWidth;
+ int mAlignedKernelSize;
+ ptrdiff_t mRowKernelSize;
+ uint32 mLastSrcWidth;
+ uint32 mLastDstWidth;
+ sint32 mLastU;
+ sint32 mLastDUDX;
+
+ bool mbQuadOptimizationEnabled[8];
+ int mKernelSizeByOffset[8];
+ ptrdiff_t mTailOffset[8];
+
+ vdfastvector<sint16, vdaligned_alloc<sint16> > mRowKernels;
+};
+
+class VDResamplerSeparableTableColStage8SSE41 : public VDResamplerColStageSeparableTable8 {
+public:
+ VDResamplerSeparableTableColStage8SSE41(const IVDResamplerFilter& filter);
+
+ void Process(void *dst, const void *const *src, uint32 w, sint32 phase);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit.h
new file mode 100644
index 000000000..72f8ee060
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit.h
@@ -0,0 +1,83 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_H
+#define f_VD2_KASUMI_UBERBLIT_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/vectors.h>
+#include <vd2/Kasumi/blitter.h>
+
+struct VDPixmap;
+
+enum VDPixmapFormatToken {
+ kVDPixType_1 = 0x00000001,
+ kVDPixType_2 = 0x00000002,
+ kVDPixType_4 = 0x00000003,
+ kVDPixType_8 = 0x00000004,
+ kVDPixType_555_LE = 0x00000005,
+ kVDPixType_565_LE = 0x00000006,
+ kVDPixType_1555_LE = 0x00000007,
+ kVDPixType_888 = 0x00000008,
+ kVDPixType_8888 = 0x00000009,
+ kVDPixType_16F_LE = 0x0000000A,
+ kVDPixType_16Fx4_LE = 0x0000000B,
+ kVDPixType_16F_16F_16F_LE = 0x0000000C,
+ kVDPixType_32F_LE = 0x0000000D,
+ kVDPixType_32Fx4_LE = 0x0000000E,
+ kVDPixType_32F_32F_32F_LE = 0x0000000F,
+ kVDPixType_8_8_8 = 0x00000010,
+ kVDPixType_B8G8_R8G8 = 0x00000011, // UYVY
+ kVDPixType_G8B8_G8R8 = 0x00000012, // YUYV
+ kVDPixType_V210 = 0x00000013, // v210 (4:2:2 10 bit)
+ kVDPixType_8_B8R8 = 0x00000014, // NV12
+ kVDPixType_B8R8 = 0x00000015,
+ kVDPixType_Mask = 0x0000003F,
+
+ kVDPixSamp_444 = 0x00000040,
+ kVDPixSamp_422 = 0x00000080,
+ kVDPixSamp_422_JPEG = 0x000000C0,
+ kVDPixSamp_420_MPEG2 = 0x00000100,
+ kVDPixSamp_420_MPEG2INT = 0x00000140,
+ kVDPixSamp_420_MPEG1 = 0x00000180,
+ kVDPixSamp_420_DVPAL = 0x000001C0,
+ kVDPixSamp_411 = 0x00000200,
+ kVDPixSamp_410 = 0x00000240,
+ kVDPixSamp_Mask = 0x00000FC0,
+ kVDPixSamp_Bits = 6,
+
+ kVDPixSpace_Pal = 0x00001000,
+ kVDPixSpace_RGB = 0x00002000,
+ kVDPixSpace_BGR = 0x00003000,
+ kVDPixSpace_BGRA = 0x00004000,
+ kVDPixSpace_Y_601 = 0x00005000,
+ kVDPixSpace_Y_709 = 0x00006000,
+ kVDPixSpace_YCC_601 = 0x00007000,
+ kVDPixSpace_YCC_709 = 0x00008000,
+ kVDPixSpace_YCC_JPEG = 0x00009000,
+ kVDPixSpace_Mask = 0x0003F000,
+};
+
+struct VDPixmapSamplingInfo {
+ int mCXOffset16;
+ int mCrYOffset16;
+ int mCbYOffset16;
+ int mCXBits;
+ int mCYBits;
+};
+
+uint32 VDPixmapGetFormatTokenFromFormat(int format);
+const VDPixmapSamplingInfo& VDPixmapGetSamplingInfo(uint32 samplingToken);
+
+class IVDPixmapGen {
+public:
+ virtual ~IVDPixmapGen() {}
+ virtual void AddWindowRequest(int minY, int maxY) = 0;
+ virtual void Start() = 0;
+ virtual sint32 GetWidth(int srcIndex) const = 0;
+ virtual sint32 GetHeight(int srcIndex) const = 0;
+ virtual bool IsStateful() const = 0;
+ virtual uint32 GetType(uint32 output) const = 0;
+ virtual const void *GetRow(sint32 y, uint32 output) = 0;
+ virtual void ProcessRow(void *dst, sint32 y) = 0;
+};
+
+#endif
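IVDPixmapGen, declared above, is the pull-model row interface that the rest of these headers build on: a consumer first declares which rows relative to the current one it will read (AddWindowRequest), lets every stage in the chain allocate its row cache (Start), and then pulls output rows in increasing y order. The fragment below is an editorial illustration of that calling sequence only, not the library's actual driver, and the helper name is made up.

// Illustrative driver for an IVDPixmapGen chain (editorial sketch, not part
// of the commit). Types such as sint32 come from vd2/system/vdtypes.h.
void SketchGenerate(void *dst, ptrdiff_t pitch, sint32 h, IVDPixmapGen *gen) {
    gen->AddWindowRequest(0, 0);    // the consumer only needs the row it is writing
    gen->Start();                   // each stage sizes and allocates its row window

    char *dstRow = (char *)dst;
    for(sint32 y = 0; y < h; ++y) {
        gen->ProcessRow(dstRow, y); // final stage computes one full row into dst
        dstRow += pitch;
    }
}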
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_16f.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_16f.h
new file mode 100644
index 000000000..513c4fb4f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_16f.h
@@ -0,0 +1,39 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_16F_H
+#define f_VD2_KASUMI_UBERBLIT_16F_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32F -> 16F
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_32F_To_16F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start();
+
+ uint32 GetType(uint32 output) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16F -> 32F
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_16F_To_32F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start();
+
+ uint32 GetType(uint32 output) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_base.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_base.h
new file mode 100644
index 000000000..675619a7b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_base.h
@@ -0,0 +1,129 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_BASE_H
+#define f_VD2_KASUMI_UBERBLIT_BASE_H
+
+#include <vd2/system/vdstl.h>
+#include "uberblit.h"
+
+class VDPixmapGenWindowBased : public IVDPixmapGen {
+public:
+ VDPixmapGenWindowBased()
+ : mWindowMinDY(0xffff)
+ , mWindowMaxDY(-0xffff) {}
+
+ void SetOutputSize(sint32 w, sint32 h) {
+ mWidth = w;
+ mHeight = h;
+ }
+
+ void AddWindowRequest(int minDY, int maxDY) {
+ if (mWindowMinDY > minDY)
+ mWindowMinDY = minDY;
+ if (mWindowMaxDY < maxDY)
+ mWindowMaxDY = maxDY;
+ }
+
+ void StartWindow(uint32 rowbytes, int outputCount = 1) {
+ VDASSERT(mWindowMaxDY >= mWindowMinDY);
+ mWindowSize = mWindowMaxDY + 1 - mWindowMinDY;
+
+ mWindowPitch = (rowbytes + 15) & ~15;
+ mWindowBuffer.resize(mWindowPitch * mWindowSize * outputCount);
+ mWindow.resize(mWindowSize * 2);
+
+ for(sint32 i=0; i<mWindowSize; ++i)
+ mWindow[i] = mWindow[i + mWindowSize] = &mWindowBuffer[mWindowPitch * outputCount * i];
+
+ mWindowIndex = 0;
+ mWindowLastY = -0x3FFFFFFF;
+ }
+
+ sint32 GetWidth(int) const { return mWidth; }
+ sint32 GetHeight(int) const { return mHeight; }
+
+ bool IsStateful() const {
+ return true;
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ sint32 tostep = y - mWindowLastY;
+ VDASSERT(y >= mWindowLastY - (sint32)mWindowSize + 1);
+
+ if (tostep >= mWindowSize) {
+ mWindowLastY = y - 1;
+ tostep = 1;
+ }
+
+ while(tostep-- > 0) {
+ ++mWindowLastY;
+ Compute(mWindow[mWindowIndex], mWindowLastY);
+ if (++mWindowIndex >= mWindowSize)
+ mWindowIndex = 0;
+ }
+
+ return mWindow[y + mWindowSize - 1 - mWindowLastY + mWindowIndex];
+ }
+
+ void ProcessRow(void *dst, sint32 y) {
+ Compute(dst, y);
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) = 0;
+
+ vdfastvector<uint8> mWindowBuffer;
+ vdfastvector<uint8 *> mWindow;
+ sint32 mWindowPitch;
+ sint32 mWindowIndex;
+ sint32 mWindowMinDY;
+ sint32 mWindowMaxDY;
+ sint32 mWindowSize;
+ sint32 mWindowLastY;
+ sint32 mWidth;
+ sint32 mHeight;
+};
+
+class VDPixmapGenWindowBasedOneSource : public VDPixmapGenWindowBased {
+public:
+ void InitSource(IVDPixmapGen *src, uint32 srcindex) {
+ mpSrc = src;
+ mSrcIndex = srcindex;
+ mSrcWidth = src->GetWidth(srcindex);
+ mSrcHeight = src->GetHeight(srcindex);
+ mWidth = mSrcWidth;
+ mHeight = mSrcHeight;
+ }
+
+ void AddWindowRequest(int minDY, int maxDY) {
+ VDPixmapGenWindowBased::AddWindowRequest(minDY, maxDY);
+ mpSrc->AddWindowRequest(minDY, maxDY);
+ }
+
+ void StartWindow(uint32 rowbytes, int outputCount = 1) {
+ mpSrc->Start();
+
+ VDPixmapGenWindowBased::StartWindow(rowbytes, outputCount);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return mpSrc->GetType(mSrcIndex);
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) = 0;
+
+ IVDPixmapGen *mpSrc;
+ uint32 mSrcIndex;
+ sint32 mSrcWidth;
+ sint32 mSrcHeight;
+};
+
+class VDPixmapGenWindowBasedOneSourceSimple : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+
+ src->AddWindowRequest(0, 0);
+ }
+};
+
+#endif
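VDPixmapGenWindowBased, above, is the ring-buffered row cache that most stages derive from: a stage states how many rows above and below the current one it reads from its source, StartWindow() allocates that many cached rows, and GetRow() recomputes only the rows that have scrolled into view. The class below is a hypothetical example (editorial sketch, not part of the commit) of how a derived stage would use that contract; it assumes its source clamps out-of-range rows the way VDPixmapGenSrc in uberblit_input.h does.

// Hypothetical 3-tap (1-2-1) vertical smoothing stage for an 8-bit plane,
// shown only to illustrate the AddWindowRequest / StartWindow / Compute
// contract of the window base classes above.
class VDPixmapGen_VSmooth3_8_Sketch : public VDPixmapGenWindowBasedOneSource {
public:
    void Init(IVDPixmapGen *src, uint32 srcIndex) {
        InitSource(src, srcIndex);
        src->AddWindowRequest(-1, 1);   // we read source rows y-1 .. y+1
    }

    void Start() {
        StartWindow(mWidth);            // one byte per output pixel
    }

protected:
    void Compute(void *dst0, sint32 y) {
        uint8 *dst = (uint8 *)dst0;
        const uint8 *r0 = (const uint8 *)mpSrc->GetRow(y - 1, mSrcIndex);
        const uint8 *r1 = (const uint8 *)mpSrc->GetRow(y,     mSrcIndex);
        const uint8 *r2 = (const uint8 *)mpSrc->GetRow(y + 1, mSrcIndex);

        for(sint32 x = 0; x < mWidth; ++x)
            dst[x] = (uint8)((r0[x] + 2*r1[x] + r2[x] + 2) >> 2);
    }
};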
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_fill.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_fill.h
new file mode 100644
index 000000000..ba02a2877
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_fill.h
@@ -0,0 +1,55 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_FILL_H
+#define f_VD2_KASUMI_UBERBLIT_FILL_H
+
+#include "uberblit.h"
+#include "uberblit_base.h"
+
+class VDPixmapGenFill8 : public IVDPixmapGen {
+public:
+ void Init(uint8 fill, uint32 bpr, sint32 width, sint32 height, uint32 type) {
+ mRow.resize(bpr, fill);
+ mWidth = width;
+ mHeight = height;
+ mType = type;
+ }
+
+ void AddWindowRequest(int minY, int maxY) {
+ }
+
+ void Start() {
+ }
+
+ sint32 GetWidth(int) const {
+ return mWidth;
+ }
+
+ sint32 GetHeight(int) const {
+ return mHeight;
+ }
+
+ bool IsStateful() const {
+ return false;
+ }
+
+ const void *GetRow(sint32 y, uint32 output) {
+ return mRow.data();
+ }
+
+ void ProcessRow(void *dst, sint32 y) {
+ if (!mRow.empty())
+ memset(dst, mRow[0], mRow.size());
+ }
+
+ uint32 GetType(uint32 index) const {
+ return mType;
+ }
+
+protected:
+ sint32 mWidth;
+ sint32 mHeight;
+ uint32 mType;
+
+ vdfastvector<uint8> mRow;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_gen.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_gen.h
new file mode 100644
index 000000000..3937fbba7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_gen.h
@@ -0,0 +1,167 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_GEN_H
+#define f_VD2_KASUMI_UBERBLIT_GEN_H
+
+#include <vd2/system/vectors.h>
+#include "uberblit.h"
+
+class IVDPixmapGenSrc;
+
+class VDPixmapUberBlitterDirectCopy : public IVDPixmapBlitter {
+public:
+ VDPixmapUberBlitterDirectCopy();
+ ~VDPixmapUberBlitterDirectCopy();
+
+ void Blit(const VDPixmap& dst, const VDPixmap& src);
+ void Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src);
+};
+
+class VDPixmapUberBlitter : public IVDPixmapBlitter {
+public:
+ VDPixmapUberBlitter();
+ ~VDPixmapUberBlitter();
+
+ void Blit(const VDPixmap& dst, const VDPixmap& src);
+ void Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src);
+
+protected:
+ void Blit(const VDPixmap& dst, const vdrect32 *rDst);
+ void Blit3(const VDPixmap& dst, const vdrect32 *rDst);
+ void Blit3Split(const VDPixmap& dst, const vdrect32 *rDst);
+ void Blit3Separated(const VDPixmap& px, const vdrect32 *rDst);
+ void Blit2(const VDPixmap& dst, const vdrect32 *rDst);
+ void Blit2Separated(const VDPixmap& px, const vdrect32 *rDst);
+
+ friend class VDPixmapUberBlitterGenerator;
+
+ struct OutputEntry {
+ IVDPixmapGen *mpSrc;
+ int mSrcIndex;
+ } mOutputs[3];
+
+ struct SourceEntry {
+ IVDPixmapGenSrc *mpSrc;
+ int mSrcIndex;
+ int mSrcPlane;
+ int mSrcX;
+ int mSrcY;
+ };
+
+ typedef vdfastvector<IVDPixmapGen *> Generators;
+ Generators mGenerators;
+
+ typedef vdfastvector<SourceEntry> Sources;
+ Sources mSources;
+
+ bool mbIndependentChromaPlanes;
+ bool mbIndependentPlanes;
+};
+
+class VDPixmapUberBlitterGenerator {
+public:
+ VDPixmapUberBlitterGenerator();
+ ~VDPixmapUberBlitterGenerator();
+
+ void swap(int index);
+ void dup();
+ void pop();
+
+ void ldsrc(int srcIndex, int srcPlane, int x, int y, uint32 w, uint32 h, uint32 type, uint32 bpr);
+
+ void ldconst(uint8 fill, uint32 bpr, uint32 w, uint32 h, uint32 type);
+
+ void extract_8in16(int offset, uint32 w, uint32 h);
+ void extract_8in32(int offset, uint32 w, uint32 h);
+ void swap_8in16(uint32 w, uint32 h, uint32 bpr);
+
+ void conv_Pal1_to_8888(int srcIndex);
+ void conv_Pal2_to_8888(int srcIndex);
+ void conv_Pal4_to_8888(int srcIndex);
+ void conv_Pal8_to_8888(int srcIndex);
+
+ void conv_555_to_8888();
+ void conv_565_to_8888();
+ void conv_888_to_8888();
+ void conv_555_to_565();
+ void conv_565_to_555();
+ void conv_8888_to_X32F();
+ void conv_8_to_32F();
+ void conv_16F_to_32F();
+ void conv_V210_to_32F();
+
+ void conv_8888_to_555();
+ void conv_8888_to_565();
+ void conv_8888_to_888();
+ void conv_32F_to_8();
+ void conv_X32F_to_8888();
+ void conv_32F_to_16F();
+ void conv_32F_to_V210();
+
+ void convd_8888_to_555();
+ void convd_8888_to_565();
+ void convd_32F_to_8();
+ void convd_X32F_to_8888();
+
+ void interleave_B8G8_R8G8();
+ void interleave_G8B8_G8R8();
+ void interleave_X8R8G8B8();
+ void interleave_B8R8();
+
+ void ycbcr601_to_rgb32();
+ void ycbcr709_to_rgb32();
+ void rgb32_to_ycbcr601();
+ void rgb32_to_ycbcr709();
+
+ void ycbcr601_to_rgb32_32f();
+ void ycbcr709_to_rgb32_32f();
+ void rgb32_to_ycbcr601_32f();
+ void rgb32_to_ycbcr709_32f();
+
+ void ycbcr601_to_ycbcr709();
+ void ycbcr709_to_ycbcr601();
+
+ void pointh(float xoffset, float xfactor, uint32 w);
+ void pointv(float yoffset, float yfactor, uint32 h);
+ void linearh(float xoffset, float xfactor, uint32 w, bool interpOnly);
+ void linearv(float yoffset, float yfactor, uint32 h, bool interpOnly);
+ void linear(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h);
+ void cubich(float xoffset, float xfactor, uint32 w, float splineFactor, bool interpOnly);
+ void cubicv(float yoffset, float yfactor, uint32 h, float splineFactor, bool interpOnly);
+ void cubic(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h, float splineFactor);
+ void lanczos3h(float xoffset, float xfactor, uint32 w);
+ void lanczos3v(float yoffset, float yfactor, uint32 h);
+ void lanczos3(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h);
+
+ IVDPixmapBlitter *create();
+
+protected:
+ void MarkDependency(IVDPixmapGen *dst, IVDPixmapGen *src);
+
+ struct StackEntry {
+ IVDPixmapGen *mpSrc;
+ uint32 mSrcIndex;
+
+ StackEntry() {}
+ StackEntry(IVDPixmapGen *src, uint32 index) : mpSrc(src), mSrcIndex(index) {}
+ };
+
+ vdfastvector<StackEntry> mStack;
+
+ typedef vdfastvector<IVDPixmapGen *> Generators;
+ Generators mGenerators;
+
+ struct Dependency {
+ int mDstIdx;
+ int mSrcIdx;
+ };
+
+ vdfastvector<Dependency> mDependencies;
+
+ typedef VDPixmapUberBlitter::SourceEntry SourceEntry;
+ vdfastvector<SourceEntry> mSources;
+};
+
+void VDPixmapGenerate(void *dst, ptrdiff_t pitch, sint32 bpr, sint32 height, IVDPixmapGen *gen, int genIndex);
+IVDPixmapBlitter *VDCreatePixmapUberBlitterDirectCopy(const VDPixmap& dst, const VDPixmap& src);
+IVDPixmapBlitter *VDCreatePixmapUberBlitterDirectCopy(const VDPixmapLayout& dst, const VDPixmapLayout& src);
+
+#endif
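VDPixmapUberBlitterGenerator, declared above, is a small stack machine: ldsrc() and ldconst() push row generators, the conv_* / interleave_* / resampling verbs pop their operands and push the converted stage, and create() turns whatever remains on the stack into the output planes of the finished blitter. The fragment below is a hypothetical use shown only to illustrate that flow; every concrete value in it (plane index, origin, bytes per row, and especially the format token) is an assumption for the sketch, not something specified by this header.

// Editorial sketch (not part of the commit): build a blitter that expands an
// X1R5G5B5 plane to X8R8G8B8. The token composition below is an assumption.
IVDPixmapBlitter *SketchBuild555To8888(uint32 w, uint32 h) {
    VDPixmapUberBlitterGenerator gen;

    const uint32 srcToken = kVDPixSpace_BGR | kVDPixSamp_444 | kVDPixType_1555_LE;

    gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w * 2);   // push the 16-bit source plane
    gen.conv_555_to_8888();                         // pop it, push an 8888 conversion stage
    return gen.create();                            // remaining stack entries become outputs
}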
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_input.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_input.h
new file mode 100644
index 000000000..bfd5ebad5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_input.h
@@ -0,0 +1,69 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_INPUT_H
+#define f_VD2_KASUMI_UBERBLIT_INPUT_H
+
+#include "uberblit.h"
+#include "uberblit_base.h"
+
+class IVDPixmapGenSrc {
+public:
+ virtual void SetSource(const void *src, ptrdiff_t pitch, const uint32 *palette) = 0;
+};
+
+class VDPixmapGenSrc : public IVDPixmapGen, public IVDPixmapGenSrc {
+public:
+ void Init(sint32 width, sint32 height, uint32 type, uint32 bpr) {
+ mWidth = width;
+ mHeight = height;
+ mType = type;
+ mBpr = bpr;
+ }
+
+ void SetSource(const void *src, ptrdiff_t pitch, const uint32 *palette) {
+ mpSrc = src;
+ mPitch = pitch;
+ }
+
+ void AddWindowRequest(int minY, int maxY) {
+ }
+
+ void Start() {
+ }
+
+ sint32 GetWidth(int) const {
+ return mWidth;
+ }
+
+ sint32 GetHeight(int) const {
+ return mHeight;
+ }
+
+ bool IsStateful() const {
+ return false;
+ }
+
+ const void *GetRow(sint32 y, uint32 output) {
+ if (y < 0)
+ y = 0;
+ else if (y >= mHeight)
+ y = mHeight - 1;
+ return vdptroffset(mpSrc, mPitch*y);
+ }
+
+ void ProcessRow(void *dst, sint32 y) {
+ memcpy(dst, GetRow(y, 0), mBpr);
+ }
+
+ uint32 GetType(uint32 index) const {
+ return mType;
+ }
+
+protected:
+ const void *mpSrc;
+ ptrdiff_t mPitch;
+ size_t mBpr;
+ sint32 mWidth;
+ sint32 mHeight;
+ uint32 mType;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_pal.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_pal.h
new file mode 100644
index 000000000..e3958b458
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_pal.h
@@ -0,0 +1,148 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_PAL_H
+#define f_VD2_KASUMI_UBERBLIT_PAL_H
+
+#include "uberblit_base.h"
+#include "uberblit_input.h"
+
+class VDPixmapGenBase_Pal_To_X8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple, public IVDPixmapGenSrc {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ void Init(IVDPixmapGen *src, int srcIndex) {
+ InitSource(src, srcIndex);
+ }
+
+ void SetSource(const void *src, ptrdiff_t pitch, const uint32 *palette) {
+ mpPal = palette;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ const uint32 *mpPal;
+};
+
+class VDPixmapGen_Pal1_To_X8R8G8B8 : public VDPixmapGenBase_Pal_To_X8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+ sint32 h = mHeight;
+
+ const uint32 *pal = mpPal;
+
+ src += (w-1) >> 3;
+ dst += (w-1) & ~7;
+
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: dst[7] = pal[v&1]; v >>= 1;
+ case 7: dst[6] = pal[v&1]; v >>= 1;
+ case 6: dst[5] = pal[v&1]; v >>= 1;
+ case 5: dst[4] = pal[v&1]; v >>= 1;
+ case 4: dst[3] = pal[v&1]; v >>= 1;
+ case 3: dst[2] = pal[v&1]; v >>= 1;
+ case 2: dst[1] = pal[v&1]; v >>= 1;
+ case 1: dst[0] = pal[v&1]; v >>= 1;
+
+ dst -= 8;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+ }
+};
+
+class VDPixmapGen_Pal2_To_X8R8G8B8 : public VDPixmapGenBase_Pal_To_X8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+ sint32 h = mHeight;
+
+ const uint32 *pal = mpPal;
+
+ src += (w-1) >> 2;
+ dst += (w-1) & ~3;
+
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: dst[3] = pal[v&3]; v >>= 2;
+ case 3: dst[2] = pal[v&3]; v >>= 2;
+ case 2: dst[1] = pal[v&3]; v >>= 2;
+ case 1: dst[0] = pal[v&3]; v >>= 2;
+
+ dst -= 4;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+ }
+};
+
+class VDPixmapGen_Pal4_To_X8R8G8B8 : public VDPixmapGenBase_Pal_To_X8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+ sint32 h = mHeight;
+
+ const uint32 *pal = mpPal;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1);
+
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: dst[1] = pal[v&15]; v >>= 4;
+ case 1: dst[0] = pal[v&15]; v >>= 4;
+
+ dst -= 2;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+ }
+};
+
+class VDPixmapGen_Pal8_To_X8R8G8B8 : public VDPixmapGenBase_Pal_To_X8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+ sint32 h = mHeight;
+
+ const uint32 *pal = mpPal;
+
+ int wt = w;
+
+ do {
+ *dst++ = pal[*src++];
+ } while(--wt);
+ }
+};
+
+#endif
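A note on the Compute() loops above: each one walks the scanline right to left, and the switch is a Duff's-device jump into the unrolled body, so the leftover pixels of the final, partially filled source byte are expanded first from the pre-shifted value v; every subsequent iteration then consumes one full source byte. Pixels are packed most-significant-bit first, which is why each group is written from its last destination slot down to dst[0]. For example, in the Pal1 case with w = 10: src starts at byte 1 and dst at pixel 8, wt & 7 == 2 jumps to case 2, pixels 9 and 8 are written from v = src[0] >> 6, and the loop then steps back to byte 0 and fills pixels 7 through 0.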
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample.h
new file mode 100644
index 000000000..a3bb7e70c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample.h
@@ -0,0 +1,83 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RESAMPLE_H
+#define f_VD2_KASUMI_UBERBLIT_RESAMPLE_H
+
+#include <vd2/system/vdstl.h>
+#include <vd2/system/math.h>
+#include "uberblit.h"
+#include "uberblit_base.h"
+#include <vd2/Kasumi/resample_kernels.h>
+
+class IVDResamplerSeparableRowStage;
+class IVDResamplerSeparableRowStage2;
+class IVDResamplerSeparableColStage;
+
+namespace nsVDPixmap {
+ enum FilterMode {
+ kFilterPoint,
+ kFilterLinear,
+ kFilterCubic,
+ kFilterLanczos3,
+ kFilterCount
+ };
+}
+
+class VDPixmapGenResampleRow : public VDPixmapGenWindowBasedOneSource {
+public:
+ VDPixmapGenResampleRow();
+ ~VDPixmapGenResampleRow();
+
+ void Init(IVDPixmapGen *src, uint32 srcIndex, uint32 width, float offset, float step, nsVDPixmap::FilterMode filterMode, float filterFactor, bool interpolationOnly);
+
+ void Start();
+
+ uint32 GetType(uint32 output) const {
+ return mpSrc->GetType(mSrcIndex);
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y);
+ void Compute8(void *dst0, sint32 y);
+ void Compute32(void *dst0, sint32 y);
+ void Compute128(void *dst0, sint32 y);
+
+ IVDResamplerSeparableRowStage *mpRowStage;
+ IVDResamplerSeparableRowStage2 *mpRowStage2;
+
+ uint32 mRowFiltW;
+ uint32 mBytesPerSample;
+
+ VDResamplerAxis mAxis;
+
+ vdblock<void *> mWindow;
+ void **mpAllocWindow;
+ vdblock<uint32, vdaligned_alloc<uint32> > mTempSpace;
+};
+
+class VDPixmapGenResampleCol : public VDPixmapGenWindowBasedOneSource {
+public:
+ VDPixmapGenResampleCol();
+ ~VDPixmapGenResampleCol();
+
+ void Init(IVDPixmapGen *src, uint32 srcIndex, uint32 height, float offset, float step, nsVDPixmap::FilterMode filterMode, float filterFactor, bool interpolationOnly);
+
+ void Start();
+
+ uint32 GetType(uint32 output) const {
+ return mpSrc->GetType(mSrcIndex);
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y);
+
+ IVDResamplerSeparableColStage *mpColStage;
+
+ uint32 mWinSize;
+ uint32 mBytesPerSample;
+ uint32 mBytesPerRow;
+
+ VDResamplerAxis mAxis;
+
+ vdblock<const void *> mWindow;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special.h
new file mode 100644
index 000000000..0f97ba1cf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special.h
@@ -0,0 +1,81 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RESAMPLE_SPECIAL_H
+#define f_VD2_KASUMI_UBERBLIT_RESAMPLE_SPECIAL_H
+
+#include <vd2/system/vdstl.h>
+#include <vd2/system/math.h>
+#include "uberblit.h"
+#include "uberblit_base.h"
+
+class VDPixmapGenResampleRow_d2_p0_lin_u8 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleRow_d4_p0_lin_u8 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleRow_x2_p0_lin_u8 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleRow_x4_p0_lin_u8 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_x2_phalf_lin_u8: public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_x4_p1half_lin_u8: public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_d2_pnqrtr_lin_u8: public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_d4_pn38_lin_u8: public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcIndex);
+ void Start();
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special_x86.h
new file mode 100644
index 000000000..6634869aa
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_resample_special_x86.h
@@ -0,0 +1,26 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RESAMPLE_SPECIAL_X86_H
+#define f_VD2_KASUMI_UBERBLIT_RESAMPLE_SPECIAL_X86_H
+
+#include "uberblit_resample_special.h"
+
+class VDPixmapGenResampleRow_x2_p0_lin_u8_ISSE : public VDPixmapGenResampleRow_x2_p0_lin_u8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleRow_x4_p0_lin_u8_MMX : public VDPixmapGenResampleRow_x4_p0_lin_u8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_d2_pnqrtr_lin_u8_ISSE: public VDPixmapGenResampleCol_d2_pnqrtr_lin_u8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGenResampleCol_d4_pn38_lin_u8_ISSE: public VDPixmapGenResampleCol_d4_pn38_lin_u8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb.h
new file mode 100644
index 000000000..21925af2a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb.h
@@ -0,0 +1,552 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RGB_H
+#define f_VD2_KASUMI_UBERBLIT_RGB_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16-bit crossconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X1R5G5B5_To_R5G6B5 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+
+ px += (px & 0x7fe0);
+ px += (px & 0x400) >> 5;
+
+ dst[i] = (uint16)px;
+ }
+ }
+};
+
+class VDPixmapGen_R5G6B5_To_X1R5G5B5 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+
+ px &= 0xffdf;
+ px -= (px & 0xffc0) >> 1;
+
+ dst[i] = (uint16)px;
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit upconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X1R5G5B5_To_X8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+ uint32 px5 = ((px & 0x7c00) << 9) + ((px & 0x03e0) << 6) + ((px & 0x001f) << 3);
+
+ dst[i] = px5 + ((px5 >> 5) & 0x070707);
+ }
+ }
+};
+
+class VDPixmapGen_R5G6B5_To_X8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+ uint32 px_rb5 = ((px & 0xf800) << 8) + ((px & 0x001f) << 3);
+ uint32 px_g6 = ((px & 0x07e0) << 5);
+
+ dst[i] = px_rb5 + px_g6 + (((px_rb5 >> 5) + (px_g6 >> 6)) & 0x070307);
+ }
+ }
+};
+
+class VDPixmapGen_R8G8B8_To_A8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+ dst[3] = 255;
+ dst += 4;
+ src += 3;
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit downconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X8R8G8B8_To_X1R5G5B5 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+
+ dst[i] = ((px >> 9) & 0x7c00) + ((px >> 6) & 0x03e0) + ((px >> 3) & 0x001f);
+ }
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R5G6B5 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+
+ dst[i] = ((px >> 8) & 0xf800) + ((px >> 5) & 0x07e0) + ((px >> 3) & 0x001f);
+ }
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 3);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+
+ dst += 3;
+ src += 4;
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit downconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X8R8G8B8_To_X1R5G5B5_Dithered : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ static const uint32 kDitherMatrix[4][4][2]={
+ { 0x00000000, 0x00000000, 0x04000400, 0x00040000, 0x01000100, 0x00010000, 0x05000500, 0x00050000 },
+ { 0x06000600, 0x00060000, 0x02000200, 0x00020000, 0x07000700, 0x00070000, 0x03000300, 0x00030000 },
+ { 0x01800180, 0x00018000, 0x05800580, 0x00058000, 0x00800080, 0x00008000, 0x04800480, 0x00048000 },
+ { 0x07800780, 0x00078000, 0x03800380, 0x00038000, 0x06800680, 0x00068000, 0x02800280, 0x00028000 },
+ };
+
+ const uint32 (*drow)[2] = kDitherMatrix[y & 3];
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+ uint32 drg = drow[i & 3][0];
+ uint32 db = drow[i & 3][1];
+ uint32 rb = (px & 0xff00ff) * 249 + drg;
+ uint32 g = (px & 0xff00) * 249 + db;
+
+ dst[i] = ((rb >> 17) & 0x7c00) + ((g >> 14) & 0x03e0) + ((rb >> 11) & 0x001f);
+ }
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R5G6B5_Dithered : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ static const uint32 kDitherMatrix[4][4][2]={
+ { 0x00000000, 0x00000000, 0x04000400, 0x00020000, 0x01000100, 0x00008000, 0x05000500, 0x00028000 },
+ { 0x06000600, 0x00030000, 0x02000200, 0x00010000, 0x07000700, 0x00038000, 0x03000300, 0x00018000 },
+ { 0x01800180, 0x0000c000, 0x05800580, 0x0002c000, 0x00800080, 0x00004000, 0x04800480, 0x00024000 },
+ { 0x07800780, 0x0003c000, 0x03800380, 0x0001c000, 0x06800680, 0x00034000, 0x02800280, 0x00014000 },
+ };
+
+ const uint32 (*drow)[2] = kDitherMatrix[y & 3];
+
+ for(sint32 i=0; i<w; ++i) {
+ uint32 px = src[i];
+ uint32 drg = drow[i & 3][0];
+ uint32 db = drow[i & 3][1];
+ uint32 rb = (px & 0xff00ff) * 249 + drg;
+ uint32 g = (px & 0xff00) * 253 + db;
+
+ dst[i] = ((rb >> 16) & 0xf800) + ((g >> 13) & 0x07e0) + ((rb >> 11) & 0x001f);
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32F upconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_8_To_32F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<w; ++i)
+ *dst++ = (float)*src++ * (1.0f / 255.0f);
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_X32B32G32R32F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 16);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_32Fx4_LE;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<w; ++i) {
+ dst[0] = (float)src[2] * (1.0f / 255.0f);
+ dst[1] = (float)src[1] * (1.0f / 255.0f);
+ dst[2] = (float)src[0] * (1.0f / 255.0f);
+ dst[3] = 1.0f;
+ dst += 4;
+ src += 4;
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32F downconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_32F_To_8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+ sint32 w = mWidth;
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<w; ++i) {
+ float b = *src++;
+
+ uint32 ib = VDClampedRoundFixedToUint8Fast(b);
+
+ dst[i] = (uint8)ib;
+ }
+ }
+};
+
+class VDPixmapGen_32F_To_8_Dithered : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+ VDCPUCleanupExtensions();
+
+ sint32 w = mWidth;
+
+#define X(v) ((v) - 0x49400000)
+
+ static const sint32 kDitherMatrix[4][4]={
+ { X( 0), X( 8), X( 2), X(10), },
+ { X(12), X( 4), X(14), X( 6), },
+ { X( 3), X(11), X( 1), X( 9), },
+ { X(15), X( 7), X(13), X( 5), },
+ };
+
+#undef X
+
+ const sint32 *pDitherRow = kDitherMatrix[y & 3];
+
+ for(sint32 i=0; i<w; ++i) {
+ float b = *src++;
+
+ sint32 addend = pDitherRow[i & 3];
+ union {
+ float f;
+ sint32 i;
+ } cb = {b * 255.0f + 786432.0f};
+
+ sint32 vb = ((sint32)cb.i + addend) >> 4;
+
+ if ((uint32)vb >= 0x100)
+ vb = (uint8)(~vb >> 31);
+
+ dst[i] = (uint8)vb;
+ }
+ }
+};
+
+class VDPixmapGen_X32B32G32R32F_To_X8R8G8B8 : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+
+ VDCPUCleanupExtensions();
+
+ sint32 w = mWidth;
+
+ for(sint32 i=0; i<w; ++i) {
+ float r = src[0];
+ float g = src[1];
+ float b = src[2];
+ src += 4;
+
+ uint32 ir = VDClampedRoundFixedToUint8Fast(r) << 16;
+ uint32 ig = VDClampedRoundFixedToUint8Fast(g) << 8;
+ uint32 ib = VDClampedRoundFixedToUint8Fast(b);
+
+ dst[i] = ir + ig + ib;
+ }
+ }
+};
+
+class VDPixmapGen_X32B32G32R32F_To_X8R8G8B8_Dithered : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start() {
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+
+ VDCPUCleanupExtensions();
+
+ sint32 w = mWidth;
+
+#define X(v) ((v) - 0x49400000)
+
+ static const sint32 kDitherMatrix[4][4]={
+ { X( 0), X( 8), X( 2), X(10), },
+ { X(12), X( 4), X(14), X( 6), },
+ { X( 3), X(11), X( 1), X( 9), },
+ { X(15), X( 7), X(13), X( 5), },
+ };
+
+#undef X
+
+ const sint32 *pDitherRow = kDitherMatrix[y & 3];
+
+ for(sint32 i=0; i<w; ++i) {
+ float r = src[0];
+ float g = src[1];
+ float b = src[2];
+ src += 4;
+
+ sint32 addend = pDitherRow[i & 3];
+ union {
+ float f;
+ sint32 i;
+ } cr = {r * 255.0f + 786432.0f},
+ cg = {g * 255.0f + 786432.0f},
+ cb = {b * 255.0f + 786432.0f};
+
+ sint32 vr = ((sint32)cr.i + addend) >> 4;
+ sint32 vg = ((sint32)cg.i + addend) >> 4;
+ sint32 vb = ((sint32)cb.i + addend) >> 4;
+
+ if ((uint32)vr >= 0x100)
+ vr = (uint8)(~vr >> 31);
+
+ if ((uint32)vg >= 0x100)
+ vg = (uint8)(~vg >> 31);
+
+ if ((uint32)vb >= 0x100)
+ vb = (uint8)(~vb >> 31);
+
+ dst[i] = (vr << 16) + (vg << 8) + vb;
+ }
+ }
+};
+
+#endif
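One arithmetic detail worth noting in the upconverters above: dst[i] = px5 + ((px5 >> 5) & 0x070707) in VDPixmapGen_X1R5G5B5_To_X8R8G8B8 is the standard bit-replication expansion. Per channel it evaluates to (v << 3) + (v >> 2) for the 5-bit value v, so 0 maps to 0 and 31 maps to 255 with no multiply or divide; VDPixmapGen_R5G6B5_To_X8R8G8B8 uses the same trick with the mask 0x070307 because its green channel carries six bits rather than five.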
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb_x86.h
new file mode 100644
index 000000000..ececed120
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_rgb_x86.h
@@ -0,0 +1,114 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_RGB_X86_H
+#define f_VD2_KASUMI_UBERBLIT_RGB_X86_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+extern "C" void vdasm_pixblt_XRGB1555_to_XRGB8888_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_RGB565_to_XRGB8888_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_RGB565_to_XRGB1555_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_XRGB1555_to_RGB565_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_XRGB8888_to_XRGB1555_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_XRGB8888_to_RGB565_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_XRGB8888_to_RGB888_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+extern "C" void vdasm_pixblt_RGB888_to_XRGB8888_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16-bit crossconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X1R5G5B5_To_R5G6B5_MMX : public VDPixmapGen_X1R5G5B5_To_R5G6B5 {
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB1555_to_RGB565_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_R5G6B5_To_X1R5G5B5_MMX : public VDPixmapGen_R5G6B5_To_X1R5G5B5 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_RGB565_to_XRGB1555_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit upconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X1R5G5B5_To_X8R8G8B8_MMX : public VDPixmapGen_X1R5G5B5_To_X8R8G8B8 {
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB1555_to_XRGB8888_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_R5G6B5_To_X8R8G8B8_MMX : public VDPixmapGen_R5G6B5_To_X8R8G8B8 {
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_RGB565_to_XRGB8888_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_R8G8B8_To_X8R8G8B8_MMX : public VDPixmapGen_R8G8B8_To_A8R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_RGB888_to_XRGB8888_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit downconverters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_X8R8G8B8_To_X1R5G5B5_MMX : public VDPixmapGen_X8R8G8B8_To_X1R5G5B5 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB8888_to_XRGB1555_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R5G6B5_MMX : public VDPixmapGen_X8R8G8B8_To_R5G6B5 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB8888_to_RGB565_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+class VDPixmapGen_X8R8G8B8_To_R8G8B8_MMX : public VDPixmapGen_X8R8G8B8_To_R8G8B8 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_pixblt_XRGB8888_to_RGB888_MMX(dst, 0, src, 0, mWidth, 1);
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle.h
new file mode 100644
index 000000000..a87fe1f5c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle.h
@@ -0,0 +1,343 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_UBERBLIT_SWIZZLE_H
+#define f_VD2_KASUMI_UBERBLIT_SWIZZLE_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// generic converters
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_Swap8In16 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *gen, int srcIndex, uint32 w, uint32 h, uint32 bpr);
+ void Start();
+
+ uint32 GetType(uint32 index) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+
+ uint32 mRowLength;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit deinterleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_8In16 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *gen, int srcIndex, int offset, uint32 w, uint32 h) {
+ InitSource(gen, srcIndex);
+ mOffset = offset;
+ SetOutputSize(w, h);
+ gen->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ StartWindow(mWidth);
+ }
+
+ uint32 GetType(uint32 index) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex) + mOffset;
+ uint8 *dst = (uint8 *)dst0;
+ sint32 w = mWidth;
+ for(sint32 x=0; x<w; ++x) {
+ *dst++ = *srcp;
+ srcp += 2;
+ }
+ }
+
+ int mOffset;
+};
+
+class VDPixmapGen_8In32 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *gen, int srcIndex, int offset, uint32 w, uint32 h) {
+ InitSource(gen, srcIndex);
+ mOffset = offset;
+ SetOutputSize(w, h);
+ gen->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ StartWindow(mWidth);
+ }
+
+ uint32 GetType(uint32 index) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex) + mOffset;
+ uint8 *dst = (uint8 *)dst0;
+ sint32 w = mWidth;
+ for(sint32 x=0; x<w; ++x) {
+ *dst++ = *srcp;
+ srcp += 4;
+ }
+ }
+
+ int mOffset;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16-bit interleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_B8x2_To_B8R8 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCb, uint32 srcindexCb, IVDPixmapGen *srcCr, uint32 srcindexCr);
+ void Start();
+ uint32 GetType(uint32 output) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit interleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_B8x3_To_G8B8_G8R8 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCr, uint32 srcindexCr, IVDPixmapGen *srcY, uint32 srcindexY, IVDPixmapGen *srcCb, uint32 srcindexCb) {
+ mpSrcY = srcY;
+ mSrcIndexY = srcindexY;
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcY->GetWidth(srcindexY);
+ mHeight = srcY->GetHeight(srcindexY);
+
+ srcY->AddWindowRequest(0, 0);
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(((mWidth + 1) & ~1) * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~kVDPixType_Mask) | kVDPixType_B8G8_R8G8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *)dst0;
+ const uint8 *VDRESTRICT srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *VDRESTRICT srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ sint32 w = mWidth >> 1;
+ for(sint32 x=0; x<w; ++x) {
+ uint8 y1 = srcY[0];
+ uint8 cb = srcCb[0];
+ uint8 y2 = srcY[1];
+ uint8 cr = srcCr[0];
+
+ dst[0] = y1;
+ dst[1] = cb;
+ dst[2] = y2;
+ dst[3] = cr;
+
+ srcY += 2;
+ ++srcCb;
+ ++srcCr;
+ dst += 4;
+ }
+
+ if (mWidth & 1) {
+ uint8 y1 = srcY[0];
+ uint8 cb = srcCb[0];
+ uint8 cr = srcCr[0];
+
+ dst[0] = y1;
+ dst[1] = cb;
+ dst[2] = y1;
+ dst[3] = cr;
+ }
+ }
+
+ IVDPixmapGen *mpSrcY;
+ uint32 mSrcIndexY;
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+class VDPixmapGen_B8x3_To_B8G8_R8G8 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCr, uint32 srcindexCr, IVDPixmapGen *srcY, uint32 srcindexY, IVDPixmapGen *srcCb, uint32 srcindexCb) {
+ mpSrcY = srcY;
+ mSrcIndexY = srcindexY;
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcY->GetWidth(srcindexY);
+ mHeight = srcY->GetHeight(srcindexY);
+
+ srcY->AddWindowRequest(0, 0);
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(((mWidth + 1) & ~1) * 2);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~kVDPixType_Mask) | kVDPixType_G8B8_G8R8;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 * VDRESTRICT dst = (uint8 *)dst0;
+ const uint8 *VDRESTRICT srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *VDRESTRICT srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ sint32 w2 = mWidth >> 1;
+ for(sint32 x=0; x<w2; ++x) {
+ uint8 cb = srcCb[0];
+ uint8 y1 = srcY[0];
+ uint8 cr = srcCr[0];
+ uint8 y2 = srcY[1];
+
+ dst[0] = cb;
+ dst[1] = y1;
+ dst[2] = cr;
+ dst[3] = y2;
+ dst += 4;
+ srcY += 2;
+ ++srcCb;
+ ++srcCr;
+ }
+
+ if (mWidth & 1) {
+ uint8 cb = srcCb[0];
+ uint8 y1 = srcY[0];
+ uint8 cr = srcCr[0];
+
+ dst[0] = cb;
+ dst[1] = y1;
+ dst[2] = cr;
+ dst[3] = y1;
+ }
+ }
+
+ IVDPixmapGen *mpSrcY;
+ uint32 mSrcIndexY;
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+class VDPixmapGen_B8x3_To_X8R8G8B8 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCr, uint32 srcindexCr, IVDPixmapGen *srcY, uint32 srcindexY, IVDPixmapGen *srcCb, uint32 srcindexCb) {
+ mpSrcY = srcY;
+ mSrcIndexY = srcindexY;
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcY->GetWidth(srcindexY);
+ mHeight = srcY->GetHeight(srcindexY);
+
+ srcY->AddWindowRequest(0, 0);
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~kVDPixType_Mask) | kVDPixType_8888;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ for(sint32 x=0; x<mWidth; ++x) {
+ uint8 y = *srcY++;
+ uint8 cb = *srcCb++;
+ uint8 cr = *srcCr++;
+
+ dst[0] = cb;
+ dst[1] = y;
+ dst[2] = cr;
+ dst[3] = 255;
+ dst += 4;
+ }
+ }
+
+ IVDPixmapGen *mpSrcY;
+ uint32 mSrcIndexY;
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle_x86.h
new file mode 100644
index 000000000..fecec9a53
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_swizzle_x86.h
@@ -0,0 +1,71 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_UBERBLIT_SWIZZLE_X86_H
+#define f_VD2_KASUMI_UBERBLIT_SWIZZLE_X86_H
+
+#include "uberblit_swizzle.h"
+
+class VDPixmapGen_8In16_Even_MMX : public VDPixmapGen_8In16 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGen_8In16_Odd_MMX : public VDPixmapGen_8In16 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGen_8In32_MMX : public VDPixmapGen_8In32 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGen_Swap8In16_MMX : public VDPixmapGen_Swap8In16 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 16-bit interleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_B8x2_To_B8R8_MMX : public VDPixmapGen_B8x2_To_B8R8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32-bit interleavers
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_B8x3_To_G8B8_G8R8_MMX : public VDPixmapGen_B8x3_To_G8B8_G8R8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+class VDPixmapGen_B8x3_To_B8G8_R8G8_MMX : public VDPixmapGen_B8x3_To_B8G8_R8G8 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_v210.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_v210.h
new file mode 100644
index 000000000..aa734aa36
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_v210.h
@@ -0,0 +1,72 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_V210_H
+#define f_VD2_KASUMI_UBERBLIT_V210_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit_base.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// 32F -> V210
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_32F_To_V210 : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcR, uint32 srcindexR, IVDPixmapGen *srcG, uint32 srcindexG, IVDPixmapGen *srcB, uint32 srcindexB) {
+ mpSrcR = srcR;
+ mSrcIndexR = srcindexR;
+ mpSrcG = srcG;
+ mSrcIndexG = srcindexG;
+ mpSrcB = srcB;
+ mSrcIndexB = srcindexB;
+ mWidth = srcG->GetWidth(srcindexG);
+ mHeight = srcG->GetHeight(srcindexG);
+
+ srcR->AddWindowRequest(0, 0);
+ srcG->AddWindowRequest(0, 0);
+ srcB->AddWindowRequest(0, 0);
+ }
+
+ void Start() {
+ mpSrcR->Start();
+ mpSrcG->Start();
+ mpSrcB->Start();
+
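+		// v210 packs 6 pixels into each 16-byte group of four 32-bit words,
+		// and rows are padded out to 48-pixel (128-byte) groups, hence the
+		// rounding below.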
+ int qw = (mWidth + 47) / 48;
+ StartWindow(qw * 128);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcG->GetType(mSrcIndexG) & ~kVDPixType_Mask) | kVDPixType_V210;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y);
+
+ IVDPixmapGen *mpSrcR;
+ uint32 mSrcIndexR;
+ IVDPixmapGen *mpSrcG;
+ uint32 mSrcIndexG;
+ IVDPixmapGen *mpSrcB;
+ uint32 mSrcIndexB;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// V210 -> 32F
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+class VDPixmapGen_V210_To_32F : public VDPixmapGenWindowBasedOneSourceSimple {
+public:
+ void Start();
+ const void *GetRow(sint32 y, uint32 index);
+
+ sint32 GetWidth(int index) const;
+ uint32 GetType(uint32 output) const;
+
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif // f_VD2_KASUMI_UBERBLIT_V210_H
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr.h
new file mode 100644
index 000000000..2eb62da01
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr.h
@@ -0,0 +1,584 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_YCBCR_H
+#define f_VD2_KASUMI_UBERBLIT_YCBCR_H
+
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/math.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "uberblit.h"
+#include "uberblit_base.h"
+
+class VDPixmapGenYCbCrToRGBBase : public VDPixmapGenWindowBased {
+public:
+ void Init(IVDPixmapGen *srcCr, uint32 srcindexCr, IVDPixmapGen *srcY, uint32 srcindexY, IVDPixmapGen *srcCb, uint32 srcindexCb) {
+ mpSrcY = srcY;
+ mSrcIndexY = srcindexY;
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcY->GetWidth(srcindexY);
+ mHeight = srcY->GetHeight(srcindexY);
+
+ srcY->AddWindowRequest(0, 0);
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+ }
+
+
+protected:
+ IVDPixmapGen *mpSrcY;
+ uint32 mSrcIndexY;
+ IVDPixmapGen *mpSrcCb;
+ uint32 mSrcIndexCb;
+ IVDPixmapGen *mpSrcCr;
+ uint32 mSrcIndexCr;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Rec.601 converters
+//
+// -->Kr=0.299; Kb=0.114; Z=0; S=255; L = [Kr 1-Kr-Kb Kb]; Y = [219*(L-Z)/S 16]; U = [112*([0 0 1]-L)/((1-Kb)*S) 128]; V =
+// [112*([1 0 0]-L)/((1-Kr)*S) 128]; M = [Y; U; V; 0 0 0 1]; disp(M); disp(inv(M));
+//
+// ! 0.2567882 0.5041294 0.0979059 16. !
+// ! - 0.1482229 - 0.2909928 0.4392157 128. !
+// ! 0.4392157 - 0.3677883 - 0.0714274 128. !
+// ! 0. 0. 0. 1. !
+//
+// ! 1.1643836 - 5.599D-17 1.5960268 - 222.92157 !
+// ! 1.1643836 - 0.3917623 - 0.8129676 135.57529 !
+// ! 1.1643836 2.0172321 - 1.110D-16 - 276.83585 !
+// ! 0. 0. 0. 1. !
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
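+// The converters below use rounded entries from the inverse matrix above:
+// 1.164 for the Y scale, 1.596 for Cr->R, 0.813 and 0.391 for the Cr/Cb
+// contributions to G, and 2.018 for Cb->B, with the usual 16 luma and 128
+// chroma offsets.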
+
+class VDPixmapGenYCbCr601ToRGB32 : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8888 | kVDPixSpace_BGR;
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ sint32 y = srcY[i];
+ sint32 cb = srcCb[i];
+ sint32 cr = srcCr[i];
+
+ float yf = (1.164f / 255.0f)*(y - 16);
+
+ dst[0] = VDClampedRoundFixedToUint8Fast(yf + (2.018f / 255.0f) * (cb - 128));
+ dst[1] = VDClampedRoundFixedToUint8Fast(yf - (0.813f / 255.0f) * (cr - 128) - (0.391f / 255.0f) * (cb - 128));
+ dst[2] = VDClampedRoundFixedToUint8Fast(yf + (1.596f / 255.0f) * (cr - 128));
+ dst[3] = 0xff;
+
+ dst += 4;
+ }
+ }
+};
+
+class VDPixmapGenYCbCr601ToRGB32F : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 16);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32Fx4_LE | kVDPixSpace_BGR;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const float *srcY = (const float *)mpSrcY->GetRow(y, mSrcIndexY);
+ const float *srcCb = (const float *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const float *srcCr = (const float *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float y = srcY[i];
+ float cb = srcCb[i] - (128.0f / 255.0f);
+ float cr = srcCr[i] - (128.0f / 255.0f);
+
+ float yf = 1.164f * (y - 16.0f / 255.0f);
+
+ dst[0] = yf + 1.596f * cr;
+ dst[1] = yf - 0.813f * cr - 0.391f * cb;
+ dst[2] = yf + 2.018f * cb;
+ dst[3] = 1.0f;
+ dst += 4;
+ }
+ }
+};
+
+class VDPixmapGenRGB32ToYCbCr601 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+ }
+
+ void Start() {
+ StartWindow(mWidth, 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8 | kVDPixSpace_YCC_601;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dstCb = (uint8 *)dst0;
+ uint8 *dstY = dstCb + mWindowPitch;
+ uint8 *dstCr = dstY + mWindowPitch;
+
+ const uint8 *srcRGB = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ int r = (int)srcRGB[2];
+ int g = (int)srcRGB[1];
+ int b = (int)srcRGB[0];
+ srcRGB += 4;
+
+
+ // -2->round(inv([1 0 0 0; 0 1 0 0; 0 0 1 0; -16 -128 -128 1] * [1.1643828 1.1643828 1.1643828 0; 1.5960273 -0.8129688 0 0;
+ // 0 -0.3917617 2.0172305 0; 0 0 0 1]) .* 65536)
+ // ans =
+ //
+ // ! 16829. 28784. - 9714. 0. !
+ // ! 33039. - 24103. - 19071. 0. !
+ // ! 6416. - 4681. 28784. 0. !
+ // ! 1048576. 8388608. 8388608. 65536. !
+
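+			// The integer coefficients below are the matrix entries scaled by
+			// 65536 (e.g. round(0.4392157 * 65536) = 28784); 1048576 and
+			// 8388608 are the 16 and 128 offsets shifted left by 16, and the
+			// +32768 term rounds the 16.16 result before the final >> 16.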
+ *dstCb++ = (28784*r - 24103*g - 4681*b + 8388608 + 32768) >> 16;
+ *dstY ++ = (16829*r + 33039*g + 6416*b + 1048576 + 32768) >> 16;
+ *dstCr++ = (-9714*r - 19071*g + 28784*b + 8388608 + 32768) >> 16;
+ }
+ }
+};
+
+class VDPixmapGenRGB32FToYCbCr601 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+ }
+
+ void Start() {
+ StartWindow(mWidth * sizeof(float), 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+		return (mpSrc->GetType(mSrcIndex) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32F_LE | kVDPixSpace_YCC_601;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dstCb = (float *)dst0;
+ float *dstY = dstCb + mWindowPitch;
+ float *dstCr = dstY + mWindowPitch;
+
+ const float *srcRGB = (const float *)mpSrc->GetRow(y, mSrcIndex);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float r = srcRGB[2];
+ float g = srcRGB[1];
+ float b = srcRGB[0];
+ srcRGB += 4;
+
+ *dstCb++ = -0.1482229f*r - 0.2909928f*g + 0.4392157f*b + (128.0f / 255.0f);
+ *dstY++ = 0.2567882f*r + 0.5041294f*g + 0.0979059f*b + ( 16.0f / 255.0f);
+ *dstCr++ = 0.4392157f*r - 0.3677883f*g - 0.0714274f*b + (128.0f / 255.0f);
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Rec.709 converters
+//
+//
+// -->Kr=0.2126; Kb=0.0722; Z=0; S=255; L = [Kr 1-Kr-Kb Kb]; Y = [219*(L-Z)/S 16]; U = [112*([0 0 1]-L)/((1-Kb)*S) 128]; V
+// = [112*([1 0 0]-L)/((1-Kr)*S) 128]; M = [Y; U; V; 0 0 0 1]; disp(M); disp(inv(M));
+//
+// ! 0.1825859 0.6142306 0.0620071 16. !
+// ! - 0.1006437 - 0.3385720 0.4392157 128. !
+// ! 0.4392157 - 0.3989422 - 0.0402735 128. !
+// ! 0. 0. 0. 1. !
+//
+// ! 1.1643836 - 2.932D-17 1.7927411 - 248.10099 !
+// ! 1.1643836 - 0.2132486 - 0.5329093 76.87808 !
+// ! 1.1643836 2.1124018 - 5.551D-17 - 289.01757 !
+// ! 0. 0. 0. 1. !
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
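+// As in the Rec.601 case, the converters below use rounded inverse-matrix
+// entries: 1.793 for Cr->R, 0.533 and 0.213 for the Cr/Cb contributions to
+// G, and 2.112 for Cb->B.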
+
+class VDPixmapGenYCbCr709ToRGB32 : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 4);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8888 | kVDPixSpace_BGR;
+ }
+
+protected:
+ virtual void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ sint32 y = srcY[i];
+ sint32 cb = srcCb[i];
+ sint32 cr = srcCr[i];
+
+ float yf = (1.164f / 255.0f)*(y - 16);
+
+ dst[0] = VDClampedRoundFixedToUint8Fast(yf + (2.112f / 255.0f) * (cb - 128));
+ dst[1] = VDClampedRoundFixedToUint8Fast(yf - (0.533f / 255.0f) * (cr - 128) - (0.213f / 255.0f) * (cb - 128));
+ dst[2] = VDClampedRoundFixedToUint8Fast(yf + (1.793f / 255.0f) * (cr - 128));
+ dst[3] = 0xff;
+
+ dst += 4;
+ }
+ }
+};
+
+class VDPixmapGenYCbCr709ToRGB32F : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 16);
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32Fx4_LE | kVDPixSpace_BGR;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const float *srcY = (const float *)mpSrcY->GetRow(y, mSrcIndexY);
+ const float *srcCb = (const float *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const float *srcCr = (const float *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float y = srcY[i];
+ float cb = srcCb[i] - (128.0f/255.0f);
+ float cr = srcCr[i] - (128.0f/255.0f);
+
+ float yf = 1.164f * (y - 16.0f / 255.0f);
+
+ dst[0] = yf + 1.793f * cr;
+ dst[1] = yf - 0.533f * cr - 0.213f * cb;
+ dst[2] = yf + 2.112f * cb;
+ dst[3] = 1.0f;
+ dst += 4;
+ }
+ }
+};
+
+class VDPixmapGenRGB32ToYCbCr709 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+ }
+
+ void Start() {
+ StartWindow(mWidth, 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8 | kVDPixSpace_YCC_709;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dstCb = (uint8 *)dst0;
+ uint8 *dstY = dstCb + mWindowPitch;
+ uint8 *dstCr = dstY + mWindowPitch;
+
+ const uint8 *srcRGB = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ int r = (int)srcRGB[2];
+ int g = (int)srcRGB[1];
+ int b = (int)srcRGB[0];
+ srcRGB += 4;
+
+ *dstCb++ = (28784*r - 26145*g - 2639*b + 8388608 + 32768) >> 16;
+ *dstY ++ = (11966*r + 40254*g + 4064*b + 1048576 + 32768) >> 16;
+ *dstCr++ = (-6596*r - 22189*g + 28784*b + 8388608 + 32768) >> 16;
+ }
+ }
+};
+
+class VDPixmapGenRGB32FToYCbCr709 : public VDPixmapGenWindowBasedOneSource {
+public:
+ void Init(IVDPixmapGen *src, uint32 srcindex) {
+ InitSource(src, srcindex);
+ }
+
+ void Start() {
+ StartWindow(mWidth * sizeof(float), 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32F_LE | kVDPixSpace_YCC_709;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 y) {
+ float *dstCb = (float *)dst0;
+ float *dstY = dstCb + mWindowPitch;
+ float *dstCr = dstY + mWindowPitch;
+
+ const float *srcRGB = (const float *)mpSrc->GetRow(y, mSrcIndex);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float r = srcRGB[2];
+ float g = srcRGB[1];
+ float b = srcRGB[0];
+ srcRGB += 4;
+
+ *dstCb++ = -0.1006437f*r - 0.3385720f*g + 0.4392157f*b + (128.0f / 255.0f);
+ *dstY++ = 0.1825859f*r + 0.6142306f*g + 0.0620071f*b + ( 16.0f / 255.0f);
+ *dstCr++ = 0.4392157f*r - 0.3989422f*g - 0.0402735f*b + (128.0f / 255.0f);
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Rec.601 <-> Rec.709 converters
+//
+// Rec.601 to Rec.709:
+//
+// 1. - 0.1155497 - 0.2079376 41.406386
+// 0 1.0186397 0.1146180 - 17.056983
+// 0 0.0750494 1.0253271 - 12.848195
+//
+// Rec.709 to Rec.601:
+//
+// 1. 0.0993117 0.1916995 - 37.249435
+// 0 0.9898538 - 0.1106525 15.462234
+// 0 - 0.0724530 0.9833978 11.399058
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
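+// The 8-bit converters below apply these matrices in 16.16 fixed point,
+// e.g. 66758 ~= round(1.0186397 * 65536) and 2713609 ~= round(41.406386 *
+// 65536); the +32768 term rounds the result before the final >> 16.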
+
+class VDPixmapGenYCbCr601ToYCbCr709 : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth, 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenYCbCrToRGBBase::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8 | kVDPixSpace_YCC_709;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 ypos) {
+ uint8 *dstCr = (uint8 *)dst0;
+ uint8 *dstY = dstCr + mWindowPitch;
+ uint8 *dstCb = dstY + mWindowPitch;
+
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(ypos, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(ypos, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(ypos, mSrcIndexCr);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ sint32 y = srcY[i];
+ sint32 cb = srcCb[i];
+ sint32 cr = srcCr[i];
+
+ *dstY++ = y + ((-7573*cb - 13627*cr + 2713609 + 32768) >> 16);
+ *dstCb++ = (66758*cb + 7512*cr - 1117846 + 32768) >> 16;
+ *dstCr++ = (4918*cb + 67196*cr - 842019 + 32768) >> 16;
+ }
+ }
+};
+
+class VDPixmapGenYCbCr709ToYCbCr601 : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth, 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenYCbCrToRGBBase::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+		return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_8 | kVDPixSpace_YCC_601;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 ypos) {
+ uint8 *dstCr = (uint8 *)dst0;
+ uint8 *dstY = dstCr + mWindowPitch;
+ uint8 *dstCb = dstY + mWindowPitch;
+
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(ypos, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(ypos, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(ypos, mSrcIndexCr);
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ sint32 y = srcY[i];
+ sint32 cb = srcCb[i];
+ sint32 cr = srcCr[i];
+
+ *dstY++ = y + ((6508*cb + 12563*cr - 2441088 + 32768) >> 16);
+ *dstCb++ = (64871*cb - 7252*cr + 1013376 + 32768) >> 16;
+ *dstCr++ = (-4748*cb + 64448*cr + 747008 + 32768) >> 16;
+ }
+ }
+};
+
+class VDPixmapGenYCbCr601ToYCbCr709_32F : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * sizeof(float), 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenYCbCrToRGBBase::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+ return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32F_LE | kVDPixSpace_YCC_709;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 ypos) {
+ float *dstCr = (float *)dst0;
+ float *dstY = vdptroffset(dstCr, mWindowPitch);
+ float *dstCb = vdptroffset(dstY, mWindowPitch);
+
+ const float *srcY = (const float *)mpSrcY->GetRow(ypos, mSrcIndexY);
+ const float *srcCb = (const float *)mpSrcCb->GetRow(ypos, mSrcIndexCb);
+ const float *srcCr = (const float *)mpSrcCr->GetRow(ypos, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float y = srcY[i];
+ float cb = srcCb[i] - (128.0f / 255.0f);
+ float cr = srcCr[i] - (128.0f / 255.0f);
+
+ *dstY++ = y - 0.1155497f*cb - 0.2079376f*cr;
+ *dstCb++ = 1.0186397f*cb + 0.1146180f*cr + (128.0f / 255.0f);
+ *dstCr++ = 0.0750494f*cb + 1.0253271f*cr + (128.0f / 255.0f);
+ }
+ }
+};
+
+class VDPixmapGenYCbCr709ToYCbCr601_32F : public VDPixmapGenYCbCrToRGBBase {
+public:
+ void Start() {
+ mpSrcY->Start();
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * sizeof(float), 3);
+ }
+
+ const void *GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenYCbCrToRGBBase::GetRow(y, index) + mWindowPitch * index;
+ }
+
+ uint32 GetType(uint32 output) const {
+		return (mpSrcY->GetType(mSrcIndexY) & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixType_32F_LE | kVDPixSpace_YCC_601;
+ }
+
+protected:
+ void Compute(void *dst0, sint32 ypos) {
+ float *dstCr = (float *)dst0;
+ float *dstY = vdptroffset(dstCr, mWindowPitch);
+ float *dstCb = vdptroffset(dstY, mWindowPitch);
+
+ const float *srcY = (const float *)mpSrcY->GetRow(ypos, mSrcIndexY);
+ const float *srcCb = (const float *)mpSrcCb->GetRow(ypos, mSrcIndexCb);
+ const float *srcCr = (const float *)mpSrcCr->GetRow(ypos, mSrcIndexCr);
+
+ VDCPUCleanupExtensions();
+
+ for(sint32 i=0; i<mWidth; ++i) {
+ float y = srcY[i];
+ float cb = srcCb[i] - (128.0f / 255.0f);
+ float cr = srcCr[i] - (128.0f / 255.0f);
+
+			*dstY++ = y + 0.0993117f*cb + 0.1916995f*cr;
+ *dstCb++ = 0.9898538f*cb - 0.1106525f*cr + (128.0f / 255.0f);
+ *dstCr++ = - 0.0724530f*cb + 0.9833978f*cr + (128.0f / 255.0f);
+ }
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr_x86.h b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr_x86.h
new file mode 100644
index 000000000..fd9a66908
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/h/uberblit_ycbcr_x86.h
@@ -0,0 +1,27 @@
+#ifndef f_VD2_KASUMI_UBERBLIT_YCBCR_X86_H
+#define f_VD2_KASUMI_UBERBLIT_YCBCR_X86_H
+
+#include <vd2/system/cpuaccel.h>
+#include "uberblit.h"
+#include "uberblit_ycbcr.h"
+
+extern "C" void __cdecl vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX(void *dst, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 count);
+
+class VDPixmapGenYCbCr601ToRGB32_MMX : public VDPixmapGenYCbCr601ToRGB32 {
+protected:
+ void Compute(void *dst0, sint32 y) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *srcY = (const uint8 *)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX(dst, srcY, srcCb, srcCr, mWidth);
+ }
+};
+
+class VDPixmapGenRGB32ToYCbCr601_SSE2 : public VDPixmapGenRGB32ToYCbCr601 {
+protected:
+ void Compute(void *dst0, sint32 y);
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a64_resample.asm64 b/src/thirdparty/VirtualDub/Kasumi/source/a64_resample.asm64
new file mode 100644
index 000000000..e6de1eabf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a64_resample.asm64
@@ -0,0 +1,620 @@
+; VirtualDub - Video processing and capture application
+; Graphics support library
+; Copyright (C) 1998-2004 Avery Lee
+;
+; This program is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or
+; (at your option) any later version.
+;
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, write to the Free Software
+; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+;
+
+ default rel
+
+ segment .rdata, align=16
+
+ align 16
+roundval dq 0000200000002000h, 0000200000002000h
+
+
+ segment .text
+
+
+%macro VDSAVE 1-*
+
+ %rep %0
+ %rotate -1
+ push %1
+ [pushreg %1]
+ %endrep
+
+%endmacro
+
+%macro VDRESTORE 1-*
+
+ %rep %0
+ pop %1
+
+ %rotate 1
+ %endrep
+
+%endmacro
+
+%macro VDSAVEXMM128 2
+%assign %%count %2 + 1 - %1
+%assign %%stkoffset 0
+%assign %%reg %1
+
+ sub rsp, %%count*16+8
+ [allocstack %%count*16]
+
+ %rep %%count
+ movdqa oword [rsp+%%stkoffset], xmm %+ %%reg
+ [savexmm128 xmm %+ %%reg, %%stkoffset]
+
+ %assign %%stkoffset %%stkoffset + 16
+ %assign %%reg %%reg + 1
+ %endrep
+%endmacro
+
+%macro VDRESTOREXMM128 2
+%assign %%count %2+1-%1
+%assign %%stkoffset %%count*16
+%assign %%reg %2
+
+ %rep %%count
+ %assign %%stkoffset %%stkoffset-16
+ movdqa xmm %+ %%reg, oword [rsp+%%stkoffset]
+
+ %assign %%reg %%reg-1
+ %endrep
+
+ add rsp, %%count*16+8
+%endmacro
+
+;-------------------------------------------------------------------------
+;
+; long vdasm_resize_table_row_SSE2(
+; Pixel *out, // rcx
+; Pixel *in, // rdx
+; int *filter, // r8
+; int filter_width, // r9d
+; PixDim w, // [rsp+40]
+; long accum, // [rsp+48]
+; long frac); // [rsp+56]
+;
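+; The first four arguments arrive in rcx/rdx/r8/r9 per the Win64 convention;
+; with the return address and the 32-byte shadow area, the fifth argument
+; sits at [rsp+40] on entry, which is what .parms re-bases past the saved
+; GPR and XMM registers.
+;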
+ global vdasm_resize_table_row_SSE2
+proc_frame vdasm_resize_table_row_SSE2
+
+ VDSAVE rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ VDSAVEXMM128 6, 15
+end_prolog
+
+ .parms equ rsp+168+64
+
+ mov r10d, dword [.parms+40]
+ shl r10, 2
+ add rcx, r10
+ neg r10
+ shl r9d, 2 ;filter_width <<= 2
+
+ movaps xmm6, oword [roundval]
+ pxor xmm5, xmm5
+ mov rsi, rdx
+ shr rsi, 2
+
+ mov edi, [.parms+48]
+ mov eax, edi
+ shl edi, 16
+ sar rax, 16
+ add rsi, rax
+ mov ebp, [.parms+56]
+ movsxd r11, ebp
+ shl ebp, 16
+ sar r11, 16
+
+ ;register map
+ ;
+ ;eax temp coefficient pair counter
+ ;rbx temp coefficient pointer
+ ;rcx destination
+ ;rdx temp source
+ ;rsi source/4
+ ;edi accumulator
+ ;ebp fractional increment
+ ;r8 filter
+ ;r9 filter_width*4
+ ;r10 -width*4
+ ;r11 integral increment
+ ;r12
+ ;r13
+ ;r14
+ ;r15
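+	;
+	;edi:rsi together form a 16.16 source position: edi keeps the fraction
+	;shifted into its high word, rsi the integer pixel index, and the top
+	;eight fraction bits (shr eax, 24) select the coefficient set for each
+	;output pixel.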
+
+ cmp r9d, 16
+ jz .accel_4coeff
+ cmp r9d, 24
+ jz .accel_6coeff
+
+ test r9d, 8
+ jz .pixelloop_even_pairs
+ cmp r9d, 8
+ jnz .pixelloop_odd_pairs
+
+.pixelloop_single_pairs:
+ mov eax, edi
+ shr eax, 24
+ imul eax, r9d
+
+ lea rdx, [rsi*4]
+
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm0, xmm5
+ movq xmm1, qword [r8+rax]
+ pshufd xmm1, xmm1, 01000100b
+ pmaddwd xmm0, xmm1
+
+ movdqa xmm4, xmm6
+ paddd xmm4, xmm0
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_single_pairs
+ jmp .xit
+
+.pixelloop_odd_pairs:
+ movdqa xmm4, xmm6
+
+ mov eax, edi
+ shr eax, 24
+ imul eax, r9d
+ lea rbx, [r8+rax]
+
+ lea rdx, [rsi*4]
+ lea rax, [r9-8]
+.coeffloop_odd_pairs:
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ movd xmm2, dword [rdx+8] ;xmm2 = p2
+ movd xmm3, dword [rdx+12] ;xmm3 = p3
+ add rdx, 16
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ movq xmm1, qword [rbx]
+ movq xmm3, qword [rbx+8]
+ add rbx, 16
+ pshufd xmm1, xmm1, 01000100b
+ pshufd xmm3, xmm3, 01000100b
+ pmaddwd xmm0, xmm1
+ pmaddwd xmm2, xmm3
+ paddd xmm0, xmm2
+ paddd xmm4, xmm0
+ sub eax, 16
+ jnz .coeffloop_odd_pairs
+
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm0, xmm5
+ movq xmm1, qword [rbx]
+ pshufd xmm1, xmm1, 01000100b
+ pmaddwd xmm0, xmm1
+ paddd xmm4, xmm0
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_odd_pairs
+ jmp .xit
+
+.pixelloop_even_pairs:
+ movdqa xmm4, xmm6
+
+ mov eax, edi
+ shr eax, 24
+ imul eax, r9d
+ lea rbx, [r8+rax]
+
+ lea rdx, [rsi*4]
+ mov eax, r9d
+.coeffloop_even_pairs:
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ movd xmm2, dword [rdx+8] ;xmm2 = p2
+ movd xmm3, dword [rdx+12] ;xmm3 = p3
+ add rdx, 16
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ movq xmm1, qword [rbx]
+ movq xmm3, qword [rbx+8]
+ add rbx, 16
+ pshufd xmm1, xmm1, 01000100b
+ pshufd xmm3, xmm3, 01000100b
+ pmaddwd xmm0, xmm1
+ pmaddwd xmm2, xmm3
+ paddd xmm0, xmm2
+ paddd xmm4, xmm0
+ sub eax, 16
+ jnz .coeffloop_even_pairs
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_even_pairs
+
+.xit:
+ VDRESTOREXMM128 6, 15
+ VDRESTORE rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ ret
+
+.accel_4coeff:
+.pixelloop_4coeff:
+ pxor xmm5, xmm5
+ movdqa xmm4, xmm6
+
+ mov eax, 0ff000000h
+ lea rdx, [rsi*4]
+ and eax, edi
+ shr eax, 20
+ lea rbx, [r8+rax]
+
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ movd xmm2, dword [rdx+8] ;xmm2 = p2
+ movd xmm3, dword [rdx+12] ;xmm3 = p3
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ movq xmm1, qword [rbx]
+ movq xmm3, qword [rbx+8]
+ pshufd xmm1, xmm1, 01000100b
+ pshufd xmm3, xmm3, 01000100b
+ pmaddwd xmm0, xmm1
+ pmaddwd xmm2, xmm3
+ paddd xmm0, xmm2
+ paddd xmm4, xmm0
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_4coeff
+ jmp .xit
+
+.accel_6coeff:
+.pixelloop_6coeff:
+ pxor xmm5, xmm5
+ movdqa xmm4, xmm6
+
+ lea rdx, [rsi*4]
+ mov eax, edi
+ shr eax, 24
+ lea rax, [rax+rax*2]
+ lea rbx, [r8+rax*8]
+
+ movd xmm0, dword [rdx] ;xmm0 = p0
+ movd xmm1, dword [rdx+4] ;xmm1 = p1
+ movd xmm2, dword [rdx+8] ;xmm2 = p2
+ movd xmm3, dword [rdx+12] ;xmm3 = p3
+	movd	xmm8, dword [rdx+16]	;xmm8 = p4
+	movd	xmm9, dword [rdx+20]	;xmm9 = p5
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm8, xmm9
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm8, xmm5
+ movq xmm1, qword [rbx]
+ movq xmm3, qword [rbx+8]
+ movq xmm9, qword [rbx+16]
+ pshufd xmm1, xmm1, 01000100b
+ pshufd xmm3, xmm3, 01000100b
+ pshufd xmm9, xmm9, 01000100b
+ pmaddwd xmm0, xmm1
+ pmaddwd xmm2, xmm3
+ pmaddwd xmm8, xmm9
+ paddd xmm0, xmm2
+ paddd xmm4, xmm0
+ paddd xmm4, xmm8
+
+ psrad xmm4, 14
+ packssdw xmm4, xmm4
+ packuswb xmm4, xmm4
+
+ add edi, ebp
+ adc rsi, r11
+
+ movd dword [rcx+r10], xmm4
+ add r10, 4
+ jnz .pixelloop_6coeff
+ jmp .xit
+endproc_frame
+
+
+;--------------------------------------------------------------------------
+;
+; vdasm_resize_table_col_SSE2(
+; uint32 *dst, // rcx
+; const uint32 *const *srcs, // rdx
+; int *filter, // r8
+; int filter_width, // r9d
+; PixDim w, // [rsp+40] -> r10d
+; );
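+;	srcs holds one source row pointer per filter tap; each output pixel is
+;	accumulated vertically across those rows with the same 0x2000 rounding
+;	and final psrad 14 as the row routine above.
+;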
+;
+ global vdasm_resize_table_col_SSE2
+proc_frame vdasm_resize_table_col_SSE2
+ VDSAVE rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ VDSAVEXMM128 6, 15
+end_prolog
+
+ .parms equ rsp+168+64
+
+ mov r10d, [.parms+40] ;r10d = w
+
+ pxor xmm5, xmm5
+ movdqa xmm4, oword [roundval]
+ xor rbx, rbx ;rbx = source offset
+
+ cmp r9d, 4
+ jz .accel_4coeff
+ cmp r9d, 6
+ jz .accel_6coeff
+
+ shr r9d, 1 ;r9d = filter pair count
+
+.pixelloop:
+ mov rax, rdx ;rax = row pointer table
+ mov rdi, r8 ;rdi = filter
+ mov r11d, r9d ;r11d = filter width counter
+ movdqa xmm2, xmm4
+.coeffloop:
+ mov rsi, [rax]
+
+ movd xmm0, dword [rsi+rbx]
+
+ mov rsi, [rax+8]
+ add rax, 16
+
+ movd xmm1, dword [rsi+rbx]
+ punpcklbw xmm0, xmm1
+
+ punpcklbw xmm0, xmm5
+
+ movq xmm1, qword [rdi]
+ pshufd xmm1, xmm1, 01000100b
+
+ pmaddwd xmm0, xmm1
+
+ paddd xmm2, xmm0
+
+ add rdi,8
+
+ sub r11d,1
+ jne .coeffloop
+
+ psrad xmm2,14
+ packssdw xmm2,xmm2
+ add rbx,4
+ packuswb xmm2,xmm2
+
+ movd dword [rcx],xmm2
+ add rcx,4
+ sub r10d,1
+ jne .pixelloop
+
+.xit:
+ VDRESTOREXMM128 6, 15
+ VDRESTORE rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ ret
+
+.accel_4coeff:
+ mov r12, [rdx]
+ mov r13, [rdx+8]
+ mov r14, [rdx+16]
+ mov r15, [rdx+24]
+ movq xmm8, qword [r8]
+ punpcklqdq xmm8, xmm8
+ movq xmm9, qword [r8+8]
+ punpcklqdq xmm9, xmm9
+
+ sub r10d, 1
+ jc .oddpixel_4coeff
+.pixelloop_4coeff:
+ movq xmm0, qword [r12+rbx]
+ movq xmm1, qword [r13+rbx]
+ movq xmm2, qword [r14+rbx]
+ movq xmm3, qword [r15+rbx]
+
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+
+ punpcklbw xmm0, xmm5
+ punpckhbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpckhbw xmm3, xmm5
+
+ pmaddwd xmm0, xmm8
+ pmaddwd xmm1, xmm8
+ pmaddwd xmm2, xmm9
+ pmaddwd xmm3, xmm9
+
+ paddd xmm0, xmm4
+ paddd xmm1, xmm4
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+
+ psrad xmm0, 14
+ psrad xmm1, 14
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+
+ movq qword [rcx], xmm0
+ add rcx, 8
+ add rbx, 8
+ sub r10d, 2
+ ja .pixelloop_4coeff
+ jnz .xit
+.oddpixel_4coeff:
+ movd xmm0, dword [r12+rbx]
+ movd xmm1, dword [r13+rbx]
+ movd xmm2, dword [r14+rbx]
+ movd xmm3, dword [r15+rbx]
+
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+
+ pmaddwd xmm0, xmm8
+ pmaddwd xmm2, xmm9
+
+ paddd xmm0, xmm4
+ paddd xmm0, xmm2
+
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+
+ movd dword [rcx], xmm0
+
+ jmp .xit
+
+.accel_6coeff:
+ mov r12, [rdx]
+ mov r13, [rdx+8]
+ mov r14, [rdx+16]
+ mov r15, [rdx+24]
+ mov rsi, [rdx+32]
+ mov rdx, [rdx+40]
+ movq xmm10, qword [r8]
+ punpcklqdq xmm10, xmm10
+ movq xmm11, qword [r8+8]
+ punpcklqdq xmm11, xmm11
+ movq xmm12, qword [r8+16]
+ punpcklqdq xmm12, xmm12
+
+ sub r10d, 1
+ jc .oddpixel_6coeff
+.pixelloop_6coeff:
+ movq xmm0, qword [r12+rbx]
+ movq xmm1, qword [r13+rbx]
+ movq xmm2, qword [r14+rbx]
+ movq xmm3, qword [r15+rbx]
+ movq xmm8, qword [rsi+rbx]
+ movq xmm9, qword [rdx+rbx]
+
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm8, xmm9
+
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ movdqa xmm9, xmm8
+
+ punpcklbw xmm0, xmm5
+ punpckhbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpckhbw xmm3, xmm5
+ punpcklbw xmm8, xmm5
+ punpckhbw xmm9, xmm5
+
+ pmaddwd xmm0, xmm10
+ pmaddwd xmm1, xmm10
+ pmaddwd xmm2, xmm11
+ pmaddwd xmm3, xmm11
+ pmaddwd xmm8, xmm12
+ pmaddwd xmm9, xmm12
+
+ paddd xmm0, xmm4
+ paddd xmm1, xmm4
+ paddd xmm2, xmm8
+ paddd xmm3, xmm9
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+
+ psrad xmm0, 14
+ psrad xmm1, 14
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+
+ movq qword [rcx], xmm0
+ add rcx, 8
+ add rbx, 8
+ sub r10d, 2
+ ja .pixelloop_6coeff
+ jnz .xit
+.oddpixel_6coeff:
+ movd xmm0, dword [r12+rbx]
+ movd xmm1, dword [r13+rbx]
+ movd xmm2, dword [r14+rbx]
+ movd xmm3, dword [r15+rbx]
+ movd xmm8, dword [rsi+rbx]
+ movd xmm9, dword [rdx+rbx]
+
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm8, xmm9
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm8, xmm5
+
+ pmaddwd xmm0, xmm10
+ pmaddwd xmm2, xmm11
+ pmaddwd xmm8, xmm12
+
+ paddd xmm0, xmm4
+ paddd xmm2, xmm8
+ paddd xmm0, xmm2
+
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+
+ movd dword [rcx], xmm0
+
+ jmp .xit
+endproc_frame
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb.asm
new file mode 100644
index 000000000..f3503807e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb.asm
@@ -0,0 +1,812 @@
+ section .text
+
+ global _vdasm_pixblt_RGB565_to_XRGB1555
+_vdasm_pixblt_RGB565_to_XRGB1555:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-2]
+ lea ecx, [ecx+ebp-2]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp]
+ mov ebx, 0ffc0ffc0h
+
+ and ebx, eax
+ and eax, 0001f001fh
+
+ shr ebx, 1
+
+ add eax, ebx
+
+ mov [edx+ebp], eax
+ add ebp, 4
+
+ jnc .xloop
+ jnz .noodd
+.odd:
+ movzx eax, word [ecx]
+ mov ebx, 0ffc0ffc0h
+ and ebx, eax
+ and eax, 0001f001fh
+ shr ebx, 1
+ add eax, ebx
+ mov [edx], ax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_pixblt_RGB888_to_XRGB1555
+_vdasm_pixblt_RGB888_to_XRGB1555:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ebp,[esp+20+16]
+ lea eax,[ebp+ebp]
+ lea ebx,[ebp+eax]
+ sub [esp+8+16],eax
+ sub [esp+16+16],ebx
+
+ mov edx,[esp+24+16]
+.yloop:
+ mov ebp,[esp+20+16]
+ push ebp
+ push edx
+ shr ebp,1
+ jz .checkodd
+.xloop:
+ mov eax,[esi+2] ;u
+ add esi,6 ;v
+
+ mov ebx,eax ;u
+ mov ecx,eax ;v
+ shr ebx,11 ;u
+ and ecx,00f80000h ;v
+ shr eax,17 ;u
+ and ebx,0000001fh ;v
+ shr ecx,14 ;u
+ and eax,00007c00h ;v
+ or ebx,ecx ;u
+ add edi,4 ;v
+ or ebx,eax ;u
+
+ mov ecx,[esi-6] ;v
+ mov edx,ebx ;u
+ mov eax,ecx ;v
+
+ shl edx,16 ;u
+ mov ebx,ecx ;v
+ shr ebx,3 ;u
+ and ecx,0000f800h ;v
+ shr eax,9 ;u
+ and ebx,0000001fh ;v
+ shr ecx,6 ;u
+ and eax,00007c00h ;v
+ or eax,ecx ;u
+ or edx,ebx ;v
+ or edx,eax ;u
+ sub ebp,1 ;v
+ mov [edi-4],edx ;u
+ jne .xloop ;v
+.checkodd:
+ pop edx
+ pop ebp
+ and ebp,1
+ jz .noodd
+ movzx eax,word [esi]
+ movzx ebx,byte [esi+2]
+ shl ebx,16
+ add esi,3
+ add eax,ebx
+
+ mov ebx,eax
+ mov ecx,eax
+ shr ebx,3
+ and ecx,0000f800h
+ shr eax,9
+ and ebx,0000001fh
+ shr ecx,6
+ and eax,00007c00h
+ or ebx,ecx
+ or ebx,eax
+ mov [edi+0],bl
+ mov [edi+1],bh
+ add edi,2
+.noodd:
+
+ add esi,[esp+16+16]
+ add edi,[esp+ 8+16]
+
+ sub edx,1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+
+ ret
+
+ global _vdasm_pixblt_XRGB8888_to_XRGB1555
+_vdasm_pixblt_XRGB8888_to_XRGB1555:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ mov edx, [esp+4+16]
+ add ebp, ebp
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-2]
+ lea ecx, [ecx+ebp*2-4]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp*2]
+ mov ebx, 00f80000h
+ and ebx, eax
+ mov esi, eax
+ shr ebx, 9
+ and esi, 0000f800h
+ shr esi, 6
+ and eax, 000000f8h
+ shr eax, 3
+ add ebx, esi
+ mov esi, [ecx+ebp*2+4]
+ add eax, ebx
+ mov ebx, esi
+ and esi, 00f80000h
+ shl esi, 7
+ mov edi, ebx
+ and edi, 0000f800h
+ add eax, esi
+ shl edi, 10
+ and ebx, 000000f8h
+ shl ebx, 13
+ add eax, edi
+ add eax, ebx
+ mov [edx+ebp], eax
+ add ebp, 4
+ jnc .xloop
+ jnz .noodd
+.odd:
+ mov eax, [ecx]
+ mov ebx, 00f80000h
+ and ebx, eax
+ mov esi, eax
+ shr ebx, 9
+ and esi, 0000f800h
+ shr esi, 6
+ and eax, 000000f8h
+ shr eax, 3
+ add ebx, esi
+ add eax, ebx
+ mov [edx], ax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec dword [esp+24+16]
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_pixblt_XRGB1555_to_RGB565
+_vdasm_pixblt_XRGB1555_to_RGB565:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-2]
+ lea ecx, [ecx+ebp-2]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp]
+ mov ebx, 02000200h
+
+ mov esi, eax
+ and ebx, eax
+
+ shr ebx, 4
+ and esi, 0ffe0ffe0h
+
+ add eax, esi
+
+ add eax, ebx
+
+ mov [edx+ebp], eax
+ add ebp, 4
+
+ jnc .xloop
+ jnz .noodd
+.odd:
+ movzx eax, word [ecx]
+ mov ebx, 02000200h
+ mov esi, eax
+ and ebx, eax
+ shr ebx, 4
+ and esi, 0ffe0ffe0h
+ add eax, esi
+ add eax, ebx
+ mov [edx], ax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_pixblt_RGB888_to_RGB565
+_vdasm_pixblt_RGB888_to_RGB565:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ebp,[esp+20+16]
+ lea eax,[ebp+ebp]
+ lea ebx,[ebp+eax]
+ sub [esp+8+16],eax
+ sub [esp+16+16],ebx
+
+ mov edx,[esp+24+16]
+.yloop:
+ mov ebp,[esp+20+16]
+ push ebp
+ push edx
+ shr ebp,1
+ jz .checkodd
+.xloop:
+ mov eax,[esi+2] ;u
+ add esi,6 ;v
+
+ mov ebx,eax ;u
+ mov ecx,eax ;v
+ shr ebx,11 ;u
+ and ecx,00fc0000h ;v
+ shr eax,16 ;u
+ and ebx,0000001fh ;v
+ shr ecx,13 ;u
+ and eax,0000f800h ;v
+ or ebx,ecx ;u
+ add edi,4 ;v
+ or ebx,eax ;u
+
+ mov ecx,[esi-6] ;v
+ mov edx,ebx ;u
+ mov eax,ecx ;v
+
+ shl edx,16 ;u
+ mov ebx,ecx ;v
+ shr ebx,3 ;u
+ and ecx,0000fc00h ;v
+ shr eax,8 ;u
+ and ebx,0000001fh ;v
+ shr ecx,5 ;u
+ and eax,0000f800h ;v
+ or eax,ecx ;u
+ or edx,ebx ;v
+ or edx,eax ;u
+ sub ebp,1 ;v
+ mov [edi-4],edx ;u
+ jne .xloop ;v
+.checkodd:
+ pop edx
+ pop ebp
+ and ebp,1
+ jz .noodd
+ movzx eax,word [esi]
+ movzx ebx,byte [esi+2]
+ shl ebx,16
+ add esi,3
+ add eax,ebx
+
+ mov ebx,eax
+ mov ecx,eax
+ shr ebx,3
+ and ecx,0000fc00h
+ shr eax,8
+ and ebx,0000001fh
+ shr ecx,5
+ and eax,0000f800h
+ or ebx,ecx
+ or ebx,eax
+ mov [edi+0],bl
+ mov [edi+1],bh
+ add edi,2
+.noodd:
+
+ add esi,[esp+16+16]
+ add edi,[esp+ 8+16]
+
+ sub edx,1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+
+ ret
+
+ global _vdasm_pixblt_XRGB8888_to_RGB565
+_vdasm_pixblt_XRGB8888_to_RGB565:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ mov edx, [esp+4+16]
+ add ebp, ebp
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-2]
+ lea ecx, [ecx+ebp*2-4]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp*2]
+ mov ebx, 00f80000h
+ and ebx, eax
+ mov esi, eax
+ shr ebx, 8
+ and esi, 0000fc00h
+ shr esi, 5
+ and eax, 000000f8h
+ shr eax, 3
+ add ebx, esi
+ mov esi, [ecx+ebp*2+4]
+ add eax, ebx
+ mov ebx, esi
+ and esi, 00f80000h
+ shl esi, 8
+ mov edi, ebx
+ and edi, 0000fc00h
+ add eax, esi
+ shl edi, 11
+ and ebx, 000000f8h
+ shl ebx, 13
+ add eax, edi
+ add eax, ebx
+ mov [edx+ebp], eax
+ add ebp, 4
+ jnc .xloop
+ jnz .noodd
+.odd:
+ mov eax, [ecx]
+ mov ebx, 00f80000h
+ and ebx, eax
+ mov esi, eax
+ shr ebx, 8
+ and esi, 0000fc00h
+ shr esi, 5
+ and eax, 000000f8h
+ shr eax, 3
+ add ebx, esi
+ add eax, ebx
+ mov [edx], ax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec dword [esp+24+16]
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_pixblt_XRGB8888_to_RGB888
+_vdasm_pixblt_XRGB8888_to_RGB888:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ecx,[esp+20+16]
+ lea eax,[ecx+ecx*2]
+ lea ebx,[ecx*4]
+ sub [esp+8+16],eax
+ sub [esp+16+16],ebx
+
+ mov edx,[esp+24+16]
+.yloop:
+ mov ecx,[esp+20+16]
+ push ecx
+ push edx
+ shr ecx,2
+ jz .checkodd
+.xloop:
+ mov eax,[esi] ;EAX = xxr0g0b0
+ mov ebx,[esi+4] ;EBX = xxr1g1b1
+ mov edx,ebx ;EDX = xxr1g1b1
+ mov ebp,[esi+8] ;EBP = xxr2g2b2
+ shl ebx,24 ;EBX = b1000000
+ and eax,00ffffffh ;EAX = 00r0g0b0
+ shr edx,8 ;EDX = 00xxr1g1
+ or eax,ebx ;EAX = b1r0g0b0
+ mov [edi],eax
+ mov ebx,ebp ;EBX = xxr2g2b2
+ shl ebp,16 ;EBP = g2b20000
+ and edx,0000ffffh ;EDX = 0000r1g1
+ or ebp,edx ;EBP = g2b2r1g1
+ mov eax,[esi+12] ;EAX = xxr3g3b3
+ shr ebx,16 ;EBX = 0000xxr2
+ add edi,12
+ shl eax,8 ;EAX = r3g3b300
+ and ebx,000000ffh ;EBX = 000000r2
+ or eax,ebx ;EAX = r3g3b3r2
+ mov [edi+4-12],ebp
+ add esi,16
+ mov [edi+8-12],eax
+ sub ecx,1
+ jne .xloop
+.checkodd:
+ pop edx
+ pop ecx
+ and ecx,3
+ jz .noodd
+.oddloop:
+ mov eax,[esi]
+ add esi,4
+ mov [edi],ax
+ shr eax,16
+ mov [edi+2],al
+ add edi,3
+ sub ecx,1
+ jnz .oddloop
+.noodd:
+ add esi,[esp+16+16]
+ add edi,[esp+ 8+16]
+
+ sub edx,1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_pixblt_XRGB1555_to_XRGB8888
+_vdasm_pixblt_XRGB1555_to_XRGB8888:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp*2-4]
+ lea ecx, [ecx+ebp-2]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ mov eax, [ecx+ebp]
+ mov ebx, 00007c00h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 9
+ and esi, 000003e0h
+ shl esi, 6
+ mov edi, eax
+ and eax, 0000001fh
+ add ebx, esi
+ shl eax, 3
+ mov esi, edi
+ shr edi, 7
+ add eax, ebx
+ and edi, 00f80000h
+ mov ebx, esi
+ shr esi, 13
+ and ebx, 03e00000h
+ shr ebx, 10
+ and esi, 000000f8h
+ add ebx, edi
+ add ebx, esi
+ mov edi, eax
+ and eax, 00e0e0e0h
+ shr eax, 5
+ mov esi, ebx
+ shr ebx, 5
+ add eax, edi
+ and ebx, 00070707h
+ add ebx, esi
+ mov [edx+ebp*2], eax
+ mov [edx+ebp*2+4], ebx
+ add ebp, 4
+ jnc .xloop
+ jnz .noodd
+.odd:
+ movzx eax, word [ecx]
+ mov ebx, 00007c00h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 9
+ and esi, 000003e0h
+ shl esi, 6
+ and eax, 0000001fh
+ shl eax, 3
+ add ebx, esi
+ add eax, ebx
+ mov ebx, 00e0e0e0h
+ and ebx, eax
+ shr ebx, 5
+ add eax, ebx
+ mov [edx], eax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec dword [esp+24+16]
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_pixblt_RGB565_to_XRGB8888
+_vdasm_pixblt_RGB565_to_XRGB8888:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebp, [esp+20+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp*2-4]
+ lea ecx, [ecx+ebp-2]
+ neg ebp
+ mov [esp+20+16], ebp
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 2
+ jbe .odd
+
+.xloop:
+ movzx eax, word [ecx+ebp]
+ mov ebx, 0000f800h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 8
+ mov edi, eax
+ shl eax, 3
+ and esi, 000007e0h
+ and eax, 000000f8h
+ add ebx, eax
+ shl esi, 5
+ mov eax, ebx
+ shr ebx, 5
+ and edi, 00000600h
+ shr edi, 1
+ and ebx, 00070007h
+ add esi, edi
+ add eax, ebx
+ add eax, esi
+ mov [edx+ebp*2], eax
+
+ movzx eax, word [ecx+ebp+2]
+ mov ebx, 0000f800h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 8
+ mov edi, eax
+ shl eax, 3
+ and esi, 000007e0h
+ and eax, 000000f8h
+ add ebx, eax
+ shl esi, 5
+ mov eax, ebx
+ shr ebx, 5
+ and edi, 00000600h
+ shr edi, 1
+ and ebx, 00070007h
+ add esi, edi
+ add eax, ebx
+ add eax, esi
+ mov [edx+ebp*2+4], eax
+
+ add ebp, 4
+
+ jnc .xloop
+ jnz .noodd
+.odd:
+ movzx eax, word [ecx]
+ mov ebx, 0000f800h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 8
+ mov edi, eax
+ shl eax, 3
+ and esi, 000007e0h
+ and eax, 000000f8h
+ add ebx, eax
+ shl esi, 5
+ mov eax, ebx
+ shr ebx, 5
+ and edi, 00000600h
+ shr edi, 1
+ and ebx, 00070007h
+ add esi, edi
+ add eax, ebx
+ add eax, esi
+ mov [edx], eax
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec dword [esp+24+16]
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_pixblt_RGB888_to_XRGB8888
+_vdasm_pixblt_RGB888_to_XRGB8888:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ecx,[esp+20+16]
+ lea eax,[ecx+ecx*2]
+ lea ebx,[ecx*4]
+ sub [esp+8+16],ebx
+ sub [esp+16+16],eax
+
+ mov edx,[esp+24+16]
+.yloop:
+ mov ebp,[esp+20+16]
+ shr ebp,2
+ push edx
+ jz .checkodd
+.xloop:
+ mov eax,[esi] ;EAX: b1r0g0b0
+ mov ebx,[esi+4] ;EBX: g2b2r1g1
+
+ mov [edi],eax
+ mov ecx,ebx ;ECX: g2b2r1g1
+
+ shr eax,24 ;EAX: ------b1
+ mov edx,[esi+8] ;EDX: r3g3b3r2
+
+ shr ecx,16 ;ECX: ----g2b2
+ add edi,16
+
+ shl ebx,8 ;EBX: b2r1g1--
+ add esi,12
+
+ or eax,ebx ;EAX: b2r1g1b1
+ mov ebx,edx ;EBX: r3g3b3r2
+
+ shr ebx,8 ;EBX: --r3g3b3
+ mov [edi+4-16],eax
+
+ shl edx,16 ;EDX: b3r2----
+ mov [edi+12-16],ebx
+
+ or edx,ecx ;EDX: b3r2g2b2
+ sub ebp,1
+
+ mov [edi+8-16],edx
+ jne .xloop
+
+.checkodd:
+ pop edx
+ mov ebx,[esp+20+16]
+ and ebx,3
+ jz .noodd
+.oddloop:
+ mov ax,[esi]
+ mov cl,[esi+2]
+ mov [edi],ax
+ mov [edi+2],cl
+ add esi,3
+ add edi,4
+ sub ebx,1
+ jne .oddloop
+.noodd:
+
+ add esi,[esp+16+16]
+ add edi,[esp+ 8+16]
+
+ sub edx,1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb2yuv_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb2yuv_mmx.asm
new file mode 100644
index 000000000..6a00d826f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb2yuv_mmx.asm
@@ -0,0 +1,652 @@
+ section .rdata, rdata
+
+y_co dq 0004a004a004a004ah
+cr_co_r dq 000cc00cc00cc00cch
+cb_co_b dq 00081008100810081h ;note: divided by two
+cr_co_g dq 0ff98ff98ff98ff98h
+cb_co_g dq 0ffceffceffceffceh
+y_bias dq 0fb7afb7afb7afb7ah
+c_bias dq 0ff80ff80ff80ff80h
+interp dq 06000400020000000h
+rb_mask_555 dq 07c1f7c1f7c1f7c1fh
+g_mask_555 dq 003e003e003e003e0h
+rb_mask_565 dq 0f81ff81ff81ff81fh
+g_mask_565 dq 007e007e007e007e0h
+
+cr_coeff dq 000003313e5fc0000h
+cb_coeff dq 000000000f377408dh
+rgb_bias dq 000007f2180887eebh
+
+msb_inv dq 08000800080008000h
+
+ section .text
+
+;============================================================================
+
+%macro YUV411PLANAR_TO_RGB_PROLOG 0
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+ mov ecx, [esp+8+16]
+ mov edx, [esp+12+16]
+ mov ebx, [esp+16+16]
+ mov ebp, [esp+20+16]
+
+ pxor mm7, mm7
+%endmacro
+
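+; Converts four luma samples and one chroma sample pair per iteration: the
+; 4:1:1 chroma is linearly interpolated toward the next pair with the 0,
+; 1/4, 1/2, 3/4 weights in [interp], then combined with the biased luma and
+; the fixed-point color coefficients; the results are packed down to bytes
+; in mm1 (red), mm2 (blue) and mm3 (green).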
+%macro YUV411PLANAR_TO_RGB_CORE_MMX 0
+ movd mm0, dword [ecx] ;mm0 = Y3Y2Y1Y0
+ add ecx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ movq mm1, mm0
+ pmullw mm0, [y_co]
+ paddw mm1, [y_bias]
+ paddsw mm0, mm0
+ paddsw mm0, mm1
+
+ movzx esi, word [ebx]
+ movzx edi, word [edx]
+ add ebx, 1
+ add edx, 1
+
+ movd mm1, esi
+ movd mm2, edi
+
+ punpcklbw mm1, mm7
+ paddw mm1, [c_bias]
+ punpcklwd mm1, mm1
+ movq mm3, mm1
+ punpckldq mm1, mm1
+ punpckhdq mm3, mm3
+
+ punpcklbw mm2, mm7
+ paddw mm2, [c_bias]
+ punpcklwd mm2, mm2
+ movq mm4, mm2
+ punpckldq mm2, mm2
+ punpckhdq mm4, mm4
+
+ psubw mm3, mm1
+ psubw mm4, mm2
+ paddw mm3, mm3
+ paddw mm4, mm4
+
+ pmulhw mm3, [interp]
+ pmulhw mm4, [interp]
+
+ paddw mm1, mm3
+ paddw mm2, mm4
+
+ movq mm3, mm1
+ movq mm4, mm2
+
+ pmullw mm1, [cr_co_r]
+ pmullw mm2, [cb_co_b]
+ pmullw mm3, [cr_co_g]
+ pmullw mm4, [cb_co_g]
+
+ paddsw mm2, mm2
+ paddsw mm1, mm0
+ paddsw mm3, mm4
+ paddsw mm2, mm0
+ paddsw mm3, mm0
+
+ psraw mm1, 7
+ psraw mm2, 7
+ psraw mm3, 7
+
+ packuswb mm1, mm1
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+%endmacro
+
+%macro YUV411PLANAR_TO_RGB_CORE_ISSE 0
+ movd mm0, dword [ecx] ;mm0 = Y3Y2Y1Y0
+ add ecx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ movq mm1, mm0
+ pmullw mm0, [y_co]
+ paddw mm1, [y_bias]
+ paddsw mm0, mm0
+ paddsw mm0, mm1
+
+ movzx esi, word [ebx]
+ movzx edi, word [edx]
+ add ebx, 1
+ add edx, 1
+
+ movd mm1, esi
+ movd mm2, edi
+
+ punpcklbw mm1, mm7
+ paddw mm1, [c_bias]
+ pshufw mm3, mm1, 01010101b
+ pshufw mm1, mm1, 00000000b
+
+ punpcklbw mm2, mm7
+ paddw mm2, [c_bias]
+ pshufw mm4, mm2, 01010101b
+ pshufw mm2, mm2, 00000000b
+
+ psubw mm3, mm1
+ psubw mm4, mm2
+ paddw mm3, mm3
+ paddw mm4, mm4
+
+ pmulhw mm3, [interp]
+ pmulhw mm4, [interp]
+
+ paddw mm1, mm3
+ paddw mm2, mm4
+
+ psllw mm1, 3
+ psllw mm2, 3
+
+ movq mm3, [cr_co_g]
+ movq mm4, [cb_co_g]
+
+ pmullw mm3, mm1
+ pmullw mm4, mm2
+ pmullw mm1, [cr_co_r]
+ pmullw mm2, [cb_co_b]
+
+ paddsw mm2, mm2
+ paddsw mm1, mm0
+ paddsw mm3, mm4
+ paddsw mm2, mm0
+ paddsw mm3, mm0
+
+ psraw mm1, 7
+ psraw mm2, 7
+ psraw mm3, 7
+
+ packuswb mm1, mm1
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+%endmacro
+
+%macro YUV411PLANAR_TO_RGB_EPILOG 0
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+%endmacro
+
+ global _vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_MMX
+_vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_MMX:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+ YUV411PLANAR_TO_RGB_CORE_MMX
+
+ psrlw mm1, 1
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 2
+ pand mm2, [rb_mask_555]
+ pand mm3, [g_mask_555]
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_RGB565_scan_MMX
+_vdasm_pixblt_YUV411Planar_to_RGB565_scan_MMX:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+ YUV411PLANAR_TO_RGB_CORE_MMX
+
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 3
+ pand mm2, [rb_mask_565]
+ pand mm3, [g_mask_565]
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_MMX
+_vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_MMX:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+	YUV411PLANAR_TO_RGB_CORE_MMX
+
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ movq mm1, mm2
+ punpcklbw mm1, mm3
+ punpckhbw mm2, mm3
+
+ movq [eax], mm1
+ movq [eax+8], mm2
+ add eax, 16
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_ISSE
+_vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_ISSE:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+ YUV411PLANAR_TO_RGB_CORE_ISSE
+
+ psrlw mm1, 1
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 2
+ pand mm2, [rb_mask_555]
+ pand mm3, [g_mask_555]
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_RGB565_scan_ISSE
+_vdasm_pixblt_YUV411Planar_to_RGB565_scan_ISSE:
+ YUV411PLANAR_TO_RGB_PROLOG
+.xloop:
+ YUV411PLANAR_TO_RGB_CORE_ISSE
+
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 3
+ pand mm2, [rb_mask_565]
+ pand mm3, [g_mask_565]
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;============================================================================
+
+ global _vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_ISSE
+_vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_ISSE:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+ mov ecx, [esp+8+16]
+ mov edx, [esp+12+16]
+ mov ebx, [esp+16+16]
+ mov ebp, [esp+20+16]
+
+ pxor mm7, mm7
+
+ movzx esi, byte [ebx]
+ movzx edi, byte [edx]
+ add ebx, 1
+ add edx, 1
+
+ movd mm1, esi
+ movd mm2, edi
+
+ psllw mm1, 3
+ psllw mm2, 3
+
+ pshufw mm5, mm1, 0
+ pshufw mm6, mm2, 0
+
+ pmulhw mm5, [cr_coeff]
+ pmulhw mm6, [cb_coeff]
+ paddw mm6, mm5
+ paddw mm6, [rgb_bias]
+
+.xloop:
+ movd mm0, dword [ecx];mm0 = Y3Y2Y1Y0
+ add ecx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ psllw mm0, 3
+ pmulhw mm0, [y_co]
+ pxor mm0, [msb_inv]
+
+ movzx esi, byte [ebx]
+ movzx edi, byte [edx]
+ add ebx, 1
+ add edx, 1
+
+ movd mm1, esi
+ movd mm2, edi
+
+ psllw mm1, 3
+ psllw mm2, 3
+
+ pshufw mm1, mm1, 0
+ pshufw mm2, mm2, 0
+
+ pmulhw mm1, [cr_coeff]
+ pmulhw mm2, [cb_coeff]
+ paddw mm1, mm2
+ paddw mm1, [rgb_bias]
+
+ movq mm2, mm1
+ pavgw mm2, mm6 ;mm2 = 1/2
+ pshufw mm3, mm0, 00000000b
+ paddw mm3, mm6
+ pavgw mm6, mm2 ;mm1 = 1/4
+ pshufw mm4, mm0, 01010101b
+ paddw mm4, mm6
+ packuswb mm3, mm4
+ movq [eax], mm3
+
+ pshufw mm3, mm0, 10101010b
+ paddw mm3, mm2
+ pshufw mm0, mm0, 11111111b
+ pavgw mm2, mm1 ;mm2 = 3/4
+ paddw mm2, mm0
+ packuswb mm3, mm2
+ movq [eax+8], mm3
+
+ movq mm6, mm1
+
+ add eax, 16
+
+ sub ebp, 1
+ jne .xloop
+
+ YUV411PLANAR_TO_RGB_EPILOG
+
+;==========================================================================
+
+%macro YUV444PLANAR_TO_RGB_PROLOG 0
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+ mov ecx, [esp+8+16]
+ mov edx, [esp+12+16]
+ mov ebx, [esp+16+16]
+ mov ebp, [esp+20+16]
+%endmacro
+
+%macro YUV444PLANAR_TO_RGB_CORE 0
+ movq mm3, mm0
+ pmullw mm0, [y_co]
+ paddw mm1, [c_bias]
+ paddw mm2, [c_bias]
+ paddw mm0, [y_bias]
+ paddsw mm0, mm0
+ paddsw mm0, mm3
+
+ movq mm3, [cr_co_g]
+ movq mm4, [cb_co_g]
+
+ pmullw mm3, mm1
+ pmullw mm4, mm2
+ pmullw mm1, [cr_co_r]
+ pmullw mm2, [cb_co_b]
+
+ paddsw mm2, mm2
+ paddsw mm1, mm0
+ paddsw mm3, mm4
+ paddsw mm2, mm0
+ paddsw mm3, mm0
+
+ psraw mm1, 7
+ psraw mm2, 7
+ psraw mm3, 7
+
+ packuswb mm1, mm1
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+%endmacro
+
+%macro YUV444PLANAR_TO_RGB_EPILOG 0
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+%endmacro
+
+;==========================================================================
+
+ global _vdasm_pixblt_YUV444Planar_to_XRGB1555_scan_MMX
+_vdasm_pixblt_YUV444Planar_to_XRGB1555_scan_MMX:
+ YUV444PLANAR_TO_RGB_PROLOG
+
+ pxor mm7, mm7
+ movq mm5, [rb_mask_555]
+ movq mm6, [g_mask_555]
+
+ sub ebp, 3
+ jbe .oddcheck
+.xloop4:
+ movd mm0, dword [ecx];mm0 = Y3Y2Y1Y0
+ movd mm1, dword [ebx]
+ movd mm2, dword [edx]
+ add ecx, 4
+ add ebx, 4
+ add edx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ psrlw mm1, 1
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 2
+ pand mm2, mm5
+ pand mm3, mm6
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 4
+ ja .xloop4
+.oddcheck:
+ add ebp, 3
+ jz .noodd
+.xloop:
+	movzx	edi, byte [ecx]		;one Y sample (scalar tail)
+ movd mm0, edi
+ movzx edi, byte [ebx]
+ movd mm1, edi
+ movzx edi, byte [edx]
+ movd mm2, edi
+ add ecx, 1
+ add ebx, 1
+ add edx, 1
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ psrlw mm1, 1
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 2
+ pand mm2, mm5
+ pand mm3, mm6
+ por mm2, mm3
+
+ movd edi, mm2
+ mov [eax], di
+ add eax, 2
+
+ sub ebp, 1
+ jnz .xloop
+.noodd:
+ YUV444PLANAR_TO_RGB_EPILOG
+
+;==========================================================================
+
+ global _vdasm_pixblt_YUV444Planar_to_RGB565_scan_MMX
+_vdasm_pixblt_YUV444Planar_to_RGB565_scan_MMX:
+ YUV444PLANAR_TO_RGB_PROLOG
+
+ pxor mm7, mm7
+ movq mm5, [rb_mask_565]
+ movq mm6, [g_mask_565]
+
+ sub ebp, 3
+ jbe .oddcheck
+.xloop4:
+ movd mm0, dword [ecx];mm0 = Y3Y2Y1Y0
+ movd mm1, dword [ebx]
+ movd mm2, dword [edx]
+ add ecx, 4
+ add ebx, 4
+ add edx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 3
+ pand mm2, mm5
+ pand mm3, mm6
+ por mm2, mm3
+
+ movq [eax], mm2
+ add eax, 8
+
+ sub ebp, 4
+ ja .xloop4
+.oddcheck:
+ add ebp, 3
+ jz .noodd
+.xloop:
+	movzx	edi, byte [ecx]		;one Y sample (scalar tail)
+ movd mm0, edi
+ movzx edi, byte [ebx]
+ movd mm1, edi
+ movzx edi, byte [edx]
+ movd mm2, edi
+ add ecx, 1
+ add ebx, 1
+ add edx, 1
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ psrlw mm2, 3
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ psllw mm3, 3
+ pand mm2, mm5
+ pand mm3, mm6
+ por mm2, mm3
+
+ movd edi, mm2
+ mov [eax], di
+ add eax, 2
+
+ sub ebp, 1
+ jnz .xloop
+.noodd:
+ YUV444PLANAR_TO_RGB_EPILOG
+
+;==========================================================================
+
+ global _vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX
+_vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX:
+ YUV444PLANAR_TO_RGB_PROLOG
+
+ pxor mm7, mm7
+
+ sub ebp, 3
+ jbe .oddcheck
+.xloop4:
+ movd mm0, dword [ecx];mm0 = Y3Y2Y1Y0
+ movd mm1, dword [ebx]
+ movd mm2, dword [edx]
+ add ecx, 4
+ add ebx, 4
+ add edx, 4
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+ punpcklbw mm1, mm7
+ punpcklbw mm2, mm7
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ movq mm1, mm2
+ punpcklbw mm1, mm3
+ punpckhbw mm2, mm3
+
+ movq [eax], mm1
+ movq [eax+8], mm2
+ add eax, 16
+
+ sub ebp, 4
+ ja .xloop4
+.oddcheck:
+ add ebp, 3
+ jz .noodd
+.xloop:
+	movzx	edi, byte [ecx]		;one Y sample (scalar tail)
+ movd mm0, edi
+ movzx edi, byte [ebx]
+ movd mm1, edi
+ movzx edi, byte [edx]
+ movd mm2, edi
+ add ecx, 1
+ add ebx, 1
+ add edx, 1
+ punpcklbw mm0, mm7 ;mm0 = Y3 | Y2 | Y1 | Y0
+
+ YUV444PLANAR_TO_RGB_CORE
+
+ punpcklbw mm2, mm1
+ punpcklbw mm3, mm3
+ punpcklbw mm2, mm3
+
+ movd dword [eax], mm2
+ add eax, 4
+
+ sub ebp, 1
+ jnz .xloop
+.noodd:
+ YUV444PLANAR_TO_RGB_EPILOG
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb_mmx.asm
new file mode 100644
index 000000000..aa0b99987
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_bltrgb_mmx.asm
@@ -0,0 +1,806 @@
+ section .rdata, rdata
+
+x07b dq 00707070707070707h
+x0200w dq 00200020002000200h
+x001fw dq 0001f001f001f001fh
+xffc0w dq 0ffc0ffc0ffc0ffc0h
+xffe0w dq 0ffe0ffe0ffe0ffe0h
+x2080w dq 02080208020802080h
+x4200w dq 04200420042004200h
+rb_mask5 dq 000f800f800f800f8h
+g_mask5 dq 00000f8000000f800h
+g_mask6 dq 00000fc000000fc00h
+rb_mul_565 dq 02000000420000004h
+rb_mul_555 dq 02000000820000008h
+r_mask_555 dq 07c007c007c007c00h
+g_mask_555 dq 003e003e003e003e0h
+b_mask_555 dq 0001f001f001f001fh
+r_mask_565 dq 0f800f800f800f800h
+g_mask_565 dq 007e007e007e007e0h
+b_mask_565 dq 0001f001f001f001fh
+
+%macro prologue 1
+ push ebx
+ push esi
+ push edi
+ push ebp
+ ;.fpo (0,%1,4,4,1,0)
+%endmacro
+
+%macro epilogue 0
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+%endmacro
+
+ section .text
+
+ global _vdasm_pixblt_RGB565_to_XRGB1555_MMX
+_vdasm_pixblt_RGB565_to_XRGB1555_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-6]
+ lea ecx, [ecx+ebp-6]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5, [x001fw]
+ movq mm4, [xffc0w]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 6
+ jbe .odd
+
+.xloop:
+ movq mm0, [ecx+ebp]
+ movq mm1, mm5
+ pand mm1, mm0
+ pand mm0, mm4
+ psrlq mm0, 1
+ paddw mm0, mm1
+ movq [edx+ebp], mm0
+ add ebp, 8
+ jnc .xloop
+
+ sub ebp, 6
+ jz .noodd
+.odd:
+ movzx eax, word [ecx+ebp+6]
+ mov ebx, 0001f001fh
+ and ebx, eax
+ and eax, 0ffc0ffc0h
+ shr eax, 1
+ add eax, ebx
+ mov [edx+ebp+6], ax
+ add ebp, 2
+ jnz .odd
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+ global _vdasm_pixblt_XRGB8888_to_XRGB1555_MMX
+_vdasm_pixblt_XRGB8888_to_XRGB1555_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-14]
+ lea ecx, [ecx+ebp*2-28]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5,[rb_mul_555]
+ movq mm6,[rb_mask5]
+ movq mm7,[g_mask5]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 14
+ jbe .odd
+
+ ;This code uses the "pmaddwd" trick for 32->16 conversions from Intel's MMX
+ ;Application Notes.
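+	;A worked sketch of the trick as used here (from the masks and
+	;multipliers declared above): after the rb_mask5 AND each 32-bit pixel
+	;holds the word pair (R & F8h, B & F8h), and rb_mul_555 is the word
+	;pair (2000h, 0008h), so a single pmaddwd computes
+	;    (R & F8h)*2000h + (B & F8h)*0008h
+	;placing red in bits 16-20 and blue in bits 6-10 of each dword.  Green
+	;(bits 11-15 after the g_mask5 AND) is OR'ed in, and the psrld by 6
+	;drops all three fields into their XRGB1555 positions before packssdw.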
+
+ movq mm0,[ecx+ebp*2] ;allocate 0 (0123)
+ movq mm2,mm0 ;allocate 2 (0 23)
+
+ movq mm1,[ecx+ebp*2+8] ;allocate 1 (0123)
+ movq mm3,mm1 ;allocate 3 (0123)
+ pand mm0,mm6
+ pmaddwd mm0,mm5
+ pand mm1,mm6
+ pmaddwd mm1,mm5
+ pand mm2,mm7
+ pand mm3,mm7
+ jmp .xloopstart
+
+ align 16
+.xloop:
+ movq mm0,[ecx+ebp*2] ;allocate 0 (01234)
+ por mm4,mm2 ;free 2 (01 34)
+
+ por mm3,mm1 ;free 3 (01 34)
+ movq mm2,mm0 ;allocate 2 (0 234)
+
+ movq mm1,[ecx+ebp*2+8] ;allocate 1 (01234)
+ psrld mm4,6
+
+ psrld mm3,6
+ pand mm0,mm6
+
+ packssdw mm4,mm3 ;free 3 (012 4)
+ movq mm3,mm1 ;allocate 3 (01234)
+
+ pmaddwd mm0,mm5
+ pand mm1,mm6
+
+ pmaddwd mm1,mm5
+ pand mm2,mm7
+
+ movq [edx+ebp-8],mm4 ;free 4 (0123 )
+ pand mm3,mm7
+
+.xloopstart:
+ movq mm4,[ecx+ebp*2+16] ;allocate 4 (01234)
+ por mm0,mm2 ;free 2 (01 34)
+
+ por mm1,mm3 ;free 3 (01 4)
+ psrld mm0,6
+
+ movq mm3,[ecx+ebp*2+24] ;allocate 3 (01 34)
+ movq mm2,mm4 ;allocate 2 (01234)
+
+ psrld mm1,6
+ pand mm4,mm6
+
+ packssdw mm0,mm1 ;free 1 (0 234)
+ movq mm1,mm3 ;allocate 1 (01234)
+
+ movq [edx+ebp],mm0 ;free 0 ( 1234)
+ pand mm3,mm6
+
+ pmaddwd mm4,mm5
+ add ebp,16
+
+ pmaddwd mm3,mm5
+ pand mm2,mm7
+
+ pand mm1,mm7
+ jnc .xloop
+
+ por mm4,mm2 ;free 2 (01 34)
+ por mm3,mm1 ;free 3 (01 34)
+ psrld mm4,6
+ psrld mm3,6
+ packssdw mm4,mm3 ;free 3 (012 4)
+ movq [edx+ebp-8],mm4 ;free 4 (0123 )
+
+.odd:
+ sub ebp, 14
+ jz .noodd
+.oddloop:
+ mov eax, [ecx+ebp*2+28]
+ mov ebx, 00f80000h
+ mov esi, eax
+ and ebx, eax
+ shr ebx, 9
+ and esi, 0000f800h
+ shr esi, 6
+ and eax, 000000f8h
+ shr eax, 3
+ add esi, ebx
+ add eax, esi
+ mov [edx+ebp+14], ax
+ add ebp, 2
+ jnz .oddloop
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+ global _vdasm_pixblt_XRGB1555_to_RGB565_MMX
+_vdasm_pixblt_XRGB1555_to_RGB565_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-6]
+ lea ecx, [ecx+ebp-6]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5, [x0200w]
+ movq mm4, [xffe0w]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 6
+ jbe .odd
+
+.xloop:
+ movq mm0, [ecx+ebp]
+ movq mm1, mm4
+ movq mm2, mm0
+ pand mm1, mm0
+ pand mm0, mm5
+ paddw mm1, mm2
+ psrlq mm0, 4
+ paddw mm0, mm1
+ movq [edx+ebp], mm0
+ add ebp, 8
+ jnc .xloop
+
+.odd:
+ sub ebp, 6
+ jz .noodd
+.oddloop:
+ movzx eax, word [ecx+ebp+6]
+ mov ebx, 02000200h
+ mov esi, eax
+ and ebx, eax
+ shr ebx, 4
+ and esi, 0ffe0ffe0h
+ add eax, esi
+ add eax, ebx
+ mov [edx+ebp+6], ax
+ add ebp, 2
+ jnz .oddloop
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+
+ global _vdasm_pixblt_XRGB8888_to_RGB565_MMX
+_vdasm_pixblt_XRGB8888_to_RGB565_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp-14]
+ lea ecx, [ecx+ebp*2-28]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5,[rb_mul_565]
+ movq mm6,[rb_mask5]
+ movq mm7,[g_mask6]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 14
+ jbe .odd
+
+ ;This code uses the "pmaddwd" trick for 32->16 conversions from Intel's MMX
+ ;Application Notes.
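+	;Same scheme as the 1555 routine above, but with rb_mul_565 = (2000h,
+	;0004h) and the 6-bit green mask: red lands in bits 16-20, blue in bits
+	;5-9 and green in bits 10-15.  The pslld/psrad pair (a net right shift
+	;by 5) is used instead of a logical shift so that packssdw keeps the
+	;top red bit of the 565 word instead of saturating it away.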
+
+ movq mm0,[ecx+ebp*2] ;allocate 0 (0123)
+ movq mm2,mm0 ;allocate 2 (0 23)
+
+ movq mm1,[ecx+ebp*2+8] ;allocate 1 (0123)
+ movq mm3,mm1 ;allocate 3 (0123)
+ pand mm0,mm6
+ pmaddwd mm0,mm5
+ pand mm1,mm6
+ pmaddwd mm1,mm5
+ pand mm2,mm7
+ pand mm3,mm7
+ jmp .xloopstart
+
+ align 16
+.xloop:
+ movq mm0,[ecx+ebp*2] ;allocate 0 (01234)
+ por mm4,mm2 ;free 2 (01 34)
+
+ por mm3,mm1 ;free 3 (01 34)
+ pslld mm4,16-5
+
+ pslld mm3,16-5
+ movq mm2,mm0 ;allocate 2 (0 234)
+
+ movq mm1,[ecx+ebp*2+8] ;allocate 1 (01234)
+ psrad mm4,16
+
+ psrad mm3,16
+ pand mm0,mm6
+
+ packssdw mm4,mm3 ;free 3 (012 4)
+ movq mm3,mm1 ;allocate 3 (01234)
+
+ pmaddwd mm0,mm5
+ pand mm1,mm6
+
+ pmaddwd mm1,mm5
+ pand mm2,mm7
+
+ movq [edx+ebp-8],mm4 ;free 4 (0123 )
+ pand mm3,mm7
+
+.xloopstart:
+ movq mm4,[ecx+ebp*2+16] ;allocate 4 (01234)
+ por mm0,mm2 ;free 2 (01 34)
+
+ por mm1,mm3 ;free 3 (01 4)
+ pslld mm0,16-5
+
+ movq mm3,[ecx+ebp*2+24] ;allocate 3 (01 34)
+ pslld mm1,16-5
+
+ psrad mm0,16
+ movq mm2,mm4 ;allocate 2 (01234)
+
+ psrad mm1,16
+ pand mm4,mm6
+
+ packssdw mm0,mm1 ;free 1 (0 234)
+ movq mm1,mm3 ;allocate 1 (01234)
+
+ movq [edx+ebp],mm0 ;free 0 ( 1234)
+ pand mm3,mm6
+
+ pmaddwd mm4,mm5
+ add ebp,16
+
+ pmaddwd mm3,mm5
+ pand mm2,mm7
+
+ pand mm1,mm7
+ jnc .xloop
+
+ por mm4,mm2 ;free 2 (01 34)
+ por mm3,mm1 ;free 3 (01 34)
+ psllq mm4,16-5
+ psllq mm3,16-5
+ psrad mm4,16
+ psrad mm3,16
+ packssdw mm4,mm3 ;free 3 (012 4)
+ movq [edx+ebp-8],mm4 ;free 4 (0123 )
+
+.odd:
+ sub ebp, 14
+ jz .noodd
+.oddloop:
+ mov eax, [ecx+ebp*2+28]
+ mov ebx, 00f80000h
+ mov esi, eax
+ and ebx, eax
+ and eax, 000000f8h
+ shr eax, 3
+ and esi, 0000fc00h
+ shr ebx, 8
+ shr esi, 5
+ add eax, ebx
+ add eax, esi
+ mov [edx+ebp+14], ax
+ add ebp, 2
+ jnz .oddloop
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+ global _vdasm_pixblt_XRGB8888_to_RGB888_MMX
+_vdasm_pixblt_XRGB8888_to_RGB888_MMX:
+ prologue 6
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ecx,[esp+20+16]
+ lea eax,[ecx+ecx*2]
+ lea ebx,[ecx*4]
+ sub [esp+8+16],eax
+ sub [esp+16+16],ebx
+
+ pcmpeqb mm7,mm7
+ psrld mm7,8
+ movq mm6,mm7
+ psllq mm7,32 ;mm7 = high rgb mask
+ psrlq mm6,32 ;mm6 = low rgb mask
+
+ mov ebp,[esp+20+16]
+ mov edx,[esp+24+16]
+ mov eax,[esp+16+16]
+ mov ebx,[esp+ 8+16]
+.yloop:
+ mov ecx,ebp
+ shr ecx,3
+ jz .checkodd
+.xloop:
+ movq mm0,[esi] ;mm0 = a1r1g1b1a0r0g0b0
+ movq mm1,mm6
+
+ movq mm2,[esi+8] ;mm2 = a3r3g3b3a2r2g2b2
+ pand mm1,mm0 ;mm1 = ----------r0g0b0
+
+ movq mm3,mm6
+ pand mm0,mm7 ;mm0 = --r1g1b1--------
+
+ movq mm4,mm2
+ pand mm3,mm2 ;mm3 = ----------r2g2b2
+
+ psrlq mm0,8 ;mm0 = ----r1g1b1------
+ pand mm2,mm7 ;mm2 = --r3g3b3--------
+
+ movq mm5,[esi+16] ;mm5 = a5r5g5b5a4r4g4b4
+ psllq mm4,48 ;mm4 = g2b2------------
+
+ por mm0,mm1 ;mm0 = ----r1g1b1r0g0b0
+ psrlq mm3,16 ;mm3 = --------------r2
+
+ por mm0,mm4 ;mm0 = g2b2r1g1b1r0g0b0
+ movq mm1,mm6
+
+ pand mm1,mm5 ;mm1 = ----------r4g4b4
+ psrlq mm2,24 ;mm2 = --------r3g3b3--
+
+ movq [edi],mm0
+ pand mm5,mm7 ;mm5 = --r5g5b5--------
+
+ psllq mm1,32 ;mm1 = --r4g4b4--------
+ movq mm4,mm5 ;mm4 = --r5g5b5--------
+
+ por mm2,mm3 ;mm2 = --------r3g3b3r2
+ psllq mm5,24 ;mm5 = b5--------------
+
+ movq mm3,[esi+24] ;mm3 = a7r7g7b7a6r6g6b6
+ por mm2,mm1 ;mm2 = --r4g4b4r3g3b3r2
+
+ movq mm1,mm6
+ por mm2,mm5 ;mm2 = b5r4g4b4r3g3b3r2
+
+ psrlq mm4,40 ;mm4 = ------------r5g5
+ pand mm1,mm3 ;mm1 = ----------r6g6b6
+
+ psllq mm1,16 ;mm1 = ------r6g6b6----
+ pand mm3,mm7 ;mm3 = --r7g7b7--------
+
+ por mm4,mm1 ;mm4 = ------r6g6b6r5g5
+ psllq mm3,8 ;mm3 = r7g7b7----------
+
+ movq [edi+8],mm2
+ por mm4,mm3 ;mm4 = r7g7b7r6g6b6r5g5
+
+ add esi,32
+ sub ecx,1
+
+	movq	[edi+16],mm4
+
+ lea edi,[edi+24]
+ jne .xloop
+
+.checkodd:
+ mov ecx,ebp
+ and ecx,7
+ jz .noodd
+ movd mm0,eax
+.oddloop:
+ mov eax,[esi]
+ add esi,4
+ mov [edi],ax
+ shr eax,16
+ mov [edi+2],al
+ add edi,3
+ sub ecx,1
+ jnz .oddloop
+ movd eax,mm0
+.noodd:
+ add esi,eax
+ add edi,ebx
+
+ sub edx,1
+ jne .yloop
+
+ emms
+
+ epilogue
+ ret
+
+ global _vdasm_pixblt_XRGB1555_to_XRGB8888_MMX
+_vdasm_pixblt_XRGB1555_to_XRGB8888_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp*2-12]
+ lea ecx, [ecx+ebp-6]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5, [r_mask_555]
+ movq mm6, [g_mask_555]
+ movq mm7, [b_mask_555]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 6
+ jbe .odd
+
+.xloop:
+ movq mm0, [ecx+ebp]
+ movq mm1, mm6
+ movq mm2, mm7
+ pand mm1, mm0
+ pand mm2, mm0
+ pand mm0, mm5
+
+ paddw mm0, mm0
+ pmulhw mm1, [x4200w]
+ psllq mm2, 3
+ paddw mm0, mm2
+ movq mm2, mm0
+ psrlw mm0, 5
+ pand mm0, [x07b]
+ paddw mm0, mm2
+ movq mm2, mm0
+ punpcklbw mm0, mm1
+ punpckhbw mm2, mm1
+
+ movq [edx+ebp*2], mm0
+ movq [edx+ebp*2+8], mm2
+ add ebp, 8
+ jnc .xloop
+.odd:
+ sub ebp, 6
+ jz .noodd
+.oddloop:
+ movzx eax, word [ecx+ebp+6]
+ mov ebx, 03e0h
+ mov esi, 001fh
+ and ebx, eax
+ and esi, eax
+ and eax, 07c00h
+ shl esi, 3
+ shl ebx, 6
+ shl eax, 9
+ add ebx, esi
+ add eax, ebx
+ mov ebx, eax
+ shr eax, 5
+ and eax, 070707h
+ add eax, ebx
+ mov [edx+ebp*2+12], eax
+ add ebp, 2
+ jnz .oddloop
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+
+ global _vdasm_pixblt_RGB565_to_XRGB8888_MMX
+_vdasm_pixblt_RGB565_to_XRGB8888_MMX:
+ prologue 6
+
+ mov ebp, [esp+20+16]
+ mov edi, [esp+24+16]
+ add ebp, ebp
+ mov edx, [esp+4+16]
+ mov ecx, [esp+12+16]
+ lea edx, [edx+ebp*2-12]
+ lea ecx, [ecx+ebp-6]
+ neg ebp
+ mov [esp+20+16], ebp
+
+ movq mm5, [r_mask_565]
+ movq mm6, [g_mask_565]
+ movq mm7, [b_mask_565]
+
+.yloop:
+ mov ebp, [esp+20+16]
+ add ebp, 6
+ jbe .odd
+
+.xloop:
+ movq mm0, [ecx+ebp]
+ movq mm1, mm6
+ movq mm2, mm7
+ pand mm1, mm0
+ pand mm2, mm0
+ pand mm0, mm5
+
+ pmulhw mm1, [x2080w]
+ psllq mm2, 3
+ paddw mm0, mm2
+ movq mm2, mm0
+ psrlw mm0, 5
+ pand mm0, [x07b]
+ paddw mm0, mm2
+ movq mm2, mm0
+ punpcklbw mm0, mm1
+ punpckhbw mm2, mm1
+
+ movq [edx+ebp*2], mm0
+ movq [edx+ebp*2+8], mm2
+ add ebp, 8
+ jnc .xloop
+
+.odd:
+ sub ebp, 6
+ jz .noodd
+ push edi
+.oddloop:
+ movzx eax, word [ecx+ebp+6]
+ mov ebx, 0000f800h
+ and ebx, eax
+ mov esi, eax
+ shl ebx, 8
+ mov edi, eax
+ shl eax, 3
+ and esi, 000007e0h
+ and eax, 000000f8h
+ add ebx, eax
+ shl esi, 5
+ mov eax, ebx
+ shr ebx, 5
+ and edi, 00000600h
+ shr edi, 1
+ and ebx, 00070007h
+ add esi, edi
+ add eax, ebx
+ add eax, esi
+ mov [edx+ebp*2+12], eax
+ add ebp, 2
+ jnz .oddloop
+ pop edi
+.noodd:
+ add ecx, [esp+16+16]
+ add edx, [esp+8+16]
+ dec edi
+ jne .yloop
+
+ emms
+ epilogue
+ ret
+
+
+ global _vdasm_pixblt_RGB888_to_XRGB8888_MMX
+_vdasm_pixblt_RGB888_to_XRGB8888_MMX:
+ prologue 6
+
+ mov esi,[esp+12+16]
+ mov edi,[esp+4+16]
+
+ mov ecx,[esp+20+16]
+ lea eax,[ecx+ecx*2]
+ lea ebx,[ecx*4]
+ sub [esp+8+16],ebx
+ sub [esp+16+16],eax
+
+ mov edx,[esp+24+16]
+ mov ebx,[esp+20+16]
+ mov ecx,[esp+16+16]
+ mov eax,[esp+ 8+16]
+
+ ;ebx horizontal count backup
+ ;ecx source modulo
+ ;edx vertical count
+ ;esi source
+ ;edi destination
+ ;ebp horizontal count
+
+.yloop:
+ mov ebp,ebx
+ shr ebp,3
+ jz .checkodd
+.xloop:
+ movq mm0,[esi] ;mm0: g2b2r1g1b1r0g0b0
+ movq mm1,mm0 ;
+
+ psrlq mm1,24 ;mm1: ------g2b2r1g1b1
+ movq mm2,mm0 ;
+
+ movq mm3,[esi+8] ;mm3: b5r4g4b4r3g3b3r2
+ punpckldq mm0,mm1 ;mm0: b2r1g1b1b1r0g0b0 [qword 0 ready]
+
+ movq mm4,mm3 ;mm4: b5r4g4b4r3g3b3r2
+ psllq mm3,48 ;mm3: b3r2------------
+
+ movq mm5,mm4 ;mm5: b5r4g4b4r3g3b3r2
+ psrlq mm2,16 ;mm2: ----g2b2--------
+
+ movq mm1,[esi+16] ;mm1: r7g7b7r6g6b6r5g5
+ por mm2,mm3 ;mm2: b3r2g2b2--------
+
+ movq [edi],mm0 ;
+ psllq mm4,24 ;mm4: b4r3g3b3r2------
+
+ movq mm3,mm5 ;mm3: b5r4g4b4r3g3b3r2
+ psrlq mm5,24 ;mm5: ------b5r4g4b4r3
+
+ movq mm0,mm1 ;mm0: r7g7b7r6g6b6r5g5
+ psllq mm1,40 ;mm1: b6r5g5----------
+
+ punpckhdq mm2,mm4 ;mm2: b4r3g3b3b3r2g2b2 [qword 1 ready]
+ por mm1,mm5 ;mm1: b6r5g5b5r4g4b4r3
+
+ movq mm4,mm0 ;mm4: r7g7b7r6g6b6r5g5
+ punpckhdq mm3,mm1 ;mm3: b6r5g5b5b5r4g4b4 [qword 2 ready]
+
+ movq [edi+8],mm2
+ psrlq mm0,16 ;mm0: ----r7g7b7r6g6b6
+
+ movq [edi+16],mm3
+ psrlq mm4,40 ;mm4: ----------r7g7b7
+
+ punpckldq mm0,mm4 ;mm0: --r7g7b7b7r6g6b6 [qword 3 ready]
+ add esi,24
+
+ movq [edi+24],mm0
+
+ add edi,32
+ sub ebp,1
+ jne .xloop
+
+.checkodd:
+ mov ebp,ebx
+ and ebp,7
+ jz .noodd
+ movd mm7,eax
+.oddloop:
+ mov ax,[esi]
+ mov [edi],ax
+ mov al,[esi+2]
+ mov [edi+2],al
+ add esi,3
+ add edi,4
+ sub ebp,1
+ jne .oddloop
+
+ movd eax,mm7
+.noodd:
+ add esi,ecx
+ add edi,eax
+
+ sub edx,1
+ jne .yloop
+ emms
+ epilogue
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_bltyuv2rgb_sse2.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_bltyuv2rgb_sse2.asm
new file mode 100644
index 000000000..87ff13b56
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_bltyuv2rgb_sse2.asm
@@ -0,0 +1,161 @@
+ section .rdata, rdata
+
+ align 16
+
+bytemasks dd 000000ffh, 0000ffffh, 00ffffffh
+
+ section .text
+
+;============================================================================
+
+ global _vdasm_pixblt_XRGB8888_to_YUV444Planar_scan_SSE2
+_vdasm_pixblt_XRGB8888_to_YUV444Planar_scan_SSE2:
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+12]
+ mov ebx, [esp+8+12]
+ mov ecx, [esp+12+12]
+ mov edx, [esp+16+12]
+ mov esi, [esp+20+12]
+ mov edi, [esp+24+12]
+
+ pcmpeqb xmm6, xmm6
+ psrlw xmm6, 8 ;xmm6 = 00FF x 8
+
+ sub esi, 4
+ js .postcheck
+.xloop:
+	movdqu	xmm2, [edx]		;xmm2 = X3R3G3B3X2R2G2B2X1R1G1B1X0R0G0B0
+ add edx, 16
+ movdqa xmm5, xmm2
+	pand	xmm2, xmm6		;xmm2 = R3 B3 R2 B2 R1 B1 R0 B0
+	psrlw	xmm5, 8			;xmm5 = X3 G3 X2 G2 X1 G1 X0 G0
+ movdqa xmm0, [edi+0] ;coeff_rb_to_y
+ movdqa xmm1, [edi+16] ;coeff_rb_to_u
+ movdqa xmm3, [edi+32] ;coeff_g_to_y
+ movdqa xmm4, [edi+48] ;coeff_g_to_u
+ pmaddwd xmm0, xmm2
+ pmaddwd xmm1, xmm2
+ pmaddwd xmm2, [edi+64] ;coeff_rb_to_v
+ pmaddwd xmm3, xmm5
+ pmaddwd xmm4, xmm5
+ pmaddwd xmm5, [edi+80] ;coeff_g_to_v
+ paddd xmm0, xmm3
+ paddd xmm1, xmm4
+ paddd xmm2, xmm5
+ paddd xmm0, [edi+96] ;bias_y
+ paddd xmm1, [edi+112] ;bias_c
+ paddd xmm2, [edi+112] ;bias_c
+ psrad xmm0, 15
+ psrad xmm1, 15
+ psrad xmm2, 15
+ packssdw xmm0, xmm0
+ packssdw xmm1, xmm1
+ packssdw xmm2, xmm2
+ packuswb xmm0, xmm0
+ packuswb xmm1, xmm1
+ packuswb xmm2, xmm2
+ movd [eax], xmm0
+ movd [ebx], xmm1
+ movd [ecx], xmm2
+ add eax, 4
+ add ebx, 4
+ add ecx, 4
+ sub esi, 4
+ jns .xloop
+.postcheck:
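+	;esi is (pixels remaining - 4), i.e. -4..-1 at this point, so the
+	;indexed jump dispatches to the handler for 0, 1, 2 or 3 leftover
+	;pixels via .finaltable below.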
+ jmp dword [.finaltable + esi*4 + 16]
+.complete:
+ pop ebx
+ pop esi
+ pop edi
+ ret
+
+.finaltable:
+ dd .complete
+ dd .do1
+ dd .do2
+ dd .do3
+
+.finaltable2:
+ dd .fin1
+ dd .fin2
+ dd .fin3
+
+.do1:
+ movd xmm2, [edx]
+ jmp short .dofinal
+.do2:
+ movq xmm2, [edx]
+ jmp short .dofinal
+.do3:
+ movq xmm2, [edx]
+	movd	xmm1, [edx+8]		;third leftover pixel
+ movlhps xmm2, xmm1
+.dofinal:
+ movdqa xmm5, xmm2
+	pand	xmm2, xmm6		;xmm2 = R3 B3 R2 B2 R1 B1 R0 B0
+	psrlw	xmm5, 8			;xmm5 = X3 G3 X2 G2 X1 G1 X0 G0
+ movdqa xmm0, [edi+0] ;coeff_rb_to_y
+ movdqa xmm1, [edi+16] ;coeff_rb_to_u
+ movdqa xmm3, [edi+32] ;coeff_g_to_y
+ movdqa xmm4, [edi+48] ;coeff_g_to_u
+ pmaddwd xmm0, xmm2
+ pmaddwd xmm1, xmm2
+ pmaddwd xmm2, [edi+64] ;coeff_rb_to_v
+ pmaddwd xmm3, xmm5
+ pmaddwd xmm4, xmm5
+ pmaddwd xmm5, [edi+80] ;coeff_g_to_v
+ paddd xmm0, xmm3
+ paddd xmm1, xmm4
+ paddd xmm2, xmm5
+ paddd xmm0, [edi+96] ;bias_y
+ paddd xmm1, [edi+112] ;bias_c
+ paddd xmm2, [edi+112] ;bias_c
+ psrad xmm0, 15
+ psrad xmm1, 15
+ psrad xmm2, 15
+ packssdw xmm0, xmm0
+ packssdw xmm1, xmm1
+ packssdw xmm2, xmm2
+ packuswb xmm0, xmm0
+ packuswb xmm1, xmm1
+ movd xmm7, [bytemasks + esi*4 + 12]
+ packuswb xmm2, xmm2
+
+ jmp dword [.finaltable2 + esi*4 + 12]
+
+.fin1:
+ movd edx, xmm0
+ mov [eax], dl
+ movd edx, xmm1
+ mov [ebx], dl
+ movd edx, xmm2
+ mov [ecx], dl
+ jmp .complete
+.fin2:
+ movd edx, xmm0
+ mov [eax], dx
+ movd edx, xmm1
+ mov [ebx], dx
+ movd edx, xmm2
+ mov [ecx], dx
+ jmp .complete
+.fin3:
+ movd edx, xmm0
+ mov [eax], dx
+ shr edx, 16
+ mov [eax+2], dl
+ movd edx, xmm1
+ mov [ebx], dx
+ shr edx, 16
+ mov [ebx+2], dl
+ movd edx, xmm2
+ mov [ecx], dx
+ shr edx, 16
+ mov [ecx+2], dl
+ jmp .complete
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_resample_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_resample_mmx.asm
new file mode 100644
index 000000000..912c655ab
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_resample_mmx.asm
@@ -0,0 +1,1559 @@
+; VirtualDub - Video processing and capture application
+; Graphics support library
+; Copyright (C) 1998-2004 Avery Lee
+;
+; This program is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or
+; (at your option) any later version.
+;
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with this program; if not, write to the Free Software
+; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+;
+ section .rdata, rdata, align=16
+
+x0002000200020002 dq 0002000200020002h
+x0004000400040004 dq 0004000400040004h
+x0008000800080008 dq 0008000800080008h
+x0000200000002000 dq 0000200000002000h
+
+ align 16
+MMX_roundval dq 0000200000002000h, 0000200000002000h
+
+
+;**************************************************************************
+
+x0000FFFF0000FFFF dq 0000FFFF0000FFFFh
+x0000010100000101 dq 0000010100000101h
+x0100010001000100 dq 0100010001000100h
+
+ section .text
+
+;--------------------------------------------------------------------------
+;_vdasm_resize_interp_row_run_MMX(
+; [esp+ 4] void *dst,
+; [esp+ 8] void *src,
+; [esp+12] ulong width,
+; [esp+16] __int64 xaccum,
+; [esp+24] __int64 x_inc);
+;
+ global _vdasm_resize_interp_row_run_MMX
+_vdasm_resize_interp_row_run_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+8+16]
+ mov edi, [esp+4+16]
+ mov ebp, [esp+12+16]
+
+ movd mm4, dword [esp+16+16]
+ pxor mm7, mm7
+ movd mm6, dword [esp+24+16]
+ punpckldq mm4, mm4
+ punpckldq mm6, mm6
+
+ shr esi, 2
+
+ mov eax, [esp+16+16]
+ mov ebx, [esp+20+16]
+ add esi, ebx
+ mov ebx, [esp+24+16]
+ mov ecx, [esp+28+16]
+
+ shl ebp,2
+ add edi,ebp
+ neg ebp
+
+.colloop:
+ movd mm1, dword [esi*4+4]
+ movq mm5, mm4
+
+ movd mm0, dword [esi*4]
+ punpcklbw mm1, mm7
+
+ punpcklbw mm0, mm7
+ psrld mm5, 24
+
+ movq mm3, [x0100010001000100]
+ packssdw mm5, mm5
+
+ pmullw mm1, mm5
+ psubw mm3, mm5
+
+ pmullw mm0, mm3
+ paddd mm4, mm6
+
+ ;stall
+ ;stall
+
+ ;stall
+ ;stall
+
+ paddw mm0, mm1
+
+ psrlw mm0, 8
+ add eax, ebx
+
+ adc esi, ecx
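+	;eax:esi form a 32.32 fixed-point source position (eax = fraction,
+	;esi = integer pixel index): add/adc advances both, with the carry out
+	;of the fraction bumping the pixel index.  mm4 tracks the same fraction
+	;for the per-pixel weights computed above.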
+ packuswb mm0, mm0
+
+ movd dword [edi+ebp],mm0
+
+ add ebp, 4
+ jnz .colloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+
+;**************************************************************************
+
+;vdasm_resize_interp_col_run_MMX(
+; [esp+ 4] void *dst,
+; [esp+ 8] void *src1,
+; [esp+12] void *src2,
+; [esp+16] ulong width,
+; [esp+20] ulong yaccum);
+
+
+ global _vdasm_resize_interp_col_run_MMX
+_vdasm_resize_interp_col_run_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov esi, [esp+8+16]
+ mov edx, [esp+12+16]
+ mov edi, [esp+4+16]
+ mov ebp, [esp+16+16]
+
+ movd mm4, dword [esp+20+16]
+ pxor mm7, mm7
+ punpcklwd mm4, mm4
+ punpckldq mm4, mm4
+ psrlw mm4, 8
+ pxor mm4, [x0000FFFF0000FFFF]
+ paddw mm4, [x0000010100000101]
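+	;Each dword of mm4 now holds (256-frac) in its low word and frac in its
+	;high word, so the pmaddwd in the loop blends the two source rows as
+	;src1*(256-frac) + src2*frac, normalized by the psrad 8 that follows.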
+
+ shl ebp, 2
+ add edi, ebp
+ add esi, ebp
+ add edx, ebp
+ neg ebp
+
+.colloop:
+ movd mm0, dword [esi+ebp]
+ movd mm2, dword [edx+ebp]
+
+ punpcklbw mm0, mm7
+ punpcklbw mm2, mm7
+
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ punpckhwd mm1, mm2
+
+ pmaddwd mm0, mm4
+ pmaddwd mm1, mm4
+
+ psrad mm0, 8
+ psrad mm1, 8
+
+ packssdw mm0, mm1
+ packuswb mm0, mm0
+
+ movd dword [edi+ebp],mm0
+
+ add ebp, 4
+ jnz .colloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+;--------------------------------------------------------------------------
+;vdasm_resize_ccint_row_MMX(dst, src, count, xaccum, xinc, tbl);
+
+ global _vdasm_resize_ccint_row_MMX
+_vdasm_resize_ccint_row_MMX:
+ push ebx
+ push esi
+ push edi
+ push ebp
+
+ mov ebx, [esp+4+16] ;ebx = dest addr
+ mov ecx, [esp+12+16] ;ecx = count
+
+ mov ebp, [esp+20+16] ;ebp = increment
+ mov edi, ebp ;edi = increment
+ shl ebp, 16 ;ebp = fractional increment
+ mov esi, [esp+16+16] ;esi = 16:16 position
+ sar edi, 16 ;edi = integer increment
+ mov [esp+20+16], ebp ;xinc = fractional increment
+ mov ebp, esi ;ebp = 16:16 position
+ shr esi, 16 ;esi = integer position
+ shl ebp, 16 ;ebp = fraction
+ mov [esp+16+16], ebp ;xaccum = fraction
+
+ mov eax, [esp+8+16]
+
+ shr ebp, 24 ;ebp = fraction (0...255)
+ mov [esp+8+16], edi
+ shl ebp, 4 ;ebp = fraction*16
+ mov edi, ebp
+ mov ebp, [esp+4+16] ;ebp = destination
+
+ shr eax, 2
+ add eax, esi
+ shl ecx, 2 ;ecx = count*4
+ lea ebp, [ebp+ecx-4]
+ neg ecx ;ecx = -count*4
+
+ movq mm6, [x0000200000002000]
+ pxor mm7, mm7
+
+ mov edx,[esp+16+16] ;edx = fractional accumulator
+ mov esi,[esp+20+16] ;esi = fractional increment
+
+ mov ebx,[esp+24+16] ;ebx = coefficient pointer
+
+ movd mm0,dword [eax*4]
+ movd mm1,dword [eax*4+4]
+ punpcklbw mm0,mm7 ;mm0 = [a1][r1][g1][b1]
+
+ ;borrow stack pointer
+ push 0 ;don't crash
+ push dword [fs:0]
+ mov dword [fs:0], esp
+ mov esp, [esp+8+24] ;esp = integer increment
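+	;The three pushes above park the real stack pointer in fs:[0] (the TIB
+	;exception-list slot) so that ESP can be borrowed as one more data
+	;register (here the integer source increment) for the duration of the
+	;loop; it is restored from fs:[0] once the loop exits, and the pushed 0
+	;stands in for the handler field of the faked SEH record.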
+ jmp short ccint_loop_MMX_start
+
+ ;EAX source pointer / 4
+ ;EBX coefficient pointer
+ ;ECX count
+ ;EDX fractional accumulator
+ ;ESI fractional increment
+ ;EDI coefficient offset
+ ;ESP integer increment
+ ;EBP destination pointer
+
+ align 16
+ccint_loop_MMX:
+ movd mm0,dword [eax*4]
+ packuswb mm2,mm2 ;mm0 = [a][r][g][b][a][r][g][b]
+
+ movd mm1,dword [eax*4+4]
+ punpcklbw mm0,mm7 ;mm0 = [a1][r1][g1][b1]
+
+ movd dword [ebp+ecx],mm2
+ccint_loop_MMX_start:
+ movq mm4,mm0 ;mm0 = [a1][r1][g1][b1]
+
+ movd mm2,dword [eax*4+8]
+ punpcklbw mm1,mm7 ;mm1 = [a2][r2][g2][b2]
+
+ movd mm3,dword [eax*4+12]
+ punpcklbw mm2,mm7 ;mm2 = [a3][r3][g3][b3]
+
+ punpcklbw mm3,mm7 ;mm3 = [a4][r4][g4][b4]
+ movq mm5,mm2 ;mm2 = [a3][r3][g3][b3]
+
+ add edx,esi ;add fractional increment
+ punpcklwd mm0,mm1 ;mm0 = [g2][g1][b2][b1]
+
+ pmaddwd mm0,[ebx+edi]
+ punpcklwd mm2,mm3 ;mm2 = [g4][g3][b4][b3]
+
+ pmaddwd mm2,[ebx+edi+8]
+ punpckhwd mm4,mm1 ;mm4 = [a2][a1][r2][r1]
+
+ pmaddwd mm4,[ebx+edi]
+	punpckhwd mm5,mm3		;mm5 = [a4][a3][r4][r3]
+
+ pmaddwd mm5,[ebx+edi+8]
+ paddd mm0,mm6
+
+ adc eax,esp ;add integer increment and fractional bump to offset
+ mov edi,0ff000000h
+
+ paddd mm2,mm0 ;mm0 = [ g ][ b ]
+ paddd mm4,mm6
+
+ psrad mm2,14
+ paddd mm4,mm5 ;mm4 = [ a ][ r ]
+
+ and edi,edx
+ psrad mm4,14
+
+ shr edi,20 ;edi = fraction (0...255)*16
+ add ecx,4
+
+ packssdw mm2,mm4 ;mm0 = [ a ][ r ][ g ][ b ]
+ jnc ccint_loop_MMX
+
+ packuswb mm2,mm2 ;mm0 = [a][r][g][b][a][r][g][b]
+ movd dword [ebp],mm2
+
+ mov esp, dword [fs:0]
+ pop dword [fs:0]
+ pop eax
+
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+ ret
+
+;--------------------------------------------------------------------------
+;vdasm_resize_ccint_col_MMX(dst, src1, src2, src3, src4, count, tbl);
+
+ global _vdasm_resize_ccint_col_MMX
+_vdasm_resize_ccint_col_MMX:
+ push ebx
+ push esi
+ push edi
+ push ebp
+
+ mov ebp, [esp+4+16] ;ebp = dest addr
+ mov esi, [esp+24+16] ;esi = count
+ add esi, esi
+ add esi, esi
+
+ mov eax, [esp+8+16] ;eax = row 1
+ mov ebx, [esp+12+16] ;ebx = row 2
+ mov ecx, [esp+16+16] ;ecx = row 3
+ mov edx, [esp+20+16] ;edx = row 4
+ mov edi, [esp+28+16] ;edi = coefficient ptr
+
+ add eax, esi
+ add ebx, esi
+ add ecx, esi
+ add edx, esi
+ add ebp, esi
+ neg esi
+
+ movq mm4,[edi]
+ movq mm5,[edi+8]
+ movq mm6,[x0000200000002000]
+ pxor mm7,mm7
+
+ movd mm2,dword [eax+esi]
+ movd mm1,dword [ebx+esi] ;mm1 = pixel1
+ punpcklbw mm2,mm7
+ jmp short ccint_col_loop_MMX.entry
+
+ align 16
+ccint_col_loop_MMX:
+ movd mm2,dword [eax+esi] ;mm2 = pixel0
+ packuswb mm0,mm0
+
+ movd mm1,dword [ebx+esi] ;mm1 = pixel1
+ pxor mm7,mm7
+
+ movd dword [ebp+esi-4],mm0
+ punpcklbw mm2,mm7
+
+ccint_col_loop_MMX.entry:
+ punpcklbw mm1,mm7
+ movq mm0,mm2
+
+ movd mm3,dword [edx+esi] ;mm3 = pixel3
+ punpcklwd mm0,mm1 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm0,mm4
+ punpckhwd mm2,mm1 ;mm2 = [a1][a0][r1][r0]
+
+ movd mm1,dword [ecx+esi] ;mm1 = pixel2
+ punpcklbw mm3,mm7
+
+ pmaddwd mm2,mm4
+ punpcklbw mm1,mm7
+
+ movq mm7,mm1
+ punpcklwd mm1,mm3 ;mm1 = [g3][g2][b3][b2]
+
+ punpckhwd mm7,mm3 ;mm7 = [a3][a2][r3][r2]
+ pmaddwd mm1,mm5
+
+ pmaddwd mm7,mm5
+ paddd mm0,mm6
+
+ paddd mm2,mm6
+ paddd mm0,mm1
+
+ paddd mm2,mm7
+ psrad mm0,14
+
+ psrad mm2,14
+ add esi,4
+
+ packssdw mm0,mm2
+ jne ccint_col_loop_MMX
+
+ packuswb mm0,mm0
+ movd dword [ebp-4],mm0
+
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+ ret
+
+;--------------------------------------------------------------------------
+;vdasm_resize_ccint_col_SSE2(dst, src1, src2, src3, src4, count, tbl);
+
+ global _vdasm_resize_ccint_col_SSE2
+_vdasm_resize_ccint_col_SSE2:
+ push ebx
+ push esi
+ push edi
+ push ebp
+
+ mov ebp,[esp + 4 + 16] ;ebp = dest addr
+ mov esi,[esp + 24 + 16] ;esi = count
+ add esi,esi
+ add esi,esi
+
+ mov eax,[esp + 8 + 16] ;eax = row 1
+ mov ebx,[esp + 12 + 16] ;ebx = row 2
+ mov ecx,[esp + 16 + 16] ;ecx = row 3
+ mov edx,[esp + 20 + 16] ;edx = row 4
+ mov edi,[esp + 28 + 16] ;edi = coefficient ptr
+
+ neg esi
+
+ add esi,4
+ jz ccint_col_SSE2_odd
+
+ movq xmm4,qword [edi]
+ movq xmm5,qword [edi+8]
+ punpcklqdq xmm4,xmm4
+ punpcklqdq xmm5,xmm5
+ movq xmm6,[x0000200000002000]
+ punpcklqdq xmm6,xmm6
+ pxor xmm7,xmm7
+
+; jmp short ccint_col_loop_SSE2.entry
+
+; align 16
+ccint_col_loop_SSE2:
+ movq xmm0, qword [eax]
+ add eax, 8
+ movq xmm1, qword [ebx]
+ add ebx, 8
+ movq xmm2, qword [ecx]
+ add ecx, 8
+ movq xmm3, qword [edx]
+ add edx, 8
+ punpcklbw xmm0,xmm1
+ punpcklbw xmm2,xmm3
+ movdqa xmm1,xmm0
+ movdqa xmm3,xmm2
+ punpcklbw xmm0,xmm7
+ punpckhbw xmm1,xmm7
+ punpcklbw xmm2,xmm7
+ punpckhbw xmm3,xmm7
+ pmaddwd xmm0,xmm4
+ pmaddwd xmm1,xmm4
+ pmaddwd xmm2,xmm5
+ pmaddwd xmm3,xmm5
+ paddd xmm0,xmm6
+ paddd xmm1,xmm6
+ paddd xmm0,xmm2
+ paddd xmm1,xmm3
+ psrad xmm0,14
+ psrad xmm1,14
+ packssdw xmm0,xmm1
+ packuswb xmm0,xmm0
+ movdq2q mm0,xmm0
+ movntq [ebp],mm0
+ add ebp,8
+ add esi,8
+ jnc ccint_col_loop_SSE2
+ jnz ccint_col_SSE2_noodd
+ccint_col_SSE2_odd:
+ movd mm0, dword [eax]
+ pxor mm7,mm7
+ movd mm1, dword [ebx]
+ movdq2q mm4,xmm4
+ movd mm2, dword [ecx]
+ movdq2q mm5,xmm5
+ movd mm3, dword [edx]
+ movdq2q mm6,xmm6
+ punpcklbw mm0,mm1
+ punpcklbw mm2,mm3
+ movq mm1,mm0
+ movq mm3,mm2
+ punpcklbw mm0,mm7
+ punpckhbw mm1,mm7
+ punpcklbw mm2,mm7
+ punpckhbw mm3,mm7
+ pmaddwd mm0,mm4
+ pmaddwd mm1,mm4
+ pmaddwd mm2,mm5
+ pmaddwd mm3,mm5
+ paddd mm0,mm6
+ paddd mm2,mm6
+ paddd mm0,mm2
+ paddd mm1,mm3
+ psrad mm0,14
+ psrad mm1,14
+ packssdw mm0,mm1
+ packuswb mm0,mm0
+ movd eax,mm0
+ movnti [ebp],eax
+
+ccint_col_SSE2_noodd:
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
+ ret
+
+
+
+;-------------------------------------------------------------------------
+;
+; long resize_table_row_MMX(Pixel *out, Pixel *in, int *filter, int filter_width, PixDim w, long accum, long frac);
+
+ .code
+
+ global _vdasm_resize_table_row_MMX
+_vdasm_resize_table_row_MMX:
+ push ebp
+ push esi
+ push edi
+ push ebx
+
+ cmp dword [esp+16+16], 4
+ jz .accel_4coeff
+ cmp dword [esp+16+16], 6
+ jz .accel_6coeff
+ cmp dword [esp+16+16], 8
+ jz .accel_8coeff
+
+ mov eax,[esp + 24 + 16]
+ mov ebp,[esp + 20 + 16]
+ mov ebx,[esp + 8 + 16]
+ mov edi,[esp + 4 + 16]
+
+ mov esi,eax
+ mov edx,eax
+
+ pxor mm5,mm5
+
+ mov ecx,[esp + 16 + 16]
+ shr ecx,1
+ mov [esp+16+16],ecx
+ test ecx,1
+ jnz .pixelloop_odd_pairs
+
+.pixelloop_even_pairs:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ mov ecx,[esp + 16 + 16]
+ shr edx,5
+ add esi,ebx
+ imul edx,ecx
+ add eax,[esp + 28 + 16]
+ add edx,[esp + 12 + 16]
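+	;The accumulator in eax appears to pack the source position: bits 16
+	;and up select the source pixel (esi becomes its address after the
+	;shift/mask and the add of the source base), bits 8-15 select one of
+	;256 filter phases (edx becomes that phase's offset into the filter
+	;bank, 8*pair-count bytes per phase), and [esp+28+16] is the per-pixel
+	;step added to eax each iteration.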
+
+ movq mm6,[MMX_roundval]
+ pxor mm3,mm3
+ movq mm7,mm6
+ pxor mm2,mm2
+
+.coeffloop_unaligned_even_pairs:
+ movd mm0,dword [esi+0]
+ paddd mm7,mm2 ;accumulate alpha/red (pixels 2/3)
+
+ punpcklbw mm0,[esi+4] ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+ paddd mm6,mm3 ;accumulate green/blue (pixels 2/3)
+
+ movd mm2,dword [esi+8]
+ movq mm1,mm0 ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm7,mm0 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm6,mm1 ;accumulate green/blue (pixels 0/1)
+ add edx,16
+
+ add esi,16
+ sub ecx,2
+
+ jne .coeffloop_unaligned_even_pairs
+
+ paddd mm7,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm6,mm3 ;accumulate green/blue (pixels 2/3)
+
+ psrad mm7,14
+ psrad mm6,14
+
+ packssdw mm6,mm7
+ add edi,4
+
+ packuswb mm6,mm6
+ sub ebp,1
+
+ mov esi,eax
+ mov edx,eax
+
+ movd dword [edi-4],mm6
+ jne .pixelloop_even_pairs
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+;----------------------------------------------------------------
+
+.pixelloop_odd_pairs:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ mov ecx,[esp + 16 + 16]
+ shr edx,5
+ add esi,ebx
+ imul edx,ecx
+ add eax,[esp + 28 + 16]
+ sub ecx,1
+ add edx,[esp + 12 + 16]
+
+ movq mm6,[MMX_roundval]
+ pxor mm3,mm3
+ pxor mm2,mm2
+ movq mm7,mm6
+
+.coeffloop_unaligned_odd_pairs:
+ movd mm0,dword [esi+0]
+ paddd mm7,mm2 ;accumulate alpha/red (pixels 2/3)
+
+ punpcklbw mm0,[esi+4] ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+ paddd mm6,mm3 ;accumulate green/blue (pixels 2/3)
+
+ movd mm2,dword [esi+8]
+ movq mm1,mm0 ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm7,mm0 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm6,mm1 ;accumulate green/blue (pixels 0/1)
+ add edx,16
+
+ add esi,16
+ sub ecx,2
+
+ jne .coeffloop_unaligned_odd_pairs
+
+ paddd mm7,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm6,mm3 ;accumulate green/blue (pixels 2/3)
+
+ ;finish up odd pair
+
+ movd mm0,dword [esi] ;mm0 = [x1][r1][g1][b1]
+ punpcklbw mm0,[esi+4] ;mm2 = [x0][x1][r0][r1][g0][g1][b0][b1]
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g0][g1][b0][b1]
+ punpckhbw mm1,mm5 ;mm1 = [x0][x1][r0][r1]
+
+ pmaddwd mm0,[edx]
+ pmaddwd mm1,[edx]
+
+ paddd mm6,mm0
+ paddd mm7,mm1
+
+ ;combine into pixel
+
+ psrad mm6,14
+
+ psrad mm7,14
+
+ packssdw mm6,mm7
+ add edi,4
+
+ packuswb mm6,mm6
+ sub ebp,1
+
+ mov esi,eax
+ mov edx,eax
+
+ movd dword [edi-4],mm6
+ jne .pixelloop_odd_pairs
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+;----------------------------------------------------------------
+
+.accel_4coeff:
+ mov eax,[esp + 24 + 16]
+ mov ebp,[esp + 20 + 16]
+ add ebp,ebp
+ add ebp,ebp
+ mov ebx,[esp + 8 + 16]
+ mov edi,[esp + 4 + 16]
+ add edi,ebp
+ neg ebp
+
+ mov esi,eax
+ mov edx,eax
+
+ movq mm4,[MMX_roundval]
+ pxor mm5,mm5
+
+ mov ecx,[esp+12+16]
+
+.pixelloop_4coeff:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ shr edx,4
+ add esi,ebx
+ add eax,[esp+28+16]
+ add edx,ecx
+
+ movd mm0,dword [esi+0]
+ movd mm2,dword [esi+8]
+ punpcklbw mm0,[esi+4] ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movq mm1,mm0 ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm0,mm4 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm4 ;accumulate green/blue (pixels 0/1)
+
+ paddd mm0,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm1,mm3 ;accumulate green/blue (pixels 2/3)
+
+ psrad mm0,14
+ psrad mm1,14
+
+ packssdw mm1,mm0
+ mov esi,eax
+
+ packuswb mm1,mm1
+ mov edx,eax
+
+ movd dword [edi+ebp],mm1
+ add ebp,4
+ jne .pixelloop_4coeff
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+
+;----------------------------------------------------------------
+
+.accel_6coeff:
+ mov eax,[esp + 24 + 16]
+ mov ebp,[esp + 20 + 16]
+ add ebp,ebp
+ add ebp,ebp
+ mov ebx,[esp + 8 + 16]
+ mov edi,[esp + 4 + 16]
+ add edi,ebp
+ neg ebp
+
+ mov esi,eax
+ mov edx,eax
+
+ movq mm4,[MMX_roundval]
+ pxor mm5,mm5
+
+ mov ecx,[esp+12+16]
+
+.pixelloop_6coeff:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ shr edx,5
+ lea edx,[edx+edx*2]
+ add esi,ebx
+ add eax,[esp+28+16]
+ add edx,ecx
+
+ movd mm0,dword [esi+0]
+ movd mm2,dword [esi+8]
+ punpcklbw mm0,[esi+4] ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movq mm1,mm0 ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm0,mm4 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm4 ;accumulate green/blue (pixels 0/1)
+
+ paddd mm0,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm1,mm3 ;accumulate green/blue (pixels 2/3)
+
+ movd mm6,dword [esi+16]
+
+ punpcklbw mm6,[esi+20] ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movq mm7,mm6 ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm6,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+
+ pmaddwd mm6,[edx+16] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm7,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm7,[edx+16] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+ paddd mm0,mm6 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm7 ;accumulate green/blue (pixels 0/1)
+
+
+ psrad mm0,14
+ psrad mm1,14
+
+ packssdw mm1,mm0
+ mov esi,eax
+
+ packuswb mm1,mm1
+ mov edx,eax
+
+ movd dword [edi+ebp],mm1
+ add ebp,4
+ jne .pixelloop_6coeff
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+;----------------------------------------------------------------
+
+.accel_8coeff:
+ mov eax,[esp + 24 + 16]
+ mov ebp,[esp + 20 + 16]
+ add ebp,ebp
+ add ebp,ebp
+ mov ebx,[esp + 8 + 16]
+ mov edi,[esp + 4 + 16]
+ add edi,ebp
+ neg ebp
+
+ mov esi,eax
+ mov edx,eax
+
+ movq mm4,[MMX_roundval]
+ pxor mm5,mm5
+
+ mov ecx,[esp+12+16]
+
+.pixelloop_8coeff:
+ shr esi,14
+ and edx,0000ff00h
+ and esi,byte -4
+
+ shr edx,3
+ add esi,ebx
+ add eax,[esp+28+16]
+ add edx,ecx
+
+ movd mm0,dword [esi+0]
+ movd mm2,dword [esi+8]
+ punpcklbw mm0,[esi+4] ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movq mm1,mm0 ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm0,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+
+ pmaddwd mm0,[edx] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm2,[esi+12] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+ punpcklbw mm1,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm1,[edx] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+8] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+8] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm0,mm4 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm4 ;accumulate green/blue (pixels 0/1)
+
+ paddd mm0,mm2 ;accumulate alpha/red (pixels 2/3)
+ paddd mm1,mm3 ;accumulate green/blue (pixels 2/3)
+
+
+ movd mm6,dword [esi+16]
+
+ punpcklbw mm6,[esi+20] ;mm1=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ movd mm2,dword [esi+24]
+
+ punpcklbw mm2,[esi+28] ;mm2=[a2][a3][r2][r3][g2][g3][b2][b3]
+ movq mm7,mm6 ;mm0=[a0][a1][r0][r1][g0][g1][b0][b1]
+
+ punpckhbw mm6,mm5 ;mm0=[ a0 ][ a1 ][ r0 ][ r1 ]
+ movq mm3,mm2 ;mm3=[a2][a3][r2][r3][g2][g3][b2][b3]
+
+ pmaddwd mm6,[edx+16] ;mm0=[a0*f0+a1*f1][r0*f0+r1*f1]
+ punpcklbw mm7,mm5 ;mm1=[ g0 ][ g1 ][ b0 ][ b1 ]
+
+ pmaddwd mm7,[edx+16] ;mm1=[g0*f0+g1*f1][b0*f0+b1*f1]
+	punpckhbw mm2,mm5		;mm2=[ a2 ][ a3 ][ r2 ][ r3 ]
+
+ pmaddwd mm2,[edx+24] ;mm2=[a2*f2+a3*f3][r2*f2+r3*f3]
+ punpcklbw mm3,mm5 ;mm3=[ g2 ][ g3 ][ b2 ][ b3 ]
+
+ pmaddwd mm3,[edx+24] ;mm3=[g2*f2+g3*f3][b2*f2+b3*f3]
+ paddd mm0,mm6 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm7 ;accumulate green/blue (pixels 0/1)
+ paddd mm0,mm2 ;accumulate alpha/red (pixels 0/1)
+
+ paddd mm1,mm3 ;accumulate green/blue (pixels 0/1)
+
+
+ psrad mm0,14
+ psrad mm1,14
+
+ packssdw mm1,mm0
+ mov esi,eax
+
+ packuswb mm1,mm1
+ mov edx,eax
+
+ movd dword [edi+ebp],mm1
+ add ebp,4
+ jne .pixelloop_8coeff
+
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+
+ ret
+
+
+
+
+
+
+
+;-------------------------------------------------------------------------
+;
+; long resize_table_col_MMX(Pixel *out, Pixel **in_table, int *filter, int filter_width, PixDim w, long frac);
+
+ global _vdasm_resize_table_col_MMX
+_vdasm_resize_table_col_MMX:
+ push ebp
+ push esi
+ push edi
+ push ebx
+
+ mov edx,[esp + 12 + 16]
+ mov eax,[esp + 24 + 16]
+ shl eax,2
+ imul eax,[esp + 16 + 16]
+ add edx,eax
+	mov	[esp + 12 + 16], edx	;[esp+12+16] = filter pointer
+
+ mov ebp,[esp + 20 + 16] ;ebp = pixel counter
+ mov edi,[esp + 4 + 16] ;edi = destination pointer
+
+ pxor mm5,mm5
+
+ cmp dword [esp+16+16], 4
+ jz .accel_4coeff
+ cmp dword [esp+16+16], 6
+ jz .accel_6coeff
+
+ mov ecx,[esp + 16 + 16]
+ shr ecx,1
+ mov [esp + 16 + 16],ecx ;ecx = filter pair count
+
+ xor ebx,ebx ;ebx = source offset
+
+ mov ecx,[esp + 16 + 16] ;ecx = filter width counter
+.pixelloop:
+	mov	eax,[esp + 8 + 16]	;eax = row pointer table
+ movq mm6,[MMX_roundval]
+ movq mm7,mm6
+ pxor mm0,mm0
+ pxor mm1,mm1
+.coeffloop:
+ mov esi,[eax]
+ paddd mm6,mm0
+
+ movd mm0,dword [esi+ebx] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+ paddd mm7,mm1
+
+ mov esi,[eax+4]
+ add eax,8
+
+ movd mm1,dword [esi+ebx] ;mm1 = [0][0][0][0][x1][r1][g1][b1]
+ punpcklbw mm0,mm1 ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm0,[edx]
+ punpckhbw mm1,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ pmaddwd mm1,[edx]
+ add edx,8
+
+ sub ecx,1
+ jne .coeffloop
+
+ paddd mm6,mm0
+ paddd mm7,mm1
+
+ psrad mm6,14
+ psrad mm7,14
+ add edi,4
+ packssdw mm6,mm7
+ add ebx,4
+ packuswb mm6,mm6
+ sub ebp,1
+
+ mov ecx,[esp + 16 + 16] ;ecx = filter width counter
+ mov edx,[esp + 12 + 16] ;edx = filter bank pointer
+
+ movd dword [edi-4],mm6
+ jne .pixelloop
+
+.xit:
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+ ret
+
+
+
+.accel_4coeff:
+ movq mm2,[edx]
+ movq mm3,[edx+8]
+
+ mov esi,[esp+8+16] ;esi = row pointer table
+ mov eax,[esi]
+ add ebp,ebp
+ mov ebx,[esi+4]
+ add ebp,ebp
+ mov ecx,[esi+8]
+ mov esi,[esi+12]
+ add eax,ebp
+ add ebx,ebp
+ add ecx,ebp
+ add esi,ebp
+ add edi,ebp
+ neg ebp
+
+ ;EAX source 0
+ ;EBX source 1
+ ;ECX source 2
+ ;ESI source 3
+ ;EDI destination
+ ;EBP counter
+
+ movq mm4,[MMX_roundval]
+
+.pixelloop4:
+ movd mm6,dword [eax+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+
+ punpcklbw mm6,[ebx+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+
+ movq mm7,mm6
+ punpcklbw mm6,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm6,mm2
+ punpckhbw mm7,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ movd mm0,dword [ecx+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+ pmaddwd mm7,mm2
+
+ punpcklbw mm0,[esi+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+ paddd mm6,mm4
+
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm0,mm3
+ punpckhbw mm1,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ pmaddwd mm1,mm3
+ paddd mm7,mm4
+
+ paddd mm6,mm0
+ paddd mm7,mm1
+
+ psrad mm6,14
+ psrad mm7,14
+ packssdw mm6,mm7
+ packuswb mm6,mm6
+
+ movd dword [edi+ebp],mm6
+
+ add ebp,4
+ jne .pixelloop4
+ jmp .xit
+
+.accel_6coeff:
+ movq mm2,[edx]
+ movq mm3,[edx+8]
+ movq mm4,[edx+16]
+
+ push 0
+ push dword [fs:0]
+ mov dword [fs:0],esp
+
+ mov esp,[esp+8+24] ;esp = row pointer table
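+	;Same fs:[0] trick as in the ccint row routine: six source rows plus
+	;destination and counter need more registers than are free, so ESP is
+	;parked in fs:[0] and reused as the sixth row pointer until .pixelloop6
+	;finishes.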
+ mov eax,[esp]
+ add ebp,ebp
+ mov ebx,[esp+4]
+ add ebp,ebp
+ mov ecx,[esp+8]
+ mov edx,[esp+12]
+ mov esi,[esp+16]
+ mov esp,[esp+20]
+ add eax,ebp
+ add ebx,ebp
+ add ecx,ebp
+ add edx,ebp
+ add esi,ebp
+ add edi,ebp
+ add esp,ebp
+ neg ebp
+
+ ;EAX source 0
+ ;EBX source 1
+ ;ECX source 2
+ ;EDX source 3
+ ;ESI source 4
+ ;EDI destination
+ ;ESP source 5
+ ;EBP counter
+
+.pixelloop6:
+ movd mm6,dword [eax+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+
+ punpcklbw mm6,[ebx+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+
+ movq mm7,mm6
+ punpcklbw mm6,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ movd mm0,dword [ecx+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+ punpckhbw mm7,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ punpcklbw mm0,[edx+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+ pmaddwd mm6,mm2
+
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g1][g0][b1][b0]
+
+ pmaddwd mm7,mm2
+ punpckhbw mm1,mm5 ;mm1 = [x1][x0][r1][r0]
+
+ paddd mm6,[MMX_roundval]
+ pmaddwd mm0,mm3
+
+ paddd mm7,[MMX_roundval]
+ pmaddwd mm1,mm3
+
+ paddd mm6,mm0
+
+ movd mm0,dword [esi+ebp] ;mm0 = [0][0][0][0][x0][r0][g0][b0]
+ paddd mm7,mm1
+
+ punpcklbw mm0,[esp+ebp] ;mm0 = [x0][x1][r0][r1][g0][g1][b0][b1]
+ movq mm1,mm0
+ punpcklbw mm0,mm5 ;mm0 = [g1][g0][b1][b0]
+ punpckhbw mm1,mm5 ;mm1 = [x1][x0][r1][r0]
+ pmaddwd mm0,mm4
+ pmaddwd mm1,mm4
+ paddd mm6,mm0
+ paddd mm7,mm1
+
+ psrad mm6,14
+ psrad mm7,14
+ packssdw mm6,mm7
+ packuswb mm6,mm6
+
+ movd dword [edi+ebp],mm6
+
+ add ebp,4
+ jne .pixelloop6
+
+ mov esp, dword [fs:0]
+ pop dword [fs:0]
+ pop eax
+
+ jmp .xit
+
+
+ global _vdasm_resize_table_col_SSE2
+_vdasm_resize_table_col_SSE2:
+ push ebp
+ push esi
+ push edi
+ push ebx
+
+ mov edx,[esp+12+16]
+ mov eax,[esp+24+16]
+ shl eax,2
+ imul eax,[esp+16+16]
+ add edx,eax
+ mov [esp+12+16], edx ;[esp+12+16] = filter pointer
+
+ mov ebp,[esp+20+16] ;ebp = pixel counter
+ mov edi,[esp+4+16] ;edi = destination pointer
+
+ pxor xmm7, xmm7
+ movdqa xmm6, [MMX_roundval]
+
+ cmp dword [esp+16+16], 4
+ jz .accel_4coeff
+ cmp dword [esp+16+16], 6
+ jz .accel_6coeff
+
+ mov ecx,[esp+16+16]
+ shr ecx,1
+ mov [esp+16+16],ecx ;ecx = filter pair count
+
+ xor ebx,ebx ;ebx = source offset
+
+ mov ecx,[esp+16+16] ;ecx = filter width counter
+.pixelloop:
+	mov	eax, [esp+8+16]		;eax = row pointer table
+ movdqa xmm4, xmm6
+.coeffloop:
+ mov esi,[eax]
+
+ movd xmm0, dword [esi+ebx]
+
+ mov esi,[eax+4]
+ add eax,8
+
+ movd xmm1, dword [esi+ebx]
+ punpcklbw xmm0, xmm1
+
+ punpcklbw xmm0, xmm7
+
+ movq xmm2, qword [edx]
+ pshufd xmm2, xmm2, 01000100b
+
+ pmaddwd xmm0, xmm2
+
+ paddd xmm4, xmm0
+
+ add edx,8
+
+ sub ecx,1
+ jne .coeffloop
+
+ psrad xmm4,14
+ add edi,4
+ packssdw xmm4,xmm4
+ add ebx,4
+ packuswb xmm4,xmm4
+ sub ebp,1
+
+ mov ecx,[esp+16+16] ;ecx = filter width counter
+ mov edx,[esp+12+16] ;edx = filter bank pointer
+
+ movd dword [edi-4],xmm4
+ jne .pixelloop
+
+.xit:
+ pop ebx
+ pop edi
+ pop esi
+ pop ebp
+ ret
+
+.accel_4coeff:
+ shl ebp, 2
+ mov eax, [esp+8+16] ;eax = row pointer table
+ mov esi, [eax+12]
+ mov ecx, [eax+8]
+ mov ebx, [eax+4]
+ mov eax, [eax]
+ lea edi, [edi+ebp-4]
+ neg ebp
+
+ ;registers:
+ ;
+ ;EAX source 0
+ ;EBX source 1
+ ;ECX source 2
+ ;ESI source 3
+ ;EDI destination
+ ;EBP counter
+ ;
+ movq xmm4, qword [edx] ;xmm4 = coeff 0/1
+ movq xmm5, qword [edx+8] ;xmm5 = coeff 2/3
+ punpcklqdq xmm4, xmm4
+ punpcklqdq xmm5, xmm5
+
+ add ebp, 4
+ jz .oddpixel_4coeff
+
+.pixelloop_4coeff_dualpel:
+ movq xmm0, qword [eax]
+ movq xmm1, qword [ebx]
+ movq xmm2, qword [ecx]
+ movq xmm3, qword [esi]
+ add eax,8
+ add ebx,8
+ add ecx,8
+ add esi,8
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ punpcklbw xmm0, xmm7
+ punpckhbw xmm1, xmm7
+ punpcklbw xmm2, xmm7
+ punpckhbw xmm3, xmm7
+ pmaddwd xmm0, xmm4
+ pmaddwd xmm1, xmm4
+ pmaddwd xmm2, xmm5
+ pmaddwd xmm3, xmm5
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+ paddd xmm0, xmm6
+ paddd xmm1, xmm6
+ psrad xmm0, 14
+ psrad xmm1, 14
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+ movq qword [edi+ebp],xmm0
+ add ebp, 8
+ jae .pixelloop_4coeff_dualpel
+ jnz .xit
+
+.oddpixel_4coeff:
+ movd xmm0, dword [eax]
+ movd xmm1, dword [ebx]
+ movd xmm2, dword [ecx]
+ movd xmm3, dword [esi]
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ punpcklbw xmm0, xmm7
+ punpcklbw xmm2, xmm7
+ pmaddwd xmm0, xmm4
+ pmaddwd xmm2, xmm5
+ paddd xmm0, xmm2
+ paddd xmm0, xmm6
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd dword [edi],xmm0
+ jmp .xit
+
+
+.accel_6coeff:
+ movq xmm4, qword [edx] ;xmm4 = coeff 0/1
+ movq xmm5, qword [edx+8] ;xmm5 = coeff 2/3
+	movq	xmm6, qword [edx+16]	;xmm6 = coeff 4/5
+ punpcklqdq xmm4, xmm4
+ punpcklqdq xmm5, xmm5
+ punpcklqdq xmm6, xmm6
+
+ push 0
+ push dword [fs:0]
+ mov dword [fs:0],esp
+
+ shl ebp, 2
+ mov eax, [esp+8+24] ;eax = row pointer table
+ mov esp, [eax+20]
+ mov esi, [eax+16]
+ mov edx, [eax+12]
+ mov ecx, [eax+8]
+ mov ebx, [eax+4]
+ mov eax, [eax]
+ lea edi, [edi+ebp-4]
+ neg ebp
+
+ ;registers:
+ ;
+ ;EAX source 0
+ ;EBX source 1
+ ;ECX source 2
+ ;EDX source 3
+ ;ESI source 4
+ ;EDI destination
+ ;ESP source 5
+ ;EBP counter
+ ;
+
+ add ebp, 4
+ jz .oddpixel_6coeff
+
+.pixelloop_6coeff_dualpel:
+ movq xmm0, qword [eax]
+ movq xmm1, qword [ebx]
+ movq xmm2, qword [ecx]
+ movq xmm3, qword [edx]
+ add eax,8
+ add ebx,8
+ add ecx,8
+ add edx,8
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ movdqa xmm1, xmm0
+ movdqa xmm3, xmm2
+ punpcklbw xmm0, xmm7
+ punpckhbw xmm1, xmm7
+ punpcklbw xmm2, xmm7
+ punpckhbw xmm3, xmm7
+ pmaddwd xmm0, xmm4
+ pmaddwd xmm1, xmm4
+ pmaddwd xmm2, xmm5
+ pmaddwd xmm3, xmm5
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+
+ movq xmm2, qword [esi]
+ movq xmm3, qword [esp]
+ add esi, 8
+ add esp, 8
+ punpcklbw xmm2, xmm3
+ movdqa xmm3, xmm2
+ punpcklbw xmm2, xmm7
+ punpckhbw xmm3, xmm7
+ pmaddwd xmm2, xmm6
+ pmaddwd xmm3, xmm6
+ paddd xmm0, xmm2
+ paddd xmm1, xmm3
+ paddd xmm0, [MMX_roundval]
+ paddd xmm1, [MMX_roundval]
+ psrad xmm0, 14
+ psrad xmm1, 14
+ packssdw xmm0, xmm1
+ packuswb xmm0, xmm0
+ movq qword [edi+ebp],xmm0
+ add ebp, 8
+ jae .pixelloop_6coeff_dualpel
+ jnz .xit_6coeff
+
+.oddpixel_6coeff:
+ movd xmm0, dword [eax]
+ movd xmm1, dword [ebx]
+ movd xmm2, dword [ecx]
+ movd xmm3, dword [edx]
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ movd xmm1, dword [esi]
+ movd xmm3, dword [esp]
+ punpcklbw xmm0, xmm7
+ punpcklbw xmm2, xmm7
+ pmaddwd xmm0, xmm4
+ punpcklbw xmm1, xmm3
+ pmaddwd xmm2, xmm5
+ punpcklbw xmm1, xmm7
+ pmaddwd xmm1, xmm6
+ paddd xmm0, xmm2
+ paddd xmm1, [MMX_roundval]
+ paddd xmm0, xmm1
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd dword [edi],xmm0
+
+.xit_6coeff:
+ mov esp, dword [fs:0]
+ pop dword [fs:0]
+ pop eax
+ jmp .xit
+
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_resample_sse41.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_resample_sse41.asm
new file mode 100644
index 000000000..cf7332cb2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_resample_sse41.asm
@@ -0,0 +1,358 @@
+ segment .rdata, align=16
+
+round dq 0000000000002000h
+colround dq 0000200000002000h
+
+ segment .text
+
+ global _vdasm_resize_table_row_8_k8_4x_SSE41
+_vdasm_resize_table_row_8_k8_4x_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ movq xmm6, [round]
+ pshufd xmm6, xmm6, 0
+
+ mov ebp, [esp + 4 + 16] ;ebp = dst
+ mov esi, [esp + 12 + 16] ;esi = width
+ mov edi, [esp + 16 + 16] ;edi = kernel
+.yloop:
+ ;eax = temp
+ ;ebx = temp
+ ;ecx = temp
+ ;edx = temp
+ ;esi = horiz counter
+ ;edi = filter list
+ ;ebp = destination
+
+ mov eax, [edi+0]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [esp+8+16]
+ add eax, edx
+ add ebx, edx
+ add ecx, edx
+ add edx, [edi+12]
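+	;Each kernel record appears to be 50h bytes: four 32-bit source offsets
+	;(added to the src base above), then four 16-byte sets of eight word
+	;coefficients, one 8-tap set for each of the four output pixels written
+	;per iteration.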
+
+ pmovzxbw xmm0, [eax]
+ pmaddwd xmm0, [edi+10h]
+ pmovzxbw xmm1, [ebx]
+ pmaddwd xmm1, [edi+20h]
+ pmovzxbw xmm2, [ecx]
+ pmaddwd xmm2, [edi+30h]
+ pmovzxbw xmm3, [edx]
+ pmaddwd xmm3, [edi+40h]
+ add edi, 50h
+ phaddd xmm0, xmm1
+ phaddd xmm2, xmm3
+ phaddd xmm0, xmm2
+ paddd xmm0, xmm6
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd [ebp], xmm0
+
+ add ebp, 4
+ sub esi, 1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_resize_table_row_8_k16_4x_SSE41
+_vdasm_resize_table_row_8_k16_4x_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ movq xmm6, [round]
+ pshufd xmm6, xmm6, 0
+
+ mov ebp, [esp + 4 + 16] ;ebp = dst
+ mov esi, [esp + 12 + 16] ;esi = width
+ mov edi, [esp + 16 + 16] ;edi = kernel
+.yloop:
+ ;eax = temp
+ ;ebx = temp
+ ;ecx = temp
+ ;edx = temp
+ ;esi = horiz counter
+ ;edi = filter list
+ ;ebp = destination
+
+ mov eax, [edi+0]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [esp+8+16]
+ add eax, edx
+ add ebx, edx
+ add ecx, edx
+ add edx, [edi+12]
+
+ pmovzxbw xmm0, [eax]
+ pmaddwd xmm0, [edi+10h]
+ pmovzxbw xmm1, [ebx]
+ pmaddwd xmm1, [edi+20h]
+ pmovzxbw xmm2, [ecx]
+ pmaddwd xmm2, [edi+30h]
+ pmovzxbw xmm3, [edx]
+ pmaddwd xmm3, [edi+40h]
+ pmovzxbw xmm4, [eax+8]
+ pmaddwd xmm4, [edi+50h]
+ pmovzxbw xmm5, [ebx+8]
+ pmaddwd xmm5, [edi+60h]
+ paddd xmm0, xmm4
+ pmovzxbw xmm4, [ecx+8]
+ pmaddwd xmm4, [edi+70h]
+ paddd xmm1, xmm5
+ pmovzxbw xmm5, [edx+8]
+ pmaddwd xmm5, [edi+80h]
+ paddd xmm2, xmm4
+ paddd xmm3, xmm5
+ add edi, 90h
+ phaddd xmm0, xmm1
+ phaddd xmm2, xmm3
+ phaddd xmm0, xmm2
+ paddd xmm0, xmm6
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd [ebp], xmm0
+
+ add ebp, 4
+ sub esi, 1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_resize_table_row_8_SSE41
+_vdasm_resize_table_row_8_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor xmm7, xmm7
+ movq xmm6, [round]
+
+ mov edi, [esp + 4 + 16] ;edi = dst
+ mov ebx, [esp + 8 + 16] ;ebx = src
+ mov ebp, [esp + 12 + 16] ;ebp = width
+ mov edx, [esp + 16 + 16] ;edx = kernel
+.yloop:
+ ;eax = temp
+ ;ebx = source base address
+ ;ecx = (temp) source
+ ;edx = filter list
+ ;esi = (temp) kernel width
+ ;edi = destination
+ ;ebp = horiz counter
+
+ mov eax, [edx]
+ add edx, 16
+ lea ecx, [ebx + eax]
+ mov esi, [esp + 20 + 16] ;esi = kernel width
+
+ movq xmm2, xmm6
+.xloop:
+ pmovzxbw xmm0, [ecx]
+ add ecx, 8
+ pmaddwd xmm0, [edx]
+ paddd xmm2, xmm0
+ add edx, 16
+ sub esi, 8
+ jne .xloop
+
+ phaddd xmm2, xmm2
+ phaddd xmm2, xmm2
+ psrad xmm2, 14
+ packssdw xmm2, xmm2
+ packuswb xmm2, xmm2
+ movd eax, xmm2
+ mov [edi], al
+ add edi, 1
+ sub ebp, 1
+ jne .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_resize_table_col_8_k2_SSE41
+_vdasm_resize_table_col_8_k2_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ movq xmm6, [colround]
+ pshufd xmm6, xmm6, 0
+
+ mov esi, [esp + 4 + 16] ;esi = dst
+ mov edi, [esp + 16 + 16] ;edi = kernel
+ mov ebp, [esp + 12 + 16] ;ebp = width
+
+ movq xmm7, [edi]
+ pshufd xmm7, xmm7, 0
+
+	mov	edx, [esp + 8 + 16]	;edx = srcs
+ mov eax, [edx+0]
+ mov ebx, [edx+4]
+ add eax, ebp
+ add ebx, ebp
+ neg ebp
+
+.yloop:
+ ;eax = row0
+ ;ebx = row1
+ ;ecx =
+ ;edx =
+ ;edi = kernel
+ ;esi = dest
+ ;ebp = width counter
+
+ movd xmm0, [eax+ebp]
+ movd xmm2, [ebx+ebp]
+ punpcklbw xmm0, xmm2
+ pmovzxbw xmm0, xmm0
+ pmaddwd xmm0, xmm7
+
+ paddd xmm0, xmm6
+
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd [esi], xmm0
+ add esi, 4
+ add ebp, 4
+ jnz .yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_resize_table_col_8_k4_SSE41
+_vdasm_resize_table_col_8_k4_SSE41:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ movq xmm7, [colround]
+ pshufd xmm7, xmm7, 0
+
+ mov esi, [esp + 4 + 16] ;esi = dst
+ mov edi, [esp + 16 + 16] ;edi = kernel
+
+ movdqu xmm6, [edi]
+ pshufd xmm5, xmm6, 0
+ pshufd xmm6, xmm6, 0aah
+
+	mov	edx, [esp + 8 + 16]	;edx = srcs
+ mov ebp, [esp + 12 + 16]
+ mov eax, [edx+0]
+ mov ebx, [edx+4]
+ mov ecx, [edx+8]
+ mov edx, [edx+12]
+ lea eax, [eax+ebp-4]
+ lea ebx, [ebx+ebp-4]
+ lea ecx, [ecx+ebp-4]
+ lea edx, [edx+ebp-4]
+ lea esi, [esi+ebp-4]
+ neg ebp
+ add ebp,4
+ jz .odd
+.yloop:
+ ;eax = row0
+ ;ebx = row1
+ ;ecx = row2
+ ;edx = row3
+ ;edi = kernel
+ ;esi = dest
+ ;ebp = width counter
+
+ movd xmm0, [eax+ebp]
+ movd xmm1, [ebx+ebp]
+ punpcklbw xmm0, xmm1
+
+ movd xmm1, [ecx+ebp]
+ movd xmm2, [edx+ebp]
+ punpcklbw xmm1, xmm2
+
+ movd xmm2, [eax+ebp+4]
+ movd xmm3, [ebx+ebp+4]
+ punpcklbw xmm2, xmm3
+
+ movd xmm3, [ecx+ebp+4]
+ movd xmm4, [edx+ebp+4]
+ punpcklbw xmm3, xmm4
+
+ pmovzxbw xmm0, xmm0
+ pmaddwd xmm0, xmm5
+
+ pmovzxbw xmm1, xmm1
+ pmaddwd xmm1, xmm6
+
+ pmovzxbw xmm2, xmm2
+ pmaddwd xmm2, xmm5
+
+ pmovzxbw xmm3, xmm3
+ pmaddwd xmm3, xmm6
+
+ paddd xmm0, xmm1
+ paddd xmm2, xmm3
+
+ paddd xmm0, xmm7
+ paddd xmm2, xmm7
+
+ psrad xmm0, 14
+ psrad xmm2, 14
+
+ packssdw xmm0, xmm2
+ packuswb xmm0, xmm0
+ movq [esi+ebp], xmm0
+ add ebp, 8
+ js .yloop
+ jnz .noodd
+
+.odd:
+ movd xmm0, [eax]
+ movd xmm1, [ebx]
+ movd xmm2, [ecx]
+ movd xmm3, [edx]
+ punpcklbw xmm0, xmm1
+ punpcklbw xmm2, xmm3
+ pmovzxbw xmm0, xmm0
+ pmovzxbw xmm2, xmm2
+ pmaddwd xmm0, xmm5
+ pmaddwd xmm2, xmm6
+ paddd xmm0, xmm2
+ paddd xmm0, xmm7
+ psrad xmm0, 14
+ packssdw xmm0, xmm0
+ packuswb xmm0, xmm0
+ movd [esi], xmm0
+.noodd:
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_spanutils_isse.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_spanutils_isse.asm
new file mode 100644
index 000000000..3fe7cedbc
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_spanutils_isse.asm
@@ -0,0 +1,193 @@
+ section .rdata, rdata, align=16
+
+xfefefefefefefefe dq 0fefefefefefefefeh
+xe0e0e0e0e0e0e0e0 dq 0e0e0e0e0e0e0e0e0h
+x0002000200020002 dq 00002000200020002h
+
+ section .text
+
+;==============================================================================
+ global _vdasm_horiz_expand2x_coaligned_ISSE
+_vdasm_horiz_expand2x_coaligned_ISSE:
+ mov ecx, [esp+8]
+ mov edx, [esp+4]
+ mov eax, [esp+12]
+.xloop:
+ movq mm0, [ecx]
+ movq mm1, mm0
+ pavgb mm0, [ecx+1]
+ movq mm2, mm1
+ punpcklbw mm1, mm0
+ punpckhbw mm2, mm0
+
+ movq [edx], mm1
+ movq [edx+8], mm2
+ add edx, 16
+ add ecx, 8
+
+ sub eax, 16
+ jne .xloop
+ ret
+
+;==============================================================================
+ global _vdasm_vert_average_13_ISSE
+_vdasm_vert_average_13_ISSE:
+ push ebx
+ mov ebx, [esp+12+4]
+ mov ecx, [esp+8+4]
+ mov edx, [esp+4+4]
+ mov eax, [esp+16+4]
+
+ add ebx, eax
+ add ecx, eax
+ add edx, eax
+ neg eax
+
+ pcmpeqb mm7, mm7
+.xloop:
+ movq mm0, [ebx+eax]
+ movq mm1, [ecx+eax]
+ movq mm2, mm0
+
+ movq mm3, [ebx+eax+8]
+ pxor mm0, mm7
+ pxor mm1, mm7
+
+ movq mm4, [ecx+eax+8]
+ movq mm5, mm3
+ pxor mm3, mm7
+
+ pxor mm4, mm7
+ pavgb mm0, mm1
+ pavgb mm3, mm4
+
+ pxor mm0, mm7
+ pxor mm3, mm7
+ pavgb mm0, mm2
+
+ movq [edx+eax], mm0
+ pavgb mm3, mm5
+
+ movq [edx+eax+8], mm3
+ add eax, 16
+ jne .xloop
+
+ pop ebx
+ ret
+
+;==============================================================================
+ global _vdasm_vert_average_17_ISSE
+_vdasm_vert_average_17_ISSE:
+ push ebx
+ mov ebx, [esp+12+4]
+ mov ecx, [esp+8+4]
+ mov edx, [esp+4+4]
+ mov eax, [esp+16+4]
+
+ add ebx, eax
+ add ecx, eax
+ add edx, eax
+ neg eax
+
+ ;r = avgup(avgdown(avgdown(a, b), a), a)
+ ; = pavgb(~pavgb(pavgb(~a, ~b), ~a), a)
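+	; (note: pavgb rounds up, pavgb(x, y) = (x + y + 1) >> 1, and ~x = 255 - x,
+	;  so pavgb(~x, ~y) = 255 - ((x + y) >> 1) = ~avgdown(x, y), giving the identity above)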
+
+ pcmpeqb mm7, mm7
+.xloop:
+ movq mm0, [ecx+eax]
+ movq mm1, [ebx+eax]
+ movq mm2, mm0
+ pxor mm0, mm7 ;~a
+ pxor mm1, mm7 ;~b
+ pavgb mm1, mm0 ;pavgb(~a, ~b) = ~avgdown(a, b)
+ pavgb mm1, mm0 ;pavgb(~avgdown(a, b), ~a) = ~avgdown(avgdown(a, b), a)
+ pxor mm1, mm7 ;avgdown(avgdown(a, b), a)
+ pavgb mm1, mm2 ;pavgb(avgdown(avgdown(a, b), a), a) = round((7*a + b)/8)
+ movq [edx+eax], mm1
+
+ add eax, 8
+ jne .xloop
+
+ pop ebx
+ ret
+
+;==============================================================================
+ global _vdasm_vert_average_35_ISSE
+_vdasm_vert_average_35_ISSE:
+ push ebx
+ mov ebx, [esp+12+4]
+ mov ecx, [esp+8+4]
+ mov edx, [esp+4+4]
+ mov eax, [esp+16+4]
+
+ add ebx, eax
+ add ecx, eax
+ add edx, eax
+ neg eax
+
+ ;r = avgup(avgdown(avgdown(a, b), b), a)
+ ; = pavgb(~pavgb(pavgb(~a, ~b), ~b), a)
+
+ pcmpeqb mm7, mm7
+.xloop:
+ movq mm0, [ecx+eax]
+ movq mm1, [ebx+eax]
+ movq mm2, mm0
+ pxor mm0, mm7 ;~a
+ pxor mm1, mm7 ;~b
+ pavgb mm0, mm1 ;avgup(~a, ~b) = ~avgdown(a, b)
+ pavgb mm0, mm1 ;avgup(~avgdown(a, b), ~b) = ~avgdown(avgdown(a, b), b)
+ pxor mm0, mm7 ;avgdown(avgdown(a, b), b)
+ pavgb mm0, mm2 ;avgup(avgdown(avgdown(a, b), b), a) = round((5*a + 3*b) / 8)
+ movq [edx+eax], mm0
+
+ add eax, 8
+ jne .xloop
+
+ pop ebx
+ ret
+
+;==============================================================================
+ global _vdasm_horiz_expand4x_coaligned_MMX
+_vdasm_horiz_expand4x_coaligned_MMX:
+ mov edx, [esp+4]
+ mov ecx, [esp+8]
+ mov eax, [esp+12]
+ movq mm6, qword [x0002000200020002]
+ pxor mm7, mm7
+.xloop:
+ movd mm0, [ecx]
+ movd mm1, [ecx+1]
+ add ecx, 4
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm1, mm0 ;x1
+ movq mm2, mm1
+ paddw mm1, mm6 ;x1 + 2
+ movq mm3, mm1
+ paddw mm2, mm2 ;x2
+ paddw mm3, mm2 ;x3 + 2
+ paddw mm2, mm6 ;x2 + 2
+ psraw mm1, 2 ;x1/4
+ psraw mm2, 2 ;x2/4
+ psraw mm3, 2 ;x3/4
+ paddw mm1, mm0
+ paddw mm2, mm0
+ paddw mm3, mm0
+ movd mm0, [ecx-4]
+ packuswb mm1, mm1
+ packuswb mm2, mm2
+ packuswb mm3, mm3
+ punpcklbw mm0, mm1
+ punpcklbw mm2, mm3
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ punpckhwd mm1, mm2
+
+ movq [edx], mm0
+ movq [edx+8], mm1
+ add edx, 16
+ sub eax, 1
+ jne .xloop
+
+ ret
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_mmx.asm
new file mode 100644
index 000000000..3db442fa2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_mmx.asm
@@ -0,0 +1,326 @@
+ segment .rdata, align=16
+
+x0020w dq 00020002000200020h
+rb_mask_555 dq 07c1f7c1f7c1f7c1fh
+g_mask_555 dq 003e003e003e003e0h
+rb_mask_888 dq 000ff00ff00ff00ffh
+g_mask_888 dq 00000ff000000ff00h
+
+ segment .text
+
+ struc VDPixmapReferenceStretchBltBilinearParameters
+.dst resd 1
+.src resd 1
+.u resd 1
+.uinc resd 1
+.dudx resd 1
+
+.xprepos resd 1
+.xpostpos resd 1
+.xprecopy resd 1
+.xpostcopy resd 1
+.xmidsize resd 1
+ endstruc
+
+
+
+ global _vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX
+_vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+20+16]
+ and eax, 0f8000000h
+ mov ebx, [esp+8+16]
+ mov ecx, [esp+12+16]
+ jz .noreverse
+ xchg ebx, ecx
+ js .noreverse
+ neg eax
+ xchg ebx, ecx
+.noreverse:
+ shr eax, 16
+ mov [esp+20+16], eax
+ mov edx, [esp+4+16]
+ mov eax, [esp+16+16]
+ add eax, eax
+ lea ebx, [ebx+eax-6]
+ lea ecx, [ecx+eax-6]
+ lea edx, [edx+eax-6]
+ neg eax
+
+ movd mm4, dword [esp+20+16]
+ punpcklwd mm4, mm4
+ punpckldq mm4, mm4
+
+ movq mm6, [rb_mask_555]
+ movq mm7, [g_mask_555]
+
+.xstart:
+ add eax, 6
+ jbe .doodd
+.xloop:
+ movq mm0, [ebx+eax]
+ movq mm1, [ecx+eax]
+ movq mm2, mm7
+ movq mm3, mm7
+
+ pand mm2, mm0
+ pand mm3, mm1
+ pand mm0, mm6
+ pand mm1, mm6
+
+ psubw mm3, mm2
+ psubw mm1, mm0
+
+ pmulhw mm3, mm4
+ pmulhw mm1, mm4
+
+ psubw mm0, mm1
+ psubw mm2, mm3
+
+ pand mm0, mm6
+ pand mm2, mm7
+
+ paddw mm0, mm2
+
+ movq [edx+eax], mm0
+ add eax, 8
+ jnc .xloop
+
+.doodd:
+ sub eax, 6
+ jz .noodd
+.odd:
+ movzx esi, word [ebx+eax+6]
+ movd mm0, esi
+ movzx esi, word [ecx+eax+6]
+ movd mm1, esi
+ movq mm2, mm7
+ movq mm3, mm7
+
+ pand mm2, mm0
+ pand mm3, mm1
+ pand mm0, mm6
+ pand mm1, mm6
+
+ psubw mm3, mm2
+ psubw mm1, mm0
+
+ pmulhw mm3, mm4
+ pmulhw mm1, mm4
+
+ psubw mm0, mm1
+ psubw mm2, mm3
+
+ pand mm0, mm6
+ pand mm2, mm7
+
+ paddw mm0, mm2
+
+ movd esi, mm0
+ mov [edx+eax+6], si
+ add eax,2
+ jne .odd
+
+.noodd:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ global _vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX
+_vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov edx, [esp+4+16]
+
+ mov ebx, [edx+VDPixmapReferenceStretchBltBilinearParameters.src]
+ mov edi, [edx+VDPixmapReferenceStretchBltBilinearParameters.dst]
+
+ mov ecx, [edx+VDPixmapReferenceStretchBltBilinearParameters.xprecopy]
+ or ecx, ecx
+ jz .noprecopy
+ mov eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.xprepos]
+ mov eax, [ebx+eax]
+ lea ebp, [ecx*4]
+ sub edi, ebp
+ rep stosd
+.noprecopy:
+ mov ebp, [edx+VDPixmapReferenceStretchBltBilinearParameters.xmidsize]
+ add ebp, ebp
+ add ebp, ebp
+ add edi, ebp
+ neg ebp
+
+ mov esi, [edx+VDPixmapReferenceStretchBltBilinearParameters.u]
+ mov eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.dudx]
+ mov edx, [edx+VDPixmapReferenceStretchBltBilinearParameters.uinc]
+ movd mm2, esi
+ movd mm3, eax
+ shr ebx, 2
+
+ movq mm5, mm2
+ punpcklwd mm5, mm5
+ punpckhdq mm5, mm5
+ movq mm4, mm5
+ psraw mm4, 15
+
+.xloop:
+ movd mm0, dword [ebx*4]
+ pxor mm7, mm7
+ movd mm1, dword [ebx*4+4]
+ punpcklbw mm0, mm7
+ punpcklbw mm1, mm7
+ psubw mm1, mm0
+ pand mm4, mm1
+ pmulhw mm1, mm5
+ paddw mm1, mm4
+ paddw mm0, mm1
+ packuswb mm0, mm0
+ movd dword [edi+ebp], mm0
+
+ add esi, eax
+ adc ebx, edx
+
+ paddd mm2, mm3
+ movq mm5, mm2
+ punpcklwd mm5, mm5
+ punpckhdq mm5, mm5
+ movq mm4, mm5
+ psraw mm4, 15
+ add ebp, 4
+ jnz .xloop
+
+ mov edx, [esp+4+16]
+ mov ecx, [edx+VDPixmapReferenceStretchBltBilinearParameters.xpostcopy]
+ or ecx, ecx
+ jz .nopostcopy
+ mov eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.xpostpos]
+ add eax, [edx+VDPixmapReferenceStretchBltBilinearParameters.src]
+ mov eax, [eax]
+ rep stosd
+.nopostcopy:
+
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX
+_vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+20+16]
+ and eax, 0ff000000h
+ mov ebx, [esp+8+16]
+ mov ecx, [esp+12+16]
+ jz .noreverse
+ xchg ebx, ecx
+ js .noreverse
+ neg eax
+ xchg ebx, ecx
+.noreverse:
+ shr eax, 16
+ mov [esp+20+16], eax
+ mov edx, [esp+4+16]
+ mov eax, [esp+16+16]
+ add eax, eax
+ add eax, eax
+ lea ebx, [ebx+eax-4]
+ lea ecx, [ecx+eax-4]
+ lea edx, [edx+eax-4]
+ neg eax
+
+ movd mm4, dword [esp+20+16]
+ punpcklwd mm4, mm4
+ punpckldq mm4, mm4
+
+ movq mm6, [rb_mask_888]
+ movq mm7, [g_mask_888]
+
+.xstart:
+ add eax, 4
+ jbe .doodd
+.xloop:
+ movq mm0, [ebx+eax]
+ movq mm1, [ecx+eax]
+ movq mm2, mm0
+ movq mm3, mm1
+ psrlw mm2, 8
+ psrlw mm3, 8
+ pand mm0, mm6
+ pand mm1, mm6
+
+ psubw mm3, mm2
+ psubw mm1, mm0
+
+ pmulhw mm3, mm4
+ pmulhw mm1, mm4
+
+ psubw mm0, mm1
+ psubw mm2, mm3
+
+ pand mm0, mm6
+
+ psllw mm2, 8
+
+ paddw mm0, mm2
+
+ movq qword [edx+eax], mm0
+ add eax, 8
+ jnc .xloop
+
+.doodd:
+ sub eax, 4
+ jz .noodd
+.odd:
+ movd mm0, dword [ebx]
+ movd mm1, dword [ecx]
+ movq mm2, mm0
+ movq mm3, mm1
+ psrlw mm2, 8
+ psrlw mm3, 8
+ pand mm0, mm6
+ pand mm1, mm6
+
+ psubw mm3, mm2
+ psubw mm1, mm0
+
+ pmulhw mm3, mm4
+ pmulhw mm1, mm4
+
+ psubw mm0, mm1
+ psubw mm2, mm3
+
+ pand mm0, mm6
+
+ psllw mm2, 8
+
+ paddw mm0, mm2
+
+ movd dword [edx], mm0
+
+.noodd:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_point.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_point.asm
new file mode 100644
index 000000000..dca765b92
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_stretchrgb_point.asm
@@ -0,0 +1,96 @@
+ segment .text
+
+ struc scaleinfo
+.dst resd 1
+.src resd 1
+.xaccum resd 1
+.xfracinc resd 1
+.xintinc resd 1
+.count resd 1
+ endstruc
+
+ global _vdasm_resize_point32
+_vdasm_resize_point32:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+
+ mov ebx, [eax+scaleinfo.xaccum]
+ mov ecx, [eax+scaleinfo.xfracinc]
+ mov edx, [eax+scaleinfo.src]
+ mov esi, [eax+scaleinfo.xintinc]
+ mov edi, [eax+scaleinfo.dst]
+ mov ebp, [eax+scaleinfo.count]
+.xloop:
+ mov eax,[edx*4]
+ add ebx,ecx
+ adc edx,esi
+ mov [edi+ebp],eax
+ add ebp,4
+ jne .xloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ global _vdasm_resize_point32_MMX
+_vdasm_resize_point32_MMX:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov eax, [esp+4+16]
+
+ push 0
+ push dword [fs:0]
+ mov dword [fs:0], esp
+
+ mov ebx, [eax+scaleinfo.xaccum]
+ mov esp, [eax+scaleinfo.xfracinc]
+ mov edx, [eax+scaleinfo.src]
+ mov esi, [eax+scaleinfo.xintinc]
+ mov edi, [eax+scaleinfo.dst]
+ mov ebp, [eax+scaleinfo.count]
+
+ mov eax, ebx
+ mov ecx, edx
+ add ebx, esp
+ adc edx, esi
+ add esp, esp
+ adc esi, esi
+
+ add ebp, 4
+ jz .odd
+.dualloop:
+ movd mm0, dword [ecx*4]
+ punpckldq mm0,[edx*4]
+ add eax,esp
+ adc ecx,esi
+ add ebx,esp
+ adc edx,esi
+ movq [edi+ebp-4],mm0
+
+ add ebp,8
+ jnc .dualloop
+ jnz .noodd
+.odd:
+ mov eax, [ecx*4]
+ mov [edi-4], eax
+.noodd:
+ mov esp, dword [fs:0]
+ pop eax
+ pop eax
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_triblt.inc b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt.inc
new file mode 100644
index 000000000..fb969c56f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt.inc
@@ -0,0 +1,24 @@
+ struc span
+.u resd 1
+.v resd 1
+ endstruc
+
+ struc mipspan
+.u resd 1
+.v resd 1
+.lambda resd 1
+ endstruc
+
+ struc mipmap
+.bits resd 1
+.pitch resd 1
+.uvmul resd 1
+ resd 1
+ endstruc
+
+ struc texinfo
+.mips resd 16*4
+.dst resd 1
+.src resd 1
+.w resd 1
+ endstruc
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_mmx.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_mmx.asm
new file mode 100644
index 000000000..3836488aa
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_mmx.asm
@@ -0,0 +1,425 @@
+ segment .rdata, align=16
+
+correct dq 0000800000008000h
+round dq 0000200000002000h
+round1 dq 0000020000000200h
+round2 dq 0002000000020000h
+
+ segment .text
+
+ %include "a_triblt.inc"
+
+ extern _kVDCubicInterpTableFX14_075_MMX
+
+;--------------------------------------------------------------------------
+ global _vdasm_triblt_span_bilinear_mmx
+_vdasm_triblt_span_bilinear_mmx:
+ push ebp
+ push edi
+ push esi
+ push ebx
+ mov edi,[esp+4+16]
+ mov edx,[edi+texinfo.dst]
+ mov ebp,[edi+texinfo.w]
+ shl ebp,2
+ mov ebx,[edi+texinfo.mips+mipmap.bits]
+ add edx,ebp
+ mov esi,[edi+texinfo.mips+mipmap.pitch]
+ neg ebp
+ movd mm6,[edi+texinfo.mips+mipmap.uvmul]
+ pxor mm7,mm7
+ mov edi,[edi+texinfo.src]
+.xloop:
+ movq mm4,[edi]
+ movq mm0,mm4
+ psrld mm0,16
+ movq mm5,mm4
+ packssdw mm0,mm0
+ pmaddwd mm0,mm6
+ add edi,8
+ punpcklwd mm4,mm4
+ punpckldq mm4,mm4
+ movd ecx,mm0
+ add ecx,ebx
+ psrlw mm4,1
+ movd mm0,dword [ecx]
+ movd mm1,dword [ecx+4]
+ punpcklbw mm0,mm7
+ movd mm2,dword [ecx+esi]
+ punpcklbw mm1,mm7
+ movd mm3,dword [ecx+esi+4]
+ punpcklbw mm2,mm7
+ punpcklbw mm3,mm7
+ psubw mm1,mm0
+ psubw mm3,mm2
+ paddw mm1,mm1
+ paddw mm3,mm3
+ pmulhw mm1,mm4
+ pmulhw mm3,mm4
+ punpckhwd mm5,mm5
+ punpckldq mm5,mm5
+ paddw mm0,mm1
+ psrlw mm5,1
+ paddw mm2,mm3
+ psubw mm2,mm0
+ paddw mm2,mm2
+ pmulhw mm2,mm5
+ paddw mm0,mm2
+ packuswb mm0,mm0
+ movd dword [edx+ebp],mm0
+ add ebp,4
+ jnc .xloop
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ emms
+ ret
+
+;--------------------------------------------------------------------------
+ global _vdasm_triblt_span_trilinear_mmx
+_vdasm_triblt_span_trilinear_mmx:
+ push ebp
+ push edi
+ push esi
+ push ebx
+ mov esi,[esp+4+16]
+ mov edx,[esi+texinfo.dst]
+ mov ebp,[esi+texinfo.w]
+ shl ebp,2
+ add edx,ebp
+ neg ebp
+ mov edi,[esi+texinfo.src]
+ pxor mm7,mm7
+.xloop:
+ movd mm6,[edi+mipspan.u]
+ punpckldq mm6,[edi+mipspan.v]
+ mov eax,[edi+mipspan.lambda]
+ shr eax,4
+ and eax,byte -16
+ movd mm2,eax
+ psrlq mm2,4
+ psrld mm6,mm2
+ paddd mm6,[correct]
+
+ ;fetch mipmap 1
+ mov ebx,[esi+eax+mipmap.pitch]
+ movd mm1,[esi+eax+mipmap.uvmul]
+ movq mm4,mm6
+ movq mm0,mm6
+ psrld mm0,16
+ packssdw mm0,mm0
+ pmaddwd mm0,mm1
+ movq mm5,mm4
+ punpcklwd mm4,mm4
+ punpckldq mm4,mm4
+ punpckhwd mm5,mm5
+ punpckldq mm5,mm5
+ movd ecx,mm0
+ add ecx,[esi+eax+mipmap.bits]
+ psrlw mm4,1
+ movd mm0,dword [ecx]
+ movd mm1,dword [ecx+4]
+ punpcklbw mm0,mm7
+ movd mm2,dword [ecx+ebx]
+ punpcklbw mm1,mm7
+ movd mm3,dword [ecx+ebx+4]
+ punpcklbw mm2,mm7
+ punpcklbw mm3,mm7
+ psubw mm1,mm0
+ psubw mm3,mm2
+ paddw mm1,mm1
+ paddw mm3,mm3
+ pmulhw mm1,mm4
+ pmulhw mm3,mm4
+ paddw mm0,mm1
+ psrlw mm5,1
+ paddw mm2,mm3
+ psubw mm2,mm0
+ paddw mm2,mm2
+ pmulhw mm2,mm5
+ paddw mm0,mm2
+
+ ;fetch mipmap 2
+ mov ebx,[esi+eax+16+mipmap.pitch]
+ movd mm1,[esi+eax+16+mipmap.uvmul]
+ paddd mm6,[correct]
+ psrld mm6,1
+ movq mm4,mm6
+ psrld mm6,16
+ packssdw mm6,mm6
+ pmaddwd mm6,mm1
+ movq mm5,mm4
+ punpcklwd mm4,mm4
+ punpckldq mm4,mm4
+ punpckhwd mm5,mm5
+ punpckldq mm5,mm5
+ movd ecx,mm6
+ add ecx,[esi+eax+16+mipmap.bits]
+ psrlw mm4,1
+ movd mm6,dword [ecx]
+ movd mm1,dword [ecx+4]
+ punpcklbw mm6,mm7
+ movd mm2,dword [ecx+ebx]
+ punpcklbw mm1,mm7
+ movd mm3,dword [ecx+ebx+4]
+ punpcklbw mm2,mm7
+ punpcklbw mm3,mm7
+ psubw mm1,mm6
+ psubw mm3,mm2
+ paddw mm1,mm1
+ paddw mm3,mm3
+ pmulhw mm1,mm4
+ pmulhw mm3,mm4
+ paddw mm6,mm1
+ psrlw mm5,1
+ paddw mm2,mm3
+ psubw mm2,mm6
+ paddw mm2,mm2
+ pmulhw mm2,mm5
+ paddw mm6,mm2
+
+ ;blend mips
+ movd mm1,[edi+mipspan.lambda]
+ punpcklwd mm1,mm1
+ punpckldq mm1,mm1
+ psllw mm1,8
+ psrlq mm1,1
+ psubw mm6,mm0
+ paddw mm6,mm6
+ pmulhw mm6,mm1
+ paddw mm0,mm6
+ packuswb mm0,mm0
+
+ movd dword [edx+ebp],mm0
+ add edi, mipspan_size
+ add ebp,4
+ jnc .xloop
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ emms
+ ret
+
+;--------------------------------------------------------------------------
+%macro .SETUPADDR 1
+ ;compute mipmap index and UV
+ movd mm0, [edi + mipspan.u]
+ punpckldq mm0, [edi + mipspan.v]
+ mov ebx, [edi + mipspan.lambda]
+ shr ebx, 4
+ and ebx, byte -16
+
+ add ebx, mipmap_size*%1
+ movd mm2, ebx
+ add ebx, [esp + .af_mipbase]
+ psrlq mm2, 4
+ psrad mm0, mm2
+ paddd mm0, [correct]
+ movq mm1, mm0
+ psrlq mm1, 32
+
+ ;compute horizontal filters
+ movd ecx, mm0
+ shr ecx, 4
+ and ecx, 0ff0h
+ add ecx, _kVDCubicInterpTableFX14_075_MMX
+
+ ;compute vertical filter
+ movd edx, mm1
+ and edx, 0ff00h
+ shr edx, 4
+ add edx, _kVDCubicInterpTableFX14_075_MMX
+
+ ;compute texel address
+ movd mm1, [ebx + mipmap.uvmul]
+ psrld mm0, 16
+ packssdw mm0, mm0
+ pmaddwd mm0, mm1
+ movd eax, mm0
+ add eax, [ebx + mipmap.bits]
+%endmacro
+
+%macro .HCUBIC 4
+ movd %1, dword [eax]
+ punpcklbw %1, qword [eax+4]
+ movd %3, dword [eax+8]
+ punpcklbw %3, qword [eax+12]
+ movq %2, %1
+ movq %4, %3
+ punpcklbw %1, mm7
+ pmaddwd %1, [ecx]
+ punpcklbw %3, mm7
+ pmaddwd %3, [ecx+8]
+ punpckhbw %2, mm7
+ pmaddwd %2, [ecx]
+ punpckhbw %4, mm7
+ pmaddwd %4, [ecx+8]
+ paddd %1, %3
+ paddd %2, %4
+%endmacro
+
+%macro .VCUBIC 1
+ .HCUBIC mm0, mm1, mm2, mm3
+ add eax, %1
+
+ .HCUBIC mm4, mm5, mm2, mm3
+ add eax, %1
+
+ movq mm2, [round1]
+
+ paddd mm0, mm2
+ paddd mm1, mm2
+ paddd mm4, mm2
+ paddd mm5, mm2
+
+ psrad mm0, 10
+ psrad mm1, 10
+ psrad mm4, 10
+ psrad mm5, 10
+
+ packssdw mm0, mm0
+ packssdw mm1, mm1
+ packssdw mm4, mm4
+ packssdw mm5, mm5
+
+ punpcklwd mm0, mm4
+ punpcklwd mm1, mm5
+
+ movq mm3, [edx]
+
+ pmaddwd mm0, mm3
+ pmaddwd mm1, mm3
+
+ movq [esp + .af_htemp0], mm0
+ movq [esp + .af_htemp1], mm1
+
+ .HCUBIC mm0, mm1, mm2, mm3
+ add eax, %1
+ .HCUBIC mm4, mm5, mm2, mm3
+
+ movq mm2, [round1]
+
+ paddd mm0, mm2
+ paddd mm1, mm2
+ paddd mm4, mm2
+ paddd mm5, mm2
+
+ psrad mm0, 10
+ psrad mm1, 10
+ psrad mm4, 10
+ psrad mm5, 10
+
+ packssdw mm0, mm0
+ packssdw mm1, mm1
+ packssdw mm4, mm4
+ packssdw mm5, mm5
+
+ punpcklwd mm0, mm4
+ punpcklwd mm1, mm5
+
+ movq mm2, [round2]
+ movq mm3, [edx + 8]
+
+ pmaddwd mm0, mm3
+ pmaddwd mm1, mm3
+
+ paddd mm0, [esp + .af_htemp0]
+ paddd mm1, [esp + .af_htemp1]
+
+ paddd mm0, mm2
+ paddd mm1, mm2
+
+ psrad mm0, 18
+ psrad mm1, 18
+ packssdw mm0, mm1
+%endmacro
+
+ global _vdasm_triblt_span_bicubic_mip_linear_mmx
+_vdasm_triblt_span_bicubic_mip_linear_mmx:
+
+;parameters
+%define .p_texinfo 20
+
+;aligned frame
+%define .af_htemp0 0
+%define .af_htemp1 8
+%define .af_vtemp0 16
+%define .af_mipbase 24
+%define .af_prevesp 28
+%define .afsize 32
+
+ push ebp
+ lea ebp, [esp-12]
+ push edi
+ push esi
+ push ebx
+
+ sub esp, .afsize
+ and esp, -8
+
+ mov [esp + .af_prevesp], ebp
+
+ mov ebx, [ebp + .p_texinfo]
+ mov ebp, [ebx + texinfo.dst]
+ mov esi, [ebx + texinfo.w]
+ shl esi, 2
+ add ebp,esi
+ neg esi
+
+ mov edi, [ebx + texinfo.src]
+ mov [esp + .af_mipbase], ebx
+ pxor mm7, mm7
+
+.xloop:
+
+ ;registers:
+ ; eax base texel address
+ ; ebx first mip info
+ ; ecx horizontal filter
+ ; edx vertical filter
+ ; esi horizontal count
+ ; edi mipspan
+ ; ebp destination
+
+ ;fetch mipmap 1
+ .SETUPADDR 0
+ .VCUBIC [ebx+mipmap.pitch]
+
+ movq [esp + .af_vtemp0], mm0
+
+ ;fetch mipmap 2
+ .SETUPADDR 1
+ .VCUBIC [ebx+mipmap.pitch]
+
+ ;blend mips
+ movq mm1, [esp + .af_vtemp0]
+
+ psubw mm0, mm1
+
+ movd mm3,[edi+mipspan.lambda]
+ punpcklwd mm3,mm3
+ punpckldq mm3,mm3
+ psllw mm3,8
+ psrlq mm3,1
+
+ paddw mm0,mm0
+ pmulhw mm0,mm3
+ paddw mm0,mm1
+ packuswb mm0,mm0
+
+ movd dword [ebp+esi],mm0
+ add edi, mipspan_size
+ add esi,4
+ jnc .xloop
+
+ mov esp, [esp + .af_prevesp]
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ emms
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_scalar.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_scalar.asm
new file mode 100644
index 000000000..c550634f3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_scalar.asm
@@ -0,0 +1,36 @@
+ segment .text
+
+ %include "a_triblt.inc"
+
+ global _vdasm_triblt_span_point
+_vdasm_triblt_span_point:
+ push ebp
+ push edi
+ push esi
+ push ebx
+ mov eax,[esp+4+16]
+ mov ebp,[eax+texinfo.w]
+ mov ebx,[eax+texinfo.mips+mipmap.pitch]
+ shl ebp,2
+ mov edi,[eax+texinfo.src]
+ mov edx,[eax+texinfo.dst]
+ mov ecx,[eax+texinfo.mips+mipmap.bits]
+ sar ebx,2
+ add edx,ebp
+ neg ebp
+.xloop:
+ mov eax,[edi+span.v]
+ imul eax,ebx
+ add eax,[edi+span.u]
+ add edi,8
+ mov eax,[ecx+eax*4]
+ mov [edx+ebp],eax
+ add ebp,4
+ jnc .xloop
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_sse2.asm b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_sse2.asm
new file mode 100644
index 000000000..54514b317
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/a_triblt_sse2.asm
@@ -0,0 +1,197 @@
+ segment .rdata, align=16
+
+correct dq 0000800000008000h, 0000800000008000h
+round dq 0000200000002000h, 0000200000002000h
+round1 dq 0000020000000200h, 0000020000000200h
+round2 dq 0002000000020000h, 0002000000020000h
+
+ segment .text
+
+ %include "a_triblt.inc"
+
+ extern _kVDCubicInterpTableFX14_075_MMX
+
+;--------------------------------------------------------------------------
+ global _vdasm_triblt_span_bicubic_mip_linear_sse2
+_vdasm_triblt_span_bicubic_mip_linear_sse2:
+
+;parameters
+%define .p_texinfo 20
+
+;aligned frame
+%define .af_vtemp0 0
+%define .af_mipbase 16
+%define .af_prevesp 20
+%define .afsize 24
+
+ push ebp
+ lea ebp, [esp-12]
+ push edi
+ push esi
+ push ebx
+
+ sub esp, .afsize
+ and esp, -16
+
+ mov [esp + .af_prevesp], ebp
+
+ mov ebx, [ebp + .p_texinfo]
+ mov ebp, [ebx + texinfo.dst]
+ mov esi, [ebx + texinfo.w]
+ shl esi, 2
+ add ebp,esi
+ neg esi
+
+ mov edi, [ebx + texinfo.src]
+ mov [esp + .af_mipbase], ebx
+ pxor xmm7, xmm7
+
+.xloop:
+
+ ;registers:
+ ; eax base texel address
+ ; ebx first mip info
+ ; ecx horizontal filter
+ ; edx vertical filter
+ ; esi horizontal count
+ ; edi mipspan
+ ; ebp destination
+
+%macro .SETUPADDR 1
+ ;compute mipmap index and UV
+ movd xmm0, [edi + mipspan.u]
+ movd xmm1, [edi + mipspan.v]
+ punpckldq xmm0, xmm1
+ mov ebx, [edi + mipspan.lambda]
+ shr ebx, 4
+ and ebx, byte -16
+
+ add ebx, mipmap_size*%1
+ movd xmm2, ebx
+ add ebx, [esp + .af_mipbase]
+ psrlq xmm2, 4
+ psrad xmm0, xmm2
+ paddd xmm0, [correct]
+ pshufd xmm1, xmm0, 01010101b
+
+ ;compute horizontal filters
+ movd ecx, xmm0
+ shr ecx, 4
+ and ecx, 0ff0h
+ add ecx, _kVDCubicInterpTableFX14_075_MMX
+
+ ;compute vertical filter
+ movd edx, xmm1
+ and edx, 0ff00h
+ shr edx, 4
+ add edx, _kVDCubicInterpTableFX14_075_MMX
+
+ ;compute texel address
+ movd xmm1, [ebx + mipmap.uvmul]
+ psrld xmm0, 16
+ packssdw xmm0, xmm0
+ pmaddwd xmm0, xmm1
+ movd eax, xmm0
+ add eax, [ebx + mipmap.bits]
+%endmacro
+
+%macro .HCUBIC 4
+ movd %1, dword [eax]
+ movd %3, dword [eax+4]
+ movd %2, dword [eax+8]
+ movd %4, dword [eax+12]
+ punpcklbw %1, %3
+ punpcklbw %2, %4
+ punpcklbw %1, xmm7
+ punpcklbw %2, xmm7
+ movdqa %3, [ecx]
+ pshufd %4, %3, 11101110b
+ pshufd %3, %3, 01000100b
+ pmaddwd %1, %3
+ pmaddwd %2, %4
+ paddd %1, %2
+%endmacro
+
+%macro .VCUBIC 1
+ .HCUBIC xmm0, xmm4, xmm5, xmm6
+ add eax, %1
+ .HCUBIC xmm1, xmm4, xmm5, xmm6
+ add eax, %1
+ .HCUBIC xmm2, xmm4, xmm5, xmm6
+ add eax, %1
+ .HCUBIC xmm3, xmm4, xmm5, xmm6
+
+ movq xmm4, [round1]
+
+ paddd xmm0, xmm4
+
+ paddd xmm1, xmm4
+ psrad xmm0, 10
+
+ paddd xmm2, xmm4
+ psrad xmm1, 10
+ packssdw xmm0, xmm0
+
+ paddd xmm3, xmm4
+ psrad xmm2, 10
+ packssdw xmm1, xmm1
+
+ movdqa xmm5, [edx]
+ psrad xmm3, 10
+ punpcklwd xmm0, xmm1
+
+ packssdw xmm2, xmm2
+ packssdw xmm3, xmm3
+ pshufd xmm4, xmm5, 01000100b
+
+ pmaddwd xmm0, xmm4
+ punpcklwd xmm2, xmm3
+
+ pshufd xmm5, xmm5, 11101110b
+
+ pmaddwd xmm2, xmm5
+ paddd xmm0, xmm2
+ paddd xmm0, [round2]
+ psrad xmm0, 18
+
+ packssdw xmm0, xmm0
+%endmacro
+
+ ;fetch mipmap 1
+ .SETUPADDR 0
+ .VCUBIC [ebx+mipmap.pitch]
+
+ movq [esp + .af_vtemp0], xmm0
+
+ ;fetch mipmap 2
+ .SETUPADDR 1
+ .VCUBIC [ebx+mipmap.pitch]
+
+ ;blend mips
+ movq xmm1, [esp + .af_vtemp0]
+
+ psubw xmm0, xmm1
+
+ movd xmm3, [edi+mipspan.lambda]
+ pshuflw xmm3, xmm3, 0
+ psllw xmm3, 8
+ psrlq xmm3, 1
+
+ paddw xmm0, xmm0
+ pmulhw xmm0, xmm3
+ paddw xmm0, xmm1
+ packuswb xmm0, xmm0
+
+ movd dword [ebp+esi], xmm0
+ add edi, mipspan_size
+ add esi,4
+ jnc .xloop
+
+ mov esp, [esp + .af_prevesp]
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/alphablt.cpp b/src/thirdparty/VirtualDub/Kasumi/source/alphablt.cpp
new file mode 100644
index 000000000..a292ca2bd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/alphablt.cpp
@@ -0,0 +1,76 @@
+#include <vd2/system/math.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+
+void VDPixmapBltAlphaConst8(uint8 *dst, ptrdiff_t dstpitch, const uint8 *src, ptrdiff_t srcpitch, uint32 w, uint32 h, uint32 ialpha);
+
+bool VDPixmapBltAlphaConst(const VDPixmap& dst, const VDPixmap& src, float alpha) {
+ if (!(alpha >= 0.0f))
+ alpha = 0.0f;
+ else if (!(alpha <= 1.0f))
+ alpha = 1.0f;
+
+ uint32 ialpha = VDRoundToInt32(alpha * 256.0f);
+
+ // format check
+ if (dst.format != src.format || !src.format)
+ return false;
+
+ // degenerate case check
+ if (!dst.w || !dst.h)
+ return false;
+
+ // size check
+ if (src.w != dst.w || src.h != dst.h)
+ return false;
+
+ // check for formats that are not 8bpp
+ switch(src.format) {
+ case nsVDPixmap::kPixFormat_Pal1:
+ case nsVDPixmap::kPixFormat_Pal2:
+ case nsVDPixmap::kPixFormat_Pal4:
+ case nsVDPixmap::kPixFormat_Pal8:
+ case nsVDPixmap::kPixFormat_RGB565:
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ return false;
+ }
+
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(src.format);
+
+ const int qw = -(-dst.w >> formatInfo.qwbits);
+ const int qh = -(-dst.h >> formatInfo.qhbits);
+ const int auxw = -(-dst.w >> formatInfo.auxwbits);
+ const int auxh = -(-dst.h >> formatInfo.auxhbits);
+
+ switch(formatInfo.auxbufs) {
+ case 2:
+ VDPixmapBltAlphaConst8((uint8 *)dst.data3, dst.pitch3, (const uint8 *)src.data3, src.pitch3, auxw, auxh, ialpha);
+ case 1:
+ VDPixmapBltAlphaConst8((uint8 *)dst.data2, dst.pitch2, (const uint8 *)src.data2, src.pitch2, auxw, auxh, ialpha);
+ case 0:
+ VDPixmapBltAlphaConst8((uint8 *)dst.data, dst.pitch, (const uint8 *)src.data, src.pitch, formatInfo.qsize * qw, qh, ialpha);
+ }
+
+ return true;
+}
+
+void VDPixmapBltAlphaConst8(uint8 *dst, ptrdiff_t dstpitch, const uint8 *src, ptrdiff_t srcpitch, uint32 w, uint32 h, uint32 ialpha) {
+ dstpitch -= w;
+ srcpitch -= w;
+ do {
+ uint32 w2 = w;
+ do {
+ sint32 sc = *src;
+ sint32 dc = *dst;
+
+ *dst = dc + (((sc-dc)*ialpha + 128) >> 8);
+ ++src;
+ ++dst;
+ } while(--w2);
+
+ src += srcpitch;
+ dst += dstpitch;
+ } while(--h);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt.cpp
new file mode 100644
index 000000000..75e5542a9
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt.cpp
@@ -0,0 +1,273 @@
+#include <vector>
+#include <vd2/system/memory.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+
+#if _MSC_VER >= 1300
+ #define VDNOINLINE __declspec(noinline)
+#else
+ #define VDNOINLINE
+#endif
+
+using namespace nsVDPixmap;
+
+namespace {
+ typedef void (*tpPalettedBlitter)(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal);
+ typedef void (*tpChunkyBlitter)(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+ typedef void (*tpPlanarBlitter)(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+}
+
+bool VDPixmapBltDirect(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+
+void VDPixmapBltDirectPalettedConversion(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h, tpPalettedBlitter pBlitter) {
+ uint8 palbytes[256 * 3];
+
+ int palsize;
+
+ switch(src.format) {
+ case kPixFormat_Pal1:
+ palsize = 2;
+ break;
+ case kPixFormat_Pal2:
+ palsize = 4;
+ break;
+ case kPixFormat_Pal4:
+ palsize = 16;
+ break;
+ case kPixFormat_Pal8:
+ palsize = 256;
+ break;
+ default:
+ VDNEVERHERE;
+ }
+
+ VDASSERT(src.palette);
+
+ VDPixmap srcpal = { (void *)src.palette, NULL, palsize, 1, 0, kPixFormat_XRGB8888 };
+ VDPixmap dstpal = { palbytes, NULL, palsize, 1, 0, dst.format };
+
+ VDVERIFY(VDPixmapBltDirect(dstpal, srcpal, palsize, 1));
+
+ pBlitter(dst.data, dst.pitch, src.data, src.pitch, w, h, palbytes);
+}
+
+tpVDPixBltTable VDPixmapGetBlitterTable() {
+#if defined(_WIN32) && defined(_M_IX86)
+ static tpVDPixBltTable pBltTable;
+
+ if (CPUGetEnabledExtensions() & CPUF_SUPPORTS_MMX) {
+ return VDGetPixBltTableX86MMX();
+ } else {
+ return VDGetPixBltTableX86Scalar();
+ }
+#else
+ static tpVDPixBltTable pBltTable = VDGetPixBltTableReference();
+ return pBltTable;
+#endif
+}
+
+bool VDPixmapBltDirect(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ if ((unsigned)src.format >= kPixFormat_Max_Standard) {
+ VDASSERT(false);
+ return false;
+ }
+
+ if ((unsigned)dst.format >= kPixFormat_Max_Standard) {
+ VDASSERT(false);
+ return false;
+ }
+
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src.format);
+
+ if (src.format == dst.format) {
+ int qw = w;
+ int qh = h;
+
+ if (srcinfo.qchunky) {
+ qw = (qw + srcinfo.qw - 1) / srcinfo.qw;
+ qh = -(-h >> srcinfo.qhbits);
+ }
+
+ const int auxw = -(-w >> srcinfo.auxwbits);
+ const int auxh = -(-h >> srcinfo.auxhbits);
+
+ switch(srcinfo.auxbufs) {
+ case 2:
+ VDMemcpyRect(dst.data3, dst.pitch3, src.data3, src.pitch3, srcinfo.auxsize * auxw, auxh);
+ case 1:
+ VDMemcpyRect(dst.data2, dst.pitch2, src.data2, src.pitch2, srcinfo.auxsize * auxw, auxh);
+ case 0:
+ VDMemcpyRect(dst.data, dst.pitch, src.data, src.pitch, srcinfo.qsize * qw, qh);
+ }
+
+ return true;
+ }
+
+ VDPixmapBlitterFn pBlitter = VDPixmapGetBlitterTable()[src.format][dst.format];
+
+ if (!pBlitter)
+ return false;
+
+ pBlitter(dst, src, w, h);
+ return true;
+}
+
+bool VDPixmapIsBltPossible(int dst_format, int src_format) {
+ if (src_format == dst_format)
+ return true;
+
+ tpVDPixBltTable tab(VDPixmapGetBlitterTable());
+
+ if (tab[src_format][dst_format])
+ return true;
+
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src_format);
+ const VDPixmapFormatInfo& dstinfo = VDPixmapGetInfo(dst_format);
+
+ if (srcinfo.auxbufs > 0 || dstinfo.auxbufs > 0)
+ return false; // fail, planar buffers involved (can't do scanlines independently)
+
+ return (tab[src_format][kPixFormat_YUV444_XVYU] && tab[kPixFormat_YUV444_XVYU][dst_format])
+ ||(tab[src_format][kPixFormat_XRGB8888] && tab[kPixFormat_XRGB8888][dst_format]);
+}
+
+bool VDNOINLINE VDPixmapBltTwoStage(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src.format);
+ const VDPixmapFormatInfo& dstinfo = VDPixmapGetInfo(dst.format);
+
+ if (srcinfo.auxbufs > 0 || dstinfo.auxbufs > 0)
+ return false; // fail, planar buffers involved
+
+ if (srcinfo.qh > 1)
+ return false; // fail, vertically packed formats involved
+
+ if (srcinfo.palsize)
+ return false; // fail, paletted formats involved
+
+ // Allocate a 4xW buffer and try round-tripping through either
+ // RGB32 or XYVU.
+ vdblock<uint32> tempBuf;
+
+ tpVDPixBltTable tab(VDPixmapGetBlitterTable());
+
+ VDPixmap linesrc(src);
+ VDPixmap linedst(dst);
+ VDPixmap linetmp = {};
+
+ if (w < 1024) {
+ linetmp.data = _alloca(sizeof(uint32) * w);
+ } else {
+ tempBuf.resize(w + 1);
+ linetmp.data = tempBuf.data();
+ }
+ linetmp.pitch = 0;
+ linetmp.format = kPixFormat_YUV444_XVYU;
+ linetmp.w = w;
+ linetmp.h = 1;
+
+ VDPixmapBlitterFn pb1 = tab[src.format][kPixFormat_YUV444_XVYU];
+ VDPixmapBlitterFn pb2 = tab[kPixFormat_YUV444_XVYU][dst.format];
+ if (!pb1 || !pb2) {
+ pb1 = tab[src.format][kPixFormat_XRGB8888];
+ pb2 = tab[kPixFormat_XRGB8888][dst.format];
+ if (!pb1 || !pb2)
+ return false;
+
+ linetmp.format = kPixFormat_XRGB8888;
+ }
+
+ do {
+ pb1(linetmp, linesrc, w, 1);
+ pb2(linedst, linetmp, w, 1);
+ vdptrstep(linesrc.data, linesrc.pitch);
+ vdptrstep(linedst.data, linedst.pitch);
+ } while(--h);
+ return true;
+}
+
+bool VDPixmapBltFast(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ if (VDPixmapBltDirect(dst, src, w, h))
+ return true;
+
+ // Oro... let's see if we can do a two-stage conversion.
+ return VDPixmapBltTwoStage(dst, src, w, h);
+}
+
+bool VDPixmapBlt(const VDPixmap& dst, const VDPixmap& src) {
+ vdpixsize w = std::min<vdpixsize>(src.w, dst.w);
+ vdpixsize h = std::min<vdpixsize>(src.h, dst.h);
+
+ if (!w || !h)
+ return true;
+
+ return VDPixmapBltFast(dst, src, w, h);
+}
+
+bool VDPixmapBlt(const VDPixmap& dst, vdpixpos x1, vdpixpos y1, const VDPixmap& src, vdpixpos x2, vdpixpos y2, vdpixsize w, vdpixsize h) {
+ if (x1 < 0) {
+ x2 -= x1;
+ w -= x1;
+ x1 = 0;
+ }
+
+ if (y1 < 0) {
+ y2 -= y1;
+ h -= y1;
+ y1 = 0;
+ }
+
+ if (x2 < 0) {
+ x1 -= x2;
+ w -= x2;
+ x2 = 0;
+ }
+
+ if (y2 < 0) {
+ y1 -= y2;
+ h -= y2;
+ y2 = 0;
+ }
+
+ if (w > dst.w - x1)
+ w = dst.w - x1;
+
+ if (h > dst.h - y1)
+ h = dst.h - y1;
+
+ if (w > src.w - x2)
+ w = src.w - x2;
+
+ if (h > src.h - y2)
+ h = src.h - y2;
+
+ if (w>=0 && h >= 0) {
+ VDPixmap dst2(VDPixmapOffset(dst, x1, y1));
+ VDPixmap src2(VDPixmapOffset(src, x2, y2));
+
+ return VDPixmapBltFast(dst2, src2, w, h);
+ }
+
+ return true;
+}
+
+extern bool VDPixmapStretchBltNearest_reference(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2);
+extern bool VDPixmapStretchBltBilinear_reference(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2);
+
+bool VDPixmapStretchBltNearest(const VDPixmap& dst, const VDPixmap& src) {
+ return VDPixmapStretchBltNearest(dst, 0, 0, dst.w<<16, dst.h<<16, src, 0, 0, src.w<<16, src.h<<16);
+}
+
+bool VDPixmapStretchBltNearest(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2) {
+ return VDPixmapStretchBltNearest_reference(dst, x1, y1, x2, y2, src, u1, v1, u2, v2);
+}
+
+bool VDPixmapStretchBltBilinear(const VDPixmap& dst, const VDPixmap& src) {
+ return VDPixmapStretchBltBilinear(dst, 0, 0, dst.w<<16, dst.h<<16, src, 0, 0, src.w<<16, src.h<<16);
+}
+
+bool VDPixmapStretchBltBilinear(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2) {
+ return VDPixmapStretchBltBilinear_reference(dst, x1, y1, x2, y2, src, u1, v1, u2, v2);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference.cpp
new file mode 100644
index 000000000..c4dccce9f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference.cpp
@@ -0,0 +1,259 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "blt_setup.h"
+
+#define DECLARE_PALETTED(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal0)
+#define DECLARE_RGB(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV_REV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV_PLANAR(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+
+DECLARE_RGB(RGB565, XRGB1555);
+DECLARE_RGB(RGB888, XRGB1555);
+DECLARE_RGB(XRGB8888, XRGB1555);
+DECLARE_RGB(XRGB1555, RGB565);
+DECLARE_RGB(RGB888, RGB565);
+DECLARE_RGB(XRGB8888, RGB565);
+DECLARE_RGB(XRGB1555, RGB888);
+DECLARE_RGB(RGB565, RGB888);
+DECLARE_RGB(XRGB8888, RGB888);
+DECLARE_RGB(XRGB1555, XRGB8888);
+DECLARE_RGB(RGB565, XRGB8888);
+DECLARE_RGB(RGB888, XRGB8888);
+
+DECLARE_PALETTED(Pal1, Any8);
+DECLARE_PALETTED(Pal1, Any16);
+DECLARE_PALETTED(Pal1, Any24);
+DECLARE_PALETTED(Pal1, Any32);
+DECLARE_PALETTED(Pal2, Any8);
+DECLARE_PALETTED(Pal2, Any16);
+DECLARE_PALETTED(Pal2, Any24);
+DECLARE_PALETTED(Pal2, Any32);
+DECLARE_PALETTED(Pal4, Any8);
+DECLARE_PALETTED(Pal4, Any16);
+DECLARE_PALETTED(Pal4, Any24);
+DECLARE_PALETTED(Pal4, Any32);
+DECLARE_PALETTED(Pal8, Any8);
+DECLARE_PALETTED(Pal8, Any16);
+DECLARE_PALETTED(Pal8, Any24);
+DECLARE_PALETTED(Pal8, Any32);
+
+DECLARE_YUV(XVYU, UYVY);
+DECLARE_YUV(XVYU, YUYV);
+DECLARE_YUV(Y8, UYVY);
+DECLARE_YUV(Y8, YUYV);
+DECLARE_YUV(UYVY, Y8);
+DECLARE_YUV(YUYV, Y8);
+DECLARE_YUV(UYVY, YUYV);
+DECLARE_YUV_PLANAR(YUV411, YV12);
+
+DECLARE_YUV(UYVY, XRGB1555);
+DECLARE_YUV(UYVY, RGB565);
+DECLARE_YUV(UYVY, RGB888);
+DECLARE_YUV(UYVY, XRGB8888);
+DECLARE_YUV(YUYV, XRGB1555);
+DECLARE_YUV(YUYV, RGB565);
+DECLARE_YUV(YUYV, RGB888);
+DECLARE_YUV(YUYV, XRGB8888);
+DECLARE_YUV(Y8, XRGB1555);
+DECLARE_YUV(Y8, RGB565);
+DECLARE_YUV(Y8, RGB888);
+DECLARE_YUV(Y8, XRGB8888);
+
+DECLARE_YUV_REV(XRGB1555, Y8);
+DECLARE_YUV_REV(RGB565, Y8);
+DECLARE_YUV_REV(RGB888, Y8);
+DECLARE_YUV_REV(XRGB8888, Y8);
+
+DECLARE_YUV_REV(XRGB1555, XVYU);
+DECLARE_YUV_REV(RGB565, XVYU);
+DECLARE_YUV_REV(RGB888, XVYU);
+DECLARE_YUV_REV(XRGB8888, XVYU);
+
+DECLARE_YUV_PLANAR(YV12, XRGB1555);
+DECLARE_YUV_PLANAR(YV12, RGB565);
+DECLARE_YUV_PLANAR(YV12, RGB888);
+DECLARE_YUV_PLANAR(YV12, XRGB8888);
+
+DECLARE_YUV_PLANAR(YUV411, XRGB1555);
+DECLARE_YUV_PLANAR(YUV411, RGB565);
+DECLARE_YUV_PLANAR(YUV411, RGB888);
+DECLARE_YUV_PLANAR(YUV411, XRGB8888);
+
+extern void VDPixmapBlt_YUVPlanar_decode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_YUVPlanar_encode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_YUVPlanar_convert_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_UberblitAdapter(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+
+using namespace nsVDPixmap;
+
+void VDPixmapInitBlittersReference(VDPixmapBlitterTable& table) {
+ // use uberblit as the baseline
+ VDPixmapFormatSubset uberblitSrcFormats;
+ VDPixmapFormatSubset uberblitDstFormats;
+
+ uberblitSrcFormats =
+ kPixFormat_Pal1,
+ kPixFormat_Pal2,
+ kPixFormat_Pal4,
+ kPixFormat_Pal8,
+ kPixFormat_XRGB1555,
+ kPixFormat_RGB565,
+ kPixFormat_RGB888,
+ kPixFormat_XRGB8888,
+ kPixFormat_Y8,
+ kPixFormat_YUV422_UYVY,
+ kPixFormat_YUV422_YUYV,
+ kPixFormat_YUV444_XVYU,
+ kPixFormat_YUV444_Planar,
+ kPixFormat_YUV422_Planar,
+ kPixFormat_YUV422_Planar_16F,
+ kPixFormat_YUV420_Planar,
+ kPixFormat_YUV411_Planar,
+ kPixFormat_YUV410_Planar,
+ kPixFormat_YUV422_Planar_Centered,
+ kPixFormat_YUV420_Planar_Centered,
+ kPixFormat_YUV422_V210,
+ kPixFormat_YUV422_UYVY_709,
+ kPixFormat_YUV420_NV12;
+
+ uberblitDstFormats =
+ kPixFormat_XRGB1555,
+ kPixFormat_RGB565,
+ kPixFormat_RGB888,
+ kPixFormat_XRGB8888,
+ kPixFormat_Y8,
+ kPixFormat_YUV422_UYVY,
+ kPixFormat_YUV422_YUYV,
+ kPixFormat_YUV444_XVYU,
+ kPixFormat_YUV444_Planar,
+ kPixFormat_YUV422_Planar,
+ kPixFormat_YUV422_Planar_16F,
+ kPixFormat_YUV420_Planar,
+ kPixFormat_YUV411_Planar,
+ kPixFormat_YUV410_Planar,
+ kPixFormat_YUV422_Planar_Centered,
+ kPixFormat_YUV420_Planar_Centered,
+ kPixFormat_YUV422_V210,
+ kPixFormat_YUV422_UYVY_709,
+ kPixFormat_YUV420_NV12;
+
+ table.AddBlitter(uberblitSrcFormats, uberblitDstFormats, VDPixmapBlt_UberblitAdapter);
+
+ // standard formats
+
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_Y8, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any8_reference>);
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_XRGB1555, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_RGB565, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_RGB888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any24_reference>);
+ table.AddBlitter(kPixFormat_Pal1, kPixFormat_XRGB8888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal1_to_Any32_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_Y8, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any8_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_XRGB1555, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_RGB565, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_RGB888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any24_reference>);
+ table.AddBlitter(kPixFormat_Pal2, kPixFormat_XRGB8888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal2_to_Any32_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_Y8, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any8_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_XRGB1555, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_RGB565, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_RGB888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any24_reference>);
+ table.AddBlitter(kPixFormat_Pal4, kPixFormat_XRGB8888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal4_to_Any32_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_Y8, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any8_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_XRGB1555, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_RGB565, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any16_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_RGB888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any24_reference>);
+ table.AddBlitter(kPixFormat_Pal8, kPixFormat_XRGB8888, VDPixmapBlitterPalettedAdapter<VDPixmapBlt_Pal8_to_Any32_reference>);
+
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_RGB888_reference>);
+
+ table.AddBlitter(kPixFormat_YUV444_XVYU, kPixFormat_YUV422_UYVY, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XVYU_to_UYVY_reference>);
+ table.AddBlitter(kPixFormat_YUV444_XVYU, kPixFormat_YUV422_YUYV, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XVYU_to_YUYV_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_YUV422_UYVY, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_UYVY_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_YUV422_YUYV, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_YUYV_reference>);
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_Y8_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_Y8_reference>);
+
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_YUYV_to_XRGB8888_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_XRGB1555_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_RGB565_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_RGB888_reference>);
+ table.AddBlitter(kPixFormat_Y8, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_Y8_to_XRGB8888_reference>);
+
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_YUV444_XVYU, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_XVYU_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_YUV444_XVYU, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_XVYU_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_YUV444_XVYU, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_XVYU_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_YUV444_XVYU, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_XVYU_reference>);
+
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB1555_to_Y8_reference>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB565_to_Y8_reference>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_RGB888_to_Y8_reference>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_Y8, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_XRGB8888_to_Y8_reference>);
+
+ table.AddBlitter(kPixFormat_YUV411_Planar, kPixFormat_YUV420_Planar, VDPixmapBlt_YUV411_to_YV12_reference);
+
+ table.AddBlitter(kPixFormat_YUV422_UYVY, kPixFormat_YUV422_YUYV, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_YUYV_reference>);
+ table.AddBlitter(kPixFormat_YUV422_YUYV, kPixFormat_YUV422_UYVY, VDPixmapBlitterChunkyAdapter<VDPixmapBlt_UYVY_to_YUYV_reference>); // not an error -- same routine
+
+ //////////////////////////////////////////////////////////
+
+ VDPixmapFormatSubset srcFormats;
+ VDPixmapFormatSubset dstFormats;
+
+ srcFormats = kPixFormat_YUV444_Planar,
+ kPixFormat_YUV422_Planar,
+ kPixFormat_YUV420_Planar,
+ kPixFormat_YUV411_Planar,
+ kPixFormat_YUV410_Planar,
+ kPixFormat_YUV422_Planar_Centered,
+ kPixFormat_YUV420_Planar_Centered;
+
+ dstFormats = kPixFormat_XRGB1555, kPixFormat_RGB565, kPixFormat_RGB888, kPixFormat_XRGB8888, kPixFormat_YUV422_UYVY, kPixFormat_YUV422_YUYV;
+
+ table.AddBlitter(srcFormats, dstFormats, VDPixmapBlt_YUVPlanar_decode_reference);
+
+ //////////////////////////////////////////////////////////
+
+ dstFormats = kPixFormat_YUV444_Planar, kPixFormat_YUV422_Planar, kPixFormat_YUV420_Planar, kPixFormat_YUV411_Planar, kPixFormat_YUV410_Planar, kPixFormat_YUV422_Planar_Centered, kPixFormat_YUV420_Planar_Centered;
+ srcFormats = kPixFormat_XRGB1555, kPixFormat_RGB565, kPixFormat_RGB888, kPixFormat_XRGB8888, kPixFormat_YUV422_UYVY, kPixFormat_YUV422_YUYV;
+
+ table.AddBlitter(srcFormats, dstFormats, VDPixmapBlt_YUVPlanar_encode_reference);
+
+ //////////////////////////////////////////////////////////
+
+ srcFormats = kPixFormat_YUV444_Planar, kPixFormat_YUV422_Planar, kPixFormat_YUV420_Planar, kPixFormat_YUV411_Planar, kPixFormat_YUV410_Planar, kPixFormat_Y8, kPixFormat_YUV422_Planar_Centered, kPixFormat_YUV420_Planar_Centered;
+ dstFormats = kPixFormat_YUV444_Planar, kPixFormat_YUV422_Planar, kPixFormat_YUV420_Planar, kPixFormat_YUV411_Planar, kPixFormat_YUV410_Planar, kPixFormat_Y8, kPixFormat_YUV422_Planar_Centered, kPixFormat_YUV420_Planar_Centered;
+
+ table.AddBlitter(srcFormats, dstFormats, VDPixmapBlt_YUVPlanar_convert_reference);
+}
+
+tpVDPixBltTable VDGetPixBltTableReferenceInternal() {
+ static VDPixmapBlitterTable sReferenceTable;
+
+ VDPixmapInitBlittersReference(sReferenceTable);
+
+ return sReferenceTable.mTable;
+}
+
+tpVDPixBltTable VDGetPixBltTableReference() {
+ static tpVDPixBltTable spTable = VDGetPixBltTableReferenceInternal();
+
+ return spTable;
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_pal.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_pal.cpp
new file mode 100644
index 000000000..4a103de3b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_pal.cpp
@@ -0,0 +1,545 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+#define DECLARE_PALETTED(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal0)
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: Pal1 ->
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_PALETTED(Pal1, Any8) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 3;
+ dst += (w-1) & ~7;
+
+ srcpitch += (w+7) >> 3;
+ dstpitch += (w+7) & ~7;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: dst[7] = pal[v&1]; v >>= 1;
+ case 7: dst[6] = pal[v&1]; v >>= 1;
+ case 6: dst[5] = pal[v&1]; v >>= 1;
+ case 5: dst[4] = pal[v&1]; v >>= 1;
+ case 4: dst[3] = pal[v&1]; v >>= 1;
+ case 3: dst[2] = pal[v&1]; v >>= 1;
+ case 2: dst[1] = pal[v&1]; v >>= 1;
+ case 1: dst[0] = pal[v&1]; v >>= 1;
+
+ dst -= 8;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal1, Any16) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *pal = (const uint16 *)pal0;
+
+ src += (w-1) >> 3;
+ dst += (w-1) & ~7;
+
+ srcpitch += (w+7) >> 3;
+ dstpitch += ((w+7) & ~7) * 2;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: dst[7] = pal[v&1]; v >>= 1;
+ case 7: dst[6] = pal[v&1]; v >>= 1;
+ case 6: dst[5] = pal[v&1]; v >>= 1;
+ case 5: dst[4] = pal[v&1]; v >>= 1;
+ case 4: dst[3] = pal[v&1]; v >>= 1;
+ case 3: dst[2] = pal[v&1]; v >>= 1;
+ case 2: dst[1] = pal[v&1]; v >>= 1;
+ case 1: dst[0] = pal[v&1]; v >>= 1;
+
+ dst -= 8;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal1, Any24) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 3;
+ dst += ((w-1) & ~7) * 3;
+
+ srcpitch += (w+7) >> 3;
+ dstpitch += ((w+7) & ~7) * 3;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+ const uint8 *pe;
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: pe = &pal[3*(v&1)]; dst[7*3+0] = pe[0]; dst[7*3+1] = pe[1]; dst[7*3+2] = pe[2]; v >>= 1;
+ case 7: pe = &pal[3*(v&1)]; dst[6*3+0] = pe[0]; dst[6*3+1] = pe[1]; dst[6*3+2] = pe[2]; v >>= 1;
+ case 6: pe = &pal[3*(v&1)]; dst[5*3+0] = pe[0]; dst[5*3+1] = pe[1]; dst[5*3+2] = pe[2]; v >>= 1;
+ case 5: pe = &pal[3*(v&1)]; dst[4*3+0] = pe[0]; dst[4*3+1] = pe[1]; dst[4*3+2] = pe[2]; v >>= 1;
+ case 4: pe = &pal[3*(v&1)]; dst[3*3+0] = pe[0]; dst[3*3+1] = pe[1]; dst[3*3+2] = pe[2]; v >>= 1;
+ case 3: pe = &pal[3*(v&1)]; dst[2*3+0] = pe[0]; dst[2*3+1] = pe[1]; dst[2*3+2] = pe[2]; v >>= 1;
+ case 2: pe = &pal[3*(v&1)]; dst[1*3+0] = pe[0]; dst[1*3+1] = pe[1]; dst[1*3+2] = pe[2]; v >>= 1;
+ case 1: pe = &pal[3*(v&1)]; dst[0*3+0] = pe[0]; dst[0*3+1] = pe[1]; dst[0*3+2] = pe[2]; v >>= 1;
+
+ dst -= 24;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal1, Any32) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *pal = (const uint32 *)pal0;
+
+ src += (w-1) >> 3;
+ dst += (w-1) & ~7;
+
+ srcpitch += (w+7) >> 3;
+ dstpitch += ((w+7) & ~7) * 4;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> ((-wt) & 7);
+
+ switch(wt & 7) {
+ do {
+ v = src[0];
+
+ case 0: dst[7] = pal[v&1]; v >>= 1;
+ case 7: dst[6] = pal[v&1]; v >>= 1;
+ case 6: dst[5] = pal[v&1]; v >>= 1;
+ case 5: dst[4] = pal[v&1]; v >>= 1;
+ case 4: dst[3] = pal[v&1]; v >>= 1;
+ case 3: dst[2] = pal[v&1]; v >>= 1;
+ case 2: dst[1] = pal[v&1]; v >>= 1;
+ case 1: dst[0] = pal[v&1]; v >>= 1;
+
+ dst -= 8;
+ --src;
+ } while((wt -= 8) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: Pal2 ->
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_PALETTED(Pal2, Any8) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 2;
+ dst += (w-1) & ~3;
+
+ srcpitch += (w+3) >> 2;
+ dstpitch += (w+3) & ~3;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: dst[3] = pal[v&3]; v >>= 2;
+ case 3: dst[2] = pal[v&3]; v >>= 2;
+ case 2: dst[1] = pal[v&3]; v >>= 2;
+ case 1: dst[0] = pal[v&3]; v >>= 2;
+
+ dst -= 4;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal2, Any16) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *pal = (const uint16 *)pal0;
+
+ src += (w-1) >> 2;
+ dst += (w-1) & ~3;
+
+ srcpitch += (w+3) >> 2;
+ dstpitch += ((w+3) & ~3) * 2;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: dst[3] = pal[v&3]; v >>= 2;
+ case 3: dst[2] = pal[v&3]; v >>= 2;
+ case 2: dst[1] = pal[v&3]; v >>= 2;
+ case 1: dst[0] = pal[v&3]; v >>= 2;
+
+ dst -= 4;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal2, Any24) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 2;
+ dst += ((w-1) & ~3) * 3;
+
+ srcpitch += (w+3) >> 2;
+ dstpitch += ((w+3) & ~3) * 3;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+ const uint8 *pe;
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: pe = &pal[3*(v&3)]; dst[3*3+0] = pe[0]; dst[3*3+1] = pe[1]; dst[3*3+2] = pe[2]; v >>= 2;
+ case 3: pe = &pal[3*(v&3)]; dst[2*3+0] = pe[0]; dst[2*3+1] = pe[1]; dst[2*3+2] = pe[2]; v >>= 2;
+ case 2: pe = &pal[3*(v&3)]; dst[1*3+0] = pe[0]; dst[1*3+1] = pe[1]; dst[1*3+2] = pe[2]; v >>= 2;
+ case 1: pe = &pal[3*(v&3)]; dst[0*3+0] = pe[0]; dst[0*3+1] = pe[1]; dst[0*3+2] = pe[2]; v >>= 2;
+
+ dst -= 12;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal2, Any32) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *pal = (const uint32 *)pal0;
+
+ src += (w-1) >> 2;
+ dst += (w-1) & ~3;
+
+ srcpitch += (w+3) >> 2;
+ dstpitch += ((w+3) & ~3) * 4;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 3)*2);
+
+ switch(wt & 3) {
+ do {
+ v = src[0];
+
+ case 0: dst[3] = pal[v&3]; v >>= 2;
+ case 3: dst[2] = pal[v&3]; v >>= 2;
+ case 2: dst[1] = pal[v&3]; v >>= 2;
+ case 1: dst[0] = pal[v&3]; v >>= 2;
+
+ dst -= 4;
+ --src;
+ } while((wt -= 4) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: Pal4 ->
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_PALETTED(Pal4, Any8) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1);
+
+ srcpitch += (w+1) >> 1;
+ dstpitch += (w+1) & ~1;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: dst[1] = pal[v&15]; v >>= 4;
+ case 1: dst[0] = pal[v&15]; v >>= 4;
+
+ dst -= 2;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal4, Any16) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *pal = (const uint16 *)pal0;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1);
+
+ srcpitch += (w+1) >> 1;
+ dstpitch += ((w+1) & ~1) * 2;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: dst[1] = pal[v&15]; v >>= 4;
+ case 1: dst[0] = pal[v&15]; v >>= 4;
+
+ dst -= 2;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal4, Any24) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1) * 3;
+
+ srcpitch += (w+1) >> 1;
+ dstpitch += ((w+1) & ~1) * 3;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+ const uint8 *pe;
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: pe = &pal[3*(v&15)]; dst[1*3+0] = pe[0]; dst[1*3+1] = pe[1]; dst[1*3+2] = pe[2]; v >>= 4;
+ case 1: pe = &pal[3*(v&15)]; dst[0*3+0] = pe[0]; dst[0*3+1] = pe[1]; dst[0*3+2] = pe[2]; v >>= 4;
+
+ dst -= 6;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal4, Any32) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *pal = (const uint32 *)pal0;
+
+ src += (w-1) >> 1;
+ dst += ((w-1) & ~1);
+
+ srcpitch += (w+1) >> 1;
+ dstpitch += ((w+1) & ~1) * 4;
+
+ do {
+ int wt = w;
+
+ uint8 v = src[0] >> (((-wt) & 1)*4);
+
+ switch(wt & 1) {
+ do {
+ v = src[0];
+
+ case 0: dst[1] = pal[v&15]; v >>= 4;
+ case 1: dst[0] = pal[v&15]; v >>= 4;
+
+ dst -= 2;
+ --src;
+ } while((wt -= 2) > 0);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: Pal8 ->
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_PALETTED(Pal8, Any8) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ srcpitch -= w;
+ dstpitch -= w;
+
+ do {
+ int wt = w;
+
+ do {
+ *dst++ = pal[*src++];
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal8, Any16) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *pal = (const uint16 *)pal0;
+
+ srcpitch -= w;
+ dstpitch -= w*2;
+
+ do {
+ int wt = w;
+
+ do {
+ *dst++ = pal[*src++];
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal8, Any24) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *pal = (const uint8 *)pal0;
+
+ srcpitch -= w;
+ dstpitch -= w*3;
+
+ do {
+ int wt = w;
+ do {
+ const uint8 *pe = &pal[3**src++];
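+			// 3 * *src++: index into the 3-byte-per-entry palette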
+
+ dst[0] = pe[0];
+ dst[1] = pe[1];
+ dst[2] = pe[2];
+ dst += 3;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_PALETTED(Pal8, Any32) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *pal = (const uint32 *)pal0;
+
+ srcpitch -= w;
+ dstpitch -= w*4;
+
+ do {
+ int wt = w;
+
+ do {
+ *dst++ = pal[*src++];
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_rgb.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_rgb.cpp
new file mode 100644
index 000000000..ea49f260d
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_rgb.cpp
@@ -0,0 +1,310 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+#define DECLARE_RGB(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
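+// For example, DECLARE_RGB(RGB565, XRGB1555) declares
+//   void VDPixmapBlt_RGB565_to_XRGB1555_reference(void *dst0, ptrdiff_t dstpitch,
+//                                                 const void *src0, ptrdiff_t srcpitch,
+//                                                 vdpixsize w, vdpixsize h);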
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: -> XRGB1555
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_RGB(RGB565, XRGB1555) {
+ const uint16 *src = (const uint16 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ *dst++ = (px&0x001f) + ((px&0xffc0)>>1);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB888, XRGB1555) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 3*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 r = ((uint32)src[2] & 0xf8) << 7;
+ const uint32 g = ((uint32)src[1] & 0xf8) << 2;
+ const uint32 b = (uint32)src[0] >> 3;
+ src += 3;
+
+ *dst++ = (uint16)(r + g + b);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(XRGB8888, XRGB1555) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 4*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 r = ((uint32)src[2] & 0xf8) << 7;
+ const uint32 g = ((uint32)src[1] & 0xf8) << 2;
+ const uint32 b = (uint32)src[0] >> 3;
+ src += 4;
+
+ *dst++ = (uint16)(r + g + b);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: -> RGB565
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_RGB(XRGB1555, RGB565) {
+ const uint16 *src = (const uint16 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ *dst++ = (uint16)(px + (px&0xffe0) + ((px&0x0200)>>4));
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB888, RGB565) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 3*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 r = ((uint32)src[2] & 0xf8) << 8;
+ const uint32 g = ((uint32)src[1] & 0xfc) << 3;
+ const uint32 b = (uint32)src[0] >> 3;
+ src += 3;
+
+ *dst++ = (uint16)(r + g + b);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(XRGB8888, RGB565) {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ srcpitch -= 4*w;
+ dstpitch -= 2*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 r = ((uint32)src[2] & 0xf8) << 8;
+ const uint32 g = ((uint32)src[1] & 0xfc) << 3;
+ const uint32 b = (uint32)src[0] >> 3;
+ src += 4;
+
+ *dst++ = (uint16)(r + g + b);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: -> RGB888
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_RGB(XRGB1555, RGB888) {
+ const uint16 *src = (const uint16 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 3*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ uint32 rb = px & 0x7c1f;
+ uint32 g = px & 0x03e0;
+
+ rb += rb<<5;
+ g += g<<5;
+
+ dst[0] = (uint8)(rb>>2);
+ dst[1] = (uint8)(g>>7);
+ dst[2] = (uint8)(rb>>12);
+ dst += 3;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB565, RGB888) {
+ const uint16 *src = (const uint16 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 3*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ uint32 rb = px & 0xf81f;
+ uint32 g = px & 0x07e0;
+
+ rb += rb<<5;
+ g += g<<6;
+
+ dst[0] = (uint8)(rb>>2);
+ dst[1] = (uint8)(g>>9);
+ dst[2] = (uint8)(rb>>13);
+ dst += 3;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(XRGB8888, RGB888) {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ srcpitch -= 4*w;
+ dstpitch -= 3*w;
+
+ do {
+ int wt = w;
+
+ do {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+ dst += 3;
+ src += 4;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// RGB blitters: -> XRGB8888
+//
+///////////////////////////////////////////////////////////////////////////
+
+DECLARE_RGB(XRGB1555, XRGB8888) {
+ const uint16 *src = (const uint16 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 4*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ const uint32 rgb = ((px & 0x7c00) << 9) + ((px & 0x03e0) << 6) + ((px & 0x001f) << 3);
+
+ *dst++ = rgb + ((rgb & 0xe0e0e0)>>5);
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB565, XRGB8888) {
+ const uint16 *src = (const uint16 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+
+ srcpitch -= 2*w;
+ dstpitch -= 4*w;
+
+ do {
+ int wt = w;
+
+ do {
+ const uint32 px = *src++;
+ const uint32 rb = ((px & 0xf800) << 8) + ((px & 0x001f) << 3);
+ const uint32 g = ((px & 0x07e0) << 5) + (px & 0x0300);
+
+ *dst++ = rb + ((rb & 0xe000e0)>>5) + g;
+ } while(--wt);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_RGB(RGB888, XRGB8888) {
+ const uint8 *src = (const uint8 *)src0;
+ uint32 *dst = (uint32 *)dst0;
+
+ srcpitch -= 3*w;
+ dstpitch -= 4*w;
+
+ do {
+ int wt = w;
+
+ do {
+ *dst++ = (uint32)src[0] + ((uint32)src[1]<<8) + ((uint32)src[2]<<16);
+ src += 3;
+ } while(--wt);
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv.cpp
new file mode 100644
index 000000000..6f40eeaa0
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv.cpp
@@ -0,0 +1,1590 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/memory.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+#include "blt_spanutils.h"
+
+#ifdef _M_IX86
+ #include "blt_spanutils_x86.h"
+#endif
+
+using namespace nsVDPixmapSpanUtils;
+
+namespace {
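+	// Fixed-point (16.16) BT.601 limited-range YCbCr -> RGB tables: y_tab scales
+	// luma by 255/219 (76309/65536); the Cr/Cb tables hold the 1.402/1.772
+	// (and green) coefficients rescaled by 255/224.  The clip tables are padded
+	// with 277 clamped-to-zero entries below and 279 clamped-to-max entries above
+	// the 256 in-range values, so a plain table lookup also performs the clamp.
+	// cliptab15/cliptab16 store the quantised 5/6-bit value replicated into every
+	// channel position of a 1555/565 word, so callers only mask out the channel
+	// they want.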
+ struct YCbCrToRGB {
+ sint16 y_tab[256];
+ sint16 r_cr_tab[256];
+ sint16 b_cb_tab[256];
+ sint16 g_cr_tab[256];
+ sint16 g_cb_tab[256];
+ uint8 cliptab[277+256+279];
+ uint16 cliptab15[277+256+279];
+ uint16 cliptab16[277+256+279];
+
+ YCbCrToRGB() {
+ int i;
+
+ memset(cliptab, 0, 277);
+ memset(cliptab+277+256, 255, 279);
+
+ memset(cliptab15, 0, sizeof cliptab15[0] * 277);
+ memset(cliptab16, 0, sizeof cliptab16[0] * 277);
+ memset(cliptab15+277+256, 0xff, sizeof cliptab15[0] * 279);
+ memset(cliptab16+277+256, 0xff, sizeof cliptab16[0] * 279);
+
+ for(i=0; i<256; ++i) {
+ y_tab[i] = (sint16)(((i-16) * 76309 + 32768) >> 16);
+ r_cr_tab[i] = (sint16)(((i-128) * 104597 + 32768) >> 16);
+ b_cb_tab[i] = (sint16)(((i-128) * 132201 + 32768) >> 16);
+ g_cr_tab[i] = (sint16)(((i-128) * -53279 + 32768) >> 16);
+ g_cb_tab[i] = (sint16)(((i-128) * -25674 + 32768) >> 16);
+ cliptab[i+277] = (uint8)i;
+ cliptab15[i+277] = 0x421 * ((unsigned)i>>3);
+ cliptab16[i+277] = 0x801 * ((unsigned)i>>3) + 0x20 * ((unsigned)i>>2);
+ }
+ }
+ } colorconv;
+
+ struct YCbCrFormatInfo {
+ ptrdiff_t ystep;
+ ptrdiff_t cstep;
+ ptrdiff_t yinc[4];
+ ptrdiff_t cinc[4];
+ sint8 ypos[4];
+ sint8 cbpos[4];
+ sint8 crpos[4];
+ };
+
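+	// ystep/cstep are negative because the generic converters below walk each row
+	// right-to-left; ypos/cbpos/crpos give the byte offset of each of the four
+	// pixels in a group, and yinc/cinc are 0/-1 masks that are ANDed with the row
+	// stride so chroma rows only advance on the appropriate output rows (e.g.
+	// every other row for 4:2:0).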
+ YCbCrFormatInfo g_formatInfo_YUV444_Planar = { -4, -4, {-1,-1,-1,-1}, {-1,-1,-1,-1}, {0,1,2,3}, {0,1,2,3}, {0,1,2,3}};
+ YCbCrFormatInfo g_formatInfo_YUV422_YUYV = { -8, -8, {-1,-1,-1,-1}, {-1,-1,-1,-1}, {0,2,4,6}, {1,1,5,5}, {3,3,7,7}};
+ YCbCrFormatInfo g_formatInfo_YUV422_UYVY = { -8, -8, {-1,-1,-1,-1}, {-1,-1,-1,-1}, {1,3,5,7}, {0,0,4,4}, {2,2,6,6}};
+ YCbCrFormatInfo g_formatInfo_YUV420_YV12 = { -4, -2, {-1,-1,-1,-1}, { 0,-1, 0,-1}, {0,1,2,3}, {0,0,1,1}, {0,0,1,1}};
+ YCbCrFormatInfo g_formatInfo_YUV411_YV12 = { -4, -1, {-1,-1,-1,-1}, {-1,-1,-1,-1}, {0,1,2,3}, {0,0,0,0}, {0,0,0,0}};
+
+ inline uint16 ycbcr_to_1555(uint8 y, uint8 cb0, uint8 cr0) {
+ const uint16 *p = &colorconv.cliptab15[277 + colorconv.y_tab[y]];
+ uint32 r = 0x7c00 & p[colorconv.r_cr_tab[cr0]];
+ uint32 g = 0x03e0 & p[colorconv.g_cr_tab[cr0] + colorconv.g_cb_tab[cb0]];
+ uint32 b = 0x001f & p[colorconv.b_cb_tab[cb0]];
+
+ return r + g + b;
+ }
+
+ inline uint16 ycbcr_to_565(uint8 y, uint8 cb0, uint8 cr0) {
+ const uint16 *p = &colorconv.cliptab16[277 + colorconv.y_tab[y]];
+ uint32 r = 0xf800 & p[colorconv.r_cr_tab[cr0]];
+ uint32 g = 0x07e0 & p[colorconv.g_cr_tab[cr0] + colorconv.g_cb_tab[cb0]];
+ uint32 b = 0x001f & p[colorconv.b_cb_tab[cb0]];
+
+ return r + g + b;
+ }
+
+ inline void ycbcr_to_888(uint8 *dst, uint8 y, uint8 cb0, uint8 cr0) {
+ const uint8 *p = &colorconv.cliptab[277 + colorconv.y_tab[y]];
+ uint8 r = p[colorconv.r_cr_tab[cr0]];
+ uint8 g = p[colorconv.g_cr_tab[cr0] + colorconv.g_cb_tab[cb0]];
+ uint8 b = p[colorconv.b_cb_tab[cb0]];
+
+ dst[0] = b;
+ dst[1] = g;
+ dst[2] = r;
+ }
+
+ inline uint32 ycbcr_to_8888(uint8 y, uint8 cb0, uint8 cr0) {
+ const uint8 *p = &colorconv.cliptab[277 + colorconv.y_tab[y]];
+ uint8 r = p[colorconv.r_cr_tab[cr0]];
+ uint8 g = p[colorconv.g_cr_tab[cr0] + colorconv.g_cb_tab[cb0]];
+ uint8 b = p[colorconv.b_cb_tab[cb0]];
+
+ return (r << 16) + (g << 8) + b;
+ }
+
+ void VDYCbCrToXRGB1555Span(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint16 *dst = (uint16 *)dst0;
+
+ do {
+ *dst++ = ycbcr_to_1555(*y++, *cb++, *cr++);
+ } while(--w);
+ }
+
+ void VDYCbCrToRGB565Span(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint16 *dst = (uint16 *)dst0;
+
+ do {
+ *dst++ = ycbcr_to_565(*y++, *cb++, *cr++);
+ } while(--w);
+ }
+
+ void VDYCbCrToRGB888Span(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint8 *dst = (uint8 *)dst0;
+
+ do {
+ ycbcr_to_888(dst, *y++, *cb++, *cr++);
+ dst += 3;
+ } while(--w);
+ }
+
+ void VDYCbCrToXRGB8888Span(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint32 *dst = (uint32 *)dst0;
+
+ do {
+ *dst++ = ycbcr_to_8888(*y++, *cb++, *cr++);
+ } while(--w);
+ }
+
+ void VDYCbCrToUYVYSpan(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint32 *dst = (uint32 *)dst0;
+
+ if (--w) {
+ do {
+ *dst++ = (uint32)*cb++ + ((uint32)y[0] << 8) + ((uint32)*cr++ << 16) + ((uint32)y[1] << 24);
+ y += 2;
+ } while((sint32)(w-=2)>0);
+ }
+
+ if (!(w & 1))
+ *dst++ = (uint32)*cb + ((uint32)y[0] << 8) + ((uint32)*cr << 16) + ((uint32)y[0] << 24);
+ }
+
+ void VDYCbCrToYUYVSpan(void *dst0, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 w) {
+ uint32 *dst = (uint32 *)dst0;
+
+ if (--w) {
+ do {
+ *dst++ = (uint32)y[0] + ((uint32)*cb++ << 8) + ((uint32)y[1] << 16) + ((uint32)*cr++ << 24);
+ y += 2;
+ } while((sint32)(w-=2)>0);
+ }
+
+ if (!(w & 1))
+ *dst++ = (uint32)y[0] + ((uint32)*cb << 8) + ((uint32)y[0] << 16) + ((uint32)*cr << 24);
+ }
+
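+	// The four *Generic converters below share one structure: the format
+	// descriptor says where Y/Cb/Cr live inside a four-pixel group, rows are
+	// emitted right-to-left, and a switch-into-loop handles widths that are not
+	// a multiple of four.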
+ void VDYCbCrToRGB1555Generic(void *dst, ptrdiff_t dststride, const void *yrow, ptrdiff_t ystride, const void *cbrow, ptrdiff_t cbstride, const void *crrow, ptrdiff_t crstride, int w, int h, const YCbCrFormatInfo& formatinfo) {
+ const ptrdiff_t ystep = formatinfo.ystep;
+ const ptrdiff_t cstep = formatinfo.cstep;
+ const ptrdiff_t ypos0 = formatinfo.ypos[0];
+ const ptrdiff_t ypos1 = formatinfo.ypos[1];
+ const ptrdiff_t ypos2 = formatinfo.ypos[2];
+ const ptrdiff_t ypos3 = formatinfo.ypos[3];
+ const ptrdiff_t crpos0 = formatinfo.crpos[0];
+ const ptrdiff_t crpos1 = formatinfo.crpos[1];
+ const ptrdiff_t crpos2 = formatinfo.crpos[2];
+ const ptrdiff_t crpos3 = formatinfo.crpos[3];
+ const ptrdiff_t cbpos0 = formatinfo.cbpos[0];
+ const ptrdiff_t cbpos1 = formatinfo.cbpos[1];
+ const ptrdiff_t cbpos2 = formatinfo.cbpos[2];
+ const ptrdiff_t cbpos3 = formatinfo.cbpos[3];
+
+ yrow = (char *)yrow - ystep * ((w-1) >> 2);
+ crrow = (char *)crrow - cstep * ((w-1) >> 2);
+ cbrow = (char *)cbrow - cstep * ((w-1) >> 2);
+ dst = (char *)dst + 2*((w-1) & ~3);
+
+ int y = 0;
+ do {
+ const uint8 *ysrc = (const uint8 *)yrow;
+ const uint8 *crsrc = (const uint8 *)crrow;
+ const uint8 *cbsrc = (const uint8 *)cbrow;
+ uint16 *out = (uint16 *)dst;
+ int w2 = -w;
+
+ switch(w2 & 3) {
+ do {
+ case 0: out[3] = ycbcr_to_1555(ysrc[ypos3], cbsrc[cbpos3], crsrc[crpos3]);
+ case 1: out[2] = ycbcr_to_1555(ysrc[ypos2], cbsrc[cbpos2], crsrc[crpos2]);
+ case 2: out[1] = ycbcr_to_1555(ysrc[ypos1], cbsrc[cbpos1], crsrc[crpos1]);
+ case 3: out[0] = ycbcr_to_1555(ysrc[ypos0], cbsrc[cbpos0], crsrc[crpos0]);
+ out -= 4;
+ ysrc += ystep;
+ crsrc += cstep;
+ cbsrc += cstep;
+ } while((w2 += 4) < 0);
+ }
+
+ dst = (char *)dst + dststride;
+ yrow = (const char *)yrow + (ystride & formatinfo.yinc[y & 3]);
+ cbrow = (const char *)cbrow + (cbstride & formatinfo.cinc[y & 3]);
+ crrow = (const char *)crrow + (crstride & formatinfo.cinc[y & 3]);
+ } while(++y < h);
+ }
+
+ void VDYCbCrToRGB565Generic(void *dst, ptrdiff_t dststride, const void *yrow, ptrdiff_t ystride, const void *cbrow, ptrdiff_t cbstride, const void *crrow, ptrdiff_t crstride, int w, int h, const YCbCrFormatInfo& formatinfo) {
+ const ptrdiff_t ystep = formatinfo.ystep;
+ const ptrdiff_t cstep = formatinfo.cstep;
+ const ptrdiff_t ypos0 = formatinfo.ypos[0];
+ const ptrdiff_t ypos1 = formatinfo.ypos[1];
+ const ptrdiff_t ypos2 = formatinfo.ypos[2];
+ const ptrdiff_t ypos3 = formatinfo.ypos[3];
+ const ptrdiff_t crpos0 = formatinfo.crpos[0];
+ const ptrdiff_t crpos1 = formatinfo.crpos[1];
+ const ptrdiff_t crpos2 = formatinfo.crpos[2];
+ const ptrdiff_t crpos3 = formatinfo.crpos[3];
+ const ptrdiff_t cbpos0 = formatinfo.cbpos[0];
+ const ptrdiff_t cbpos1 = formatinfo.cbpos[1];
+ const ptrdiff_t cbpos2 = formatinfo.cbpos[2];
+ const ptrdiff_t cbpos3 = formatinfo.cbpos[3];
+
+ yrow = (char *)yrow - ystep * ((w-1) >> 2);
+ crrow = (char *)crrow - cstep * ((w-1) >> 2);
+ cbrow = (char *)cbrow - cstep * ((w-1) >> 2);
+ dst = (char *)dst + 2*((w-1) & ~3);
+
+ int y = 0;
+ do {
+ const uint8 *ysrc = (const uint8 *)yrow;
+ const uint8 *crsrc = (const uint8 *)crrow;
+ const uint8 *cbsrc = (const uint8 *)cbrow;
+ uint16 *out = (uint16 *)dst;
+ int w2 = -w;
+
+ switch(w2 & 3) {
+ do {
+ case 0: out[3] = ycbcr_to_565(ysrc[ypos3], cbsrc[cbpos3], crsrc[crpos3]);
+ case 1: out[2] = ycbcr_to_565(ysrc[ypos2], cbsrc[cbpos2], crsrc[crpos2]);
+ case 2: out[1] = ycbcr_to_565(ysrc[ypos1], cbsrc[cbpos1], crsrc[crpos1]);
+ case 3: out[0] = ycbcr_to_565(ysrc[ypos0], cbsrc[cbpos0], crsrc[crpos0]);
+ out -= 4;
+ ysrc += ystep;
+ crsrc += cstep;
+ cbsrc += cstep;
+ } while((w2 += 4) < 0);
+ }
+
+ dst = (char *)dst + dststride;
+ yrow = (const char *)yrow + (ystride & formatinfo.yinc[y & 3]);
+ cbrow = (const char *)cbrow + (cbstride & formatinfo.cinc[y & 3]);
+ crrow = (const char *)crrow + (crstride & formatinfo.cinc[y & 3]);
+ } while(++y < h);
+ }
+
+ void VDYCbCrToRGB888Generic(void *dst, ptrdiff_t dststride, const void *yrow, ptrdiff_t ystride, const void *cbrow, ptrdiff_t cbstride, const void *crrow, ptrdiff_t crstride, int w, int h, const YCbCrFormatInfo& formatinfo) {
+ const ptrdiff_t ystep = formatinfo.ystep;
+ const ptrdiff_t cstep = formatinfo.cstep;
+ const ptrdiff_t ypos0 = formatinfo.ypos[0];
+ const ptrdiff_t ypos1 = formatinfo.ypos[1];
+ const ptrdiff_t ypos2 = formatinfo.ypos[2];
+ const ptrdiff_t ypos3 = formatinfo.ypos[3];
+ const ptrdiff_t crpos0 = formatinfo.crpos[0];
+ const ptrdiff_t crpos1 = formatinfo.crpos[1];
+ const ptrdiff_t crpos2 = formatinfo.crpos[2];
+ const ptrdiff_t crpos3 = formatinfo.crpos[3];
+ const ptrdiff_t cbpos0 = formatinfo.cbpos[0];
+ const ptrdiff_t cbpos1 = formatinfo.cbpos[1];
+ const ptrdiff_t cbpos2 = formatinfo.cbpos[2];
+ const ptrdiff_t cbpos3 = formatinfo.cbpos[3];
+
+ yrow = (char *)yrow - ystep * ((w-1) >> 2);
+ crrow = (char *)crrow - cstep * ((w-1) >> 2);
+ cbrow = (char *)cbrow - cstep * ((w-1) >> 2);
+ dst = (char *)dst + 3*((w-1) & ~3);
+
+ int y = 0;
+ do {
+ const uint8 *ysrc = (const uint8 *)yrow;
+ const uint8 *crsrc = (const uint8 *)crrow;
+ const uint8 *cbsrc = (const uint8 *)cbrow;
+ uint8 *out = (uint8 *)dst;
+ int w2 = -w;
+
+ switch(w2 & 3) {
+ do {
+ case 0: ycbcr_to_888(out+9, ysrc[ypos3], cbsrc[cbpos3], crsrc[crpos3]);
+ case 1: ycbcr_to_888(out+6, ysrc[ypos2], cbsrc[cbpos2], crsrc[crpos2]);
+ case 2: ycbcr_to_888(out+3, ysrc[ypos1], cbsrc[cbpos1], crsrc[crpos1]);
+ case 3: ycbcr_to_888(out, ysrc[ypos0], cbsrc[cbpos0], crsrc[crpos0]);
+ out -= 12;
+ ysrc += ystep;
+ crsrc += cstep;
+ cbsrc += cstep;
+ } while((w2 += 4) < 0);
+ }
+
+ dst = (char *)dst + dststride;
+ yrow = (const char *)yrow + (ystride & formatinfo.yinc[y & 3]);
+ cbrow = (const char *)cbrow + (cbstride & formatinfo.cinc[y & 3]);
+ crrow = (const char *)crrow + (crstride & formatinfo.cinc[y & 3]);
+ } while(++y < h);
+ }
+
+ void VDYCbCrToRGB8888Generic(void *dst, ptrdiff_t dststride, const void *yrow, ptrdiff_t ystride, const void *cbrow, ptrdiff_t cbstride, const void *crrow, ptrdiff_t crstride, int w, int h, const YCbCrFormatInfo& formatinfo) {
+ const ptrdiff_t ystep = formatinfo.ystep;
+ const ptrdiff_t cstep = formatinfo.cstep;
+ const ptrdiff_t ypos0 = formatinfo.ypos[0];
+ const ptrdiff_t ypos1 = formatinfo.ypos[1];
+ const ptrdiff_t ypos2 = formatinfo.ypos[2];
+ const ptrdiff_t ypos3 = formatinfo.ypos[3];
+ const ptrdiff_t crpos0 = formatinfo.crpos[0];
+ const ptrdiff_t crpos1 = formatinfo.crpos[1];
+ const ptrdiff_t crpos2 = formatinfo.crpos[2];
+ const ptrdiff_t crpos3 = formatinfo.crpos[3];
+ const ptrdiff_t cbpos0 = formatinfo.cbpos[0];
+ const ptrdiff_t cbpos1 = formatinfo.cbpos[1];
+ const ptrdiff_t cbpos2 = formatinfo.cbpos[2];
+ const ptrdiff_t cbpos3 = formatinfo.cbpos[3];
+
+ yrow = (char *)yrow - ystep * ((w-1) >> 2);
+ crrow = (char *)crrow - cstep * ((w-1) >> 2);
+ cbrow = (char *)cbrow - cstep * ((w-1) >> 2);
+ dst = (char *)dst + 4*((w-1) & ~3);
+
+ int y = 0;
+ do {
+ const uint8 *ysrc = (const uint8 *)yrow;
+ const uint8 *crsrc = (const uint8 *)crrow;
+ const uint8 *cbsrc = (const uint8 *)cbrow;
+ uint32 *out = (uint32 *)dst;
+ int w2 = -w;
+
+ switch(w2 & 3) {
+ do {
+ case 0: out[3] = ycbcr_to_8888(ysrc[ypos3], cbsrc[cbpos3], crsrc[crpos3]);
+ case 1: out[2] = ycbcr_to_8888(ysrc[ypos2], cbsrc[cbpos2], crsrc[crpos2]);
+ case 2: out[1] = ycbcr_to_8888(ysrc[ypos1], cbsrc[cbpos1], crsrc[crpos1]);
+ case 3: out[0] = ycbcr_to_8888(ysrc[ypos0], cbsrc[cbpos0], crsrc[crpos0]);
+ out -= 4;
+ ysrc += ystep;
+ crsrc += cstep;
+ cbsrc += cstep;
+ } while((w2 += 4) < 0);
+ }
+
+ dst = (char *)dst + dststride;
+ yrow = (const char *)yrow + (ystride & formatinfo.yinc[y & 3]);
+ cbrow = (const char *)cbrow + (cbstride & formatinfo.cinc[y & 3]);
+ crrow = (const char *)crrow + (crstride & formatinfo.cinc[y & 3]);
+ } while(++y < h);
+ }
+}
+
+#define DECLARE_YUV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+
+DECLARE_YUV(UYVY, XRGB1555) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint16 *y;
+
+ cb = src[0];
+ cr = src[2];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[1]]];
+ *dst++ = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[4];
+ cr = src[6];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[3]]];
+ dst[0] = (y[(rc0+rc1+1)>>1] & 0x7c00) + (y[(gc0+gc1+1)>>1] & 0x3e0) + (y[(bc0+bc1+1)>>1] & 0x001f);
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[5]]];
+ dst[1] = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+
+ dst += 2;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[3]]];
+ *dst = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, RGB565) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint16 *y;
+
+ cb = src[0];
+ cr = src[2];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[1]]];
+ *dst++ = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[4];
+ cr = src[6];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[3]]];
+ dst[0] = (y[(rc0+rc1+1)>>1] & 0xf800) + (y[(gc0+gc1+1)>>1] & 0x7e0) + (y[(bc0+bc1+1)>>1] & 0x001f);
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[5]]];
+ dst[1] = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+
+ dst += 2;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[3]]];
+ *dst = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, RGB888) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint8 *y;
+
+ cb = src[0];
+ cr = src[2];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[1]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ dst += 3;
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[4];
+ cr = src[6];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[3]]];
+ dst[0] = y[(bc0+bc1+1)>>1];
+ dst[1] = y[(gc0+gc1+1)>>1];
+ dst[2] = y[(rc0+rc1+1)>>1];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[5]]];
+ dst[3] = y[bc1];
+ dst[4] = y[gc1];
+ dst[5] = y[rc1];
+
+ dst += 6;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[3]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, XRGB8888) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint8 *y;
+
+ cb = src[0];
+ cr = src[2];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[1]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ dst += 4;
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[4];
+ cr = src[6];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[3]]];
+ dst[0] = y[(bc0+bc1+1)>>1];
+ dst[1] = y[(gc0+gc1+1)>>1];
+ dst[2] = y[(rc0+rc1+1)>>1];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[5]]];
+ dst[4] = y[bc1];
+ dst[5] = y[gc1];
+ dst[6] = y[rc1];
+
+ dst += 8;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[3]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, XRGB1555) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint16 *y;
+
+ cb = src[1];
+ cr = src[3];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[0]]];
+ *dst++ = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[5];
+ cr = src[7];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[2]]];
+ dst[0] = (y[(rc0+rc1+1)>>1] & 0x7c00) + (y[(gc0+gc1+1)>>1] & 0x3e0) + (y[(bc0+bc1+1)>>1] & 0x001f);
+
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[4]]];
+ dst[1] = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+
+ dst += 2;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab15[277 + colorconv.y_tab[src[2]]];
+ *dst = (y[rc1] & 0x7c00) + (y[gc1] & 0x3e0) + (y[bc1] & 0x001f);
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, RGB565) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint16 *dst = (uint16 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint16 *y;
+
+ cb = src[1];
+ cr = src[3];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[0]]];
+ *dst++ = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[5];
+ cr = src[7];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[2]]];
+ dst[0] = (y[(rc0+rc1+1)>>1] & 0xf800) + (y[(gc0+gc1+1)>>1] & 0x7e0) + (y[(bc0+bc1+1)>>1] & 0x001f);
+
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[4]]];
+ dst[1] = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+
+ dst += 2;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab16[277 + colorconv.y_tab[src[2]]];
+ *dst = (y[rc1] & 0xf800) + (y[gc1] & 0x7e0) + (y[bc1] & 0x001f);
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, RGB888) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint8 *y;
+
+ cb = src[1];
+ cr = src[3];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[0]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ dst += 3;
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[5];
+ cr = src[7];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[2]]];
+ dst[0] = y[(bc0+bc1+1)>>1];
+ dst[1] = y[(gc0+gc1+1)>>1];
+ dst[2] = y[(rc0+rc1+1)>>1];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[4]]];
+ dst[3] = y[bc1];
+ dst[4] = y[gc1];
+ dst[5] = y[rc1];
+
+ dst += 6;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[2]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, XRGB8888) {
+ do {
+ const uint8 *src = (const uint8 *)src0;
+ uint8 *dst = (uint8 *)dst0;
+
+ // convert first pixel
+ int cb, cr;
+ int rc0, gc0, bc0, rc1, gc1, bc1;
+ const uint8 *y;
+
+ cb = src[1];
+ cr = src[3];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[0]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ dst += 4;
+
+ // convert pairs of pixels
+ int w2 = w;
+
+ if ((w2 -= 2) > 0) {
+ do {
+ rc0 = rc1;
+ gc0 = gc1;
+ bc0 = bc1;
+
+ cb = src[5];
+ cr = src[7];
+ rc1 = colorconv.r_cr_tab[cr];
+ gc1 = colorconv.g_cr_tab[cr] + colorconv.g_cb_tab[cb];
+ bc1 = colorconv.b_cb_tab[cb];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[2]]];
+ dst[0] = y[(bc0+bc1+1)>>1];
+ dst[1] = y[(gc0+gc1+1)>>1];
+ dst[2] = y[(rc0+rc1+1)>>1];
+
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[4]]];
+ dst[4] = y[bc1];
+ dst[5] = y[gc1];
+ dst[6] = y[rc1];
+
+ dst += 8;
+ src += 4;
+ } while((w2 -= 2) > 0);
+ }
+
+ // handle oddballs
+ if (!(w2 & 1)) {
+ y = &colorconv.cliptab[277 + colorconv.y_tab[src[2]]];
+ dst[0] = y[bc1];
+ dst[1] = y[gc1];
+ dst[2] = y[rc1];
+ }
+
+ vdptrstep(src0, srcpitch);
+ vdptrstep(dst0, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, XRGB1555) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 2*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = colorconv.cliptab15[colorconv.y_tab[*src++] + 277];
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, RGB565) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 2*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = colorconv.cliptab16[colorconv.y_tab[*src++] + 277];
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, RGB888) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 3*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ dst[0] = dst[1] = dst[2] = colorconv.cliptab[colorconv.y_tab[*src++] + 277];
+ dst += 3;
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, XRGB8888) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 4*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = 0x010101 * colorconv.cliptab[colorconv.y_tab[*src++] + 277];
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+#define DECLARE_YUV_PLANAR(x, y) void VDPixmapBlt_##x##_to_##y##_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+
+
+namespace {
+ typedef void (*tpYUVPlanarFinalDecoder)(void *, const uint8 *, const uint8 *, const uint8 *, uint32);
+ typedef void (*tpYUVPlanarHorizDecoder)(uint8 *dst, const uint8 *src, sint32 w);
+ typedef void (*tpYUVPlanarVertDecoder)(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+}
+
+#ifdef _M_IX86
+ extern "C" void __cdecl vdasm_pixblt_YUV444Planar_to_XRGB1555_scan_MMX(void *dst, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 count);
+ extern "C" void __cdecl vdasm_pixblt_YUV444Planar_to_RGB565_scan_MMX(void *dst, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 count);
+ extern "C" void __cdecl vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX(void *dst, const uint8 *y, const uint8 *cb, const uint8 *cr, uint32 count);
+#endif
+
+
+void VDPixmapBlt_YUVPlanar_decode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src.format);
+ int hbits = srcinfo.auxwbits;
+ int vbits = srcinfo.auxhbits;
+
+ if (src.format == nsVDPixmap::kPixFormat_YUV422_UYVY || src.format == nsVDPixmap::kPixFormat_YUV422_YUYV)
+ hbits = 1;
+
+ bool h_coaligned = true;
+ bool v_coaligned = false;
+
+ if (src.format == nsVDPixmap::kPixFormat_YUV422_Planar_Centered ||
+ src.format == nsVDPixmap::kPixFormat_YUV420_Planar_Centered) {
+ h_coaligned = false;
+ }
+
+ tpYUVPlanarVertDecoder vfunc = NULL;
+ tpYUVPlanarHorizDecoder hfunc = NULL;
+ uint32 horiz_buffer_size = 0;
+ uint32 vert_buffer_size = 0;
+ uint32 horiz_count = 0;
+ sint32 yaccum = 8;
+ sint32 yinc = 8;
+ uint32 yleft = h;
+
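+	// yaccum/yinc form a 3-bit fixed-point vertical phase accumulator (8 = one
+	// full source chroma row): 4:2:0 steps by 4/8 per output row and 4:1:0 by
+	// 2/8, and the vertical expander receives the phase as (yaccum & 7) << 5.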
+ switch(vbits*2+v_coaligned) {
+ case 0: // 4:4:4, 4:2:2
+ case 1:
+ break;
+ case 2: // 4:2:0 (centered)
+ vfunc = vert_expand2x_centered;
+ vert_buffer_size = w>>1;
+ yaccum = 6;
+ yinc = 4;
+ yleft >>= 1;
+ break;
+ case 4: // 4:1:0 (centered)
+ vfunc = vert_expand4x_centered;
+ vert_buffer_size = w>>2;
+ yaccum = 5;
+ yinc = 2;
+ yleft >>= 2;
+ break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+ --yleft;
+
+ tpYUVPlanarFinalDecoder dfunc = NULL;
+
+#ifdef _M_IX86
+ uint32 cpuflags = CPUGetEnabledExtensions();
+
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (vfunc == vert_expand2x_centered)
+ vfunc = vert_expand2x_centered_ISSE;
+ }
+
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_XRGB1555: dfunc = vdasm_pixblt_YUV444Planar_to_XRGB1555_scan_MMX; break;
+ case nsVDPixmap::kPixFormat_RGB565: dfunc = vdasm_pixblt_YUV444Planar_to_RGB565_scan_MMX; break;
+ case nsVDPixmap::kPixFormat_XRGB8888: dfunc = vdasm_pixblt_YUV444Planar_to_XRGB8888_scan_MMX; break;
+ }
+ }
+#endif
+
+ bool halfchroma = false;
+
+ if (!dfunc) {
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_XRGB1555: dfunc = VDYCbCrToXRGB1555Span; break;
+ case nsVDPixmap::kPixFormat_RGB565: dfunc = VDYCbCrToRGB565Span; break;
+ case nsVDPixmap::kPixFormat_RGB888: dfunc = VDYCbCrToRGB888Span; break;
+ case nsVDPixmap::kPixFormat_XRGB8888: dfunc = VDYCbCrToXRGB8888Span; break;
+ case nsVDPixmap::kPixFormat_YUV422_UYVY: dfunc = VDYCbCrToUYVYSpan; halfchroma = true; break;
+ case nsVDPixmap::kPixFormat_YUV422_YUYV: dfunc = VDYCbCrToYUYVSpan; halfchroma = true; break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+ }
+
+ switch(hbits*2+h_coaligned) {
+ case 0: // 4:4:4
+ case 1:
+ if (halfchroma) {
+ hfunc = horiz_compress2x_coaligned;
+ horiz_buffer_size = (w + 1) >> 1;
+ horiz_count = w;
+ }
+ break;
+ case 2: // 4:2:0 MPEG-1 (centered)
+ if (halfchroma) {
+ hfunc = horiz_realign_to_coaligned;
+ horiz_buffer_size = (w + 1) >> 1;
+ horiz_count = (w + 1) >> 1;
+ } else {
+ hfunc = horiz_expand2x_centered;
+ horiz_buffer_size = w;
+ horiz_count = w;
+ }
+ break;
+ case 3: // 4:2:0/4:2:2 MPEG-2 (coaligned)
+ if (!halfchroma) {
+ hfunc = horiz_expand2x_coaligned;
+ horiz_buffer_size = w;
+ horiz_count = w;
+ }
+ break;
+ case 5: // 4:1:1 (coaligned)
+ if (halfchroma) {
+ hfunc = horiz_expand2x_coaligned;
+ horiz_buffer_size = (w + 1) >> 1;
+ horiz_count = (w + 1) >> 1;
+ } else {
+ hfunc = horiz_expand4x_coaligned;
+ horiz_buffer_size = w;
+ horiz_count = w;
+ }
+ break;
+
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (hfunc == horiz_expand2x_coaligned)
+ hfunc = horiz_expand2x_coaligned_ISSE;
+ }
+#endif
+
+ uint32 chroma_srcwidth = -(-w >> srcinfo.auxwbits);
+ horiz_buffer_size = (horiz_buffer_size + 15) & ~15;
+ vert_buffer_size = (vert_buffer_size + 15) & ~15;
+
+ // allocate buffers
+
+ vdblock<uint8> tempbuf((horiz_buffer_size + vert_buffer_size)*2 + 1);
+
+ uint8 *const crbufh = tempbuf.data();
+ uint8 *const crbufv = crbufh + horiz_buffer_size;
+ uint8 *const cbbufh = crbufv + vert_buffer_size;
+ uint8 *const cbbufv = cbbufh + horiz_buffer_size;
+
+ const uint8 *cb0 = (const uint8*)src.data2;
+ const uint8 *cr0 = (const uint8*)src.data3;
+ const uint8 *cb1 = cb0;
+ const uint8 *cr1 = cr0;
+ const uint8 *y = (const uint8 *)src.data;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ void *out = dst.data;
+ ptrdiff_t outpitch = dst.pitch;
+
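+	// Per output row: advance the chroma row pair when the phase wraps,
+	// vertically interpolate Cr/Cb into the scratch buffers if needed,
+	// horizontally expand or realign them, then hand one full-width span of
+	// Y/Cb/Cr to dfunc for the final pixel-format conversion.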
+ for(;;) {
+ if (yaccum >= 8) {
+ yaccum &= 7;
+
+ cb0 = cb1;
+ cr0 = cr1;
+
+ if (yleft > 0) {
+ --yleft;
+ vdptrstep(cb1, cbpitch);
+ vdptrstep(cr1, crpitch);
+ }
+ }
+
+ const uint8 *cr = cr0;
+ const uint8 *cb = cb0;
+
+ // vertical interpolation: cr
+ if(yaccum & 7) {
+ const uint8 *const srcs[2]={cr0, cr1};
+ vfunc(crbufv, srcs, chroma_srcwidth, (yaccum & 7) << 5);
+ cr = crbufv;
+ }
+
+ // horizontal interpolation: cr
+ if (hfunc) {
+ hfunc(crbufh, cr, horiz_count);
+ cr = crbufh;
+ }
+
+ // vertical interpolation: cb
+ if(yaccum & 7) {
+ const uint8 *const srcs[2]={cb0, cb1};
+ vfunc(cbbufv, srcs, chroma_srcwidth, (yaccum & 7) << 5);
+ cb = cbbufv;
+ }
+
+ // horizontal interpolation: cb
+ if (hfunc) {
+ hfunc(cbbufh, cb, horiz_count);
+ cb = cbbufh;
+ }
+
+ dfunc(out, y, cb, cr, w);
+ vdptrstep(out, outpitch);
+ vdptrstep(y, ypitch);
+
+ if (!--h)
+ break;
+
+ yaccum += yinc;
+ }
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ __asm emms
+ }
+#endif
+}
+
+namespace {
+ typedef void (*tpUVBltHorizDecoder)(uint8 *dst, const uint8 *src, sint32 w);
+ typedef void (*tpUVBltVertDecoder)(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase);
+
+ void uvplaneblt(uint8 *dst, ptrdiff_t dstpitch, int dstformat, const uint8 *src, ptrdiff_t srcpitch, int srcformat, vdpixsize w, vdpixsize h) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(srcformat);
+ const VDPixmapFormatInfo& dstinfo = VDPixmapGetInfo(dstformat);
+
+ int xshift = srcinfo.auxwbits - dstinfo.auxwbits;
+ int yshift = srcinfo.auxhbits - dstinfo.auxhbits;
+
+ tpUVBltHorizDecoder hfunc = NULL;
+ tpUVBltVertDecoder vfunc = NULL;
+
+ switch(xshift) {
+ case +2:
+ hfunc = horiz_expand4x_coaligned;
+ break;
+ case +1:
+ hfunc = horiz_expand2x_coaligned;
+ break;
+ case 0:
+ break;
+ case -1:
+ hfunc = horiz_compress2x_coaligned;
+ break;
+ case -2:
+ hfunc = horiz_compress4x_coaligned;
+ break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+#ifdef _M_IX86
+ uint32 cpuflags = CPUGetEnabledExtensions();
+
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (hfunc == horiz_expand2x_coaligned)
+ hfunc = horiz_expand2x_coaligned_ISSE;
+ }
+#endif
+
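+		// Pointers to the last winsize (possibly horizontally filtered) source
+		// rows are kept in a small ring buffer; winposnext is an 8.8 fixed-point
+		// source row position advanced by winstep per output row, and its
+		// fractional part is the vertical filter phase.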
+ int winsize, winposnext, winstep;
+
+ switch(yshift) {
+ case +2:
+ vfunc = vert_expand4x_centered;
+ winsize = 2;
+ winposnext = 0xa0;
+ winstep = 0x40;
+ break;
+ case +1:
+ vfunc = vert_expand2x_centered;
+ winsize = 2;
+ winposnext = 0xc0;
+ winstep = 0x80;
+ break;
+ case 0:
+ winsize = 1;
+ winposnext = 0;
+ winstep = 0x100;
+ break;
+ case -1:
+ vfunc = vert_compress2x_centered;
+ winsize = 4;
+ winposnext = 0x200;
+ winstep = 0x200;
+ break;
+ case -2:
+ vfunc = vert_compress4x_centered;
+ winsize = 8;
+ winposnext = 0x500;
+ winstep = 0x400;
+ break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (vfunc == vert_expand2x_centered)
+ vfunc = vert_expand2x_centered_ISSE;
+ }
+#endif
+
+ int dsth = -(-h >> dstinfo.auxhbits);
+ int srch = -(-h >> srcinfo.auxhbits);
+ int dstw = -(-w >> dstinfo.auxwbits);
+ int w2 = -(-w >> std::min<int>(dstinfo.auxwbits, srcinfo.auxwbits));
+
+ int winpos = (winposnext>>8) - winsize;
+
+ const uint8 *window[16];
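+		// Each fetched row pointer is stored twice (at winoffset and at
+		// winoffset + winsize) so that window + (winpos & (winsize-1)) always
+		// yields winsize consecutive valid pointers for the vertical filter.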
+
+ vdblock<uint8> tmpbuf;
+ ptrdiff_t tmppitch = (w+15) & ~15;
+
+ if (vfunc && hfunc)
+ tmpbuf.resize(tmppitch * winsize);
+
+ do {
+ int desiredpos = winposnext >> 8;
+
+ while(winpos < desiredpos) {
+ const uint8 *srcrow = vdptroffset(src, srcpitch * std::max<int>(0, std::min<int>(srch-1, ++winpos)));
+ int winoffset = (winpos-1) & (winsize-1);
+
+ if (hfunc) {
+ uint8 *dstrow = vfunc ? tmpbuf.data() + tmppitch * winoffset : dst;
+ hfunc(dstrow, srcrow, w2);
+ srcrow = dstrow;
+ }
+
+ window[winoffset] = window[winoffset + winsize] = srcrow;
+ }
+
+ if (vfunc)
+ vfunc(dst, window + (winpos & (winsize-1)), dstw, winposnext & 255);
+ else if (!hfunc)
+ memcpy(dst, window[winpos & (winsize-1)], dstw);
+
+ winposnext += winstep;
+ vdptrstep(dst, dstpitch);
+ } while(--dsth);
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ __asm emms
+ }
+#endif
+ }
+}
+
+void VDPixmapBlt_YUVPlanar_convert_reference(const VDPixmap& dstpm, const VDPixmap& srcpm, vdpixsize w, vdpixsize h) {
+ VDMemcpyRect(dstpm.data, dstpm.pitch, srcpm.data, srcpm.pitch, dstpm.w, dstpm.h);
+
+ if (srcpm.format != nsVDPixmap::kPixFormat_Y8) {
+ if (dstpm.format != nsVDPixmap::kPixFormat_Y8) {
+ // YCbCr -> YCbCr
+ uvplaneblt((uint8 *)dstpm.data2, dstpm.pitch2, dstpm.format, (uint8 *)srcpm.data2, srcpm.pitch2, srcpm.format, w, h);
+ uvplaneblt((uint8 *)dstpm.data3, dstpm.pitch3, dstpm.format, (uint8 *)srcpm.data3, srcpm.pitch3, srcpm.format, w, h);
+ }
+ } else {
+ if (dstpm.format != nsVDPixmap::kPixFormat_Y8) {
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(dstpm.format);
+ VDMemset8Rect(dstpm.data2, dstpm.pitch2, 0x80, -(-w >> info.auxwbits), -(-h >> info.auxhbits));
+ VDMemset8Rect(dstpm.data3, dstpm.pitch3, 0x80, -(-w >> info.auxwbits), -(-h >> info.auxhbits));
+ }
+ }
+}
+
+extern "C" void vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_MMX(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_RGB565_scan_MMX(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_MMX(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_ISSE(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_RGB565_scan_ISSE(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+extern "C" void vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_ISSE(void *dst, const void *y, const void *cb, const void *cr, unsigned count);
+
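+// YUV411 planar -> RGB converters: on x86 the groups of four pixels are handled
+// by the MMX/ISSE scan routines declared above, while the x64 build uses an
+// equivalent C path that interpolates the chroma samples with 3:1, 1:1 and 1:3
+// weights across the group.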
+DECLARE_YUV_PLANAR(YUV411, XRGB1555) {
+ uint16 *out = (uint16 *)dst.data;
+ const ptrdiff_t opitch = dst.pitch;
+ const uint8 *yrow = (const uint8 *)src.data;
+ const uint8 *cbrow = (const uint8 *)src.data2;
+ const uint8 *crrow = (const uint8 *)src.data3;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ vdpixsize wpairs = (w-1)>>2;
+ vdpixsize wleft = w - (wpairs<<2);
+
+ do {
+ uint16 *p = out;
+ const uint8 *y = yrow;
+ const uint8 *cb = cbrow;
+ const uint8 *cr = crrow;
+ vdpixsize wt;
+
+ if (wpairs > 0) {
+#ifdef _M_AMD64
+ wt = wpairs;
+
+ do {
+ const unsigned cb0 = cb[0];
+ const unsigned cb1 = cb[1];
+ const unsigned cr0 = cr[0];
+ const unsigned cr1 = cr[1];
+
+ p[0] = ycbcr_to_1555(y[0], cb0, cr0);
+ p[1] = ycbcr_to_1555(y[1], (3*cb0+cb1+2)>>2, (3*cr0+cr1+2)>>2);
+ p[2] = ycbcr_to_1555(y[2], (cb0+cb1+1)>>1, (cr0+cr1+1)>>1);
+ p[3] = ycbcr_to_1555(y[3], (cb0+3*cb1+2)>>2, (cr0+3*cr1+2)>>2);
+
+ y += 4;
+ p += 4;
+ ++cb;
+ ++cr;
+ } while(--wt);
+#else
+ vdasm_pixblt_YUV411Planar_to_XRGB1555_scan_ISSE(p, y, cb, cr, wpairs);
+ y += 4*wpairs;
+ cr += wpairs;
+ cb += wpairs;
+ p += 4*wpairs;
+#endif
+ }
+
+ if (wleft > 0) {
+ wt = wleft;
+
+ const uint8 cr0 = *cr;
+ const uint8 cb0 = *cb;
+
+ do {
+ *p++ = ycbcr_to_1555(*y++, cb0, cr0);
+ } while(--wt);
+ }
+
+ vdptrstep(out, opitch);
+ vdptrstep(yrow, ypitch);
+ vdptrstep(cbrow, cbpitch);
+ vdptrstep(crrow, crpitch);
+ } while(--h);
+
+#ifndef _M_AMD64
+ __asm emms
+#endif
+}
+
+DECLARE_YUV_PLANAR(YUV411, RGB565) {
+ uint16 *out = (uint16 *)dst.data;
+ const ptrdiff_t opitch = dst.pitch;
+ const uint8 *yrow = (const uint8 *)src.data;
+ const uint8 *cbrow = (const uint8 *)src.data2;
+ const uint8 *crrow = (const uint8 *)src.data3;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ vdpixsize wpairs = (w-1)>>2;
+ vdpixsize wleft = w - (wpairs<<2);
+
+ do {
+ uint16 *p = out;
+ const uint8 *y = yrow;
+ const uint8 *cb = cbrow;
+ const uint8 *cr = crrow;
+ vdpixsize wt;
+
+ if (wpairs > 0) {
+#ifdef _M_AMD64
+ wt = wpairs;
+
+ do {
+ const unsigned cb0 = cb[0];
+ const unsigned cb1 = cb[1];
+ const unsigned cr0 = cr[0];
+ const unsigned cr1 = cr[1];
+
+ p[0] = ycbcr_to_565(y[0], cb0, cr0);
+ p[1] = ycbcr_to_565(y[1], (3*cb0+cb1+2)>>2, (3*cr0+cr1+2)>>2);
+ p[2] = ycbcr_to_565(y[2], (cb0+cb1+1)>>1, (cr0+cr1+1)>>1);
+ p[3] = ycbcr_to_565(y[3], (cb0+3*cb1+2)>>2, (cr0+3*cr1+2)>>2);
+
+ y += 4;
+ p += 4;
+ ++cb;
+ ++cr;
+ } while(--wt);
+#else
+			vdasm_pixblt_YUV411Planar_to_RGB565_scan_ISSE(p, y, cb, cr, wpairs);
+			// advance to the leftover pixels, as the XRGB1555/XRGB8888 paths do
+			y += 4*wpairs;
+			cr += wpairs;
+			cb += wpairs;
+			p += 4*wpairs;
+#endif
+ }
+
+ if (wleft > 0) {
+ wt = wleft;
+
+ const uint8 cr0 = *cr;
+ const uint8 cb0 = *cb;
+
+ do {
+ *p++ = ycbcr_to_565(*y++, cb0, cr0);
+ } while(--wt);
+ }
+
+ vdptrstep(out, opitch);
+ vdptrstep(yrow, ypitch);
+ vdptrstep(cbrow, cbpitch);
+ vdptrstep(crrow, crpitch);
+ } while(--h);
+
+#ifndef _M_AMD64
+ __asm emms
+#endif
+}
+
+DECLARE_YUV_PLANAR(YUV411, RGB888) {
+ uint8 *out = (uint8 *)dst.data;
+ const ptrdiff_t opitch = dst.pitch;
+ const uint8 *yrow = (const uint8 *)src.data;
+ const uint8 *cbrow = (const uint8 *)src.data2;
+ const uint8 *crrow = (const uint8 *)src.data3;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ vdpixsize wpairs = (w-1)>>2;
+ vdpixsize wleft = w - (wpairs<<2);
+
+ do {
+ uint8 *p = out;
+ const uint8 *y = yrow;
+ const uint8 *cb = cbrow;
+ const uint8 *cr = crrow;
+ vdpixsize wt;
+
+ if (wpairs > 0) {
+ wt = wpairs;
+
+ do {
+ const unsigned cb0 = cb[0];
+ const unsigned cb1 = cb[1];
+ const unsigned cr0 = cr[0];
+ const unsigned cr1 = cr[1];
+
+ ycbcr_to_888(p+0, y[0], cb0, cr0);
+ ycbcr_to_888(p+3, y[1], (3*cb0+cb1+2)>>2, (3*cr0+cr1+2)>>2);
+ ycbcr_to_888(p+6, y[2], (cb0+cb1+1)>>1, (cr0+cr1+1)>>1);
+ ycbcr_to_888(p+9, y[3], (cb0+3*cb1+2)>>2, (cr0+3*cr1+2)>>2);
+
+ y += 4;
+ p += 12;
+ ++cb;
+ ++cr;
+ } while(--wt);
+ }
+
+ if (wleft > 0) {
+ wt = wleft;
+
+ const uint8 cr0 = *cr;
+ const uint8 cb0 = *cb;
+
+ do {
+ ycbcr_to_888(p, *y++, cb0, cr0);
+ p += 4;
+				p += 3;	// 3 bytes per RGB888 pixel, matching the 12-byte step of the paired loop
+ }
+
+ vdptrstep(out, opitch);
+ vdptrstep(yrow, ypitch);
+ vdptrstep(cbrow, cbpitch);
+ vdptrstep(crrow, crpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_PLANAR(YUV411, XRGB8888) {
+ uint32 *out = (uint32 *)dst.data;
+ const ptrdiff_t opitch = dst.pitch;
+ const uint8 *yrow = (const uint8 *)src.data;
+ const uint8 *cbrow = (const uint8 *)src.data2;
+ const uint8 *crrow = (const uint8 *)src.data3;
+ const ptrdiff_t ypitch = src.pitch;
+ const ptrdiff_t cbpitch = src.pitch2;
+ const ptrdiff_t crpitch = src.pitch3;
+
+ vdpixsize wpairs = (w-1)>>2;
+ vdpixsize wleft = w - (wpairs<<2);
+
+ do {
+ uint32 *p = out;
+ const uint8 *y = yrow;
+ const uint8 *cb = cbrow;
+ const uint8 *cr = crrow;
+ vdpixsize wt;
+
+ if (wpairs > 0) {
+#ifdef _M_AMD64
+ wt = wpairs;
+
+ do {
+ const unsigned cb0 = cb[0];
+ const unsigned cb1 = cb[1];
+ const unsigned cr0 = cr[0];
+ const unsigned cr1 = cr[1];
+
+ p[0] = ycbcr_to_8888(y[0], cb0, cr0);
+ p[1] = ycbcr_to_8888(y[1], (3*cb0+cb1+2)>>2, (3*cr0+cr1+2)>>2);
+ p[2] = ycbcr_to_8888(y[2], (cb0+cb1+1)>>1, (cr0+cr1+1)>>1);
+ p[3] = ycbcr_to_8888(y[3], (cb0+3*cb1+2)>>2, (cr0+3*cr1+2)>>2);
+
+ y += 4;
+ p += 4;
+ ++cb;
+ ++cr;
+ } while(--wt);
+#else
+ vdasm_pixblt_YUV411Planar_to_XRGB8888_scan_MMX(p, y, cb, cr, wpairs);
+ y += 4*wpairs;
+ cr += wpairs;
+ cb += wpairs;
+ p += 4*wpairs;
+#endif
+ }
+
+ if (wleft > 0) {
+ wt = wleft;
+
+ const uint8 cr0 = *cr;
+ const uint8 cb0 = *cb;
+
+ do {
+ *p++ = ycbcr_to_8888(*y++, cb0, cr0);
+ } while(--wt);
+ }
+
+ vdptrstep(out, opitch);
+ vdptrstep(yrow, ypitch);
+ vdptrstep(cbrow, cbpitch);
+ vdptrstep(crrow, crpitch);
+ } while(--h);
+
+#ifndef _M_AMD64
+ __asm emms
+#endif
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv2yuv.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv2yuv.cpp
new file mode 100644
index 000000000..b581e9bf7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuv2yuv.cpp
@@ -0,0 +1,260 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/memory.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+#include "bitutils.h"
+#include "blt_spanutils.h"
+
+#define DECLARE_YUV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV_PLANAR(x, y) void VDPixmapBlt_##x##_to_##y##_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h)
+
+using namespace nsVDPixmapBitUtils;
+using namespace nsVDPixmapSpanUtils;
+
+DECLARE_YUV(XVYU, UYVY) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ srcpitch -= (w&~1)*4;
+ dstpitch -= (w&~1)*2;
+
+ do {
+ vdpixsize wt = w;
+
+ wt = -wt;
+
+ if (++wt) {
+ uint32 a, b, c;
+
+ a = src[0];
+ b = src[1];
+ *dst++ = (avg_8888_121(a, a, b) & 0xff00ff) + (a & 0xff00) + ((b & 0xff00)<<16);
+ src += 2;
+
+ if ((wt+=2) < 0) {
+ do {
+ a = src[-1];
+ b = src[0];
+ c = src[1];
+
+ *dst++ = (avg_8888_121(a, b, c) & 0xff00ff) + (b & 0xff00) + ((c & 0xff00)<<16);
+ src += 2;
+ } while((wt+=2) < 0);
+ }
+ }
+
+ if (!(wt&1))
+ *dst = *src;
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(XVYU, YUYV) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ srcpitch -= (w&~1)*4;
+ dstpitch -= (w&~1)*2;
+
+ do {
+ vdpixsize wt = w;
+
+ wt = -wt;
+
+ if (++wt) {
+ uint32 a, b, c;
+
+ a = src[0];
+ b = src[1];
+ *dst++ = ((avg_8888_121(a, a, b) & 0xff00ff)<<8) + ((a & 0xff00)>>8) + ((b & 0xff00)<<8);
+ src += 2;
+
+ if ((wt+=2)<0) {
+ do {
+ a = src[-1];
+ b = src[0];
+ c = src[1];
+
+ *dst++ = ((avg_8888_121(a, b, c) & 0xff00ff)<<8) + ((b & 0xff00)>>8) + ((c & 0xff00)<<8);
+ src += 2;
+ } while((wt+=2) < 0);
+ }
+ }
+
+ if (!(wt&1)) {
+ uint32 v = *src;
+ *dst = ((v&0xff00ff)<<8) + ((v&0xff00ff00)>>8);
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, YUYV) { // also YUYV->UYVY
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ w = (w+1) >> 1;
+
+ dstpitch -= 4*w;
+ srcpitch -= 4*w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ const uint32 p = *src++;
+
+ *dst++ = ((p & 0xff00ff00)>>8) + ((p & 0x00ff00ff)<<8);
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(UYVY, Y8) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= w;
+ srcpitch -= 2*w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = src[1];
+ src += 2;
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(YUYV, Y8) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= w;
+ srcpitch -= 2*w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ *dst++ = src[0];
+ src += 2;
+ } while(--w2);
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, UYVY) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 2*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ dst[0] = 0x80;
+ dst[1] = *src++;
+ dst += 2;
+ } while(--w2);
+
+ if (w & 1) {
+ dst[0] = 0x80;
+ dst[1] = dst[-1];
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV(Y8, YUYV) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ dstpitch -= 2*w;
+ srcpitch -= w;
+
+ do {
+ vdpixsize w2 = w;
+
+ do {
+ dst[0] = *src++;
+ dst[1] = 0x80;
+ dst += 2;
+ } while(--w2);
+
+ if (w & 1) {
+ dst[0] = dst[-1];
+ dst[1] = 0x80;
+ }
+
+ vdptrstep(src, srcpitch);
+ vdptrstep(dst, dstpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_PLANAR(YUV411, YV12) {
+ VDMemcpyRect(dst.data, dst.pitch, src.data, src.pitch, w, h);
+
+ vdblock<uint8> tmprow(w);
+ const uint8 *srcp = (const uint8 *)src.data2;
+ ptrdiff_t srcpitch = src.pitch2;
+ uint8 *dstp = (uint8 *)dst.data2;
+ ptrdiff_t dstpitch = dst.pitch2;
+ const uint8 *src1, *src2;
+
+ vdpixsize h2;
+ for(h2 = h; h2 > 0; h2 -= 2) {
+ src1 = srcp;
+ vdptrstep(srcp, srcpitch);
+ if (h2 > 1)
+ src2 = srcp;
+ else
+ src2 = src1;
+ vdptrstep(srcp, srcpitch);
+
+ const uint8 *sources[2] = {src1, src2};
+
+ vert_compress2x_centered_fast(tmprow.data(), sources, w, 0);
+ horiz_expand2x_coaligned(dstp, tmprow.data(), w);
+
+ vdptrstep(dstp, dstpitch);
+ }
+
+ srcp = (const uint8 *)src.data3;
+ srcpitch = src.pitch3;
+ dstp = (uint8 *)dst.data3;
+ dstpitch = dst.pitch3;
+ for(h2 = h; h2 > 0; h2 -= 2) {
+ src1 = srcp;
+ vdptrstep(srcp, srcpitch);
+ if (h2 > 1)
+ src2 = srcp;
+ else
+ src2 = src1;
+ vdptrstep(srcp, srcpitch);
+
+ const uint8 *sources[2] = {src1, src2};
+ vert_compress2x_centered_fast(tmprow.data(), sources, w, 0);
+ horiz_expand2x_coaligned(dstp, tmprow.data(), w);
+
+ vdptrstep(dstp, dstpitch);
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuvrev.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuvrev.cpp
new file mode 100644
index 000000000..d6f38bf65
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_reference_yuvrev.cpp
@@ -0,0 +1,530 @@
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "blt_spanutils.h"
+
+#ifdef _M_IX86
+ #include "blt_spanutils_x86.h"
+#endif
+
+using namespace nsVDPixmapSpanUtils;
+
+namespace {
+ // From Jim Blinn's "Dirty Pixels":
+ //
+ // Y = .299R + .587G + .114B
+ // Cr = 0.713(R-Y)
+ // Cb = 0.564(B-Y)
+ //
+ // IY = 219Y + 16 = ((yt = 1052IR + 2065IG + 401IB) + 67584) >> 12
+ // ICr = 224Cr + 128 = (yt*2987 - 10507932IR + 2155872256) >> 24
+ // ICb = 224Cb + 128 = (yt*2363 - 8312025IB + 2155872256) >> 24
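+	//
+	// As a quick check of the integer luma form: black (0,0,0) gives
+	// (0 + 67584) >> 12 = 16 and white (255,255,255) gives
+	// (3518*255 + 67584) >> 12 = 235, the expected limited-range endpoints.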
+
+ void ConvertRGB32ToXVYU32(uint32 *dst, const uint8 *src, sint32 count) {
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ const sint32 yt = 1052*r + 2065*g + 401*b;
+ const sint32 y = (yt + 67584) >> 4; // <<8 alignment shift
+ const sint32 cr = (10507932*r - yt*2987 + 2155872256U) >> 8; // <<16 alignment shift
+ const sint32 cb = ( 8312025*b - yt*2363 + 2155872256U) >> 24;
+
+ *dst++ = (y&0xff00) + cb + (cr&0xff0000); // VYU order
+ src += 4;
+ } while(--count);
+ }
+
+ void ConvertRGB24ToXVYU32(uint32 *dst, const uint8 *src, sint32 count) {
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ const sint32 yt = 1052*r + 2065*g + 401*b;
+ const sint32 y = (yt + 67584) >> 4; // <<8 alignment shift
+ const sint32 cr = (10507932*r - yt*2987 + 2155872256U) >> 8; // <<16 alignment shift
+ const sint32 cb = ( 8312025*b - yt*2363 + 2155872256U) >> 24;
+
+ *dst++ = (y&0xff00) + cb + (cr&0xff0000); // VYU order
+ src += 3;
+ } while(--count);
+ }
+
+ void ConvertRGB16ToXVYU32(uint32 *dst, const uint16 *src, sint32 count) {
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0xf800) >> 11;
+ const sint32 g = (px & 0x07e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ const sint32 yt = 8652*r + 8358*g + 3299*b;
+ const sint32 y = (yt + 67584) >> 4; // <<8 alignment shift
+			const sint32 cr = (86436217*r - yt*2987 + 2155872256U) >> 8;	// <<16 alignment shift
+			const sint32 cb = (68373108*b - yt*2363 + 2155872256U) >> 24;
+
+ *dst++ = (y&0xff00) + cb + (cr&0xff0000); // VYU order
+ } while(--count);
+ }
+
+ void ConvertRGB15ToXVYU32(uint32 *dst, const uint16 *src, sint32 count) {
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0x7c00) >> 10;
+ const sint32 g = (px & 0x03e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ const sint32 yt = 8652*r + 16986*g + 3299*b;
+ const sint32 y = (yt + 67584) >> 4; // <<8 alignment shift
+ const sint32 cr = (86436217*r - yt*2987 + 2155872256U) >> 8; // <<16 alignment shift
+ const sint32 cb = (68373108*b - yt*2363 + 2155872256U) >> 24;
+
+ *dst++ = (y&0xff00) + cb + (cr&0xff0000); // VYU order
+ } while(--count);
+ }
+
+ void ConvertRGB32ToY8(uint8 *dst, const uint8 *src, sint32 count) {
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ *dst++ = (uint8)((1052*r + 2065*g + 401*b + 67584) >> 12);
+ src += 4;
+ } while(--count);
+ }
+
+ void ConvertRGB24ToY8(uint8 *dst, const uint8 *src, sint32 count) {
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ *dst++ = (uint8)((1052*r + 2065*g + 401*b + 67584) >> 12);
+ src += 3;
+ } while(--count);
+ }
+
+ void ConvertRGB16ToY8(uint8 *dst, const uint16 *src, sint32 count) {
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0xf800) >> 11;
+ const sint32 g = (px & 0x07e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ *dst++ = (uint8)((8652*r + 8358*g + 3299*b + 67584) >> 12);
+ } while(--count);
+ }
+
+ void ConvertRGB15ToY8(uint8 *dst, const uint16 *src, sint32 count) {
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0x7c00) >> 10;
+ const sint32 g = (px & 0x03e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ *dst++ = (uint8)((8652*r + 16986*g + 3299*b + 67584) >> 12);
+ } while(--count);
+ }
+}
+
+#define DECLARE_YUV_REV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+
+DECLARE_YUV_REV(XRGB1555, XVYU) {
+ do {
+ ConvertRGB15ToXVYU32((uint32 *)dst0, (const uint16 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(RGB565, XVYU) {
+ do {
+ ConvertRGB16ToXVYU32((uint32 *)dst0, (const uint16 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(RGB888, XVYU) {
+ do {
+ ConvertRGB24ToXVYU32((uint32 *)dst0, (const uint8 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(XRGB8888, XVYU) {
+ do {
+ ConvertRGB32ToXVYU32((uint32 *)dst0, (const uint8 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(XRGB1555, Y8) {
+ do {
+ ConvertRGB15ToY8((uint8 *)dst0, (const uint16 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(RGB565, Y8) {
+ do {
+ ConvertRGB16ToY8((uint8 *)dst0, (const uint16 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(RGB888, Y8) {
+ do {
+ ConvertRGB24ToY8((uint8 *)dst0, (const uint8 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+DECLARE_YUV_REV(XRGB8888, Y8) {
+ do {
+ ConvertRGB32ToY8((uint8 *)dst0, (const uint8 *)src0, w);
+
+ vdptrstep(dst0, dstpitch);
+ vdptrstep(src0, srcpitch);
+ } while(--h);
+}
+
+
+
+
+
+namespace {
+ void ConvertRGB32ToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ const sint32 yt = 1052*r + 2065*g + 401*b;
+ *ydst++ = (yt + 67584) >> 12;
+ *crdst++ = (10507932*r - yt*2987 + 2155872256U) >> 24;
+ *cbdst++ = ( 8312025*b - yt*2363 + 2155872256U) >> 24;
+ src += 4;
+ } while(--count);
+ }
+
+ void ConvertRGB24ToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ const sint32 r = src[2];
+ const sint32 g = src[1];
+ const sint32 b = src[0];
+ const sint32 yt = 1052*r + 2065*g + 401*b;
+ *ydst++ = (yt + 67584) >> 12;
+ *crdst++ = (10507932*r - yt*2987 + 2155872256U) >> 24;
+ *cbdst++ = ( 8312025*b - yt*2363 + 2155872256U) >> 24;
+ src += 3;
+ } while(--count);
+ }
+
+ void ConvertRGB16ToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint16 *src = (const uint16 *)src0;
+
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0xf800) >> 11;
+ const sint32 g = (px & 0x07e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ const sint32 yt = 8652*r + 8358*g + 3299*b;
+ *ydst++ = (yt + 67584) >> 12;
+ *crdst++ = (86436217*r - yt*2987 + 2155872256U) >> 24;
+ *cbdst++ = (68373108*b - yt*2363 + 2155872256U) >> 24;
+ } while(--count);
+ }
+
+ void ConvertRGB15ToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint16 *src = (const uint16 *)src0;
+
+ do {
+ const sint16 px = *src++;
+ const sint32 r = (px & 0x7c00) >> 10;
+ const sint32 g = (px & 0x03e0) >> 5;
+ const sint32 b = (px & 0x001f);
+ const sint32 yt = 8652*r + 16986*g + 3299*b;
+ *ydst++ = (yt + 67584) >> 12;
+ *crdst++ = (86436217*r - yt*2987 + 2155872256U) >> 24;
+ *cbdst++ = (68373108*b - yt*2363 + 2155872256U) >> 24;
+ } while(--count);
+ }
+
+ void ConvertUYVYToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ *cbdst++ = src[0];
+ *ydst++ = src[1];
+ *crdst++ = src[2];
+ if (!--count)
+ break;
+ *ydst++ = src[3];
+ src += 4;
+ } while(--count);
+ }
+
+ void ConvertYUYVToYUVPlanar(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ *cbdst++ = src[1];
+ *ydst++ = src[0];
+ *crdst++ = src[3];
+ if (!--count)
+ break;
+ *ydst++ = src[2];
+ src += 4;
+ } while(--count);
+ }
+}
+
+void VDPixmapBlt_YUVPlanar_encode_reference(const VDPixmap& dstbm, const VDPixmap& srcbm, vdpixsize w, vdpixsize h) {
+ void (*cfunc)(uint8 *ydst, uint8 *cbdst, uint8 *crdst, const void *src, sint32 w) = NULL;
+ void (*hfunc)(uint8 *dst, const uint8 *src, sint32 w) = NULL;
+ void (*vfunc)(uint8 *dst, const uint8 *const *sources, sint32 w, uint8 phase) = NULL;
+
+ bool halfchroma = false;
+
+ switch(srcbm.format) {
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ cfunc = ConvertRGB15ToYUVPlanar;
+ break;
+ case nsVDPixmap::kPixFormat_RGB565:
+ cfunc = ConvertRGB16ToYUVPlanar;
+ break;
+ case nsVDPixmap::kPixFormat_RGB888:
+ cfunc = ConvertRGB24ToYUVPlanar;
+ break;
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ cfunc = ConvertRGB32ToYUVPlanar;
+ break;
+ case nsVDPixmap::kPixFormat_YUV422_UYVY:
+ cfunc = ConvertUYVYToYUVPlanar;
+ halfchroma = true;
+ break;
+ case nsVDPixmap::kPixFormat_YUV422_YUYV:
+ cfunc = ConvertYUYVToYUVPlanar;
+ halfchroma = true;
+ break;
+ default:
+ VDNEVERHERE;
+ return;
+ }
+
+ vdpixsize w2 = w;
+ vdpixsize h2 = h;
+ int winstep = 1;
+ int winsize = 1;
+ int winposnext = 0;
+ vdpixsize chroma_srcw = w;
+
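+	// w2/h2 track the destination chroma plane size. When the target is
+	// vertically subsampled, winsize converted chroma rows are kept in a
+	// sliding window that vfunc collapses into one output row, advancing by
+	// winstep source rows each time; winposnext sets the initial phase.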
+ switch(dstbm.format) {
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ if (halfchroma)
+ hfunc = horiz_expand2x_coaligned;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ if (halfchroma)
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ else
+ hfunc = horiz_compress2x_coaligned;
+
+ w2 = (w2+1) >> 1;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar_Centered:
+ if (halfchroma) {
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ hfunc = horiz_realign_to_centered;
+ } else
+ hfunc = horiz_compress2x_centered;
+
+ w2 = (w2+1) >> 1;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ if (halfchroma)
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ else
+ hfunc = horiz_compress2x_coaligned;
+
+ vfunc = vert_compress2x_centered;
+ winstep = 2;
+ winposnext = 2;
+ winsize = 4;
+ h2 = (h+1) >> 1;
+ w2 = (w2+1) >> 1;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar_Centered:
+ if (halfchroma) {
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ hfunc = horiz_realign_to_centered;
+ } else
+ hfunc = horiz_compress2x_centered;
+
+ vfunc = vert_compress2x_centered;
+ winstep = 2;
+ winposnext = 2;
+ winsize = 4;
+ h2 = (h+1) >> 1;
+ w2 = (w2+1) >> 1;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ if (halfchroma) {
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ hfunc = horiz_compress2x_coaligned;
+ } else
+ hfunc = horiz_compress4x_coaligned;
+ w2 = (w2+1) >> 2;
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ if (halfchroma) {
+ chroma_srcw = (chroma_srcw + 1) >> 1;
+ hfunc = horiz_compress2x_coaligned;
+ } else
+ hfunc = horiz_compress4x_coaligned;
+ vfunc = vert_compress4x_centered;
+ winsize = 8;
+ winposnext = 5;
+ winstep = 4;
+ h2 = (h+3) >> 2;
+ w2 = (w2+3) >> 2;
+ break;
+ }
+
+#ifdef _M_IX86
+ uint32 cpuflags = CPUGetEnabledExtensions();
+
+ if (cpuflags & CPUF_SUPPORTS_INTEGER_SSE) {
+ if (hfunc == horiz_expand2x_coaligned)
+ hfunc = horiz_expand2x_coaligned_ISSE;
+ }
+#endif
+
+ const uint8 *src = (const uint8 *)srcbm.data;
+ const ptrdiff_t srcpitch = srcbm.pitch;
+
+ uint8 *ydst = (uint8 *)dstbm.data;
+ uint8 *cbdst = (uint8 *)dstbm.data2;
+ uint8 *crdst = (uint8 *)dstbm.data3;
+ const ptrdiff_t ydstpitch = dstbm.pitch;
+ const ptrdiff_t cbdstpitch = dstbm.pitch2;
+ const ptrdiff_t crdstpitch = dstbm.pitch3;
+
+ if (!vfunc) {
+ if (hfunc) {
+ uint32 tmpsize = (w + 15) & ~15;
+
+ vdblock<uint8> tmp(tmpsize * 2);
+ uint8 *const cbtmp = tmp.data();
+ uint8 *const crtmp = cbtmp + tmpsize;
+
+ do {
+ cfunc(ydst, cbtmp, crtmp, src, w);
+ src += srcpitch;
+ ydst += ydstpitch;
+ hfunc(cbdst, cbtmp, chroma_srcw);
+ hfunc(crdst, crtmp, chroma_srcw);
+ cbdst += cbdstpitch;
+ crdst += crdstpitch;
+ } while(--h);
+ } else if (dstbm.format == nsVDPixmap::kPixFormat_Y8) {
+ // wasteful, but oh well
+ uint32 tmpsize = (w2+15)&~15;
+		vdblock<uint8> tmp(tmpsize * 2);
+
+ cbdst = tmp.data();
+ crdst = cbdst + tmpsize;
+
+ do {
+ cfunc(ydst, cbdst, crdst, src, w);
+ src += srcpitch;
+ ydst += ydstpitch;
+ } while(--h2);
+ } else {
+ do {
+ cfunc(ydst, cbdst, crdst, src, w);
+ src += srcpitch;
+ ydst += ydstpitch;
+ cbdst += cbdstpitch;
+ crdst += crdstpitch;
+ } while(--h2);
+ }
+ } else {
+ const uint32 tmpsize = w2;
+
+ vdblock<uint8> tmpbuf(tmpsize * (winsize + 1) * 2 + 2 * w);
+
+ uint8 *cbwindow[16];
+ uint8 *crwindow[16];
+
+ uint8 *p = tmpbuf.data();
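+		// Each window row pointer is stored twice (at i and at winsize+i) so
+		// that cbwindow + winoffset always addresses winsize consecutive rows,
+		// even after the circular window has wrapped.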
+ for(int i=0; i<winsize; ++i) {
+ cbwindow[i] = cbwindow[winsize+i] = p;
+ p += tmpsize;
+ crwindow[i] = crwindow[winsize+i] = p;
+ p += tmpsize;
+ }
+
+ uint8 *cbtmp = p;
+ uint8 *crtmp = p + w;
+
+ int winoffset;
+ int winpos = winposnext - winsize;
+ bool firstline = true;
+
+ do {
+ while(winpos < winposnext) {
+ winoffset = ++winpos & (winsize - 1);
+
+ bool valid = (unsigned)(winpos-1) < (unsigned)(h-1); // -1 because we generate line 0 as the first window line
+ if (valid || firstline) {
+ if (hfunc) {
+ cfunc(ydst, cbtmp, crtmp, src, w);
+ hfunc(cbwindow[winoffset + winsize - 1], cbtmp, chroma_srcw);
+ hfunc(crwindow[winoffset + winsize - 1], crtmp, chroma_srcw);
+ } else {
+ cfunc(ydst, cbwindow[winoffset + winsize - 1], crwindow[winoffset + winsize - 1], src, w);
+ }
+ src += srcpitch;
+ ydst += ydstpitch;
+ firstline = false;
+ } else {
+					// dupe last generated line -- could be done by pointer swapping, but I'm lazy
+ memcpy(cbwindow[winoffset + winsize - 1], cbwindow[winoffset + winsize - 2], w2);
+ memcpy(crwindow[winoffset + winsize - 1], crwindow[winoffset + winsize - 2], w2);
+ }
+ }
+ winposnext += winstep;
+
+ vfunc(cbdst, cbwindow + winoffset, w2, 0);
+ vfunc(crdst, crwindow + winoffset, w2, 0);
+ cbdst += cbdstpitch;
+ crdst += crdstpitch;
+ } while(--h2);
+ }
+
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ __asm emms
+ }
+#endif
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_setup.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_setup.cpp
new file mode 100644
index 000000000..ce999221a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_setup.cpp
@@ -0,0 +1,17 @@
+#include "blt_setup.h"
+
+void VDPixmapBlitterTable::Clear() {
+ memset(mTable, 0, sizeof mTable);
+}
+
+void VDPixmapBlitterTable::AddBlitter(const VDPixmapFormatSubset& srcFormats, VDPixmapFormatSubset& dstFormats, VDPixmapBlitterFn blitter) {
+ for(int i=0; i<srcFormats.mFormatCount; ++i) {
+ int srcFormat = srcFormats.mFormats[i];
+ for(int j=0; j<dstFormats.mFormatCount; ++j) {
+ int dstFormat = dstFormats.mFormats[j];
+
+ if (srcFormat != dstFormat)
+ mTable[srcFormat][dstFormat] = blitter;
+ }
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils.cpp
new file mode 100644
index 000000000..6baeeca36
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils.cpp
@@ -0,0 +1,365 @@
+#include "blt_spanutils.h"
+#include "bitutils.h"
+
+using namespace nsVDPixmapBitUtils;
+
+namespace nsVDPixmapSpanUtils {
+ void horiz_expand2x_centered(uint8 *dst, const uint8 *src, sint32 w) {
+ w = -w;
+
+ *dst++ = *src;
+
+ if (++w) {
+ if (++w) {
+ do {
+ dst[0] = (uint8)((3*src[0] + src[1] + 2)>>2);
+ dst[1] = (uint8)((src[0] + 3*src[1] + 2)>>2);
+ dst += 2;
+ ++src;
+ } while((w+=2)<0);
+ }
+
+ if (!(w & 1)) {
+ *dst = src[0];
+ }
+ }
+ }
+
+ void horiz_expand2x_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ w = -w;
+
+ if ((w+=2) < 0) {
+ do {
+ dst[0] = src[0];
+ dst[1] = (uint8)((src[0] + src[1] + 1)>>1);
+ dst += 2;
+ ++src;
+ } while((w+=2)<0);
+ }
+
+ w -= 2;
+ while(w < 0) {
+ ++w;
+ *dst++ = src[0];
+ }
+ }
+
+ void horiz_expand4x_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ w = -w;
+
+ if ((w+=4) < 0) {
+ do {
+ dst[0] = src[0];
+ dst[1] = (uint8)((3*src[0] + src[1] + 2)>>2);
+ dst[2] = (uint8)((src[0] + src[1] + 1)>>1);
+ dst[3] = (uint8)((src[0] + 3*src[1] + 2)>>2);
+ dst += 4;
+ ++src;
+ } while((w+=4)<0);
+ }
+
+ w -= 4;
+ while(w < 0) {
+ ++w;
+ *dst++ = src[0];
+ }
+ }
+
+ void horiz_compress2x_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w == 1) {
+ *dst = *src;
+ return;
+ }
+
+ *dst++ = (uint8)((3*src[0] + src[1] + 2) >> 2);
+ ++src;
+ --w;
+
+ while(w >= 3) {
+ w -= 2;
+ *dst++ = (uint8)((src[0] + 2*src[1] + src[2] + 2) >> 2);
+ src += 2;
+ }
+
+ if (w >= 2)
+ *dst++ = (uint8)((src[0] + 3*src[1] + 2) >> 2);
+ }
+
+ void horiz_compress2x_centered(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w == 1) {
+ *dst = *src;
+ return;
+ }
+
+ if (w == 2) {
+ *dst = (uint8)((src[0] + src[1] + 1) >> 1);
+ return;
+ }
+
+ *dst++ = (uint8)((4*src[0] + 3*src[1] + src[2] + 4) >> 3);
+ --w;
+ ++src;
+
+ while(w >= 4) {
+ w -= 2;
+ *dst++ = (uint8)(((src[0] + src[3]) + 3*(src[1] + src[2]) + 4) >> 3);
+ src += 2;
+ }
+
+ switch(w) {
+ case 3:
+ *dst++ = (uint8)((src[0] + 3*src[1] + 4*src[2] + 4) >> 3);
+ break;
+ case 2:
+ *dst++ = (uint8)((src[0] + 7*src[1] + 4) >> 3);
+ break;
+ }
+ }
+
+ void horiz_compress4x_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w == 1) {
+ *dst = *src;
+ return;
+ }
+
+ if (w == 2) {
+ *dst++ = (uint8)((11*src[0] + 5*src[1] + 8) >> 4);
+ return;
+ }
+
+ *dst++ = (uint8)((11*src[0] + 4*src[1] + src[2] + 8) >> 4);
+ src += 2;
+ w -= 2;
+
+ while(w >= 5) {
+ w -= 4;
+ *dst++ = (uint8)(((src[0] + src[4]) + 4*(src[1] + src[3]) + 6*src[2] + 8) >> 4);
+ src += 4;
+ }
+
+ switch(w) {
+ case 4:
+ *dst = (uint8)((src[0] + 4*src[1] + 6*src[2] + 5*src[3] + 8) >> 4);
+ break;
+ case 3:
+ *dst = (uint8)((src[0] + 4*src[1] + 11*src[2] + 8) >> 4);
+ break;
+ }
+ }
+
+ void horiz_compress4x_centered(uint8 *dst, const uint8 *src, sint32 w) {
+
+ switch(w) {
+ case 1:
+ *dst = *src;
+ return;
+ case 2: // 29 99
+ *dst = (uint8)((29*src[0] + 99*src[1] + 64) >> 7);
+ return;
+ case 3: // 29 35 64
+			*dst = (uint8)((29*src[0] + 35*src[1] + 64*src[2] + 64) >> 7);
+ return;
+ case 4: // 29 35 35 29
+ *dst = (uint8)((29*src[0] + 35*(src[1] + src[2]) + 29*src[3] + 64) >> 7);
+ return;
+ case 5: // 29 35 35 21 8
+ // 1 7 120
+ dst[0] = (uint8)((29*src[0] + 35*(src[1] + src[2]) + 21*src[3] + 8*src[4] + 64) >> 7);
+ dst[1] = (uint8)((src[2] + 7*src[3] + 120*src[4] + 64) >> 7);
+ return;
+ }
+
+ *dst++ = (uint8)((29*src[0] + 35*(src[1] + src[2]) + 21*src[3] + 7*src[4] + src[5] + 64) >> 7);
+ src += 2;
+ w -= 2;
+
+ while(w >= 8) {
+ w -= 4;
+ *dst++ = (uint8)(((src[0] + src[7]) + 7*(src[1] + src[6]) + 21*(src[2] + src[5]) + 35*(src[3] + src[4]) + 64) >> 7);
+ src += 4;
+ }
+
+ switch(w) {
+ case 4: // 1 7 21 99
+ *dst = (uint8)((src[0] + 7*src[1] + 21*src[2] + 99*src[3] + 64) >> 7);
+ break;
+ case 5: // 1 7 21 35 64
+ *dst = (uint8)((src[0] + 7*src[1] + 21*src[2] + 35*src[3] + 64*src[4] + 64) >> 7);
+ break;
+ case 6: // 1 7 21 35 35 29
+ *dst = (uint8)((src[0] + 7*src[1] + 21*src[2] + 29*src[5] + 35*(src[3] + src[4]) + 64) >> 7);
+ break;
+ case 7: // 1 7 21 35 35 21 8
+ // 1 7 120
+ dst[0] = (uint8)((src[0] + 7*src[1] + 8*src[6] + 21*(src[2] + src[5]) + 35*(src[3] + src[4]) + 64) >> 7);
+ dst[1] = (uint8)((src[4] + 7*src[5] + 120*src[6] + 64) >> 7);
+ break;
+ }
+ }
+
+ void horiz_realign_to_centered(uint8 *dst, const uint8 *src, sint32 w) {
+ // luma samples: Y Y Y Y Y
+ // coaligned: C C C
+ // centered: C C
+ //
+ // To realign coaligned samples to centered, we need to shift them
+ // right by a quarter sample in chroma space. This can be done via
+ // a [3 1]/4 filter.
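+		//
+		// For example, src = {0, 255} becomes dst = {64, 255}: each output is
+		// pulled a quarter of the way toward its right-hand neighbour, and the
+		// final sample is copied through.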
+
+ for(sint32 i=1; i<w; ++i) {
+ dst[0] = (uint8)((3*(uint32)src[0] + (uint32)src[1] + 2) >> 2);
+ ++dst;
+ ++src;
+ }
+
+ *dst++ = *src++;
+ }
+
+ void horiz_realign_to_coaligned(uint8 *dst, const uint8 *src, sint32 w) {
+ // luma samples: Y Y Y Y Y
+ // coaligned: C C C
+ // centered: C C
+ //
+ // To realign centered samples to coaligned, we need to shift them
+ // left by a quarter sample in chroma space. This can be done via
+ // a [1 3]/4 filter.
+
+ *dst++ = *src++;
+
+ for(sint32 i=1; i<w; ++i) {
+ dst[0] = (uint8)(((uint32)src[-1] + 3*(uint32)src[0] + 2) >> 2);
+ ++dst;
+ ++src;
+ }
+ }
+
+ void vert_expand2x_centered(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase) {
+ const uint8 *src3 = srcs[0];
+ const uint8 *src1 = srcs[1];
+
+ if (phase >= 128)
+ std::swap(src1, src3);
+
+ sint32 w4 = w>>2;
+ w &= 3;
+
+ if (w4) {
+ const uint32 *src34 = (const uint32 *)src3;
+ const uint32 *src14 = (const uint32 *)src1;
+ uint32 *dst4 = ( uint32 *)dst;
+
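+			// SWAR version of the scalar tail below: ab is the bytewise floor
+			// average of the two rows, and the ceil-average of 'a' with ab
+			// gives roughly (3*a + b) / 4 per byte without unpacking.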
+ do {
+ const uint32 a = *src34++;
+ const uint32 b = *src14++;
+ const uint32 ab = (a&b) + (((a^b)&0xfefefefe)>>1);
+
+ *dst4++ = (a|ab) - (((a^ab)&0xfefefefe)>>1);
+ } while(--w4);
+
+ src3 = (const uint8 *)src34;
+ src1 = (const uint8 *)src14;
+ dst = ( uint8 *)dst4;
+ }
+
+ if (w) {
+ do {
+ *dst++ = (uint8)((*src1++ + 3**src3++ + 2) >> 2);
+ } while(--w);
+ }
+ }
+
+ void vert_expand4x_centered(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase) {
+ const uint8 *src3 = srcs[0];
+ const uint8 *src1 = srcs[1];
+
+ switch(phase & 0xc0) {
+ case 0x00:
+ do {
+ *dst++ = (uint8)((1**src1++ + 7**src3++ + 4) >> 3);
+ } while(--w);
+ break;
+ case 0x40:
+ do {
+ *dst++ = (uint8)((3**src1++ + 5**src3++ + 4) >> 3);
+ } while(--w);
+ break;
+ case 0x80:
+ do {
+ *dst++ = (uint8)((5**src1++ + 3**src3++ + 4) >> 3);
+ } while(--w);
+ break;
+ case 0xc0:
+ do {
+ *dst++ = (uint8)((7**src1++ + 1**src3++ + 4) >> 3);
+ } while(--w);
+ break;
+ default:
+ VDNEVERHERE;
+ }
+ }
+
+ void vert_compress2x_centered_fast(uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase) {
+ const uint8 *src1 = srcarray[0];
+ const uint8 *src2 = srcarray[1];
+
+ w = -w;
+ w += 3;
+
+ while(w < 0) {
+ *(uint32 *)dst = avg_8888_11(*(uint32 *)src1, *(uint32 *)src2);
+ dst += 4;
+ src1 += 4;
+ src2 += 4;
+ w += 4;
+ }
+
+ w -= 3;
+
+ while(w < 0) {
+ *dst = (uint8)((*src1 + *src2 + 1)>>1);
+ ++dst;
+ ++src1;
+ ++src2;
+ ++w;
+ }
+ }
+
+ void vert_compress2x_centered(uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase) {
+ const uint8 *src1 = srcarray[0];
+ const uint8 *src2 = srcarray[1];
+ const uint8 *src3 = srcarray[2];
+ const uint8 *src4 = srcarray[3];
+
+ w = -w;
+
+ while(w < 0) {
+ *dst++ = (uint8)(((*src1++ + *src4++) + 3*(*src2++ + *src3++) + 4)>>3);
+ ++w;
+ }
+ }
+
+ void vert_compress4x_centered(uint8 *dst, const uint8 *const *srcarray, sint32 w, uint8 phase) {
+ const uint8 *src1 = srcarray[0];
+ const uint8 *src2 = srcarray[1];
+ const uint8 *src3 = srcarray[2];
+ const uint8 *src4 = srcarray[3];
+ const uint8 *src5 = srcarray[4];
+ const uint8 *src6 = srcarray[5];
+ const uint8 *src7 = srcarray[6];
+ const uint8 *src8 = srcarray[7];
+
+ w = -w;
+
+ while(w < 0) {
+ int sum18 = *src1++ + *src8++;
+ int sum27 = *src2++ + *src7++;
+ int sum36 = *src3++ + *src6++;
+ int sum45 = *src4++ + *src5++;
+
+ *dst++ = (uint8)((sum18 + 7*sum27 + 21*sum36 + 35*sum45 + 64) >> 7);
+
+ ++w;
+ }
+ }
+}
+
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils_x86.cpp
new file mode 100644
index 000000000..ea9e0599a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_spanutils_x86.cpp
@@ -0,0 +1,170 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include "blt_spanutils_x86.h"
+
+#ifdef _MSC_VER
+ #pragma warning(disable: 4799) // warning C4799: function 'nsVDPixmapSpanUtils::vdasm_horiz_expand2x_coaligned_ISSE' has no EMMS instruction
+#endif
+
+extern "C" void __cdecl vdasm_horiz_expand2x_coaligned_ISSE(void *dst, const void *src, uint32 count);
+extern "C" void __cdecl vdasm_horiz_expand4x_coaligned_MMX(void *dst, const void *src, uint32 count);
+extern "C" void __cdecl vdasm_vert_average_13_ISSE(void *dst, const void *src1, const void *src3, uint32 count);
+extern "C" void __cdecl vdasm_vert_average_17_ISSE(void *dst, const void *src1, const void *src3, uint32 count);
+extern "C" void __cdecl vdasm_vert_average_35_ISSE(void *dst, const void *src1, const void *src3, uint32 count);
+
+namespace nsVDPixmapSpanUtils {
+
+ void horiz_expand2x_coaligned_ISSE(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w >= 17) {
+ uint32 fastcount = (w - 1) & ~15;
+
+ vdasm_horiz_expand2x_coaligned_ISSE(dst, src, fastcount);
+ dst += fastcount;
+ src += fastcount >> 1;
+ w -= fastcount;
+ }
+
+ w = -w;
+ if ((w+=2) < 0) {
+ do {
+ dst[0] = src[0];
+ dst[1] = (uint8)((src[0] + src[1] + 1)>>1);
+ dst += 2;
+ ++src;
+ } while((w+=2)<0);
+ }
+
+ w -= 2;
+ while(w < 0) {
+ ++w;
+ *dst++ = src[0];
+ }
+ }
+
+ void horiz_expand4x_coaligned_MMX(uint8 *dst, const uint8 *src, sint32 w) {
+ if (w >= 17) {
+ uint32 fastcount = (w - 1) >> 4;
+
+ vdasm_horiz_expand4x_coaligned_MMX(dst, src, fastcount);
+ dst += fastcount << 4;
+ src += fastcount << 2;
+ w -= fastcount << 4;
+ }
+
+ w = -w;
+ if ((w+=4) < 0) {
+ do {
+ dst[0] = src[0];
+ dst[1] = (uint8)((3*src[0] + src[1] + 2)>>2);
+ dst[2] = (uint8)((src[0] + src[1] + 1)>>1);
+ dst[3] = (uint8)((src[0] + 3*src[1] + 2)>>2);
+ dst += 4;
+ ++src;
+ } while((w+=4)<0);
+ }
+
+ w -= 4;
+ while(w < 0) {
+ ++w;
+ *dst++ = src[0];
+ }
+ }
+
+ void vert_expand2x_centered_ISSE(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase) {
+ const uint8 *src3 = srcs[0];
+ const uint8 *src1 = srcs[1];
+
+ if (phase >= 128)
+ std::swap(src1, src3);
+
+ uint32 fastcount = w & ~15;
+
+ if (fastcount) {
+ vdasm_vert_average_13_ISSE(dst, src1, src3, fastcount);
+ dst += fastcount;
+ src1 += fastcount;
+ src3 += fastcount;
+ w -= fastcount;
+ }
+
+ if (w) {
+ do {
+ *dst++ = (uint8)((*src1++ + 3**src3++ + 2) >> 2);
+ } while(--w);
+ }
+ }
+
+ void vert_average_1_7_ISSE(uint8 *dst, const uint8 *src7, const uint8 *src1, sint32 w) {
+ uint32 fastcount = w & ~7;
+
+ if (fastcount) {
+ vdasm_vert_average_17_ISSE(dst, src1, src7, fastcount);
+ dst += fastcount;
+ src1 += fastcount;
+ src7 += fastcount;
+ w -= fastcount;
+ }
+
+ if (w) {
+ do {
+ *dst++ = (uint8)((*src1++ + 7**src7++ + 4) >> 3);
+ } while(--w);
+ }
+ }
+
+ void vert_average_3_5_ISSE(uint8 *dst, const uint8 *src7, const uint8 *src1, sint32 w) {
+ uint32 fastcount = w & ~7;
+
+ if (fastcount) {
+ vdasm_vert_average_35_ISSE(dst, src1, src7, fastcount);
+ dst += fastcount;
+ src1 += fastcount;
+ src7 += fastcount;
+ w -= fastcount;
+ }
+
+ if (w) {
+ do {
+ *dst++ = (uint8)((3**src1++ + 5**src7++ + 4) >> 3);
+ } while(--w);
+ }
+ }
+
+ void vert_expand4x_centered_ISSE(uint8 *dst, const uint8 *const *srcs, sint32 w, uint8 phase) {
+ const uint8 *src1 = srcs[0];
+ const uint8 *src2 = srcs[1];
+
+ switch(phase & 0xc0) {
+ case 0x00:
+ vert_average_1_7_ISSE(dst, src2, src1, w);
+ break;
+ case 0x40:
+ vert_average_3_5_ISSE(dst, src2, src1, w);
+ break;
+ case 0x80:
+ vert_average_3_5_ISSE(dst, src1, src2, w);
+ break;
+ case 0xc0:
+ vert_average_1_7_ISSE(dst, src1, src2, w);
+ break;
+ default:
+ VDNEVERHERE;
+ }
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_uberblit.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_uberblit.cpp
new file mode 100644
index 000000000..dcaa20907
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_uberblit.cpp
@@ -0,0 +1,19 @@
+#include <vd2/system/vdalloc.h>
+#include <vd2/Kasumi/pixmap.h>
+#include "uberblit.h"
+
+void VDPixmapBlt_UberblitAdapter(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h) {
+ vdautoptr<IVDPixmapBlitter> blitter(VDPixmapCreateBlitter(dst, src));
+
+ if (w > src.w)
+ w = src.w;
+ if (w > dst.w)
+ w = dst.w;
+ if (h > src.h)
+ h = src.h;
+ if (h > dst.h)
+ h = dst.h;
+
+ vdrect32 r(0, 0, w, h);
+ blitter->Blit(dst, &r, src);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/blt_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/blt_x86.cpp
new file mode 100644
index 000000000..af1519c5b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/blt_x86.cpp
@@ -0,0 +1,144 @@
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "blt_setup.h"
+
+void VDPixmapInitBlittersReference(VDPixmapBlitterTable& table);
+
+#define DECLARE_PALETTED(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h, const void *pal0);
+#define DECLARE_RGB(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+#define DECLARE_RGB_ASM(x, y) extern "C" void vdasm_pixblt_##x##_to_##y(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+#define DECLARE_RGB_ASM_MMX(x, y) extern "C" void vdasm_pixblt_##x##_to_##y##_MMX(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+#define DECLARE_YUV(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h);
+#define DECLARE_YUV_REV(x, y) void VDPixmapBlt_##x##_to_##y##_reference(void *dst0, ptrdiff_t dstpitch, const void *src0, ptrdiff_t srcpitch, vdpixsize w, vdpixsize h)
+#define DECLARE_YUV_PLANAR(x, y) extern void VDPixmapBlt_##x##_to_##y##_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+
+ DECLARE_RGB_ASM(RGB565, XRGB1555); DECLARE_RGB_ASM_MMX(RGB565, XRGB1555);
+ DECLARE_RGB_ASM(RGB888, XRGB1555);
+ DECLARE_RGB_ASM(XRGB8888, XRGB1555); DECLARE_RGB_ASM_MMX(XRGB8888, XRGB1555);
+ DECLARE_RGB_ASM(XRGB1555, RGB565); DECLARE_RGB_ASM_MMX(XRGB1555, RGB565);
+ DECLARE_RGB_ASM(RGB888, RGB565);
+ DECLARE_RGB_ASM(XRGB8888, RGB565); DECLARE_RGB_ASM_MMX(XRGB8888, RGB565);
+DECLARE_RGB(XRGB1555, RGB888);
+DECLARE_RGB(RGB565, RGB888);
+ DECLARE_RGB_ASM(XRGB8888, RGB888); DECLARE_RGB_ASM_MMX(XRGB8888, RGB888);
+ DECLARE_RGB_ASM(XRGB1555, XRGB8888); DECLARE_RGB_ASM_MMX(XRGB1555, XRGB8888);
+ DECLARE_RGB_ASM(RGB565, XRGB8888); DECLARE_RGB_ASM_MMX(RGB565, XRGB8888);
+ DECLARE_RGB_ASM(RGB888, XRGB8888); DECLARE_RGB_ASM_MMX(RGB888, XRGB8888);
+
+DECLARE_PALETTED(Pal1, Any8);
+DECLARE_PALETTED(Pal1, Any16);
+DECLARE_PALETTED(Pal1, Any24);
+DECLARE_PALETTED(Pal1, Any32);
+DECLARE_PALETTED(Pal2, Any8);
+DECLARE_PALETTED(Pal2, Any16);
+DECLARE_PALETTED(Pal2, Any24);
+DECLARE_PALETTED(Pal2, Any32);
+DECLARE_PALETTED(Pal4, Any8);
+DECLARE_PALETTED(Pal4, Any16);
+DECLARE_PALETTED(Pal4, Any24);
+DECLARE_PALETTED(Pal4, Any32);
+DECLARE_PALETTED(Pal8, Any8);
+DECLARE_PALETTED(Pal8, Any16);
+DECLARE_PALETTED(Pal8, Any24);
+DECLARE_PALETTED(Pal8, Any32);
+
+DECLARE_YUV(XVYU, UYVY);
+DECLARE_YUV(XVYU, YUYV);
+DECLARE_YUV(Y8, UYVY);
+DECLARE_YUV(Y8, YUYV);
+DECLARE_YUV(UYVY, Y8);
+DECLARE_YUV(YUYV, Y8);
+DECLARE_YUV(UYVY, YUYV);
+DECLARE_YUV_PLANAR(YUV411, YV12);
+
+DECLARE_YUV(UYVY, XRGB1555);
+DECLARE_YUV(UYVY, RGB565);
+DECLARE_YUV(UYVY, RGB888);
+DECLARE_YUV(UYVY, XRGB8888);
+DECLARE_YUV(YUYV, XRGB1555);
+DECLARE_YUV(YUYV, RGB565);
+DECLARE_YUV(YUYV, RGB888);
+DECLARE_YUV(YUYV, XRGB8888);
+DECLARE_YUV(Y8, XRGB1555);
+DECLARE_YUV(Y8, RGB565);
+DECLARE_YUV(Y8, RGB888);
+DECLARE_YUV(Y8, XRGB8888);
+
+DECLARE_YUV_REV(XRGB1555, Y8);
+DECLARE_YUV_REV(RGB565, Y8);
+DECLARE_YUV_REV(RGB888, Y8);
+DECLARE_YUV_REV(XRGB8888, Y8);
+
+DECLARE_YUV_REV(XRGB1555, XVYU);
+DECLARE_YUV_REV(RGB565, XVYU);
+DECLARE_YUV_REV(RGB888, XVYU);
+DECLARE_YUV_REV(XRGB8888, XVYU);
+
+DECLARE_YUV_PLANAR(YV12, XRGB1555);
+DECLARE_YUV_PLANAR(YV12, RGB565);
+DECLARE_YUV_PLANAR(YV12, RGB888);
+DECLARE_YUV_PLANAR(YV12, XRGB8888);
+
+DECLARE_YUV_PLANAR(YUV411, XRGB1555);
+DECLARE_YUV_PLANAR(YUV411, RGB565);
+DECLARE_YUV_PLANAR(YUV411, RGB888);
+DECLARE_YUV_PLANAR(YUV411, XRGB8888);
+
+extern void VDPixmapBlt_YUVPlanar_decode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_YUVPlanar_encode_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+extern void VDPixmapBlt_YUVPlanar_convert_reference(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+
+using namespace nsVDPixmap;
+
+void VDPixmapInitBlittersX86(VDPixmapBlitterTable& table) {
+ VDPixmapInitBlittersReference(table);
+
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB1555_to_RGB565>);
+ table.AddBlitter(kPixFormat_XRGB1555, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB1555_to_XRGB8888>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB565_to_XRGB1555>);
+ table.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB565_to_XRGB8888>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB888_to_XRGB1555>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB888_to_RGB565>);
+ table.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB888_to_XRGB8888>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_XRGB1555>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_RGB565>);
+ table.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_RGB888>);
+}
+
+tpVDPixBltTable VDGetPixBltTableX86ScalarInternal() {
+ static VDPixmapBlitterTable sReferenceTable;
+
+ VDPixmapInitBlittersX86(sReferenceTable);
+
+ return sReferenceTable.mTable;
+}
+
+tpVDPixBltTable VDGetPixBltTableX86MMXInternal() {
+ static VDPixmapBlitterTable sReferenceTable;
+
+ VDPixmapInitBlittersX86(sReferenceTable);
+
+ sReferenceTable.AddBlitter(kPixFormat_XRGB1555, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB1555_to_RGB565_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_XRGB1555, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB1555_to_XRGB8888_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB565_to_XRGB1555_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_RGB565, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB565_to_XRGB8888_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_RGB888, kPixFormat_XRGB8888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_RGB888_to_XRGB8888_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_XRGB8888, kPixFormat_XRGB1555, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_XRGB1555_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB565, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_RGB565_MMX>);
+ sReferenceTable.AddBlitter(kPixFormat_XRGB8888, kPixFormat_RGB888, VDPixmapBlitterChunkyAdapter<vdasm_pixblt_XRGB8888_to_RGB888_MMX>);
+
+ return sReferenceTable.mTable;
+}
+
+tpVDPixBltTable VDGetPixBltTableX86Scalar() {
+ static tpVDPixBltTable spTable = VDGetPixBltTableX86ScalarInternal();
+
+ return spTable;
+}
+
+tpVDPixBltTable VDGetPixBltTableX86MMX() {
+ static tpVDPixBltTable spTable = VDGetPixBltTableX86MMXInternal();
+
+ return spTable;
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/pixel.cpp b/src/thirdparty/VirtualDub/Kasumi/source/pixel.cpp
new file mode 100644
index 000000000..45797ca4b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/pixel.cpp
@@ -0,0 +1,667 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include <vd2/system/math.h>
+#include <vd2/system/halffloat.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixel.h>
+
+uint32 VDPixmapSample(const VDPixmap& px, sint32 x, sint32 y) {
+ if (x >= px.w)
+ x = px.w - 1;
+ if (y >= px.h)
+ y = px.h - 1;
+ if (x < 0)
+ x = 0;
+ if (y < 0)
+ y = 0;
+
+ switch(px.format) {
+ case nsVDPixmap::kPixFormat_Pal1:
+ {
+ uint8 idx = ((const uint8 *)px.data + px.pitch*y)[x >> 3];
+
+ return px.palette[(idx >> (7 - (x & 7))) & 1];
+ }
+
+ case nsVDPixmap::kPixFormat_Pal2:
+ {
+ uint8 idx = ((const uint8 *)px.data + px.pitch*y)[x >> 2];
+
+ return px.palette[(idx >> (6 - (x & 3)*2)) & 3];
+ }
+
+ case nsVDPixmap::kPixFormat_Pal4:
+ {
+ uint8 idx = ((const uint8 *)px.data + px.pitch*y)[x >> 1];
+
+ if (!(x & 1))
+ idx >>= 4;
+
+ return px.palette[idx & 15];
+ }
+
+ case nsVDPixmap::kPixFormat_Pal8:
+ {
+ uint8 idx = ((const uint8 *)px.data + px.pitch*y)[x];
+
+ return px.palette[idx];
+ }
+
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ {
+ uint16 c = ((const uint16 *)((const uint8 *)px.data + px.pitch*y))[x];
+ uint32 r = c & 0x7c00;
+ uint32 g = c & 0x03e0;
+ uint32 b = c & 0x001f;
+ uint32 rgb = (r << 9) + (g << 6) + (b << 3);
+
+ return rgb + ((rgb >> 5) & 0x070707);
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_RGB565:
+ {
+ uint16 c = ((const uint16 *)((const uint8 *)px.data + px.pitch*y))[x];
+ uint32 r = c & 0xf800;
+ uint32 g = c & 0x07e0;
+ uint32 b = c & 0x001f;
+ uint32 rb = (r << 8) + (b << 3);
+
+ return rb + ((rb >> 5) & 0x070007) + (g << 5) + ((g >> 1) & 0x0300);
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_RGB888:
+ {
+ const uint8 *src = (const uint8 *)px.data + px.pitch*y + 3*x;
+ uint32 b = src[0];
+ uint32 g = src[1];
+ uint32 r = src[2];
+
+ return (r << 16) + (g << 8) + b;
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ return ((const uint32 *)((const uint8 *)px.data + px.pitch*y))[x];
+
+ case nsVDPixmap::kPixFormat_Y8:
+ {
+ uint8 luma = ((const uint8 *)px.data + px.pitch*y)[x];
+
+ return ((luma - 16)*255/219) * 0x010101;
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ return VDConvertYCbCrToRGB(VDPixmapSample8(px.data, px.pitch, x, y), VDPixmapSample8(px.data2, px.pitch2, x, y), VDPixmapSample8(px.data3, px.pitch3, x, y));
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ {
+ sint32 u = (x << 7) + 128;
+ sint32 v = (y << 8);
+ uint32 w2 = px.w >> 1;
+ uint32 h2 = px.h;
+
+ return VDConvertYCbCrToRGB(
+ VDPixmapSample8(px.data, px.pitch, x, y),
+ VDPixmapInterpolateSample8(px.data2, px.pitch2, w2, h2, u, v),
+ VDPixmapInterpolateSample8(px.data3, px.pitch3, w2, h2, u, v));
+ }
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ {
+ sint32 u = (x << 7) + 128;
+ sint32 v = (y << 7);
+ uint32 w2 = px.w >> 1;
+ uint32 h2 = px.h >> 1;
+
+ return VDConvertYCbCrToRGB(
+ VDPixmapSample8(px.data, px.pitch, x, y),
+ VDPixmapInterpolateSample8(px.data2, px.pitch2, w2, h2, u, v),
+ VDPixmapInterpolateSample8(px.data3, px.pitch3, w2, h2, u, v));
+ }
+
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ {
+ sint32 u = (x << 6) + 128;
+ sint32 v = (y << 8);
+ uint32 w2 = px.w >> 2;
+ uint32 h2 = px.h;
+
+ return VDConvertYCbCrToRGB(
+ VDPixmapSample8(px.data, px.pitch, x, y),
+ VDPixmapInterpolateSample8(px.data2, px.pitch2, w2, h2, u, v),
+ VDPixmapInterpolateSample8(px.data3, px.pitch3, w2, h2, u, v));
+ }
+
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ {
+ sint32 u = (x << 6) + 128;
+ sint32 v = (y << 6);
+ uint32 w2 = px.w >> 2;
+ uint32 h2 = px.h >> 2;
+
+ return VDConvertYCbCrToRGB(
+ VDPixmapSample8(px.data, px.pitch, x, y),
+ VDPixmapInterpolateSample8(px.data2, px.pitch2, w2, h2, u, v),
+ VDPixmapInterpolateSample8(px.data3, px.pitch3, w2, h2, u, v));
+ }
+
+ default:
+ return VDPixmapInterpolateSampleRGB24(px, (x << 8) + 128, (y << 8) + 128);
+ }
+}
+
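+// The interpolated samplers below take coordinates in 1/256ths of a pixel with
+// sample centers at +128; the -128 bias converts to a top-left origin and the
+// low 8 bits of each coordinate become the bilinear weights.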
+uint8 VDPixmapInterpolateSample8(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint8 *row0 = (const uint8 *)data + pitch * (y_256 >> 8);
+ const uint8 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 += pitch;
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 1 : 0;
+ sint32 xoffset = x_256 & 255;
+ sint32 yoffset = y_256 & 255;
+ sint32 p00 = row0[0];
+ sint32 p10 = row0[xstep];
+ sint32 p01 = row1[0];
+ sint32 p11 = row1[xstep];
+ sint32 p0 = (p00 << 8) + (p10 - p00)*xoffset;
+ sint32 p1 = (p01 << 8) + (p11 - p01)*xoffset;
+ sint32 p = ((p0 << 8) + (p1 - p0)*yoffset + 0x8000) >> 16;
+
+ return (uint8)p;
+}
+
+uint32 VDPixmapInterpolateSample8To24(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint8 *row0 = (const uint8 *)data + pitch * (y_256 >> 8) + (x_256 >> 8);
+ const uint8 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 += pitch;
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 1 : 0;
+ sint32 xoffset = x_256 & 255;
+ sint32 yoffset = y_256 & 255;
+ sint32 p00 = row0[0];
+ sint32 p10 = row0[xstep];
+ sint32 p01 = row1[0];
+ sint32 p11 = row1[xstep];
+ sint32 p0 = (p00 << 8) + (p10 - p00)*xoffset;
+ sint32 p1 = (p01 << 8) + (p11 - p01)*xoffset;
+ sint32 p = (p0 << 8) + (p1 - p0)*yoffset;
+
+ return p;
+}
+
+uint32 VDPixmapInterpolateSample8x2To24(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint8 *row0 = (const uint8 *)data + pitch * (y_256 >> 8) + (x_256 >> 8)*2;
+ const uint8 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 += pitch;
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 2 : 0;
+ sint32 xoffset = x_256 & 255;
+ sint32 yoffset = y_256 & 255;
+ sint32 p00 = row0[0];
+ sint32 p10 = row0[xstep];
+ sint32 p01 = row1[0];
+ sint32 p11 = row1[xstep];
+ sint32 p0 = (p00 << 8) + (p10 - p00)*xoffset;
+ sint32 p1 = (p01 << 8) + (p11 - p01)*xoffset;
+ sint32 p = (p0 << 8) + (p1 - p0)*yoffset;
+
+ return p;
+}
+
+uint32 VDPixmapInterpolateSample8x4To24(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint8 *row0 = (const uint8 *)data + pitch * (y_256 >> 8) + (x_256 >> 8)*4;
+ const uint8 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 += pitch;
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 4 : 0;
+ sint32 xoffset = x_256 & 255;
+ sint32 yoffset = y_256 & 255;
+ sint32 p00 = row0[0];
+ sint32 p10 = row0[xstep];
+ sint32 p01 = row1[0];
+ sint32 p11 = row1[xstep];
+ sint32 p0 = (p00 << 8) + (p10 - p00)*xoffset;
+ sint32 p1 = (p01 << 8) + (p11 - p01)*xoffset;
+ sint32 p = (p0 << 8) + (p1 - p0)*yoffset;
+
+ return p;
+}
+
+float VDPixmapInterpolateSample16F(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256) {
+ // bias coordinates to integer
+ x_256 -= 128;
+ y_256 -= 128;
+
+ // clamp coordinates
+ x_256 &= ~(x_256 >> 31);
+ y_256 &= ~(y_256 >> 31);
+
+ uint32 w_256 = (w - 1) << 8;
+ uint32 h_256 = (h - 1) << 8;
+ x_256 ^= (x_256 ^ w_256) & ((x_256 - w_256) >> 31);
+ y_256 ^= (y_256 ^ h_256) & ((y_256 - h_256) >> 31);
+
+ const uint16 *row0 = (const uint16 *)((const uint8 *)data + pitch * (y_256 >> 8) + (x_256 >> 8)*2);
+ const uint16 *row1 = row0;
+
+ if ((uint32)y_256 < h_256)
+ row1 = (const uint16 *)((const char *)row1 + pitch);
+
+ ptrdiff_t xstep = (uint32)x_256 < w_256 ? 1 : 0;
+ float xoffset = (float)(x_256 & 255) * (1.0f / 255.0f);
+ float yoffset = (float)(y_256 & 255) * (1.0f / 255.0f);
+
+ float p00;
+ float p10;
+ float p01;
+ float p11;
+ VDConvertHalfToFloat(row0[0], &p00);
+ VDConvertHalfToFloat(row0[xstep], &p10);
+ VDConvertHalfToFloat(row1[0], &p01);
+ VDConvertHalfToFloat(row1[xstep], &p11);
+
+ float p0 = p00 + (p10 - p00)*xoffset;
+ float p1 = p01 + (p11 - p01)*xoffset;
+
+ return p0 + (p1 - p0)*yoffset;
+}
+
+namespace {
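+	// Bilinear blend of four packed 8888 pixels with 8-bit fractions xf/yf.
+	// Red+blue and alpha+green are carried as two 8-bit lanes per 32-bit word,
+	// so each lerp handles a pair of channels with a single multiply.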
+ uint32 Lerp8888(uint32 p0, uint32 p1, uint32 p2, uint32 p3, uint32 xf, uint32 yf) {
+ uint32 rb0 = p0 & 0x00ff00ff;
+ uint32 ag0 = p0 & 0xff00ff00;
+ uint32 rb1 = p1 & 0x00ff00ff;
+ uint32 ag1 = p1 & 0xff00ff00;
+ uint32 rb2 = p2 & 0x00ff00ff;
+ uint32 ag2 = p2 & 0xff00ff00;
+ uint32 rb3 = p3 & 0x00ff00ff;
+ uint32 ag3 = p3 & 0xff00ff00;
+
+ uint32 rbt = (rb0 + ((( rb1 - rb0 )*xf + 0x00800080) >> 8)) & 0x00ff00ff;
+ uint32 agt = (ag0 + ((((ag1 >> 8) - (ag0 >> 8))*xf + 0x00800080) )) & 0xff00ff00;
+ uint32 rbb = (rb2 + ((( rb3 - rb2 )*xf + 0x00800080) >> 8)) & 0x00ff00ff;
+ uint32 agb = (ag2 + ((((ag3 >> 8) - (ag2 >> 8))*xf + 0x00800080) )) & 0xff00ff00;
+ uint32 rb = (rbt + ((( rbb - rbt )*yf + 0x00800080) >> 8)) & 0x00ff00ff;
+ uint32 ag = (agt + ((((agb >> 8) - (agt >> 8))*yf + 0x00800080) )) & 0xff00ff00;
+
+ return rb + ag;
+ }
+
+ uint32 InterpPlanarY8(const VDPixmap& px, sint32 x1, sint32 y1) {
+ sint32 y = VDPixmapInterpolateSample8To24(px.data, px.pitch, px.w, px.h, x1, y1);
+
+ return VDClampedRoundFixedToUint8Fast((float)(y-0x100000) * (1.1643836f/65536.0f/255.0f))*0x010101;
+ }
+
+ uint32 InterpPlanarYCC888(const VDPixmap& px, sint32 x1, sint32 y1, sint32 x23, sint32 y23, uint32 w23, uint32 h23) {
+ float y = (float)(sint32)VDPixmapInterpolateSample8To24(px.data, px.pitch, px.w, px.h, x1, y1);
+ float cb = (float)(sint32)VDPixmapInterpolateSample8To24(px.data2, px.pitch2, w23, h23, x23, y23);
+ float cr = (float)(sint32)VDPixmapInterpolateSample8To24(px.data3, px.pitch3, w23, h23, x23, y23);
+
+ // ! 1.1643836 - 5.599D-17 1.5960268 - 222.92157 !
+ // ! 1.1643836 - 0.3917623 - 0.8129676 135.57529 !
+ // ! 1.1643836 2.0172321 - 1.110D-16 - 276.83585 !
+ uint32 ir = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (1.5960268f/65536.0f/255.0f)*cr - (222.92157f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y - (0.3917623f/65536.0f/255.0f)*cb - (0.8129676f/65536.0f/255.0f)*cr + (135.57529f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (2.0172321f/65536.0f/255.0f)*cb - (276.83585f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+
+ uint32 ConvertYCC72ToRGB24(sint32 iy, sint32 icb, sint32 icr) {
+ float y = (float)iy;
+ float cb = (float)icb;
+ float cr = (float)icr;
+
+ // ! 1.1643836 - 5.599D-17 1.5960268 - 222.92157 !
+ // ! 1.1643836 - 0.3917623 - 0.8129676 135.57529 !
+ // ! 1.1643836 2.0172321 - 1.110D-16 - 276.83585 !
+ uint32 ir = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (1.5960268f/65536.0f/255.0f)*cr - (222.92157f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y - (0.3917623f/65536.0f/255.0f)*cb - (0.8129676f/65536.0f/255.0f)*cr + (135.57529f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (2.0172321f/65536.0f/255.0f)*cb - (276.83585f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+
+ uint32 ConvertYCC72ToRGB24_709(sint32 iy, sint32 icb, sint32 icr) {
+ float y = (float)iy;
+ float cb = (float)icb;
+ float cr = (float)icr;
+
+ // ! 1.1643836 - 2.932D-17 1.7927411 - 248.10099 !
+ // ! 1.1643836 - 0.2132486 - 0.5329093 76.87808 !
+ // ! 1.1643836 2.1124018 - 5.551D-17 - 289.01757 !
+ uint32 ir = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (1.7927411f/65536.0f/255.0f)*cr - (248.10099f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y - (0.2132486f/65536.0f/255.0f)*cb - (0.5329093f/65536.0f/255.0f)*cr + (76.87808f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast((1.1643836f/65536.0f/255.0f)*y + (2.1124018f/65536.0f/255.0f)*cb - (289.01757f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+
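+	// v210 packs six 4:2:2 pixels (12 ten-bit samples) into four little-endian
+	// 32-bit words, three samples per word, low bits first:
+	//   word0: Cb0 Y0 Cr0   word1: Y1 Cb1 Y2   word2: Cr1 Y3 Cb2   word3: Y4 Cr2 Y5
+	// The shift tables below simply pick each sample out of that layout.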
+ uint32 SampleV210_Y(const void *src, ptrdiff_t srcpitch, sint32 x, sint32 y, uint32 w, uint32 h) {
+ const uint32 *p = (const uint32 *)((const char *)src + srcpitch*y) + (x / 6)*4;
+
+ switch((uint32)x % 6) {
+ default:
+ case 0: return (p[0] >> 10) & 0x3ff;
+ case 1: return (p[1] >> 0) & 0x3ff;
+ case 2: return (p[1] >> 20) & 0x3ff;
+ case 3: return (p[2] >> 10) & 0x3ff;
+ case 4: return (p[3] >> 0) & 0x3ff;
+ case 5: return (p[3] >> 20) & 0x3ff;
+ }
+ }
+
+ uint32 SampleV210_Cb(const void *src, ptrdiff_t srcpitch, sint32 x, sint32 y, uint32 w, uint32 h) {
+ const uint32 *p = (const uint32 *)((const char *)src + srcpitch*y) + (x / 3)*4;
+
+ switch((uint32)x % 3) {
+ default:
+ case 0: return (p[0] >> 0) & 0x3ff;
+ case 1: return (p[1] >> 10) & 0x3ff;
+ case 2: return (p[2] >> 20) & 0x3ff;
+ }
+ }
+
+ uint32 SampleV210_Cr(const void *src, ptrdiff_t srcpitch, sint32 x, sint32 y, uint32 w, uint32 h) {
+ const uint32 *p = (const uint32 *)((const char *)src + srcpitch*y) + (x / 3)*4;
+
+ switch((uint32)x % 3) {
+ default:
+ case 0: return (p[0] >> 20) & 0x3ff;
+ case 1: return (p[2] >> 0) & 0x3ff;
+ case 2: return (p[3] >> 10) & 0x3ff;
+ }
+ }
+}
+
+uint32 VDPixmapInterpolateSampleRGB24(const VDPixmap& px, sint32 x_256, sint32 y_256) {
+ switch(px.format) {
+ case nsVDPixmap::kPixFormat_Pal1:
+ case nsVDPixmap::kPixFormat_Pal2:
+ case nsVDPixmap::kPixFormat_Pal4:
+ case nsVDPixmap::kPixFormat_Pal8:
+ case nsVDPixmap::kPixFormat_RGB565:
+ case nsVDPixmap::kPixFormat_RGB888:
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ {
+ x_256 -= 128;
+ y_256 -= 128;
+ int ix = x_256 >> 8;
+ int iy = y_256 >> 8;
+ uint32 p0 = VDPixmapSample(px, ix, iy);
+ uint32 p1 = VDPixmapSample(px, ix+1, iy);
+ uint32 p2 = VDPixmapSample(px, ix, iy+1);
+ uint32 p3 = VDPixmapSample(px, ix+1, iy+1);
+
+ return Lerp8888(p0, p1, p2, p3, x_256 & 255, y_256 & 255);
+ }
+ break;
+
+ case nsVDPixmap::kPixFormat_Y8:
+ return InterpPlanarY8(px, x_256, y_256);
+
+ case nsVDPixmap::kPixFormat_YUV422_UYVY:
+ return ConvertYCC72ToRGB24(
+ VDPixmapInterpolateSample8x2To24((const char *)px.data + 1, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 0, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 2, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV422_YUYV:
+ return ConvertYCC72ToRGB24(
+ VDPixmapInterpolateSample8x2To24((const char *)px.data + 0, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 1, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 3, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV444_XVYU:
+ return ConvertYCC72ToRGB24(
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 1, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 0, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 2, px.pitch, px.w, px.h, x_256, y_256)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV422_UYVY_709:
+ return ConvertYCC72ToRGB24_709(
+ VDPixmapInterpolateSample8x2To24((const char *)px.data + 1, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 0, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256),
+ VDPixmapInterpolateSample8x4To24((const char *)px.data + 2, px.pitch, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV420_NV12:
+ return ConvertYCC72ToRGB24(
+ VDPixmapInterpolateSample8To24(px.data, px.pitch, px.w, px.h, x_256, y_256),
+ VDPixmapInterpolateSample8x2To24((const char *)px.data2 + 0, px.pitch2, (px.w + 1) >> 1, (px.h + 1) >> 1, (x_256 >> 1) + 128, y_256 >> 1),
+ VDPixmapInterpolateSample8x2To24((const char *)px.data2 + 1, px.pitch2, (px.w + 1) >> 1, (px.h + 1) >> 1, (x_256 >> 1) + 128, y_256 >> 1)
+ );
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, x_256, y_256, px.w, px.h);
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, (x_256 >> 1) + 128, y_256, (px.w + 1) >> 1, px.h);
+
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, (x_256 >> 2) + 128, y_256, (px.w + 3) >> 2, px.h);
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, (x_256 >> 1) + 128, y_256 >> 1, (px.w + 1) >> 1, (px.h + 1) >> 1);
+
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ return InterpPlanarYCC888(px, x_256, y_256, (x_256 >> 2) + 128, y_256 >> 2, (px.w + 3) >> 2, (px.h + 3) >> 2);
+
+ case nsVDPixmap::kPixFormat_YUV420_Planar_Centered:
+ return InterpPlanarYCC888(px, x_256, y_256, x_256 >> 1, y_256 >> 1, (px.w + 1) >> 1, (px.h + 1) >> 1);
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar_Centered:
+ return InterpPlanarYCC888(px, x_256, y_256, x_256 >> 1, y_256, (px.w + 1) >> 1, px.h);
+
+ case nsVDPixmap::kPixFormat_YUV422_Planar_16F:
+ {
+ float y = VDPixmapInterpolateSample16F(px.data, px.pitch, px.w, px.h, x_256, y_256);
+ float cb = VDPixmapInterpolateSample16F(px.data2, px.pitch2, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256);
+ float cr = VDPixmapInterpolateSample16F(px.data3, px.pitch3, (px.w + 1) >> 1, px.h, (x_256 >> 1) + 128, y_256);
+
+ uint32 ir = VDClampedRoundFixedToUint8Fast(1.1643836f*y + 1.5960268f*cr - (222.92157f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast(1.1643836f*y - 0.3917623f*cb - 0.8129676f*cr + (135.57529f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast(1.1643836f*y + 2.0172321f*cb - (276.83585f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+
+ case nsVDPixmap::kPixFormat_YUV422_V210:
+ {
+ sint32 luma_x = x_256 - 128;
+ sint32 luma_y = y_256 - 128;
+
+ if (luma_x < 0)
+ luma_x = 0;
+
+ if (luma_y < 0)
+ luma_y = 0;
+
+ if (luma_x > (sint32)((px.w - 1) << 8))
+ luma_x = (sint32)((px.w - 1) << 8);
+
+ if (luma_y > (sint32)((px.h - 1) << 8))
+ luma_y = (sint32)((px.h - 1) << 8);
+
+ sint32 luma_ix = luma_x >> 8;
+ sint32 luma_iy = luma_y >> 8;
+ float luma_fx = (float)(luma_x & 255) * (1.0f / 255.0f);
+ float luma_fy = (float)(luma_y & 255) * (1.0f / 255.0f);
+
+ float y0 = SampleV210_Y(px.data, px.pitch, luma_ix+0, luma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float y1 = SampleV210_Y(px.data, px.pitch, luma_ix+1, luma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float y2 = SampleV210_Y(px.data, px.pitch, luma_ix+0, luma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float y3 = SampleV210_Y(px.data, px.pitch, luma_ix+1, luma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float yt = y0 + (y1 - y0)*luma_fx;
+ float yb = y2 + (y3 - y2)*luma_fx;
+ float yr = yt + (yb - yt)*luma_fy;
+
+ uint32 chroma_w = (px.w + 1) >> 1;
+ uint32 chroma_h = px.h;
+ sint32 chroma_x = x_256 >> 1;
+ sint32 chroma_y = y_256 - 128;
+
+ if (chroma_x < 0)
+ chroma_x = 0;
+
+ if (chroma_y < 0)
+ chroma_y = 0;
+
+ if (chroma_x > (sint32)((chroma_w - 1) << 8))
+ chroma_x = (sint32)((chroma_w - 1) << 8);
+
+ if (chroma_y > (sint32)((chroma_h - 1) << 8))
+ chroma_y = (sint32)((chroma_h - 1) << 8);
+
+ sint32 chroma_ix = chroma_x >> 8;
+ sint32 chroma_iy = chroma_y >> 8;
+ float chroma_fx = (float)(chroma_x & 255) * (1.0f / 255.0f);
+ float chroma_fy = (float)(chroma_y & 255) * (1.0f / 255.0f);
+
+ float cb0 = SampleV210_Cb(px.data, px.pitch, chroma_ix+0, chroma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float cb1 = SampleV210_Cb(px.data, px.pitch, chroma_ix+1, chroma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float cb2 = SampleV210_Cb(px.data, px.pitch, chroma_ix+0, chroma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float cb3 = SampleV210_Cb(px.data, px.pitch, chroma_ix+1, chroma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float cbt = cb0 + (cb1 - cb0)*chroma_fx;
+ float cbb = cb2 + (cb3 - cb2)*chroma_fx;
+ float cbr = cbt + (cbb - cbt)*chroma_fy;
+
+ float cr0 = SampleV210_Cr(px.data, px.pitch, chroma_ix+0, chroma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float cr1 = SampleV210_Cr(px.data, px.pitch, chroma_ix+1, chroma_iy+0, px.w, px.h) * (1.0f / 1023.0f);
+ float cr2 = SampleV210_Cr(px.data, px.pitch, chroma_ix+0, chroma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float cr3 = SampleV210_Cr(px.data, px.pitch, chroma_ix+1, chroma_iy+1, px.w, px.h) * (1.0f / 1023.0f);
+ float crt = cr0 + (cr1 - cr0)*chroma_fx;
+ float crb = cr2 + (cr3 - cr2)*chroma_fx;
+ float crr = crt + (crb - crt)*chroma_fy;
+
+ uint32 ir = VDClampedRoundFixedToUint8Fast(1.1643836f*yr + 1.5960268f*crr - (222.92157f / 255.0f));
+ uint32 ig = VDClampedRoundFixedToUint8Fast(1.1643836f*yr - 0.3917623f*cbr - 0.8129676f*crr + (135.57529f / 255.0f));
+ uint32 ib = VDClampedRoundFixedToUint8Fast(1.1643836f*yr + 2.0172321f*cbr - (276.83585f / 255.0f));
+
+ return (ir << 16) + (ig << 8) + ib;
+ }
+ break;
+
+ default:
+ return 0;
+ }
+}
+
+uint32 VDConvertYCbCrToRGB(uint8 y0, uint8 cb0, uint8 cr0) {
+ sint32 y = y0 - 16;
+ sint32 cb = cb0 - 128;
+ sint32 cr = cr0 - 128;
+
+ sint32 y2 = y * 76309 + 0x8000;
+ sint32 r = y2 + cr * 104597;
+ sint32 g = y2 + cr * -53279 + cb * -25674;
+ sint32 b = y2 + cb * 132201;
+
+ r &= ~(r >> 31);
+ g &= ~(g >> 31);
+ b &= ~(b >> 31);
+ r += (0xffffff - r) & ((0xffffff - r) >> 31);
+ g += (0xffffff - g) & ((0xffffff - g) >> 31);
+ b += (0xffffff - b) & ((0xffffff - b) >> 31);
+
+ return (r & 0xff0000) + ((g & 0xff0000) >> 8) + (b >> 16);
+}
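+
+// The constants above are the BT.601 limited-range coefficients in 16.16
+// fixed point: 76309 ~ 1.164384*65536 (255/219), 104597 ~ 1.596027*65536,
+// 25674 ~ 0.391762*65536, 53279 ~ 0.812968*65536, 132201 ~ 2.017232*65536.
+// The 0x8000 bias rounds before the final shift, and the branchless masking
+// clamps each 8.16 channel to [0, 0xffffff].  As a quick check,
+// VDConvertYCbCrToRGB(16, 128, 128) gives 0x000000 and
+// VDConvertYCbCrToRGB(235, 128, 128) gives 0xFFFFFF.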
+
+uint32 VDConvertRGBToYCbCr(uint32 c) {
+ return VDConvertRGBToYCbCr((uint8)(c >> 16), (uint8)(c >> 8), (uint8)c);
+}
+
+uint32 VDConvertRGBToYCbCr(uint8 r8, uint8 g8, uint8 b8) {
+ sint32 r = r8;
+ sint32 g = g8;
+ sint32 b = b8;
+ sint32 yt = 1052*r + 2065*g + 401*b;
+ sint32 y = (yt + 0x10800) >> 4;
+ sint32 cr = (10507932*r - yt*2987 + 0x80800000U) >> 8;
+ sint32 cb = ( 8312025*b - yt*2363 + 0x80800000U) >> 24;
+
+ return (uint8)cb + (y & 0xff00) + (cr&0xff0000);
+}
\ No newline at end of file
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/pixmaputils.cpp b/src/thirdparty/VirtualDub/Kasumi/source/pixmaputils.cpp
new file mode 100644
index 000000000..635cbf3c0
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/pixmaputils.cpp
@@ -0,0 +1,519 @@
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/system/memory.h>
+
+extern VDPixmapFormatInfo g_vdPixmapFormats[] = {
+ // name qchnk qw qh qwb qhb qs ab aw ah as ps
+ /* Null */ { "null", false, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+ /* Pal1 */ { "Pal1", true, 8, 1, 3, 0, 1, 0, 0, 0, 0, 2 },
+ /* Pal2 */ { "Pal2", true, 4, 1, 2, 0, 1, 0, 0, 0, 0, 4 },
+ /* Pal4 */ { "Pal4", true, 2, 1, 1, 0, 1, 0, 0, 0, 0, 16 },
+ /* Pal8 */ { "Pal8", false, 1, 1, 0, 0, 1, 0, 0, 0, 0, 256 },
+ /* RGB16_555 */ { "XRGB1555", false, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0 },
+ /* RGB16_565 */ { "RGB565", false, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0 },
+ /* RGB24 */ { "RGB888", false, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0 },
+ /* RGB32 */ { "XRGB8888", false, 1, 1, 0, 0, 4, 0, 0, 0, 0, 0 },
+ /* Y8 */ { "Y8", false, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
+ /* YUV422_UYVY */ { "UYVY", true, 2, 1, 1, 0, 4, 0, 0, 0, 0, 0 },
+ /* YUV422_YUYV */ { "YUYV", true, 2, 1, 1, 0, 4, 0, 0, 0, 0, 0 },
+ /* YUV444_XVYU */ { "XVYU", false, 1, 1, 0, 0, 4, 0, 0, 0, 0, 0 },
+ /* YUV444_Planar */ { "YUV444", false, 1, 1, 0, 0, 1, 2, 0, 0, 1, 0 },
+ /* YUV422_Planar */ { "YUV422", false, 1, 1, 0, 0, 1, 2, 1, 0, 1, 0 },
+ /* YUV420_Planar */ { "YUV420", false, 1, 1, 0, 0, 1, 2, 1, 1, 1, 0 },
+ /* YUV411_Planar */ { "YUV411", false, 1, 1, 0, 0, 1, 2, 2, 0, 1, 0 },
+ /* YUV410_Planar */ { "YUV410", false, 1, 1, 0, 0, 1, 2, 2, 2, 1, 0 },
+ /* YUV422_Planar_Centered */ { "YUV422C", false, 1, 1, 0, 0, 1, 2, 1, 0, 1, 0 },
+ /* YUV420_Planar_Centered */ { "YUV420C", false, 1, 1, 0, 0, 1, 2, 1, 1, 1, 0 },
+ /* YUV422_Planar_16F */ { "YUV422_16F", false, 1, 1, 0, 0, 2, 2, 1, 0, 2, 0 },
+ /* V210 */ { "v210", true,24, 1, 2, 0, 64, 0, 0, 0, 1, 0 },
+ /* YUV422_UYVY_709 */ { "UYVY-709", true, 2, 1, 1, 0, 4, 0, 0, 0, 0, 0 },
+ /* NV12 */ { "NV12", false, 1, 1, 0, 0, 1, 1, 1, 1, 2, 0 },
+};
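+
+// Column legend, matching the VDPixmapFormatInfo fields used below: qchnk
+// marks horizontally chunky formats, qw/qh give the pixel quantum, qwb/qhb
+// its log2 (for power-of-two quanta), qs the quantum size in bytes, ab the
+// number of auxiliary (chroma) planes, aw/ah the log2 chroma subsampling,
+// as the chroma sample size in bytes, and ps the palette size in entries.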
+
+#ifdef _DEBUG
+ bool VDIsValidPixmapPlane(const void *p, ptrdiff_t pitch, vdpixsize w, vdpixsize h) {
+ bool isvalid;
+
+ if (pitch < 0)
+ isvalid = VDIsValidReadRegion((const char *)p + pitch*(h-1), (-pitch)*(h-1)+w);
+ else
+ isvalid = VDIsValidReadRegion(p, pitch*(h-1)+w);
+
+ if (!isvalid) {
+ VDDEBUG("Kasumi: Invalid pixmap plane detected.\n"
+ " Base=%p, pitch=%d, size=%dx%d (bytes)\n", p, (int)pitch, w, h);
+ }
+
+ return isvalid;
+ }
+
+ bool VDAssertValidPixmap(const VDPixmap& px) {
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(px.format);
+
+ if (px.format) {
+ if (!VDIsValidPixmapPlane(px.data, px.pitch, -(-px.w / info.qw)*info.qsize, -(-px.h >> info.qhbits))) {
+ VDDEBUG("Kasumi: Invalid primary plane detected in pixmap.\n"
+ " Pixmap info: format=%d (%s), dimensions=%dx%d\n", px.format, info.name, px.w, px.h);
+ VDASSERT(!"Kasumi: Invalid primary plane detected in pixmap.\n");
+ return false;
+ }
+
+ if (info.palsize)
+ if (!VDIsValidReadRegion(px.palette, sizeof(uint32) * info.palsize)) {
+ VDDEBUG("Kasumi: Invalid palette detected in pixmap.\n"
+ " Pixmap info: format=%d (%s), dimensions=%dx%d\n", px.format, info.name, px.w, px.h);
+ VDASSERT(!"Kasumi: Invalid palette detected in pixmap.\n");
+ return false;
+ }
+
+ if (info.auxbufs) {
+ const vdpixsize auxw = -(-px.w >> info.auxwbits);
+ const vdpixsize auxh = -(-px.h >> info.auxhbits);
+
+ if (!VDIsValidPixmapPlane(px.data2, px.pitch2, auxw * info.auxsize, auxh)) {
+ VDDEBUG("Kasumi: Invalid Cb plane detected in pixmap.\n"
+ " Pixmap info: format=%d (%s), dimensions=%dx%d\n", px.format, info.name, px.w, px.h);
+ VDASSERT(!"Kasumi: Invalid Cb plane detected in pixmap.\n");
+ return false;
+ }
+
+ if (info.auxbufs > 2) {
+ if (!VDIsValidPixmapPlane(px.data3, px.pitch3, auxw * info.auxsize, auxh)) {
+ VDDEBUG("Kasumi: Invalid Cr plane detected in pixmap.\n"
+ " Pixmap info: format=%d, dimensions=%dx%d\n", px.format, px.w, px.h);
+ VDASSERT(!"Kasumi: Invalid Cr plane detected in pixmap.\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+#endif
+
+VDPixmap VDPixmapOffset(const VDPixmap& src, vdpixpos x, vdpixpos y) {
+ VDPixmap temp(src);
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(temp.format);
+
+ if (info.qchunky) {
+ x = (x + info.qw - 1) / info.qw;
+ y >>= info.qhbits;
+ }
+
+ switch(info.auxbufs) {
+ case 2:
+ temp.data3 = (char *)temp.data3 + (x >> info.auxwbits)*info.auxsize + (y >> info.auxhbits)*temp.pitch3;
+ case 1:
+ temp.data2 = (char *)temp.data2 + (x >> info.auxwbits)*info.auxsize + (y >> info.auxhbits)*temp.pitch2;
+ case 0:
+ temp.data = (char *)temp.data + x*info.qsize + y*temp.pitch;
+ }
+
+ return temp;
+}
+
+VDPixmapLayout VDPixmapLayoutOffset(const VDPixmapLayout& src, vdpixpos x, vdpixpos y) {
+ VDPixmapLayout temp(src);
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(temp.format);
+
+ if (info.qchunky) {
+ x = (x + info.qw - 1) / info.qw;
+ y = -(-y >> info.qhbits);
+ }
+
+ switch(info.auxbufs) {
+ case 2:
+ temp.data3 += -(-x >> info.auxwbits)*info.auxsize + -(-y >> info.auxhbits)*temp.pitch3;
+ case 1:
+ temp.data2 += -(-x >> info.auxwbits)*info.auxsize + -(-y >> info.auxhbits)*temp.pitch2;
+ case 0:
+ temp.data += x*info.qsize + y*temp.pitch;
+ }
+
+ return temp;
+}
+
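+// A note on the rounding idioms used in this file: with arithmetic right
+// shifts, -(-h >> bits) computes ceil(h / 2^bits), so subsampled plane sizes
+// are rounded up rather than truncated, and (size + alignmask) & ~alignmask
+// rounds a pitch up to the requested alignment.  For example, a 17x11
+// YUV420 image gets 9x6 chroma planes.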
+uint32 VDPixmapCreateLinearLayout(VDPixmapLayout& layout, int format, vdpixsize w, vdpixsize h, int alignment) {
+ const ptrdiff_t alignmask = alignment - 1;
+
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(format);
+ sint32 qw = (w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-h >> srcinfo.qhbits);
+ sint32 subw = -(-w >> srcinfo.auxwbits);
+ sint32 subh = -(-h >> srcinfo.auxhbits);
+ sint32 auxsize = srcinfo.auxsize;
+
+ ptrdiff_t mainpitch = (srcinfo.qsize * qw + alignmask) & ~alignmask;
+ size_t mainsize = mainpitch * qh;
+
+ layout.data = 0;
+ layout.pitch = mainpitch;
+ layout.palette = NULL;
+ layout.data2 = 0;
+ layout.pitch2 = 0;
+ layout.data3 = 0;
+ layout.pitch3 = 0;
+ layout.w = w;
+ layout.h = h;
+ layout.format = format;
+
+ if (srcinfo.auxbufs >= 1) {
+ ptrdiff_t subpitch = (subw * auxsize + alignmask) & ~alignmask;
+ size_t subsize = subpitch * subh;
+
+ layout.data2 = mainsize;
+ layout.pitch2 = subpitch;
+ mainsize += subsize;
+
+ if (srcinfo.auxbufs >= 2) {
+ layout.data3 = mainsize;
+ layout.pitch3 = subpitch;
+ mainsize += subsize;
+ }
+ }
+
+ return mainsize;
+}
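+
+// Illustrative usage (a sketch, not exercised in this file): the function
+// fills in relative plane offsets and pitches and returns the total byte
+// count, and VDPixmapBuffer can then bind the layout to real storage, e.g.
+//
+//     VDPixmapLayout layout;
+//     uint32 bytes = VDPixmapCreateLinearLayout(layout,
+//                        nsVDPixmap::kPixFormat_YUV420_Planar, 640, 480, 16);
+//     VDPixmapBuffer buf(layout);   // allocates storage sized from the layout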
+
+void VDPixmapFlipV(VDPixmap& px) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(px.format);
+ sint32 w = px.w;
+ sint32 h = px.h;
+ sint32 qw = (w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-h >> srcinfo.qhbits);
+ sint32 subh = -(-h >> srcinfo.auxhbits);
+
+ vdptrstep(px.data, px.pitch * (qh - 1));
+ px.pitch = -px.pitch;
+
+ if (srcinfo.auxbufs >= 1) {
+ vdptrstep(px.data2, px.pitch2 * (subh - 1));
+ px.pitch2 = -px.pitch2;
+
+ if (srcinfo.auxbufs >= 2) {
+ vdptrstep(px.data3, px.pitch3 * (subh - 1));
+ px.pitch3 = -px.pitch3;
+ }
+ }
+}
+
+void VDPixmapLayoutFlipV(VDPixmapLayout& layout) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(layout.format);
+ sint32 w = layout.w;
+ sint32 h = layout.h;
+ sint32 qw = (w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-h >> srcinfo.qhbits);
+ sint32 subh = -(-h >> srcinfo.auxhbits);
+
+ layout.data += layout.pitch * (qh - 1);
+ layout.pitch = -layout.pitch;
+
+ if (srcinfo.auxbufs >= 1) {
+ layout.data2 += layout.pitch2 * (subh - 1);
+ layout.pitch2 = -layout.pitch2;
+
+ if (srcinfo.auxbufs >= 2) {
+ layout.data3 += layout.pitch3 * (subh - 1);
+ layout.pitch3 = -layout.pitch3;
+ }
+ }
+}
+
+uint32 VDPixmapLayoutGetMinSize(const VDPixmapLayout& layout) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(layout.format);
+ sint32 w = layout.w;
+ sint32 h = layout.h;
+ sint32 qw = (w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-h >> srcinfo.qhbits);
+ sint32 subh = -(-h >> srcinfo.auxhbits);
+
+ uint32 limit = layout.data;
+ if (layout.pitch >= 0)
+ limit += layout.pitch * qh;
+ else
+ limit -= layout.pitch;
+
+ if (srcinfo.auxbufs >= 1) {
+ uint32 limit2 = layout.data2;
+
+ if (layout.pitch2 >= 0)
+ limit2 += layout.pitch2 * subh;
+ else
+ limit2 -= layout.pitch2;
+
+ if (limit < limit2)
+ limit = limit2;
+
+ if (srcinfo.auxbufs >= 2) {
+ uint32 limit3 = layout.data3;
+
+ if (layout.pitch3 >= 0)
+ limit3 += layout.pitch3 * subh;
+ else
+ limit3 -= layout.pitch3;
+
+ if (limit < limit3)
+ limit = limit3;
+ }
+ }
+
+ return limit;
+}
+
+VDPixmap VDPixmapExtractField(const VDPixmap& src, bool field2) {
+ VDPixmap px(src);
+
+ if (field2) {
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(px.format);
+
+ if (px.data) {
+ if (info.qh == 1)
+ vdptrstep(px.data, px.pitch);
+
+ if (!info.auxhbits) {
+ vdptrstep(px.data2, px.pitch2);
+ vdptrstep(px.data3, px.pitch3);
+ }
+ }
+ }
+
+ px.h >>= 1;
+ px.pitch += px.pitch;
+ px.pitch2 += px.pitch2;
+ px.pitch3 += px.pitch3;
+ return px;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDPixmapBuffer::VDPixmapBuffer(const VDPixmap& src)
+ : mpBuffer(NULL)
+ , mLinearSize(0)
+{
+ assign(src);
+}
+
+VDPixmapBuffer::VDPixmapBuffer(const VDPixmapBuffer& src)
+ : mpBuffer(NULL)
+ , mLinearSize(0)
+{
+ assign(src);
+}
+
+VDPixmapBuffer::VDPixmapBuffer(const VDPixmapLayout& layout) {
+ init(layout);
+}
+
+VDPixmapBuffer::~VDPixmapBuffer() {
+#ifdef _DEBUG
+ validate();
+#endif
+
+ delete[] mpBuffer;
+}
+
+void VDPixmapBuffer::init(sint32 width, sint32 height, int f) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(f);
+ sint32 qw = (width + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-height >> srcinfo.qhbits);
+ sint32 subw = -(-width >> srcinfo.auxwbits);
+ sint32 subh = -(-height >> srcinfo.auxhbits);
+ ptrdiff_t mainpitch = (srcinfo.qsize * qw + 15) & ~15;
+ ptrdiff_t subpitch = (srcinfo.auxsize * subw + 15) & ~15;
+ size_t mainsize = mainpitch * qh;
+ size_t subsize = subpitch * subh;
+ size_t totalsize = mainsize + subsize*srcinfo.auxbufs + 4 * srcinfo.palsize;
+
+#ifdef _DEBUG
+ totalsize += 28;
+#endif
+
+ if (mLinearSize != totalsize) {
+ clear();
+ mpBuffer = new char[totalsize + 15];
+ mLinearSize = totalsize;
+ }
+
+ char *p = mpBuffer + (-(int)(uintptr)mpBuffer & 15);
+
+#ifdef _DEBUG
+ *(uint32 *)p = totalsize;
+ for(int i=0; i<12; ++i)
+ p[4+i] = (char)(0xa0 + i);
+
+ p += 16;
+#endif
+
+ data = p;
+ pitch = mainpitch;
+ p += mainsize;
+
+ palette = NULL;
+ data2 = NULL;
+ pitch2 = NULL;
+ data3 = NULL;
+ pitch3 = NULL;
+ w = width;
+ h = height;
+ format = f;
+
+ if (srcinfo.auxbufs >= 1) {
+ data2 = p;
+ pitch2 = subpitch;
+ p += subsize;
+ }
+
+ if (srcinfo.auxbufs >= 2) {
+ data3 = p;
+ pitch3 = subpitch;
+ p += subsize;
+ }
+
+ if (srcinfo.palsize) {
+ palette = (const uint32 *)p;
+ p += srcinfo.palsize * 4;
+ }
+
+#ifdef _DEBUG
+ for(int j=0; j<12; ++j)
+ p[j] = (char)(0xb0 + j);
+#endif
+}
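+
+// Debug builds reserve 28 extra bytes per buffer: a 4-byte recorded size,
+// 12 head guard bytes (0xa0..0xab) before the pixel data and 12 tail guard
+// bytes (0xb0..0xbb) after it, which validate() later checks for buffer
+// under- and overruns.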
+
+void VDPixmapBuffer::init(const VDPixmapLayout& layout) {
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(layout.format);
+ sint32 qw = (layout.w + srcinfo.qw - 1) / srcinfo.qw;
+ sint32 qh = -(-layout.h >> srcinfo.qhbits);
+ sint32 subw = -(-layout.w >> srcinfo.auxwbits);
+ sint32 subh = -(-layout.h >> srcinfo.auxhbits);
+
+ ptrdiff_t mino=0, maxo=0;
+
+ if (layout.pitch < 0) {
+ mino = std::min<ptrdiff_t>(mino, layout.data + layout.pitch * (qh-1));
+ maxo = std::max<ptrdiff_t>(maxo, layout.data - layout.pitch);
+ } else {
+ mino = std::min<ptrdiff_t>(mino, layout.data);
+ maxo = std::max<ptrdiff_t>(maxo, layout.data + layout.pitch*qh);
+ }
+
+ if (srcinfo.auxbufs >= 1) {
+ if (layout.pitch2 < 0) {
+ mino = std::min<ptrdiff_t>(mino, layout.data2 + layout.pitch2 * (subh-1));
+ maxo = std::max<ptrdiff_t>(maxo, layout.data2 - layout.pitch2);
+ } else {
+ mino = std::min<ptrdiff_t>(mino, layout.data2);
+ maxo = std::max<ptrdiff_t>(maxo, layout.data2 + layout.pitch2*subh);
+ }
+
+ if (srcinfo.auxbufs >= 2) {
+ if (layout.pitch3 < 0) {
+ mino = std::min<ptrdiff_t>(mino, layout.data3 + layout.pitch3 * (subh-1));
+ maxo = std::max<ptrdiff_t>(maxo, layout.data3 - layout.pitch3);
+ } else {
+ mino = std::min<ptrdiff_t>(mino, layout.data3);
+ maxo = std::max<ptrdiff_t>(maxo, layout.data3 + layout.pitch3*subh);
+ }
+ }
+ }
+
+ ptrdiff_t linsize = ((maxo - mino + 3) & ~(uintptr)3);
+
+ ptrdiff_t totalsize = linsize + 4*srcinfo.palsize;
+
+#ifdef _DEBUG
+ totalsize += 28;
+#endif
+
+ if (mLinearSize != totalsize) {
+ clear();
+ mpBuffer = new char[totalsize + 15];
+ mLinearSize = totalsize;
+ }
+
+ char *p = mpBuffer + (-(int)(uintptr)mpBuffer & 15);
+
+#ifdef _DEBUG
+ *(uint32 *)p = totalsize - 28;
+ for(int i=0; i<12; ++i)
+ p[4+i] = (char)(0xa0 + i);
+
+ p += 16;
+#endif
+
+ w = layout.w;
+ h = layout.h;
+ format = layout.format;
+ data = p + layout.data - mino;
+ data2 = p + layout.data2 - mino;
+ data3 = p + layout.data3 - mino;
+ pitch = layout.pitch;
+ pitch2 = layout.pitch2;
+ pitch3 = layout.pitch3;
+ palette = NULL;
+
+ if (srcinfo.palsize) {
+ palette = (const uint32 *)(p + linsize);
+ memcpy((void *)palette, layout.palette, 4*srcinfo.palsize);
+ }
+
+#ifdef _DEBUG
+ for(int j=0; j<12; ++j)
+ p[totalsize + j - 28] = (char)(0xb0 + j);
+#endif
+
+ VDAssertValidPixmap(*this);
+}
+
+void VDPixmapBuffer::assign(const VDPixmap& src) {
+ if (!src.format) {
+ delete[] mpBuffer;
+ mpBuffer = NULL;
+ data = NULL;
+ format = 0;
+ } else {
+ init(src.w, src.h, src.format);
+
+ const VDPixmapFormatInfo& srcinfo = VDPixmapGetInfo(src.format);
+ int qw = (src.w + srcinfo.qw - 1) / srcinfo.qw;
+ int qh = -(-src.h >> srcinfo.qhbits);
+ int subw = -(-src.w >> srcinfo.auxwbits);
+ int subh = -(-src.h >> srcinfo.auxhbits);
+
+ if (srcinfo.palsize)
+ memcpy((void *)palette, src.palette, 4 * srcinfo.palsize);
+
+ switch(srcinfo.auxbufs) {
+ case 2:
+ VDMemcpyRect(data3, pitch3, src.data3, src.pitch3, subw, subh);
+ case 1:
+ VDMemcpyRect(data2, pitch2, src.data2, src.pitch2, subw, subh);
+ case 0:
+ VDMemcpyRect(data, pitch, src.data, src.pitch, qw * srcinfo.qsize, qh);
+ }
+ }
+}
+
+void VDPixmapBuffer::swap(VDPixmapBuffer& dst) {
+ std::swap(mpBuffer, dst.mpBuffer);
+ std::swap(mLinearSize, dst.mLinearSize);
+ std::swap(static_cast<VDPixmap&>(*this), static_cast<VDPixmap&>(dst));
+}
+
+#ifdef _DEBUG
+void VDPixmapBuffer::validate() {
+ if (mpBuffer) {
+ char *p = (char *)(((uintptr)mpBuffer + 15) & ~(uintptr)15);
+
+ // verify head bytes
+ for(int i=0; i<12; ++i)
+ if (p[i+4] != (char)(0xa0 + i))
+ VDASSERT(!"VDPixmapBuffer: Buffer underflow detected.\n");
+
+ // verify tail bytes
+ for(int j=0; j<12; ++j)
+ if (p[mLinearSize - 12 + j] != (char)(0xb0 + j))
+ VDASSERT(!"VDPixmapBuffer: Buffer overflow detected.\n");
+ }
+}
+#endif
\ No newline at end of file
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/region.cpp b/src/thirdparty/VirtualDub/Kasumi/source/region.cpp
new file mode 100644
index 000000000..283f43cf8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/region.cpp
@@ -0,0 +1,1334 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/region.h>
+#include <vd2/system/math.h>
+#include <vd2/system/vdstl.h>
+
+void VDPixmapRegion::swap(VDPixmapRegion& x) {
+ mSpans.swap(x.mSpans);
+ std::swap(mBounds, x.mBounds);
+}
+
+VDPixmapPathRasterizer::VDPixmapPathRasterizer()
+ : mpEdgeBlocks(NULL)
+ , mpFreeEdgeBlocks(NULL)
+ , mEdgeBlockIdx(kEdgeBlockMax)
+ , mpScanBuffer(NULL)
+{
+ ClearScanBuffer();
+}
+
+VDPixmapPathRasterizer::VDPixmapPathRasterizer(const VDPixmapPathRasterizer&)
+ : mpEdgeBlocks(NULL)
+ , mpFreeEdgeBlocks(NULL)
+ , mEdgeBlockIdx(kEdgeBlockMax)
+ , mpScanBuffer(NULL)
+{
+ ClearScanBuffer();
+}
+
+VDPixmapPathRasterizer::~VDPixmapPathRasterizer() {
+ Clear();
+ FreeEdgeLists();
+}
+
+VDPixmapPathRasterizer& VDPixmapPathRasterizer::operator=(const VDPixmapPathRasterizer&) {
+ return *this;
+}
+
+void VDPixmapPathRasterizer::Clear() {
+ ClearEdgeList();
+ ClearScanBuffer();
+}
+
+void VDPixmapPathRasterizer::QuadraticBezier(const vdint2 *pts) {
+ int x0 = pts[0].x;
+ int x1 = pts[1].x;
+ int x2 = pts[2].x;
+ int y0 = pts[0].y;
+ int y1 = pts[1].y;
+ int y2 = pts[2].y;
+
+ // P = (1-t)^2*P0 + 2t(1-t)*P1 + t^2*P2
+ // P = (1-2t+t^2)P0 + 2(t-t^2)P1 + t^2*P2
+ // P = (P0-2P1+P2)t^2 + 2(P1-P0)t + P0
+
+ int cx2 = x0-2*x1+x2;
+ int cx1 = -2*x0+2*x1;
+ int cx0 = x0;
+
+ int cy2 = y0-2*y1+y2;
+ int cy1 = -2*y0+2*y1;
+ int cy0 = y0;
+
+ // This equation is from Graphics Gems I.
+ //
+ // The idea is that since we're approximating a cubic curve with lines,
+ // any error we incur is due to the curvature of the line, which we can
+ // estimate by calculating the maximum acceleration of the curve. For
+ // a cubic, the acceleration (second derivative) is a line, meaning that
+ // the absolute maximum acceleration must occur at either the beginning
+ // (|c2|) or the end (|c2+c3|). Our bounds here are a little more
+ // conservative than that, but that's okay.
+ //
+ // If the acceleration of the parametric formula is zero (c2 = c3 = 0),
+ // that component of the curve is linear and does not incur any error.
+ // If a=0 for both X and Y, the curve is a line segment and we can
+ // use a step size of 1.
+
+ int maxaccel1 = abs(cy2);
+ int maxaccel2 = abs(cx2);
+
+ int maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
+ int h = 1;
+
+ while(maxaccel > 8 && h < 1024) {
+ maxaccel >>= 2;
+ h += h;
+ }
+
+ int lastx = x0;
+ int lasty = y0;
+
+ // compute forward differences
+ sint64 h1 = (sint64)(0x40000000 / h) << 2;
+ sint64 h2 = h1/h;
+
+ sint64 ax0 = (sint64)cx0 << 32;
+ sint64 ax1 = h1*(sint64)cx1 + h2*(sint64)cx2;
+ sint64 ax2 = 2*h2*(sint64)cx2;
+
+ sint64 ay0 = (sint64)cy0 << 32;
+ sint64 ay1 = h1*(sint64)cy1 + h2*(sint64)cy2;
+ sint64 ay2 = 2*h2*(sint64)cy2;
+
+ // round, not truncate
+ ax0 += 0x80000000;
+ ay0 += 0x80000000;
+
+ do {
+ ax0 += ax1;
+ ax1 += ax2;
+ ay0 += ay1;
+ ay1 += ay2;
+
+ int xi = (int)((uint64)ax0 >> 32);
+ int yi = (int)((uint64)ay0 >> 32);
+
+ FastLine(lastx, lasty, xi, yi);
+ lastx = xi;
+ lasty = yi;
+ } while(--h);
+}
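+
+// Forward-differencing note: both Bezier flatteners evaluate the polynomial
+// at t = k/h for k = 1..h using running differences instead of re-evaluating
+// it.  For the quadratic case f(t) = c2*t^2 + c1*t + c0,
+//
+//     f((k+1)/h) - f(k/h) = c1/h + (2k+1)*c2/h^2
+//
+// so the first difference starts at c1/h + c2/h^2 (ax1/ay1, built from
+// h1 ~ 2^32/h and h2 ~ 2^32/h^2) and grows by the constant second difference
+// 2*c2/h^2 (ax2/ay2).  CubicBezier below adds a third difference for the t^3
+// term.  The step count h is doubled, up to 1024, until the estimated
+// maximum acceleration drops to 8 or below.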
+
+void VDPixmapPathRasterizer::CubicBezier(const vdint2 *pts) {
+ int x0 = pts[0].x;
+ int x1 = pts[1].x;
+ int x2 = pts[2].x;
+ int x3 = pts[3].x;
+ int y0 = pts[0].y;
+ int y1 = pts[1].y;
+ int y2 = pts[2].y;
+ int y3 = pts[3].y;
+
+ int cx3 = - x0+3*x1-3*x2+x3;
+ int cx2 = 3*x0-6*x1+3*x2;
+ int cx1 = -3*x0+3*x1;
+ int cx0 = x0;
+
+ int cy3 = - y0+3*y1-3*y2+y3;
+ int cy2 = 3*y0-6*y1+3*y2;
+ int cy1 = -3*y0+3*y1;
+ int cy0 = y0;
+
+ // This equation is from Graphics Gems I.
+ //
+ // The idea is that since we're approximating a cubic curve with lines,
+ // any error we incur is due to the curvature of the line, which we can
+ // estimate by calculating the maximum acceleration of the curve. For
+ // a cubic, the acceleration (second derivative) is a line, meaning that
+ // the absolute maximum acceleration must occur at either the beginning
+ // (|c2|) or the end (|c2+c3|). Our bounds here are a little more
+ // conservative than that, but that's okay.
+ //
+ // If the acceleration of the parametric formula is zero (c2 = c3 = 0),
+ // that component of the curve is linear and does not incur any error.
+ // If a=0 for both X and Y, the curve is a line segment and we can
+ // use a step size of 1.
+
+ int maxaccel1 = abs(2*cy2) + abs(6*cy3);
+ int maxaccel2 = abs(2*cx2) + abs(6*cx3);
+
+ int maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
+ int h = 1;
+
+ while(maxaccel > 8 && h < 1024) {
+ maxaccel >>= 2;
+ h += h;
+ }
+
+ int lastx = x0;
+ int lasty = y0;
+
+ // compute forward differences
+ sint64 h1 = (sint64)(0x40000000 / h) << 2;
+ sint64 h2 = h1/h;
+ sint64 h3 = h2/h;
+
+ sint64 ax0 = (sint64)cx0 << 32;
+ sint64 ax1 = h1*(sint64)cx1 + h2*(sint64)cx2 + h3*(sint64)cx3;
+ sint64 ax2 = 2*h2*(sint64)cx2 + 6*h3*(sint64)cx3;
+ sint64 ax3 = 6*h3*(sint64)cx3;
+
+ sint64 ay0 = (sint64)cy0 << 32;
+ sint64 ay1 = h1*(sint64)cy1 + h2*(sint64)cy2 + h3*(sint64)cy3;
+ sint64 ay2 = 2*h2*(sint64)cy2 + 6*h3*(sint64)cy3;
+ sint64 ay3 = 6*h3*(sint64)cy3;
+
+ // round, not truncate
+ ax0 += 0x80000000;
+ ay0 += 0x80000000;
+
+ do {
+ ax0 += ax1;
+ ax1 += ax2;
+ ax2 += ax3;
+ ay0 += ay1;
+ ay1 += ay2;
+ ay2 += ay3;
+
+ int xi = (int)((uint64)ax0 >> 32);
+ int yi = (int)((uint64)ay0 >> 32);
+
+ FastLine(lastx, lasty, xi, yi);
+ lastx = xi;
+ lasty = yi;
+ } while(--h);
+}
+
+void VDPixmapPathRasterizer::Line(const vdint2& pt1, const vdint2& pt2) {
+ FastLine(pt1.x, pt1.y, pt2.x, pt2.y);
+}
+
+void VDPixmapPathRasterizer::FastLine(int x0, int y0, int x1, int y1) {
+ int flag = 1;
+
+ if (y1 == y0)
+ return;
+
+ if (y1 < y0) {
+ int t;
+
+ t=x0; x0=x1; x1=t;
+ t=y0; y0=y1; y1=t;
+ flag = 0;
+ }
+
+ int dy = y1-y0;
+ int xacc = x0<<13;
+
+ // prestep y0 down
+ int iy0 = (y0+3) >> 3;
+ int iy1 = (y1+3) >> 3;
+
+ if (iy0 < iy1) {
+ int invslope = (x1-x0)*65536/dy;
+
+ int prestep = (4-y0) & 7;
+ xacc += (invslope * prestep)>>3;
+
+ if (iy0 < mScanYMin || iy1 > mScanYMax) {
+ ReallocateScanBuffer(iy0, iy1);
+ VDASSERT(iy0 >= mScanYMin && iy1 <= mScanYMax);
+ }
+
+ while(iy0 < iy1) {
+ int ix = (xacc+32767)>>16;
+
+ if (mEdgeBlockIdx >= kEdgeBlockMax) {
+ if (mpFreeEdgeBlocks) {
+ EdgeBlock *newBlock = mpFreeEdgeBlocks;
+ mpFreeEdgeBlocks = mpFreeEdgeBlocks->next;
+ newBlock->next = mpEdgeBlocks;
+ mpEdgeBlocks = newBlock;
+ } else {
+ mpEdgeBlocks = new EdgeBlock(mpEdgeBlocks);
+ }
+
+ mEdgeBlockIdx = 0;
+ }
+
+ Edge& e = mpEdgeBlocks->edges[mEdgeBlockIdx];
+ Scan& s = mpScanBufferBiased[iy0];
+ VDASSERT(iy0 >= mScanYMin && iy0 < mScanYMax);
+ ++mEdgeBlockIdx;
+
+ e.posandflag = ix*2+flag;
+ e.next = s.chain;
+ s.chain = &e;
+ ++s.count;
+
+ ++iy0;
+ xacc += invslope;
+ }
+ }
+}
+
+void VDPixmapPathRasterizer::ScanConvert(VDPixmapRegion& region) {
+ // Convert the edges to spans. We couldn't do this before because some of
+ // the regions may have winding numbers >+1 and it would have been a pain
+ // to try to adjust the spans on the fly. We use one heap to detangle
+ // a scanline's worth of edges from the singly-linked lists, and another
+ // to collect the actual scans.
+ vdfastvector<int> heap;
+
+ region.mSpans.clear();
+ int xmin = INT_MAX;
+ int xmax = INT_MIN;
+ int ymin = INT_MAX;
+ int ymax = INT_MIN;
+
+ for(int y=mScanYMin; y<mScanYMax; ++y) {
+ uint32 flipcount = mpScanBufferBiased[y].count;
+
+ if (!flipcount)
+ continue;
+
+ // Keep the edge heap from doing lots of stupid little reallocates.
+ if (heap.capacity() < flipcount)
+ heap.resize((flipcount + 63)&~63);
+
+ // Detangle scanline into edge heap.
+ int *heap0 = heap.data();
+ int *heap1 = heap0;
+ for(const Edge *ptr = mpScanBufferBiased[y].chain; ptr; ptr = ptr->next)
+ *heap1++ = ptr->posandflag;
+
+ VDASSERT(heap1 - heap0 == flipcount);
+
+ // Sort edge heap. Note that we conveniently made the opening edges
+ // one more than closing edges at the same spot, so we won't have any
+ // problems with abutting spans.
+
+ std::sort(heap0, heap1);
+
+#if 0
+ while(heap0 != heap1) {
+ int x = *heap0++ >> 1;
+ region.mSpans.push_back((y<<16) + x + 0x80008000);
+ region.mSpans.push_back((y<<16) + x + 0x80008001);
+ }
+ continue;
+#endif
+
+ // Trim any odd edges off, since we can never close on one.
+ if (flipcount & 1)
+ --heap1;
+
+ // Process edges and add spans. Since we only check for a non-zero
+ // winding number, it doesn't matter which way the outlines go. Also, since
+ // the parity always flips after each edge regardless of direction, we can
+ // process the edges in pairs.
+
+ size_t spanstart = region.mSpans.size();
+
+ int x_left;
+ int count = 0;
+ while(heap0 != heap1) {
+ int x = *heap0++;
+
+ if (!count)
+ x_left = (x>>1);
+
+ count += (x&1);
+
+ x = *heap0++;
+
+ count += (x&1);
+
+ if (!--count) {
+ int x_right = (x>>1);
+
+ if (x_right > x_left) {
+ region.mSpans.push_back((y<<16) + x_left + 0x80008000);
+ region.mSpans.push_back((y<<16) + x_right + 0x80008000);
+
+ }
+ }
+ }
+
+ size_t spanend = region.mSpans.size();
+
+ if (spanend > spanstart) {
+ if (ymin > y)
+ ymin = y;
+
+ if (ymax < y)
+ ymax = y;
+
+ int x1 = (region.mSpans[spanstart] & 0xffff) - 0x8000;
+ int x2 = (region.mSpans[spanend-1] & 0xffff) - 0x8000;
+
+ if (xmin > x1)
+ xmin = x1;
+
+ if (xmax < x2)
+ xmax = x2;
+ }
+ }
+
+ if (xmax > xmin) {
+ region.mBounds.set(xmin, ymin, xmax, ymax);
+ } else {
+ region.mBounds.set(0, 0, 0, 0);
+ }
+
+ // Dump the edge and scan buffers, since we no longer need them.
+ ClearEdgeList();
+ ClearScanBuffer();
+}
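+
+// Span encoding: each span is stored as a pair of 32-bit keys of the form
+// ((y + 0x8000) << 16) + (x + 0x8000), i.e. y in the high halfword and x in
+// the low halfword, both biased by 0x8000 so signed coordinates sort
+// correctly as unsigned integers.  The first key of a pair is the inclusive
+// left edge and the second the exclusive right edge; the fill and clip
+// routines below rely on this ordering for their binary searches.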
+
+void VDPixmapPathRasterizer::ClearEdgeList() {
+ if (mpEdgeBlocks) {
+ EdgeBlock *block = mpEdgeBlocks;
+
+ while(EdgeBlock *next = block->next)
+ block = next;
+
+ block->next = mpFreeEdgeBlocks;
+ mpFreeEdgeBlocks = mpEdgeBlocks;
+ mpEdgeBlocks = NULL;
+ }
+
+ mEdgeBlockIdx = kEdgeBlockMax;
+}
+
+void VDPixmapPathRasterizer::FreeEdgeLists() {
+ ClearEdgeList();
+
+ while(EdgeBlock *block = mpFreeEdgeBlocks) {
+ mpFreeEdgeBlocks = block->next;
+
+ delete block;
+ }
+}
+
+void VDPixmapPathRasterizer::ClearScanBuffer() {
+ delete[] mpScanBuffer;
+ mpScanBuffer = mpScanBufferBiased = NULL;
+ mScanYMin = 0;
+ mScanYMax = 0;
+}
+
+void VDPixmapPathRasterizer::ReallocateScanBuffer(int ymin, int ymax) {
+ //
+ // check if there actually is a scan buffer to avoid unintentionally pinning at zero
+ if (mpScanBuffer) {
+ int nicedelta = (mScanYMax - mScanYMin);
+
+ if (ymin < mScanYMin) {
+ int yminnice = mScanYMin - nicedelta;
+ if (ymin > yminnice)
+ ymin = yminnice;
+
+ ymin &= ~31;
+ } else
+ ymin = mScanYMin;
+
+ if (ymax > mScanYMax) {
+ int ymaxnice = mScanYMax + nicedelta;
+ if (ymax < ymaxnice)
+ ymax = ymaxnice;
+
+ ymax = (ymax + 31) & ~31;
+ } else
+ ymax = mScanYMax;
+
+ VDASSERT(ymin <= mScanYMin && ymax >= mScanYMax);
+ }
+
+ // reallocate scan buffer
+ Scan *pNewBuffer = new Scan[ymax - ymin];
+ Scan *pNewBufferBiased = pNewBuffer - ymin;
+
+ if (mpScanBuffer) {
+ memcpy(pNewBufferBiased + mScanYMin, mpScanBufferBiased + mScanYMin, (mScanYMax - mScanYMin) * sizeof(Scan));
+ delete[] mpScanBuffer;
+
+ // zero new areas of scan buffer
+ for(int y=ymin; y<mScanYMin; ++y) {
+ pNewBufferBiased[y].chain = NULL;
+ pNewBufferBiased[y].count = 0;
+ }
+
+ for(int y=mScanYMax; y<ymax; ++y) {
+ pNewBufferBiased[y].chain = NULL;
+ pNewBufferBiased[y].count = 0;
+ }
+ } else {
+ for(int y=ymin; y<ymax; ++y) {
+ pNewBufferBiased[y].chain = NULL;
+ pNewBufferBiased[y].count = 0;
+ }
+ }
+
+ mpScanBuffer = pNewBuffer;
+ mpScanBufferBiased = pNewBufferBiased;
+ mScanYMin = ymin;
+ mScanYMax = ymax;
+}
+
+bool VDPixmapFillRegion(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = (-x) + ((-y) << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ uint32 lo = 0, hi = n;
+
+ // compute top clip
+ while(lo < hi) {
+ int mid = ((lo + hi) >> 1) & ~1;
+
+ if (region.mSpans[mid + 1] < spanmin)
+ lo = mid + 2;
+ else
+ hi = mid;
+ }
+
+ start = lo;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w - x) + ((dst.h - y - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // compute bottom clip
+ int lo = start;
+ int hi = n;
+
+ while(lo < hi) {
+ int mid = ((lo + hi) >> 1) & ~1;
+
+ if (region.mSpans[mid] >= spanlimit)
+ hi = mid;
+ else
+ lo = mid+2;
+ }
+
+ end = lo;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+ uint32 *dstp;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ uint32 py = (span0 >> 16) - 0x8000 + y;
+ uint32 px = (span0 & 0xffff) - 0x8000 + x;
+ uint32 w = span1-span0;
+
+ VDASSERT(py < (uint32)dst.h);
+ VDASSERT(px < (uint32)dst.w);
+ VDASSERT(dst.w - (int)px >= (int)w);
+
+ if (lasty != py)
+ dstp = (uint32 *)vdptroffset(dst.data, dst.pitch * py);
+
+ uint32 *p = dstp + px;
+ do {
+ *p++ = color;
+ } while(--w);
+ }
+
+ return true;
+}
+
+namespace {
+ void RenderABuffer32(const VDPixmap& dst, int y, const uint8 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint32 *dstp = (uint32 *)vdptroffset(dst.data, dst.pitch * y);
+
+ const uint32 color_rb = color & 0x00FF00FF;
+ const uint32 color_g = color & 0x0000FF00;
+ do {
+ const uint32 px = *dstp;
+ const uint32 px_rb = px & 0x00FF00FF;
+ const uint32 px_g = px & 0x0000FF00;
+ const sint32 a = *alpha++;
+
+ const uint32 result_rb = (((px_rb << 6) + ((sint32)(color_rb - px_rb)*a + 0x00200020)) & 0x3FC03FC0);
+ const uint32 result_g = (((px_g << 6) + ((sint32)(color_g - px_g )*a + 0x00002000)) & 0x003FC000);
+
+ *dstp++ = (result_rb + result_g) >> 6;
+ } while(--w);
+ }
+
+ void RenderABuffer8(const VDPixmap& dst, int y, const uint8 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint8 *dstp = (uint8 *)vdptroffset(dst.data, dst.pitch * y);
+
+ do {
+ const uint8 px = *dstp;
+ const sint8 a = *alpha++;
+
+ *dstp++ = px + (((sint32)(color - px) * a + 32) >> 6);
+ } while(--w);
+ }
+
+ void RenderABuffer8_128(const VDPixmap& dst, int y, const uint8 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint8 *dstp = (uint8 *)vdptroffset(dst.data, dst.pitch * y);
+
+ do {
+ const uint8 px = *dstp;
+ const sint16 a = *alpha++;
+
+ *dstp++ = px + (((sint32)(color - px) * a + 64) >> 7);
+ } while(--w);
+ }
+
+ void RenderABuffer8_256(const VDPixmap& dst, int y, const uint16 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint8 *dstp = (uint8 *)vdptroffset(dst.data, dst.pitch * y);
+
+ do {
+ const uint8 px = *dstp;
+ const sint32 a = *alpha++;
+
+ *dstp++ = px + (((sint32)(color - px) * a + 128) >> 8);
+ } while(--w);
+ }
+
+ void RenderABuffer8_1024(const VDPixmap& dst, int y, const uint16 *alpha, uint32 w, uint32 color) {
+ if (!w)
+ return;
+
+ // update dest pointer
+ uint8 *dstp = (uint8 *)vdptroffset(dst.data, dst.pitch * y);
+
+ do {
+ const uint8 px = *dstp;
+ const sint32 a = *alpha++;
+
+ *dstp++ = px + (((sint32)(color - px) * a + 512) >> 10);
+ } while(--w);
+ }
+}
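+
+// The RenderABuffer* helpers blend 'color' into one scanline using per-pixel
+// coverage counts accumulated by the fill routines below.  RenderABuffer32
+// and RenderABuffer8 handle 32-bit XRGB and 8-bit Y8 destinations at a full
+// coverage of 64 (8x8 supersampling, shift by 6); the _128, _256 and _1024
+// variants handle Y8 at the larger coverage scales used by the 16x8, 16x16
+// and 32x32 fills (shifts by 7, 8 and 10).  Each adds half the divisor
+// before shifting so the blend rounds to nearest.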
+
+bool VDPixmapFillRegionAntialiased_32x_32x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format != nsVDPixmap::kPixFormat_Y8)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = -x + (-y << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ // find first span : x2 > spanmin
+ start = std::upper_bound(region.mSpans.begin(), region.mSpans.end(), spanmin) - region.mSpans.begin();
+ start &= ~1;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w*32 - x) + (((dst.h*32 - y) - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // find last span : x1 < spanlimit
+ end = std::lower_bound(region.mSpans.begin(), region.mSpans.end(), spanlimit) - region.mSpans.begin();
+
+ end = (end + 1) & ~1;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // allocate A-buffer
+ vdfastvector<uint16> abuffer(dst.w, 0);
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+
+ sint32 dstw32 = dst.w * 32;
+ sint32 dsth32 = dst.h * 32;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ sint32 py = (span0 >> 16) - 0x8000 + y;
+
+ if ((uint32)py >= (uint32)dsth32)
+ continue;
+
+ sint32 px1 = (span0 & 0xffff) - 0x8000 + x;
+ sint32 px2 = (span1 & 0xffff) - 0x8000 + x;
+ sint32 w = span1-span0;
+
+ if (lasty != py) {
+ if (((lasty ^ py) & 0xFFFFFFE0)) {
+ if (lasty >= 0) {
+ // flush scanline
+
+ RenderABuffer8_1024(dst, lasty >> 5, abuffer.data(), dst.w, color);
+ }
+
+ memset(abuffer.data(), 0, abuffer.size() * sizeof(abuffer[0]));
+ }
+ lasty = py;
+ }
+
+ if (px1 < 0)
+ px1 = 0;
+ if (px2 > dstw32)
+ px2 = dstw32;
+
+ if (px1 >= px2)
+ continue;
+
+ uint32 ix1 = px1 >> 5;
+ uint32 ix2 = px2 >> 5;
+ uint16 *p1 = abuffer.data() + ix1;
+ uint16 *p2 = abuffer.data() + ix2;
+
+ if (p1 == p2) {
+ p1[0] += (px2 - px1);
+ } else {
+ if (px1 & 31) {
+ p1[0] += 32 - (px1 & 31);
+ ++p1;
+ }
+
+ while(p1 != p2) {
+ p1[0] += 32;
+ ++p1;
+ }
+
+ if (px2 & 31)
+				p1[0] += px2 & 31;	// partial coverage of the last cell
+ }
+ }
+
+ if (lasty >= 0)
+ RenderABuffer8_1024(dst, lasty >> 5, abuffer.data(), dst.w, color);
+
+ return true;
+}
+
+bool VDPixmapFillRegionAntialiased_16x_16x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format != nsVDPixmap::kPixFormat_Y8)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = -x + (-y << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ // find first span : x2 > spanmin
+ start = std::upper_bound(region.mSpans.begin(), region.mSpans.end(), spanmin) - region.mSpans.begin();
+ start &= ~1;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w*16 - x) + (((dst.h*16 - y) - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // find last span : x1 < spanlimit
+ end = std::lower_bound(region.mSpans.begin(), region.mSpans.end(), spanlimit) - region.mSpans.begin();
+
+ end = (end + 1) & ~1;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // allocate A-buffer
+ vdfastvector<uint16> abuffer(dst.w, 0);
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+
+ sint32 dstw16 = dst.w * 16;
+ sint32 dsth16 = dst.h * 16;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ sint32 py = (span0 >> 16) - 0x8000 + y;
+
+ if ((uint32)py >= (uint32)dsth16)
+ continue;
+
+ sint32 px1 = (span0 & 0xffff) - 0x8000 + x;
+ sint32 px2 = (span1 & 0xffff) - 0x8000 + x;
+ sint32 w = span1-span0;
+
+ if (lasty != py) {
+ if (((lasty ^ py) & 0xFFFFFFF0)) {
+ if (lasty >= 0) {
+ // flush scanline
+
+ RenderABuffer8_256(dst, lasty >> 4, abuffer.data(), dst.w, color);
+ }
+
+ memset(abuffer.data(), 0, abuffer.size() * sizeof(abuffer[0]));
+ }
+ lasty = py;
+ }
+
+ if (px1 < 0)
+ px1 = 0;
+ if (px2 > dstw16)
+ px2 = dstw16;
+
+ if (px1 >= px2)
+ continue;
+
+ uint32 ix1 = px1 >> 4;
+ uint32 ix2 = px2 >> 4;
+ uint16 *p1 = abuffer.data() + ix1;
+ uint16 *p2 = abuffer.data() + ix2;
+
+ if (p1 == p2) {
+ p1[0] += (px2 - px1);
+ } else {
+ if (px1 & 15) {
+ p1[0] += 16 - (px1 & 15);
+ ++p1;
+ }
+
+ while(p1 != p2) {
+ p1[0] += 16;
+ ++p1;
+ }
+
+ if (px2 & 15)
+ p1[0] += px2 & 15;
+ }
+ }
+
+ if (lasty >= 0)
+ RenderABuffer8_256(dst, lasty >> 4, abuffer.data(), dst.w, color);
+
+ return true;
+}
+
+bool VDPixmapFillRegionAntialiased_16x_8x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888 && dst.format != nsVDPixmap::kPixFormat_Y8)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = -x + (-y << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ // find first span : x2 > spanmin
+ start = std::upper_bound(region.mSpans.begin(), region.mSpans.end(), spanmin) - region.mSpans.begin();
+ start &= ~1;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w*16 - x) + (((dst.h*8 - y) - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // find last span : x1 < spanlimit
+ end = std::lower_bound(region.mSpans.begin(), region.mSpans.end(), spanlimit) - region.mSpans.begin();
+
+ end = (end + 1) & ~1;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // allocate A-buffer
+ vdfastvector<uint8> abuffer(dst.w, 0);
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+
+ sint32 dstw16 = dst.w * 16;
+ sint32 dsth8 = dst.h * 8;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ sint32 py = (span0 >> 16) - 0x8000 + y;
+
+ if ((uint32)py >= (uint32)dsth8)
+ continue;
+
+ sint32 px1 = (span0 & 0xffff) - 0x8000 + x;
+ sint32 px2 = (span1 & 0xffff) - 0x8000 + x;
+ sint32 w = span1-span0;
+
+ if (lasty != py) {
+ if (((lasty ^ py) & 0xFFFFFFF8)) {
+ if (lasty >= 0) {
+ // flush scanline
+
+ RenderABuffer8_128(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ }
+
+ memset(abuffer.data(), 0, abuffer.size());
+ }
+ lasty = py;
+ }
+
+ if (px1 < 0)
+ px1 = 0;
+ if (px2 > dstw16)
+ px2 = dstw16;
+
+ if (px1 >= px2)
+ continue;
+
+ uint32 ix1 = px1 >> 4;
+ uint32 ix2 = px2 >> 4;
+ uint8 *p1 = abuffer.data() + ix1;
+ uint8 *p2 = abuffer.data() + ix2;
+
+ if (p1 == p2) {
+ p1[0] += (px2 - px1);
+ } else {
+ if (px1 & 15) {
+ p1[0] += 16 - (px1 & 15);
+ ++p1;
+ }
+
+ while(p1 != p2) {
+ p1[0] += 16;
+ ++p1;
+ }
+
+ if (px2 & 15)
+ p1[0] += px2 & 15;
+ }
+ }
+
+ if (lasty >= 0)
+ RenderABuffer8_128(dst, lasty >> 3, abuffer.data(), dst.w, color);
+
+ return true;
+}
+
+bool VDPixmapFillRegionAntialiased8x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color) {
+ if (dst.format == nsVDPixmap::kPixFormat_YUV444_Planar ||
+ dst.format == nsVDPixmap::kPixFormat_YUV422_Planar ||
+ dst.format == nsVDPixmap::kPixFormat_YUV420_Planar ||
+ dst.format == nsVDPixmap::kPixFormat_YUV410_Planar) {
+ VDPixmap pxY;
+ VDPixmap pxCb;
+ VDPixmap pxCr;
+
+ pxY.format = nsVDPixmap::kPixFormat_Y8;
+ pxY.data = dst.data;
+ pxY.pitch = dst.pitch;
+ pxY.w = dst.w;
+ pxY.h = dst.h;
+
+ pxCb.format = nsVDPixmap::kPixFormat_Y8;
+ pxCb.data = dst.data2;
+ pxCb.pitch = dst.pitch2;
+ pxCb.w = dst.w;
+ pxCb.h = dst.h;
+
+ pxCr.format = nsVDPixmap::kPixFormat_Y8;
+ pxCr.data = dst.data3;
+ pxCr.pitch = dst.pitch3;
+ pxCr.w = dst.w;
+ pxCr.h = dst.h;
+
+ uint32 colorY = (color >> 8) & 0xff;
+ uint32 colorCb = (color >> 0) & 0xff;
+ uint32 colorCr = (color >> 16) & 0xff;
+
+ VDPixmapFillRegionAntialiased8x(pxY, region, x, y, colorY);
+
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ pxCr.w = pxCb.w = dst.w >> 2;
+ pxCr.h = pxCb.h = dst.h >> 2;
+ x >>= 2;
+ y >>= 2;
+ VDPixmapFillRegionAntialiased_32x_32x(pxCb, region, x, y, colorCb);
+ VDPixmapFillRegionAntialiased_32x_32x(pxCr, region, x, y, colorCr);
+ return true;
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ pxCr.w = pxCb.w = dst.w >> 1;
+ pxCr.h = pxCb.h = dst.h >> 1;
+ x >>= 1;
+ y >>= 1;
+ x += 2;
+ VDPixmapFillRegionAntialiased_16x_16x(pxCb, region, x, y, colorCb);
+ VDPixmapFillRegionAntialiased_16x_16x(pxCr, region, x, y, colorCr);
+ return true;
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ pxCr.w = pxCb.w = dst.w >> 1;
+ x >>= 1;
+ x += 2;
+ VDPixmapFillRegionAntialiased_16x_8x(pxCb, region, x, y, colorCb);
+ VDPixmapFillRegionAntialiased_16x_8x(pxCr, region, x, y, colorCr);
+ return true;
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ VDPixmapFillRegionAntialiased8x(pxCb, region, x, y, colorCb);
+ VDPixmapFillRegionAntialiased8x(pxCr, region, x, y, colorCr);
+ return true;
+ }
+ }
+
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888 && dst.format != nsVDPixmap::kPixFormat_Y8)
+ return false;
+
+ // fast out
+ if (region.mSpans.empty())
+ return true;
+
+ // check if vertical clipping is required
+ const size_t n = region.mSpans.size();
+ uint32 start = 0;
+ uint32 end = n;
+
+ uint32 spanmin = -x + (-y << 16) + 0x80008000;
+
+ if (region.mSpans.front() < spanmin) {
+ // find first span : x2 > spanmin
+ start = std::upper_bound(region.mSpans.begin(), region.mSpans.end(), spanmin) - region.mSpans.begin();
+ start &= ~1;
+
+ // check for total top clip
+ if (start >= n)
+ return true;
+ }
+
+ uint32 spanlimit = (dst.w*8 - x) + (((dst.h*8 - y) - 1) << 16) + 0x80008000;
+
+ if (region.mSpans.back() > spanlimit) {
+ // find last span : x1 < spanlimit
+ end = std::lower_bound(region.mSpans.begin(), region.mSpans.end(), spanlimit) - region.mSpans.begin();
+
+ end = (end + 1) & ~1;
+
+ // check for total bottom clip
+ if (start >= end)
+ return true;
+ }
+
+ // allocate A-buffer
+ vdfastvector<uint8> abuffer(dst.w, 0);
+
+ // fill region
+ const uint32 *pSpan = &region.mSpans[start];
+ const uint32 *pEnd = &region.mSpans[0] + end;
+ int lasty = -1;
+
+ sint32 dstw8 = dst.w * 8;
+ sint32 dsth8 = dst.h * 8;
+
+ for(; pSpan != pEnd; pSpan += 2) {
+ uint32 span0 = pSpan[0];
+ uint32 span1 = pSpan[1];
+
+ sint32 py = (span0 >> 16) - 0x8000 + y;
+
+ if ((uint32)py >= (uint32)dsth8)
+ continue;
+
+ sint32 px1 = (span0 & 0xffff) - 0x8000 + x;
+ sint32 px2 = (span1 & 0xffff) - 0x8000 + x;
+ sint32 w = span1-span0;
+
+ if (lasty != py) {
+ if (((lasty ^ py) & 0xFFFFFFF8)) {
+ if (lasty >= 0) {
+ // flush scanline
+
+ if (dst.format == nsVDPixmap::kPixFormat_XRGB8888)
+ RenderABuffer32(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ else
+ RenderABuffer8(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ }
+
+ memset(abuffer.data(), 0, abuffer.size());
+ }
+ lasty = py;
+ }
+
+ if (px1 < 0)
+ px1 = 0;
+ if (px2 > dstw8)
+ px2 = dstw8;
+
+ if (px1 >= px2)
+ continue;
+
+ uint32 ix1 = px1 >> 3;
+ uint32 ix2 = px2 >> 3;
+ uint8 *p1 = abuffer.data() + ix1;
+ uint8 *p2 = abuffer.data() + ix2;
+
+ if (p1 == p2) {
+ p1[0] += (px2 - px1);
+ } else {
+ if (px1 & 7) {
+ p1[0] += 8 - (px1 & 7);
+ ++p1;
+ }
+
+ while(p1 != p2) {
+ p1[0] += 8;
+ ++p1;
+ }
+
+ if (px2 & 7)
+ p1[0] += px2 & 7;
+ }
+ }
+
+ if (lasty >= 0) {
+ if (dst.format == nsVDPixmap::kPixFormat_XRGB8888)
+ RenderABuffer32(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ else
+ RenderABuffer8(dst, lasty >> 3, abuffer.data(), dst.w, color);
+ }
+
+ return true;
+}
+
+void VDPixmapCreateRoundRegion(VDPixmapRegion& dst, float r) {
+ int ir = VDCeilToInt(r);
+ float r2 = r*r;
+
+ dst.mSpans.clear();
+ dst.mBounds.set(-ir, 0, ir+1, 0);
+
+ for(int y = -ir; y <= ir; ++y) {
+ int dx = VDCeilToInt(sqrtf(r2 - y*y));
+
+ if (dx > 0) {
+ dst.mSpans.push_back(0x80008000 + (y << 16) - dx);
+ dst.mSpans.push_back(0x80008001 + (y << 16) + dx);
+ if (dst.mBounds.top > y)
+ dst.mBounds.top = y;
+ if (dst.mBounds.bottom < y)
+ dst.mBounds.bottom = y;
+ }
+ }
+}
+
+void VDPixmapConvolveRegion(VDPixmapRegion& dst, const VDPixmapRegion& r1, const VDPixmapRegion& r2, int dx1, int dx2, int dy) {
+ dst.mSpans.clear();
+ dst.mSpans.resize(r1.mSpans.size()+r2.mSpans.size());
+
+ const uint32 *itA = r1.mSpans.data();
+ const uint32 *itAE = itA + r1.mSpans.size();
+ const uint32 *itB = r2.mSpans.data();
+ const uint32 *itBE = itB + r2.mSpans.size();
+ uint32 *dstp0 = dst.mSpans.data();
+ uint32 *dstp = dst.mSpans.data();
+
+ uint32 offset1 = (dy<<16) + dx1;
+ uint32 offset2 = (dy<<16) + dx2;
+
+ while(itA != itAE && itB != itBE) {
+ uint32 x1;
+ uint32 x2;
+
+ if (itB[0] + offset1 < itA[0]) {
+ // B span is earlier. Use it.
+ x1 = itB[0] + offset1;
+ x2 = itB[1] + offset2;
+ itB += 2;
+
+ // B spans *can* overlap, due to the widening.
+ while(itB != itBE && itB[0]+offset1 <= x2) {
+ uint32 bx2 = itB[1] + offset2;
+ if (x2 < bx2)
+ x2 = bx2;
+
+ itB += 2;
+ }
+
+ goto a_start;
+ } else {
+ // A span is earlier. Use it.
+ x1 = itA[0];
+ x2 = itA[1];
+ itA += 2;
+
+ // A spans don't overlap, so begin merge loop with B first.
+ }
+
+ for(;;) {
+ // If we run out of B spans or the B span doesn't overlap,
+ // then the next A span can't either (because A spans don't
+ // overlap) and we exit.
+
+ if (itB == itBE || itB[0]+offset1 > x2)
+ break;
+
+ do {
+ uint32 bx2 = itB[1] + offset2;
+ if (x2 < bx2)
+ x2 = bx2;
+
+ itB += 2;
+ } while(itB != itBE && itB[0]+offset1 <= x2);
+
+ // If we run out of A spans or the A span doesn't overlap,
+ // then the next B span can't either, because we would have
+ // consumed all overlapping B spans in the above loop.
+a_start:
+ if (itA == itAE || itA[0] > x2)
+ break;
+
+ do {
+ uint32 ax2 = itA[1];
+ if (x2 < ax2)
+ x2 = ax2;
+
+ itA += 2;
+ } while(itA != itAE && itA[0] <= x2);
+ }
+
+ // Flush span.
+ dstp[0] = x1;
+ dstp[1] = x2;
+ dstp += 2;
+ }
+
+ // Copy over leftover spans.
+ memcpy(dstp, itA, sizeof(uint32)*(itAE - itA));
+ dstp += itAE - itA;
+
+ while(itB != itBE) {
+ // B span is earlier. Use it.
+ uint32 x1 = itB[0] + offset1;
+ uint32 x2 = itB[1] + offset2;
+ itB += 2;
+
+ // B spans *can* overlap, due to the widening.
+ while(itB != itBE && itB[0]+offset1 <= x2) {
+ uint32 bx2 = itB[1] + offset2;
+ if (x2 < bx2)
+ x2 = bx2;
+
+ itB += 2;
+ }
+
+ dstp[0] = x1;
+ dstp[1] = x2;
+ dstp += 2;
+ }
+
+ dst.mSpans.resize(dstp - dst.mSpans.data());
+}
+
+void VDPixmapConvolveRegion(VDPixmapRegion& dst, const VDPixmapRegion& r1, const VDPixmapRegion& r2) {
+ VDPixmapRegion temp;
+
+ const uint32 *src1 = r2.mSpans.data();
+ const uint32 *src2 = src1 + r2.mSpans.size();
+
+ dst.mSpans.clear();
+ while(src1 != src2) {
+ uint32 p1 = src1[0];
+ uint32 p2 = src1[1];
+ src1 += 2;
+
+ temp.mSpans.swap(dst.mSpans);
+ VDPixmapConvolveRegion(dst, temp, r1, (p1 & 0xffff) - 0x8000, (p2 & 0xffff) - 0x8000, (p1 >> 16) - 0x8000);
+ }
+}
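+
+// This two-region overload effectively dilates r1 by r2 (a Minkowski sum of
+// spans): for each span of r2 it merges in a copy of r1 offset vertically by
+// the span's y and stretched horizontally across the span's x range.
+// Convolving a glyph region with a small round region, for example, thickens
+// it for rendering borders.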
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample.cpp
new file mode 100644
index 000000000..4d1aef5f5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample.cpp
@@ -0,0 +1,348 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2004 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include <float.h>
+#include <math.h>
+#include <vd2/system/vdalloc.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/memory.h>
+#include <vd2/system/math.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/resample.h>
+#include "uberblit_gen.h"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// the resampler (finally)
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDPixmapResampler : public IVDPixmapResampler {
+public:
+ VDPixmapResampler();
+ ~VDPixmapResampler();
+
+ void SetSplineFactor(double A) { mSplineFactor = A; }
+ void SetFilters(FilterMode h, FilterMode v, bool interpolationOnly);
+ bool Init(uint32 dw, uint32 dh, int dstformat, uint32 sw, uint32 sh, int srcformat);
+ bool Init(const vdrect32f& dstrect, uint32 dw, uint32 dh, int dstformat, const vdrect32f& srcrect, uint32 sw, uint32 sh, int srcformat);
+ void Shutdown();
+
+ void Process(const VDPixmap& dst, const VDPixmap& src);
+
+protected:
+ void ApplyFilters(VDPixmapUberBlitterGenerator& gen, uint32 dw, uint32 dh, float xoffset, float yoffset, float xfactor, float yfactor);
+
+ vdautoptr<IVDPixmapBlitter> mpBlitter;
+ vdautoptr<IVDPixmapBlitter> mpBlitter2;
+ double mSplineFactor;
+ FilterMode mFilterH;
+ FilterMode mFilterV;
+ bool mbInterpOnly;
+
+ vdrect32 mDstRectPlane0;
+ vdrect32 mDstRectPlane12;
+};
+
+IVDPixmapResampler *VDCreatePixmapResampler() { return new VDPixmapResampler; }
+
+VDPixmapResampler::VDPixmapResampler()
+ : mSplineFactor(-0.6)
+ , mFilterH(kFilterCubic)
+ , mFilterV(kFilterCubic)
+ , mbInterpOnly(false)
+{
+}
+
+VDPixmapResampler::~VDPixmapResampler() {
+ Shutdown();
+}
+
+void VDPixmapResampler::SetFilters(FilterMode h, FilterMode v, bool interpolationOnly) {
+ mFilterH = h;
+ mFilterV = v;
+ mbInterpOnly = interpolationOnly;
+}
+
+bool VDPixmapResampler::Init(uint32 dw, uint32 dh, int dstformat, uint32 sw, uint32 sh, int srcformat) {
+ vdrect32f rSrc(0.0f, 0.0f, (float)sw, (float)sh);
+ vdrect32f rDst(0.0f, 0.0f, (float)dw, (float)dh);
+ return Init(rDst, dw, dh, dstformat, rSrc, sw, sh, srcformat);
+}
+
+bool VDPixmapResampler::Init(const vdrect32f& dstrect0, uint32 dw, uint32 dh, int dstformat, const vdrect32f& srcrect0, uint32 sw, uint32 sh, int srcformat) {
+ Shutdown();
+
+ if (dstformat != srcformat || (
+ srcformat != nsVDPixmap::kPixFormat_XRGB8888 &&
+ srcformat != nsVDPixmap::kPixFormat_Y8 &&
+ srcformat != nsVDPixmap::kPixFormat_YUV444_Planar &&
+ srcformat != nsVDPixmap::kPixFormat_YUV422_Planar &&
+ srcformat != nsVDPixmap::kPixFormat_YUV420_Planar &&
+ srcformat != nsVDPixmap::kPixFormat_YUV411_Planar &&
+ srcformat != nsVDPixmap::kPixFormat_YUV410_Planar
+ ))
+ return false;
+
+ // convert destination flips to source flips
+ vdrect32f dstrect(dstrect0);
+ vdrect32f srcrect(srcrect0);
+
+ if (dstrect.left > dstrect.right) {
+ std::swap(dstrect.left, dstrect.right);
+ std::swap(srcrect.left, srcrect.right);
+ }
+
+ if (dstrect.top > dstrect.bottom) {
+ std::swap(dstrect.top, dstrect.bottom);
+ std::swap(srcrect.top, srcrect.bottom);
+ }
+
+ // compute source step factors
+ float xfactor = (float)srcrect.width() / (float)dstrect.width();
+ float yfactor = (float)srcrect.height() / (float)dstrect.height();
+
+ // clip destination rect
+ if (dstrect.left < 0) {
+ float clipx1 = -dstrect.left;
+ srcrect.left += xfactor * clipx1;
+ dstrect.left = 0.0f;
+ }
+
+ if (dstrect.top < 0) {
+ float clipy1 = -dstrect.top;
+ srcrect.top += yfactor * clipy1;
+ dstrect.top = 0.0f;
+ }
+
+ float clipx2 = dstrect.right - (float)dw;
+ if (clipx2 > 0) {
+ srcrect.right -= xfactor * clipx2;
+ dstrect.right = (float)dw;
+ }
+
+ float clipy2 = dstrect.bottom - (float)dh;
+ if (clipy2 > 0) {
+ srcrect.bottom -= yfactor * clipy2;
+ dstrect.bottom = (float)dh;
+ }
+
+ // compute plane 0 dest rect in integral quanta
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(dstformat);
+ mDstRectPlane0.left = VDCeilToInt(dstrect.left - 0.5f);
+ mDstRectPlane0.top = VDCeilToInt(dstrect.top - 0.5f);
+ mDstRectPlane0.right = VDCeilToInt(dstrect.right - 0.5f);
+ mDstRectPlane0.bottom = VDCeilToInt(dstrect.bottom - 0.5f);
+
+ // compute plane 0 stepping parameters
+ float xoffset = (((float)mDstRectPlane0.left + 0.5f) - dstrect.left) * xfactor + srcrect.left;
+ float yoffset = (((float)mDstRectPlane0.top + 0.5f) - dstrect.top ) * yfactor + srcrect.top;
+
+ // compute plane 1/2 dest rect and stepping parameters
+ float xoffset2 = 0.0f;
+ float yoffset2 = 0.0f;
+
+ if (formatInfo.auxbufs > 0) {
+ float xf2 = (float)(1 << formatInfo.auxwbits);
+ float yf2 = (float)(1 << formatInfo.auxhbits);
+ float invxf2 = 1.0f / xf2;
+ float invyf2 = 1.0f / yf2;
+
+ // convert source and dest rects to plane 1/2 space
+ vdrect32f srcrect2(srcrect);
+ vdrect32f dstrect2(dstrect);
+
+ srcrect2.scale(invxf2, invyf2);
+ dstrect2.scale(invxf2, invyf2);
+
+ switch(srcformat) {
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ break;
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ srcrect2.translate(0.25f, 0.0f);
+ dstrect2.translate(0.25f, 0.0f);
+ break;
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ srcrect2.translate(0.25f, 0.0f);
+ dstrect2.translate(0.25f, 0.0f);
+ break;
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ srcrect2.translate(0.375f, 0.0f);
+ dstrect2.translate(0.375f, 0.0f);
+ break;
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ break;
+ default:
+ VDASSERT(false);
+ }
+
+ mDstRectPlane12.left = VDCeilToInt(dstrect2.left - 0.5f);
+ mDstRectPlane12.top = VDCeilToInt(dstrect2.top - 0.5f);
+ mDstRectPlane12.right = VDCeilToInt(dstrect2.right - 0.5f);
+ mDstRectPlane12.bottom = VDCeilToInt(dstrect2.bottom - 0.5f);
+
+ xoffset2 = (((float)mDstRectPlane12.left + 0.5f) - dstrect2.left) * xfactor + srcrect2.left;
+ yoffset2 = (((float)mDstRectPlane12.top + 0.5f) - dstrect2.top ) * yfactor + srcrect2.top;
+ }
+
+ VDPixmapUberBlitterGenerator gen;
+
+ switch(srcformat) {
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ gen.ldsrc(0, 0, 0, 0, sw, sh, VDPixmapGetFormatTokenFromFormat(srcformat), sw*4);
+ ApplyFilters(gen, mDstRectPlane0.width(), mDstRectPlane0.height(), xoffset, yoffset, xfactor, yfactor);
+ break;
+
+ case nsVDPixmap::kPixFormat_Y8:
+ gen.ldsrc(0, 0, 0, 0, sw, sh, kVDPixType_8, sw);
+ ApplyFilters(gen, mDstRectPlane0.width(), mDstRectPlane0.height(), xoffset, yoffset, xfactor, yfactor);
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ gen.ldsrc(0, 0, 0, 0, sw, sh, kVDPixType_8, sw);
+ ApplyFilters(gen, mDstRectPlane0.width(), mDstRectPlane0.height(), xoffset, yoffset, xfactor, yfactor);
+
+ {
+ const VDPixmapFormatInfo& info = VDPixmapGetInfo(dstformat);
+ uint32 subsw = -(-(sint32)sw >> info.auxwbits);
+ uint32 subsh = -(-(sint32)sh >> info.auxhbits);
+
+ VDPixmapUberBlitterGenerator gen2;
+ gen2.ldsrc(0, 0, 0, 0, subsw, subsh, kVDPixType_8, subsw);
+ ApplyFilters(gen2, mDstRectPlane12.width(), mDstRectPlane12.height(), xoffset2, yoffset2, xfactor, yfactor);
+ mpBlitter2 = gen2.create();
+ if (!mpBlitter2)
+ return false;
+ }
+ break;
+ }
+
+ mpBlitter = gen.create();
+ if (!mpBlitter)
+ return false;
+
+ return true;
+}
+
+void VDPixmapResampler::Shutdown() {
+ mpBlitter = NULL;
+ mpBlitter2 = NULL;
+}
+
+void VDPixmapResampler::Process(const VDPixmap& dst, const VDPixmap& src) {
+ if (!mpBlitter)
+ return;
+
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ case nsVDPixmap::kPixFormat_Y8:
+ mpBlitter->Blit(dst, &mDstRectPlane0, src);
+ break;
+
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ case nsVDPixmap::kPixFormat_YUV411_Planar:
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ // blit primary plane
+ mpBlitter->Blit(dst, &mDstRectPlane0, src);
+
+ // slice and blit secondary planes
+ {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(dst.format);
+ VDPixmap pxdst;
+ pxdst.format = nsVDPixmap::kPixFormat_Y8;
+ pxdst.w = -(-dst.w >> formatInfo.auxwbits);
+ pxdst.h = -(-dst.h >> formatInfo.auxhbits);
+ pxdst.pitch = dst.pitch2;
+ pxdst.data = dst.data2;
+
+ VDPixmap pxsrc;
+ pxsrc.format = nsVDPixmap::kPixFormat_Y8;
+ pxsrc.w = -(-src.w >> formatInfo.auxwbits);
+ pxsrc.h = -(-src.h >> formatInfo.auxhbits);
+ pxsrc.pitch = src.pitch2;
+ pxsrc.data = src.data2;
+
+ mpBlitter2->Blit(pxdst, &mDstRectPlane12, pxsrc);
+
+ pxdst.pitch = dst.pitch3;
+ pxdst.data = dst.data3;
+ pxsrc.pitch = src.pitch3;
+ pxsrc.data = src.data3;
+ mpBlitter2->Blit(pxdst, &mDstRectPlane12, pxsrc);
+ }
+ break;
+ }
+}
+
+void VDPixmapResampler::ApplyFilters(VDPixmapUberBlitterGenerator& gen, uint32 dw, uint32 dh, float xoffset, float yoffset, float xfactor, float yfactor) {
+ switch(mFilterH) {
+ case kFilterPoint:
+ gen.pointh(xoffset, xfactor, dw);
+ break;
+
+ case kFilterLinear:
+ gen.linearh(xoffset, xfactor, dw, mbInterpOnly);
+ break;
+
+ case kFilterCubic:
+ gen.cubich(xoffset, xfactor, dw, (float)mSplineFactor, mbInterpOnly);
+ break;
+
+ case kFilterLanczos3:
+ gen.lanczos3h(xoffset, xfactor, dw);
+ break;
+ }
+
+ switch(mFilterV) {
+ case kFilterPoint:
+ gen.pointv(yoffset, yfactor, dh);
+ break;
+
+ case kFilterLinear:
+ gen.linearv(yoffset, yfactor, dh, mbInterpOnly);
+ break;
+
+ case kFilterCubic:
+ gen.cubicv(yoffset, yfactor, dh, (float)mSplineFactor, mbInterpOnly);
+ break;
+
+ case kFilterLanczos3:
+ gen.lanczos3v(yoffset, yfactor, dh);
+ break;
+ }
+}
+
+bool VDPixmapResample(const VDPixmap& dst, const VDPixmap& src, IVDPixmapResampler::FilterMode filter) {
+ VDPixmapResampler r;
+
+ r.SetFilters(filter, filter, false);
+
+ if (!r.Init(dst.w, dst.h, dst.format, src.w, src.h, src.format))
+ return false;
+
+ r.Process(dst, src);
+ return true;
+}
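
For orientation, a minimal usage sketch of the interface this file implements, following the same call sequence as the VDPixmapResample() helper above. The header path, the nested kFilterCubic constant, and deletability of IVDPixmapResampler through vdautoptr are assumed from this file, not verified against the headers:

	// Sketch: scale one pixmap into another with the cubic filter, via the
	// factory defined above.  Formats must match and be one of the supported ones.
	#include <vd2/Kasumi/resample.h>
	#include <vd2/system/vdalloc.h>

	bool ResizeCubic(const VDPixmap& dst, const VDPixmap& src) {
		vdautoptr<IVDPixmapResampler> r(VDCreatePixmapResampler());

		r->SetSplineFactor(-0.6);	// same default the class above uses
		r->SetFilters(IVDPixmapResampler::kFilterCubic, IVDPixmapResampler::kFilterCubic, false);

		if (!r->Init(dst.w, dst.h, dst.format, src.w, src.h, src.format))
			return false;		// unsupported or mismatched formats

		r->Process(dst, src);
		return true;
	}
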
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_kernels.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_kernels.cpp
new file mode 100644
index 000000000..010364e1a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_kernels.cpp
@@ -0,0 +1,255 @@
+#include <math.h>
+#include <vd2/Kasumi/resample_kernels.h>
+
+///////////////////////////////////////////////////////////////////////////
+//
+// utility functions
+//
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
+ inline sint32 scale32x32_fp16(sint32 x, sint32 y) {
+ return (sint32)(((sint64)x * y + 0x8000) >> 16);
+ }
+
+ inline double sinc(double x) {
+ return fabs(x) < 1e-9 ? 1.0 : sin(x) / x;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDResamplerAxis
+//
+///////////////////////////////////////////////////////////////////////////
+
+void VDResamplerAxis::Init(sint32 dudx) {
+ this->dudx = dudx;
+}
+
+void VDResamplerAxis::Compute(sint32 count, sint32 u0, sint32 w, sint32 kernel_width) {
+ u = u0;
+ dx = count;
+
+ sint32 du_kern = (kernel_width-1) << 16;
+ sint32 u2 = u + dudx*(dx-1);
+ sint32 u_limit = w << 16;
+
+ dx_precopy = 0;
+ dx_preclip = 0;
+ dx_active = 0;
+ dx_postclip = 0;
+ dx_postcopy = 0;
+ dx_dualclip = 0;
+
+ sint32 dx_temp = dx;
+ sint32 u_start = u;
+
+ // (desired - u0 + (dudx-1)) / dudx : first pixel >= desired
+
+ sint32 dudx_m1_mu0 = dudx - 1 - u;
+ sint32 first_preclip = (dudx_m1_mu0 + 0x10000 - du_kern) / dudx;
+ sint32 first_active = (dudx_m1_mu0 ) / dudx;
+ sint32 first_postclip = (dudx_m1_mu0 + u_limit - du_kern) / dudx;
+ sint32 first_postcopy = (dudx_m1_mu0 + u_limit - 0x10000) / dudx;
+
+ // clamp
+ if (first_preclip < 0)
+ first_preclip = 0;
+ if (first_active < first_preclip)
+ first_active = first_preclip;
+ if (first_postclip < first_active)
+ first_postclip = first_active;
+ if (first_postcopy < first_postclip)
+ first_postcopy = first_postclip;
+ if (first_preclip > dx)
+ first_preclip = dx;
+ if (first_active > dx)
+ first_active = dx;
+ if (first_postclip > dx)
+ first_postclip = dx;
+ if (first_postcopy > dx)
+ first_postcopy = dx;
+
+ // determine widths
+
+ dx_precopy = first_preclip;
+ dx_preclip = first_active - first_preclip;
+ dx_active = first_postclip - first_active;
+ dx_postclip = first_postcopy - first_postclip;
+ dx_postcopy = dx - first_postcopy;
+
+ // sanity checks
+ sint32 pos0 = dx_precopy;
+ sint32 pos1 = pos0 + dx_preclip;
+ sint32 pos2 = pos1 + dx_active;
+ sint32 pos3 = pos2 + dx_postclip;
+
+ VDASSERT(!((dx_precopy|dx_preclip|dx_active|dx_postcopy|dx_postclip) & 0x80000000));
+ VDASSERT(dx_precopy + dx_preclip + dx_active + dx_postcopy + dx_postclip == dx);
+
+ VDASSERT(!pos0 || u_start + dudx*(pos0 - 1) < 0x10000 - du_kern); // precopy -> preclip
+ VDASSERT( pos0 >= pos1 || u_start + dudx*(pos0 ) >= 0x10000 - du_kern);
+ VDASSERT( pos1 <= pos0 || u_start + dudx*(pos1 - 1) < 0); // preclip -> active
+ VDASSERT( pos1 >= pos2 || u_start + dudx*(pos1 ) >= 0 || !dx_active);
+ VDASSERT( pos2 <= pos1 || u_start + dudx*(pos2 - 1) < u_limit - du_kern || !dx_active); // active -> postclip
+ VDASSERT( pos2 >= pos3 || u_start + dudx*(pos2 ) >= u_limit - du_kern);
+ VDASSERT( pos3 <= pos2 || u_start + dudx*(pos3 - 1) < u_limit - 0x10000); // postclip -> postcopy
+ VDASSERT( pos3 >= dx || u_start + dudx*(pos3 ) >= u_limit - 0x10000);
+
+ u += dx_precopy * dudx;
+
+ // test for overlapping clipping regions
+ if (!dx_active && kernel_width > w) {
+ dx_dualclip = dx_preclip + dx_postclip;
+ dx_preclip = dx_postclip = 0;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDResamplerLinearFilter
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerLinearFilter::VDResamplerLinearFilter(double twofc)
+ : mScale(twofc)
+ , mTaps((int)ceil(1.0 / twofc) * 2)
+{
+}
+
+int VDResamplerLinearFilter::GetFilterWidth() const {
+ return mTaps;
+}
+
+double VDResamplerLinearFilter::EvaluateFilter(double t) const {
+ t = 1.0f - fabs(t)*mScale;
+
+ return t + fabs(t);
+}
+
+void VDResamplerLinearFilter::GenerateFilter(float *dst, double offset) const {
+ double pos = -((double)((mTaps>>1)-1) + offset) * mScale;
+
+ for(unsigned i=0; i<mTaps; ++i) {
+ double t = 1.0 - fabs(pos);
+
+ *dst++ = (float)(t+fabs(t));
+ pos += mScale;
+ }
+}
+
+void VDResamplerLinearFilter::GenerateFilterBank(float *dst) const {
+ for(int offset=0; offset<256; ++offset) {
+ GenerateFilter(dst, offset * (1.0f / 256.0f));
+ dst += mTaps;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDResamplerCubicFilter
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerCubicFilter::VDResamplerCubicFilter(double twofc, double A)
+ : mScale(twofc)
+ , mA0( 1.0 )
+ , mA2(-3.0-A)
+ , mA3( 2.0+A)
+ , mB0(-4.0*A)
+ , mB1( 8.0*A)
+ , mB2(-5.0*A)
+ , mB3( A)
+ , mTaps((int)ceil(2.0 / twofc)*2)
+{
+}
+
+int VDResamplerCubicFilter::GetFilterWidth() const { return mTaps; }
+
+double VDResamplerCubicFilter::EvaluateFilter(double t) const {
+ t = fabs(t)*mScale;
+
+ if (t < 1.0)
+ return mA0 + (t*t)*(mA2 + t*mA3);
+ else if (t < 2.0)
+ return mB0 + t*(mB1 + t*(mB2 + t*mB3));
+ else
+ return 0;
+}
+
+void VDResamplerCubicFilter::GenerateFilter(float *dst, double offset) const {
+ double pos = -((double)((mTaps>>1)-1) + offset) * mScale;
+
+ for(unsigned i=0; i<mTaps; ++i) {
+ double t = fabs(pos);
+ double v = 0;
+
+ if (t < 1.0)
+ v = mA0 + (t*t)*(mA2 + t*mA3);
+ else if (t < 2.0)
+ v = mB0 + t*(mB1 + t*(mB2 + t*mB3));
+
+ *dst++ = (float)v;
+ pos += mScale;
+ }
+}
+
+void VDResamplerCubicFilter::GenerateFilterBank(float *dst) const {
+ for(int offset=0; offset<256; ++offset) {
+ GenerateFilter(dst, offset * (1.0f / 256.0f));
+ dst += mTaps;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDResamplerLanczos3Filter
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerLanczos3Filter::VDResamplerLanczos3Filter(double twofc)
+ : mScale(twofc)
+ , mTaps((int)ceil(3.0 / twofc)*2)
+{
+}
+
+int VDResamplerLanczos3Filter::GetFilterWidth() const {
+ return mTaps;
+}
+
+double VDResamplerLanczos3Filter::EvaluateFilter(double t) const {
+ static const double pi = 3.1415926535897932384626433832795; // pi
+ static const double pi3 = 1.0471975511965977461542144610932; // pi/3
+
+ t *= mScale;
+
+ if (fabs(t) < 3.0)
+ return sinc(pi*t) * sinc(pi3*t);
+ else
+ return 0.0;
+}
+
+void VDResamplerLanczos3Filter::GenerateFilter(float *dst, double offset) const {
+ static const double pi = 3.1415926535897932384626433832795; // pi
+ static const double pi3 = 1.0471975511965977461542144610932; // pi/3
+
+ double t = -(((double)((mTaps>>1)-1) + offset) * mScale);
+
+ for(unsigned i=0; i<mTaps; ++i) {
+ double v = 0;
+
+ if (fabs(t) < 3.0)
+ v = sinc(pi*t) * sinc(pi3*t);
+
+ *dst++ = (float)v;
+ t += mScale;
+ }
+}
+
+void VDResamplerLanczos3Filter::GenerateFilterBank(float *dst) const {
+ for(int offset=0; offset<256; ++offset) {
+ GenerateFilter(dst, offset * (1.0f / 256.0f));
+ dst += mTaps;
+ }
+}
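
Writing the three kernels above out explicitly (the same piecewise forms the EvaluateFilter methods compute, with s = |t| * twofc; the table generators in the next file normalize each row, so constant factors such as the doubled tent drop out):

	k_{\text{linear}}(s)   = \max(0,\, 1 - s)          \quad\text{(the code returns twice this)}
	k_{\text{cubic}}(s)    = \begin{cases} (A+2)s^3 - (A+3)s^2 + 1, & s < 1 \\ A s^3 - 5A s^2 + 8A s - 4A, & 1 \le s < 2 \\ 0, & s \ge 2 \end{cases}
	k_{\text{lanczos3}}(s) = \operatorname{sinc}(\pi s)\,\operatorname{sinc}(\pi s/3)\ \ [s < 3], \qquad \operatorname{sinc}(x) = \sin(x)/x
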
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_stages.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages.cpp
new file mode 100644
index 000000000..fcea6c669
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages.cpp
@@ -0,0 +1,149 @@
+#include <vd2/system/math.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/resample_kernels.h>
+#include "resample_stages.h"
+
+VDSteppedAllocator::VDSteppedAllocator(size_t initialSize)
+ : mpHead(NULL)
+ , mpAllocNext(NULL)
+ , mAllocLeft(0)
+ , mAllocNext(initialSize)
+ , mAllocInit(initialSize)
+{
+}
+
+VDSteppedAllocator::~VDSteppedAllocator() {
+ clear();
+}
+
+void VDSteppedAllocator::clear() {
+ while(Block *p = mpHead) {
+ mpHead = mpHead->next;
+ free(p);
+ }
+ mAllocLeft = 0;
+ mAllocNext = mAllocInit;
+}
+
+void *VDSteppedAllocator::allocate(size_type n) {
+ n = (n+15) & ~15;
+ if (mAllocLeft < n) {
+ mAllocLeft = mAllocNext;
+ mAllocNext += (mAllocNext >> 1);
+ if (mAllocLeft < n)
+ mAllocLeft = n;
+
+ Block *t = (Block *)malloc(sizeof(Block) + mAllocLeft);
+
+ if (mpHead)
+ mpHead->next = t;
+
+ mpHead = t;
+ mpHead->next = NULL;
+
+ mpAllocNext = (char *)(mpHead + 1);
+ }
+
+ void *p = mpAllocNext;
+ mpAllocNext += n;
+ mAllocLeft -= n;
+ return p;
+}
+
+void VDResamplerGenerateTable(sint32 *dst, const IVDResamplerFilter& filter) {
+ const unsigned width = filter.GetFilterWidth();
+ vdblock<float> filters(width * 256);
+ float *src = filters.data();
+
+ filter.GenerateFilterBank(src);
+
+ for(unsigned phase=0; phase < 256; ++phase) {
+ float sum = 0;
+
+ for(unsigned i=0; i<width; ++i)
+ sum += src[i];
+
+ float scalefac = 16384.0f / sum;
+
+ for(unsigned j=0; j<width; j += 2) {
+ int v0 = VDRoundToIntFast(src[j+0] * scalefac);
+ int v1 = VDRoundToIntFast(src[j+1] * scalefac);
+
+ dst[j+0] = v0;
+ dst[j+1] = v1;
+ }
+
+ src += width;
+ dst += width;
+ }
+}
+
+void VDResamplerGenerateTableF(float *dst, const IVDResamplerFilter& filter) {
+ const unsigned width = filter.GetFilterWidth();
+ filter.GenerateFilterBank(dst);
+
+ for(unsigned phase=0; phase < 256; ++phase) {
+ float sum = 0;
+
+ for(unsigned i=0; i<width; ++i)
+ sum += dst[i];
+
+ float scalefac = 1.0f / sum;
+
+ for(unsigned j=0; j<width; ++j)
+ *dst++ *= scalefac;
+ }
+}
+
+void VDResamplerGenerateTable2(sint32 *dst, const IVDResamplerFilter& filter, sint32 count, sint32 u0, sint32 dudx) {
+ const unsigned width = filter.GetFilterWidth();
+ vdblock<float> filters(width);
+ float *src = filters.data();
+
+ filter.GenerateFilterBank(src);
+
+ for(sint32 i=0; i<count; ++i) {
+ sint32 u = u0 + dudx*i;
+
+ *dst++ = u >> 16;
+ filter.GenerateFilter(src, (double)(u & 0xffff) / 65536.0);
+
+ float sum = 0;
+ for(uint32 j=0; j<width; ++j)
+ sum += src[j];
+
+ float scalefac = 16384.0f / sum;
+
+ sint32 isum = 0;
+ for(uint32 j=0; j<width; ++j) {
+ sint32 v = VDRoundToIntFast(src[j] * scalefac);
+
+ dst[j] = v;
+ isum += v;
+ }
+
+ sint32 ierr = 16384 - isum;
+		sint32 idelta = 2*(ierr >> 31) + 1;
+ while(ierr) {
+ for(uint32 j=0; j<width && ierr; ++j) {
+ if (!dst[j])
+ continue;
+
+ dst[j] += idelta;
+ ierr -= idelta;
+ }
+ }
+
+ dst += width;
+ }
+}
+
+void VDResamplerSwizzleTable(sint32 *dst, unsigned pairs) {
+ do {
+ sint32 v0 = dst[0];
+ sint32 v1 = dst[1];
+
+ dst[0] = dst[1] = (v0 & 0xffff) + (v1<<16);
+ dst += 2;
+ } while(--pairs);
+}
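
A short note on the table layouts produced above, as they can be read off this file (inferred from the code, not from upstream documentation):

	// VDResamplerGenerateTable / VDResamplerGenerateTableF:
	//   256 rows (one per 8-bit subpixel phase) of GetFilterWidth() taps each;
	//   integer rows are scaled toward a sum of 16384 (2.14 fixed point),
	//   float rows are normalized to sum to 1.0.
	//
	// VDResamplerGenerateTable2 (per output pixel):
	//   [ integer source offset (u >> 16) ][ width taps ], with the rounding
	//   residue spread one unit at a time over non-zero taps so each row sums
	//   to exactly 16384.
	//
	// VDResamplerSwizzleTable:
	//   packs each coefficient pair (c0, c1) as (c0 & 0xffff) + (c1 << 16) and
	//   stores the result in both dwords of the pair -- the interleaved form the
	//   pmaddwd-based MMX/SSE2 loops in the following files consume.
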
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_reference.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_reference.cpp
new file mode 100644
index 000000000..94bee7c9e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_reference.cpp
@@ -0,0 +1,425 @@
+#include <vd2/system/memory.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "resample_stages_reference.h"
+#include <vd2/Kasumi/resample_kernels.h>
+#include "blt_spanutils.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+int VDResamplerRowStageSeparablePoint8::GetWindowSize() const {
+ return 1;
+}
+
+void VDResamplerRowStageSeparablePoint8::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ *dst++ = src[u>>16];
+ u += dudx;
+ } while(--w);
+}
+
+int VDResamplerRowStageSeparablePoint16::GetWindowSize() const {
+ return 1;
+}
+
+void VDResamplerRowStageSeparablePoint16::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint16 *dst = (uint16 *)dst0;
+ const uint16 *src = (const uint16 *)src0;
+
+ do {
+ *dst++ = src[u>>16];
+ u += dudx;
+ } while(--w);
+}
+
+int VDResamplerRowStageSeparablePoint32::GetWindowSize() const {
+ return 1;
+}
+
+void VDResamplerRowStageSeparablePoint32::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ do {
+ *dst++ = src[u>>16];
+ u += dudx;
+ } while(--w);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+int VDResamplerRowStageSeparableLinear8::GetWindowSize() const {return 2;}
+void VDResamplerRowStageSeparableLinear8::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ do {
+ const sint32 iu = u>>16;
+ const uint32 p0 = src[iu];
+ const uint32 p1 = src[iu+1];
+ const uint32 f = (u >> 8) & 0xff;
+
+ *dst++ = (uint8)(p0 + (((sint32)(p1 - p0)*f + 0x80)>>8));
+ u += dudx;
+ } while(--w);
+}
+
+void VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ VDASSERT(!u && dudx == 0x8000);
+
+ nsVDPixmapSpanUtils::horiz_expand2x_coaligned(dst, src, w);
+}
+
+int VDResamplerRowStageSeparableLinear32::GetWindowSize() const {return 2;}
+void VDResamplerRowStageSeparableLinear32::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+
+ do {
+ const sint32 iu = u>>16;
+ const uint32 p0 = src[iu];
+ const uint32 p1 = src[iu+1];
+ const uint32 f = (u >> 8) & 0xff;
+
+ const uint32 p0_rb = p0 & 0xff00ff;
+ const uint32 p1_rb = p1 & 0xff00ff;
+ const uint32 p0_g = p0 & 0xff00;
+ const uint32 p1_g = p1 & 0xff00;
+
+ *dst++ = ((p0_rb + (((p1_rb - p0_rb)*f + 0x800080)>>8)) & 0xff00ff)
+ + ((p0_g + (((p1_g - p0_g )*f + 0x008000)>>8)) & 0x00ff00);
+ u += dudx;
+ } while(--w);
+}
+
+int VDResamplerColStageSeparableLinear8::GetWindowSize() const {return 2;}
+void VDResamplerColStageSeparableLinear8::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src0 = (const uint8 *)srcarray[0];
+ const uint8 *src1 = (const uint8 *)srcarray[1];
+ const uint32 f = (phase >> 8) & 0xff;
+
+ do {
+ const uint32 p0 = *src0++;
+ const uint32 p1 = *src1++;
+
+ *dst++ = (uint8)(p0 + (((p1 - p0)*f + 0x80)>>8));
+ } while(--w);
+}
+
+int VDResamplerColStageSeparableLinear32::GetWindowSize() const {return 2;}
+void VDResamplerColStageSeparableLinear32::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src0 = (const uint32 *)srcarray[0];
+ const uint32 *src1 = (const uint32 *)srcarray[1];
+ const uint32 f = (phase >> 8) & 0xff;
+
+ do {
+ const uint32 p0 = *src0++;
+ const uint32 p1 = *src1++;
+
+ const uint32 p0_rb = p0 & 0xff00ff;
+ const uint32 p1_rb = p1 & 0xff00ff;
+ const uint32 p0_g = p0 & 0xff00;
+ const uint32 p1_g = p1 & 0xff00;
+
+ *dst++ = ((p0_rb + (((p1_rb - p0_rb)*f + 0x800080)>>8)) & 0xff00ff)
+ + ((p0_g + (((p1_g - p0_g )*f + 0x008000)>>8)) & 0x00ff00);
+ } while(--w);
+}
+
+VDResamplerRowStageSeparableTable8::VDResamplerRowStageSeparableTable8(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTable(mFilterBank.data(), filter);
+}
+
+int VDResamplerRowStageSeparableTable8::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerRowStageSeparableTable8::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+ const unsigned ksize = (int)mFilterBank.size() >> 8;
+ const sint32 *filterBase = mFilterBank.data();
+
+ do {
+ const uint8 *src2 = src + (u>>16);
+ const sint32 *filter = filterBase + ksize*((u>>8)&0xff);
+ u += dudx;
+
+ int b = 0x2000;
+ for(unsigned i = ksize; i; --i) {
+ uint8 p = *src2++;
+ sint32 coeff = *filter++;
+
+ b += (sint32)p*coeff;
+ }
+
+ b >>= 14;
+
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ *dst++ = (uint8)b;
+ } while(--w);
+}
+
+VDResamplerRowStageSeparableTable32::VDResamplerRowStageSeparableTable32(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTable(mFilterBank.data(), filter);
+}
+
+int VDResamplerRowStageSeparableTable32::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerRowStageSeparableTable32::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *src = (const uint32 *)src0;
+ const unsigned ksize = (int)mFilterBank.size() >> 8;
+ const sint32 *filterBase = mFilterBank.data();
+
+ do {
+ const uint32 *src2 = src + (u>>16);
+ const sint32 *filter = filterBase + ksize*((u>>8)&0xff);
+ u += dudx;
+
+ int r = 0x2000, g = 0x2000, b = 0x2000;
+ for(unsigned i = ksize; i; --i) {
+ uint32 p = *src2++;
+ sint32 coeff = *filter++;
+
+ r += ((p>>16)&0xff)*coeff;
+ g += ((p>> 8)&0xff)*coeff;
+ b += ((p )&0xff)*coeff;
+ }
+
+ r <<= 2;
+ g >>= 6;
+ b >>= 14;
+
+ if ((uint32)r >= 0x01000000)
+ r = ~r >> 31;
+ if ((uint32)g >= 0x00010000)
+ g = ~g >> 31;
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ *dst++ = (r & 0xff0000) + (g & 0xff00) + (b & 0xff);
+ } while(--w);
+}
+
+VDResamplerRowStageSeparableTable32Fx4::VDResamplerRowStageSeparableTable32Fx4(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTableF(mFilterBank.data(), filter);
+}
+
+int VDResamplerRowStageSeparableTable32Fx4::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerRowStageSeparableTable32Fx4::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ float *dst = (float *)dst0;
+ const float *src = (const float *)src0;
+ const unsigned ksize = (int)mFilterBank.size() >> 8;
+ const float *filterBase = mFilterBank.data();
+
+ do {
+ const float *src2 = src + (u>>16)*4;
+ const float *filter = filterBase + ksize*((u>>8)&0xff);
+ u += dudx;
+
+ float r = 0, g = 0, b = 0, a = 0;
+ for(unsigned i = ksize; i; --i) {
+ float coeff = *filter++;
+
+ r += coeff * src2[0];
+ g += coeff * src2[1];
+ b += coeff * src2[2];
+ a += coeff * src2[3];
+ src2 += 4;
+ }
+
+ dst[0] = r;
+ dst[1] = g;
+ dst[2] = b;
+ dst[3] = a;
+ dst += 4;
+ } while(--w);
+}
+
+VDResamplerRowStageSeparableTable32F::VDResamplerRowStageSeparableTable32F(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTableF(mFilterBank.data(), filter);
+}
+
+int VDResamplerRowStageSeparableTable32F::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerRowStageSeparableTable32F::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ float *dst = (float *)dst0;
+ const float *src = (const float *)src0;
+ const unsigned ksize = (int)mFilterBank.size() >> 8;
+ const float *filterBase = mFilterBank.data();
+
+ VDCPUCleanupExtensions();
+
+ do {
+ const float *src2 = src + (u>>16);
+ const float *filter = filterBase + ksize*((u>>8)&0xff);
+ u += dudx;
+
+ float r = 0;
+ for(unsigned i = ksize; i; --i) {
+ float coeff = *filter++;
+
+ r += coeff * src2[0];
+ ++src2;
+ }
+
+ dst[0] = r;
+ ++dst;
+ } while(--w);
+}
+
+VDResamplerColStageSeparableTable8::VDResamplerColStageSeparableTable8(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTable(mFilterBank.data(), filter);
+}
+
+int VDResamplerColStageSeparableTable8::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerColStageSeparableTable8::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *const *src = (const uint8 *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const sint32 *filter = &mFilterBank[((phase>>8)&0xff) * ksize];
+
+ for(uint32 i=0; i<w; ++i) {
+ int b = 0x2000;
+ const sint32 *filter2 = filter;
+ const uint8 *const *src2 = src;
+
+ for(unsigned j = ksize; j; --j) {
+ sint32 p = (*src2++)[i];
+ sint32 coeff = *filter2++;
+
+ b += p*coeff;
+ }
+
+ b >>= 14;
+
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ *dst++ = (uint8)b;
+ }
+}
+
+VDResamplerColStageSeparableTable32::VDResamplerColStageSeparableTable32(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTable(mFilterBank.data(), filter);
+}
+
+int VDResamplerColStageSeparableTable32::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerColStageSeparableTable32::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ uint32 *dst = (uint32 *)dst0;
+ const uint32 *const *src = (const uint32 *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const sint32 *filter = &mFilterBank[((phase>>8)&0xff) * ksize];
+
+ for(uint32 i=0; i<w; ++i) {
+ int r = 0x2000, g = 0x2000, b = 0x2000;
+ const sint32 *filter2 = filter;
+ const uint32 *const *src2 = src;
+
+ for(unsigned j = ksize; j; --j) {
+ uint32 p = (*src2++)[i];
+ sint32 coeff = *filter2++;
+
+ r += ((p>>16)&0xff)*coeff;
+ g += ((p>> 8)&0xff)*coeff;
+ b += ((p )&0xff)*coeff;
+ }
+
+ r <<= 2;
+ g >>= 6;
+ b >>= 14;
+
+ if ((uint32)r >= 0x01000000)
+ r = ~r >> 31;
+ if ((uint32)g >= 0x00010000)
+ g = ~g >> 31;
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ *dst++ = (r & 0xff0000) + (g & 0xff00) + (b & 0xff);
+ }
+}
+
+VDResamplerColStageSeparableTable32F::VDResamplerColStageSeparableTable32F(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTableF(mFilterBank.data(), filter);
+}
+
+int VDResamplerColStageSeparableTable32F::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerColStageSeparableTable32F::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ float *dst = (float *)dst0;
+ const float *const *src = (const float *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const float *filter = &mFilterBank[((phase>>8)&0xff) * ksize];
+
+ for(uint32 i=0; i<w; ++i) {
+ float r = 0;
+ const float *filter2 = filter;
+ const float *const *src2 = src;
+
+ for(unsigned j = ksize; j; --j) {
+ const float *p = (*src2++) + i;
+ float coeff = *filter2++;
+
+ r += p[0]*coeff;
+ }
+
+ dst[0] = r;
+ ++dst;
+ }
+}
+
+VDResamplerColStageSeparableTable32Fx4::VDResamplerColStageSeparableTable32Fx4(const IVDResamplerFilter& filter) {
+ mFilterBank.resize(filter.GetFilterWidth() * 256);
+ VDResamplerGenerateTableF(mFilterBank.data(), filter);
+}
+
+int VDResamplerColStageSeparableTable32Fx4::GetWindowSize() const {return (int)mFilterBank.size() >> 8;}
+
+void VDResamplerColStageSeparableTable32Fx4::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ float *dst = (float *)dst0;
+ const float *const *src = (const float *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const float *filter = &mFilterBank[((phase>>8)&0xff) * ksize];
+
+ for(uint32 i=0; i<w; ++i) {
+ float r = 0, g = 0, b = 0, a = 0;
+ const float *filter2 = filter;
+ const float *const *src2 = src;
+
+ for(unsigned j = ksize; j; --j) {
+ const float *p = (*src2++) + i*4;
+ float coeff = *filter2++;
+
+ r += p[0]*coeff;
+ g += p[1]*coeff;
+ b += p[2]*coeff;
+ a += p[3]*coeff;
+ }
+
+ dst[0] = r;
+ dst[1] = g;
+ dst[2] = b;
+ dst[3] = a;
+ dst += 4;
+ }
+}
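
One idiom recurs throughout the reference stages above: the saturation applied after the >>14. A standalone restatement (it relies, as the original does, on arithmetic right shift of negative ints):

	#include <cstdint>

	static inline uint8_t SaturateTo8(int32_t v) {
		// Same trick as the stages above: any value outside 0..255 trips the
		// unsigned compare; ~v >> 31 is 0 when v was negative and -1 (all ones,
		// low byte 255) when v overflowed past 255.
		if ((uint32_t)v >= 0x100)
			v = ~v >> 31;
		return (uint8_t)v;
	}

	// SaturateTo8(-7) == 0,  SaturateTo8(300) == 255,  SaturateTo8(128) == 128
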
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x64.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x64.cpp
new file mode 100644
index 000000000..a206d37d8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x64.cpp
@@ -0,0 +1,26 @@
+#include "resample_stages_x64.h"
+
+extern "C" long vdasm_resize_table_col_SSE2(uint32 *out, const uint32 *const*in_table, const int *filter, int filter_width, uint32 w);
+extern "C" long vdasm_resize_table_row_SSE2(uint32 *out, const uint32 *in, const int *filter, int filter_width, uint32 w, long accum, long frac);
+
+VDResamplerSeparableTableRowStageSSE2::VDResamplerSeparableTableRowStageSSE2(const IVDResamplerFilter& filter)
+ : VDResamplerRowStageSeparableTable32(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (uint32)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableRowStageSSE2::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_SSE2((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+VDResamplerSeparableTableColStageSSE2::VDResamplerSeparableTableColStageSSE2(const IVDResamplerFilter& filter)
+ : VDResamplerColStageSeparableTable32(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (uint32)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableColStageSSE2::Process(void *dst, const void *const *src, uint32 w, sint32 phase) {
+ const unsigned filtSize = (unsigned)mFilterBank.size() >> 8;
+
+ vdasm_resize_table_col_SSE2((uint32*)dst, (const uint32 *const *)src, (const int *)mFilterBank.data() + filtSize*((phase >> 8) & 0xff), filtSize, w);
+}
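
The x64 column stage above selects the per-phase kernel slice in C++ (the x86 counterparts in the next file pass the fractional accumulator down to the assembly instead). The selection, spelled out on its own -- the helper name is mine, only the arithmetic comes from the call above:

	#include <cstdint>
	#include <cstddef>

	// phase carries the subpixel position; bits 8..15 index one of the 256
	// precomputed kernels, each ksize coefficients long.
	static inline const int32_t *PhaseSlice(const int32_t *bank, size_t ksize, int32_t phase) {
		return bank + ksize * ((phase >> 8) & 0xff);
	}
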
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x86.cpp
new file mode 100644
index 000000000..bc4db574f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/resample_stages_x86.cpp
@@ -0,0 +1,1277 @@
+#include <numeric>
+#include "blt_spanutils_x86.h"
+#include "resample_stages_x86.h"
+#include <vd2/Kasumi/resample_kernels.h>
+
+#ifdef _MSC_VER
+ #pragma warning(disable: 4799) // warning C4799: function 'vdasm_resize_table_row_8_k8_4x_MMX' has no EMMS instruction
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
+extern "C" void vdasm_resize_table_row_8_k8_4x_SSE41(void *dst, const void *src, uint32 width, const void *kernel);
+extern "C" void vdasm_resize_table_row_8_k16_4x_SSE41(void *dst, const void *src, uint32 width, const void *kernel);
+extern "C" void vdasm_resize_table_row_8_SSE41(void *dst, const void *src, uint32 width, const void *kernel, uint32 kwidth);
+extern "C" void vdasm_resize_table_col_8_k2_SSE41(void *dst, const void *const *srcs, uint32 width, const void *kernel);
+extern "C" void vdasm_resize_table_col_8_k4_SSE41(void *dst, const void *const *srcs, uint32 width, const void *kernel);
+
+///////////////////////////////////////////////////////////////////////////////
+
+namespace {
+ struct ScaleInfo {
+ void *dst;
+ uintptr src;
+ uint32 accum;
+ uint32 fracinc;
+ sint32 intinc;
+ uint32 count;
+ };
+
+ extern "C" void vdasm_resize_point32(const ScaleInfo *);
+}
+
+int VDResamplerSeparablePointRowStageX86::GetWindowSize() const {return 1;}
+void VDResamplerSeparablePointRowStageX86::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ ScaleInfo info;
+
+ info.dst = (uint32 *)dst + w;
+ info.src = ((uintptr)src >> 2) + (u>>16);
+ info.accum = u<<16;
+ info.fracinc = dudx << 16;
+ info.intinc = (sint32)dudx >> 16;
+ info.count = -(sint32)w*4;
+
+ vdasm_resize_point32(&info);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf_ISSE::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *src = (const uint8 *)src0;
+
+ nsVDPixmapSpanUtils::horiz_expand2x_coaligned_ISSE(dst, src, w);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+extern "C" void vdasm_resize_point32_MMX(const ScaleInfo *);
+extern "C" void vdasm_resize_interp_row_run_MMX(void *dst, const void *src, uint32 width, sint64 xaccum, sint64 x_inc);
+extern "C" void vdasm_resize_interp_col_run_MMX(void *dst, const void *src1, const void *src2, uint32 width, uint32 yaccum);
+extern "C" void vdasm_resize_ccint_row_MMX(void *dst, const void *src, uint32 count, uint32 xaccum, sint32 xinc, const void *tbl);
+extern "C" void vdasm_resize_ccint_col_MMX(void *dst, const void *src1, const void *src2, const void *src3, const void *src4, uint32 count, const void *tbl);
+extern "C" long vdasm_resize_table_col_MMX(uint32 *out, const uint32 *const*in_table, const int *filter, int filter_width, uint32 w, long frac);
+extern "C" long vdasm_resize_table_row_MMX(uint32 *out, const uint32 *in, const int *filter, int filter_width, uint32 w, long accum, long frac);
+
+int VDResamplerSeparablePointRowStageMMX::GetWindowSize() const {return 1;}
+void VDResamplerSeparablePointRowStageMMX::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ ScaleInfo info;
+
+ info.dst = (uint32 *)dst + w;
+ info.src = ((uintptr)src >> 2) + (u>>16);
+ info.accum = u<<16;
+ info.fracinc = dudx << 16;
+ info.intinc = (sint32)dudx >> 16;
+ info.count = -(sint32)w*4;
+
+ vdasm_resize_point32_MMX(&info);
+}
+
+int VDResamplerSeparableLinearRowStageMMX::GetWindowSize() const {return 2;}
+void VDResamplerSeparableLinearRowStageMMX::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_interp_row_run_MMX(dst0, src0, w, (sint64)u << 16, (sint64)dudx << 16);
+}
+
+int VDResamplerSeparableLinearColStageMMX::GetWindowSize() const {return 2;}
+void VDResamplerSeparableLinearColStageMMX::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ vdasm_resize_interp_col_run_MMX(dst0, srcarray[0], srcarray[1], w, phase);
+}
+
+VDResamplerSeparableCubicRowStageMMX::VDResamplerSeparableCubicRowStageMMX(double A)
+ : mFilterBank(1024)
+{
+ sint32 *p = mFilterBank.data();
+ VDResamplerGenerateTable(p, VDResamplerCubicFilter(1.0, A));
+ VDResamplerSwizzleTable(p, 512);
+}
+
+int VDResamplerSeparableCubicRowStageMMX::GetWindowSize() const {return 4;}
+void VDResamplerSeparableCubicRowStageMMX::Process(void *dst0, const void *src0, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_ccint_row_MMX(dst0, src0, w, u, dudx, mFilterBank.data());
+}
+
+VDResamplerSeparableCubicColStageMMX::VDResamplerSeparableCubicColStageMMX(double A)
+ : mFilterBank(1024)
+{
+ sint32 *p = mFilterBank.data();
+ VDResamplerGenerateTable(p, VDResamplerCubicFilter(1.0, A));
+ VDResamplerSwizzleTable(p, 512);
+}
+
+int VDResamplerSeparableCubicColStageMMX::GetWindowSize() const {return 4;}
+void VDResamplerSeparableCubicColStageMMX::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ vdasm_resize_ccint_col_MMX(dst0, srcarray[0], srcarray[1], srcarray[2], srcarray[3], w, mFilterBank.data() + ((phase>>6)&0x3fc));
+}
+
+VDResamplerSeparableTableRowStage8MMX::VDResamplerSeparableTableRowStage8MMX(const IVDResamplerFilter& filter)
+ : VDResamplerRowStageSeparableTable32(filter)
+ , mLastSrcWidth(0)
+ , mLastDstWidth(0)
+ , mLastU(0)
+ , mLastDUDX(0)
+{
+ mAlignedKernelWidth = (GetWindowSize() + 6) & ~3;
+ mAlignedKernelSize = mAlignedKernelWidth + 4;
+}
+
+void VDResamplerSeparableTableRowStage8MMX::Init(const VDResamplerAxis& axis, uint32 srcw) {
+ uint32 w = axis.dx_preclip + axis.dx_active + axis.dx_postclip + axis.dx_dualclip;
+
+ if (mLastSrcWidth != srcw || mLastDstWidth != w || mLastU != axis.u || mLastDUDX != axis.dudx) {
+ mLastSrcWidth = srcw;
+ mLastDstWidth = w;
+ mLastU = axis.u;
+ mLastDUDX = axis.dudx;
+
+ RedoRowFilters(axis, w, srcw);
+ }
+}
+
+void VDResamplerSeparableTableRowStage8MMX::RedoRowFilters(const VDResamplerAxis& axis, uint32 w, uint32 srcw) {
+ int kstride = mFilterBank.size() >> 8;
+ int ksize = mAlignedKernelWidth;
+ int kesize = mAlignedKernelSize;
+
+ mRowKernels.clear();
+ mRowKernelSize = w * kesize;
+
+ mRowKernels.resize(mRowKernelSize * 4, 0);
+
+ for(int byteOffset = 0; byteOffset < 4; ++byteOffset) {
+ sint16 *dst = mRowKernels.data() + mRowKernelSize * byteOffset;
+ int ksizeThisOffset = std::min<int>(ksize, (byteOffset + srcw + 3) & ~3);
+
+ mKernelSizeByOffset[byteOffset] = ksizeThisOffset;
+
+ sint32 u = axis.u;
+ sint32 uoffmin = -byteOffset;
+ sint32 uoffmax = ((srcw + byteOffset + 3) & ~3) - byteOffset - ksizeThisOffset;
+ for(uint32 i=0; i<w; ++i) {
+ sint32 uoffset = u >> 16;
+ sint32 uoffset2 = ((uoffset + byteOffset) & ~3) - byteOffset;
+
+ if (uoffset2 < uoffmin)
+ uoffset2 = uoffmin;
+
+ if (uoffset2 > uoffmax)
+ uoffset2 = uoffmax;
+
+ VDASSERT(uoffset2 + ksizeThisOffset <= (((sint32)srcw + byteOffset + 3) & ~3));
+
+ *(sint32 *)dst = uoffset2;
+ dst += 2;
+ *dst++ = 0;
+ *dst++ = 0;
+
+ uint32 phase = (u >> 8) & 255;
+ const sint32 *src = &mFilterBank[kstride * phase];
+
+ sint32 start = 0;
+ sint32 end = kstride;
+
+ int dstoffset = uoffset - uoffset2;
+
+ // check for filter kernel overlapping left source boundary
+ if (uoffset < 0)
+ start = -uoffset;
+
+ // check for filter kernel overlapping right source boundary
+ if (uoffset + end > (sint32)srcw)
+ end = srcw - uoffset;
+
+ VDASSERT(dstoffset + start >= 0);
+ VDASSERT(dstoffset + end <= ksizeThisOffset);
+
+ sint16 *dst2 = dst + dstoffset;
+ dst += ksizeThisOffset;
+
+ for(int j=start; j<end; ++j)
+ dst2[j] = src[j];
+
+ if (start > 0)
+ dst2[start] = std::accumulate(src, src+start, dst2[start]);
+
+ if (end < kstride)
+ dst2[end - 1] = std::accumulate(src+end, src+kstride, dst2[end - 1]);
+
+ u += axis.dudx;
+ }
+ }
+
+ // swizzle rows where optimization is possible
+ vdfastvector<sint16> temp;
+
+ int quads = w >> 2;
+ int quadRemainder = w & 3;
+
+ for(int byteOffset = 0; byteOffset < 4; ++byteOffset) {
+ int ksizeThisOffset = mKernelSizeByOffset[byteOffset];
+ int kpairs = ksizeThisOffset >> 2;
+
+ if (ksizeThisOffset < 8 || ksizeThisOffset > 12) {
+ mbQuadOptimizationEnabled[byteOffset] = false;
+ } else {
+ ptrdiff_t unswizzledStride = (ksizeThisOffset >> 1) + 2;
+
+ mbQuadOptimizationEnabled[byteOffset] = true;
+ mTailOffset[byteOffset] = quads * (8 + ksizeThisOffset*4);
+
+ uint32 *dst = (uint32 *)&mRowKernels[mRowKernelSize * byteOffset];
+ temp.resize(mRowKernelSize);
+ memcpy(temp.data(), dst, mRowKernelSize*2);
+
+ const uint32 *src0 = (const uint32 *)temp.data();
+ const uint32 *src1 = src0 + unswizzledStride;
+ const uint32 *src2 = src1 + unswizzledStride;
+ const uint32 *src3 = src2 + unswizzledStride;
+ ptrdiff_t srcskip = unswizzledStride * 3;
+
+ for(int q = 0; q < quads; ++q) {
+ dst[0] = src0[0];
+ dst[1] = src1[0];
+ dst[2] = src2[0];
+ dst[3] = src3[0];
+ src0 += 2;
+ src1 += 2;
+ src2 += 2;
+ src3 += 2;
+ dst += 4;
+
+ for(int p = 0; p < kpairs; ++p) {
+ dst[0] = src0[0];
+ dst[1] = src0[1];
+ dst[2] = src1[0];
+ dst[3] = src1[1];
+ dst[4] = src2[0];
+ dst[5] = src2[1];
+ dst[6] = src3[0];
+ dst[7] = src3[1];
+ dst += 8;
+ src0 += 2;
+ src1 += 2;
+ src2 += 2;
+ src3 += 2;
+ }
+
+ src0 += srcskip;
+ src1 += srcskip;
+ src2 += srcskip;
+ src3 += srcskip;
+ }
+
+ memcpy(dst, src0, unswizzledStride * 4 * quadRemainder);
+
+ VDASSERT(dst + unswizzledStride * quadRemainder <= (void *)(mRowKernels.data() + (mRowKernelSize * (byteOffset + 1))));
+ }
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_row_8_k8_4x_MMX(void *dst, const void *src, uint32 width, const void *kernel) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000000000002000;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov ebp, [esp + 4 + 16] ;ebp = dst
+ mov esi, [esp + 12 + 16] ;esi = width
+ mov edi, [esp + 16 + 16] ;edi = kernel
+yloop:
+ ;eax = temp
+ ;ebx = temp
+ ;ecx = temp
+ ;edx = temp
+ ;esi = horiz counter
+ ;edi = filter list
+ ;ebp = destination
+
+ mov eax, [edi+0]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [esp+8+16]
+ add eax, edx
+ add ebx, edx
+ add ecx, edx
+ add edx, [edi+12]
+
+ movd mm0, [eax]
+ punpcklbw mm0, mm7
+
+ pmaddwd mm0, [edi+16]
+ movd mm1, [ebx]
+ punpcklbw mm1, mm7
+
+ pmaddwd mm1, [edi+24]
+ movd mm2, [ecx]
+ punpcklbw mm2, mm7
+
+ pmaddwd mm2, [edi+32]
+ movd mm3, [edx]
+ punpcklbw mm3, mm7
+
+ pmaddwd mm3, [edi+40]
+ movd mm4, [eax+4]
+ paddd mm0, mm6
+
+ movd mm5, [ebx+4]
+ punpcklbw mm4, mm7
+ paddd mm1, mm6
+
+ pmaddwd mm4, [edi+48]
+ punpcklbw mm5, mm7
+ paddd mm2, mm6
+
+ pmaddwd mm5, [edi+56]
+ paddd mm3, mm6
+ paddd mm0, mm4
+
+ paddd mm1, mm5
+ movd mm4, [ecx+4]
+ punpcklbw mm4, mm7
+
+ pmaddwd mm4, [edi+64]
+ movd mm5, [edx+4]
+ punpcklbw mm5, mm7
+
+ pmaddwd mm5, [edi+72]
+ paddd mm2, mm4
+ paddd mm3, mm5
+
+ movq mm4, mm0
+ punpckldq mm0, mm1
+ movq mm5, mm2
+ punpckldq mm2, mm3
+ punpckhdq mm4, mm1
+ punpckhdq mm5, mm3
+ paddd mm0, mm4
+ paddd mm2, mm5
+ psrad mm0, 14
+ psrad mm2, 14
+
+ packssdw mm0, mm2
+ packuswb mm0, mm0
+
+ add edi, 80
+
+ movd [ebp], mm0
+ add ebp, 4
+ sub esi, 1
+ jne yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_row_8_k12_4x_MMX(void *dst, const void *src, uint32 width, const void *kernel) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000200000002000;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov ebp, [esp + 4 + 16] ;ebp = dst
+ mov esi, [esp + 12 + 16] ;esi = width
+ mov edi, [esp + 16 + 16] ;edi = kernel
+yloop:
+ ;eax = temp
+ ;ebx = temp
+ ;ecx = temp
+ ;edx = temp
+ ;esi = horiz counter
+ ;edi = filter list
+ ;ebp = destination
+
+ mov eax, [edi+0]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [esp+8+16]
+ add eax, edx
+ add ebx, edx
+ add ecx, edx
+ add edx, [edi+12]
+
+ movd mm0, [eax]
+ punpcklbw mm0, mm7
+
+ pmaddwd mm0, [edi+16]
+ movd mm1, [ebx]
+ punpcklbw mm1, mm7
+
+ pmaddwd mm1, [edi+24]
+ movd mm2, [ecx]
+ punpcklbw mm2, mm7
+
+ pmaddwd mm2, [edi+32]
+ movd mm3, [edx]
+ punpcklbw mm3, mm7
+
+ pmaddwd mm3, [edi+40]
+ movd mm4, [eax+4]
+ punpcklbw mm4, mm7
+
+ pmaddwd mm4, [edi+48]
+ movd mm5, [ebx+4]
+ punpcklbw mm5, mm7
+
+ pmaddwd mm5, [edi+56]
+ paddd mm0, mm4
+ paddd mm1, mm5
+
+ movd mm4, [ecx+4]
+ punpcklbw mm4, mm7
+ movd mm5, [edx+4]
+
+ pmaddwd mm4, [edi+64]
+ punpcklbw mm5, mm7
+ paddd mm2, mm4
+
+ pmaddwd mm5, [edi+72]
+ movd mm4, [eax+8]
+ punpcklbw mm4, mm7
+
+ paddd mm3, mm5
+ movd mm5, [ebx+8]
+ punpcklbw mm5, mm7
+
+ pmaddwd mm4, [edi+80]
+ paddd mm0, mm4
+ movd mm4, [ecx+8]
+
+ pmaddwd mm5, [edi+88]
+ paddd mm1, mm5
+ punpcklbw mm4, mm7
+
+ pmaddwd mm4, [edi+96]
+ movd mm5, [edx+8]
+ punpcklbw mm5, mm7
+
+ pmaddwd mm5, [edi+104]
+ paddd mm2, mm4
+ paddd mm3, mm5
+
+ movq mm4, mm0
+ punpckldq mm0, mm1
+ movq mm5, mm2
+ punpckldq mm2, mm3
+ punpckhdq mm4, mm1
+ punpckhdq mm5, mm3
+ paddd mm0, mm4
+ paddd mm2, mm5
+ paddd mm0, mm6
+ paddd mm2, mm6
+ psrad mm0, 14
+ psrad mm2, 14
+
+ packssdw mm0, mm2
+ packuswb mm0, mm0
+
+ add edi, 112
+
+ movd [ebp], mm0
+ add ebp, 4
+ sub esi, 1
+ jne yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_row_8_MMX(void *dst, const void *src, uint32 width, const void *kernel, uint32 kwidth) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000000000002000;
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov edi, [esp + 4 + 16] ;edi = dst
+ mov ebx, [esp + 8 + 16] ;ebx = src
+ mov ebp, [esp + 12 + 16] ;ebp = width
+ mov edx, [esp + 16 + 16] ;edx = kernel
+yloop:
+ ;eax = temp
+ ;ebx = source base address
+ ;ecx = (temp) source
+ ;edx = filter list
+ ;esi = (temp) kernel width
+ ;edi = destination
+ ;ebp = horiz counter
+
+ mov eax, [edx]
+ add edx, 8
+ lea ecx, [ebx + eax]
+ mov esi, [esp + 20 + 16] ;esi = kernel width
+
+ movq mm2, mm6
+xloop:
+ movd mm0, [ecx]
+ punpcklbw mm0, mm7
+ add ecx, 4
+ pmaddwd mm0, [edx]
+ paddd mm2, mm0
+ add edx, 8
+ sub esi, 4
+ jne xloop
+
+ punpckldq mm0, mm2
+ paddd mm0, mm2
+ psrad mm0, 14
+ psrlq mm0, 32
+ packssdw mm0, mm0
+ packuswb mm0, mm0
+ movd eax, mm0
+ mov [edi], al
+ add edi, 1
+ sub ebp, 1
+ jne yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void VDResamplerSeparableTableRowStage8MMX::Process(void *dst, const void *src, uint32 w) {
+ int byteOffset = (int)(ptrdiff_t)src & 3;
+ const sint16 *ksrc = &mRowKernels[mRowKernelSize * byteOffset];
+#if 0
+ int kwidth = mAlignedKernelWidth;
+ uint8 *dst2 = (uint8 *)dst;
+
+ do {
+ int offset = ksrc[0];
+ ksrc += 4;
+
+ const uint8 *src2 = (const uint8 *)src + offset;
+ sint32 accum = 0x8000;
+ for(int i=0; i<kwidth; ++i) {
+ accum += (sint32)src2[i] * (*ksrc++);
+ }
+
+ accum >>= 14;
+
+ accum &= ~(accum >> 31);
+ accum |= (255 - accum) >> 31;
+
+ *dst2++ = (uint8)accum;
+
+ } while(--w);
+#else
+ int ksize = mKernelSizeByOffset[byteOffset];
+ if (mbQuadOptimizationEnabled[byteOffset]) {
+ if (w >= 4) {
+ if (ksize == 12) {
+ vdasm_resize_table_row_8_k12_4x_MMX(dst, src, w >> 2, ksrc);
+
+#if 0
+ int w4 = w >> 2;
+ uint8 *dst2 = (uint8 *)dst;
+ const uint8 *src2 = (const uint8 *)src;
+ const sint16 *ksrc2 = ksrc;
+
+ do {
+ int off0 = ksrc2[0];
+ int off1 = ksrc2[2];
+ int off2 = ksrc2[4];
+ int off3 = ksrc2[6];
+ const uint8 *d0 = src2 + off0;
+ const uint8 *d1 = src2 + off1;
+ const uint8 *d2 = src2 + off2;
+ const uint8 *d3 = src2 + off3;
+
+ int acc0 = 0;
+ int acc1 = 0;
+ int acc2 = 0;
+ int acc3 = 0;
+
+ acc0 += d0[ 0]*ksrc2[ 8]
+ + d0[ 1]*ksrc2[ 9]
+ + d0[ 2]*ksrc2[ 10]
+ + d0[ 3]*ksrc2[ 11]
+ + d0[ 4]*ksrc2[ 24]
+ + d0[ 5]*ksrc2[ 25]
+ + d0[ 6]*ksrc2[ 26]
+ + d0[ 7]*ksrc2[ 27]
+ + d0[ 8]*ksrc2[ 40]
+ + d0[ 9]*ksrc2[ 41]
+ + d0[10]*ksrc2[ 42]
+ + d0[11]*ksrc2[ 43];
+
+ acc0 = (acc0 + 0x2000) >> 14;
+ if (acc0 < 0) acc0 = 0; else if (acc0 > 255) acc0 = 255;
+
+ acc1 += d1[ 0]*ksrc2[ 12]
+ + d1[ 1]*ksrc2[ 13]
+ + d1[ 2]*ksrc2[ 14]
+ + d1[ 3]*ksrc2[ 15]
+ + d1[ 4]*ksrc2[ 28]
+ + d1[ 5]*ksrc2[ 29]
+ + d1[ 6]*ksrc2[ 30]
+ + d1[ 7]*ksrc2[ 31]
+ + d1[ 8]*ksrc2[ 44]
+ + d1[ 9]*ksrc2[ 45]
+ + d1[10]*ksrc2[ 46]
+ + d1[11]*ksrc2[ 47];
+
+ acc1 = (acc1 + 0x2000) >> 14;
+ if (acc1 < 0) acc1 = 0; else if (acc1 > 255) acc1 = 255;
+
+ acc2 += d2[ 0]*ksrc2[ 16]
+ + d2[ 1]*ksrc2[ 17]
+ + d2[ 2]*ksrc2[ 18]
+ + d2[ 3]*ksrc2[ 19]
+ + d2[ 4]*ksrc2[ 32]
+ + d2[ 5]*ksrc2[ 33]
+ + d2[ 6]*ksrc2[ 34]
+ + d2[ 7]*ksrc2[ 35]
+ + d2[ 8]*ksrc2[ 48]
+ + d2[ 9]*ksrc2[ 49]
+ + d2[10]*ksrc2[ 50]
+ + d2[11]*ksrc2[ 51];
+
+ acc2 = (acc2 + 0x2000) >> 14;
+ if (acc2 < 0) acc2 = 0; else if (acc2 > 255) acc2 = 255;
+
+ acc3 += d3[ 0]*ksrc2[ 20]
+ + d3[ 1]*ksrc2[ 21]
+ + d3[ 2]*ksrc2[ 22]
+ + d3[ 3]*ksrc2[ 23]
+ + d3[ 4]*ksrc2[ 36]
+ + d3[ 5]*ksrc2[ 37]
+ + d3[ 6]*ksrc2[ 38]
+ + d3[ 7]*ksrc2[ 39]
+ + d3[ 8]*ksrc2[ 52]
+ + d3[ 9]*ksrc2[ 53]
+ + d3[10]*ksrc2[ 54]
+ + d3[11]*ksrc2[ 55];
+
+ acc3 = (acc3 + 0x2000) >> 14;
+ if (acc3 < 0) acc3 = 0; else if (acc3 > 255) acc3 = 255;
+
+ ksrc2 += 56;
+
+ dst2[0] = (uint8)acc0;
+ dst2[1] = (uint8)acc1;
+ dst2[2] = (uint8)acc2;
+ dst2[3] = (uint8)acc3;
+ dst2 += 4;
+ } while(--w4);
+#endif
+ } else
+ vdasm_resize_table_row_8_k8_4x_MMX(dst, src, w >> 2, ksrc);
+ }
+
+ if (w & 3)
+ vdasm_resize_table_row_8_MMX((char *)dst + (w & ~3), src, w & 3, ksrc + mTailOffset[byteOffset], ksize);
+ } else {
+ vdasm_resize_table_row_8_MMX(dst, src, w, ksrc, ksize);
+ }
+#endif
+}
+
+void VDResamplerSeparableTableRowStage8MMX::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_MMX((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+VDResamplerSeparableTableRowStageMMX::VDResamplerSeparableTableRowStageMMX(const IVDResamplerFilter& filter)
+ : VDResamplerRowStageSeparableTable32(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (unsigned)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableRowStageMMX::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_MMX((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerSeparableTableColStage8MMX::VDResamplerSeparableTableColStage8MMX(const IVDResamplerFilter& filter)
+ : VDResamplerColStageSeparableTable8(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (unsigned)mFilterBank.size() >> 1);
+}
+
+void __declspec(naked) vdasm_resize_table_col_8_k2_MMX(void *dst, const void *const *srcs, uint32 width, const void *kernel) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000200000002000;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov esi, [esp + 4 + 16] ;esi = dst
+ mov edi, [esp + 16 + 16] ;edi = kernel
+ mov ebp, [esp + 12 + 16] ;ebp = width
+
+ movq mm5, [edi]
+
+		mov		edx, [esp + 8 + 16]	;edx = srcs
+ mov eax, [edx+0]
+ mov ebx, [edx+4]
+ add eax, ebp
+ add ebx, ebp
+ neg ebp
+yloop:
+ ;eax = row0
+ ;ebx = row1
+ ;ecx =
+ ;edx =
+ ;edi = kernel
+ ;esi = dest
+ ;ebp = width counter
+
+ movd mm0, [eax+ebp]
+ punpcklbw mm0, mm7
+ movd mm2, [ebx+ebp]
+ punpcklbw mm2, mm7
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ punpckhwd mm1, mm2
+ pmaddwd mm0, mm5
+ pmaddwd mm1, mm5
+
+ paddd mm0, mm6
+ paddd mm1, mm6
+
+ psrad mm0, 14
+ psrad mm1, 14
+ packssdw mm0, mm1
+ packuswb mm0, mm0
+ movd [esi], mm0
+ add esi, 4
+ add ebp, 4
+ jne yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_col_8_k4_MMX(void *dst, const void *const *srcs, uint32 width, const void *kernel) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000200000002000;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov esi, [esp + 4 + 16] ;esi = dst
+ mov edi, [esp + 16 + 16] ;edi = kernel
+ xor ebp, ebp
+
+		mov		edx, [esp + 8 + 16]	;edx = srcs
+ mov eax, [edx+0]
+ mov ebx, [edx+4]
+ mov ecx, [edx+8]
+ mov edx, [edx+12]
+yloop:
+ ;eax = row0
+ ;ebx = row1
+ ;ecx = row2
+ ;edx = row3
+ ;edi = kernel
+ ;esi = dest
+ ;ebp = width counter
+
+ movd mm0, [eax+ebp]
+ punpcklbw mm0, mm7
+ movd mm2, [ebx+ebp]
+ punpcklbw mm2, mm7
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ movq mm5, [edi]
+ punpckhwd mm1, mm2
+ pmaddwd mm0, mm5
+ pmaddwd mm1, mm5
+
+ paddd mm0, mm6
+ paddd mm1, mm6
+
+ movd mm3, [ecx+ebp]
+ punpcklbw mm3, mm7
+ movd mm2, [edx+ebp]
+ punpcklbw mm2, mm7
+ movq mm4, mm3
+ punpcklwd mm3, mm2
+ movq mm5, [edi+8]
+ punpckhwd mm4, mm2
+ pmaddwd mm3, mm5
+ pmaddwd mm4, mm5
+
+ paddd mm0, mm3
+ paddd mm1, mm4
+
+ psrad mm0, 14
+ psrad mm1, 14
+ packssdw mm0, mm1
+ packuswb mm0, mm0
+ add ebp, 4
+ movd [esi], mm0
+ add esi, 4
+ cmp ebp, [esp + 12 + 16]
+ jb yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void __declspec(naked) vdasm_resize_table_col_8_MMX(void *dst, const void *const *srcs, uint32 width, const void *kernel, uint32 kwidth) {
+ static const __declspec(align(8)) __int64 kRound = 0x0000200000002000;
+
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ pxor mm7, mm7
+ movq mm6, kRound
+
+ mov edi, [esp + 4 + 16] ;edi = dst
+ xor ebp, ebp
+yloop:
+ mov edx, [esp + 16 + 16] ;edx = kernel
+ mov ebx, [esp + 8 + 16] ;ebx = srcs
+ mov esi, [esp + 20 + 16] ;esi = kwidth
+ movq mm3, mm6
+ movq mm4, mm6
+xloop:
+ mov ecx, [ebx]
+ movd mm0, [ecx+ebp]
+ punpcklbw mm0, mm7
+ mov ecx, [ebx+4]
+ movd mm2, [ecx+ebp]
+ punpcklbw mm2, mm7
+ movq mm1, mm0
+ punpcklwd mm0, mm2
+ punpckhwd mm1, mm2
+ movq mm5, [edx]
+ pmaddwd mm0, mm5
+ pmaddwd mm1, mm5
+
+ paddd mm3, mm0
+ paddd mm4, mm1
+ add ebx, 8
+ add edx, 8
+ sub esi, 2
+ jne xloop
+
+ psrad mm3, 14
+ psrad mm4, 14
+ packssdw mm3, mm4
+ packuswb mm3, mm3
+ movd [edi], mm3
+ add edi, 4
+ add ebp, 4
+ cmp ebp, [esp + 12 + 16]
+ jb yloop
+
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+}
+
+void VDResamplerSeparableTableColStage8MMX::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *const *src = (const uint8 *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const sint16 *filter = (const sint16 *)&mFilterBank[((phase>>8)&0xff) * ksize];
+
+ int w4 = w & ~3;
+
+ if (w4) {
+ switch(ksize) {
+ case 2:
+ vdasm_resize_table_col_8_k2_MMX(dst, (const void *const *)src, w4, filter);
+ break;
+
+ case 4:
+ vdasm_resize_table_col_8_k4_MMX(dst, (const void *const *)src, w4, filter);
+ break;
+
+ default:
+ vdasm_resize_table_col_8_MMX(dst, (const void *const *)src, w4, filter, ksize);
+ break;
+ }
+ }
+
+ for(uint32 i=w4; i<w; ++i) {
+ int b = 0x2000;
+ const sint16 *filter2 = filter;
+ const uint8 *const *src2 = src;
+
+ for(unsigned j = ksize; j; j -= 2) {
+ sint32 p0 = (*src2++)[i];
+ sint32 p1 = (*src2++)[i];
+ sint32 coeff0 = filter2[0];
+ sint32 coeff1 = filter2[1];
+ filter2 += 4;
+
+ b += p0*coeff0;
+ b += p1*coeff1;
+ }
+
+ b >>= 14;
+
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ dst[i] = (uint8)b;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerSeparableTableColStageMMX::VDResamplerSeparableTableColStageMMX(const IVDResamplerFilter& filter)
+ : VDResamplerColStageSeparableTable32(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (unsigned)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableColStageMMX::Process(void *dst, const void *const *src, uint32 w, sint32 phase) {
+ vdasm_resize_table_col_MMX((uint32*)dst, (const uint32 *const *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, (phase >> 8) & 0xff);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE2, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+extern "C" long vdasm_resize_table_col_SSE2(uint32 *out, const uint32 *const*in_table, const int *filter, int filter_width, uint32 w, long frac);
+extern "C" long vdasm_resize_table_row_SSE2(uint32 *out, const uint32 *in, const int *filter, int filter_width, uint32 w, long accum, long frac);
+extern "C" void vdasm_resize_ccint_col_SSE2(void *dst, const void *src1, const void *src2, const void *src3, const void *src4, uint32 count, const void *tbl);
+
+VDResamplerSeparableCubicColStageSSE2::VDResamplerSeparableCubicColStageSSE2(double A)
+ : VDResamplerSeparableCubicColStageMMX(A)
+{
+}
+
+void VDResamplerSeparableCubicColStageSSE2::Process(void *dst0, const void *const *srcarray, uint32 w, sint32 phase) {
+ vdasm_resize_ccint_col_SSE2(dst0, srcarray[0], srcarray[1], srcarray[2], srcarray[3], w, mFilterBank.data() + ((phase>>6)&0x3fc));
+}
+
+VDResamplerSeparableTableRowStageSSE2::VDResamplerSeparableTableRowStageSSE2(const IVDResamplerFilter& filter)
+ : VDResamplerSeparableTableRowStageMMX(filter)
+{
+}
+
+void VDResamplerSeparableTableRowStageSSE2::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_MMX((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+VDResamplerSeparableTableColStageSSE2::VDResamplerSeparableTableColStageSSE2(const IVDResamplerFilter& filter)
+ : VDResamplerSeparableTableColStageMMX(filter)
+{
+}
+
+void VDResamplerSeparableTableColStageSSE2::Process(void *dst, const void *const *src, uint32 w, sint32 phase) {
+ vdasm_resize_table_col_SSE2((uint32*)dst, (const uint32 *const *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, (phase >> 8) & 0xff);
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// resampler stages (SSE4.1, x86)
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerSeparableTableRowStage8SSE41::VDResamplerSeparableTableRowStage8SSE41(const IVDResamplerFilter& filter)
+ : VDResamplerRowStageSeparableTable32(filter)
+ , mLastSrcWidth(0)
+ , mLastDstWidth(0)
+ , mLastU(0)
+ , mLastDUDX(0)
+{
+ mAlignedKernelWidth = (GetWindowSize() + 15) & ~7;
+ mAlignedKernelSize = mAlignedKernelWidth + 16;
+}
+
+void VDResamplerSeparableTableRowStage8SSE41::Init(const VDResamplerAxis& axis, uint32 srcw) {
+ uint32 w = axis.dx_preclip + axis.dx_active + axis.dx_postclip + axis.dx_dualclip;
+
+ if (mLastSrcWidth != srcw || mLastDstWidth != w || mLastU != axis.u || mLastDUDX != axis.dudx) {
+ mLastSrcWidth = srcw;
+ mLastDstWidth = w;
+ mLastU = axis.u;
+ mLastDUDX = axis.dudx;
+
+ RedoRowFilters(axis, w, srcw);
+ }
+}
+
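+// Rebuilds the row kernels: a full kernel set is generated for each of the 8 possible
+// byte alignments of the source pointer. Each per-pixel entry stores the clamped,
+// 8-aligned source offset followed by the coefficients, and taps that would read outside
+// the source are folded into the nearest in-range coefficient via std::accumulate.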
+void VDResamplerSeparableTableRowStage8SSE41::RedoRowFilters(const VDResamplerAxis& axis, uint32 w, uint32 srcw) {
+ int kstride = mFilterBank.size() >> 8;
+ int ksize = mAlignedKernelWidth;
+ int kesize = mAlignedKernelSize;
+
+ mRowKernels.clear();
+ mRowKernelSize = w * kesize;
+
+ mRowKernels.resize(mRowKernelSize * 8, 0);
+
+ for(int byteOffset = 0; byteOffset < 8; ++byteOffset) {
+ sint16 *dst = mRowKernels.data() + mRowKernelSize * byteOffset;
+ int ksizeThisOffset = std::min<int>(ksize, (byteOffset + srcw + 7) & ~7);
+
+ mKernelSizeByOffset[byteOffset] = ksizeThisOffset;
+
+ sint32 u = axis.u;
+ sint32 uoffmin = -byteOffset;
+ sint32 uoffmax = ((srcw + byteOffset + 7) & ~7) - byteOffset - ksizeThisOffset;
+ for(uint32 i=0; i<w; ++i) {
+ sint32 uoffset = u >> 16;
+ sint32 uoffset2 = ((uoffset + byteOffset) & ~7) - byteOffset;
+
+ if (uoffset2 < uoffmin)
+ uoffset2 = uoffmin;
+
+ if (uoffset2 > uoffmax)
+ uoffset2 = uoffmax;
+
+ *(sint32 *)dst = uoffset2;
+ dst += 2;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+ *dst++ = 0;
+
+ uint32 phase = (u >> 8) & 255;
+ const sint32 *src = &mFilterBank[kstride * phase];
+
+ sint32 start = 0;
+ sint32 end = kstride;
+
+ int dstoffset = uoffset - uoffset2;
+
+ // check for filter kernel overlapping left source boundary
+ if (uoffset < 0)
+ start = -uoffset;
+
+ // check for filter kernel overlapping right source boundary
+ if (uoffset + end > (sint32)srcw)
+ end = srcw - uoffset;
+
+ VDASSERT(dstoffset + start >= 0);
+ VDASSERT(dstoffset + end <= ksizeThisOffset);
+
+ sint16 *dst2 = dst + dstoffset;
+ dst += ksizeThisOffset;
+
+ for(int j=start; j<end; ++j)
+ dst2[j] = src[j];
+
+ if (start > 0)
+ dst2[start] = std::accumulate(src, src+start, dst2[start]);
+
+ if (end < kstride)
+ dst2[end - 1] = std::accumulate(src+end, src+kstride, dst2[end - 1]);
+
+ u += axis.dudx;
+ }
+ }
+
+ // swizzle rows where optimization is possible
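+	// For aligned kernel sizes of 8 or 16 the per-pixel kernels of four consecutive output
+	// pixels are interleaved so the 4x SSE4.1 row routines can read offsets and coefficients
+	// for four pixels sequentially; other sizes keep the unswizzled layout and use the
+	// generic routine.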
+ vdfastvector<sint16> temp;
+
+ int quads = w >> 2;
+ int quadRemainder = w & 3;
+
+ for(int byteOffset = 0; byteOffset < 8; ++byteOffset) {
+ int ksizeThisOffset = mKernelSizeByOffset[byteOffset];
+ int kpairs = ksizeThisOffset >> 3;
+
+ if (ksizeThisOffset < 8 || ksizeThisOffset > 16) {
+ mbQuadOptimizationEnabled[byteOffset] = false;
+ } else {
+ ptrdiff_t unswizzledStride = (ksizeThisOffset >> 1) + 4;
+
+ mbQuadOptimizationEnabled[byteOffset] = true;
+ mTailOffset[byteOffset] = quads * (8 + ksizeThisOffset*4);
+
+ uint32 *dst = (uint32 *)&mRowKernels[mRowKernelSize * byteOffset];
+ temp.resize(mRowKernelSize);
+ memcpy(temp.data(), dst, mRowKernelSize*2);
+
+ const uint32 *src0 = (const uint32 *)temp.data();
+ const uint32 *src1 = src0 + unswizzledStride;
+ const uint32 *src2 = src1 + unswizzledStride;
+ const uint32 *src3 = src2 + unswizzledStride;
+ ptrdiff_t srcskip = unswizzledStride * 3;
+
+ for(int q = 0; q < quads; ++q) {
+ dst[0] = src0[0];
+ dst[1] = src1[0];
+ dst[2] = src2[0];
+ dst[3] = src3[0];
+ src0 += 4;
+ src1 += 4;
+ src2 += 4;
+ src3 += 4;
+ dst += 4;
+
+ for(int p = 0; p < kpairs; ++p) {
+ dst[ 0] = src0[0];
+ dst[ 1] = src0[1];
+ dst[ 2] = src0[2];
+ dst[ 3] = src0[3];
+ dst[ 4] = src1[0];
+ dst[ 5] = src1[1];
+ dst[ 6] = src1[2];
+ dst[ 7] = src1[3];
+ dst[ 8] = src2[0];
+ dst[ 9] = src2[1];
+ dst[10] = src2[2];
+ dst[11] = src2[3];
+ dst[12] = src3[0];
+ dst[13] = src3[1];
+ dst[14] = src3[2];
+ dst[15] = src3[3];
+ dst += 16;
+ src0 += 4;
+ src1 += 4;
+ src2 += 4;
+ src3 += 4;
+ }
+
+ src0 += srcskip;
+ src1 += srcskip;
+ src2 += srcskip;
+ src3 += srcskip;
+ }
+
+ memcpy(dst, src0, unswizzledStride * 4 * quadRemainder);
+ }
+ }
+}
+
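+// Row processing picks the kernel set matching the low 3 bits of the source address; when
+// the quad optimization is enabled for that alignment, groups of four pixels go through the
+// k8/k16 4x SSE4.1 routines and any remainder uses the generic routine with the tail kernels.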
+void VDResamplerSeparableTableRowStage8SSE41::Process(void *dst, const void *src, uint32 w) {
+ int byteOffset = (int)(ptrdiff_t)src & 7;
+ const sint16 *ksrc = &mRowKernels[mRowKernelSize * byteOffset];
+
+ int ksize = mKernelSizeByOffset[byteOffset];
+ if (mbQuadOptimizationEnabled[byteOffset]) {
+ if (w >= 4) {
+ if (ksize == 16)
+ vdasm_resize_table_row_8_k16_4x_SSE41(dst, src, w >> 2, ksrc);
+ else
+ vdasm_resize_table_row_8_k8_4x_SSE41(dst, src, w >> 2, ksrc);
+ }
+
+ if (w & 3)
+ vdasm_resize_table_row_8_SSE41((char *)dst + (w & ~3), src, w & 3, ksrc + mTailOffset[byteOffset], ksize);
+ } else {
+ vdasm_resize_table_row_8_SSE41(dst, src, w, ksrc, ksize);
+ }
+}
+
+void VDResamplerSeparableTableRowStage8SSE41::Process(void *dst, const void *src, uint32 w, uint32 u, uint32 dudx) {
+ vdasm_resize_table_row_MMX((uint32 *)dst, (const uint32 *)src, (const int *)mFilterBank.data(), (int)mFilterBank.size() >> 8, w, u, dudx);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDResamplerSeparableTableColStage8SSE41::VDResamplerSeparableTableColStage8SSE41(const IVDResamplerFilter& filter)
+ : VDResamplerColStageSeparableTable8(filter)
+{
+ VDResamplerSwizzleTable(mFilterBank.data(), (unsigned)mFilterBank.size() >> 1);
+}
+
+void VDResamplerSeparableTableColStage8SSE41::Process(void *dst0, const void *const *src0, uint32 w, sint32 phase) {
+ uint8 *dst = (uint8 *)dst0;
+ const uint8 *const *src = (const uint8 *const *)src0;
+ const unsigned ksize = (unsigned)mFilterBank.size() >> 8;
+ const sint16 *filter = (const sint16 *)&mFilterBank[((phase>>8)&0xff) * ksize];
+
+ int w4 = w & ~3;
+
+ if (w4) {
+ switch(ksize) {
+ case 2:
+ vdasm_resize_table_col_8_k2_SSE41(dst, (const void *const *)src, w4, filter);
+ break;
+
+ case 4:
+ vdasm_resize_table_col_8_k4_SSE41(dst, (const void *const *)src, w4, filter);
+ break;
+
+ default:
+ vdasm_resize_table_col_8_MMX(dst, (const void *const *)src, w4, filter, ksize);
+ break;
+ }
+ }
+
+ for(uint32 i=w4; i<w; ++i) {
+ int b = 0x2000;
+ const sint16 *filter2 = filter;
+ const uint8 *const *src2 = src;
+
+ for(unsigned j = ksize; j; j -= 2) {
+ sint32 p0 = (*src2++)[i];
+ sint32 p1 = (*src2++)[i];
+ sint32 coeff0 = filter2[0];
+ sint32 coeff1 = filter2[1];
+ filter2 += 4;
+
+ b += p0*coeff0;
+ b += p1*coeff1;
+ }
+
+ b >>= 14;
+
+ if ((uint32)b >= 0x00000100)
+ b = ~b >> 31;
+
+ dst[i] = (uint8)b;
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/stretchblt_reference.cpp b/src/thirdparty/VirtualDub/Kasumi/source/stretchblt_reference.cpp
new file mode 100644
index 000000000..3afdec910
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/stretchblt_reference.cpp
@@ -0,0 +1,816 @@
+#include <vd2/system/memory.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+
+namespace {
+ struct VDPixmapReferenceStretchBltParameters {
+ void *dst;
+ ptrdiff_t dstpitch;
+ const void *src;
+ ptrdiff_t srcpitch;
+ ptrdiff_t srcinc;
+ sint32 dx;
+ sint32 dy;
+ uint32 u;
+ uint32 uinc;
+ uint32 dudx;
+ uint32 v;
+ uint32 dvdy;
+ sint32 xprecopy;
+ sint32 xpostcopy;
+ ptrdiff_t xprepos;
+ ptrdiff_t xpostpos;
+
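+		// Steps to the next destination row; the source pointer advances by the integer
+		// part of dv/dy (srcinc) plus one extra source row whenever the fractional
+		// accumulator v wraps.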
+ void advance() {
+ dst = (char *)dst + dstpitch;
+ src = (char *)src + srcinc;
+
+ uint32 vt = v + dvdy;
+
+ if (vt < v)
+ src = (char *)src + srcpitch;
+
+ v = vt;
+ }
+ };
+}
+
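+// Nearest-neighbour row blitter for 8-bit formats: pixels outside the source span are
+// filled by replicating the clamped edge sample (xprecopy/xpostcopy), and the main loop
+// advances the source pointer by the integer step (uinc) plus one extra element whenever
+// the fractional accumulator u wraps (carry detected by ut < u).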
+void VDPixmapStretchBlt_Any8_nearest_reference(VDPixmapReferenceStretchBltParameters params) {
+ do {
+ uint8 *dstp = (uint8 *)params.dst;
+ const uint8 *srcp = (const uint8 *)params.src;
+ uint32 u = params.u;
+
+ if (params.xprecopy) {
+ VDMemset8(dstp, *(const uint8 *)((const char *)params.src + params.xprepos), params.xprecopy);
+ dstp += params.xprecopy;
+ }
+
+ sint32 wt = params.dx;
+
+ if (wt > 0)
+ do {
+ *dstp++ = *srcp;
+
+ uint32 ut = u + params.dudx;
+ srcp += ut<u;
+ srcp += params.uinc;
+ u = ut;
+ } while(--wt);
+
+ if (params.xpostcopy)
+ VDMemset8(dstp, *(const uint8 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+
+ params.advance();
+ } while(--params.dy);
+}
+
+void VDPixmapStretchBlt_Any16_nearest_reference(VDPixmapReferenceStretchBltParameters params) {
+ do {
+ uint16 *dstp = (uint16 *)params.dst;
+ const uint16 *srcp = (const uint16 *)params.src;
+ uint32 u = params.u;
+
+ if (params.xprecopy) {
+ VDMemset16(dstp, *(const uint16 *)((const char *)params.src + params.xprepos), params.xprecopy);
+ dstp += params.xprecopy;
+ }
+
+ sint32 wt = params.dx;
+
+ if (wt > 0)
+ do {
+ *dstp++ = *srcp;
+
+ uint32 ut = u + params.dudx;
+ srcp += ut<u;
+ srcp += params.uinc;
+ u = ut;
+ } while(--wt);
+
+ if (params.xpostcopy)
+ VDMemset16(dstp, *(const uint16 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+
+ params.advance();
+ } while(--params.dy);
+}
+
+void VDPixmapStretchBlt_Any24_nearest_reference(VDPixmapReferenceStretchBltParameters params) {
+ do {
+ uint8 *dstp = (uint8 *)params.dst;
+ const uint8 *srcp = (const uint8 *)params.src;
+ uint32 u = params.u;
+
+ if (params.xprecopy) {
+ const uint8 *repsrc = (const uint8 *)params.src + params.xprepos;
+ const uint8 p0 = repsrc[0];
+ const uint8 p1 = repsrc[1];
+ const uint8 p2 = repsrc[2];
+
+ for(sint32 i=0; i<params.xprecopy; ++i) {
+ dstp[0] = p0;
+ dstp[1] = p1;
+ dstp[2] = p2;
+ dstp += 3;
+ }
+ }
+
+ sint32 wt = params.dx;
+
+ if (wt > 0)
+ do {
+ dstp[0] = srcp[0];
+ dstp[1] = srcp[1];
+ dstp[2] = srcp[2];
+ dstp += 3;
+
+ uint32 ut = u + params.dudx;
+ srcp += (ut<u)*3;
+ srcp += params.uinc*3;
+ u = ut;
+ } while(--wt);
+
+ if (params.xpostcopy) {
+ const uint8 *repsrc = (const uint8 *)params.src + params.xpostpos;
+ const uint8 p0 = repsrc[0];
+ const uint8 p1 = repsrc[1];
+ const uint8 p2 = repsrc[2];
+
+ for(sint32 i=0; i<params.xpostcopy; ++i) {
+ dstp[0] = p0;
+ dstp[1] = p1;
+ dstp[2] = p2;
+ dstp += 3;
+ }
+ }
+
+ params.advance();
+ } while(--params.dy);
+}
+
+void VDPixmapStretchBlt_Any32_nearest_reference(VDPixmapReferenceStretchBltParameters params) {
+ do {
+ uint32 *dstp = (uint32 *)params.dst;
+ const uint32 *srcp = (const uint32 *)params.src;
+ uint32 u = params.u;
+
+ if (params.xprecopy) {
+ VDMemset32(dstp, *(const uint32 *)((const char *)params.src + params.xprepos), params.xprecopy);
+ dstp += params.xprecopy;
+ }
+
+ sint32 wt = params.dx;
+ if (wt > 0)
+ do {
+ *dstp++ = *srcp;
+
+ uint32 ut = u + params.dudx;
+ srcp += ut<u;
+ srcp += params.uinc;
+ u = ut;
+ } while(--wt);
+
+ if (params.xpostcopy)
+ VDMemset32(dstp, *(const uint32 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+
+ params.advance();
+ } while(--params.dy);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
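+	// Splits a destination span of dx pixels into three regions based on the 32.32
+	// fixed-point source coordinate range [u64, u64 + dudx*(dx-1)]: a pre-copy region
+	// clamped to one source edge, a main region that samples inside [0, du), and a
+	// post-copy region clamped to the other edge (the two are swapped for negative
+	// dudx). u64 is advanced past the pre-copy region on return.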
+ void VDSetupNearestSamplingParameters(sint64& u64, sint64 dudx, sint32 dx, sint32 du, sint32& xprecopy, sint32& xprepos, sint32& xmain, sint32& xpostcopy, sint32& xpostpos) {
+ sint64 ulo = u64;
+ sint64 uhi = u64 + dudx * (dx - 1);
+ sint64 tdudx = dudx;
+ const sint64 ulimit = ((sint64)du << 32);
+
+ xprepos = 0;
+ xpostpos = du-1;
+
+ if (!tdudx) {
+ if (u64 < 0)
+ xprecopy = dx;
+ else if (u64 >= ulimit)
+ xprecopy = dx;
+ else
+ xmain = dx;
+ } else {
+ if (tdudx < 0) {
+ std::swap(ulo, uhi);
+ tdudx = -tdudx;
+ }
+
+ if (ulo < 0) {
+ if (uhi < 0)
+ xprecopy = dx;
+ else
+ xprecopy = (sint32)((-ulo-1) / tdudx) + 1;
+
+ VDASSERT(xprecopy <= 0 || (uint64)ulo >= (uint64)ulimit);
+ VDASSERT(xprecopy <= 0 || (uint64)(ulo + tdudx * (xprecopy-1)) >= (uint64)ulimit);
+ }
+
+ if (uhi >= ulimit) {
+ if (ulo >= ulimit)
+ xpostcopy = dx;
+ else
+ xpostcopy = (sint32)((uhi - ulimit) / tdudx) + 1;
+
+ VDASSERT(xpostcopy <= 0 || (uint64)uhi >= (uint64)ulimit);
+ VDASSERT(xpostcopy <= 0 || (uint64)(uhi - tdudx * (xpostcopy - 1)) >= (uint64)ulimit);
+ }
+
+ if (dudx < 0) {
+ std::swap(xprecopy, xpostcopy);
+ std::swap(xprepos, xpostpos);
+ }
+
+ xmain = dx - (xprecopy + xpostcopy);
+ }
+
+ // sanity-check parameters
+
+ VDASSERT(xprecopy>=0 && xprecopy <= dx);
+ VDASSERT(xpostcopy>=0 && xpostcopy <= dx);
+ VDASSERT(xmain>=0 && xmain <= dx);
+
+ VDASSERT(xprecopy <= 0 || (uint64)u64 >= (uint64)ulimit);
+ VDASSERT(xprecopy <= 0 || (uint64)(u64 + dudx * (xprecopy-1)) >= (uint64)ulimit);
+ VDASSERT(xmain <= 0 || (uint64)(u64 + dudx * xprecopy) < (uint64)ulimit);
+ VDASSERT(xmain <= 0 || (uint64)(u64 + dudx * (xprecopy+xmain-1)) < (uint64)ulimit);
+ VDASSERT(xpostcopy <= 0 || (uint64)(u64 + dudx * (xprecopy + xmain)) >= (uint64)ulimit);
+ VDASSERT(xpostcopy <= 0 || (uint64)(u64 + dudx * (xprecopy + xmain + xpostcopy - 1)) >= (uint64)ulimit);
+
+ u64 += dudx * xprecopy;
+ }
+}
+
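+// Nearest-neighbour StretchBlt: source coordinates are tracked in 32.32 fixed point, the
+// destination rectangle is clipped, and the blit is split into up to three vertical bands
+// (top/bottom edge replication around the main band) before handing rows to the per-format
+// row blitter chosen by the format switch.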
+bool VDPixmapStretchBltNearest_reference(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2) {
+ // we don't support format conversion
+ if (dst.format != src.format)
+ return false;
+
+ void (*pBlitter)(VDPixmapReferenceStretchBltParameters);
+ int bpp;
+
+ switch(src.format) {
+ case nsVDPixmap::kPixFormat_Pal8:
+ pBlitter = VDPixmapStretchBlt_Any8_nearest_reference;
+ bpp = 1;
+ break;
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ case nsVDPixmap::kPixFormat_RGB565:
+ pBlitter = VDPixmapStretchBlt_Any16_nearest_reference;
+ bpp = 2;
+ break;
+ case nsVDPixmap::kPixFormat_RGB888:
+ pBlitter = VDPixmapStretchBlt_Any24_nearest_reference;
+ bpp = 3;
+ break;
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ pBlitter = VDPixmapStretchBlt_Any32_nearest_reference;
+ bpp = 4;
+ break;
+ default:
+ return false;
+ }
+
+ // preemptive clip to prevent gradient calculations from crashing
+ if (x2 == x1 || y2 == y1)
+ return true;
+
+ // translate destination flips into source flips
+ if (x1 > x2) {
+ std::swap(x1, x2);
+ std::swap(u1, u2);
+ }
+
+ if (y1 > y2) {
+ std::swap(y1, y2);
+ std::swap(v1, v2);
+ }
+
+ // compute gradients
+ sint32 dx = x2 - x1;
+ sint32 dy = y2 - y1;
+ sint32 du = u2 - u1;
+ sint32 dv = v2 - v1;
+ sint64 dudx = ((sint64)du << 32) / dx; // must truncate toward zero to prevent overflow
+ sint64 dvdy = ((sint64)dv << 32) / dy;
+
+ // prestep top-left point to pixel center and convert destination coordinates to integer
+ sint64 u64 = (sint64)u1 << 16;
+ sint64 v64 = (sint64)v1 << 16;
+ sint32 prestepx = (0x8000 - x1) & 0xffff;
+ sint32 prestepy = (0x8000 - y1) & 0xffff;
+
+ u64 += (dudx * prestepx) >> 16;
+ v64 += (dvdy * prestepy) >> 16;
+
+ sint32 x1i = (x1 + 0x8000) >> 16;
+ sint32 y1i = (y1 + 0x8000) >> 16;
+ sint32 x2i = (x2 + 0x8000) >> 16;
+ sint32 y2i = (y2 + 0x8000) >> 16;
+
+ // destination clipping
+ if (x1i < 0) {
+ u64 -= dudx * x1i;
+ x1i = 0;
+ }
+
+ if (y1i < 0) {
+ v64 -= dvdy * y1i;
+ y1i = 0;
+ }
+
+ if (x2i > dst.w)
+ x2i = dst.w;
+
+ if (y2i > dst.h)
+ y2i = dst.h;
+
+ if (x1i >= x2i || y1i >= y2i)
+ return true;
+
+ // Calculate horizontal clip parameters
+ sint32 xprecopy = 0, xpostcopy = 0;
+ int xprepos = 0;
+ int xpostpos = src.w-1;
+ int xmain = 0;
+
+ VDSetupNearestSamplingParameters(u64, dudx, x2i-x1i, src.w, xprecopy, xprepos, xmain, xpostcopy, xpostpos);
+
+ // Calculate vertical clip parameters
+ sint32 yprecopy = 0, ypostcopy = 0;
+ int yprepos = 0;
+ int ypostpos = src.h-1;
+ int ymain = 0;
+
+ VDSetupNearestSamplingParameters(v64, dvdy, y2i-y1i, src.h, yprecopy, yprepos, ymain, ypostcopy, ypostpos);
+
+ // set up parameter block
+ VDPixmapReferenceStretchBltParameters params;
+
+ char *srcbase = (char *)src.data + (sint32)(u64 >> 32) * bpp;
+
+ params.dst = (char *)dst.data + y1i * dst.pitch + x1i * bpp;
+ params.dstpitch = dst.pitch;
+ params.src = srcbase + (sint32)(v64 >> 32) * src.pitch;
+ params.srcpitch = src.pitch;
+ params.srcinc = (sint32)(dvdy >> 32) * src.pitch;
+ params.dx = xmain;
+ params.dy = ymain;
+ params.u = (uint32)u64;
+ params.uinc = (uint32)(dudx >> 32);
+ params.dudx = (uint32)dudx;
+ params.v = (uint32)v64;
+ params.dvdy = (uint32)dvdy;
+ params.xprecopy = xprecopy;
+ params.xprepos = (xprepos - (sint32)(u64 >> 32)) * bpp;
+ params.xpostcopy = xpostcopy;
+ params.xpostpos = (xpostpos - (sint32)(u64 >> 32)) * bpp;
+
+ if (yprecopy > 0) {
+ VDPixmapReferenceStretchBltParameters preparams(params);
+
+ preparams.src = srcbase + yprepos * src.pitch;
+ preparams.srcinc = 0;
+ preparams.dy = yprecopy;
+ preparams.v = 0;
+ preparams.dvdy = 0;
+
+ pBlitter(preparams);
+
+ params.dst = (char *)params.dst + params.dstpitch * yprecopy;
+ }
+
+ if (ymain > 0)
+ pBlitter(params);
+
+ if (ypostcopy > 0) {
+ VDPixmapReferenceStretchBltParameters postparams(params);
+
+ postparams.dst = (char *)params.dst + params.dstpitch * params.dy;
+ postparams.src = srcbase + ypostpos * src.pitch;
+ postparams.srcpitch = 0;
+ postparams.srcinc = 0;
+ postparams.dy = ypostcopy;
+ postparams.v = 0;
+ postparams.dvdy = 0;
+
+ pBlitter(postparams);
+ }
+ return true;
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace {
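+	// The lerp/bilerp helpers below blend packed pixels without unpacking each channel:
+	// the red+blue and green fields are masked into separate words so one multiply and
+	// shift interpolates both components of a pair at once, using per-format fraction
+	// widths and rounding biases.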
+ uint32 lerp_XRGB1555(sint32 a, sint32 b, sint32 f) {
+ sint32 a_rb = a & 0x7c1f;
+ sint32 a_g = a & 0x03e0;
+ sint32 b_rb = b & 0x7c1f;
+ sint32 b_g = b & 0x03e0;
+
+ const sint32 rb = (a_rb + (((b_rb - a_rb)*f + 0x4010) >> 5)) & 0x7c1f;
+ const sint32 g = (a_g + (((b_g - a_g )*f + 0x0200) >> 5)) & 0x03e0;
+
+ return rb + g;
+ }
+
+ uint32 lerp_XRGB8888(sint32 a, sint32 b, sint32 f) {
+ sint32 a_rb = a & 0xff00ff;
+ sint32 a_g = a & 0x00ff00;
+ sint32 b_rb = b & 0xff00ff;
+ sint32 b_g = b & 0x00ff00;
+
+ const uint32 rb = (a_rb + (((b_rb - a_rb)*f + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 g = (a_g + (((b_g - a_g )*f + 0x00008000) >> 8)) & 0x00ff00;
+
+ return rb + g;
+ }
+
+ uint32 bilerp_RGB888(sint32 a, sint32 b, sint32 c, sint32 d, sint32 x, sint32 y) {
+ sint32 a_rb = a & 0xff00ff;
+ sint32 a_g = a & 0x00ff00;
+ sint32 b_rb = b & 0xff00ff;
+ sint32 b_g = b & 0x00ff00;
+ sint32 c_rb = c & 0xff00ff;
+ sint32 c_g = c & 0x00ff00;
+ sint32 d_rb = d & 0xff00ff;
+ sint32 d_g = d & 0x00ff00;
+
+ const uint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 top_g = (a_g + (((b_g - a_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+ const uint32 bot_rb = (c_rb + (((d_rb - c_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 bot_g = (c_g + (((d_g - c_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+
+ const uint32 final_rb = (top_rb + (((bot_rb - top_rb)*y) >> 8)) & 0xff00ff;
+ const uint32 final_g = (top_g + (((bot_g - top_g )*y) >> 8)) & 0x00ff00;
+
+ return final_rb + final_g;
+ }
+
+ uint32 bilerp_XRGB1555(sint32 a, sint32 b, sint32 c, sint32 d, sint32 x, sint32 y) {
+ sint32 a_rb = a & 0x7c1f;
+ sint32 a_g = a & 0x03e0;
+ sint32 b_rb = b & 0x7c1f;
+ sint32 b_g = b & 0x03e0;
+ sint32 c_rb = c & 0x7c1f;
+ sint32 c_g = c & 0x03e0;
+ sint32 d_rb = d & 0x7c1f;
+ sint32 d_g = d & 0x03e0;
+
+ const sint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x4010) >> 5)) & 0x7c1f;
+ const sint32 top_g = (a_g + (((b_g - a_g )*x + 0x0200) >> 5)) & 0x03e0;
+ const sint32 bot_rb = (c_rb + (((d_rb - c_rb)*x + 0x4010) >> 5)) & 0x7c1f;
+ const sint32 bot_g = (c_g + (((d_g - c_g )*x + 0x0200) >> 5)) & 0x03e0;
+
+ const sint32 final_rb = (top_rb + (((bot_rb - top_rb)*y + 0x4010) >> 5)) & 0x7c1f;
+ const sint32 final_g = (top_g + (((bot_g - top_g )*y + 0x0200) >> 5)) & 0x03e0;
+
+ return final_rb + final_g;
+ }
+
+ uint32 bilerp_RGB565(sint32 a, sint32 b, sint32 c, sint32 d, sint32 x, sint32 y) {
+ sint32 a_rb = a & 0xf81f;
+ sint32 a_g = a & 0x07e0;
+ sint32 b_rb = b & 0xf81f;
+ sint32 b_g = b & 0x07e0;
+ sint32 c_rb = c & 0xf81f;
+ sint32 c_g = c & 0x07e0;
+ sint32 d_rb = d & 0xf81f;
+ sint32 d_g = d & 0x07e0;
+
+ const sint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x8010) >> 6)) & 0xf81f;
+ const sint32 top_g = (a_g + (((b_g - a_g )*x + 0x0400) >> 6)) & 0x07e0;
+ const sint32 bot_rb = (c_rb + (((d_rb - c_rb)*x + 0x8010) >> 6)) & 0xf81f;
+ const sint32 bot_g = (c_g + (((d_g - c_g )*x + 0x0400) >> 6)) & 0x07e0;
+
+ const sint32 final_rb = (top_rb + (((bot_rb - top_rb)*y + 0x8010) >> 6)) & 0xf81f;
+ const sint32 final_g = (top_g + (((bot_g - top_g )*y + 0x0400) >> 6)) & 0x07e0;
+
+ return final_rb + final_g;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
+ struct VDPixmapReferenceStretchBltBilinearParameters {
+ void *dst;
+ const void *src;
+ uint32 u;
+ uint32 uinc;
+ uint32 dudx;
+
+ ptrdiff_t xprepos;
+ ptrdiff_t xpostpos;
+ sint32 xprecopy;
+ sint32 xpostcopy;
+ sint32 xmidsize;
+ };
+
+ void VDPixmapStretchBiH_XRGB1555_to_XRGB1555(const VDPixmapReferenceStretchBltBilinearParameters& params) {
+ uint16 *dst = (uint16 *)params.dst;
+ const uint16 *src = (const uint16 *)params.src;
+
+ if (params.xprecopy)
+ VDMemset16(dst - params.xprecopy, *(const uint16 *)((const char *)params.src + params.xprepos), params.xprecopy);
+
+ if (params.xmidsize) {
+ sint32 w = params.xmidsize;
+ uint32 u = params.u;
+ const uint32 dudx = params.dudx;
+ const ptrdiff_t uinc = params.uinc;
+
+ do {
+ *dst++ = lerp_XRGB1555(src[0], src[1], u >> 27);
+
+ const uint32 ut = u + dudx;
+ src += uinc + (ut < u);
+ u = ut;
+ } while(--w);
+ }
+
+ if (params.xpostcopy)
+ VDMemset16(dst, *(const uint16 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+ }
+
+ void VDPixmapStretchBiH_XRGB8888_to_XRGB8888(const VDPixmapReferenceStretchBltBilinearParameters& params) {
+ uint32 *dst = (uint32 *)params.dst;
+ const uint32 *src = (const uint32 *)params.src;
+
+ if (params.xprecopy)
+ VDMemset32(dst - params.xprecopy, *(const uint32 *)((const char *)params.src + params.xprepos), params.xprecopy);
+
+ if (params.xmidsize) {
+ sint32 w = params.xmidsize;
+ uint32 u = params.u;
+ const uint32 dudx = params.dudx;
+ const ptrdiff_t uinc = params.uinc;
+
+ do {
+ *dst++ = lerp_XRGB8888(src[0], src[1], u >> 24);
+
+ const uint32 ut = u + dudx;
+ src += uinc + (ut < u);
+ u = ut;
+ } while(--w);
+ }
+
+ if (params.xpostcopy)
+ VDMemset32(dst, *(const uint32 *)((const char *)params.src + params.xpostpos), params.xpostcopy);
+ }
+
+ void VDPixmapStretchBiV_XRGB1555_to_XRGB1555(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f) {
+ uint16 *dst = (uint16 *)dstv;
+ const uint16 *src1 = (const uint16 *)src1v;
+ const uint16 *src2 = (const uint16 *)src2v;
+
+ f >>= 27;
+
+ do {
+ *dst++ = lerp_XRGB1555(*src1++, *src2++, f);
+ } while(--w);
+ }
+
+ void VDPixmapStretchBiV_XRGB8888_to_XRGB8888(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f) {
+ uint32 *dst = (uint32 *)dstv;
+ const uint32 *src1 = (const uint32 *)src1v;
+ const uint32 *src2 = (const uint32 *)src2v;
+
+ f >>= 24;
+
+ do {
+ *dst++ = lerp_XRGB8888(*src1++, *src2++, f);
+ } while(--w);
+ }
+}
+
+#ifdef _M_IX86
+extern "C" void vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX(const VDPixmapReferenceStretchBltBilinearParameters&);
+
+extern "C" void vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f);
+extern "C" void vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f);
+#endif
+
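+// Bilinear StretchBlt: each output row is produced by horizontally filtering source rows
+// into a two-row sliding window (pTempRow1/pTempRow2) and then vertically lerping between
+// them with the fractional part of v; the window is refilled only when the integer source
+// row advances, so each source row is filtered horizontally at most once.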
+bool VDPixmapStretchBltBilinear_reference(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2) {
+ // preemptive clip to prevent gradient calculations from crashing
+ if (x2 == x1 || y2 == y1)
+ return true;
+
+ // we don't support source clipping
+ if ((uint32)u1 > (uint32)(src.w << 16) || (uint32)v1 > (uint32)(src.h << 16))
+ return false;
+
+ if ((uint32)u2 > (uint32)(src.w << 16) || (uint32)v2 > (uint32)(src.h << 16))
+ return false;
+
+ // we don't support format changes (yet)
+ if (dst.format != src.format)
+ return false;
+
+ // format determination
+ void (*pHorizontalFilter)(const VDPixmapReferenceStretchBltBilinearParameters& params);
+ void (*pVerticalFilter)(void *dstv, const void *src1v, const void *src2v, sint32 w, uint32 f);
+ int bpp;
+
+#pragma vdpragma_TODO("fixme this is b0rken")
+ switch(src.format) {
+ case nsVDPixmap::kPixFormat_XRGB1555:
+ pHorizontalFilter = VDPixmapStretchBiH_XRGB1555_to_XRGB1555;
+#ifdef _M_IX86
+ if (CPUGetEnabledExtensions() & CPUF_SUPPORTS_MMX)
+ pVerticalFilter = vdasm_stretchbltV_XRGB1555_to_XRGB1555_MMX;
+ else
+#endif
+ pVerticalFilter = VDPixmapStretchBiV_XRGB1555_to_XRGB1555;
+ bpp = 2;
+ break;
+ case nsVDPixmap::kPixFormat_XRGB8888:
+#ifdef _M_IX86
+ if (CPUGetEnabledExtensions() & CPUF_SUPPORTS_MMX) {
+ pHorizontalFilter = vdasm_stretchbltH_XRGB8888_to_XRGB8888_MMX;
+ pVerticalFilter = vdasm_stretchbltV_XRGB8888_to_XRGB8888_MMX;
+ } else
+#endif
+ {
+ pHorizontalFilter = VDPixmapStretchBiH_XRGB8888_to_XRGB8888;
+ pVerticalFilter = VDPixmapStretchBiV_XRGB8888_to_XRGB8888;
+ }
+ bpp = 4;
+ break;
+ default:
+ return false;
+ }
+
+ // translate destination flips into source flips
+ if (x1 > x2) {
+ std::swap(x1, x2);
+ std::swap(u1, u2);
+ }
+
+ if (y1 > y2) {
+ std::swap(y1, y2);
+ std::swap(v1, v2);
+ }
+
+ // compute gradients
+ sint32 dx = x2 - x1;
+ sint32 dy = y2 - y1;
+ sint32 du = u2 - u1;
+ sint32 dv = v2 - v1;
+ sint64 dudx = ((sint64)du << 32) / dx; // must truncate toward zero to prevent overflow
+ sint64 dvdy = ((sint64)dv << 32) / dy;
+
+ // prestep top-left point to pixel center and convert destination coordinates to integer
+ sint64 u64 = (sint64)u1 << 16;
+ sint64 v64 = (sint64)v1 << 16;
+ sint32 prestepx = (0x8000 - x1) & 0xffff;
+ sint32 prestepy = (0x8000 - y1) & 0xffff;
+
+ u64 += (dudx * prestepx) >> 16;
+ v64 += (dvdy * prestepy) >> 16;
+
+ sint32 x1i = (x1 + 0x8000) >> 16;
+ sint32 y1i = (y1 + 0x8000) >> 16;
+ sint32 x2i = (x2 + 0x8000) >> 16;
+ sint32 y2i = (y2 + 0x8000) >> 16;
+
+ // destination clipping
+ if (x1i < 0) {
+ u64 -= dudx * x1i;
+ x1i = 0;
+ }
+
+ if (y1i < 0) {
+ v64 -= dvdy * y1i;
+ y1i = 0;
+ }
+
+ if (x2i > dst.w)
+ x2i = dst.w;
+
+ if (y2i > dst.h)
+ y2i = dst.h;
+
+ if (x1i >= x2i || y1i >= y2i)
+ return true;
+
+ u64 -= 0x80000000;
+ v64 -= 0x80000000;
+
+ int xprepos = 0;
+ int xpostpos = src.w-1;
+
+ sint64 ulo = u64;
+ sint64 uhi = u64 + dudx * (x2i - x1i - 1);
+ sint64 tdudx = dudx;
+
+ if (ulo > uhi) {
+ std::swap(ulo, uhi);
+ tdudx = -tdudx;
+ }
+
+ int xprecopy = 0;
+ int xpostcopy = 0;
+
+ if (ulo < 0) {
+ xprecopy = (int)((1 - ulo) / tdudx) + 1;
+ }
+
+ const sint64 ulimit = ((sint64)(src.w-1) << 32);
+
+ if (uhi >= ulimit)
+ xpostcopy = (int)((uhi - ulimit - 1) / tdudx) + 1;
+
+ if (dudx < 0) {
+ std::swap(xprecopy, xpostcopy);
+ std::swap(xprepos, xpostpos);
+ }
+
+ u64 += dudx * xprecopy;
+ const int xtotal = x2i - x1i;
+ int xmidcopy = (x2i - x1i) - (xprecopy + xpostcopy);
+ const sint32 ui = (sint32)(u64 >> 32);
+
+ // set up parameter block
+
+ VDPixmapReferenceStretchBltBilinearParameters params;
+
+ params.u = (uint32)u64;
+ params.uinc = (sint32)(dudx >> 32);
+ params.dudx = (sint32)dudx;
+ params.xprecopy = xprecopy;
+ params.xprepos = (xprepos - ui) * bpp;
+ params.xpostcopy = xpostcopy;
+ params.xpostpos = (xpostpos - ui) * bpp;
+ params.xmidsize = xmidcopy;
+
+ void *dstp = (char *)dst.data + y1i * dst.pitch + x1i * bpp;
+ const void *srcp = (char *)src.data + ui * bpp;
+
+ VDPixmapBuffer window(xtotal, 2, src.format);
+
+ void *pTempRow1 = window.data;
+ void *pTempRow2 = (char *)window.data + window.pitch;
+ int windowbottom = dvdy > 0 ? -0x7fffffff : 0x7fffffff;
+
+ do {
+ sint32 iv = (sint32)(v64 >> 32);
+ sint32 iv_bottom = iv + 1;
+
+ if (iv < 0)
+ iv = iv_bottom = 0;
+
+ if (iv >= src.h-1)
+ iv = iv_bottom = src.h-1;
+
+ if (dvdy < 0) {
+ if (windowbottom > iv_bottom+1)
+ windowbottom = iv_bottom+1;
+
+ while(windowbottom > iv) {
+ std::swap(pTempRow1, pTempRow2);
+
+ --windowbottom;
+
+ params.dst = (char *)pTempRow1 + bpp * params.xprecopy;
+ params.src = vdptroffset(srcp, windowbottom * src.pitch);
+
+ pHorizontalFilter(params);
+ }
+ } else {
+ if (windowbottom < iv-1)
+ windowbottom = iv-1;
+
+ while(windowbottom < iv_bottom) {
+ std::swap(pTempRow1, pTempRow2);
+
+ ++windowbottom;
+
+ params.dst = (char *)pTempRow2 + bpp * params.xprecopy;
+ params.src = vdptroffset(srcp, windowbottom * src.pitch);
+
+ pHorizontalFilter(params);
+ }
+ }
+
+ if (iv == iv_bottom)
+ if (dvdy < 0)
+ pVerticalFilter(dstp, pTempRow1, pTempRow1, xtotal, 0);
+ else
+ pVerticalFilter(dstp, pTempRow2, pTempRow2, xtotal, 0);
+ else
+ pVerticalFilter(dstp, pTempRow1, pTempRow2, xtotal, (uint32)v64);
+
+ v64 += dvdy;
+ dstp = (char *)dstp + dst.pitch;
+ } while(++y1i < y2i);
+
+ return true;
+}
\ No newline at end of file
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/tables.cpp b/src/thirdparty/VirtualDub/Kasumi/source/tables.cpp
new file mode 100644
index 000000000..bf1987500
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/tables.cpp
@@ -0,0 +1,204 @@
+// Automatically generated by Asuka "maketables." DO NOT EDIT!
+
+#include <vd2/system/vdtypes.h>
+
+extern "C" const sint32 kVDCubicInterpTableFX14_075[256][4]={
+ { 0, 16384, 0, 0 }, { -48, 16384, 48, 0 }, { -95, 16383, 97, -1 }, { -141, 16380, 147, -2 },
+ { -186, 16375, 198, -3 }, { -231, 16371, 249, -5 }, { -275, 16365, 301, -7 }, { -318, 16357, 354, -9 },
+ { -360, 16349, 407, -12 }, { -402, 16340, 461, -15 }, { -443, 16329, 516, -18 }, { -484, 16318, 572, -22 },
+ { -523, 16305, 628, -26 }, { -562, 16291, 685, -30 }, { -601, 16278, 742, -35 }, { -638, 16262, 800, -40 },
+ { -675, 16245, 859, -45 }, { -711, 16228, 918, -51 }, { -747, 16209, 978, -56 }, { -782, 16190, 1039, -63 },
+ { -816, 16169, 1100, -69 }, { -849, 16147, 1162, -76 }, { -882, 16124, 1225, -83 }, { -915, 16101, 1288, -90 },
+ { -946, 16077, 1351, -98 }, { -977, 16052, 1415, -106 }, { -1007, 16025, 1480, -114 }, { -1037, 15998, 1545, -122 },
+ { -1066, 15970, 1611, -131 }, { -1094, 15940, 1678, -140 }, { -1122, 15910, 1745, -149 }, { -1149, 15879, 1812, -158 },
+ { -1176, 15848, 1880, -168 }, { -1202, 15815, 1949, -178 }, { -1227, 15781, 2018, -188 }, { -1252, 15747, 2087, -198 },
+ { -1276, 15712, 2157, -209 }, { -1300, 15676, 2228, -220 }, { -1323, 15639, 2299, -231 }, { -1345, 15601, 2370, -242 },
+ { -1367, 15562, 2442, -253 }, { -1388, 15523, 2514, -265 }, { -1409, 15482, 2587, -276 }, { -1429, 15441, 2660, -288 },
+ { -1448, 15399, 2734, -301 }, { -1467, 15356, 2808, -313 }, { -1486, 15312, 2883, -325 }, { -1504, 15268, 2958, -338 },
+ { -1521, 15223, 3033, -351 }, { -1538, 15177, 3109, -364 }, { -1554, 15130, 3185, -377 }, { -1570, 15084, 3261, -391 },
+ { -1585, 15035, 3338, -404 }, { -1600, 14986, 3416, -418 }, { -1614, 14936, 3493, -431 }, { -1627, 14885, 3571, -445 },
+ { -1641, 14834, 3650, -459 }, { -1653, 14783, 3728, -474 }, { -1665, 14730, 3807, -488 }, { -1677, 14676, 3887, -502 },
+ { -1688, 14623, 3966, -517 }, { -1699, 14568, 4046, -531 }, { -1709, 14512, 4127, -546 }, { -1719, 14457, 4207, -561 },
+ { -1728, 14400, 4288, -576 }, { -1737, 14343, 4369, -591 }, { -1745, 14284, 4451, -606 }, { -1753, 14226, 4532, -621 },
+ { -1760, 14167, 4614, -637 }, { -1767, 14107, 4696, -652 }, { -1774, 14047, 4779, -668 }, { -1780, 13986, 4861, -683 },
+ { -1785, 13924, 4944, -699 }, { -1791, 13861, 5028, -714 }, { -1795, 13798, 5111, -730 }, { -1800, 13736, 5194, -746 },
+ { -1804, 13671, 5278, -761 }, { -1807, 13606, 5362, -777 }, { -1810, 13541, 5446, -793 }, { -1813, 13475, 5531, -809 },
+ { -1815, 13409, 5615, -825 }, { -1817, 13342, 5700, -841 }, { -1818, 13275, 5784, -857 }, { -1819, 13207, 5869, -873 },
+ { -1820, 13139, 5954, -889 }, { -1820, 13069, 6040, -905 }, { -1820, 13000, 6125, -921 }, { -1820, 12930, 6211, -937 },
+ { -1819, 12860, 6296, -953 }, { -1818, 12789, 6382, -969 }, { -1816, 12717, 6468, -985 }, { -1815, 12647, 6553, -1001 },
+ { -1812, 12574, 6639, -1017 }, { -1810, 12502, 6725, -1033 }, { -1807, 12427, 6812, -1048 }, { -1804, 12354, 6898, -1064 },
+ { -1800, 12280, 6984, -1080 }, { -1796, 12206, 7070, -1096 }, { -1792, 12130, 7157, -1111 }, { -1787, 12055, 7243, -1127 },
+ { -1782, 11980, 7329, -1143 }, { -1777, 11903, 7416, -1158 }, { -1772, 11827, 7502, -1173 }, { -1766, 11751, 7588, -1189 },
+ { -1760, 11673, 7675, -1204 }, { -1753, 11595, 7761, -1219 }, { -1747, 11517, 7848, -1234 }, { -1740, 11439, 7934, -1249 },
+ { -1733, 11361, 8020, -1264 }, { -1725, 11281, 8107, -1279 }, { -1717, 11202, 8193, -1294 }, { -1709, 11123, 8279, -1309 },
+ { -1701, 11043, 8365, -1323 }, { -1692, 10962, 8451, -1337 }, { -1684, 10883, 8537, -1352 }, { -1675, 10802, 8623, -1366 },
+ { -1665, 10720, 8709, -1380 }, { -1656, 10640, 8794, -1394 }, { -1646, 10557, 8880, -1407 }, { -1636, 10476, 8965, -1421 },
+ { -1626, 10393, 9051, -1434 }, { -1615, 10311, 9136, -1448 }, { -1604, 10228, 9221, -1461 }, { -1594, 10146, 9306, -1474 },
+ { -1582, 10062, 9391, -1487 }, { -1571, 9979, 9475, -1499 }, { -1560, 9896, 9560, -1512 }, { -1548, 9812, 9644, -1524 },
+ { -1536, 9728, 9728, -1536 }, { -1524, 9644, 9812, -1548 }, { -1512, 9560, 9896, -1560 }, { -1499, 9475, 9979, -1571 },
+ { -1487, 9391, 10062, -1582 }, { -1474, 9306, 10146, -1594 }, { -1461, 9221, 10228, -1604 }, { -1448, 9136, 10311, -1615 },
+ { -1434, 9051, 10393, -1626 }, { -1421, 8965, 10476, -1636 }, { -1407, 8880, 10557, -1646 }, { -1394, 8795, 10639, -1656 },
+ { -1380, 8709, 10720, -1665 }, { -1366, 8624, 10801, -1675 }, { -1352, 8538, 10882, -1684 }, { -1337, 8450, 10963, -1692 },
+ { -1323, 8365, 11043, -1701 }, { -1309, 8279, 11123, -1709 }, { -1294, 8192, 11203, -1717 }, { -1279, 8106, 11282, -1725 },
+ { -1264, 8020, 11361, -1733 }, { -1249, 7934, 11439, -1740 }, { -1234, 7847, 11518, -1747 }, { -1219, 7760, 11596, -1753 },
+ { -1204, 7675, 11673, -1760 }, { -1189, 7589, 11750, -1766 }, { -1173, 7502, 11827, -1772 }, { -1158, 7415, 11904, -1777 },
+ { -1143, 7329, 11980, -1782 }, { -1127, 7243, 12055, -1787 }, { -1111, 7156, 12131, -1792 }, { -1096, 7070, 12206, -1796 },
+ { -1080, 6984, 12280, -1800 }, { -1064, 6898, 12354, -1804 }, { -1048, 6811, 12428, -1807 }, { -1033, 6726, 12501, -1810 },
+ { -1017, 6639, 12574, -1812 }, { -1001, 6554, 12646, -1815 }, { -985, 6467, 12718, -1816 }, { -969, 6382, 12789, -1818 },
+ { -953, 6296, 12860, -1819 }, { -937, 6211, 12930, -1820 }, { -921, 6125, 13000, -1820 }, { -905, 6039, 13070, -1820 },
+ { -889, 5954, 13139, -1820 }, { -873, 5869, 13207, -1819 }, { -857, 5784, 13275, -1818 }, { -841, 5700, 13342, -1817 },
+ { -825, 5615, 13409, -1815 }, { -809, 5531, 13475, -1813 }, { -793, 5446, 13541, -1810 }, { -777, 5362, 13606, -1807 },
+ { -761, 5278, 13671, -1804 }, { -746, 5195, 13735, -1800 }, { -730, 5111, 13798, -1795 }, { -714, 5028, 13861, -1791 },
+ { -699, 4944, 13924, -1785 }, { -683, 4862, 13985, -1780 }, { -668, 4780, 14046, -1774 }, { -652, 4696, 14107, -1767 },
+ { -637, 4614, 14167, -1760 }, { -621, 4532, 14226, -1753 }, { -606, 4450, 14285, -1745 }, { -591, 4369, 14343, -1737 },
+ { -576, 4288, 14400, -1728 }, { -561, 4207, 14457, -1719 }, { -546, 4126, 14513, -1709 }, { -531, 4046, 14568, -1699 },
+ { -517, 3966, 14623, -1688 }, { -502, 3886, 14677, -1677 }, { -488, 3807, 14730, -1665 }, { -474, 3728, 14783, -1653 },
+ { -459, 3650, 14834, -1641 }, { -445, 3570, 14886, -1627 }, { -431, 3493, 14936, -1614 }, { -418, 3416, 14986, -1600 },
+ { -404, 3338, 15035, -1585 }, { -391, 3262, 15083, -1570 }, { -377, 3185, 15130, -1554 }, { -364, 3109, 15177, -1538 },
+ { -351, 3033, 15223, -1521 }, { -338, 2958, 15268, -1504 }, { -325, 2882, 15313, -1486 }, { -313, 2808, 15356, -1467 },
+ { -301, 2734, 15399, -1448 }, { -288, 2660, 15441, -1429 }, { -276, 2587, 15482, -1409 }, { -265, 2514, 15523, -1388 },
+ { -253, 2442, 15562, -1367 }, { -242, 2370, 15601, -1345 }, { -231, 2299, 15639, -1323 }, { -220, 2228, 15676, -1300 },
+ { -209, 2157, 15712, -1276 }, { -198, 2087, 15747, -1252 }, { -188, 2017, 15782, -1227 }, { -178, 1949, 15815, -1202 },
+ { -168, 1880, 15848, -1176 }, { -158, 1811, 15880, -1149 }, { -149, 1744, 15911, -1122 }, { -140, 1677, 15941, -1094 },
+ { -131, 1611, 15970, -1066 }, { -122, 1545, 15998, -1037 }, { -114, 1480, 16025, -1007 }, { -106, 1415, 16052, -977 },
+ { -98, 1351, 16077, -946 }, { -90, 1288, 16101, -915 }, { -83, 1224, 16125, -882 }, { -76, 1162, 16147, -849 },
+ { -69, 1100, 16169, -816 }, { -63, 1040, 16189, -782 }, { -56, 978, 16209, -747 }, { -51, 919, 16227, -711 },
+ { -45, 859, 16245, -675 }, { -40, 800, 16262, -638 }, { -35, 743, 16277, -601 }, { -30, 684, 16292, -562 },
+ { -26, 628, 16305, -523 }, { -22, 572, 16318, -484 }, { -18, 516, 16329, -443 }, { -15, 462, 16339, -402 },
+ { -12, 407, 16349, -360 }, { -9, 354, 16357, -318 }, { -7, 302, 16364, -275 }, { -5, 250, 16370, -231 },
+ { -3, 198, 16375, -186 }, { -2, 148, 16379, -141 }, { -1, 98, 16382, -95 }, { 0, 49, 16383, -48 },
+};
+
+#ifdef _M_IX86
+extern "C" const __declspec(align(16)) sint16 kVDCubicInterpTableFX14_075_MMX[256][8]={
+ { 0, 16384, 0, 16384, 0, 0, 0, 0 }, { -48, 16384, -48, 16384, 48, 0, 48, 0 },
+ { -95, 16383, -95, 16383, 97, -1, 97, -1 }, { -141, 16380, -141, 16380, 147, -2, 147, -2 },
+ { -186, 16375, -186, 16375, 198, -3, 198, -3 }, { -231, 16371, -231, 16371, 249, -5, 249, -5 },
+ { -275, 16365, -275, 16365, 301, -7, 301, -7 }, { -318, 16357, -318, 16357, 354, -9, 354, -9 },
+ { -360, 16349, -360, 16349, 407, -12, 407, -12 }, { -402, 16340, -402, 16340, 461, -15, 461, -15 },
+ { -443, 16329, -443, 16329, 516, -18, 516, -18 }, { -484, 16318, -484, 16318, 572, -22, 572, -22 },
+ { -523, 16305, -523, 16305, 628, -26, 628, -26 }, { -562, 16291, -562, 16291, 685, -30, 685, -30 },
+ { -601, 16278, -601, 16278, 742, -35, 742, -35 }, { -638, 16262, -638, 16262, 800, -40, 800, -40 },
+ { -675, 16245, -675, 16245, 859, -45, 859, -45 }, { -711, 16228, -711, 16228, 918, -51, 918, -51 },
+ { -747, 16209, -747, 16209, 978, -56, 978, -56 }, { -782, 16190, -782, 16190, 1039, -63, 1039, -63 },
+ { -816, 16169, -816, 16169, 1100, -69, 1100, -69 }, { -849, 16147, -849, 16147, 1162, -76, 1162, -76 },
+ { -882, 16124, -882, 16124, 1225, -83, 1225, -83 }, { -915, 16101, -915, 16101, 1288, -90, 1288, -90 },
+ { -946, 16077, -946, 16077, 1351, -98, 1351, -98 }, { -977, 16052, -977, 16052, 1415, -106, 1415, -106 },
+ { -1007, 16025, -1007, 16025, 1480, -114, 1480, -114 }, { -1037, 15998, -1037, 15998, 1545, -122, 1545, -122 },
+ { -1066, 15970, -1066, 15970, 1611, -131, 1611, -131 }, { -1094, 15940, -1094, 15940, 1678, -140, 1678, -140 },
+ { -1122, 15910, -1122, 15910, 1745, -149, 1745, -149 }, { -1149, 15879, -1149, 15879, 1812, -158, 1812, -158 },
+ { -1176, 15848, -1176, 15848, 1880, -168, 1880, -168 }, { -1202, 15815, -1202, 15815, 1949, -178, 1949, -178 },
+ { -1227, 15781, -1227, 15781, 2018, -188, 2018, -188 }, { -1252, 15747, -1252, 15747, 2087, -198, 2087, -198 },
+ { -1276, 15712, -1276, 15712, 2157, -209, 2157, -209 }, { -1300, 15676, -1300, 15676, 2228, -220, 2228, -220 },
+ { -1323, 15639, -1323, 15639, 2299, -231, 2299, -231 }, { -1345, 15601, -1345, 15601, 2370, -242, 2370, -242 },
+ { -1367, 15562, -1367, 15562, 2442, -253, 2442, -253 }, { -1388, 15523, -1388, 15523, 2514, -265, 2514, -265 },
+ { -1409, 15482, -1409, 15482, 2587, -276, 2587, -276 }, { -1429, 15441, -1429, 15441, 2660, -288, 2660, -288 },
+ { -1448, 15399, -1448, 15399, 2734, -301, 2734, -301 }, { -1467, 15356, -1467, 15356, 2808, -313, 2808, -313 },
+ { -1486, 15312, -1486, 15312, 2883, -325, 2883, -325 }, { -1504, 15268, -1504, 15268, 2958, -338, 2958, -338 },
+ { -1521, 15223, -1521, 15223, 3033, -351, 3033, -351 }, { -1538, 15177, -1538, 15177, 3109, -364, 3109, -364 },
+ { -1554, 15130, -1554, 15130, 3185, -377, 3185, -377 }, { -1570, 15084, -1570, 15084, 3261, -391, 3261, -391 },
+ { -1585, 15035, -1585, 15035, 3338, -404, 3338, -404 }, { -1600, 14986, -1600, 14986, 3416, -418, 3416, -418 },
+ { -1614, 14936, -1614, 14936, 3493, -431, 3493, -431 }, { -1627, 14885, -1627, 14885, 3571, -445, 3571, -445 },
+ { -1641, 14834, -1641, 14834, 3650, -459, 3650, -459 }, { -1653, 14783, -1653, 14783, 3728, -474, 3728, -474 },
+ { -1665, 14730, -1665, 14730, 3807, -488, 3807, -488 }, { -1677, 14676, -1677, 14676, 3887, -502, 3887, -502 },
+ { -1688, 14623, -1688, 14623, 3966, -517, 3966, -517 }, { -1699, 14568, -1699, 14568, 4046, -531, 4046, -531 },
+ { -1709, 14512, -1709, 14512, 4127, -546, 4127, -546 }, { -1719, 14457, -1719, 14457, 4207, -561, 4207, -561 },
+ { -1728, 14400, -1728, 14400, 4288, -576, 4288, -576 }, { -1737, 14343, -1737, 14343, 4369, -591, 4369, -591 },
+ { -1745, 14284, -1745, 14284, 4451, -606, 4451, -606 }, { -1753, 14226, -1753, 14226, 4532, -621, 4532, -621 },
+ { -1760, 14167, -1760, 14167, 4614, -637, 4614, -637 }, { -1767, 14107, -1767, 14107, 4696, -652, 4696, -652 },
+ { -1774, 14047, -1774, 14047, 4779, -668, 4779, -668 }, { -1780, 13986, -1780, 13986, 4861, -683, 4861, -683 },
+ { -1785, 13924, -1785, 13924, 4944, -699, 4944, -699 }, { -1791, 13861, -1791, 13861, 5028, -714, 5028, -714 },
+ { -1795, 13798, -1795, 13798, 5111, -730, 5111, -730 }, { -1800, 13736, -1800, 13736, 5194, -746, 5194, -746 },
+ { -1804, 13671, -1804, 13671, 5278, -761, 5278, -761 }, { -1807, 13606, -1807, 13606, 5362, -777, 5362, -777 },
+ { -1810, 13541, -1810, 13541, 5446, -793, 5446, -793 }, { -1813, 13475, -1813, 13475, 5531, -809, 5531, -809 },
+ { -1815, 13409, -1815, 13409, 5615, -825, 5615, -825 }, { -1817, 13342, -1817, 13342, 5700, -841, 5700, -841 },
+ { -1818, 13275, -1818, 13275, 5784, -857, 5784, -857 }, { -1819, 13207, -1819, 13207, 5869, -873, 5869, -873 },
+ { -1820, 13139, -1820, 13139, 5954, -889, 5954, -889 }, { -1820, 13069, -1820, 13069, 6040, -905, 6040, -905 },
+ { -1820, 13000, -1820, 13000, 6125, -921, 6125, -921 }, { -1820, 12930, -1820, 12930, 6211, -937, 6211, -937 },
+ { -1819, 12860, -1819, 12860, 6296, -953, 6296, -953 }, { -1818, 12789, -1818, 12789, 6382, -969, 6382, -969 },
+ { -1816, 12717, -1816, 12717, 6468, -985, 6468, -985 }, { -1815, 12647, -1815, 12647, 6553, -1001, 6553, -1001 },
+ { -1812, 12574, -1812, 12574, 6639, -1017, 6639, -1017 }, { -1810, 12502, -1810, 12502, 6725, -1033, 6725, -1033 },
+ { -1807, 12427, -1807, 12427, 6812, -1048, 6812, -1048 }, { -1804, 12354, -1804, 12354, 6898, -1064, 6898, -1064 },
+ { -1800, 12280, -1800, 12280, 6984, -1080, 6984, -1080 }, { -1796, 12206, -1796, 12206, 7070, -1096, 7070, -1096 },
+ { -1792, 12130, -1792, 12130, 7157, -1111, 7157, -1111 }, { -1787, 12055, -1787, 12055, 7243, -1127, 7243, -1127 },
+ { -1782, 11980, -1782, 11980, 7329, -1143, 7329, -1143 }, { -1777, 11903, -1777, 11903, 7416, -1158, 7416, -1158 },
+ { -1772, 11827, -1772, 11827, 7502, -1173, 7502, -1173 }, { -1766, 11751, -1766, 11751, 7588, -1189, 7588, -1189 },
+ { -1760, 11673, -1760, 11673, 7675, -1204, 7675, -1204 }, { -1753, 11595, -1753, 11595, 7761, -1219, 7761, -1219 },
+ { -1747, 11517, -1747, 11517, 7848, -1234, 7848, -1234 }, { -1740, 11439, -1740, 11439, 7934, -1249, 7934, -1249 },
+ { -1733, 11361, -1733, 11361, 8020, -1264, 8020, -1264 }, { -1725, 11281, -1725, 11281, 8107, -1279, 8107, -1279 },
+ { -1717, 11202, -1717, 11202, 8193, -1294, 8193, -1294 }, { -1709, 11123, -1709, 11123, 8279, -1309, 8279, -1309 },
+ { -1701, 11043, -1701, 11043, 8365, -1323, 8365, -1323 }, { -1692, 10962, -1692, 10962, 8451, -1337, 8451, -1337 },
+ { -1684, 10883, -1684, 10883, 8537, -1352, 8537, -1352 }, { -1675, 10802, -1675, 10802, 8623, -1366, 8623, -1366 },
+ { -1665, 10720, -1665, 10720, 8709, -1380, 8709, -1380 }, { -1656, 10640, -1656, 10640, 8794, -1394, 8794, -1394 },
+ { -1646, 10557, -1646, 10557, 8880, -1407, 8880, -1407 }, { -1636, 10476, -1636, 10476, 8965, -1421, 8965, -1421 },
+ { -1626, 10393, -1626, 10393, 9051, -1434, 9051, -1434 }, { -1615, 10311, -1615, 10311, 9136, -1448, 9136, -1448 },
+ { -1604, 10228, -1604, 10228, 9221, -1461, 9221, -1461 }, { -1594, 10146, -1594, 10146, 9306, -1474, 9306, -1474 },
+ { -1582, 10062, -1582, 10062, 9391, -1487, 9391, -1487 }, { -1571, 9979, -1571, 9979, 9475, -1499, 9475, -1499 },
+ { -1560, 9896, -1560, 9896, 9560, -1512, 9560, -1512 }, { -1548, 9812, -1548, 9812, 9644, -1524, 9644, -1524 },
+ { -1536, 9728, -1536, 9728, 9728, -1536, 9728, -1536 }, { -1524, 9644, -1524, 9644, 9812, -1548, 9812, -1548 },
+ { -1512, 9560, -1512, 9560, 9896, -1560, 9896, -1560 }, { -1499, 9475, -1499, 9475, 9979, -1571, 9979, -1571 },
+ { -1487, 9391, -1487, 9391, 10062, -1582, 10062, -1582 }, { -1474, 9306, -1474, 9306, 10146, -1594, 10146, -1594 },
+ { -1461, 9221, -1461, 9221, 10228, -1604, 10228, -1604 }, { -1448, 9136, -1448, 9136, 10311, -1615, 10311, -1615 },
+ { -1434, 9051, -1434, 9051, 10393, -1626, 10393, -1626 }, { -1421, 8965, -1421, 8965, 10476, -1636, 10476, -1636 },
+ { -1407, 8880, -1407, 8880, 10557, -1646, 10557, -1646 }, { -1394, 8795, -1394, 8795, 10639, -1656, 10639, -1656 },
+ { -1380, 8709, -1380, 8709, 10720, -1665, 10720, -1665 }, { -1366, 8624, -1366, 8624, 10801, -1675, 10801, -1675 },
+ { -1352, 8538, -1352, 8538, 10882, -1684, 10882, -1684 }, { -1337, 8450, -1337, 8450, 10963, -1692, 10963, -1692 },
+ { -1323, 8365, -1323, 8365, 11043, -1701, 11043, -1701 }, { -1309, 8279, -1309, 8279, 11123, -1709, 11123, -1709 },
+ { -1294, 8192, -1294, 8192, 11203, -1717, 11203, -1717 }, { -1279, 8106, -1279, 8106, 11282, -1725, 11282, -1725 },
+ { -1264, 8020, -1264, 8020, 11361, -1733, 11361, -1733 }, { -1249, 7934, -1249, 7934, 11439, -1740, 11439, -1740 },
+ { -1234, 7847, -1234, 7847, 11518, -1747, 11518, -1747 }, { -1219, 7760, -1219, 7760, 11596, -1753, 11596, -1753 },
+ { -1204, 7675, -1204, 7675, 11673, -1760, 11673, -1760 }, { -1189, 7589, -1189, 7589, 11750, -1766, 11750, -1766 },
+ { -1173, 7502, -1173, 7502, 11827, -1772, 11827, -1772 }, { -1158, 7415, -1158, 7415, 11904, -1777, 11904, -1777 },
+ { -1143, 7329, -1143, 7329, 11980, -1782, 11980, -1782 }, { -1127, 7243, -1127, 7243, 12055, -1787, 12055, -1787 },
+ { -1111, 7156, -1111, 7156, 12131, -1792, 12131, -1792 }, { -1096, 7070, -1096, 7070, 12206, -1796, 12206, -1796 },
+ { -1080, 6984, -1080, 6984, 12280, -1800, 12280, -1800 }, { -1064, 6898, -1064, 6898, 12354, -1804, 12354, -1804 },
+ { -1048, 6811, -1048, 6811, 12428, -1807, 12428, -1807 }, { -1033, 6726, -1033, 6726, 12501, -1810, 12501, -1810 },
+ { -1017, 6639, -1017, 6639, 12574, -1812, 12574, -1812 }, { -1001, 6554, -1001, 6554, 12646, -1815, 12646, -1815 },
+ { -985, 6467, -985, 6467, 12718, -1816, 12718, -1816 }, { -969, 6382, -969, 6382, 12789, -1818, 12789, -1818 },
+ { -953, 6296, -953, 6296, 12860, -1819, 12860, -1819 }, { -937, 6211, -937, 6211, 12930, -1820, 12930, -1820 },
+ { -921, 6125, -921, 6125, 13000, -1820, 13000, -1820 }, { -905, 6039, -905, 6039, 13070, -1820, 13070, -1820 },
+ { -889, 5954, -889, 5954, 13139, -1820, 13139, -1820 }, { -873, 5869, -873, 5869, 13207, -1819, 13207, -1819 },
+ { -857, 5784, -857, 5784, 13275, -1818, 13275, -1818 }, { -841, 5700, -841, 5700, 13342, -1817, 13342, -1817 },
+ { -825, 5615, -825, 5615, 13409, -1815, 13409, -1815 }, { -809, 5531, -809, 5531, 13475, -1813, 13475, -1813 },
+ { -793, 5446, -793, 5446, 13541, -1810, 13541, -1810 }, { -777, 5362, -777, 5362, 13606, -1807, 13606, -1807 },
+ { -761, 5278, -761, 5278, 13671, -1804, 13671, -1804 }, { -746, 5195, -746, 5195, 13735, -1800, 13735, -1800 },
+ { -730, 5111, -730, 5111, 13798, -1795, 13798, -1795 }, { -714, 5028, -714, 5028, 13861, -1791, 13861, -1791 },
+ { -699, 4944, -699, 4944, 13924, -1785, 13924, -1785 }, { -683, 4862, -683, 4862, 13985, -1780, 13985, -1780 },
+ { -668, 4780, -668, 4780, 14046, -1774, 14046, -1774 }, { -652, 4696, -652, 4696, 14107, -1767, 14107, -1767 },
+ { -637, 4614, -637, 4614, 14167, -1760, 14167, -1760 }, { -621, 4532, -621, 4532, 14226, -1753, 14226, -1753 },
+ { -606, 4450, -606, 4450, 14285, -1745, 14285, -1745 }, { -591, 4369, -591, 4369, 14343, -1737, 14343, -1737 },
+ { -576, 4288, -576, 4288, 14400, -1728, 14400, -1728 }, { -561, 4207, -561, 4207, 14457, -1719, 14457, -1719 },
+ { -546, 4126, -546, 4126, 14513, -1709, 14513, -1709 }, { -531, 4046, -531, 4046, 14568, -1699, 14568, -1699 },
+ { -517, 3966, -517, 3966, 14623, -1688, 14623, -1688 }, { -502, 3886, -502, 3886, 14677, -1677, 14677, -1677 },
+ { -488, 3807, -488, 3807, 14730, -1665, 14730, -1665 }, { -474, 3728, -474, 3728, 14783, -1653, 14783, -1653 },
+ { -459, 3650, -459, 3650, 14834, -1641, 14834, -1641 }, { -445, 3570, -445, 3570, 14886, -1627, 14886, -1627 },
+ { -431, 3493, -431, 3493, 14936, -1614, 14936, -1614 }, { -418, 3416, -418, 3416, 14986, -1600, 14986, -1600 },
+ { -404, 3338, -404, 3338, 15035, -1585, 15035, -1585 }, { -391, 3262, -391, 3262, 15083, -1570, 15083, -1570 },
+ { -377, 3185, -377, 3185, 15130, -1554, 15130, -1554 }, { -364, 3109, -364, 3109, 15177, -1538, 15177, -1538 },
+ { -351, 3033, -351, 3033, 15223, -1521, 15223, -1521 }, { -338, 2958, -338, 2958, 15268, -1504, 15268, -1504 },
+ { -325, 2882, -325, 2882, 15313, -1486, 15313, -1486 }, { -313, 2808, -313, 2808, 15356, -1467, 15356, -1467 },
+ { -301, 2734, -301, 2734, 15399, -1448, 15399, -1448 }, { -288, 2660, -288, 2660, 15441, -1429, 15441, -1429 },
+ { -276, 2587, -276, 2587, 15482, -1409, 15482, -1409 }, { -265, 2514, -265, 2514, 15523, -1388, 15523, -1388 },
+ { -253, 2442, -253, 2442, 15562, -1367, 15562, -1367 }, { -242, 2370, -242, 2370, 15601, -1345, 15601, -1345 },
+ { -231, 2299, -231, 2299, 15639, -1323, 15639, -1323 }, { -220, 2228, -220, 2228, 15676, -1300, 15676, -1300 },
+ { -209, 2157, -209, 2157, 15712, -1276, 15712, -1276 }, { -198, 2087, -198, 2087, 15747, -1252, 15747, -1252 },
+ { -188, 2017, -188, 2017, 15782, -1227, 15782, -1227 }, { -178, 1949, -178, 1949, 15815, -1202, 15815, -1202 },
+ { -168, 1880, -168, 1880, 15848, -1176, 15848, -1176 }, { -158, 1811, -158, 1811, 15880, -1149, 15880, -1149 },
+ { -149, 1744, -149, 1744, 15911, -1122, 15911, -1122 }, { -140, 1677, -140, 1677, 15941, -1094, 15941, -1094 },
+ { -131, 1611, -131, 1611, 15970, -1066, 15970, -1066 }, { -122, 1545, -122, 1545, 15998, -1037, 15998, -1037 },
+ { -114, 1480, -114, 1480, 16025, -1007, 16025, -1007 }, { -106, 1415, -106, 1415, 16052, -977, 16052, -977 },
+ { -98, 1351, -98, 1351, 16077, -946, 16077, -946 }, { -90, 1288, -90, 1288, 16101, -915, 16101, -915 },
+ { -83, 1224, -83, 1224, 16125, -882, 16125, -882 }, { -76, 1162, -76, 1162, 16147, -849, 16147, -849 },
+ { -69, 1100, -69, 1100, 16169, -816, 16169, -816 }, { -63, 1040, -63, 1040, 16189, -782, 16189, -782 },
+ { -56, 978, -56, 978, 16209, -747, 16209, -747 }, { -51, 919, -51, 919, 16227, -711, 16227, -711 },
+ { -45, 859, -45, 859, 16245, -675, 16245, -675 }, { -40, 800, -40, 800, 16262, -638, 16262, -638 },
+ { -35, 743, -35, 743, 16277, -601, 16277, -601 }, { -30, 684, -30, 684, 16292, -562, 16292, -562 },
+ { -26, 628, -26, 628, 16305, -523, 16305, -523 }, { -22, 572, -22, 572, 16318, -484, 16318, -484 },
+ { -18, 516, -18, 516, 16329, -443, 16329, -443 }, { -15, 462, -15, 462, 16339, -402, 16339, -402 },
+ { -12, 407, -12, 407, 16349, -360, 16349, -360 }, { -9, 354, -9, 354, 16357, -318, 16357, -318 },
+ { -7, 302, -7, 302, 16364, -275, 16364, -275 }, { -5, 250, -5, 250, 16370, -231, 16370, -231 },
+ { -3, 198, -3, 198, 16375, -186, 16375, -186 }, { -2, 148, -2, 148, 16379, -141, 16379, -141 },
+ { -1, 98, -1, 98, 16382, -95, 16382, -95 }, { 0, 49, 0, 49, 16383, -48, 16383, -48 },
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/triblt.cpp b/src/thirdparty/VirtualDub/Kasumi/source/triblt.cpp
new file mode 100644
index 000000000..8fe16138a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/triblt.cpp
@@ -0,0 +1,1717 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include <math.h>
+#include <vector>
+#include <vd2/system/math.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/vdalloc.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/pixmapops.h>
+#include <vd2/Kasumi/resample.h>
+#include <vd2/Kasumi/tables.h>
+#include <vd2/Kasumi/triblt.h>
+
+namespace {
+ uint32 lerp_RGB888(sint32 a, sint32 b, sint32 x) {
+ sint32 a_rb = a & 0xff00ff;
+ sint32 a_g = a & 0x00ff00;
+ sint32 b_rb = b & 0xff00ff;
+ sint32 b_g = b & 0x00ff00;
+
+ const uint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 top_g = (a_g + (((b_g - a_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+
+ return top_rb + top_g;
+ }
+
+ uint32 bilerp_RGB888(sint32 a, sint32 b, sint32 c, sint32 d, sint32 x, sint32 y) {
+ sint32 a_rb = a & 0xff00ff;
+ sint32 a_g = a & 0x00ff00;
+ sint32 b_rb = b & 0xff00ff;
+ sint32 b_g = b & 0x00ff00;
+ sint32 c_rb = c & 0xff00ff;
+ sint32 c_g = c & 0x00ff00;
+ sint32 d_rb = d & 0xff00ff;
+ sint32 d_g = d & 0x00ff00;
+
+ const uint32 top_rb = (a_rb + (((b_rb - a_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 top_g = (a_g + (((b_g - a_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+ const uint32 bot_rb = (c_rb + (((d_rb - c_rb)*x + 0x00800080) >> 8)) & 0xff00ff;
+ const uint32 bot_g = (c_g + (((d_g - c_g )*x + 0x00008000) >> 8)) & 0x00ff00;
+
+ const uint32 final_rb = (top_rb + (((bot_rb - top_rb)*y) >> 8)) & 0xff00ff;
+ const uint32 final_g = (top_g + (((bot_g - top_g )*y) >> 8)) & 0x00ff00;
+
+ return final_rb + final_g;
+ }
+
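+	// 4x4 bicubic sampler: a 4-tap horizontal pass per source row using the 2.14 cubic
+	// interpolation table, followed by a 4-tap vertical pass over the intermediate sums
+	// and a final clamp of each channel to [0, 255].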
+ uint32 bicubic_RGB888(const uint32 *src0, const uint32 *src1, const uint32 *src2, const uint32 *src3, sint32 x, sint32 y) {
+ const uint32 p00 = src0[0];
+ const uint32 p01 = src0[1];
+ const uint32 p02 = src0[2];
+ const uint32 p03 = src0[3];
+ const uint32 p10 = src1[0];
+ const uint32 p11 = src1[1];
+ const uint32 p12 = src1[2];
+ const uint32 p13 = src1[3];
+ const uint32 p20 = src2[0];
+ const uint32 p21 = src2[1];
+ const uint32 p22 = src2[2];
+ const uint32 p23 = src2[3];
+ const uint32 p30 = src3[0];
+ const uint32 p31 = src3[1];
+ const uint32 p32 = src3[2];
+ const uint32 p33 = src3[3];
+
+ const sint32 *htab = kVDCubicInterpTableFX14_075[x];
+ const sint32 *vtab = kVDCubicInterpTableFX14_075[y];
+
+ const int ch0 = htab[0];
+ const int ch1 = htab[1];
+ const int ch2 = htab[2];
+ const int ch3 = htab[3];
+ const int cv0 = vtab[0];
+ const int cv1 = vtab[1];
+ const int cv2 = vtab[2];
+ const int cv3 = vtab[3];
+
+ int r0 = ((int)((p00>>16)&0xff) * ch0 + (int)((p01>>16)&0xff) * ch1 + (int)((p02>>16)&0xff) * ch2 + (int)((p03>>16)&0xff) * ch3 + 128) >> 8;
+ int g0 = ((int)((p00>> 8)&0xff) * ch0 + (int)((p01>> 8)&0xff) * ch1 + (int)((p02>> 8)&0xff) * ch2 + (int)((p03>> 8)&0xff) * ch3 + 128) >> 8;
+ int b0 = ((int)((p00 )&0xff) * ch0 + (int)((p01 )&0xff) * ch1 + (int)((p02 )&0xff) * ch2 + (int)((p03 )&0xff) * ch3 + 128) >> 8;
+ int r1 = ((int)((p10>>16)&0xff) * ch0 + (int)((p11>>16)&0xff) * ch1 + (int)((p12>>16)&0xff) * ch2 + (int)((p13>>16)&0xff) * ch3 + 128) >> 8;
+ int g1 = ((int)((p10>> 8)&0xff) * ch0 + (int)((p11>> 8)&0xff) * ch1 + (int)((p12>> 8)&0xff) * ch2 + (int)((p13>> 8)&0xff) * ch3 + 128) >> 8;
+ int b1 = ((int)((p10 )&0xff) * ch0 + (int)((p11 )&0xff) * ch1 + (int)((p12 )&0xff) * ch2 + (int)((p13 )&0xff) * ch3 + 128) >> 8;
+ int r2 = ((int)((p20>>16)&0xff) * ch0 + (int)((p21>>16)&0xff) * ch1 + (int)((p22>>16)&0xff) * ch2 + (int)((p23>>16)&0xff) * ch3 + 128) >> 8;
+ int g2 = ((int)((p20>> 8)&0xff) * ch0 + (int)((p21>> 8)&0xff) * ch1 + (int)((p22>> 8)&0xff) * ch2 + (int)((p23>> 8)&0xff) * ch3 + 128) >> 8;
+ int b2 = ((int)((p20 )&0xff) * ch0 + (int)((p21 )&0xff) * ch1 + (int)((p22 )&0xff) * ch2 + (int)((p23 )&0xff) * ch3 + 128) >> 8;
+ int r3 = ((int)((p30>>16)&0xff) * ch0 + (int)((p31>>16)&0xff) * ch1 + (int)((p32>>16)&0xff) * ch2 + (int)((p33>>16)&0xff) * ch3 + 128) >> 8;
+ int g3 = ((int)((p30>> 8)&0xff) * ch0 + (int)((p31>> 8)&0xff) * ch1 + (int)((p32>> 8)&0xff) * ch2 + (int)((p33>> 8)&0xff) * ch3 + 128) >> 8;
+ int b3 = ((int)((p30 )&0xff) * ch0 + (int)((p31 )&0xff) * ch1 + (int)((p32 )&0xff) * ch2 + (int)((p33 )&0xff) * ch3 + 128) >> 8;
+
+ int r = (r0 * cv0 + r1 * cv1 + r2 * cv2 + r3 * cv3 + (1<<19)) >> 20;
+ int g = (g0 * cv0 + g1 * cv1 + g2 * cv2 + g3 * cv3 + (1<<19)) >> 20;
+ int b = (b0 * cv0 + b1 * cv1 + b2 * cv2 + b3 * cv3 + (1<<19)) >> 20;
+
+ if (r<0) r=0; else if (r>255) r=255;
+ if (g<0) g=0; else if (g>255) g=255;
+ if (b<0) b=0; else if (b>255) b=255;
+
+ return (r<<16) + (g<<8) + b;
+ }
+}
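+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (editorial, hypothetical, excluded from the build): the
+// helpers above interpolate in 8.8 fixed point, with x and y fractions in
+// [0,256). The +0x00800080 / +0x00008000 terms round the red/blue pair and
+// the green channel to nearest before the >>8. A quick worked example:
+#if 0
+static void SketchCheckLerpRGB888() {
+	// Halfway (x = 128) between pure red and pure blue: each channel moves
+	// from 255 toward 0 (or 0 toward 255) and lands on the rounded midpoint.
+	VDASSERT(lerp_RGB888(0x00FF0000, 0x000000FF, 128) == 0x00800080);
+
+	// x = 0 returns the first operand exactly; x never reaches 256, so the
+	// second operand is only approached, never returned exactly.
+	VDASSERT(lerp_RGB888(0x00123456, 0x00FFFFFF, 0) == 0x00123456);
+}
+#endif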
+
+namespace {
+ enum {
+ kTop = 1,
+ kBottom = 2,
+ kLeft = 4,
+ kRight = 8,
+ kNear = 16,
+ kFar = 32
+ };
+
+ struct VDTriBltMipInfo {
+ const uint32 *mip;
+ ptrdiff_t pitch;
+ uint32 uvmul, _pad;
+ };
+
+ struct VDTriBltInfo {
+ VDTriBltMipInfo mips[16];
+ uint32 *dst;
+ const uint32 *src;
+ sint32 width;
+ const int *cubictab;
+ };
+
+ struct VDTriBltGenInfo {
+ float u;
+ float v;
+ float rhw;
+ float dudx;
+ float dvdx;
+ float drhwdx;
+ };
+
+ typedef void (*VDTriBltSpanFunction)(const VDTriBltInfo *);
+ typedef void (*VDTriBltGenFunction)(const VDTriBltGenInfo *);
+
+ void vd_triblt_span_point(const VDTriBltInfo *pInfo) {
+ sint32 w = -pInfo->width;
+ uint32 *dst = pInfo->dst + pInfo->width;
+ const uint32 *src = pInfo->src;
+ const uint32 *texture = pInfo->mips[0].mip;
+ const ptrdiff_t texpitch = pInfo->mips[0].pitch;
+
+ do {
+ dst[w] = vdptroffset(texture, texpitch * src[1])[src[0]];
+ src += 2;
+ } while(++w);
+ }
+
+ void vd_triblt_span_bilinear(const VDTriBltInfo *pInfo) {
+ sint32 w = -pInfo->width;
+ uint32 *dst = pInfo->dst + pInfo->width;
+ const uint32 *src = pInfo->src;
+ const uint32 *texture = pInfo->mips[0].mip;
+ const ptrdiff_t texpitch = pInfo->mips[0].pitch;
+
+ do {
+ const sint32 u = src[0];
+ const sint32 v = src[1];
+ src += 2;
+ const uint32 *src1 = vdptroffset(texture, texpitch * (v>>8)) + (u>>8);
+ const uint32 *src2 = vdptroffset(src1, texpitch);
+
+ dst[w] = bilerp_RGB888(src1[0], src1[1], src2[0], src2[1], u&255, v&255);
+ } while(++w);
+ }
+
+ void vd_triblt_span_trilinear(const VDTriBltInfo *pInfo) {
+ sint32 w = -pInfo->width;
+ uint32 *dst = pInfo->dst + pInfo->width;
+ const uint32 *src = pInfo->src;
+
+ do {
+ sint32 u = src[0];
+ sint32 v = src[1];
+ const sint32 lambda = src[2];
+ src += 3;
+
+ const sint32 lod = lambda >> 8;
+
+ const uint32 *texture1 = pInfo->mips[lod].mip;
+ const ptrdiff_t texpitch1 = pInfo->mips[lod].pitch;
+ const uint32 *texture2 = pInfo->mips[lod+1].mip;
+ const ptrdiff_t texpitch2 = pInfo->mips[lod+1].pitch;
+
+ u >>= lod;
+ v >>= lod;
+
+ u += 128;
+ v += 128;
+
+ const uint32 *src1 = vdptroffset(texture1, texpitch1 * (v>>8)) + (u>>8);
+ const uint32 *src2 = vdptroffset(src1, texpitch1);
+ const uint32 p1 = bilerp_RGB888(src1[0], src1[1], src2[0], src2[1], u&255, v&255);
+
+ u += 128;
+ v += 128;
+
+ const uint32 *src3 = vdptroffset(texture2, texpitch2 * (v>>9)) + (u>>9);
+ const uint32 *src4 = vdptroffset(src3, texpitch2);
+ const uint32 p2 = bilerp_RGB888(src3[0], src3[1], src4[0], src4[1], (u>>1)&255, (v>>1)&255);
+
+ dst[w] = lerp_RGB888(p1, p2, lambda & 255);
+ } while(++w);
+ }
+
+ void vd_triblt_span_bicubic_mip_linear(const VDTriBltInfo *pInfo) {
+ sint32 w = -pInfo->width;
+ uint32 *dst = pInfo->dst + pInfo->width;
+ const uint32 *src = pInfo->src;
+
+ do {
+ sint32 u = src[0];
+ sint32 v = src[1];
+ const sint32 lambda = src[2];
+ src += 3;
+
+ const sint32 lod = lambda >> 8;
+
+ const uint32 *texture1 = pInfo->mips[lod].mip;
+ const ptrdiff_t texpitch1 = pInfo->mips[lod].pitch;
+ const uint32 *texture2 = pInfo->mips[lod+1].mip;
+ const ptrdiff_t texpitch2 = pInfo->mips[lod+1].pitch;
+
+ u >>= lod;
+ v >>= lod;
+
+ u += 128;
+ v += 128;
+
+ const uint32 *src1 = vdptroffset(texture1, texpitch1 * (v>>8)) + (u>>8);
+ const uint32 *src2 = vdptroffset(src1, texpitch1);
+ const uint32 *src3 = vdptroffset(src2, texpitch1);
+ const uint32 *src4 = vdptroffset(src3, texpitch1);
+ const uint32 p1 = bicubic_RGB888(src1, src2, src3, src4, u&255, v&255);
+
+ u += 128;
+ v += 128;
+
+ const uint32 *src5 = vdptroffset(texture2, texpitch2 * (v>>9)) + (u>>9);
+ const uint32 *src6 = vdptroffset(src5, texpitch2);
+ const uint32 *src7 = vdptroffset(src6, texpitch2);
+ const uint32 *src8 = vdptroffset(src7, texpitch2);
+ const uint32 p2 = bicubic_RGB888(src5, src6, src7, src8, (u>>1)&255, (v>>1)&255);
+
+ dst[w] = lerp_RGB888(p1, p2, lambda & 255);
+ } while(++w);
+ }
+
+#ifdef _M_IX86
+ extern "C" void vdasm_triblt_span_bilinear_mmx(const VDTriBltInfo *pInfo);
+ extern "C" void vdasm_triblt_span_trilinear_mmx(const VDTriBltInfo *pInfo);
+ extern "C" void vdasm_triblt_span_bicubic_mip_linear_mmx(const VDTriBltInfo *pInfo);
+ extern "C" void vdasm_triblt_span_bicubic_mip_linear_sse2(const VDTriBltInfo *pInfo);
+ extern "C" void vdasm_triblt_span_point(const VDTriBltInfo *pInfo);
+#endif
+
+ struct VDTriBltTransformedVertex {
+ float x, y, z;
+ union {
+ float w;
+ float rhw;
+ };
+ float r, g, b, a;
+ float u, v;
+ int outcode;
+
+ void interp(const VDTriBltTransformedVertex *v1, const VDTriBltTransformedVertex *v2, float alpha) {
+ x = v1->x + alpha * (v2->x - v1->x);
+ y = v1->y + alpha * (v2->y - v1->y);
+ z = v1->z + alpha * (v2->z - v1->z);
+ w = v1->w + alpha * (v2->w - v1->w);
+
+ r = v1->r + alpha * (v2->r - v1->r);
+ g = v1->g + alpha * (v2->g - v1->g);
+ b = v1->b + alpha * (v2->b - v1->b);
+ a = v1->a + alpha * (v2->a - v1->a);
+
+ u = v1->u + alpha * (v2->u - v1->u);
+ v = v1->v + alpha * (v2->v - v1->v);
+
+ outcode = (x < -w ? kLeft : 0)
+ + (x > +w ? kRight : 0)
+ + (y < -w ? kTop : 0)
+ + (y > +w ? kBottom : 0)
+ + (z < -w ? kNear : 0)
+ + (z > +w ? kFar : 0);
+ }
+ };
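+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): the outcode bits record which frustum planes a vertex lies
+	// outside of in homogeneous clip space (-w <= x,y,z <= +w). The
+	// callers below combine them Cohen-Sutherland style:
+#if 0
+	bool SketchTriangleTrivialReject(int code0, int code1, int code2) {
+		// All three vertices outside the same plane -> cull outright.
+		return (code0 & code1 & code2) != 0;
+	}
+
+	bool SketchTriangleNeedsClipping(int code0, int code1, int code2) {
+		// Some vertex outside some plane (but no shared plane) -> clip first.
+		return (code0 | code1 | code2) != 0;
+	}
+#endif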
+
+ void TransformVerts(VDTriBltTransformedVertex *dst, const VDTriBltVertex *src, int nVerts, const float xform[16]) {
+ const float xflocal[16]={
+ xform[ 0], xform[ 1], xform[ 2], xform[ 3],
+ xform[ 4], xform[ 5], xform[ 6], xform[ 7],
+ xform[ 8], xform[ 9], xform[10], xform[11],
+ xform[12], xform[13], xform[14], xform[15],
+ };
+
+ if (nVerts <= 0)
+ return;
+
+ do {
+ const float x0 = src->x;
+ const float y0 = src->y;
+ const float z0 = src->z;
+
+ const float w = x0*xflocal[12] + y0*xflocal[13] + z0*xflocal[14] + xflocal[15];
+ const float x = x0*xflocal[ 0] + y0*xflocal[ 1] + z0*xflocal[ 2] + xflocal[ 3];
+ const float y = x0*xflocal[ 4] + y0*xflocal[ 5] + z0*xflocal[ 6] + xflocal[ 7];
+ const float z = x0*xflocal[ 8] + y0*xflocal[ 9] + z0*xflocal[10] + xflocal[11];
+
+ int outcode = 0;
+
+ if (x < -w) outcode += kLeft;
+ if (x > w) outcode += kRight;
+ if (y < -w) outcode += kTop;
+ if (y > w) outcode += kBottom;
+ if (z < -w) outcode += kNear;
+ if (z > w) outcode += kFar;
+
+ dst->x = x;
+ dst->y = y;
+ dst->z = z;
+ dst->w = w;
+ dst->u = src->u;
+ dst->v = src->v;
+ dst->r = 1.0f;
+ dst->g = 1.0f;
+ dst->b = 1.0f;
+ dst->a = 1.0f;
+ dst->outcode = outcode;
+
+ ++src;
+ ++dst;
+ } while(--nVerts);
+ }
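+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): the 4x4 transform is stored row-major and applied to the
+	// column vector (x, y, z, 1); the fourth row produces w. Building,
+	// say, a translation for use as the pTransform argument would look
+	// like this:
+#if 0
+	void SketchMakeTranslation(float xform[16], float tx, float ty, float tz) {
+		for(int i=0; i<16; ++i)
+			xform[i] = (i % 5) ? 0.0f : 1.0f;	// identity: 1s on the diagonal
+
+		xform[ 3] = tx;		// x row, constant term
+		xform[ 7] = ty;		// y row, constant term
+		xform[11] = tz;		// z row, constant term
+	}
+#endif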
+
+ void TransformVerts(VDTriBltTransformedVertex *dst, const VDTriColorVertex *src, int nVerts, const float xform[16]) {
+ const float xflocal[16]={
+ xform[ 0], xform[ 1], xform[ 2], xform[ 3],
+ xform[ 4], xform[ 5], xform[ 6], xform[ 7],
+ xform[ 8], xform[ 9], xform[10], xform[11],
+ xform[12], xform[13], xform[14], xform[15],
+ };
+
+ if (nVerts <= 0)
+ return;
+
+ do {
+ const float x0 = src->x;
+ const float y0 = src->y;
+ const float z0 = src->z;
+
+ const float w = x0*xflocal[12] + y0*xflocal[13] + z0*xflocal[14] + xflocal[15];
+ const float x = x0*xflocal[ 0] + y0*xflocal[ 1] + z0*xflocal[ 2] + xflocal[ 3];
+ const float y = x0*xflocal[ 4] + y0*xflocal[ 5] + z0*xflocal[ 6] + xflocal[ 7];
+ const float z = x0*xflocal[ 8] + y0*xflocal[ 9] + z0*xflocal[10] + xflocal[11];
+
+ int outcode = 0;
+
+ if (x < -w) outcode += kLeft;
+ if (x > w) outcode += kRight;
+ if (y < -w) outcode += kTop;
+ if (y > w) outcode += kBottom;
+ if (z < -w) outcode += kNear;
+ if (z > w) outcode += kFar;
+
+ dst->x = x;
+ dst->y = y;
+ dst->z = z;
+ dst->w = w;
+ dst->u = 0.0f;
+ dst->v = 0.0f;
+ dst->r = src->r;
+ dst->g = src->g;
+ dst->b = src->b;
+ dst->a = src->a;
+ dst->outcode = outcode;
+
+ ++src;
+ ++dst;
+ } while(--nVerts);
+ }
+
+ struct VDTriangleSetupInfo {
+ const VDTriBltTransformedVertex *pt, *pr, *pl;
+ VDTriBltTransformedVertex tmp0, tmp1, tmp2;
+ };
+
+ void SetupTri(
+ VDTriangleSetupInfo& setup,
+ VDPixmap& dst,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2,
+ const VDTriBltFilterMode *filterMode
+ )
+ {
+ setup.tmp0 = *vx0;
+ setup.tmp1 = *vx1;
+ setup.tmp2 = *vx2;
+
+ // adjust UVs for filter mode
+ if (filterMode) {
+ switch(*filterMode) {
+ case kTriBltFilterBilinear:
+ setup.tmp0.u += 0.5f;
+ setup.tmp0.v += 0.5f;
+ setup.tmp1.u += 0.5f;
+ setup.tmp1.v += 0.5f;
+ setup.tmp2.u += 0.5f;
+ setup.tmp2.v += 0.5f;
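+			// note: no break here -- the bilinear UVs also receive the
+			// 256x fixed-point scaling applied by the cases below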
+ case kTriBltFilterTrilinear:
+ case kTriBltFilterBicubicMipLinear:
+ setup.tmp0.u *= 256.0f;
+ setup.tmp0.v *= 256.0f;
+ setup.tmp1.u *= 256.0f;
+ setup.tmp1.v *= 256.0f;
+ setup.tmp2.u *= 256.0f;
+ setup.tmp2.v *= 256.0f;
+ break;
+ case kTriBltFilterPoint:
+ setup.tmp0.u += 1.0f;
+ setup.tmp0.v += 1.0f;
+ setup.tmp1.u += 1.0f;
+ setup.tmp1.v += 1.0f;
+ setup.tmp2.u += 1.0f;
+ setup.tmp2.v += 1.0f;
+ break;
+ }
+ }
+
+ // do perspective divide and NDC space conversion
+ const float xscale = dst.w * 0.5f;
+ const float yscale = dst.h * 0.5f;
+
+ setup.tmp0.rhw = 1.0f / setup.tmp0.w;
+ setup.tmp0.x = (1.0f+setup.tmp0.x*setup.tmp0.rhw)*xscale;
+ setup.tmp0.y = (1.0f+setup.tmp0.y*setup.tmp0.rhw)*yscale;
+ setup.tmp0.u *= setup.tmp0.rhw;
+ setup.tmp0.v *= setup.tmp0.rhw;
+ setup.tmp0.r *= setup.tmp0.rhw;
+ setup.tmp0.g *= setup.tmp0.rhw;
+ setup.tmp0.b *= setup.tmp0.rhw;
+ setup.tmp0.a *= setup.tmp0.rhw;
+ setup.tmp1.rhw = 1.0f / setup.tmp1.w;
+ setup.tmp1.x = (1.0f+setup.tmp1.x*setup.tmp1.rhw)*xscale;
+ setup.tmp1.y = (1.0f+setup.tmp1.y*setup.tmp1.rhw)*yscale;
+ setup.tmp1.u *= setup.tmp1.rhw;
+ setup.tmp1.v *= setup.tmp1.rhw;
+ setup.tmp1.r *= setup.tmp1.rhw;
+ setup.tmp1.g *= setup.tmp1.rhw;
+ setup.tmp1.b *= setup.tmp1.rhw;
+ setup.tmp1.a *= setup.tmp1.rhw;
+ setup.tmp2.rhw = 1.0f / setup.tmp2.w;
+ setup.tmp2.x = (1.0f+setup.tmp2.x*setup.tmp2.rhw)*xscale;
+ setup.tmp2.y = (1.0f+setup.tmp2.y*setup.tmp2.rhw)*yscale;
+ setup.tmp2.u *= setup.tmp2.rhw;
+ setup.tmp2.v *= setup.tmp2.rhw;
+ setup.tmp2.r *= setup.tmp2.rhw;
+ setup.tmp2.g *= setup.tmp2.rhw;
+ setup.tmp2.b *= setup.tmp2.rhw;
+ setup.tmp2.a *= setup.tmp2.rhw;
+
+ // verify clipping
+ VDASSERT(setup.tmp0.x >= 0 && setup.tmp0.x <= dst.w);
+ VDASSERT(setup.tmp1.x >= 0 && setup.tmp1.x <= dst.w);
+ VDASSERT(setup.tmp2.x >= 0 && setup.tmp2.x <= dst.w);
+ VDASSERT(setup.tmp0.y >= 0 && setup.tmp0.y <= dst.h);
+ VDASSERT(setup.tmp1.y >= 0 && setup.tmp1.y <= dst.h);
+ VDASSERT(setup.tmp2.y >= 0 && setup.tmp2.y <= dst.h);
+
+ vx0 = &setup.tmp0;
+ vx1 = &setup.tmp1;
+ vx2 = &setup.tmp2;
+
+ const VDTriBltTransformedVertex *pt, *pl, *pr;
+
+ // sort points
+ if (vx0->y < vx1->y) // 1 < 2
+ if (vx0->y < vx2->y) { // 1 < 2,3
+ pt = vx0;
+ pr = vx1;
+ pl = vx2;
+ } else { // 3 < 1 < 2
+ pt = vx2;
+ pr = vx0;
+ pl = vx1;
+ }
+ else // 2 < 1
+ if (vx1->y < vx2->y) { // 2 < 1,3
+ pt = vx1;
+ pr = vx2;
+ pl = vx0;
+ } else { // 3 < 2 < 1
+ pt = vx2;
+ pr = vx0;
+ pl = vx1;
+ }
+
+ setup.pl = pl;
+ setup.pt = pt;
+ setup.pr = pr;
+ }
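+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): with the vertices sorted by SetupTri, the rasterizers below
+	// take the 2D cross product of the two edges leaving the top vertex;
+	// a non-positive value means a backfacing or degenerate (zero-area)
+	// triangle, which is culled.
+#if 0
+	float SketchSignedArea2(const VDTriBltTransformedVertex& top,
+							const VDTriBltTransformedVertex& left,
+							const VDTriBltTransformedVertex& right) {
+		const float x10 = left.x  - top.x;
+		const float y10 = left.y  - top.y;
+		const float x20 = right.x - top.x;
+		const float y20 = right.y - top.y;
+
+		return x20*y10 - x10*y20;	// twice the signed area
+	}
+#endif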
+
+ void RenderTri(VDPixmap& dst, const VDPixmap *const *pSources, int nMipmaps,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2,
+ VDTriBltFilterMode filterMode,
+ float mipMapLODBias)
+ {
+ VDTriangleSetupInfo setup;
+
+ SetupTri(setup, dst, vx0, vx1, vx2, &filterMode);
+
+ const VDTriBltTransformedVertex *pt = setup.pt, *pl = setup.pl, *pr = setup.pr;
+
+ const float x10 = pl->x - pt->x;
+ const float x20 = pr->x - pt->x;
+ const float y10 = pl->y - pt->y;
+ const float y20 = pr->y - pt->y;
+ const float A = x20*y10 - x10*y20;
+
+ if (A <= 0.f)
+ return;
+
+ float invA = 0.f;
+ if (A >= 1e-5f)
+ invA = 1.0f / A;
+
+ float x10_A = x10 * invA;
+ float x20_A = x20 * invA;
+ float y10_A = y10 * invA;
+ float y20_A = y20 * invA;
+
+ float u10 = pl->u - pt->u;
+ float u20 = pr->u - pt->u;
+ float v10 = pl->v - pt->v;
+ float v20 = pr->v - pt->v;
+ float rhw10 = pl->rhw - pt->rhw;
+ float rhw20 = pr->rhw - pt->rhw;
+
+ float dudx = u20*y10_A - u10*y20_A;
+ float dudy = u10*x20_A - u20*x10_A;
+ float dvdx = v20*y10_A - v10*y20_A;
+ float dvdy = v10*x20_A - v20*x10_A;
+ float drhwdx = rhw20*y10_A - rhw10*y20_A;
+ float drhwdy = rhw10*x20_A - rhw20*x10_A;
+
+ // Compute edge walking parameters
+
+ float dxl1=0, dxr1=0, dul1=0, dvl1=0, drhwl1=0;
+ float dxl2=0, dxr2=0, dul2=0, dvl2=0, drhwl2=0;
+
+ // Compute left-edge interpolation parameters for first half.
+
+ if (pl->y != pt->y) {
+ dxl1 = (pl->x - pt->x) / (pl->y - pt->y);
+
+ dul1 = dudy + dxl1 * dudx;
+ dvl1 = dvdy + dxl1 * dvdx;
+ drhwl1 = drhwdy + dxl1 * drhwdx;
+ }
+
+ // Compute right-edge interpolation parameters for first half.
+
+ if (pr->y != pt->y) {
+ dxr1 = (pr->x - pt->x) / (pr->y - pt->y);
+ }
+
+ // Compute third-edge interpolation parameters.
+
+ if (pr->y != pl->y) {
+ dxl2 = (pr->x - pl->x) / (pr->y - pl->y);
+
+ dul2 = dudy + dxl2 * dudx;
+ dvl2 = dvdy + dxl2 * dvdx;
+ drhwl2 = drhwdy + dxl2 * drhwdx;
+
+ dxr2 = dxl2;
+ }
+
+ // Initialize parameters for first half.
+ //
+ // We place pixel centers at (x+0.5, y+0.5).
+
+ double xl, xr, ul, vl, rhwl, yf;
+ int y, y1, y2;
+
+ // y_start < y+0.5 to include pixel y.
+
+ y = (int)floor(pt->y + 0.5);
+ yf = (y+0.5) - pt->y;
+
+ xl = pt->x + dxl1 * yf;
+ xr = pt->x + dxr1 * yf;
+ ul = pt->u + dul1 * yf;
+ vl = pt->v + dvl1 * yf;
+ rhwl = pt->rhw + drhwl1 * yf;
+
+ // Initialize parameters for second half.
+
+ double xl2, xr2, ul2, vl2, rhwl2;
+
+ if (pl->y > pr->y) { // Left edge is long side
+ dxl2 = dxl1;
+ dul2 = dul1;
+ dvl2 = dvl1;
+ drhwl2 = drhwl1;
+
+ y1 = (int)floor(pr->y + 0.5);
+ y2 = (int)floor(pl->y + 0.5);
+
+ yf = (y1+0.5) - pr->y;
+
+ // Step left edge.
+
+ xl2 = xl + dxl1 * (y1 - y);
+ ul2 = ul + dul1 * (y1 - y);
+ vl2 = vl + dvl1 * (y1 - y);
+ rhwl2 = rhwl + drhwl1 * (y1 - y);
+
+ // Prestep right edge.
+
+ xr2 = pr->x + dxr2 * yf;
+ } else { // Right edge is long side
+ dxr2 = dxr1;
+
+ y1 = (int)floor(pl->y + 0.5);
+ y2 = (int)floor(pr->y + 0.5);
+
+ yf = (y1+0.5) - pl->y;
+
+ // Prestep left edge.
+
+ xl2 = pl->x + dxl2 * yf;
+ ul2 = pl->u + dul2 * yf;
+ vl2 = pl->v + dvl2 * yf;
+ rhwl2 = pl->rhw + drhwl2 * yf;
+
+ // Step right edge.
+
+ xr2 = xr + dxr1 * (y1 - y);
+ }
+
+ // rasterize
+ const ptrdiff_t dstpitch = dst.pitch;
+ uint32 *dstp = (uint32 *)((char *)dst.data + dstpitch * y);
+
+ VDTriBltInfo texinfo;
+ VDTriBltSpanFunction drawSpan;
+ uint32 cpuflags = CPUGetEnabledExtensions();
+
+ bool triBlt16 = false;
+
+ switch(filterMode) {
+ case kTriBltFilterBicubicMipLinear:
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_SSE2) {
+ drawSpan = vdasm_triblt_span_bicubic_mip_linear_sse2;
+ triBlt16 = true;
+ } else if (cpuflags & CPUF_SUPPORTS_MMX) {
+ drawSpan = vdasm_triblt_span_bicubic_mip_linear_mmx;
+ triBlt16 = true;
+ } else
+#endif
+ drawSpan = vd_triblt_span_bicubic_mip_linear;
+ break;
+ case kTriBltFilterTrilinear:
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ drawSpan = vdasm_triblt_span_trilinear_mmx;
+ triBlt16 = true;
+ } else
+#endif
+ drawSpan = vd_triblt_span_trilinear;
+ break;
+ case kTriBltFilterBilinear:
+#ifdef _M_IX86
+ if (cpuflags & CPUF_SUPPORTS_MMX) {
+ drawSpan = vdasm_triblt_span_bilinear_mmx;
+ triBlt16 = true;
+ } else
+#endif
+ drawSpan = vd_triblt_span_bilinear;
+ break;
+ case kTriBltFilterPoint:
+ drawSpan = vd_triblt_span_point;
+ break;
+ }
+
+ float rhobase = sqrtf(std::max<float>(dudx*dudx + dvdx*dvdx, dudy*dudy + dvdy*dvdy) * (1.0f / 65536.0f)) * powf(2.0f, mipMapLODBias);
+
+ if (triBlt16) {
+ ul *= 256.0f;
+ vl *= 256.0f;
+ ul2 *= 256.0f;
+ vl2 *= 256.0f;
+ dul1 *= 256.0f;
+ dvl1 *= 256.0f;
+ dul2 *= 256.0f;
+ dvl2 *= 256.0f;
+ dudx *= 256.0f;
+ dvdx *= 256.0f;
+ dudy *= 256.0f;
+ dvdy *= 256.0f;
+ }
+
+ int minx1 = (int)floor(std::min<float>(std::min<float>(pl->x, pr->x), pt->x) + 0.5);
+ int maxx2 = (int)floor(std::max<float>(std::max<float>(pl->x, pr->x), pt->x) + 0.5);
+
+ uint32 *const spanptr = new uint32[3 * (maxx2 - minx1)];
+
+ while(y < y2) {
+ if (y == y1) {
+ xl = xl2;
+ xr = xr2;
+ ul = ul2;
+ vl = vl2;
+ rhwl = rhwl2;
+ dxl1 = dxl2;
+ dxr1 = dxr2;
+ dul1 = dul2;
+ dvl1 = dvl2;
+ drhwl1 = drhwl2;
+ }
+
+ int x1, x2;
+ double xf;
+ double u, v, rhw;
+
+ // x_left must be less than (x+0.5) to include pixel x.
+
+ x1 = (int)floor(xl + 0.5);
+ x2 = (int)floor(xr + 0.5);
+ xf = (x1+0.5) - xl;
+
+ u = ul + xf * dudx;
+ v = vl + xf * dvdx;
+ rhw = rhwl + xf * drhwdx;
+
+ int x = x1;
+ uint32 *spanp = spanptr;
+
+ float w = 1.0f / (float)rhw;
+
+ if (x < x2) {
+ if (filterMode >= kTriBltFilterTrilinear) {
+ do {
+ int utexel = VDRoundToIntFastFullRange(u * w);
+ int vtexel = VDRoundToIntFastFullRange(v * w);
+ union{ float f; sint32 i; } rho = {rhobase * w};
+
+ int lambda = ((rho.i - 0x3F800000) >> (23-8));
+ if (lambda < 0)
+ lambda = 0;
+ if (lambda >= (nMipmaps<<8)-256)
+ lambda = (nMipmaps<<8)-257;
+
+ spanp[0] = utexel;
+ spanp[1] = vtexel;
+ spanp[2] = lambda;
+ spanp += 3;
+
+ u += dudx;
+ v += dvdx;
+ rhw += drhwdx;
+
+ w *= (2.0f - w*(float)rhw);
+ } while(++x < x2);
+ } else {
+ do {
+ int utexel = VDFloorToInt(u * w);
+ int vtexel = VDFloorToInt(v * w);
+
+ spanp[0] = utexel;
+ spanp[1] = vtexel;
+ spanp += 2;
+
+ u += dudx;
+ v += dvdx;
+ rhw += drhwdx;
+
+ w *= (2.0f - w*(float)rhw);
+ } while(++x < x2);
+ }
+ }
+
+ for(int i=0; i<nMipmaps; ++i) {
+ texinfo.mips[i].mip = (const uint32 *)pSources[i]->data;
+ texinfo.mips[i].pitch = pSources[i]->pitch;
+ texinfo.mips[i].uvmul = (pSources[i]->pitch << 16) + 4;
+ }
+ texinfo.dst = dstp+x1;
+ texinfo.src = spanptr;
+ texinfo.width = x2-x1;
+
+ if (texinfo.width>0)
+ drawSpan(&texinfo);
+
+ dstp = vdptroffset(dstp, dstpitch);
+ xl += dxl1;
+ xr += dxr1;
+ ul += dul1;
+ vl += dvl1;
+ rhwl += drhwl1;
+
+ ++y;
+ }
+
+ delete[] spanptr;
+ }
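+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): two tricks used in the inner loop above.
+	//
+	// 1) Perspective: w tracks 1/rhw with one Newton-Raphson step per
+	//    pixel instead of a divide -- given an estimate w ~= 1/r,
+	//    w*(2 - w*r) is a refined estimate, and rhw changes only
+	//    slightly from one pixel to the next.
+	// 2) Mip selection: reinterpreting an IEEE float's bits and
+	//    subtracting the bias (0x3F800000) approximates log2(rho) in
+	//    8.8 fixed point after the >> (23-8), which is exactly the
+	//    lambda the span functions consume (integer part = mip level,
+	//    fraction = blend weight).
+#if 0
+	float SketchRefineReciprocal(float w, float r) {
+		return w * (2.0f - w * r);		// one Newton-Raphson step toward 1/r
+	}
+
+	int SketchApproxLog2Fixed8(float rho) {
+		union { float f; sint32 i; } bits = { rho };
+		return (bits.i - 0x3F800000) >> (23 - 8);	// ~ log2(rho), 8.8 fixed point
+	}
+#endif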
+
+ void FillTri(VDPixmap& dst, uint32 c,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2
+ )
+ {
+
+ VDTriangleSetupInfo setup;
+
+ SetupTri(setup, dst, vx0, vx1, vx2, NULL);
+
+ const VDTriBltTransformedVertex *pt = setup.pt, *pl = setup.pl, *pr = setup.pr;
+
+ // Compute edge walking parameters
+ float dxl1=0, dxr1=0;
+ float dxl2=0, dxr2=0;
+
+ float x_lt = pl->x - pt->x;
+ float x_rt = pr->x - pt->x;
+ float x_rl = pr->x - pl->x;
+ float y_lt = pl->y - pt->y;
+ float y_rt = pr->y - pt->y;
+ float y_rl = pr->y - pl->y;
+
+ // reject backfaces
+ if (x_lt*y_rt >= x_rt*y_lt)
+ return;
+
+ // Compute left-edge interpolation parameters for first half.
+ if (pl->y != pt->y)
+ dxl1 = x_lt / y_lt;
+
+ // Compute right-edge interpolation parameters for first half.
+ if (pr->y != pt->y)
+ dxr1 = x_rt / y_rt;
+
+ // Compute third-edge interpolation parameters.
+ if (pr->y != pl->y) {
+ dxl2 = x_rl / y_rl;
+
+ dxr2 = dxl2;
+ }
+
+ // Initialize parameters for first half.
+ //
+ // We place pixel centers at (x+0.5, y+0.5).
+
+ double xl, xr, yf;
+ int y, y1, y2;
+
+ // y_start < y+0.5 to include pixel y.
+
+ y = (int)floor(pt->y + 0.5);
+ yf = (y+0.5) - pt->y;
+
+ xl = pt->x + dxl1 * yf;
+ xr = pt->x + dxr1 * yf;
+
+ // Initialize parameters for second half.
+ double xl2, xr2;
+
+ if (pl->y > pr->y) { // Left edge is long side
+ dxl2 = dxl1;
+
+ y1 = (int)floor(pr->y + 0.5);
+ y2 = (int)floor(pl->y + 0.5);
+
+ yf = (y1+0.5) - pr->y;
+
+ // Prestep right edge.
+ xr2 = pr->x + dxr2 * yf;
+
+ // Step left edge.
+ xl2 = xl + dxl1 * (y1 - y);
+ } else { // Right edge is long side
+ dxr2 = dxr1;
+
+ y1 = (int)floor(pl->y + 0.5);
+ y2 = (int)floor(pr->y + 0.5);
+
+ yf = (y1+0.5) - pl->y;
+
+ // Prestep left edge.
+ xl2 = pl->x + dxl2 * yf;
+
+ // Step right edge.
+ xr2 = xr + dxr1 * (y1 - y);
+ }
+
+ // rasterize
+ const ptrdiff_t dstpitch = dst.pitch;
+ uint32 *dstp = (uint32 *)((char *)dst.data + dstpitch * y);
+
+ while(y < y2) {
+ if (y == y1) {
+ xl = xl2;
+ xr = xr2;
+ dxl1 = dxl2;
+ dxr1 = dxr2;
+ }
+
+ int x1, x2;
+ double xf;
+
+ // x_left must be less than (x+0.5) to include pixel x.
+
+ x1 = (int)floor(xl + 0.5);
+ x2 = (int)floor(xr + 0.5);
+ xf = (x1+0.5) - xl;
+
+ while(x1 < x2)
+ dstp[x1++] = c;
+
+ dstp = vdptroffset(dstp, dstpitch);
+ xl += dxl1;
+ xr += dxr1;
+ ++y;
+ }
+ }
+
+ void FillTriGrad(VDPixmap& dst,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2
+ )
+ {
+
+ VDTriangleSetupInfo setup;
+
+ SetupTri(setup, dst, vx0, vx1, vx2, NULL);
+
+ const VDTriBltTransformedVertex *pt = setup.pt, *pl = setup.pl, *pr = setup.pr;
+ const float x10 = pl->x - pt->x;
+ const float x20 = pr->x - pt->x;
+ const float y10 = pl->y - pt->y;
+ const float y20 = pr->y - pt->y;
+ const float A = x20*y10 - x10*y20;
+
+ if (A <= 0.f)
+ return;
+
+ float invA = 0.f;
+ if (A >= 1e-5f)
+ invA = 1.0f / A;
+
+ float x10_A = x10 * invA;
+ float x20_A = x20 * invA;
+ float y10_A = y10 * invA;
+ float y20_A = y20 * invA;
+
+ float r10 = pl->r - pt->r;
+ float r20 = pr->r - pt->r;
+ float g10 = pl->g - pt->g;
+ float g20 = pr->g - pt->g;
+ float b10 = pl->b - pt->b;
+ float b20 = pr->b - pt->b;
+ float a10 = pl->a - pt->a;
+ float a20 = pr->a - pt->a;
+ float rhw10 = pl->rhw - pt->rhw;
+ float rhw20 = pr->rhw - pt->rhw;
+
+ float drdx = r20*y10_A - r10*y20_A;
+ float drdy = r10*x20_A - r20*x10_A;
+ float dgdx = g20*y10_A - g10*y20_A;
+ float dgdy = g10*x20_A - g20*x10_A;
+ float dbdx = b20*y10_A - b10*y20_A;
+ float dbdy = b10*x20_A - b20*x10_A;
+ float dadx = a20*y10_A - a10*y20_A;
+ float dady = a10*x20_A - a20*x10_A;
+ float drhwdx = rhw20*y10_A - rhw10*y20_A;
+ float drhwdy = rhw10*x20_A - rhw20*x10_A;
+
+ // Compute edge walking parameters
+ float dxl1=0;
+ float drl1=0;
+ float dgl1=0;
+ float dbl1=0;
+ float dal1=0;
+ float drhwl1=0;
+ float dxr1=0;
+ float dxl2=0;
+ float drl2=0;
+ float dgl2=0;
+ float dbl2=0;
+ float dal2=0;
+ float drhwl2=0;
+ float dxr2=0;
+
+ float x_lt = pl->x - pt->x;
+ float x_rt = pr->x - pt->x;
+ float x_rl = pr->x - pl->x;
+ float y_lt = pl->y - pt->y;
+ float y_rt = pr->y - pt->y;
+ float y_rl = pr->y - pl->y;
+
+ // Compute left-edge interpolation parameters for first half.
+ if (pl->y != pt->y) {
+ dxl1 = x_lt / y_lt;
+ drl1 = drdy + dxl1 * drdx;
+ dgl1 = dgdy + dxl1 * dgdx;
+ dbl1 = dbdy + dxl1 * dbdx;
+ dal1 = dady + dxl1 * dadx;
+ drhwl1 = drhwdy + dxl1 * drhwdx;
+ }
+
+ // Compute right-edge interpolation parameters for first half.
+ if (pr->y != pt->y)
+ dxr1 = x_rt / y_rt;
+
+ // Compute third-edge interpolation parameters.
+ if (pr->y != pl->y) {
+ dxl2 = x_rl / y_rl;
+
+ drl2 = drdy + dxl2 * drdx;
+ dgl2 = dgdy + dxl2 * dgdx;
+ dbl2 = dbdy + dxl2 * dbdx;
+ dal2 = dady + dxl2 * dadx;
+ drhwl2 = drhwdy + dxl2 * drhwdx;
+
+ dxr2 = dxl2;
+ }
+
+ // Initialize parameters for first half.
+ //
+ // We place pixel centers at (x+0.5, y+0.5).
+
+ double xl, xr, yf;
+ double rl, gl, bl, al, rhwl;
+ double rl2, gl2, bl2, al2, rhwl2;
+ int y, y1, y2;
+
+ // y_start < y+0.5 to include pixel y.
+
+ y = (int)floor(pt->y + 0.5);
+ yf = (y+0.5) - pt->y;
+
+ xl = pt->x + dxl1 * yf;
+ xr = pt->x + dxr1 * yf;
+ rl = pt->r + drl1 * yf;
+ gl = pt->g + dgl1 * yf;
+ bl = pt->b + dbl1 * yf;
+ al = pt->a + dal1 * yf;
+ rhwl = pt->rhw + drhwl1 * yf;
+
+ // Initialize parameters for second half.
+ double xl2, xr2;
+
+ if (pl->y > pr->y) { // Left edge is long side
+ dxl2 = dxl1;
+ drl2 = drl1;
+ dgl2 = dgl1;
+ dbl2 = dbl1;
+ dal2 = dal1;
+ drhwl2 = drhwl1;
+
+ y1 = (int)floor(pr->y + 0.5);
+ y2 = (int)floor(pl->y + 0.5);
+
+ yf = (y1+0.5) - pr->y;
+
+ // Step left edge.
+ xl2 = xl + dxl1 * (y1 - y);
+ rl2 = rl + drl1 * (y1 - y);
+ gl2 = gl + dgl1 * (y1 - y);
+ bl2 = bl + dbl1 * (y1 - y);
+ al2 = al + dal1 * (y1 - y);
+ rhwl2 = rhwl + drhwl1 * (y1 - y);
+
+ // Prestep right edge.
+ xr2 = pr->x + dxr2 * yf;
+ } else { // Right edge is long side
+ dxr2 = dxr1;
+
+ y1 = (int)floor(pl->y + 0.5);
+ y2 = (int)floor(pr->y + 0.5);
+
+ yf = (y1+0.5) - pl->y;
+
+ // Prestep left edge.
+ xl2 = pl->x + dxl2 * yf;
+ rl2 = pl->r + drl2 * yf;
+ gl2 = pl->g + dgl2 * yf;
+ bl2 = pl->b + dbl2 * yf;
+ al2 = pl->a + dal2 * yf;
+ rhwl2 = pl->rhw + drhwl2 * yf;
+
+ // Step right edge.
+ xr2 = xr + dxr2 * (y1 - y);
+ }
+
+ // rasterize
+ const ptrdiff_t dstpitch = dst.pitch;
+ char *dstp0 = (char *)dst.data + dstpitch * y;
+
+ while(y < y2) {
+ if (y == y1) {
+ xl = xl2;
+ xr = xr2;
+ rl = rl2;
+ gl = gl2;
+ bl = bl2;
+ al = al2;
+ rhwl = rhwl2;
+ dxl1 = dxl2;
+ drl1 = drl2;
+ dgl1 = dgl2;
+ dbl1 = dbl2;
+ dal1 = dal2;
+ drhwl1 = drhwl2;
+ dxr1 = dxr2;
+ }
+
+ int x1, x2;
+ double xf;
+ double r, g, b, a, rhw;
+
+ // x_left must be less than (x+0.5) to include pixel x.
+
+ x1 = (int)floor(xl + 0.5);
+ x2 = (int)floor(xr + 0.5);
+ xf = (x1+0.5) - xl;
+
+ r = rl + xf * drdx;
+ g = gl + xf * dgdx;
+ b = bl + xf * dbdx;
+ a = al + xf * dadx;
+ rhw = rhwl + xf * drhwdx;
+
+ float w = 1.0f / (float)rhw;
+
+ if (x1 < x2) {
+ if (dst.format == nsVDPixmap::kPixFormat_XRGB8888) {
+ uint32 *dstp = (uint32 *)dstp0;
+
+ do {
+ float sr = (float)(r * w);
+ float sg = (float)(g * w);
+ float sb = (float)(b * w);
+ float sa = (float)(a * w);
+
+ uint8 ir = VDClampedRoundFixedToUint8Fast(sr);
+ uint8 ig = VDClampedRoundFixedToUint8Fast(sg);
+ uint8 ib = VDClampedRoundFixedToUint8Fast(sb);
+ uint8 ia = VDClampedRoundFixedToUint8Fast(sa);
+
+ dstp[x1] = ((uint32)ia << 24) + ((uint32)ir << 16) + ((uint32)ig << 8) + ib;
+
+ r += drdx;
+ g += dgdx;
+ b += dbdx;
+ a += dadx;
+ rhw += drhwdx;
+
+ w *= (2.0f - w*(float)rhw);
+ } while(++x1 < x2);
+ } else {
+ uint8 *dstp = (uint8 *)dstp0;
+
+ do {
+ float sg = (float)(g * w);
+
+ uint8 ig = VDClampedRoundFixedToUint8Fast(sg);
+
+ dstp[x1] = ig;
+
+ g += dgdx;
+ rhw += drhwdx;
+
+ w *= (2.0f - w*(float)rhw);
+ } while(++x1 < x2);
+ }
+ }
+
+ dstp0 = vdptroffset(dstp0, dstpitch);
+ xl += dxl1;
+ rl += drl1;
+ gl += dgl1;
+ bl += dbl1;
+ al += dal1;
+ rhwl += drhwl1;
+ xr += dxr1;
+ ++y;
+ }
+ }
+
+ struct VDTriClipWorkspace {
+ VDTriBltTransformedVertex *vxheapptr[2][19];
+ VDTriBltTransformedVertex vxheap[21];
+ };
+
+ VDTriBltTransformedVertex **VDClipTriangle(VDTriClipWorkspace& ws,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2,
+ int orflags) {
+ // Each line segment can intersect all six planes, meaning the maximum bound is
+ // 18 vertices. Add 3 for the original.
+
+ VDTriBltTransformedVertex *vxheapnext;
+ VDTriBltTransformedVertex **vxlastheap = ws.vxheapptr[0], **vxnextheap = ws.vxheapptr[1];
+
+ ws.vxheap[0] = *vx0;
+ ws.vxheap[1] = *vx1;
+ ws.vxheap[2] = *vx2;
+
+ vxlastheap[0] = &ws.vxheap[0];
+ vxlastheap[1] = &ws.vxheap[1];
+ vxlastheap[2] = &ws.vxheap[2];
+ vxlastheap[3] = NULL;
+
+ vxheapnext = ws.vxheap + 3;
+
+ // Current Next Action
+ // ------- ---- ------
+ // Unclipped Unclipped Copy vertex
+ // Unclipped Clipped Copy vertex and add intersection
+ // Clipped Unclipped Add intersection
+ // Clipped Clipped No action
+
+#define DOCLIP(cliptype, _sign_, cliparg) \
+ if (orflags & k##cliptype) { \
+ VDTriBltTransformedVertex **src = vxlastheap; \
+ VDTriBltTransformedVertex **dst = vxnextheap; \
+ \
+ while(*src) { \
+ VDTriBltTransformedVertex *cur = *src; \
+ VDTriBltTransformedVertex *next = src[1]; \
+ \
+ if (!next) \
+ next = vxlastheap[0]; \
+ \
+ if (!(cur->outcode & k##cliptype)) \
+ *dst++ = cur; \
+ \
+ if ((cur->outcode ^ next->outcode) & k##cliptype) { \
+ double alpha = (cur->w _sign_ cur->cliparg) / ((cur->w _sign_ cur->cliparg) - (next->w _sign_ next->cliparg)); \
+ \
+ if (alpha >= 0.0 && alpha <= 1.0) { \
+ vxheapnext->interp(cur, next, (float)alpha); \
+ vxheapnext->cliparg = -(_sign_ vxheapnext->w); \
+ *dst++ = vxheapnext++; \
+ } \
+ } \
+ ++src; \
+ } \
+ *dst = NULL; \
+ if (dst < vxnextheap+3) return NULL; \
+ src = vxlastheap; vxlastheap = vxnextheap; vxnextheap = src; \
+ }
+
+
+ DOCLIP(Far, -, z);
+ DOCLIP(Near, +, z);
+ DOCLIP(Bottom, -, y);
+ DOCLIP(Top, +, y);
+ DOCLIP(Right, -, x);
+ DOCLIP(Left, +, x);
+
+#undef DOCLIP
+
+ return vxlastheap;
+ }
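+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): DOCLIP above is a Sutherland-Hodgman pass against one
+	// frustum plane. Stripped of the pointer-list plumbing, clipping a
+	// polygon against w + x >= 0 (the left plane) reduces to this
+	// per-edge pattern:
+#if 0
+	int SketchClipLeftPlane(const VDTriBltTransformedVertex *in, int n,
+							VDTriBltTransformedVertex *out) {
+		int outCount = 0;
+
+		for(int i=0; i<n; ++i) {
+			const VDTriBltTransformedVertex& cur = in[i];
+			const VDTriBltTransformedVertex& next = in[(i+1) % n];
+
+			const float dCur  = cur.w  + cur.x;		// >= 0 means inside
+			const float dNext = next.w + next.x;
+
+			if (dCur >= 0)
+				out[outCount++] = cur;				// keep inside vertex
+
+			if ((dCur >= 0) != (dNext >= 0)) {		// edge crosses the plane
+				const float alpha = dCur / (dCur - dNext);
+				out[outCount].interp(&cur, &next, alpha);
+				out[outCount].x = -out[outCount].w;	// land exactly on the plane
+				++outCount;
+			}
+		}
+
+		return outCount;
+	}
+#endif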
+
+ void RenderClippedTri(VDPixmap& dst, const VDPixmap *const *pSources, int nMipmaps,
+ const VDTriBltTransformedVertex *vx0,
+ const VDTriBltTransformedVertex *vx1,
+ const VDTriBltTransformedVertex *vx2,
+ VDTriBltFilterMode filterMode,
+ float mipMapLODBias,
+ int orflags)
+ {
+
+ VDTriBltTransformedVertex *vxheapnext;
+ VDTriBltTransformedVertex vxheap[21];
+
+ VDTriBltTransformedVertex *vxheapptr[2][19];
+ VDTriBltTransformedVertex **vxlastheap = vxheapptr[0], **vxnextheap = vxheapptr[1];
+
+ vxheap[0] = *vx0;
+ vxheap[1] = *vx1;
+ vxheap[2] = *vx2;
+
+ vxlastheap[0] = &vxheap[0];
+ vxlastheap[1] = &vxheap[1];
+ vxlastheap[2] = &vxheap[2];
+ vxlastheap[3] = NULL;
+
+ vxheapnext = vxheap + 3;
+
+ // Current Next Action
+ // ------- ---- ------
+ // Unclipped Unclipped Copy vertex
+ // Unclipped Clipped Copy vertex and add intersection
+ // Clipped Unclipped Add intersection
+ // Clipped Clipped No action
+
+#define DOCLIP(cliptype, _sign_, cliparg) \
+ if (orflags & k##cliptype) { \
+ VDTriBltTransformedVertex **src = vxlastheap; \
+ VDTriBltTransformedVertex **dst = vxnextheap; \
+ \
+ while(*src) { \
+ VDTriBltTransformedVertex *cur = *src; \
+ VDTriBltTransformedVertex *next = src[1]; \
+ \
+ if (!next) \
+ next = vxlastheap[0]; \
+ \
+ if (!(cur->outcode & k##cliptype)) \
+ *dst++ = cur; \
+ \
+ if ((cur->outcode ^ next->outcode) & k##cliptype) { \
+ double alpha = (cur->w _sign_ cur->cliparg) / ((cur->w _sign_ cur->cliparg) - (next->w _sign_ next->cliparg)); \
+ \
+ if (alpha >= 0.0 && alpha <= 1.0) { \
+ vxheapnext->interp(cur, next, (float)alpha); \
+ vxheapnext->cliparg = -(_sign_ vxheapnext->w); \
+ *dst++ = vxheapnext++; \
+ } \
+ } \
+ ++src; \
+ } \
+ *dst = NULL; \
+ if (dst < vxnextheap+3) return; \
+ src = vxlastheap; vxlastheap = vxnextheap; vxnextheap = src; \
+ }
+
+
+ DOCLIP(Far, -, z);
+ DOCLIP(Near, +, z);
+ DOCLIP(Bottom, -, y);
+ DOCLIP(Top, +, y);
+ DOCLIP(Right, -, x);
+ DOCLIP(Left, +, x);
+
+#undef DOCLIP
+
+ VDTriBltTransformedVertex **src = vxlastheap+1;
+
+ while(src[1]) {
+ RenderTri(dst, pSources, nMipmaps, vxlastheap[0], src[0], src[1], filterMode, mipMapLODBias);
+ ++src;
+ }
+ }
+
+}
+
+bool VDPixmapTriFill(VDPixmap& dst, const uint32 c, const VDTriBltVertex *pVertices, int nVertices, const int *pIndices, int nIndices, const float pTransform[16]) {
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888)
+ return false;
+
+ static const float xf_ident[16]={1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f};
+ vdfastvector<VDTriBltTransformedVertex> xverts(nVertices);
+
+ if (!pTransform)
+ pTransform = xf_ident;
+
+ TransformVerts(xverts.data(), pVertices, nVertices, pTransform);
+
+ const VDTriBltTransformedVertex *xsrc = xverts.data();
+
+ VDTriClipWorkspace clipws;
+
+ while(nIndices >= 3) {
+ const int idx0 = pIndices[0];
+ const int idx1 = pIndices[1];
+ const int idx2 = pIndices[2];
+ const VDTriBltTransformedVertex *xv0 = &xsrc[idx0];
+ const VDTriBltTransformedVertex *xv1 = &xsrc[idx1];
+ const VDTriBltTransformedVertex *xv2 = &xsrc[idx2];
+ const int kode0 = xv0->outcode;
+ const int kode1 = xv1->outcode;
+ const int kode2 = xv2->outcode;
+
+ if (!(kode0 & kode1 & kode2)) {
+ if (int orflags = kode0 | kode1 | kode2) {
+ VDTriBltTransformedVertex **src = VDClipTriangle(clipws, xv0, xv1, xv2, orflags);
+
+ if (src) {
+ VDTriBltTransformedVertex *src0 = *src++;
+
+ // fan out triangles
+ while(src[1]) {
+ FillTri(dst, c, src0, src[0], src[1]);
+ ++src;
+ }
+ }
+ } else
+ FillTri(dst, c, xv0, xv1, xv2);
+ }
+
+ pIndices += 3;
+ nIndices -= 3;
+ }
+
+ return true;
+}
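+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (editorial, hypothetical, excluded from the build):
+// with a null transform the vertices are taken directly as clip-space
+// coordinates (w = 1), so positions in [-1,+1] map onto the destination.
+// The vertex data below is made up for illustration; the destination must be
+// XRGB8888 or the call returns false.
+#if 0
+void SketchFillUpperLeftHalf(VDPixmap& dstXRGB8888) {
+	VDTriBltVertex v[3];
+	v[0].x = -1.0f; v[0].y = -1.0f; v[0].z = 0.0f; v[0].u = 0.0f; v[0].v = 0.0f;
+	v[1].x = +1.0f; v[1].y = -1.0f; v[1].z = 0.0f; v[1].u = 0.0f; v[1].v = 0.0f;
+	v[2].x = -1.0f; v[2].y = +1.0f; v[2].z = 0.0f; v[2].u = 0.0f; v[2].v = 0.0f;
+
+	static const int idx[3] = { 0, 1, 2 };
+
+	// Fills the upper-left half of the destination with green.
+	VDPixmapTriFill(dstXRGB8888, 0xFF00FF00, v, 3, idx, 3, NULL);
+}
+#endif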
+
+bool VDPixmapTriFill(VDPixmap& dst, const VDTriColorVertex *pVertices, int nVertices, const int *pIndices, int nIndices, const float pTransform[16]) {
+ VDPixmap pxY;
+ VDPixmap pxCb;
+ VDPixmap pxCr;
+ bool ycbcr = false;
+ float ycbcr_xoffset = 0;
+
+ switch(dst.format) {
+ case nsVDPixmap::kPixFormat_XRGB8888:
+ case nsVDPixmap::kPixFormat_Y8:
+ break;
+ case nsVDPixmap::kPixFormat_YUV444_Planar:
+ case nsVDPixmap::kPixFormat_YUV422_Planar:
+ case nsVDPixmap::kPixFormat_YUV420_Planar:
+ case nsVDPixmap::kPixFormat_YUV410_Planar:
+ pxY.format = nsVDPixmap::kPixFormat_Y8;
+ pxY.data = dst.data;
+ pxY.pitch = dst.pitch;
+ pxY.w = dst.w;
+ pxY.h = dst.h;
+
+ pxCb.format = nsVDPixmap::kPixFormat_Y8;
+ pxCb.data = dst.data2;
+ pxCb.pitch = dst.pitch2;
+ pxCb.h = dst.h;
+
+ pxCr.format = nsVDPixmap::kPixFormat_Y8;
+ pxCr.data = dst.data3;
+ pxCr.pitch = dst.pitch3;
+ pxCr.h = dst.h;
+
+ if (dst.format == nsVDPixmap::kPixFormat_YUV410_Planar) {
+ pxCr.w = pxCb.w = dst.w >> 2;
+ pxCr.h = pxCb.h = dst.h >> 2;
+ ycbcr_xoffset = 0.75f / (float)pxCr.w;
+ } else if (dst.format == nsVDPixmap::kPixFormat_YUV420_Planar) {
+ pxCr.w = pxCb.w = dst.w >> 1;
+ pxCr.h = pxCb.h = dst.h >> 1;
+ ycbcr_xoffset = 0.5f / (float)pxCr.w;
+ } else if (dst.format == nsVDPixmap::kPixFormat_YUV422_Planar) {
+ pxCr.w = pxCb.w = dst.w >> 1;
+ ycbcr_xoffset = 0.5f / (float)pxCr.w;
+ } else if (dst.format == nsVDPixmap::kPixFormat_YUV444_Planar) {
+ pxCr.w = pxCb.w = dst.w;
+ ycbcr_xoffset = 0.0f;
+ }
+
+ ycbcr = true;
+ break;
+ default:
+ return false;
+ }
+
+ VDTriBltTransformedVertex fastxverts[64];
+ vdfastvector<VDTriBltTransformedVertex> xverts;
+
+ VDTriBltTransformedVertex *xsrc;
+ if (nVertices <= 64) {
+ xsrc = fastxverts;
+ } else {
+ xverts.resize(nVertices);
+ xsrc = xverts.data();
+ }
+
+ static const float xf_ident[16]={1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f};
+ if (!pTransform)
+ pTransform = xf_ident;
+
+ VDTriClipWorkspace clipws;
+ for(int plane=0; plane<(ycbcr?3:1); ++plane) {
+ VDPixmap& pxPlane = ycbcr ? plane == 0 ? pxY : plane == 1 ? pxCb : pxCr : dst;
+
+ if (ycbcr && plane) {
+ float xf_ycbcr[16];
+ memcpy(xf_ycbcr, pTransform, sizeof(float) * 16);
+
+ // translate in x by ycbcr_xoffset
+ xf_ycbcr[0] += xf_ycbcr[12]*ycbcr_xoffset;
+ xf_ycbcr[1] += xf_ycbcr[13]*ycbcr_xoffset;
+ xf_ycbcr[2] += xf_ycbcr[14]*ycbcr_xoffset;
+ xf_ycbcr[3] += xf_ycbcr[15]*ycbcr_xoffset;
+
+ TransformVerts(xsrc, pVertices, nVertices, xf_ycbcr);
+
+ switch(plane) {
+ case 1:
+ for(int i=0; i<nVertices; ++i)
+ xsrc[i].g = xsrc[i].b;
+ break;
+ case 2:
+ for(int i=0; i<nVertices; ++i)
+ xsrc[i].g = xsrc[i].r;
+ break;
+ }
+ } else {
+ TransformVerts(xsrc, pVertices, nVertices, pTransform);
+ }
+
+ const int *nextIndex = pIndices;
+ int indicesLeft = nIndices;
+ while(indicesLeft >= 3) {
+ const int idx0 = nextIndex[0];
+ const int idx1 = nextIndex[1];
+ const int idx2 = nextIndex[2];
+ const VDTriBltTransformedVertex *xv0 = &xsrc[idx0];
+ const VDTriBltTransformedVertex *xv1 = &xsrc[idx1];
+ const VDTriBltTransformedVertex *xv2 = &xsrc[idx2];
+ const int kode0 = xv0->outcode;
+ const int kode1 = xv1->outcode;
+ const int kode2 = xv2->outcode;
+
+ if (!(kode0 & kode1 & kode2)) {
+ if (int orflags = kode0 | kode1 | kode2) {
+ VDTriBltTransformedVertex **src = VDClipTriangle(clipws, xv0, xv1, xv2, orflags);
+
+ if (src) {
+ VDTriBltTransformedVertex *src0 = *src++;
+
+ // fan out triangles
+ while(src[1]) {
+ FillTriGrad(pxPlane, src0, src[0], src[1]);
+ ++src;
+ }
+ }
+ } else {
+ FillTriGrad(pxPlane, xv0, xv1, xv2);
+ }
+ }
+
+ nextIndex += 3;
+ indicesLeft -= 3;
+ }
+ }
+
+ return true;
+}
+
+bool VDPixmapTriBlt(VDPixmap& dst, const VDPixmap *const *pSources, int nMipmaps,
+ const VDTriBltVertex *pVertices, int nVertices,
+ const int *pIndices, int nIndices,
+ VDTriBltFilterMode filterMode,
+ float mipMapLODBias,
+ const float pTransform[16])
+{
+ if (dst.format != nsVDPixmap::kPixFormat_XRGB8888)
+ return false;
+
+ static const float xf_ident[16]={1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f,0.f,0.f,0.f,0.f,1.f};
+ vdfastvector<VDTriBltTransformedVertex> xverts(nVertices);
+
+ if (!pTransform)
+ pTransform = xf_ident;
+
+ TransformVerts(xverts.data(), pVertices, nVertices, pTransform);
+
+ const VDTriBltTransformedVertex *xsrc = xverts.data();
+
+ VDTriClipWorkspace clipws;
+
+ while(nIndices >= 3) {
+ const int idx0 = pIndices[0];
+ const int idx1 = pIndices[1];
+ const int idx2 = pIndices[2];
+ const VDTriBltTransformedVertex *xv0 = &xsrc[idx0];
+ const VDTriBltTransformedVertex *xv1 = &xsrc[idx1];
+ const VDTriBltTransformedVertex *xv2 = &xsrc[idx2];
+ const int kode0 = xv0->outcode;
+ const int kode1 = xv1->outcode;
+ const int kode2 = xv2->outcode;
+
+ if (!(kode0 & kode1 & kode2)) {
+ if (int orflags = kode0 | kode1 | kode2) {
+ VDTriBltTransformedVertex **src = VDClipTriangle(clipws, xv0, xv1, xv2, orflags);
+
+ if (src) {
+ VDTriBltTransformedVertex *src0 = *src++;
+
+ // fan out triangles
+ while(src[1]) {
+ RenderTri(dst, pSources, nMipmaps, src0, src[0], src[1], filterMode, mipMapLODBias);
+ ++src;
+ }
+ }
+ } else
+ RenderTri(dst, pSources, nMipmaps, xv0, xv1, xv2, filterMode, mipMapLODBias);
+ }
+
+ pIndices += 3;
+ nIndices -= 3;
+ }
+
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+void VDPixmapSetTextureBorders(VDPixmap& px, bool wrap) {
+ const int w = px.w;
+ const int h = px.h;
+
+ VDPixmapBlt(px, 0, 1, px, wrap ? w-2 : 1, 1, 1, h-2);
+ VDPixmapBlt(px, w-1, 1, px, wrap ? 1 : w-2, 1, 1, h-2);
+
+ VDPixmapBlt(px, 0, 0, px, 0, wrap ? h-2 : 1, w, 1);
+ VDPixmapBlt(px, 0, h-1, px, 0, wrap ? 1 : h-2, w, 1);
+}
+
+void VDPixmapSetTextureBordersCubic(VDPixmap& px) {
+ const int w = px.w;
+ const int h = px.h;
+
+ VDPixmapBlt(px, 0, 1, px, 2, 1, 1, h-2);
+ VDPixmapBlt(px, 1, 1, px, 2, 1, 1, h-2);
+ VDPixmapBlt(px, w-2, 1, px, w-3, 1, 1, h-2);
+ VDPixmapBlt(px, w-1, 1, px, w-3, 1, 1, h-2);
+
+ VDPixmapBlt(px, 0, 0, px, 0, 2, w, 1);
+ VDPixmapBlt(px, 0, 1, px, 0, 2, w, 1);
+ VDPixmapBlt(px, 0, h-2, px, 0, h-3, w, 1);
+ VDPixmapBlt(px, 0, h-1, px, 0, h-3, w, 1);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDPixmapTextureMipmapChain::VDPixmapTextureMipmapChain(const VDPixmap& src, bool wrap, bool cubic, int maxlevels) {
+ int w = src.w;
+ int h = src.h;
+ int mipcount = 0;
+
+ while((w>1 || h>1) && maxlevels--) {
+ ++mipcount;
+ w >>= 1;
+ h >>= 1;
+ }
+
+ mBuffers.resize(mipcount);
+ mMipMaps.resize(mipcount);
+
+ vdautoptr<IVDPixmapResampler> r(VDCreatePixmapResampler());
+ r->SetFilters(IVDPixmapResampler::kFilterLinear, IVDPixmapResampler::kFilterLinear, false);
+
+ float fw = (float)src.w;
+ float fh = (float)src.h;
+ for(int mip=0; mip<mipcount; ++mip) {
+ const int mipw = VDCeilToInt(fw);
+ const int miph = VDCeilToInt(fh);
+
+ mMipMaps[mip] = &mBuffers[mip];
+
+ if (cubic) {
+ mBuffers[mip].init(mipw+4, miph+4, nsVDPixmap::kPixFormat_XRGB8888);
+
+ if (!mip) {
+ VDPixmapBlt(mBuffers[0], 2, 2, src, 0, 0, src.w, src.h);
+ VDPixmapSetTextureBordersCubic(mBuffers[0]);
+ } else {
+ const VDPixmap& curmip = mBuffers[mip];
+ const VDPixmap& prevmip = mBuffers[mip-1];
+
+ vdrect32f rdst( 0.0f, 0.0f, (float)curmip.w , (float)curmip.h );
+ vdrect32f rsrc(-2.0f, -2.0f, 2.0f*(float)curmip.w - 2.0f, 2.0f*(float)curmip.h - 2.0f);
+ r->Init(rdst, curmip.w, curmip.h, curmip.format, rsrc, prevmip.w, prevmip.h, prevmip.format);
+ r->Process(curmip, prevmip);
+ }
+ } else {
+ mBuffers[mip].init(mipw+2, miph+2, nsVDPixmap::kPixFormat_XRGB8888);
+
+ if (!mip) {
+ VDPixmapBlt(mBuffers[0], 1, 1, src, 0, 0, src.w, src.h);
+ VDPixmapSetTextureBorders(mBuffers[0], wrap);
+ } else {
+ const VDPixmap& curmip = mBuffers[mip];
+ const VDPixmap& prevmip = mBuffers[mip-1];
+
+ vdrect32f rdst( 0.0f, 0.0f, (float)curmip.w , (float)curmip.h );
+ vdrect32f rsrc(-1.0f, -1.0f, 2.0f*(float)curmip.w - 1.0f, 2.0f*(float)curmip.h - 1.0f);
+ r->Init(rdst, curmip.w, curmip.h, curmip.format, rsrc, prevmip.w, prevmip.h, prevmip.format);
+ r->Process(curmip, prevmip);
+ }
+ }
+
+ fw *= 0.5f;
+ fh *= 0.5f;
+ }
+}
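+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (editorial, hypothetical, excluded from the build): the
+// constructor above keeps halving the source until both dimensions reach 1
+// (or maxlevels runs out), and pads each level by 1 or 2 texels per side so
+// the bilinear/bicubic span functions can read past the edges without bounds
+// checks. Ignoring the maxlevels cap, the level count it allocates is:
+#if 0
+static int SketchMipLevelCount(int w, int h) {
+	int count = 0;
+
+	while(w > 1 || h > 1) {
+		++count;
+		w >>= 1;
+		h >>= 1;
+	}
+
+	return count;	// e.g. 256x64 -> 8 levels (256x64 down to 2x1)
+}
+#endif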
+
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit.cpp
new file mode 100644
index 000000000..6dc1b4334
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit.cpp
@@ -0,0 +1,903 @@
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "uberblit.h"
+#include "uberblit_gen.h"
+
+uint32 VDPixmapGetFormatTokenFromFormat(int format) {
+ using namespace nsVDPixmap;
+ switch(format) {
+ case kPixFormat_Pal1: return kVDPixType_1 | kVDPixSamp_444 | kVDPixSpace_Pal;
+ case kPixFormat_Pal2: return kVDPixType_2 | kVDPixSamp_444 | kVDPixSpace_Pal;
+ case kPixFormat_Pal4: return kVDPixType_4 | kVDPixSamp_444 | kVDPixSpace_Pal;
+ case kPixFormat_Pal8: return kVDPixType_8 | kVDPixSamp_444 | kVDPixSpace_Pal;
+ case kPixFormat_XRGB1555: return kVDPixType_1555_LE | kVDPixSamp_444 | kVDPixSpace_BGR;
+ case kPixFormat_RGB565: return kVDPixType_565_LE | kVDPixSamp_444 | kVDPixSpace_BGR;
+ case kPixFormat_RGB888: return kVDPixType_888 | kVDPixSamp_444 | kVDPixSpace_BGR;
+ case kPixFormat_XRGB8888: return kVDPixType_8888 | kVDPixSamp_444 | kVDPixSpace_BGR;
+ case kPixFormat_Y8: return kVDPixType_8 | kVDPixSamp_444 | kVDPixSpace_Y_601;
+ case kPixFormat_YUV422_UYVY: return kVDPixType_B8G8_R8G8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_YUYV: return kVDPixType_G8B8_G8R8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV444_XVYU: return kVDPixType_8888 | kVDPixSamp_444 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV444_Planar: return kVDPixType_8_8_8 | kVDPixSamp_444 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_Planar: return kVDPixType_8_8_8 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_Planar_16F: return kVDPixType_16F_16F_16F_LE | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV420_Planar: return kVDPixType_8_8_8 | kVDPixSamp_420_MPEG2 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV411_Planar: return kVDPixType_8_8_8 | kVDPixSamp_411 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV410_Planar: return kVDPixType_8_8_8 | kVDPixSamp_410 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_Planar_Centered: return kVDPixType_8_8_8 | kVDPixSamp_422_JPEG | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV420_Planar_Centered: return kVDPixType_8_8_8 | kVDPixSamp_420_MPEG1 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_V210: return kVDPixType_V210 | kVDPixSamp_422 | kVDPixSpace_YCC_601;
+ case kPixFormat_YUV422_UYVY_709: return kVDPixType_B8G8_R8G8 | kVDPixSamp_422 | kVDPixSpace_YCC_709;
+ case kPixFormat_YUV420_NV12: return kVDPixType_8_B8R8 | kVDPixSamp_420_MPEG2 | kVDPixSpace_YCC_601;
+ default:
+ VDASSERT(false);
+ return 0;
+ }
+}
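+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (editorial, hypothetical, excluded from the build): a
+// format token packs three orthogonal fields, so the converters below can
+// compare or rewrite one aspect at a time using the *_Mask constants:
+#if 0
+static void SketchSplitToken(uint32 token) {
+	const uint32 type     = token & kVDPixType_Mask;	// storage layout
+	const uint32 sampling = token & kVDPixSamp_Mask;	// chroma subsampling
+	const uint32 space    = token & kVDPixSpace_Mask;	// color space
+
+	// e.g. rewriting only the type field, as BlitterConvertType does:
+	const uint32 retyped = (token & ~kVDPixType_Mask) | kVDPixType_8888;
+
+	(void)type; (void)sampling; (void)space; (void)retyped;
+}
+#endif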
+
+const VDPixmapSamplingInfo& VDPixmapGetSamplingInfo(uint32 samplingToken) {
+ static const VDPixmapSamplingInfo kPixmapSamplingInfo[]={
+ /* Null */ { 0, 0, 0, 0, 0 },
+ /* 444 */ { 0, 0, 0, 0, 0 },
+ /* 422 */ { -4, 0, 0, 1, 0 },
+ /* 422_JPEG */ { 0, 0, 0, 1, 0 },
+ /* 420_MPEG2 */ { -4, 0, 0, 1, 1 },
+ /* 420_MPEG2INT */ { -4, 0, 0, 1, 1 },
+ /* 420_MPEG1 */ { 0, 0, 0, 1, 1 },
+ /* 420_DVPAL */ { -4, 0, 0, 1, 1 },
+ /* 411 */ { -6, 0, 0, 2, 0 },
+ /* 410 */ { -6, 0, 0, 2, 2 }
+ };
+
+ uint32 index = (samplingToken & kVDPixSamp_Mask) >> kVDPixSamp_Bits;
+
+ return index >= sizeof(kPixmapSamplingInfo)/sizeof(kPixmapSamplingInfo[0]) ? kPixmapSamplingInfo[0] : kPixmapSamplingInfo[index];
+}
+
+namespace {
+ uint32 BlitterConvertSampling(VDPixmapUberBlitterGenerator& gen, uint32 srcToken, uint32 dstSamplingToken, sint32 w, sint32 h) {
+ // if the source type is 16F, we have to convert to 32F
+ if ((srcToken & kVDPixType_Mask) == kVDPixType_16F_16F_16F_LE) {
+ // 0 1 2
+ gen.conv_16F_to_32F();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_16F_to_32F();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_16F_to_32F();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ }
+
+ // look up sampling info
+ const VDPixmapSamplingInfo& srcInfo = VDPixmapGetSamplingInfo(srcToken);
+ const VDPixmapSamplingInfo& dstInfo = VDPixmapGetSamplingInfo(dstSamplingToken);
+
+ // convert destination chroma origin to luma space
+ int c_x = ((8 + dstInfo.mCXOffset16) << dstInfo.mCXBits) - 8;
+ int cr_y = ((8 + dstInfo.mCrYOffset16) << dstInfo.mCYBits) - 8;
+ int cb_y = ((8 + dstInfo.mCbYOffset16) << dstInfo.mCYBits) - 8;
+
+		// convert the chroma location from luma space into the source's chroma space
+ c_x = ((8 + c_x) >> srcInfo.mCXBits) - 8 - srcInfo.mCXOffset16;
+ cr_y = ((8 + cr_y) >> srcInfo.mCYBits) - 8 - srcInfo.mCrYOffset16;
+ cb_y = ((8 + cb_y) >> srcInfo.mCYBits) - 8 - srcInfo.mCbYOffset16;
+
+ float cxo = c_x / 16.0f + 0.5f;
+ float cxf = ((16 << dstInfo.mCXBits) >> srcInfo.mCXBits) / 16.0f;
+ float cyf = ((16 << dstInfo.mCYBits) >> srcInfo.mCYBits) / 16.0f;
+ sint32 cw = -(-w >> dstInfo.mCXBits);
+ sint32 ch = -(-h >> dstInfo.mCYBits);
+
+ gen.swap(2);
+ gen.linear(cxo, cxf, cw, cb_y / 16.0f + 0.5f, cyf, ch);
+ gen.swap(2);
+ gen.linear(cxo, cxf, cw, cr_y / 16.0f + 0.5f, cyf, ch);
+
+ return (srcToken & ~kVDPixSamp_Mask) | (dstSamplingToken & kVDPixSamp_Mask);
+ }
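+
+	// -------------------------------------------------------------------
+	// Illustrative sketch (editorial, hypothetical, excluded from the
+	// build): the recurring -(-w >> bits) idiom above is a rounding-up
+	// divide by 2^bits (relying on arithmetic right shift), used to derive
+	// chroma plane sizes from luma sizes -- e.g. a 639-wide 4:2:0 image
+	// still needs 320 chroma columns:
+#if 0
+	sint32 SketchCeilShift(sint32 w, int bits) {
+		return -(-w >> bits);	// == ceil(w / 2^bits) for w >= 0
+	}
+	// SketchCeilShift(639, 1) == 320, SketchCeilShift(640, 1) == 320
+#endif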
+
+ uint32 BlitterConvertType(VDPixmapUberBlitterGenerator& gen, uint32 srcToken, uint32 dstToken, sint32 w, sint32 h) {
+ uint32 dstType = dstToken & kVDPixType_Mask;
+
+ while((srcToken ^ dstToken) & kVDPixType_Mask) {
+ uint32 srcType = srcToken & kVDPixType_Mask;
+ uint32 targetType = dstType;
+
+ type_reconvert:
+ switch(targetType) {
+ case kVDPixType_1555_LE:
+ switch(srcType) {
+ case kVDPixType_565_LE:
+ gen.conv_565_to_555();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ break;
+
+ case kVDPixType_8888:
+ gen.conv_8888_to_555();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_1555_LE;
+ break;
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ default:
+ targetType = kVDPixType_8888;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_565_LE:
+ switch(srcType) {
+ case kVDPixType_1555_LE:
+ gen.conv_555_to_565();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ break;
+ case kVDPixType_8888:
+ gen.conv_8888_to_565();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_565_LE;
+ break;
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ default:
+ targetType = kVDPixType_8888;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_888:
+ switch(srcType) {
+ case kVDPixType_8888:
+ gen.conv_8888_to_888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_888;
+ break;
+ default:
+ targetType = kVDPixType_8888;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_8888:
+ switch(srcType) {
+ case kVDPixType_1555_LE:
+ gen.conv_555_to_8888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ case kVDPixType_565_LE:
+ gen.conv_565_to_8888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ case kVDPixType_888:
+ gen.conv_888_to_8888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ case kVDPixType_32Fx4_LE:
+ gen.conv_X32F_to_8888();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ case kVDPixType_8_8_8:
+ if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_444)
+ srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_444, w, h);
+ gen.interleave_X8R8G8B8();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8888;
+ break;
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ case kVDPixType_8:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ gen.pop();
+ gen.swap(1);
+ gen.pop();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ break;
+
+ case kVDPixType_16F_LE:
+ targetType = kVDPixType_32F_LE;
+ goto type_reconvert;
+
+ case kVDPixType_32F_LE:
+ gen.conv_32F_to_8();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ break;
+
+ default:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_8_8_8:
+ switch(srcType) {
+ case kVDPixType_B8G8_R8G8:
+ gen.dup();
+ gen.dup();
+ gen.extract_8in32(2, (w + 1) >> 1, h);
+ gen.swap(2);
+ gen.extract_8in16(1, w, h);
+ gen.swap(1);
+ gen.extract_8in32(0, (w + 1) >> 1, h);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_8_8_8 | kVDPixSamp_422;
+ break;
+ case kVDPixType_G8B8_G8R8:
+ gen.dup();
+ gen.dup();
+ gen.extract_8in32(3, (w + 1) >> 1, h);
+ gen.swap(2);
+ gen.extract_8in16(0, w, h);
+ gen.swap(1);
+ gen.extract_8in32(1, (w + 1) >> 1, h);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_8_8_8 | kVDPixSamp_422;
+ break;
+ case kVDPixType_16F_16F_16F_LE:
+ case kVDPixType_V210:
+ targetType = kVDPixType_32F_32F_32F_LE;
+ goto type_reconvert;
+ case kVDPixType_32F_32F_32F_LE:
+ // 0 1 2
+ gen.conv_32F_to_8();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_32F_to_8();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_32F_to_8();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ break;
+ case kVDPixType_8_B8R8:
+ {
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+ int cw = -(-w >> sampInfo.mCXBits);
+ int ch = -(-h >> sampInfo.mCYBits);
+
+ gen.dup();
+ gen.extract_8in16(1, cw, ch);
+ gen.swap(2);
+ gen.swap(1);
+ gen.extract_8in16(0, cw, ch);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ }
+ break;
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ case kVDPixType_B8G8_R8G8:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
+ srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);
+
+ gen.interleave_B8G8_R8G8();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_B8G8_R8G8;
+ break;
+ case kVDPixType_G8B8_G8R8:
+ gen.swap_8in16(w, h, w*2);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_B8G8_R8G8;
+ break;
+ default:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_G8B8_G8R8:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
+ srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);
+
+ gen.interleave_G8B8_G8R8();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_G8B8_G8R8;
+ break;
+ case kVDPixType_B8G8_R8G8:
+ gen.swap_8in16(w, h, w*2);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSamp_Mask)) | kVDPixType_G8B8_G8R8;
+ break;
+ default:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ switch(srcType) {
+ case kVDPixType_32F_32F_32F_LE:
+ // 0 1 2
+ gen.conv_32F_to_16F();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_32F_to_16F();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_32F_to_16F();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_16F_16F_LE;
+ break;
+
+ default:
+ targetType = kVDPixType_32F_32F_32F_LE;
+ goto type_reconvert;
+ }
+ break;
+
+ case kVDPixType_32F_32F_32F_LE:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ // 0 1 2
+ gen.conv_8_to_32F();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_8_to_32F();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_8_to_32F();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ // 0 1 2
+ gen.conv_16F_to_32F();
+ gen.swap(1);
+ // 1 0 2
+ gen.conv_16F_to_32F();
+ gen.swap(2);
+ // 2 0 1
+ gen.conv_16F_to_32F();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ break;
+
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ case kVDPixType_8_B8R8:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+
+ case kVDPixType_V210:
+ gen.conv_V210_to_32F();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+ break;
+
+ case kVDPixType_V210:
+ switch(srcType) {
+ case kVDPixType_32F_32F_32F_LE:
+ if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_422)
+ srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_422, w, h);
+
+ gen.conv_32F_to_V210();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_V210;
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ targetType = kVDPixType_32F_32F_32F_LE;
+ goto type_reconvert;
+
+ case kVDPixType_8_8_8:
+ if ((srcToken & kVDPixSamp_Mask) != kVDPixSamp_422)
+ srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_422, w, h);
+
+ targetType = kVDPixType_32F_32F_32F_LE;
+ goto type_reconvert;
+
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ case kVDPixType_8_B8R8:
+ targetType = kVDPixType_8_8_8;
+ goto type_reconvert;
+
+ default:
+ VDASSERT(false);
+ }
+ break;
+
+ case kVDPixType_32F_LE:
+ switch(srcType) {
+ case kVDPixType_8:
+ gen.conv_8_to_32F();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ break;
+ case kVDPixType_16F_LE:
+ gen.conv_16F_to_32F();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ break;
+ default:
+ VDASSERT(false);
+ }
+ break;
+
+ case kVDPixType_8_B8R8:
+ switch(srcType) {
+ case kVDPixType_8_8_8:
+ gen.swap(1);
+ gen.swap(2);
+ gen.interleave_B8R8();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_B8R8;
+ break;
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ }
+
+ return srcToken;
+ }
+}
+
+IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmap& dst, const VDPixmap& src) {
+ const VDPixmapLayout& dstlayout = VDPixmapToLayoutFromBase(dst, dst.data);
+ const VDPixmapLayout& srclayout = VDPixmapToLayoutFromBase(src, src.data);
+
+ return VDPixmapCreateBlitter(dstlayout, srclayout);
+}
+
+IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmapLayout& dst, const VDPixmapLayout& src) {
+ if (src.format == dst.format) {
+ return VDCreatePixmapUberBlitterDirectCopy(dst, src);
+ }
+
+ uint32 srcToken = VDPixmapGetFormatTokenFromFormat(src.format);
+ uint32 dstToken = VDPixmapGetFormatTokenFromFormat(dst.format);
+
+ VDPixmapUberBlitterGenerator gen;
+
+ // load source channels
+ int w = src.w;
+ int h = src.h;
+
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_1:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 7) >> 3);
+ break;
+
+ case kVDPixType_2:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 3) >> 2);
+ break;
+
+ case kVDPixType_4:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, (w + 1) >> 1);
+ break;
+
+ case kVDPixType_8:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
+ break;
+
+ case kVDPixType_555_LE:
+ case kVDPixType_565_LE:
+ case kVDPixType_1555_LE:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*2);
+ break;
+
+ case kVDPixType_888:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*3);
+ break;
+
+ case kVDPixType_8888:
+ case kVDPixType_32F_LE:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*4);
+ break;
+
+ case kVDPixType_32Fx4_LE:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*16);
+ break;
+
+ case kVDPixType_B8G8_R8G8:
+ case kVDPixType_G8B8_G8R8:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, ((w + 1) & ~1)*2);
+ break;
+
+ case kVDPixType_8_8_8:
+ {
+ uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ int cxbits = sampInfo.mCXBits;
+ int cybits = sampInfo.mCYBits;
+ int w2 = -(-w >> cxbits);
+ int h2 = -(-h >> cybits);
+ gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2);
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
+ gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2);
+ }
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ {
+ uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
+ uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
+ uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_16F_LE;
+
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ int cxbits = sampInfo.mCXBits;
+ int cybits = sampInfo.mCYBits;
+ int w2 = -(-w >> cxbits);
+ int h2 = -(-h >> cybits);
+ gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2 * 2);
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*2);
+ gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2 * 2);
+ }
+ break;
+
+ case kVDPixType_32F_32F_32F_LE:
+ {
+ uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ uint32 cbtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+ uint32 crtoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ int cxbits = sampInfo.mCXBits;
+ int cybits = sampInfo.mCYBits;
+ int w2 = -(-w >> cxbits);
+ int h2 = -(-h >> cybits);
+ gen.ldsrc(0, 2, 0, 0, w2, h2, cbtoken, w2 * 4);
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w*4);
+ gen.ldsrc(0, 1, 0, 0, w2, h2, crtoken, w2 * 4);
+ }
+ break;
+
+ case kVDPixType_V210:
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, ((w + 5) / 6) * 4);
+ break;
+
+ case kVDPixType_8_B8R8:
+ {
+ uint32 ytoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8;
+ uint32 ctoken = (srcToken & ~kVDPixType_Mask) | kVDPixType_B8R8;
+
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ int cxbits = sampInfo.mCXBits;
+ int cybits = sampInfo.mCYBits;
+ int w2 = -(-w >> cxbits);
+ int h2 = -(-h >> cybits);
+ gen.ldsrc(0, 0, 0, 0, w, h, srcToken, w);
+ gen.ldsrc(0, 1, 0, 0, w2, h2, ctoken, w2*2);
+ }
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+
+ // check if we need a color space change
+ if ((srcToken ^ dstToken) & kVDPixSpace_Mask) {
+ // first, if we're dealing with an interleaved format, deinterleave it
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_B8G8_R8G8:
+ gen.dup();
+ gen.dup();
+ gen.extract_8in32(2, (w + 1) >> 1, h);
+ gen.swap(2);
+ gen.extract_8in16(1, w, h);
+ gen.swap(1);
+ gen.extract_8in32(0, (w + 1) >> 1, h);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ break;
+
+ case kVDPixType_G8B8_G8R8:
+ gen.dup();
+ gen.dup();
+ gen.extract_8in32(3, (w + 1) >> 1, h);
+ gen.swap(2);
+ gen.extract_8in16(0, w, h);
+ gen.swap(1);
+ gen.extract_8in32(1, (w + 1) >> 1, h);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ break;
+
+ case kVDPixType_8_B8R8:
+ gen.dup();
+ gen.extract_8in16(1, (w + 1) >> 1, (h + 1) >> 1);
+ gen.swap(2);
+ gen.swap(1);
+ gen.extract_8in16(0, (w + 1) >> 1, (h + 1) >> 1);
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_8_8_8;
+ break;
+
+ case kVDPixType_V210:
+ gen.conv_V210_to_32F();
+ srcToken = (srcToken & ~kVDPixType_Mask) | kVDPixType_32F_32F_32F_LE;
+ break;
+ }
+
+ // if the source is subsampled, converge on 4:4:4 subsampling, but only if we actually need
+ // the auxiliary channels
+ const VDPixmapSamplingInfo& sampInfo = VDPixmapGetSamplingInfo(srcToken);
+
+ if ((dstToken & kVDPixSpace_Mask) != kVDPixSpace_Y_601 && (dstToken & kVDPixSpace_Mask) != kVDPixSpace_Y_709) {
+ if (sampInfo.mCXBits | sampInfo.mCYBits | sampInfo.mCXOffset16 | sampInfo.mCbYOffset16 | sampInfo.mCrYOffset16)
+ srcToken = BlitterConvertSampling(gen, srcToken, kVDPixSamp_444, w, h);
+ }
+
+ // change color spaces
+ uint32 dstSpace = dstToken & kVDPixSpace_Mask;
+ while((srcToken ^ dstToken) & kVDPixSpace_Mask) {
+ uint32 srcSpace = srcToken & kVDPixSpace_Mask;
+ uint32 targetSpace = dstSpace;
+
+space_reconvert:
+ switch(targetSpace) {
+ case kVDPixSpace_BGR:
+ switch(srcSpace) {
+ case kVDPixSpace_YCC_709:
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_8_8_8:
+ gen.ycbcr709_to_rgb32();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_32F_32F_32F_LE, w, h);
+ gen.ycbcr709_to_rgb32_32f();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
+ break;
+
+ case kVDPixType_32F_32F_32F_LE:
+ gen.ycbcr709_to_rgb32_32f();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ case kVDPixSpace_YCC_601:
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_8_8_8:
+ gen.ycbcr601_to_rgb32();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_16F_16F_16F_LE:
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_32F_32F_32F_LE, w, h);
+ gen.ycbcr601_to_rgb32_32f();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
+ break;
+
+ case kVDPixType_32F_32F_32F_LE:
+ gen.ycbcr601_to_rgb32_32f();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_32Fx4_LE;
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ case kVDPixSpace_Y_601:
+ targetSpace = kVDPixSpace_YCC_601;
+ goto space_reconvert;
+
+ case kVDPixSpace_Pal:
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_1:
+ gen.conv_Pal1_to_8888(0);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_2:
+ gen.conv_Pal2_to_8888(0);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_4:
+ gen.conv_Pal4_to_8888(0);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ case kVDPixType_8:
+ gen.conv_Pal8_to_8888(0);
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_BGR | kVDPixType_8888;
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+ case kVDPixSpace_Y_601:
+ if (srcSpace == kVDPixSpace_YCC_601) {
+ gen.pop();
+ gen.swap(1);
+ gen.pop();
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_32F_32F_32F_LE:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_32F_LE;
+ break;
+ case kVDPixType_16F_16F_16F_LE:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_16F_LE;
+ break;
+ case kVDPixType_8_8_8:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_601 | kVDPixType_8;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8, w, h);
+ break;
+ } else if (srcSpace == kVDPixSpace_YCC_709) {
+ gen.pop();
+ gen.swap(1);
+ gen.pop();
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_32F_32F_32F_LE:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_32F_LE;
+ break;
+ case kVDPixType_16F_16F_16F_LE:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_16F_LE;
+ break;
+ case kVDPixType_8_8_8:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_Y_709 | kVDPixType_8;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8, w, h);
+ break;
+ }
+ // fall through
+ case kVDPixSpace_YCC_601:
+ switch(srcSpace) {
+ case kVDPixSpace_BGR:
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8888, w, h);
+ gen.rgb32_to_ycbcr601();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_601 | kVDPixType_8_8_8;
+ break;
+ case kVDPixSpace_Y_601:
+ case kVDPixSpace_Y_709:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_601 | kVDPixType_8;
+
+ {
+ const VDPixmapSamplingInfo& sinfo = VDPixmapGetSamplingInfo(dstToken);
+ int cw = ((w - 1) >> sinfo.mCXBits) + 1;
+ int ch = ((h - 1) >> sinfo.mCYBits) + 1;
+
+ gen.ldconst(0x80, cw, cw, ch, srcToken);
+ }
+
+ gen.dup();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = kVDPixSpace_YCC_601 | kVDPixType_8_8_8 | (dstToken & kVDPixSamp_Mask);
+ break;
+ case kVDPixSpace_YCC_709:
+ VDASSERT((srcToken & kVDPixType_Mask) == kVDPixType_8_8_8);
+ gen.ycbcr709_to_ycbcr601();
+ srcToken = (srcToken & ~kVDPixSpace_Mask) | kVDPixSpace_YCC_601;
+ break;
+
+ case kVDPixSpace_Pal:
+ targetSpace = kVDPixSpace_BGR;
+ goto space_reconvert;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+ case kVDPixSpace_YCC_709:
+ switch(srcSpace) {
+ case kVDPixSpace_BGR:
+ srcToken = BlitterConvertType(gen, srcToken, kVDPixType_8888, w, h);
+ gen.rgb32_to_ycbcr709();
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_709 | kVDPixType_8_8_8;
+ break;
+ case kVDPixSpace_Y_709:
+ case kVDPixSpace_Y_601:
+ srcToken = (srcToken & ~(kVDPixType_Mask | kVDPixSpace_Mask)) | kVDPixSpace_YCC_709 | kVDPixType_8;
+
+ {
+ const VDPixmapSamplingInfo& sinfo = VDPixmapGetSamplingInfo(dstToken);
+ int cw = ((w - 1) >> sinfo.mCXBits) + 1;
+ int ch = ((h - 1) >> sinfo.mCYBits) + 1;
+
+ gen.ldconst(0x80, cw, cw, ch, srcToken);
+ }
+
+ gen.dup();
+ gen.swap(2);
+ gen.swap(1);
+ srcToken = kVDPixSpace_YCC_709 | kVDPixType_8_8_8 | (dstToken & kVDPixSamp_Mask);
+ break;
+ case kVDPixSpace_YCC_601:
+ VDASSERT((srcToken & kVDPixType_Mask) == kVDPixType_8_8_8 || (srcToken & kVDPixType_Mask) == kVDPixType_32F_32F_32F_LE);
+ gen.ycbcr601_to_ycbcr709();
+ srcToken = (srcToken & ~kVDPixSpace_Mask) | kVDPixSpace_YCC_709;
+ break;
+ case kVDPixSpace_Pal:
+ targetSpace = kVDPixSpace_BGR;
+ goto space_reconvert;
+ default:
+ VDASSERT(false);
+ break;
+ }
+ break;
+
+ default:
+ VDASSERT(false);
+ break;
+ }
+ }
+ }
+
+ // check if we need a type change
+ //
+ // Note: If the sampling is also different, we have to be careful about what types we
+ // target. The type conversion may itself involve a sampling conversion, so things get
+ // VERY tricky here.
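+	// For example, going from an 8-bit planar source to a 16F planar destination with
+	// different subsampling: the type conversion below targets 32F instead of 16F, the
+	// sampling conversion then runs on the 32F data, and the final BlitterConvertType()
+	// call at the end of this function drops the result to 16F.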
+ if ((srcToken ^ dstToken) & kVDPixType_Mask) {
+ bool samplingDifferent = 0 != ((srcToken ^ dstToken) & kVDPixSamp_Mask);
+ uint32 intermediateTypeToken = dstToken & kVDPixType_Mask;
+
+ if (samplingDifferent) {
+ switch(dstToken & kVDPixType_Mask) {
+ case kVDPixType_16F_16F_16F_LE:
+ intermediateTypeToken = kVDPixType_32F_32F_32F_LE;
+ break;
+ case kVDPixType_8_B8R8:
+ intermediateTypeToken = kVDPixType_8_8_8;
+ break;
+ }
+ }
+
+ srcToken = BlitterConvertType(gen, srcToken, (dstToken & ~kVDPixType_Mask) | intermediateTypeToken, w, h);
+ }
+
+ // convert subsampling if necessary
+ switch(srcToken & kVDPixType_Mask) {
+ case kVDPixType_8_8_8:
+ case kVDPixType_16F_16F_16F_LE:
+ case kVDPixType_32F_32F_32F_LE:
+ if ((srcToken ^ dstToken) & kVDPixSamp_Mask)
+ srcToken = BlitterConvertSampling(gen, srcToken, dstToken, w, h);
+ break;
+ }
+
+ // check if we need a type change (possible with 16F)
+ srcToken = BlitterConvertType(gen, srcToken, dstToken, w, h);
+
+ return gen.create();
+}
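+// Usage sketch (not part of the original patch; the variable names below are only an
+// assumption of how a caller would typically drive this API, based on the declarations
+// above):
+//
+//     IVDPixmapBlitter *blitter = VDPixmapCreateBlitter(dstPixmap, srcPixmap);
+//     if (blitter) {
+//         blitter->Blit(dstPixmap, srcPixmap);   // or Blit(dst, &subRect, src)
+//         delete blitter;                        // assuming the caller owns and deletes it
+//     }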
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_16f.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_16f.cpp
new file mode 100644
index 000000000..3e9af1a1b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_16f.cpp
@@ -0,0 +1,40 @@
+#include <vd2/system/halffloat.h>
+#include "uberblit_16f.h"
+
+///////////////////////////////////////////////////////////////////////////////
+
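+// Row converters between 32-bit float and 16-bit half-float samples. Each Compute()
+// call translates one row of the upstream generator using the scalar half-float
+// helpers from <vd2/system/halffloat.h>.
+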
+void VDPixmapGen_32F_To_16F::Start() {
+ StartWindow(mWidth * sizeof(uint16));
+}
+
+uint32 VDPixmapGen_32F_To_16F::GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_16F_LE;
+}
+
+void VDPixmapGen_32F_To_16F::Compute(void *dst0, sint32 y) {
+ uint16 *dst = (uint16 *)dst0;
+ const float *src = (const float *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 w = mWidth;
+
+ for(uint32 i=0; i<w; ++i)
+ *dst++ = VDConvertFloatToHalf(src++);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGen_16F_To_32F::Start() {
+ StartWindow(mWidth * sizeof(float));
+}
+
+uint32 VDPixmapGen_16F_To_32F::GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+}
+
+void VDPixmapGen_16F_To_32F::Compute(void *dst0, sint32 y) {
+ float *dst = (float *)dst0;
+ const uint16 *src = (const uint16 *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 w = mWidth;
+
+ for(uint32 i=0; i<w; ++i)
+ VDConvertHalfToFloat(*src++, dst++);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_gen.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_gen.cpp
new file mode 100644
index 000000000..f93ca322e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_gen.cpp
@@ -0,0 +1,1597 @@
+#include <vd2/system/vdalloc.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include "uberblit.h"
+#include "uberblit_gen.h"
+#include "uberblit_fill.h"
+#include "uberblit_input.h"
+#include "uberblit_resample.h"
+#include "uberblit_resample_special.h"
+#include "uberblit_ycbcr.h"
+#include "uberblit_rgb.h"
+#include "uberblit_swizzle.h"
+#include "uberblit_pal.h"
+#include "uberblit_16f.h"
+#include "uberblit_v210.h"
+
+#ifdef VD_CPU_X86
+ #include "uberblit_swizzle_x86.h"
+ #include "uberblit_ycbcr_x86.h"
+ #include "uberblit_rgb_x86.h"
+ #include "uberblit_resample_special_x86.h"
+#endif
+
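+// Helpers for draining a generator chain into a destination plane. VDPixmapGenerate()
+// copies each produced row with memcpy, while VDPixmapGenerateFast() lets the generator
+// write straight into the destination via ProcessRow() (used when the requested output
+// index is 0).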
+void VDPixmapGenerate(void *dst, ptrdiff_t pitch, sint32 bpr, sint32 height, IVDPixmapGen *gen, int genIndex) {
+ for(sint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, genIndex), bpr);
+ vdptrstep(dst, pitch);
+ }
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapGenerateFast(void *dst, ptrdiff_t pitch, sint32 height, IVDPixmapGen *gen) {
+ for(sint32 y=0; y<height; ++y) {
+ gen->ProcessRow(dst, y);
+ vdptrstep(dst, pitch);
+ }
+ VDCPUCleanupExtensions();
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+IVDPixmapBlitter *VDCreatePixmapUberBlitterDirectCopy(const VDPixmap& dst, const VDPixmap& src) {
+ return new VDPixmapUberBlitterDirectCopy;
+}
+
+IVDPixmapBlitter *VDCreatePixmapUberBlitterDirectCopy(const VDPixmapLayout& dst, const VDPixmapLayout& src) {
+ return new VDPixmapUberBlitterDirectCopy;
+}
+
+VDPixmapUberBlitterDirectCopy::VDPixmapUberBlitterDirectCopy() {
+}
+
+VDPixmapUberBlitterDirectCopy::~VDPixmapUberBlitterDirectCopy() {
+}
+
+void VDPixmapUberBlitterDirectCopy::Blit(const VDPixmap& dst, const VDPixmap& src) {
+ Blit(dst, NULL, src);
+}
+
+void VDPixmapUberBlitterDirectCopy::Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src) {
+ VDASSERT(dst.format == src.format);
+
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(dst.format);
+
+ void *p = dst.data;
+ void *p2 = dst.data2;
+ void *p3 = dst.data3;
+ int w = dst.w;
+ int h = dst.h;
+
+ if (formatInfo.qchunky) {
+ w = (w + formatInfo.qw - 1) / formatInfo.qw;
+ h = -(-h >> formatInfo.qhbits);
+ }
+
+ int w2 = -(-dst.w >> formatInfo.auxwbits);
+ int h2 = -(-dst.h >> formatInfo.auxhbits);
+
+ if (rDst) {
+ int x1 = rDst->left;
+ int y1 = rDst->top;
+ int x2 = rDst->right;
+ int y2 = rDst->bottom;
+
+ VDASSERT(x1 >= 0 && y1 >= 0 && x2 <= w && y2 <= h && x2 >= x1 && y2 >= y1);
+
+ if (x2 < x1 || y2 < y1)
+ return;
+
+ p = vdptroffset(dst.data, dst.pitch * y1 + x1 * formatInfo.qsize);
+ w = x2 - x1;
+ h = y2 - y1;
+
+ if (formatInfo.auxbufs >= 1) {
+ VDASSERT(!((x1|x2) & ((1 << formatInfo.auxwbits) - 1)));
+ VDASSERT(!((y1|y2) & ((1 << formatInfo.auxhbits) - 1)));
+
+ int ax1 = x1 >> formatInfo.auxwbits;
+ int ay1 = y1 >> formatInfo.auxhbits;
+ int ax2 = x2 >> formatInfo.auxwbits;
+ int ay2 = y2 >> formatInfo.auxhbits;
+
+ p2 = vdptroffset(dst.data2, dst.pitch2 * ay1 + ax1);
+ w2 = ax2 - ax1;
+ h2 = ay2 - ay1;
+
+ if (formatInfo.auxbufs >= 2)
+ p3 = vdptroffset(dst.data3, dst.pitch3 * ay1 + ax1);
+ }
+ }
+
+ uint32 bpr = formatInfo.qsize * w;
+
+ VDMemcpyRect(p, dst.pitch, src.data, src.pitch, bpr, h);
+
+ if (formatInfo.auxbufs >= 1) {
+ VDMemcpyRect(p2, dst.pitch2, src.data2, src.pitch2, w2 * formatInfo.auxsize, h2);
+
+ if (formatInfo.auxbufs >= 2)
+ VDMemcpyRect(p3, dst.pitch3, src.data3, src.pitch3, w2 * formatInfo.auxsize, h2);
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+VDPixmapUberBlitter::VDPixmapUberBlitter() {
+}
+
+VDPixmapUberBlitter::~VDPixmapUberBlitter() {
+ while(!mGenerators.empty()) {
+ delete mGenerators.back();
+ mGenerators.pop_back();
+ }
+}
+
+void VDPixmapUberBlitter::Blit(const VDPixmap& dst, const VDPixmap& src) {
+ Blit(dst, NULL, src);
+}
+
+void VDPixmapUberBlitter::Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src) {
+ for(Sources::const_iterator it(mSources.begin()), itEnd(mSources.end()); it!=itEnd; ++it) {
+ const SourceEntry& se = *it;
+ const void *p;
+ ptrdiff_t pitch;
+
+ switch(se.mSrcPlane) {
+ case 0:
+ p = src.data;
+ pitch = src.pitch;
+ break;
+ case 1:
+ p = src.data2;
+ pitch = src.pitch2;
+ break;
+ case 2:
+ p = src.data3;
+ pitch = src.pitch3;
+ break;
+ default:
+ VDASSERT(false);
+ break;
+ }
+
+ se.mpSrc->SetSource((const char *)p + pitch*se.mSrcY + se.mSrcX, pitch, src.palette);
+ }
+
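+	// Dispatch on the number of output planes. Three outputs means a planar destination;
+	// whether the planes can be rendered in fully separate passes depends on the
+	// independence flags computed in VDPixmapUberBlitterGenerator::create().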
+ if (mOutputs[2].mpSrc) {
+ if (mbIndependentPlanes)
+ Blit3Separated(dst, rDst);
+ else if (mbIndependentChromaPlanes)
+ Blit3Split(dst, rDst);
+ else
+ Blit3(dst, rDst);
+ } else if (mOutputs[1].mpSrc) {
+ if (mbIndependentPlanes)
+ Blit2Separated(dst, rDst);
+ else
+ Blit2(dst, rDst);
+ } else
+ Blit(dst, rDst);
+}
+
+void VDPixmapUberBlitter::Blit(const VDPixmap& dst, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(dst.format);
+
+ mOutputs[0].mpSrc->AddWindowRequest(0, 0);
+ mOutputs[0].mpSrc->Start();
+
+ void *p = dst.data;
+ int w = dst.w;
+ int h = dst.h;
+
+ if (formatInfo.qchunky) {
+ w = (w + formatInfo.qw - 1) / formatInfo.qw;
+ h = -(-h >> formatInfo.qhbits);
+ }
+
+ if (rDst) {
+ int x1 = rDst->left;
+ int y1 = rDst->top;
+ int x2 = rDst->right;
+ int y2 = rDst->bottom;
+
+ if (formatInfo.qchunky) {
+ x1 = x1 / formatInfo.qw;
+ y1 = y1 / formatInfo.qh;
+ x2 = (x2 + formatInfo.qw - 1) / formatInfo.qw;
+ y2 = (y2 + formatInfo.qh - 1) / formatInfo.qh;
+ }
+
+ VDASSERT(x1 >= 0 && y1 >= 0 && x2 <= w && y2 <= h && x2 >= x1 && y2 >= y1);
+
+ if (x2 < x1 || y2 < y1)
+ return;
+
+ p = vdptroffset(dst.data, dst.pitch * y1 + x1 * formatInfo.qsize);
+ w = x2 - x1;
+ h = y2 - y1;
+ }
+
+ uint32 bpr = formatInfo.qsize * w;
+
+ if (mOutputs[0].mSrcIndex == 0)
+ VDPixmapGenerateFast(p, dst.pitch, h, mOutputs[0].mpSrc);
+ else
+ VDPixmapGenerate(p, dst.pitch, bpr, h, mOutputs[0].mpSrc, mOutputs[0].mSrcIndex);
+}
+
+void VDPixmapUberBlitter::Blit3(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[1].mpSrc;
+ int idx = mOutputs[1].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[2].mpSrc;
+ int idx1 = mOutputs[2].mSrcIndex;
+ IVDPixmapGen *gen2 = mOutputs[0].mpSrc;
+ int idx2 = mOutputs[0].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+ gen2->AddWindowRequest(0, 0);
+ gen2->Start();
+
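+	// auxstep is a 32-bit fixed-point increment: 2 * (0x80000000 >> auxhbits), i.e.
+	// 2^32 / 2^auxhbits (wrapping to 0 when auxhbits == 0). auxaccum overflows back to
+	// zero once every 2^auxhbits luma rows, which is exactly when the next chroma row
+	// must be emitted.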
+ uint32 auxstep = 0x80000000UL >> formatInfo.auxhbits;
+ uint32 auxaccum = 0;
+
+ auxstep += auxstep;
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint32 bpr2 = formatInfo.auxsize * -(-px.w >> formatInfo.auxwbits);
+ uint8 *dst = (uint8 *)px.data;
+ uint8 *dst2 = (uint8 *)px.data2;
+ uint8 *dst3 = (uint8 *)px.data3;
+ ptrdiff_t pitch = px.pitch;
+ ptrdiff_t pitch2 = px.pitch2;
+ ptrdiff_t pitch3 = px.pitch3;
+ uint32 y2 = 0;
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+
+ if (!auxaccum) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ memcpy(dst3, gen2->GetRow(y2, idx2), bpr2);
+ vdptrstep(dst3, pitch3);
+ ++y2;
+ }
+
+ auxaccum += auxstep;
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapUberBlitter::Blit3Split(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[1].mpSrc;
+ int idx = mOutputs[1].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[2].mpSrc;
+ int idx1 = mOutputs[2].mSrcIndex;
+ IVDPixmapGen *gen2 = mOutputs[0].mpSrc;
+ int idx2 = mOutputs[0].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+ gen2->AddWindowRequest(0, 0);
+ gen2->Start();
+
+ uint32 auxstep = 0x80000000UL >> formatInfo.auxhbits;
+ uint32 auxaccum = 0;
+
+ auxstep += auxstep;
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint8 *dst = (uint8 *)px.data;
+ ptrdiff_t pitch = px.pitch;
+
+ if (idx == 0) {
+ for(uint32 y=0; y<height; ++y) {
+ gen->ProcessRow(dst, y);
+ vdptrstep(dst, pitch);
+ }
+ } else {
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+ }
+ }
+
+ uint32 bpr2 = -(-px.w >> formatInfo.auxwbits) * formatInfo.auxsize;
+ uint8 *dst2 = (uint8 *)px.data2;
+ uint8 *dst3 = (uint8 *)px.data3;
+ ptrdiff_t pitch2 = px.pitch2;
+ ptrdiff_t pitch3 = px.pitch3;
+ uint32 y2 = 0;
+ for(uint32 y=0; y<height; ++y) {
+ if (!auxaccum) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ memcpy(dst3, gen2->GetRow(y2, idx2), bpr2);
+ vdptrstep(dst3, pitch3);
+ ++y2;
+ }
+
+ auxaccum += auxstep;
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapUberBlitter::Blit3Separated(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[1].mpSrc;
+ int idx = mOutputs[1].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[2].mpSrc;
+ int idx1 = mOutputs[2].mSrcIndex;
+ IVDPixmapGen *gen2 = mOutputs[0].mpSrc;
+ int idx2 = mOutputs[0].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+ gen2->AddWindowRequest(0, 0);
+ gen2->Start();
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint8 *dst = (uint8 *)px.data;
+ ptrdiff_t pitch = px.pitch;
+
+ if (idx == 0) {
+ for(uint32 y=0; y<height; ++y) {
+ gen->ProcessRow(dst, y);
+ vdptrstep(dst, pitch);
+ }
+ } else {
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+ }
+ }
+
+ uint32 bpr2 = -(-px.w >> formatInfo.auxwbits) * formatInfo.auxsize;
+ uint32 h2 = -(-px.h >> formatInfo.auxhbits);
+ uint8 *dst2 = (uint8 *)px.data2;
+ ptrdiff_t pitch2 = px.pitch2;
+ if (idx1 == 0) {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ gen1->ProcessRow(dst2, y2);
+ vdptrstep(dst2, pitch2);
+ }
+ } else {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ }
+ }
+
+ uint8 *dst3 = (uint8 *)px.data3;
+ ptrdiff_t pitch3 = px.pitch3;
+ if (idx2 == 0) {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ gen2->ProcessRow(dst3, y2);
+ vdptrstep(dst3, pitch3);
+ }
+ } else {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ memcpy(dst3, gen2->GetRow(y2, idx2), bpr2);
+ vdptrstep(dst3, pitch3);
+ }
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapUberBlitter::Blit2(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[0].mpSrc;
+ int idx = mOutputs[0].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[1].mpSrc;
+ int idx1 = mOutputs[1].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+
+ uint32 auxstep = 0x80000000UL >> formatInfo.auxhbits;
+ uint32 auxaccum = 0;
+
+ auxstep += auxstep;
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint32 bpr2 = formatInfo.auxsize * -(-px.w >> formatInfo.auxwbits);
+ uint8 *dst = (uint8 *)px.data;
+ uint8 *dst2 = (uint8 *)px.data2;
+ ptrdiff_t pitch = px.pitch;
+ ptrdiff_t pitch2 = px.pitch2;
+ uint32 y2 = 0;
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+
+ if (!auxaccum) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ ++y2;
+ }
+
+ auxaccum += auxstep;
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+void VDPixmapUberBlitter::Blit2Separated(const VDPixmap& px, const vdrect32 *rDst) {
+ const VDPixmapFormatInfo& formatInfo = VDPixmapGetInfo(px.format);
+ IVDPixmapGen *gen = mOutputs[0].mpSrc;
+ int idx = mOutputs[0].mSrcIndex;
+ IVDPixmapGen *gen1 = mOutputs[1].mpSrc;
+ int idx1 = mOutputs[1].mSrcIndex;
+
+ gen->AddWindowRequest(0, 0);
+ gen->Start();
+ gen1->AddWindowRequest(0, 0);
+ gen1->Start();
+
+ int qw = px.w;
+ int qh = px.h;
+
+ if (formatInfo.qchunky) {
+ qw = (qw + formatInfo.qw - 1) / formatInfo.qw;
+ qh = -(-qh >> formatInfo.qhbits);
+ }
+
+ uint32 height = qh;
+ uint32 bpr = formatInfo.qsize * qw;
+ uint8 *dst = (uint8 *)px.data;
+ ptrdiff_t pitch = px.pitch;
+
+ if (idx == 0) {
+ for(uint32 y=0; y<height; ++y) {
+ gen->ProcessRow(dst, y);
+ vdptrstep(dst, pitch);
+ }
+ } else {
+ for(uint32 y=0; y<height; ++y) {
+ memcpy(dst, gen->GetRow(y, idx), bpr);
+ vdptrstep(dst, pitch);
+ }
+ }
+
+ uint32 bpr2 = -(-px.w >> formatInfo.auxwbits) * formatInfo.auxsize;
+ uint32 h2 = -(-px.h >> formatInfo.auxhbits);
+ uint8 *dst2 = (uint8 *)px.data2;
+ ptrdiff_t pitch2 = px.pitch2;
+ if (idx1 == 0) {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ gen1->ProcessRow(dst2, y2);
+ vdptrstep(dst2, pitch2);
+ }
+ } else {
+ for(uint32 y2=0; y2<h2; ++y2) {
+ memcpy(dst2, gen1->GetRow(y2, idx1), bpr2);
+ vdptrstep(dst2, pitch2);
+ }
+ }
+
+ VDCPUCleanupExtensions();
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+VDPixmapUberBlitterGenerator::VDPixmapUberBlitterGenerator() {
+}
+
+VDPixmapUberBlitterGenerator::~VDPixmapUberBlitterGenerator() {
+ while(!mGenerators.empty()) {
+ delete mGenerators.back();
+ mGenerators.pop_back();
+ }
+}
+
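+// The generator builder is a small stack machine: ldsrc/ldconst push value nodes, the
+// conv_*/interleave_*/resample ops replace the top entries with new nodes, and
+// swap/dup/pop rearrange the stack. swap(n) exchanges the top of the stack with the
+// entry n positions below it.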
+void VDPixmapUberBlitterGenerator::swap(int index) {
+ std::swap(mStack.back(), (&mStack.back())[-index]);
+}
+
+void VDPixmapUberBlitterGenerator::dup() {
+ mStack.push_back(mStack.back());
+}
+
+void VDPixmapUberBlitterGenerator::pop() {
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::ldsrc(int srcIndex, int srcPlane, int x, int y, uint32 w, uint32 h, uint32 type, uint32 bpr) {
+ VDPixmapGenSrc *src = new VDPixmapGenSrc;
+
+ src->Init(w, h, type, bpr);
+
+ mGenerators.push_back(src);
+ mStack.push_back(StackEntry(src, 0));
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = srcPlane;
+ se.mSrcX = x;
+ se.mSrcY = y;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::ldconst(uint8 fill, uint32 bpr, uint32 w, uint32 h, uint32 type) {
+ VDPixmapGenFill8 *src = new VDPixmapGenFill8;
+
+ src->Init(fill, bpr, w, h, type);
+
+ mGenerators.push_back(src);
+ mStack.push_back(StackEntry(src, 0));
+}
+
+void VDPixmapUberBlitterGenerator::extract_8in16(int offset, uint32 w, uint32 h) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_8In16 *src = NULL;
+
+#if VD_CPU_X86
+ if (MMX_enabled) {
+ if (offset == 0)
+ src = new VDPixmapGen_8In16_Even_MMX;
+ else if (offset == 1)
+ src = new VDPixmapGen_8In16_Odd_MMX;
+ }
+#endif
+ if (!src)
+ src = new VDPixmapGen_8In16;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, offset, w, h);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::extract_8in32(int offset, uint32 w, uint32 h) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_8In32 *src = NULL;
+
+#if VD_CPU_X86
+ if (MMX_enabled) {
+ if ((unsigned)offset < 4)
+ src = new VDPixmapGen_8In32_MMX;
+ }
+#endif
+
+ if (!src)
+ src = new VDPixmapGen_8In32;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, offset, w, h);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::swap_8in16(uint32 w, uint32 h, uint32 bpr) {
+ StackEntry *args = &mStack.back();
+
+#if VD_CPU_X86
+ VDPixmapGen_Swap8In16 *src = MMX_enabled ? new VDPixmapGen_Swap8In16_MMX : new VDPixmapGen_Swap8In16;
+#else
+ VDPixmapGen_Swap8In16 *src = new VDPixmapGen_Swap8In16;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, w, h, bpr);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_Pal1_to_8888(int srcIndex) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_Pal1_To_X8R8G8B8 *src = new VDPixmapGen_Pal1_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = 0;
+ se.mSrcX = 0;
+ se.mSrcY = 0;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::conv_Pal2_to_8888(int srcIndex) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_Pal2_To_X8R8G8B8 *src = new VDPixmapGen_Pal2_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = 0;
+ se.mSrcX = 0;
+ se.mSrcY = 0;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::conv_Pal4_to_8888(int srcIndex) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_Pal4_To_X8R8G8B8 *src = new VDPixmapGen_Pal4_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = 0;
+ se.mSrcX = 0;
+ se.mSrcY = 0;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::conv_Pal8_to_8888(int srcIndex) {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_Pal8_To_X8R8G8B8 *src = new VDPixmapGen_Pal8_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+
+ SourceEntry se;
+ se.mpSrc = src;
+ se.mSrcIndex = srcIndex;
+ se.mSrcPlane = 0;
+ se.mSrcX = 0;
+ se.mSrcY = 0;
+ mSources.push_back(se);
+}
+
+void VDPixmapUberBlitterGenerator::pointh(float xoffset, float xfactor, uint32 w) {
+ StackEntry *args = &mStack.back();
+
+ if (xoffset != 0.5f || xfactor != 1.0f) {
+ VDPixmapGenResampleRow *src = new VDPixmapGenResampleRow;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, w, xoffset, xfactor, nsVDPixmap::kFilterPoint, 0, false);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::pointv(float yoffset, float yfactor, uint32 h) {
+ StackEntry *args = &mStack.back();
+
+ if (yoffset != 0.5f || yfactor != 1.0f) {
+ VDPixmapGenResampleCol *src = new VDPixmapGenResampleCol;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, h, yoffset, yfactor, nsVDPixmap::kFilterPoint, 0, false);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::linearh(float xoffset, float xfactor, uint32 w, bool interpOnly) {
+ StackEntry *args = &mStack.back();
+ IVDPixmapGen *src = args[0].mpSrc;
+ int srcIndex = args[0].mSrcIndex;
+
+ sint32 srcw = src->GetWidth(srcIndex);
+ if (xoffset == 0.5f && xfactor == 1.0f && srcw == w)
+ return;
+
+ if (xoffset == 0.5f && (src->GetType(srcIndex) & kVDPixType_Mask) == kVDPixType_8) {
+ if (xfactor == 2.0f && w == ((srcw + 1) >> 1)) {
+ VDPixmapGenResampleRow_d2_p0_lin_u8 *out = new VDPixmapGenResampleRow_d2_p0_lin_u8;
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (xfactor == 4.0f && w == ((srcw + 3) >> 2)) {
+ VDPixmapGenResampleRow_d4_p0_lin_u8 *out = new VDPixmapGenResampleRow_d4_p0_lin_u8;
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (xfactor == 0.5f && w == srcw*2) {
+#if VD_CPU_X86
+ VDPixmapGenResampleRow_x2_p0_lin_u8 *out = ISSE_enabled ? new VDPixmapGenResampleRow_x2_p0_lin_u8_ISSE : new VDPixmapGenResampleRow_x2_p0_lin_u8;
+#else
+ VDPixmapGenResampleRow_x2_p0_lin_u8 *out = new VDPixmapGenResampleRow_x2_p0_lin_u8;
+#endif
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (xfactor == 0.25f && w == srcw*4) {
+#if VD_CPU_X86
+ VDPixmapGenResampleRow_x4_p0_lin_u8 *out = MMX_enabled ? new VDPixmapGenResampleRow_x4_p0_lin_u8_MMX : new VDPixmapGenResampleRow_x4_p0_lin_u8;
+#else
+ VDPixmapGenResampleRow_x4_p0_lin_u8 *out = new VDPixmapGenResampleRow_x4_p0_lin_u8;
+#endif
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+ }
+
+ VDPixmapGenResampleRow *out = new VDPixmapGenResampleRow;
+
+ out->Init(args[0].mpSrc, args[0].mSrcIndex, w, xoffset, xfactor, nsVDPixmap::kFilterLinear, 0, interpOnly);
+
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+}
+
+void VDPixmapUberBlitterGenerator::linearv(float yoffset, float yfactor, uint32 h, bool interpOnly) {
+ StackEntry *args = &mStack.back();
+ IVDPixmapGen *src = args[0].mpSrc;
+ int srcIndex = args[0].mSrcIndex;
+
+ sint32 srch = src->GetHeight(srcIndex);
+ if (yoffset == 0.5f && yfactor == 1.0f && srch == h)
+ return;
+
+ if ((src->GetType(srcIndex) & kVDPixType_Mask) == kVDPixType_8) {
+ if (yoffset == 1.0f && yfactor == 2.0f && h == ((srch + 1) >> 1)) {
+ VDPixmapGenResampleCol_x2_phalf_lin_u8 *out = new VDPixmapGenResampleCol_x2_phalf_lin_u8;
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (yoffset == 2.0f && yfactor == 4.0f && h == ((srch + 2) >> 2)) {
+ VDPixmapGenResampleCol_x4_p1half_lin_u8 *out = new VDPixmapGenResampleCol_x4_p1half_lin_u8;
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (yoffset == 0.25f && yfactor == 0.5f && h == srch*2) {
+#if VD_CPU_X86
+ VDPixmapGenResampleCol_d2_pnqrtr_lin_u8 *out = ISSE_enabled ? new VDPixmapGenResampleCol_d2_pnqrtr_lin_u8_ISSE : new VDPixmapGenResampleCol_d2_pnqrtr_lin_u8;
+#else
+ VDPixmapGenResampleCol_d2_pnqrtr_lin_u8 *out = new VDPixmapGenResampleCol_d2_pnqrtr_lin_u8;
+#endif
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+
+ if (yoffset == 0.125f && yfactor == 0.25f && h == srch*4) {
+#if VD_CPU_X86
+ VDPixmapGenResampleCol_d4_pn38_lin_u8 *out = ISSE_enabled ? new VDPixmapGenResampleCol_d4_pn38_lin_u8_ISSE : new VDPixmapGenResampleCol_d4_pn38_lin_u8;
+#else
+ VDPixmapGenResampleCol_d4_pn38_lin_u8 *out = new VDPixmapGenResampleCol_d4_pn38_lin_u8;
+#endif
+
+ out->Init(src, srcIndex);
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+ return;
+ }
+ }
+
+ VDPixmapGenResampleCol *out = new VDPixmapGenResampleCol;
+
+ out->Init(src, srcIndex, h, yoffset, yfactor, nsVDPixmap::kFilterLinear, 0, interpOnly);
+
+ mGenerators.push_back(out);
+ MarkDependency(out, src);
+ args[0] = StackEntry(out, 0);
+}
+
+void VDPixmapUberBlitterGenerator::linear(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h) {
+ linearh(xoffset, xfactor, w, false);
+ linearv(yoffset, yfactor, h, false);
+}
+
+void VDPixmapUberBlitterGenerator::cubich(float xoffset, float xfactor, uint32 w, float splineFactor, bool interpOnly) {
+ StackEntry *args = &mStack.back();
+
+ if (xoffset != 0.5f || xfactor != 1.0f) {
+ VDPixmapGenResampleRow *src = new VDPixmapGenResampleRow;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, w, xoffset, xfactor, nsVDPixmap::kFilterCubic, splineFactor, interpOnly);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::cubicv(float yoffset, float yfactor, uint32 h, float splineFactor, bool interpOnly) {
+ StackEntry *args = &mStack.back();
+
+ if (yoffset != 0.5f || yfactor != 1.0f) {
+ VDPixmapGenResampleCol *src = new VDPixmapGenResampleCol;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, h, yoffset, yfactor, nsVDPixmap::kFilterCubic, splineFactor, interpOnly);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::cubic(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h, float splineFactor) {
+ cubich(xoffset, xfactor, w, splineFactor, false);
+ cubicv(yoffset, yfactor, h, splineFactor, false);
+}
+
+void VDPixmapUberBlitterGenerator::lanczos3h(float xoffset, float xfactor, uint32 w) {
+ StackEntry *args = &mStack.back();
+
+ if (xoffset != 0.5f || xfactor != 1.0f) {
+ VDPixmapGenResampleRow *src = new VDPixmapGenResampleRow;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, w, xoffset, xfactor, nsVDPixmap::kFilterLanczos3, 0, false);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::lanczos3v(float yoffset, float yfactor, uint32 h) {
+ StackEntry *args = &mStack.back();
+
+ if (yoffset != 0.5f || yfactor != 1.0f) {
+ VDPixmapGenResampleCol *src = new VDPixmapGenResampleCol;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, h, yoffset, yfactor, nsVDPixmap::kFilterLanczos3, 0, false);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ }
+}
+
+void VDPixmapUberBlitterGenerator::lanczos3(float xoffset, float xfactor, uint32 w, float yoffset, float yfactor, uint32 h) {
+ lanczos3h(xoffset, xfactor, w);
+ lanczos3v(yoffset, yfactor, h);
+}
+
+void VDPixmapUberBlitterGenerator::conv_555_to_8888() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X1R5G5B5_To_X8R8G8B8 *src = MMX_enabled ? new VDPixmapGen_X1R5G5B5_To_X8R8G8B8_MMX : new VDPixmapGen_X1R5G5B5_To_X8R8G8B8;
+#else
+ VDPixmapGen_X1R5G5B5_To_X8R8G8B8 *src = new VDPixmapGen_X1R5G5B5_To_X8R8G8B8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_565_to_8888() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_R5G6B5_To_X8R8G8B8 *src = MMX_enabled ? new VDPixmapGen_R5G6B5_To_X8R8G8B8_MMX : new VDPixmapGen_R5G6B5_To_X8R8G8B8;
+#else
+ VDPixmapGen_R5G6B5_To_X8R8G8B8 *src = new VDPixmapGen_R5G6B5_To_X8R8G8B8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_888_to_8888() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_R8G8B8_To_A8R8G8B8 *src = MMX_enabled ? new VDPixmapGen_R8G8B8_To_X8R8G8B8_MMX : new VDPixmapGen_R8G8B8_To_A8R8G8B8;
+#else
+ VDPixmapGen_R8G8B8_To_A8R8G8B8 *src = new VDPixmapGen_R8G8B8_To_A8R8G8B8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_8_to_32F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_8_To_32F *src = new VDPixmapGen_8_To_32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_16F_to_32F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_16F_To_32F *src = new VDPixmapGen_16F_To_32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_V210_to_32F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_V210_To_32F *src = new VDPixmapGen_V210_To_32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
+void VDPixmapUberBlitterGenerator::conv_8888_to_X32F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X8R8G8B8_To_X32B32G32R32F *src = new VDPixmapGen_X8R8G8B8_To_X32B32G32R32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_8888_to_555() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X8R8G8B8_To_X1R5G5B5 *src = MMX_enabled ? new VDPixmapGen_X8R8G8B8_To_X1R5G5B5_MMX : new VDPixmapGen_X8R8G8B8_To_X1R5G5B5;
+#else
+ VDPixmapGen_X8R8G8B8_To_X1R5G5B5 *src = new VDPixmapGen_X8R8G8B8_To_X1R5G5B5;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_555_to_565() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X1R5G5B5_To_R5G6B5 *src = MMX_enabled ? new VDPixmapGen_X1R5G5B5_To_R5G6B5_MMX : new VDPixmapGen_X1R5G5B5_To_R5G6B5;
+#else
+ VDPixmapGen_X1R5G5B5_To_R5G6B5 *src = new VDPixmapGen_X1R5G5B5_To_R5G6B5;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_565_to_555() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_R5G6B5_To_X1R5G5B5 *src = MMX_enabled ? new VDPixmapGen_R5G6B5_To_X1R5G5B5_MMX : new VDPixmapGen_R5G6B5_To_X1R5G5B5;
+#else
+ VDPixmapGen_R5G6B5_To_X1R5G5B5 *src = new VDPixmapGen_R5G6B5_To_X1R5G5B5;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_8888_to_565() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X8R8G8B8_To_R5G6B5 *src = MMX_enabled ? new VDPixmapGen_X8R8G8B8_To_R5G6B5_MMX : new VDPixmapGen_X8R8G8B8_To_R5G6B5;
+#else
+ VDPixmapGen_X8R8G8B8_To_R5G6B5 *src = new VDPixmapGen_X8R8G8B8_To_R5G6B5;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_8888_to_888() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGen_X8R8G8B8_To_R8G8B8 *src = MMX_enabled ? new VDPixmapGen_X8R8G8B8_To_R8G8B8_MMX : new VDPixmapGen_X8R8G8B8_To_R8G8B8;
+#else
+ VDPixmapGen_X8R8G8B8_To_R8G8B8 *src = new VDPixmapGen_X8R8G8B8_To_R8G8B8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_32F_to_8() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_32F_To_8 *src = new VDPixmapGen_32F_To_8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_X32F_to_8888() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X32B32G32R32F_To_X8R8G8B8 *src = new VDPixmapGen_X32B32G32R32F_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::conv_32F_to_16F() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_32F_To_16F *src = new VDPixmapGen_32F_To_16F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
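+// Packs the top three stack entries (three 32F planes) into a single V210 stream,
+// popping two entries so only the packed output remains on the stack.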
+void VDPixmapUberBlitterGenerator::conv_32F_to_V210() {
+ StackEntry *args = &*(mStack.end() - 3);
+ VDPixmapGen_32F_To_V210 *src = new VDPixmapGen_32F_To_V210;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::convd_8888_to_555() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X8R8G8B8_To_X1R5G5B5_Dithered *src = new VDPixmapGen_X8R8G8B8_To_X1R5G5B5_Dithered;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::convd_8888_to_565() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X8R8G8B8_To_R5G6B5_Dithered *src = new VDPixmapGen_X8R8G8B8_To_R5G6B5_Dithered;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::convd_32F_to_8() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_32F_To_8_Dithered *src = new VDPixmapGen_32F_To_8_Dithered;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::convd_X32F_to_8888() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGen_X32B32G32R32F_To_X8R8G8B8_Dithered *src = new VDPixmapGen_X32B32G32R32F_To_X8R8G8B8_Dithered;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+}
+
+void VDPixmapUberBlitterGenerator::interleave_B8G8_R8G8() {
+ StackEntry *args = &mStack.back() - 2;
+ VDPixmapGen_B8x3_To_B8G8_R8G8 *src = NULL;
+
+#if VD_CPU_X86
+ if (MMX_enabled)
+ src = new VDPixmapGen_B8x3_To_B8G8_R8G8_MMX;
+#endif
+
+ if (!src)
+ src = new VDPixmapGen_B8x3_To_B8G8_R8G8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::interleave_G8B8_G8R8() {
+ StackEntry *args = &mStack.back() - 2;
+ VDPixmapGen_B8x3_To_G8B8_G8R8 *src = NULL;
+
+#if VD_CPU_X86
+ if (MMX_enabled)
+ src = new VDPixmapGen_B8x3_To_G8B8_G8R8_MMX;
+#endif
+
+ if (!src)
+ src = new VDPixmapGen_B8x3_To_G8B8_G8R8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::interleave_X8R8G8B8() {
+ StackEntry *args = &mStack.back() - 2;
+ VDPixmapGen_B8x3_To_X8R8G8B8 *src = new VDPixmapGen_B8x3_To_X8R8G8B8;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::interleave_B8R8() {
+ StackEntry *args = &mStack.back() - 1;
+
+#if VD_CPU_X86
+ VDPixmapGen_B8x2_To_B8R8 *src = MMX_enabled ? new VDPixmapGen_B8x2_To_B8R8_MMX : new VDPixmapGen_B8x2_To_B8R8;
+#else
+ VDPixmapGen_B8x2_To_B8R8 *src = new VDPixmapGen_B8x2_To_B8R8;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+}
+
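+// YCbCr <-> RGB conversions. The ycbcr*_to_rgb32* ops consume three planar inputs from
+// the stack and leave one interleaved output; the rgb32_to_ycbcr* ops do the reverse,
+// replacing one interleaved input with three planar outputs.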
+void VDPixmapUberBlitterGenerator::ycbcr601_to_rgb32() {
+ StackEntry *args = &mStack.back() - 2;
+
+#ifdef VD_CPU_X86
+ VDPixmapGenYCbCr601ToRGB32 *src = MMX_enabled ? new VDPixmapGenYCbCr601ToRGB32_MMX : new VDPixmapGenYCbCr601ToRGB32;
+#else
+ VDPixmapGenYCbCr601ToRGB32 *src = new VDPixmapGenYCbCr601ToRGB32;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::ycbcr709_to_rgb32() {
+ StackEntry *args = &mStack.back() - 2;
+
+ VDPixmapGenYCbCr709ToRGB32 *src = new VDPixmapGenYCbCr709ToRGB32;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::rgb32_to_ycbcr601() {
+ StackEntry *args = &mStack.back();
+#ifdef VD_CPU_X86
+ VDPixmapGenRGB32ToYCbCr601 *src = SSE2_enabled ? new VDPixmapGenRGB32ToYCbCr601_SSE2 : new VDPixmapGenRGB32ToYCbCr601;
+#else
+ VDPixmapGenRGB32ToYCbCr601 *src = new VDPixmapGenRGB32ToYCbCr601;
+#endif
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
+void VDPixmapUberBlitterGenerator::rgb32_to_ycbcr709() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGenRGB32ToYCbCr709 *src = new VDPixmapGenRGB32ToYCbCr709;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
+void VDPixmapUberBlitterGenerator::ycbcr601_to_rgb32_32f() {
+ StackEntry *args = &mStack.back() - 2;
+
+ VDPixmapGenYCbCr601ToRGB32F *src = new VDPixmapGenYCbCr601ToRGB32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::ycbcr709_to_rgb32_32f() {
+ StackEntry *args = &mStack.back() - 2;
+
+ VDPixmapGenYCbCr709ToRGB32F *src = new VDPixmapGenYCbCr709ToRGB32F;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.pop_back();
+ mStack.pop_back();
+}
+
+void VDPixmapUberBlitterGenerator::rgb32_to_ycbcr601_32f() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGenRGB32FToYCbCr601 *src = new VDPixmapGenRGB32FToYCbCr601;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
+void VDPixmapUberBlitterGenerator::rgb32_to_ycbcr709_32f() {
+ StackEntry *args = &mStack.back();
+ VDPixmapGenRGB32FToYCbCr709 *src = new VDPixmapGenRGB32FToYCbCr709;
+
+ src->Init(args[0].mpSrc, args[0].mSrcIndex);
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ args[0] = StackEntry(src, 0);
+ mStack.push_back(StackEntry(src, 1));
+ mStack.push_back(StackEntry(src, 2));
+}
+
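+// Rec.601 <-> Rec.709 matrix conversions operate on all three planes in place. The
+// floating-point variant is chosen when the top-of-stack plane type is 32F; otherwise
+// the 8-bit variant is used.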
+void VDPixmapUberBlitterGenerator::ycbcr601_to_ycbcr709() {
+ StackEntry *args = &mStack.back() - 2;
+
+ IVDPixmapGen *src;
+ if ((args[0].mpSrc->GetType(args[0].mSrcIndex) & kVDPixType_Mask) == kVDPixType_32F_LE) {
+ VDPixmapGenYCbCr601ToYCbCr709_32F *src2 = new VDPixmapGenYCbCr601ToYCbCr709_32F;
+
+ src2->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+ src = src2;
+ } else {
+ VDPixmapGenYCbCr601ToYCbCr709 *src2 = new VDPixmapGenYCbCr601ToYCbCr709;
+
+ src2->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+ src = src2;
+ }
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ args[1] = StackEntry(src, 1);
+ args[2] = StackEntry(src, 2);
+}
+
+void VDPixmapUberBlitterGenerator::ycbcr709_to_ycbcr601() {
+ StackEntry *args = &mStack.back() - 2;
+
+ IVDPixmapGen *src;
+ if ((args[0].mpSrc->GetType(args[0].mSrcIndex) & kVDPixType_Mask) == kVDPixType_32F_LE) {
+ VDPixmapGenYCbCr709ToYCbCr601_32F *src2 = new VDPixmapGenYCbCr709ToYCbCr601_32F;
+
+ src2->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+ src = src2;
+ } else {
+ VDPixmapGenYCbCr709ToYCbCr601 *src2 = new VDPixmapGenYCbCr709ToYCbCr601;
+
+ src2->Init(args[0].mpSrc, args[0].mSrcIndex, args[1].mpSrc, args[1].mSrcIndex, args[2].mpSrc, args[2].mSrcIndex);
+ src = src2;
+ }
+
+ mGenerators.push_back(src);
+ MarkDependency(src, args[0].mpSrc);
+ MarkDependency(src, args[1].mpSrc);
+ MarkDependency(src, args[2].mpSrc);
+ args[0] = StackEntry(src, 0);
+ args[1] = StackEntry(src, 1);
+ args[2] = StackEntry(src, 2);
+}
+
+IVDPixmapBlitter *VDPixmapUberBlitterGenerator::create() {
+ vdautoptr<VDPixmapUberBlitter> blitter(new VDPixmapUberBlitter);
+
+ int numStackEntries = (int)mStack.size();
+
+ for(int i=0; i<3; ++i) {
+ if (i < numStackEntries) {
+ blitter->mOutputs[i].mpSrc = mStack[i].mpSrc;
+ blitter->mOutputs[i].mSrcIndex = mStack[i].mSrcIndex;
+ } else {
+ blitter->mOutputs[i].mpSrc = NULL;
+ blitter->mOutputs[i].mSrcIndex = 0;
+ }
+ }
+
+ mStack.clear();
+
+ // If this blitter has three outputs, determine if outputs 1 and 2 are independent
+ // from output 0.
+ blitter->mbIndependentChromaPlanes = true;
+ blitter->mbIndependentPlanes = true;
+ if (numStackEntries >= 3) {
+ int numGens = mGenerators.size();
+ vdfastvector<uint8> genflags(numGens, 0);
+
+ enum {
+ kFlagStateful = 0x80,
+ kFlagY = 0x01,
+ kFlagCb = 0x02,
+ kFlagCr = 0x04,
+ kFlagYCbCr = 0x07
+ };
+
+ for(int i=0; i<3; ++i)
+ genflags[std::find(mGenerators.begin(), mGenerators.end(), blitter->mOutputs[i].mpSrc) - mGenerators.begin()] |= (1 << i);
+
+ for(int i=0; i<numGens; ++i) {
+ IVDPixmapGen *gen = mGenerators[i];
+
+ if (gen->IsStateful())
+ genflags[i] |= kFlagStateful;
+ }
+
+ while(!mDependencies.empty()) {
+ const Dependency& dep = mDependencies.back();
+
+ genflags[dep.mSrcIdx] |= (genflags[dep.mDstIdx] & ~kFlagStateful);
+
+ mDependencies.pop_back();
+ }
+
+ for(int i=0; i<numGens; ++i) {
+ uint8 flags = genflags[i];
+
+ if (!(flags & kFlagStateful))
+ continue;
+
+ switch(flags & kFlagYCbCr) {
+ case 0:
+ case kFlagY:
+ case kFlagCb:
+ case kFlagCr:
+ break;
+ case kFlagCr | kFlagCb:
+ blitter->mbIndependentPlanes = false;
+ break;
+ case kFlagCb | kFlagY:
+ case kFlagCr | kFlagY:
+ case kFlagCr | kFlagCb | kFlagY:
+ blitter->mbIndependentPlanes = false;
+ blitter->mbIndependentChromaPlanes = false;
+ break;
+ }
+ }
+ } else if (numStackEntries >= 2) {
+ int numGens = mGenerators.size();
+ vdfastvector<uint8> genflags(numGens, 0);
+
+ enum {
+ kFlagStateful = 0x80,
+ kFlagY = 0x01,
+ kFlagC = 0x02,
+ kFlagYC = 0x03
+ };
+
+ for(int i=0; i<2; ++i)
+ genflags[std::find(mGenerators.begin(), mGenerators.end(), blitter->mOutputs[i].mpSrc) - mGenerators.begin()] |= (1 << i);
+
+ for(int i=0; i<numGens; ++i) {
+ IVDPixmapGen *gen = mGenerators[i];
+
+ if (gen->IsStateful())
+ genflags[i] |= kFlagStateful;
+ }
+
+ while(!mDependencies.empty()) {
+ const Dependency& dep = mDependencies.back();
+
+ genflags[dep.mSrcIdx] |= (genflags[dep.mDstIdx] & ~kFlagStateful);
+
+ mDependencies.pop_back();
+ }
+
+ for(int i=0; i<numGens; ++i) {
+ uint8 flags = genflags[i];
+
+ if (!(flags & kFlagStateful))
+ continue;
+
+ switch(flags & kFlagYC) {
+ case kFlagYC:
+ blitter->mbIndependentPlanes = false;
+ blitter->mbIndependentChromaPlanes = false;
+ break;
+ }
+ }
+ }
+
+ blitter->mGenerators.swap(mGenerators);
+ blitter->mSources.swap(mSources);
+ return blitter.release();
+}
+
+void VDPixmapUberBlitterGenerator::MarkDependency(IVDPixmapGen *dst, IVDPixmapGen *src) {
+ Generators::const_iterator it1(std::find(mGenerators.begin(), mGenerators.end(), dst));
+ Generators::const_iterator it2(std::find(mGenerators.begin(), mGenerators.end(), src));
+
+ VDASSERT(it1 != mGenerators.end());
+ VDASSERT(it2 != mGenerators.end());
+
+ int idx1 = it1 - mGenerators.begin();
+ int idx2 = it2 - mGenerators.begin();
+
+ Dependency dep = { idx1, idx2 };
+
+ mDependencies.push_back(dep);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample.cpp
new file mode 100644
index 000000000..1363fb730
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample.cpp
@@ -0,0 +1,623 @@
+#include <float.h>
+#include <math.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/memory.h>
+#include <vd2/system/math.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/Kasumi/pixmap.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vd2/Kasumi/resample.h>
+
+#include <vd2/Kasumi/resample_kernels.h>
+#include "resample_stages_x86.h"
+#include "uberblit_resample.h"
+
+namespace {
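+	// Rounded 16.16 fixed-point multiply.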
+ sint32 scale32x32_fp16(sint32 x, sint32 y) {
+ return (sint32)(((sint64)x * y + 0x8000) >> 16);
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactory(double cutoff, float filterFactor) {
+ return new T;
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactoryLinear(double cutoff, float filterFactor) {
+ return new T(VDResamplerLinearFilter(cutoff));
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactoryCubic(double cutoff, float filterFactor) {
+ return new T(VDResamplerCubicFilter(cutoff, filterFactor));
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactoryCubic2(double cutoff, float filterFactor) {
+ return new T(filterFactor);
+ }
+
+ template<class T>
+ IVDResamplerSeparableRowStage *RowFactoryLanczos3(double cutoff, float filterFactor) {
+ return new T(VDResamplerLanczos3Filter(cutoff));
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactory(double cutoff, float filterFactor) {
+ return new T;
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactoryLinear(double cutoff, float filterFactor) {
+ return new T(VDResamplerLinearFilter(cutoff));
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactoryCubic(double cutoff, float filterFactor) {
+ return new T(VDResamplerCubicFilter(cutoff, filterFactor));
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactoryCubic2(double cutoff, float filterFactor) {
+ return new T(filterFactor);
+ }
+
+ template<class T>
+ IVDResamplerSeparableColStage *ColFactoryLanczos3(double cutoff, float filterFactor) {
+ return new T(VDResamplerLanczos3Filter(cutoff));
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDPixmapGenResampleRow
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDPixmapGenResampleRow::VDPixmapGenResampleRow()
+ : mpRowStage(NULL)
+ , mpRowStage2(NULL)
+{
+}
+
+VDPixmapGenResampleRow::~VDPixmapGenResampleRow() {
+ if (mpRowStage)
+ delete mpRowStage;
+}
+
+void VDPixmapGenResampleRow::Init(IVDPixmapGen *src, uint32 srcIndex, uint32 width, float offset, float step, nsVDPixmap::FilterMode filterMode, float filterFactor, bool interpolationOnly) {
+ InitSource(src, srcIndex);
+
+ sint32 u0 = (sint32)(offset * 65536.0);
+ sint32 dudx = (sint32)(step * 65536.0);
+
+ mAxis.Init(dudx);
+
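+	// x_2fc is the filter cutoff handed to the kernel factories below: it stays at 1.0
+	// when interpolating or upsampling and drops to 1/step when decimating, so the kernel
+	// can be widened to band-limit the source before the rate reduction.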
+ double x_2fc = 1.0;
+ if (!interpolationOnly && step > 1.0f)
+ x_2fc = 1.0 / step;
+
+ struct SpecialCaseSpanRoutine {
+ sint32 mPhase;
+ sint32 mStep;
+ uint32 mType;
+ nsVDPixmap::FilterMode mFilterMode;
+ uint32 mCPUFlags;
+ IVDResamplerSeparableRowStage *(*mpClassFactory)(double filterCutoff, float filterFactor);
+ };
+
+ static const SpecialCaseSpanRoutine kSpecialCaseSpanRoutines[]={
+ // Generic
+#if defined _M_IX86
+ { +0x0000, 0x008000, kVDPixType_8, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_INTEGER_SSE, RowFactory<VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf_ISSE> },
+#endif
+
+ { +0x0000, 0x008000, kVDPixType_8, nsVDPixmap::kFilterLinear, 0, RowFactory<VDResamplerRowStageSeparableLinear8_phaseZeroStepHalf> },
+ };
+
+ long flags = CPUGetEnabledExtensions();
+ uint32 type = mpSrc->GetType(mSrcIndex) & kVDPixType_Mask;
+
+ for(int i=0; i<sizeof(kSpecialCaseSpanRoutines)/sizeof(kSpecialCaseSpanRoutines[0]); ++i) {
+ const SpecialCaseSpanRoutine& rout = kSpecialCaseSpanRoutines[i];
+
+ if (rout.mType != type)
+ continue;
+
+ if (x_2fc < 1.0)
+ continue;
+
+ if (rout.mStep != dudx)
+ continue;
+
+ if (rout.mPhase != u0)
+ continue;
+
+ if (rout.mFilterMode != filterMode)
+ continue;
+
+ if ((rout.mCPUFlags & flags) != rout.mCPUFlags)
+ continue;
+
+ mpRowStage = rout.mpClassFactory(x_2fc, filterFactor);
+ mpRowStage2 = mpRowStage->AsRowStage2();
+ break;
+ }
+
+ if (!mpRowStage) {
+ struct SpanRoutine {
+ uint32 mType;
+ bool mbInterpOnly;
+ nsVDPixmap::FilterMode mFilterMode;
+ uint32 mCPUFlags;
+ IVDResamplerSeparableRowStage *(*mpClassFactory)(double filterCutoff, float filterFactor);
+ };
+
+ static const SpanRoutine kSpanRoutines[]={
+#if defined _M_IX86
+ // X86
+ { kVDPixType_8888, false, nsVDPixmap::kFilterPoint, CPUF_SUPPORTS_MMX, RowFactory<VDResamplerSeparablePointRowStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterPoint, 0, RowFactory<VDResamplerSeparablePointRowStageX86> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE41, RowFactoryLinear<VDResamplerSeparableTableRowStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, RowFactoryLinear<VDResamplerSeparableTableRowStage8MMX> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, RowFactory<VDResamplerSeparableLinearRowStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE2, RowFactoryLinear<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, RowFactoryLinear<VDResamplerSeparableTableRowStageMMX> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE41, RowFactoryCubic<VDResamplerSeparableTableRowStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, RowFactoryCubic<VDResamplerSeparableTableRowStage8MMX> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, RowFactoryCubic2<VDResamplerSeparableCubicRowStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, RowFactoryCubic<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, RowFactoryCubic<VDResamplerSeparableTableRowStageMMX> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE41, RowFactoryLanczos3<VDResamplerSeparableTableRowStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_MMX, RowFactoryLanczos3<VDResamplerSeparableTableRowStage8MMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE2, RowFactoryLanczos3<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_MMX, RowFactoryLanczos3<VDResamplerSeparableTableRowStageMMX> },
+#elif defined _M_AMD64
+ // AMD64
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE2, RowFactoryLinear<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, RowFactoryCubic<VDResamplerSeparableTableRowStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE2, RowFactoryLanczos3<VDResamplerSeparableTableRowStageSSE2> },
+#endif
+ // Generic
+ { kVDPixType_8, false, nsVDPixmap::kFilterPoint, 0, RowFactory<VDResamplerRowStageSeparablePoint8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterPoint, 0, RowFactory<VDResamplerRowStageSeparablePoint32> },
+ { kVDPixType_8, true, nsVDPixmap::kFilterLinear, 0, RowFactory<VDResamplerRowStageSeparableLinear8> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterLinear, 0, RowFactory<VDResamplerRowStageSeparableLinear32> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, 0, RowFactoryLinear<VDResamplerRowStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, 0, RowFactoryLinear<VDResamplerRowStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterLinear, 0, RowFactoryLinear<VDResamplerRowStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterLinear, 0, RowFactoryLinear<VDResamplerRowStageSeparableTable32Fx4> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, 0, RowFactoryCubic<VDResamplerRowStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, 0, RowFactoryCubic<VDResamplerRowStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterCubic, 0, RowFactoryCubic<VDResamplerRowStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterCubic, 0, RowFactoryCubic<VDResamplerRowStageSeparableTable32Fx4> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, 0, RowFactoryLanczos3<VDResamplerRowStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, 0, RowFactoryLanczos3<VDResamplerRowStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterLanczos3, 0, RowFactoryLanczos3<VDResamplerRowStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterLanczos3, 0, RowFactoryLanczos3<VDResamplerRowStageSeparableTable32Fx4> },
+ };
+
+ for(int i=0; i<sizeof(kSpanRoutines)/sizeof(kSpanRoutines[0]); ++i) {
+ const SpanRoutine& rout = kSpanRoutines[i];
+
+ if (rout.mType != type)
+ continue;
+
+ if (rout.mbInterpOnly && x_2fc < 1.0)
+ continue;
+
+ if (rout.mFilterMode != filterMode)
+ continue;
+
+ if ((rout.mCPUFlags & flags) != rout.mCPUFlags)
+ continue;
+
+ mpRowStage = rout.mpClassFactory(x_2fc, filterFactor);
+ mpRowStage2 = mpRowStage->AsRowStage2();
+ break;
+ }
+ }
+
+ VDASSERT(mpRowStage);
+
+ mRowFiltW = mpRowStage->GetWindowSize();
+
+ mpSrc->AddWindowRequest(0, 0);
+
+ sint32 fsx1 = (sint32)(offset * 65536.0) - ((mRowFiltW-1) << 15);
+ mAxis.Compute(width, fsx1, mSrcWidth, mRowFiltW);
+ mWidth = width;
+
+ switch(type) {
+ case kVDPixType_8:
+ mBytesPerSample = 1;
+ break;
+ case kVDPixType_8888:
+ case kVDPixType_32F_LE:
+ mBytesPerSample = 4;
+ break;
+ case kVDPixType_32Fx4_LE:
+ mBytesPerSample = 16;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+}
+
+void VDPixmapGenResampleRow::Start() {
+ StartWindow(mWidth * mBytesPerSample);
+
+ uint32 clipSpace = ((mRowFiltW*3*mBytesPerSample + 15) >> 4) << 2;
+ mTempSpace.resize(clipSpace);
+
+ if (mpRowStage2)
+ mpRowStage2->Init(mAxis, mSrcWidth);
+}
+
+void VDPixmapGenResampleRow::Compute(void *dst0, sint32 y) {
+ switch(mBytesPerSample) {
+ case 1:
+ Compute8(dst0, y);
+ break;
+ case 4:
+ Compute32(dst0, y);
+ break;
+ case 16:
+ Compute128(dst0, y);
+ break;
+ }
+}
+
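+// The horizontal axis is split into regions: a pre-copy run (entirely left of the source)
+// that replicates the first pixel, pre-clip and post-clip runs where the filter window
+// hangs off an edge and is fed an edge-padded copy in mTempSpace, an active run where the
+// window lies fully inside the source, a dual-clip run used when the source is too narrow
+// for the pre- and post-clip regions to be handled separately, and a post-copy run that
+// replicates the last pixel. (A row stage exposing the stage-2 interface handles the
+// clipped regions itself.)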
+void VDPixmapGenResampleRow::Compute8(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ uint8 *dst = (uint8 *)dst0;
+
+ // process pre-copy region
+ if (uint32 count = mAxis.dx_precopy) {
+ VDMemset8(dst, src[0], count);
+ dst += count;
+ }
+
+ uint8 *p = (uint8*)mTempSpace.data();
+ sint32 u = mAxis.u;
+ const sint32 dudx = mAxis.dudx;
+
+ // process dual-clip region
+ if (mpRowStage2) {
+ uint32 count = mAxis.dx_preclip + mAxis.dx_active + mAxis.dx_postclip + mAxis.dx_dualclip;
+ mpRowStage2->Process(dst, src, count);
+ dst += count;
+ } else if (uint32 count = mAxis.dx_dualclip) {
+ VDMemset8(p, src[0], mRowFiltW);
+ memcpy(p + mRowFiltW, src+1, (mSrcWidth-2));
+ VDMemset8(p + mRowFiltW + (mSrcWidth-2), src[mSrcWidth-1], mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count;
+ } else {
+ // process pre-clip region
+ if (uint32 count = mAxis.dx_preclip) {
+ VDMemset8(p, src[0], mRowFiltW);
+ memcpy(p + mRowFiltW, src+1, (mRowFiltW-1));
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count;
+ }
+
+ // process active region
+ if (uint32 count = mAxis.dx_active) {
+ mpRowStage->Process(dst, src, count, u, dudx);
+ u += dudx*count;
+ dst += count;
+ }
+
+ // process post-clip region
+ if (uint32 count = mAxis.dx_postclip) {
+ uint32 offset = mSrcWidth + 1 - mRowFiltW;
+
+ memcpy(p, src+offset, (mRowFiltW-1));
+ VDMemset8(p + (mRowFiltW-1), src[mSrcWidth-1], mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u - (offset<<16), dudx);
+ dst += count;
+ }
+ }
+
+ // process post-copy region
+ if (uint32 count = mAxis.dx_postcopy) {
+ VDMemset8(dst, src[mSrcWidth-1], count);
+ }
+}
+
+void VDPixmapGenResampleRow::Compute32(void *dst0, sint32 y) {
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 *dst = (uint32 *)dst0;
+
+ // process pre-copy region
+ if (uint32 count = mAxis.dx_precopy) {
+ VDMemset32(dst, src[0], count);
+ dst += count;
+ }
+
+ uint32 *p = mTempSpace.data();
+ sint32 u = mAxis.u;
+ const sint32 dudx = mAxis.dudx;
+
+ // process dual-clip region
+ if (uint32 count = mAxis.dx_dualclip) {
+ VDMemset32(p, src[0], mRowFiltW);
+ memcpy(p + mRowFiltW, src+1, (mSrcWidth-2)*sizeof(uint32));
+ VDMemset32(p + mRowFiltW + (mSrcWidth-2), src[mSrcWidth-1], mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count;
+ } else if (mpRowStage2) {
+ mpRowStage2->Process(dst, p, mAxis.dx_preclip + mAxis.dx_active + mAxis.dx_postclip);
+ } else {
+ // process pre-clip region
+ if (uint32 count = mAxis.dx_preclip) {
+ VDMemset32(p, src[0], mRowFiltW);
+ memcpy(p + mRowFiltW, src+1, (mRowFiltW-1)*sizeof(uint32));
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count;
+ }
+
+ // process active region
+ if (uint32 count = mAxis.dx_active) {
+ mpRowStage->Process(dst, src, count, u, dudx);
+ u += dudx*count;
+ dst += count;
+ }
+
+ // process post-clip region
+ if (uint32 count = mAxis.dx_postclip) {
+ uint32 offset = mSrcWidth + 1 - mRowFiltW;
+
+ memcpy(p, src+offset, (mRowFiltW-1)*sizeof(uint32));
+ VDMemset32(p + (mRowFiltW-1), src[mSrcWidth-1], mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u - (offset<<16), dudx);
+ dst += count;
+ }
+ }
+
+ // process post-copy region
+ if (uint32 count = mAxis.dx_postcopy) {
+ VDMemset32(dst, src[mSrcWidth-1], count);
+ }
+}
+
+void VDPixmapGenResampleRow::Compute128(void *dst0, sint32 y) {
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 *dst = (uint32 *)dst0;
+
+ // process pre-copy region
+ if (uint32 count = mAxis.dx_precopy) {
+ VDMemset128(dst, src, count);
+ dst += 4*count;
+ }
+
+ uint32 *p = mTempSpace.data();
+ sint32 u = mAxis.u;
+ const sint32 dudx = mAxis.dudx;
+
+ // process dual-clip region
+ if (uint32 count = mAxis.dx_dualclip) {
+ VDMemset128(p, src, mRowFiltW);
+ memcpy(p + 4*mRowFiltW, src+1, (mSrcWidth-2)*sizeof(uint32)*4);
+ VDMemset128(p + 4*(mRowFiltW + (mSrcWidth-2)), src + 4*(mSrcWidth-1), mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count * 4;
+ } else if (mpRowStage2) {
+ mpRowStage2->Process(dst, p, mAxis.dx_preclip + mAxis.dx_active + mAxis.dx_postclip);
+ } else {
+ // process pre-clip region
+ if (uint32 count = mAxis.dx_preclip) {
+ VDMemset128(p, src, mRowFiltW);
+ memcpy(p + 4*mRowFiltW, src+1, (mRowFiltW-1)*sizeof(uint32)*4);
+
+ mpRowStage->Process(dst, p, count, u + ((mRowFiltW-1)<<16), dudx);
+ u += dudx*count;
+ dst += count*4;
+ }
+
+ // process active region
+ if (uint32 count = mAxis.dx_active) {
+ mpRowStage->Process(dst, src, count, u, dudx);
+ u += dudx*count;
+ dst += count*4;
+ }
+
+ // process post-clip region
+ if (uint32 count = mAxis.dx_postclip) {
+ uint32 offset = mSrcWidth + 1 - mRowFiltW;
+
+ memcpy(p, src+offset*4, (mRowFiltW-1)*sizeof(uint32)*4);
+ VDMemset128(p + 4*(mRowFiltW-1), src + 4*(mSrcWidth-1), mRowFiltW);
+
+ mpRowStage->Process(dst, p, count, u - (offset<<16), dudx);
+ dst += count*4;
+ }
+ }
+
+ // process post-copy region
+ if (uint32 count = mAxis.dx_postcopy) {
+ VDMemset128(dst, src + 4*(mSrcWidth-1), count);
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDPixmapGenResampleCol
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDPixmapGenResampleCol::VDPixmapGenResampleCol()
+ : mpColStage(NULL)
+{
+}
+
+VDPixmapGenResampleCol::~VDPixmapGenResampleCol() {
+ if (mpColStage)
+ delete mpColStage;
+}
+
+void VDPixmapGenResampleCol::Init(IVDPixmapGen *src, uint32 srcIndex, uint32 height, float offset, float step, nsVDPixmap::FilterMode filterMode, float filterFactor, bool interpolationOnly) {
+ InitSource(src, srcIndex);
+
+ sint32 dvdy = (sint32)(step * 65536.0);
+
+ mAxis.Init(dvdy);
+
+ // construct stages
+ double y_2fc = 1.0;
+ if (!interpolationOnly && step > 1.0f)
+ y_2fc = 1.0 / step;
+
+ struct SpanRoutine {
+ uint32 mType;
+ bool mbInterpOnly;
+ nsVDPixmap::FilterMode mFilterMode;
+ uint32 mCPUFlags;
+ IVDResamplerSeparableColStage *(*mpClassFactory)(double filterCutoff, float filterFactor);
+ };
+
+ static const SpanRoutine kSpanRoutines[]={
+#if defined _M_IX86
+ // X86
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE41, ColFactoryLinear<VDResamplerSeparableTableColStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, ColFactoryLinear<VDResamplerSeparableTableColStage8MMX> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, ColFactory<VDResamplerSeparableLinearColStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE2, ColFactoryLinear<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_MMX, ColFactoryLinear<VDResamplerSeparableTableColStageMMX> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE41, ColFactoryCubic<VDResamplerSeparableTableColStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, ColFactoryCubic<VDResamplerSeparableTableColStage8MMX> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, ColFactoryCubic2<VDResamplerSeparableCubicColStageSSE2> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, ColFactoryCubic2<VDResamplerSeparableCubicColStageMMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, ColFactoryCubic<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_MMX, ColFactoryCubic<VDResamplerSeparableTableColStageMMX> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE41, ColFactoryLanczos3<VDResamplerSeparableTableColStage8SSE41> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_MMX, ColFactoryLanczos3<VDResamplerSeparableTableColStage8MMX> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE2, ColFactoryLanczos3<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_MMX, ColFactoryLanczos3<VDResamplerSeparableTableColStageMMX> },
+#elif defined _M_AMD64
+ // AMD64
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, CPUF_SUPPORTS_SSE2, ColFactoryLinear<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, CPUF_SUPPORTS_SSE2, ColFactoryCubic<VDResamplerSeparableTableColStageSSE2> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, CPUF_SUPPORTS_SSE2, ColFactoryLanczos3<VDResamplerSeparableTableColStageSSE2> },
+#endif
+ // Generic
+ { kVDPixType_8, true, nsVDPixmap::kFilterLinear, 0, ColFactory<VDResamplerColStageSeparableLinear8> },
+ { kVDPixType_8888, true, nsVDPixmap::kFilterLinear, 0, ColFactory<VDResamplerColStageSeparableLinear32> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLinear, 0, ColFactoryLinear<VDResamplerColStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLinear, 0, ColFactoryLinear<VDResamplerColStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterLinear, 0, ColFactoryLinear<VDResamplerColStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterLinear, 0, ColFactoryLinear<VDResamplerColStageSeparableTable32Fx4> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterCubic, 0, ColFactoryCubic<VDResamplerColStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterCubic, 0, ColFactoryCubic<VDResamplerColStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterCubic, 0, ColFactoryCubic<VDResamplerColStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterCubic, 0, ColFactoryCubic<VDResamplerColStageSeparableTable32Fx4> },
+ { kVDPixType_8, false, nsVDPixmap::kFilterLanczos3, 0, ColFactoryLanczos3<VDResamplerColStageSeparableTable8> },
+ { kVDPixType_8888, false, nsVDPixmap::kFilterLanczos3, 0, ColFactoryLanczos3<VDResamplerColStageSeparableTable32> },
+ { kVDPixType_32F_LE, false, nsVDPixmap::kFilterLanczos3, 0, ColFactoryLanczos3<VDResamplerColStageSeparableTable32F> },
+ { kVDPixType_32Fx4_LE, false, nsVDPixmap::kFilterLanczos3, 0, ColFactoryLanczos3<VDResamplerColStageSeparableTable32Fx4> },
+ };
+
+ long flags = CPUGetEnabledExtensions();
+ uint32 type = src->GetType(srcIndex) & kVDPixType_Mask;
+ for(int i=0; i<sizeof(kSpanRoutines)/sizeof(kSpanRoutines[0]); ++i) {
+ const SpanRoutine& rout = kSpanRoutines[i];
+
+ if (rout.mType != type)
+ continue;
+
+ if (rout.mbInterpOnly && y_2fc < 1.0)
+ continue;
+
+ if (rout.mFilterMode != filterMode)
+ continue;
+
+ if ((rout.mCPUFlags & flags) != rout.mCPUFlags)
+ continue;
+
+ mpColStage = rout.mpClassFactory(y_2fc, filterFactor);
+ break;
+ }
+
+ mWinSize = mpColStage ? mpColStage->GetWindowSize() : 1;
+ mWindow.resize(mWinSize);
+
+ int delta = (mWinSize + 1) >> 1;
+ mpSrc->AddWindowRequest(-delta, delta);
+
+ sint32 fsy1 = (sint32)(offset * 65536.0) - ((mWinSize-1)<<15);
+ mAxis.Compute(height, fsy1, mSrcHeight, mWinSize);
+ mHeight = height;
+
+ switch(type) {
+ case kVDPixType_8:
+ mBytesPerSample = 1;
+ break;
+ case kVDPixType_8888:
+ case kVDPixType_32F_LE:
+ mBytesPerSample = 4;
+ break;
+ case kVDPixType_32Fx4_LE:
+ mBytesPerSample = 16;
+ break;
+
+ default:
+ VDASSERT(false);
+ }
+}
+
+void VDPixmapGenResampleCol::Start() {
+ mBytesPerRow = mWidth * mBytesPerSample;
+ StartWindow(mBytesPerRow);
+}
+
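+// Vertical counterpart of the row resampler: rows before the filtered range replicate
+// source row 0, rows past it replicate the last source row, and in between the column
+// stage is fed a window of source rows clamped to the image edges.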
+void VDPixmapGenResampleCol::Compute(void *dst0, sint32 y) {
+ const uint32 winsize = mWinSize;
+ const uint32 dx = mSrcWidth;
+
+ y -= (sint32)mAxis.dx_precopy;
+
+ if (y < 0) {
+ const void *srcrow0 = mpSrc->GetRow(0, mSrcIndex);
+ memcpy(dst0, srcrow0, mBytesPerRow);
+ return;
+ }
+
+ uint32 midrange = mAxis.dx_preclip + mAxis.dx_active + mAxis.dx_postclip + mAxis.dx_dualclip;
+
+ if (y < (sint32)midrange) {
+ sint32 v = mAxis.u + mAxis.dudx * y;
+
+ if (mpColStage) {
+ for(uint32 i=0; i<winsize; ++i) {
+ int sy = (v >> 16) + i;
+
+ if ((unsigned)sy >= (unsigned)mSrcHeight)
+ sy = (~sy >> 31) & (mSrcHeight - 1);
+
+ mWindow[i] = mpSrc->GetRow(sy, mSrcIndex);
+ }
+
+ mpColStage->Process(dst0, mWindow.data(), dx, v);
+ } else
+ memcpy(dst0, mpSrc->GetRow(v >> 16, mSrcIndex), mBytesPerRow);
+ return;
+ }
+
+ const void *p = mpSrc->GetRow(mSrcHeight - 1, mSrcIndex);
+
+ memcpy(dst0, p, mBytesPerRow);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special.cpp
new file mode 100644
index 000000000..0c649dd5c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special.cpp
@@ -0,0 +1,186 @@
+#include "uberblit_resample_special.h"
+#include "blt_spanutils.h"
+
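+// Fixed-ratio fast paths (2x/4x horizontal and vertical scaling at fixed phases) that
+// route straight to the shared span utilities instead of the generic table-driven resampler.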
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleRow_d2_p0_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(0, 0);
+
+ mWidth = (mSrcWidth + 1) >> 1;
+}
+
+void VDPixmapGenResampleRow_d2_p0_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleRow_d2_p0_lin_u8::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_compress2x_coaligned((uint8 *)dst0, src, mSrcWidth);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleRow_d4_p0_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(0, 0);
+
+ mWidth = (mSrcWidth + 3) >> 2;
+}
+
+void VDPixmapGenResampleRow_d4_p0_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleRow_d4_p0_lin_u8::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_compress4x_coaligned((uint8 *)dst0, src, mSrcWidth);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleRow_x2_p0_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(0, 0);
+
+ mWidth = mSrcWidth * 2;
+}
+
+void VDPixmapGenResampleRow_x2_p0_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleRow_x2_p0_lin_u8::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_expand2x_coaligned((uint8 *)dst0, src, mWidth);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleRow_x4_p0_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(0, 0);
+
+ mWidth = mSrcWidth * 4;
+}
+
+void VDPixmapGenResampleRow_x4_p0_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleRow_x4_p0_lin_u8::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_expand4x_coaligned((uint8 *)dst0, src, mWidth);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleCol_x2_phalf_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(-2, 2);
+
+ mHeight = (mSrcHeight + 1) >> 1;
+}
+
+void VDPixmapGenResampleCol_x2_phalf_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleCol_x2_phalf_lin_u8::Compute(void *dst0, sint32 y) {
+ sint32 y2 = y+y;
+ const uint8 *src[4] = {
+ (const uint8 *)mpSrc->GetRow(y2 > 0 ? y2-1 : 0, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2 , mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+2, mSrcIndex)
+ };
+
+ nsVDPixmapSpanUtils::vert_compress2x_centered((uint8 *)dst0, src, mWidth, 0);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleCol_x4_p1half_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(-4, 4);
+
+ mHeight = (mSrcHeight + 2) >> 2;
+}
+
+void VDPixmapGenResampleCol_x4_p1half_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleCol_x4_p1half_lin_u8::Compute(void *dst0, sint32 y) {
+ sint32 y4 = y*4;
+ const uint8 *src[8] = {
+ (const uint8 *)mpSrc->GetRow(y4 > 2 ? y4-2 : 0, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4 > 1 ? y4-1 : 0, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4 , mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+1, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+3, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+4, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y4+5, mSrcIndex)
+ };
+
+ nsVDPixmapSpanUtils::vert_compress4x_centered((uint8 *)dst0, src, mWidth, 0);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleCol_d2_pnqrtr_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(-1, 1);
+
+ mHeight = mSrcHeight * 2;
+}
+
+void VDPixmapGenResampleCol_d2_pnqrtr_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleCol_d2_pnqrtr_lin_u8::Compute(void *dst0, sint32 y) {
+ sint32 y2 = (y - 1) >> 1;
+ const uint8 *src[2] = {
+ (const uint8 *)mpSrc->GetRow(y2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ };
+
+ nsVDPixmapSpanUtils::vert_expand2x_centered((uint8 *)dst0, src, mWidth, ~y << 7);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGenResampleCol_d4_pn38_lin_u8::Init(IVDPixmapGen *src, uint32 srcIndex) {
+ InitSource(src, srcIndex);
+ src->AddWindowRequest(-1, 1);
+
+ mHeight = mSrcHeight * 4;
+}
+
+void VDPixmapGenResampleCol_d4_pn38_lin_u8::Start() {
+ mpSrc->Start();
+ StartWindow(mWidth);
+}
+
+void VDPixmapGenResampleCol_d4_pn38_lin_u8::Compute(void *dst0, sint32 y) {
+ sint32 y2 = (y - 2) >> 2;
+ const uint8 *src[2] = {
+ (const uint8 *)mpSrc->GetRow(y2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ };
+
+ nsVDPixmapSpanUtils::vert_expand4x_centered((uint8 *)dst0, src, mWidth, (y - 2) << 6);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special_x86.cpp
new file mode 100644
index 000000000..b1828fcca
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_resample_special_x86.cpp
@@ -0,0 +1,35 @@
+#include "uberblit_resample_special_x86.h"
+#include "blt_spanutils.h"
+#include "blt_spanutils_x86.h"
+
+void VDPixmapGenResampleRow_x2_p0_lin_u8_ISSE::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_expand2x_coaligned_ISSE((uint8 *)dst0, src, mWidth);
+}
+
+void VDPixmapGenResampleRow_x4_p0_lin_u8_MMX::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ nsVDPixmapSpanUtils::horiz_expand4x_coaligned_MMX((uint8 *)dst0, src, mWidth);
+}
+
+void VDPixmapGenResampleCol_d2_pnqrtr_lin_u8_ISSE::Compute(void *dst0, sint32 y) {
+ sint32 y2 = (y - 1) >> 1;
+ const uint8 *src[2] = {
+ (const uint8 *)mpSrc->GetRow(y2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ };
+
+ nsVDPixmapSpanUtils::vert_expand2x_centered_ISSE((uint8 *)dst0, src, mWidth, ~y << 7);
+}
+
+void VDPixmapGenResampleCol_d4_pn38_lin_u8_ISSE::Compute(void *dst0, sint32 y) {
+ sint32 y2 = (y - 2) >> 2;
+ const uint8 *src[2] = {
+ (const uint8 *)mpSrc->GetRow(y2, mSrcIndex),
+ (const uint8 *)mpSrc->GetRow(y2+1, mSrcIndex),
+ };
+
+ nsVDPixmapSpanUtils::vert_expand4x_centered_ISSE((uint8 *)dst0, src, mWidth, (y - 2) << 6);
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle.cpp
new file mode 100644
index 000000000..4cb5e4409
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle.cpp
@@ -0,0 +1,89 @@
+#include "uberblit_swizzle.h"
+
+void VDPixmapGen_Swap8In16::Init(IVDPixmapGen *gen, int srcIndex, uint32 w, uint32 h, uint32 bpr) {
+ InitSource(gen, srcIndex);
+ mRowLength = bpr;
+ SetOutputSize(w, h);
+ gen->AddWindowRequest(0, 0);
+}
+
+void VDPixmapGen_Swap8In16::Start() {
+ StartWindow(mRowLength);
+}
+
+uint32 VDPixmapGen_Swap8In16::GetType(uint32 index) const {
+ return mpSrc->GetType(mSrcIndex);
+}
+
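+// Byte-swaps every 16-bit unit in the row: four bytes at a time via shifts and masks,
+// then 2-byte and 1-byte tails (a trailing odd byte is copied unchanged).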
+void VDPixmapGen_Swap8In16::Compute(void *dst0, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+ uint8 *dst = (uint8 *)dst0;
+ sint32 w = mRowLength;
+
+ uint32 n4 = w >> 2;
+
+ for(uint32 i=0; i<n4; ++i) {
+ uint32 p = *(uint32 *)src;
+ src += 4;
+
+ uint32 r = ((p & 0xff00ff00) >> 8) + ((p & 0x00ff00ff) << 8);
+
+ *(uint32 *)dst = r;
+ dst += 4;
+ }
+
+ if (w & 2) {
+ dst[0] = src[1];
+ dst[1] = src[0];
+ dst += 2;
+ src += 2;
+ }
+
+ if (w & 1) {
+ *dst = *src;
+ }
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
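+// Interleaves separate Cb and Cr planes into one plane of alternating Cb/Cr byte pairs,
+// i.e. the kVDPixType_B8R8 layout reported by GetType().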
+void VDPixmapGen_B8x2_To_B8R8::Init(IVDPixmapGen *srcCb, uint32 srcindexCb, IVDPixmapGen *srcCr, uint32 srcindexCr) {
+ mpSrcCb = srcCb;
+ mSrcIndexCb = srcindexCb;
+ mpSrcCr = srcCr;
+ mSrcIndexCr = srcindexCr;
+ mWidth = srcCb->GetWidth(srcindexCb);
+ mHeight = srcCb->GetHeight(srcindexCb);
+
+ srcCb->AddWindowRequest(0, 0);
+ srcCr->AddWindowRequest(0, 0);
+}
+
+void VDPixmapGen_B8x2_To_B8R8::Start() {
+ mpSrcCb->Start();
+ mpSrcCr->Start();
+
+ StartWindow(mWidth * 2);
+}
+
+uint32 VDPixmapGen_B8x2_To_B8R8::GetType(uint32 output) const {
+ return (mpSrcCb->GetType(mSrcIndexCb) & ~kVDPixType_Mask) | kVDPixType_B8R8;
+}
+
+void VDPixmapGen_B8x2_To_B8R8::Compute(void *dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *)dst0;
+ const uint8 *VDRESTRICT srcCb = (const uint8 *)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ sint32 w = mWidth;
+ for(sint32 x=0; x<w; ++x) {
+ uint8 cb = srcCb[0];
+ uint8 cr = srcCr[0];
+
+ dst[0] = cb;
+ dst[1] = cr;
+
+ ++srcCb;
+ ++srcCr;
+ dst += 2;
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle_x86.cpp
new file mode 100644
index 000000000..3a87d5a68
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_swizzle_x86.cpp
@@ -0,0 +1,400 @@
+#include "uberblit_swizzle_x86.h"
+
+#ifdef VD_COMPILER_MSVC
+ #pragma warning(disable: 4799) // warning C4799: function 'vdasm_extract_8in16_even_MMX' has no EMMS instruction
+#endif
+
+void __declspec(naked) __fastcall vdasm_extract_8in16_even_MMX(void *dst, const void *src, uint32 count) {
+ __asm {
+ mov eax, [esp+4]
+ pcmpeqb mm2, mm2
+ psrlw mm2, 8
+ sub eax, 8
+ jc xtra
+xloop:
+ movq mm0, [edx]
+ movq mm1, [edx+8]
+ pand mm0, mm2
+ pand mm1, mm2
+ packuswb mm0, mm1
+ add edx, 16
+ movq [ecx], mm0
+ add ecx, 8
+ sub eax, 8
+ jns xloop
+xtra:
+ add eax, 8
+ jz fin
+ push ebx
+xtraloop:
+ mov bl, [edx]
+ add edx, 2
+ mov [ecx], bl
+ add ecx, 1
+ sub eax, 1
+ jnz xtraloop
+
+ pop ebx
+fin:
+ ret 4
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_extract_8in16_odd_MMX(void *dst, const void *src, uint32 count) {
+ __asm {
+ mov eax, [esp+4]
+ sub eax, 8
+ jc xtra
+xloop:
+ movq mm0, [edx]
+ movq mm1, [edx+8]
+ psrlw mm0, 8
+ psrlw mm1, 8
+ add edx, 16
+ packuswb mm0, mm1
+ movq [ecx], mm0
+ add ecx, 8
+ sub eax, 8
+ jns xloop
+xtra:
+ add eax, 8
+ jz fin
+ push ebx
+xtraloop:
+ mov bl, [edx+1]
+ add edx, 2
+ mov [ecx], bl
+ add ecx, 1
+ sub eax, 1
+ jnz xtraloop
+
+ pop ebx
+fin:
+ ret 4
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_extract_8in32_MMX(void *dst, const void *src, uint32 count, int byteshift) {
+ __asm {
+ movd mm4, [esp+8]
+ pcmpeqb mm5, mm5
+ pslld mm4, 3
+ mov eax, [esp+4]
+ psrld mm5, 24
+ sub eax, 8
+ jc xtra
+xloop:
+ movq mm0, [edx]
+ movq mm1, [edx+8]
+ psrld mm0, mm4
+ movq mm2, [edx+16]
+ psrld mm1, mm4
+ pand mm0, mm5
+ movq mm3, [edx+24]
+ psrld mm2, mm4
+ pand mm1, mm5
+ packssdw mm0, mm1
+ psrld mm3, mm4
+ pand mm2, mm5
+ pand mm3, mm5
+ add edx, 32
+ packssdw mm2, mm3
+ packuswb mm0, mm2
+ movq [ecx], mm0
+ add ecx, 8
+ sub eax, 8
+ jns xloop
+xtra:
+ add eax, 8
+ jz fin
+ add edx, dword ptr [esp+8]
+ push ebx
+xtraloop:
+ mov bl, [edx]
+ add edx, 4
+ mov [ecx], bl
+ add ecx, 1
+ sub eax, 1
+ jnz xtraloop
+
+ pop ebx
+fin:
+ ret 8
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_swap_8in16_MMX(void *dst, const void *src, uint32 count) {
+ __asm {
+ mov eax, [esp+4]
+ sub eax, 8
+ js xtra
+xloop:
+ movq mm0, [edx]
+ add edx, 8
+ movq mm1, mm0
+ psllw mm0, 8
+ psrlw mm1, 8
+ paddb mm0, mm1
+ movq [ecx], mm0
+ add ecx, 8
+ sub eax, 8
+ jns xloop
+xtra:
+ add eax, 6
+ js nopairs
+ push ebx
+pairloop:
+ mov bl, [edx]
+ mov bh, [edx+1]
+ add edx, 2
+ mov [ecx], bh
+ mov [ecx+1], bl
+ add ecx, 2
+ sub eax, 2
+ jns pairloop
+ pop ebx
+nopairs:
+ add eax, 2
+ jz noodd
+ mov al, [edx]
+ mov [ecx], al
+noodd:
+ ret 4
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_interleave_BGRG_MMX(void *dst, const void *srcR, const void *srcG, const void *srcB, uint32 count) {
+ __asm {
+ push edi
+ push esi
+ push ebx
+ mov esi, [esp+12+12]
+ mov edi, [esp+8+12]
+ mov ebx, [esp+4+12]
+ sub esi, 4
+ jc xtra
+ ; ecx = dst
+ ; edx = srcR
+ ; ebx = srcG
+ ; edi = srcB
+xloop:
+ movd mm0, [edi]
+ movd mm1, [edx]
+ punpcklbw mm0, mm1
+ movq mm1, [ebx]
+ movq mm2, mm0
+ punpcklbw mm0, mm1
+ add edx, 4
+ punpckhbw mm2, mm1
+ add edi, 4
+ movq [ecx], mm0
+ add ebx, 8
+ movq [ecx+8], mm2
+ add ecx, 16
+ sub esi, 4
+ jns xloop
+xtra:
+ add esi, 4
+ jz fin
+xtraloop:
+ mov al, [edi]
+ mov [ecx], al
+ mov al, [ebx]
+ mov [ecx+1], al
+ mov al, [edx]
+ mov [ecx+2], al
+ mov al, [ebx+1]
+ mov [ecx+3], al
+ add ebx, 2
+ add edx, 1
+ add edi, 1
+ add ecx, 4
+ sub esi, 1
+ jnz xtraloop
+fin:
+ pop ebx
+ pop esi
+ pop edi
+ ret 12
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_interleave_GBGR_MMX(void *dst, const void *srcR, const void *srcG, const void *srcB, uint32 count) {
+ __asm {
+ push edi
+ push esi
+ push ebx
+ mov esi, [esp+12+12]
+ mov edi, [esp+8+12]
+ mov ebx, [esp+4+12]
+ sub esi, 4
+ jc xtra
+ ; ecx = dst
+ ; edx = srcR
+ ; ebx = srcG
+ ; edi = srcB
+xloop:
+ movd mm0, [edi]
+ movd mm1, [edx]
+ punpcklbw mm0, mm1
+ movq mm2, [ebx]
+ movq mm1, mm2
+ punpcklbw mm2, mm0
+ add edx, 4
+ punpckhbw mm1, mm0
+ add edi, 4
+ movq [ecx], mm2
+ add ebx, 8
+ movq [ecx+8], mm1
+ add ecx, 16
+ sub esi, 4
+ jns xloop
+xtra:
+ add esi, 4
+ jz fin
+xtraloop:
+ mov al, [ebx]
+ mov [ecx], al
+ mov al, [edi]
+ mov [ecx+1], al
+ mov al, [ebx+1]
+ mov [ecx+2], al
+ mov al, [edx]
+ mov [ecx+3], al
+ add ebx, 2
+ add edx, 1
+ add edi, 1
+ add ecx, 4
+ sub esi, 1
+ jnz xtraloop
+fin:
+ pop ebx
+ pop esi
+ pop edi
+ ret 12
+ }
+}
+
+void __declspec(naked) __fastcall vdasm_interleave_BR_MMX(void *dst, const void *srcB, const void *srcR, uint32 count) {
+ __asm {
+ push edi
+ push esi
+ push ebx
+ mov esi, [esp+8+12]
+ mov ebx, [esp+4+12]
+ sub esi, 8
+ jc xtra
+ ; ecx = dst
+ ; edx = srcB
+		; ebx = srcR
+xloop:
+ movq mm0, [edx]
+ movq mm1, [ebx]
+ movq mm2, mm0
+ punpcklbw mm0, mm1
+ punpckhbw mm2, mm1
+ add edx, 8
+ movq [ecx], mm0
+ add ebx, 8
+ movq [ecx+8], mm2
+ add ecx, 16
+ sub esi, 8
+ jns xloop
+xtra:
+ add esi, 8
+ jz fin
+xtraloop:
+ mov al, [edx]
+ mov [ecx], al
+ mov al, [ebx]
+ mov [ecx+1], al
+ add ebx, 1
+ add edx, 1
+ add ecx, 2
+ sub esi, 1
+ jnz xtraloop
+fin:
+ pop ebx
+ pop esi
+ pop edi
+ ret 8
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+void VDPixmapGen_8In16_Even_MMX::Compute(void *dst, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_extract_8in16_even_MMX(dst, srcp, mWidth);
+}
+
+void VDPixmapGen_8In16_Odd_MMX::Compute(void *dst, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_extract_8in16_odd_MMX(dst, srcp, mWidth);
+}
+
+void VDPixmapGen_8In32_MMX::Compute(void *dst, sint32 y) {
+ const uint8 *srcp = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_extract_8in32_MMX(dst, srcp, mWidth, mOffset);
+}
+
+void VDPixmapGen_Swap8In16_MMX::Compute(void *dst, sint32 y) {
+ const uint8 *src = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ vdasm_swap_8in16_MMX(dst, src, mRowLength);
+}
+
+void VDPixmapGen_B8x2_To_B8R8_MMX::Compute(void *dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *VDRESTRICT)dst0;
+ const uint8 *VDRESTRICT srcCb = (const uint8 *VDRESTRICT)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *VDRESTRICT)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ vdasm_interleave_BR_MMX(dst, srcCb, srcCr, mWidth);
+}
+
+void VDPixmapGen_B8x3_To_G8B8_G8R8_MMX::Compute(void *VDRESTRICT dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *VDRESTRICT)dst0;
+ const uint8 *VDRESTRICT srcY = (const uint8 *VDRESTRICT)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *VDRESTRICT srcCb = (const uint8 *VDRESTRICT)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 *VDRESTRICT)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ vdasm_interleave_GBGR_MMX(dst, srcCr, srcY, srcCb, mWidth >> 1);
+
+ if (mWidth & 1) {
+ int w2 = mWidth >> 1;
+ srcY += mWidth;
+ srcCb += w2;
+ srcCr += w2;
+ dst += mWidth * 2;
+
+ dst[-2] = srcY[-1];
+ dst[-1] = srcCb[0];
+ dst[ 0] = 0; // must be zero for QuickTime compatibility
+ dst[ 1] = srcCr[0];
+ }
+}
+
+void VDPixmapGen_B8x3_To_B8G8_R8G8_MMX::Compute(void *VDRESTRICT dst0, sint32 y) {
+ uint8 *VDRESTRICT dst = (uint8 *VDRESTRICT)dst0;
+ const uint8 *VDRESTRICT srcY = (const uint8 * VDRESTRICT)mpSrcY->GetRow(y, mSrcIndexY);
+ const uint8 *VDRESTRICT srcCb = (const uint8 * VDRESTRICT)mpSrcCb->GetRow(y, mSrcIndexCb);
+ const uint8 *VDRESTRICT srcCr = (const uint8 * VDRESTRICT)mpSrcCr->GetRow(y, mSrcIndexCr);
+
+ vdasm_interleave_BGRG_MMX(dst, srcCr, srcY, srcCb, mWidth >> 1);
+
+ if (mWidth & 1) {
+ int w2 = mWidth >> 1;
+ srcY += mWidth;
+ srcCb += w2;
+ srcCr += w2;
+ dst += mWidth * 2;
+
+ dst[-2] = srcCb[0];
+ dst[-1] = srcY[-1];
+ dst[ 0] = srcCr[0];
+ dst[ 1] = 0; // must be zero for QuickTime compatibility
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_v210.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_v210.cpp
new file mode 100644
index 000000000..78793f477
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_v210.cpp
@@ -0,0 +1,199 @@
+#include <vd2/system/halffloat.h>
+#include <vd2/system/math.h>
+#include "uberblit_v210.h"
+
+///////////////////////////////////////////////////////////////////////////////
+
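+// Despite the R/G/B naming, the three source planes hold Cr (R), Y (G) and Cb (B) as
+// normalized floats; chroma is 4:2:2, so each 6-pixel group consumes six Y samples but
+// only three Cb and three Cr samples, packed into four dwords as laid out below.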
+void VDPixmapGen_32F_To_V210::Compute(void *dst0, sint32 y) {
+ uint32 *dst = (uint32 *)dst0;
+ const float *srcR = (const float *)mpSrcR->GetRow(y, mSrcIndexR);
+ const float *srcG = (const float *)mpSrcG->GetRow(y, mSrcIndexG);
+ const float *srcB = (const float *)mpSrcB->GetRow(y, mSrcIndexB);
+
+ VDCPUCleanupExtensions();
+
+ int w6 = mWidth / 6;
+ for(sint32 i=0; i<w6; ++i) {
+ float r0 = srcR[0];
+ float r1 = srcR[1];
+ float r2 = srcR[2];
+ srcR += 3;
+
+ float b0 = srcB[0];
+ float b1 = srcB[1];
+ float b2 = srcB[2];
+ srcB += 3;
+
+ float g0 = srcG[0];
+ float g1 = srcG[1];
+ float g2 = srcG[2];
+ float g3 = srcG[3];
+ float g4 = srcG[4];
+ float g5 = srcG[5];
+ srcG += 6;
+
+ if (r0 < 0.0f) r0 = 0.0f; else if (r0 > 1.0f) r0 = 1.0f;
+ if (r1 < 0.0f) r1 = 0.0f; else if (r1 > 1.0f) r1 = 1.0f;
+ if (r2 < 0.0f) r2 = 0.0f; else if (r2 > 1.0f) r2 = 1.0f;
+ if (g0 < 0.0f) g0 = 0.0f; else if (g0 > 1.0f) g0 = 1.0f;
+ if (g1 < 0.0f) g1 = 0.0f; else if (g1 > 1.0f) g1 = 1.0f;
+ if (g2 < 0.0f) g2 = 0.0f; else if (g2 > 1.0f) g2 = 1.0f;
+ if (g3 < 0.0f) g3 = 0.0f; else if (g3 > 1.0f) g3 = 1.0f;
+ if (g4 < 0.0f) g4 = 0.0f; else if (g4 > 1.0f) g4 = 1.0f;
+ if (g5 < 0.0f) g5 = 0.0f; else if (g5 > 1.0f) g5 = 1.0f;
+ if (b0 < 0.0f) b0 = 0.0f; else if (b0 > 1.0f) b0 = 1.0f;
+ if (b1 < 0.0f) b1 = 0.0f; else if (b1 > 1.0f) b1 = 1.0f;
+ if (b2 < 0.0f) b2 = 0.0f; else if (b2 > 1.0f) b2 = 1.0f;
+
+ uint32 ir0 = (uint32)VDRoundToIntFast(r0 * 1024.0f);
+ uint32 ir1 = (uint32)VDRoundToIntFast(r1 * 1024.0f);
+ uint32 ir2 = (uint32)VDRoundToIntFast(r2 * 1024.0f);
+ uint32 ib0 = (uint32)VDRoundToIntFast(b0 * 1024.0f);
+ uint32 ib1 = (uint32)VDRoundToIntFast(b1 * 1024.0f);
+ uint32 ib2 = (uint32)VDRoundToIntFast(b2 * 1024.0f);
+ uint32 ig0 = (uint32)VDRoundToIntFast(g0 * 1024.0f);
+ uint32 ig1 = (uint32)VDRoundToIntFast(g1 * 1024.0f);
+ uint32 ig2 = (uint32)VDRoundToIntFast(g2 * 1024.0f);
+ uint32 ig3 = (uint32)VDRoundToIntFast(g3 * 1024.0f);
+ uint32 ig4 = (uint32)VDRoundToIntFast(g4 * 1024.0f);
+ uint32 ig5 = (uint32)VDRoundToIntFast(g5 * 1024.0f);
+
+ // dword 0: XX Cr0 Y0 Cb0
+ // dword 1: XX Y2 Cb1 Y1
+ // dword 2: XX Cb2 Y3 Cr1
+ // dword 3: XX Y5 Cr2 Y4
+ dst[0] = (ir0 << 20) + (ig0 << 10) + ib0;
+ dst[1] = (ig2 << 20) + (ib1 << 10) + ig1;
+ dst[2] = (ib2 << 20) + (ig3 << 10) + ir1;
+ dst[3] = (ig5 << 20) + (ir2 << 10) + ig4;
+
+ dst += 4;
+ }
+
+ int leftovers = mWidth - w6*6;
+ if (leftovers) {
+ float g0 = 0;
+ float g1 = 0;
+ float g2 = 0;
+ float g3 = 0;
+ float g4 = 0;
+ float r0 = 0;
+ float r1 = 0;
+ float r2 = 0;
+ float b0 = 0;
+ float b1 = 0;
+ float b2 = 0;
+
+ switch(leftovers) {
+ case 5: r2 = srcR[2];
+ b2 = srcB[2];
+ g4 = srcG[4];
+ case 4: g3 = srcG[3];
+ case 3: r1 = srcR[1];
+ b1 = srcB[1];
+ g2 = srcG[2];
+ case 2: g1 = srcG[1];
+ case 1: r0 = srcR[0];
+ b0 = srcB[0];
+ g0 = srcG[0];
+ }
+
+ if (r0 < 0.0f) r0 = 0.0f; else if (r0 > 1.0f) r0 = 1.0f;
+ if (r1 < 0.0f) r1 = 0.0f; else if (r1 > 1.0f) r1 = 1.0f;
+ if (r2 < 0.0f) r2 = 0.0f; else if (r2 > 1.0f) r2 = 1.0f;
+ if (g0 < 0.0f) g0 = 0.0f; else if (g0 > 1.0f) g0 = 1.0f;
+ if (g1 < 0.0f) g1 = 0.0f; else if (g1 > 1.0f) g1 = 1.0f;
+ if (g2 < 0.0f) g2 = 0.0f; else if (g2 > 1.0f) g2 = 1.0f;
+ if (g3 < 0.0f) g3 = 0.0f; else if (g3 > 1.0f) g3 = 1.0f;
+ if (g4 < 0.0f) g4 = 0.0f; else if (g4 > 1.0f) g4 = 1.0f;
+ if (b0 < 0.0f) b0 = 0.0f; else if (b0 > 1.0f) b0 = 1.0f;
+ if (b1 < 0.0f) b1 = 0.0f; else if (b1 > 1.0f) b1 = 1.0f;
+ if (b2 < 0.0f) b2 = 0.0f; else if (b2 > 1.0f) b2 = 1.0f;
+
+ uint32 ir0 = (uint32)VDRoundToIntFast(r0 * 1024.0f);
+ uint32 ir1 = (uint32)VDRoundToIntFast(r1 * 1024.0f);
+ uint32 ir2 = (uint32)VDRoundToIntFast(r2 * 1024.0f);
+ uint32 ib0 = (uint32)VDRoundToIntFast(b0 * 1024.0f);
+ uint32 ib1 = (uint32)VDRoundToIntFast(b1 * 1024.0f);
+ uint32 ib2 = (uint32)VDRoundToIntFast(b2 * 1024.0f);
+ uint32 ig0 = (uint32)VDRoundToIntFast(g0 * 1024.0f);
+ uint32 ig1 = (uint32)VDRoundToIntFast(g1 * 1024.0f);
+ uint32 ig2 = (uint32)VDRoundToIntFast(g2 * 1024.0f);
+ uint32 ig3 = (uint32)VDRoundToIntFast(g3 * 1024.0f);
+ uint32 ig4 = (uint32)VDRoundToIntFast(g4 * 1024.0f);
+
+ // dword 0: XX Cr0 Y0 Cb0
+ // dword 1: XX Y2 Cb1 Y1
+ // dword 2: XX Cb2 Y3 Cr1
+ // dword 3: XX Y5 Cr2 Y4
+ dst[0] = (ir0 << 20) + (ig0 << 10) + ib0;
+ dst[1] = (ig2 << 20) + (ib1 << 10) + ig1;
+ dst[2] = (ib2 << 20) + (ig3 << 10) + ir1;
+ dst[3] = (ir2 << 10) + ig4;
+ dst += 4;
+ }
+
+ // QuickTime defines the v210 format and requires zero padding in all unused samples.
+ int w48up = (mWidth + 23) / 24;
+ int w6up = (mWidth + 5) / 6;
+ int zeropad = w48up * 16 - w6up * 4;
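+	// e.g. mWidth = 721: w48up = 31, w6up = 121, so 496 - 484 = 12 trailing dwords are cleared.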
+ memset(dst, 0, zeropad * 4);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
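+// The unpacker emits three planes from a single pass over the packed row, so Start()
+// allocates one window sized to hold all three (Cr, Y, Cb) and GetRow() hands out each
+// plane by offsetting index * mWindowPitch into it.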
+void VDPixmapGen_V210_To_32F::Start() {
+ StartWindow(((mWidth + 5) / 6) * 6 * sizeof(float), 3);
+}
+
+const void *VDPixmapGen_V210_To_32F::GetRow(sint32 y, uint32 index) {
+ return (const uint8 *)VDPixmapGenWindowBasedOneSource::GetRow(y, index) + mWindowPitch * index;
+}
+
+sint32 VDPixmapGen_V210_To_32F::GetWidth(int index) const {
+ return index == 1 ? mWidth : (mWidth + 1) >> 1;
+}
+
+uint32 VDPixmapGen_V210_To_32F::GetType(uint32 output) const {
+ return (mpSrc->GetType(mSrcIndex) & ~kVDPixType_Mask) | kVDPixType_32F_LE;
+}
+
+void VDPixmapGen_V210_To_32F::Compute(void *dst0, sint32 y) {
+ float *dstR = (float *)dst0;
+ float *dstG = (float *)((char *)dstR + mWindowPitch);
+ float *dstB = (float *)((char *)dstG + mWindowPitch);
+ const uint32 *src = (const uint32 *)mpSrc->GetRow(y, mSrcIndex);
+ uint32 w = (mWidth + 5) / 6;
+
+ VDCPUCleanupExtensions();
+
+ // dword 0: XX Cr0 Y0 Cb0
+ // dword 1: XX Y2 Cb1 Y1
+ // dword 2: XX Cb2 Y3 Cr1
+ // dword 3: XX Y5 Cr2 Y4
+
+ for(uint32 i=0; i<w; ++i) {
+ const uint32 w0 = src[0];
+ const uint32 w1 = src[1];
+ const uint32 w2 = src[2];
+ const uint32 w3 = src[3];
+ src += 4;
+
+ dstB[0] = (float)( w0 & 0x3ff) / 1023.0f;
+ dstG[0] = (float)((w0 >> 10) & 0x3ff) / 1023.0f;
+ dstR[0] = (float)((w0 >> 20) & 0x3ff) / 1023.0f;
+ dstG[1] = (float)( w1 & 0x3ff) / 1023.0f;
+ dstB[1] = (float)((w1 >> 10) & 0x3ff) / 1023.0f;
+ dstG[2] = (float)((w1 >> 20) & 0x3ff) / 1023.0f;
+ dstR[1] = (float)( w2 & 0x3ff) / 1023.0f;
+ dstG[3] = (float)((w2 >> 10) & 0x3ff) / 1023.0f;
+ dstB[2] = (float)((w2 >> 20) & 0x3ff) / 1023.0f;
+ dstG[4] = (float)( w3 & 0x3ff) / 1023.0f;
+ dstR[2] = (float)((w3 >> 10) & 0x3ff) / 1023.0f;
+ dstG[5] = (float)((w3 >> 20) & 0x3ff) / 1023.0f;
+
+ dstR += 3;
+ dstG += 6;
+ dstB += 3;
+ }
+}
diff --git a/src/thirdparty/VirtualDub/Kasumi/source/uberblit_ycbcr_x86.cpp b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_ycbcr_x86.cpp
new file mode 100644
index 000000000..d34f731f1
--- /dev/null
+++ b/src/thirdparty/VirtualDub/Kasumi/source/uberblit_ycbcr_x86.cpp
@@ -0,0 +1,35 @@
+#include "uberblit_ycbcr_x86.h"
+
+extern "C" void vdasm_pixblt_XRGB8888_to_YUV444Planar_scan_SSE2(void *dstY, void *dstCb, void *dstCr, const void *srcRGB, uint32 count, const void *coeffs);
+
+void VDPixmapGenRGB32ToYCbCr601_SSE2::Compute(void *dst0, sint32 y) {
+ uint8 *dstCb = (uint8 *)dst0;
+ uint8 *dstY = dstCb + mWindowPitch;
+ uint8 *dstCr = dstY + mWindowPitch;
+ const uint8 *srcRGB = (const uint8 *)mpSrc->GetRow(y, mSrcIndex);
+
+ static const __declspec(align(16)) struct {
+ sint16 rb_to_y[8];
+ sint16 rb_to_cb[8];
+ sint16 rb_to_cr[8];
+ sint16 g_to_y[8];
+ sint16 g_to_cb[8];
+ sint16 g_to_cr[8];
+ sint32 y_bias[4];
+ sint32 c_bias[4];
+ } kCoeffs={
+ // Cb = (28784*r - 24103*g - 4681*b + 8388608 + 32768) >> 16;
+ // Y = (16829*r + 33039*g + 6416*b + 1048576 + 32768) >> 16;
+ // Cr = (-9714*r - 19071*g + 28784*b + 8388608 + 32768) >> 16;
+ { 3208, 8414, 3208, 8414, 3208, 8414, 3208, 8414, }, // rb to y
+ { -2340, 14392, -2340, 14392, -2340, 14392, -2340, 14392, }, // rb to cb
+ { 16519, 0, 16519, 0, 16519, 0, 16519, 0, }, // g to y
+ { -12050, 0, -12050, 0, -12050, 0, -12050, 0, }, // g to cb
+ { 14392, -4857, 14392, -4857, 14392, -4857, 14392, -4857, }, // rb to cr
+ { -9535, 0, -9535, 0, -9535, 0, -9535, 0, }, // g to cr
+ { 0x084000, 0x084000, 0x084000, 0x084000, }, // y bias
+ { 0x404000, 0x404000, 0x404000, 0x404000, }, // c bias
+ };
+
+ vdasm_pixblt_XRGB8888_to_YUV444Planar_scan_SSE2(dstY, dstCb, dstCr, srcRGB, mWidth, &kCoeffs);
+}
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/blitter.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/blitter.h
new file mode 100644
index 000000000..536bc0e7a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/blitter.h
@@ -0,0 +1,19 @@
+#ifndef f_VD2_KASUMI_BLITTER_H
+#define f_VD2_KASUMI_BLITTER_H
+
+#include <vd2/system/vectors.h>
+
+struct VDPixmap;
+struct VDPixmapLayout;
+
+class IVDPixmapBlitter {
+public:
+ virtual ~IVDPixmapBlitter() {}
+ virtual void Blit(const VDPixmap& dst, const VDPixmap& src) = 0;
+ virtual void Blit(const VDPixmap& dst, const vdrect32 *rDst, const VDPixmap& src) = 0;
+};
+
+IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmap& dst, const VDPixmap& src);
+IVDPixmapBlitter *VDPixmapCreateBlitter(const VDPixmapLayout& dst, const VDPixmapLayout& src);
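+
+// A minimal usage sketch, assuming the caller owns and eventually deletes the returned
+// blitter (dstPixmap and srcPixmap are placeholder VDPixmap values):
+//
+//     IVDPixmapBlitter *blt = VDPixmapCreateBlitter(dstPixmap, srcPixmap);
+//     blt->Blit(dstPixmap, srcPixmap);   // convert/copy one frame
+//     delete blt;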
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixel.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixel.h
new file mode 100644
index 000000000..a2f2e2ead
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixel.h
@@ -0,0 +1,40 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_PIXEL_H
+#define f_VD2_KASUMI_PIXEL_H
+
+#ifndef f_VD2_SYSTEM_VDTYPES_H
+ #include <vd2/system/vdtypes.h>
+#endif
+
+struct VDPixmap;
+
+uint32 VDPixmapSample(const VDPixmap& px, sint32 x, sint32 y);
+uint32 VDPixmapInterpolateSampleRGB24(const VDPixmap& px, sint32 x, sint32 y);
+
+inline uint8 VDPixmapSample8(const void *data, ptrdiff_t pitch, sint32 x, sint32 y) {
+ return ((const uint8 *)data)[pitch*y + x];
+}
+
+uint8 VDPixmapInterpolateSample8(const void *data, ptrdiff_t pitch, uint32 w, uint32 h, sint32 x_256, sint32 y_256);
+uint32 VDConvertYCbCrToRGB(uint8 y, uint8 cb, uint8 cr);
+uint32 VDConvertRGBToYCbCr(uint32 c);
+uint32 VDConvertRGBToYCbCr(uint8 r, uint8 g, uint8 b);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmap.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmap.h
new file mode 100644
index 000000000..a0125b6e3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmap.h
@@ -0,0 +1,76 @@
+#ifndef f_VD2_KASUMI_PIXMAP_H
+#define f_VD2_KASUMI_PIXMAP_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+namespace nsVDPixmap {
+ enum VDPixmapFormat {
+ kPixFormat_Null,
+ kPixFormat_Pal1,
+ kPixFormat_Pal2,
+ kPixFormat_Pal4,
+ kPixFormat_Pal8,
+ kPixFormat_XRGB1555,
+ kPixFormat_RGB565,
+ kPixFormat_RGB888,
+ kPixFormat_XRGB8888,
+ kPixFormat_Y8,
+ kPixFormat_YUV422_UYVY,
+ kPixFormat_YUV422_YUYV,
+ kPixFormat_YUV444_XVYU, // The reason for the strange VYU ordering is to make it easier to convert to UYVY/YUY2.
+ kPixFormat_YUV444_Planar,
+ kPixFormat_YUV422_Planar,
+ kPixFormat_YUV420_Planar,
+ kPixFormat_YUV411_Planar,
+ kPixFormat_YUV410_Planar,
+ kPixFormat_YUV422_Planar_Centered, // MPEG-1/MJPEG chroma alignment
+ kPixFormat_YUV420_Planar_Centered, // MPEG-1/MJPEG chroma alignment
+ kPixFormat_YUV422_Planar_16F,
+ kPixFormat_YUV422_V210,
+ kPixFormat_YUV422_UYVY_709, // Also known as HDYC.
+ kPixFormat_YUV420_NV12,
+ kPixFormat_Max_Standard
+ };
+}
+
+typedef sint32 vdpixpos;
+typedef sint32 vdpixsize;
+typedef ptrdiff_t vdpixoffset;
+
+struct VDPixmap {
+ void *data;
+ const uint32 *palette;
+ vdpixsize w;
+ vdpixsize h;
+ vdpixoffset pitch;
+ sint32 format;
+
+ // Auxiliary planes are always byte-per-pixel.
+
+ void *data2; // Cb (U) for YCbCr
+ vdpixoffset pitch2;
+ void *data3; // Cr (V) for YCbCr
+ vdpixoffset pitch3;
+};
+
+struct VDPixmapLayout {
+ ptrdiff_t data;
+ const uint32 *palette;
+ vdpixsize w;
+ vdpixsize h;
+ vdpixoffset pitch;
+ sint32 format;
+
+ // Auxiliary planes are always byte-per-pixel.
+
+ ptrdiff_t data2; // Cb (U) for YCbCr
+ vdpixoffset pitch2;
+ ptrdiff_t data3; // Cr (V) for YCbCr
+ vdpixoffset pitch3;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmapops.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmapops.h
new file mode 100644
index 000000000..6dce3a858
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmapops.h
@@ -0,0 +1,20 @@
+#ifndef f_VD2_KASUMI_PIXMAPOPS_H
+#define f_VD2_KASUMI_PIXMAPOPS_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/Kasumi/pixmap.h>
+
+bool VDPixmapIsBltPossible(int dst_format, int src_format);
+bool VDPixmapBlt(const VDPixmap& dst, const VDPixmap& src);
+bool VDPixmapBlt(const VDPixmap& dst, vdpixpos x1, vdpixpos y1, const VDPixmap& src, vdpixpos x2, vdpixpos y2, vdpixsize w, vdpixsize h);
+bool VDPixmapStretchBltNearest(const VDPixmap& dst, const VDPixmap& src);
+bool VDPixmapStretchBltNearest(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2);
+bool VDPixmapStretchBltBilinear(const VDPixmap& dst, const VDPixmap& src);
+bool VDPixmapStretchBltBilinear(const VDPixmap& dst, sint32 x1, sint32 y1, sint32 x2, sint32 y2, const VDPixmap& src, sint32 u1, sint32 v1, sint32 u2, sint32 v2);
+
+bool VDPixmapBltAlphaConst(const VDPixmap& dst, const VDPixmap& src, float alpha);
+
+#endif
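The blit entry points above pair naturally with VDPixmapBuffer from pixmaputils.h below: allocate a destination in the target format and let VDPixmapBlt do the conversion. A short sketch using only the calls declared in these headers.

	#include <vd2/Kasumi/pixmaputils.h>
	#include <vd2/Kasumi/pixmapops.h>

	// Convert an arbitrary source pixmap to 32-bit XRGB. Returns an owning buffer.
	VDPixmapBuffer ConvertToXRGB8888(const VDPixmap& src) {
		VDPixmapBuffer dst(src.w, src.h, nsVDPixmap::kPixFormat_XRGB8888);

		if (VDPixmapIsBltPossible(dst.format, src.format))
			VDPixmapBlt(dst, src);   // format conversion happens inside the blitter

		return dst;
	}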
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmaputils.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmaputils.h
new file mode 100644
index 000000000..0d9e50cfd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/pixmaputils.h
@@ -0,0 +1,171 @@
+#ifndef f_VD2_KASUMI_PIXMAPUTILS_H
+#define f_VD2_KASUMI_PIXMAPUTILS_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/Kasumi/pixmap.h>
+
+struct VDPixmapFormatInfo {
+ const char *name; // debugging name
+ bool qchunky; // quantums are chunky (not 1x1 pixels)
+ int qw, qh; // width, height of a quantum
+ int qwbits, qhbits; // width and height of a quantum as shifts
+ int qsize; // size of a pixel in bytes
+ int auxbufs; // number of auxiliary buffers (0 for chunky formats, usually 2 for planar)
+ int auxwbits, auxhbits; // subsampling factors for auxiliary buffers in shifts
+ int auxsize; // size of an aux sample in bytes
+ int palsize; // entries in palette
+ int subformats; // number of subformats for this format
+};
+
+extern VDPixmapFormatInfo g_vdPixmapFormats[];
+
+inline const VDPixmapFormatInfo& VDPixmapGetInfo(sint32 format) {
+ VDASSERT((uint32)format < nsVDPixmap::kPixFormat_Max_Standard);
+ return g_vdPixmapFormats[(uint32)format < nsVDPixmap::kPixFormat_Max_Standard ? format : 0];
+}
+
+#ifdef _DEBUG
+ bool VDAssertValidPixmap(const VDPixmap& px);
+#else
+ inline bool VDAssertValidPixmap(const VDPixmap& px) { return true; }
+#endif
+
+inline VDPixmap VDPixmapFromLayout(const VDPixmapLayout& layout, void *p) {
+ VDPixmap px;
+
+ px.data = (char *)p + layout.data;
+ px.data2 = (char *)p + layout.data2;
+ px.data3 = (char *)p + layout.data3;
+ px.format = layout.format;
+ px.w = layout.w;
+ px.h = layout.h;
+ px.palette = layout.palette;
+ px.pitch = layout.pitch;
+ px.pitch2 = layout.pitch2;
+ px.pitch3 = layout.pitch3;
+
+ return px;
+}
+
+inline VDPixmapLayout VDPixmapToLayoutFromBase(const VDPixmap& px, void *p) {
+ VDPixmapLayout layout;
+ layout.data = (char *)px.data - (char *)p;
+ layout.data2 = (char *)px.data2 - (char *)p;
+ layout.data3 = (char *)px.data3 - (char *)p;
+ layout.format = px.format;
+ layout.w = px.w;
+ layout.h = px.h;
+ layout.palette = px.palette;
+ layout.pitch = px.pitch;
+ layout.pitch2 = px.pitch2;
+ layout.pitch3 = px.pitch3;
+ return layout;
+}
+
+inline VDPixmapLayout VDPixmapToLayout(const VDPixmap& px, void *&p) {
+ VDPixmapLayout layout;
+ p = px.data;
+ layout.data = 0;
+ layout.data2 = (char *)px.data2 - (char *)px.data;
+ layout.data3 = (char *)px.data3 - (char *)px.data;
+ layout.format = px.format;
+ layout.w = px.w;
+ layout.h = px.h;
+ layout.palette = px.palette;
+ layout.pitch = px.pitch;
+ layout.pitch2 = px.pitch2;
+ layout.pitch3 = px.pitch3;
+ return layout;
+}
+
+uint32 VDPixmapCreateLinearLayout(VDPixmapLayout& layout, int format, vdpixsize w, vdpixsize h, int alignment);
+
+VDPixmap VDPixmapOffset(const VDPixmap& src, vdpixpos x, vdpixpos y);
+VDPixmapLayout VDPixmapLayoutOffset(const VDPixmapLayout& src, vdpixpos x, vdpixpos y);
+
+void VDPixmapFlipV(VDPixmap& layout);
+void VDPixmapLayoutFlipV(VDPixmapLayout& layout);
+
+uint32 VDPixmapLayoutGetMinSize(const VDPixmapLayout& layout);
+
+VDPixmap VDPixmapExtractField(const VDPixmap& src, bool field2);
+
+#ifndef VDPTRSTEP_DECLARED
+ template<class T>
+ inline void vdptrstep(T *&p, ptrdiff_t offset) {
+ p = (T *)((char *)p + offset);
+ }
+#endif
+#ifndef VDPTROFFSET_DECLARED
+ template<class T>
+ inline T *vdptroffset(T *p, ptrdiff_t offset) {
+ return (T *)((char *)p + offset);
+ }
+#endif
+#ifndef VDPTRDIFFABS_DECLARED
+ inline ptrdiff_t vdptrdiffabs(ptrdiff_t x) {
+ return x<0 ? -x : x;
+ }
+#endif
+
+
+typedef void (*VDPixmapBlitterFn)(const VDPixmap& dst, const VDPixmap& src, vdpixsize w, vdpixsize h);
+typedef VDPixmapBlitterFn (*tpVDPixBltTable)[nsVDPixmap::kPixFormat_Max_Standard];
+
+tpVDPixBltTable VDGetPixBltTableReference();
+tpVDPixBltTable VDGetPixBltTableX86Scalar();
+tpVDPixBltTable VDGetPixBltTableX86MMX();
+
+
+
+class VDPixmapBuffer : public VDPixmap {
+public:
+ VDPixmapBuffer() : mpBuffer(NULL), mLinearSize(0) { data = NULL; format = 0; }
+ explicit VDPixmapBuffer(const VDPixmap& src);
+ VDPixmapBuffer(const VDPixmapBuffer& src);
+ VDPixmapBuffer(sint32 w, sint32 h, int format) : mpBuffer(NULL), mLinearSize(0) {
+ init(w, h, format);
+ }
+ explicit VDPixmapBuffer(const VDPixmapLayout& layout);
+
+ ~VDPixmapBuffer();
+
+ void clear() {
+ if (mpBuffer) // to reduce debug checks
+ delete[] mpBuffer;
+ mpBuffer = NULL;
+ mLinearSize = 0;
+ format = nsVDPixmap::kPixFormat_Null;
+ }
+
+#ifdef _DEBUG
+ void *base() { return mpBuffer + (-(int)(uintptr)mpBuffer & 15) + 16; }
+ const void *base() const { return mpBuffer + (-(int)(uintptr)mpBuffer & 15) + 16; }
+ size_t size() const { return mLinearSize - 28; }
+
+ void validate();
+#else
+ void *base() { return mpBuffer + (-(int)(uintptr)mpBuffer & 15); }
+ const void *base() const { return mpBuffer + (-(int)(uintptr)mpBuffer & 15); }
+ size_t size() const { return mLinearSize; }
+
+ void validate() {}
+#endif
+
+ void init(sint32 w, sint32 h, int format);
+ void init(const VDPixmapLayout&);
+
+ void assign(const VDPixmap& src);
+
+ void swap(VDPixmapBuffer&);
+
+protected:
+ char *mpBuffer;
+ size_t mLinearSize;
+};
+
+
+#endif
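VDPixmapCreateLinearLayout and VDPixmapFromLayout separate "how the planes are arranged" from "where the memory lives", which is handy when the caller owns the allocation. A sketch of the intended flow; the assumption that the uint32 return value is the number of bytes the layout needs is mine, not stated in the header.

	#include <vector>
	#include <vd2/Kasumi/pixmaputils.h>

	// Build a pixmap over caller-owned storage: compute a linear plane layout,
	// size the buffer accordingly, then bind the layout to the base pointer.
	void AllocatePlanarPixmap(std::vector<char>& storage, VDPixmap& px,
	                          vdpixsize w, vdpixsize h) {
		VDPixmapLayout layout;
		uint32 bytesNeeded = VDPixmapCreateLinearLayout(
			layout, nsVDPixmap::kPixFormat_YUV420_Planar, w, h, 16 /* alignment */);

		storage.resize(bytesNeeded);
		px = VDPixmapFromLayout(layout, &storage[0]);   // offsets become pointers
	}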
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/region.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/region.h
new file mode 100644
index 000000000..aa2963c90
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/region.h
@@ -0,0 +1,92 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_REGION_H
+#define f_VD2_KASUMI_REGION_H
+
+struct VDPixmap;
+
+#include <vd2/system/vectors.h>
+#include <vd2/system/vdstl.h>
+
+class VDPixmapRegion {
+public:
+ void swap(VDPixmapRegion& x);
+
+public:
+ vdfastvector<uint32> mSpans;
+ vdrect32 mBounds;
+};
+
+class VDPixmapPathRasterizer {
+public:
+ VDPixmapPathRasterizer();
+ VDPixmapPathRasterizer(const VDPixmapPathRasterizer&); // no-op
+ ~VDPixmapPathRasterizer();
+
+ VDPixmapPathRasterizer& operator=(const VDPixmapPathRasterizer&); // no-op
+
+ void Clear();
+ void QuadraticBezier(const vdint2 pts[4]);
+ void CubicBezier(const vdint2 pts[4]);
+ void Line(const vdint2& pt1, const vdint2& pt2);
+ void FastLine(int x0, int y0, int x1, int y1);
+
+ void ScanConvert(VDPixmapRegion& region);
+
+protected:
+ void ClearEdgeList();
+ void FreeEdgeLists();
+ void ClearScanBuffer();
+ void ReallocateScanBuffer(int ymin, int ymax);
+
+ struct Edge {
+ Edge *next;
+ int posandflag;
+ };
+
+ enum { kEdgeBlockMax = 1024 };
+
+ struct EdgeBlock {
+ EdgeBlock *next;
+ Edge edges[1024];
+
+ EdgeBlock(EdgeBlock *p) : next(p) {}
+ };
+
+ struct Scan {
+ Edge *chain;
+ uint32 count;
+ };
+
+ EdgeBlock *mpEdgeBlocks;
+ EdgeBlock *mpFreeEdgeBlocks;
+ int mEdgeBlockIdx;
+ Scan *mpScanBuffer;
+ Scan *mpScanBufferBiased;
+ int mScanYMin;
+ int mScanYMax;
+};
+
+bool VDPixmapFillRegion(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color);
+bool VDPixmapFillRegionAntialiased8x(const VDPixmap& dst, const VDPixmapRegion& region, int x, int y, uint32 color);
+
+void VDPixmapCreateRoundRegion(VDPixmapRegion& dst, float r);
+void VDPixmapConvolveRegion(VDPixmapRegion& dst, const VDPixmapRegion& r1, const VDPixmapRegion& r2);
+
+#endif
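A rough usage sketch for the rasterizer above: feed it edges, scan-convert them into a VDPixmapRegion, then fill that region into a pixmap. Whether FastLine expects integer pixels or a fixed-point sub-pixel grid is not stated in this header, so treat the coordinates below as illustrative only.

	#include <vd2/Kasumi/region.h>
	#include <vd2/Kasumi/pixmap.h>

	// Rasterize a closed triangle and fill it with a solid color.
	void FillTriangle(const VDPixmap& dst, uint32 color) {
		VDPixmapPathRasterizer rast;

		rast.FastLine(10, 10, 90, 20);   // coordinate units: see note above
		rast.FastLine(90, 20, 40, 80);
		rast.FastLine(40, 80, 10, 10);

		VDPixmapRegion region;
		rast.ScanConvert(region);        // consumes the accumulated edges

		VDPixmapFillRegion(dst, region, 0, 0, color);
	}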
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample.h
new file mode 100644
index 000000000..12c6f01a2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample.h
@@ -0,0 +1,31 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_H
+#define f_VD2_KASUMI_RESAMPLE_H
+
+#include <vd2/system/vectors.h>
+
+struct VDPixmap;
+
+class IVDPixmapResampler {
+public:
+ enum FilterMode {
+ kFilterPoint,
+ kFilterLinear,
+ kFilterCubic,
+ kFilterLanczos3,
+ kFilterCount
+ };
+
+ virtual ~IVDPixmapResampler() {}
+ virtual void SetSplineFactor(double A) = 0;
+ virtual void SetFilters(FilterMode h, FilterMode v, bool interpolationOnly) = 0;
+ virtual bool Init(uint32 dw, uint32 dh, int dstformat, uint32 sw, uint32 sh, int srcformat) = 0;
+ virtual bool Init(const vdrect32f& dstrect, uint32 dw, uint32 dh, int dstformat, const vdrect32f& srcrect, uint32 sw, uint32 sh, int srcformat) = 0;
+ virtual void Shutdown() = 0;
+
+ virtual void Process(const VDPixmap& dst, const VDPixmap& src) = 0;
+};
+
+IVDPixmapResampler *VDCreatePixmapResampler();
+bool VDPixmapResample(const VDPixmap& dst, const VDPixmap& src, IVDPixmapResampler::FilterMode filter);
+
+#endif
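The one-shot helper VDPixmapResample covers the common case; the interface form is meant for repeated use with the same geometry. A sketch of the stateful path, assuming Init/Process/Shutdown are called in that order as the method names suggest.

	#include <vd2/Kasumi/resample.h>
	#include <vd2/Kasumi/pixmap.h>

	// Scale src into dst using a Lanczos3 kernel on both axes.
	bool ScaleLanczos3(const VDPixmap& dst, const VDPixmap& src) {
		IVDPixmapResampler *r = VDCreatePixmapResampler();
		if (!r)
			return false;

		r->SetFilters(IVDPixmapResampler::kFilterLanczos3,
		              IVDPixmapResampler::kFilterLanczos3,
		              false /* not interpolation-only */);

		bool ok = r->Init(dst.w, dst.h, dst.format, src.w, src.h, src.format);
		if (ok)
			r->Process(dst, src);

		r->Shutdown();
		delete r;
		return ok;
	}

For a single resize, VDPixmapResample(dst, src, IVDPixmapResampler::kFilterLanczos3) does the same thing in one call.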
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample_kernels.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample_kernels.h
new file mode 100644
index 000000000..a95e9b028
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/resample_kernels.h
@@ -0,0 +1,91 @@
+#ifndef f_VD2_KASUMI_RESAMPLE_KERNELS_H
+#define f_VD2_KASUMI_RESAMPLE_KERNELS_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/Kasumi/pixmap.h>
+
+struct VDResamplerAxis {
+ sint32 dx;
+ sint32 u;
+ sint32 dudx;
+ uint32 dx_precopy;
+ uint32 dx_preclip;
+ uint32 dx_active;
+ uint32 dx_postclip;
+ uint32 dx_postcopy;
+ uint32 dx_dualclip;
+
+ void Init(sint32 dudx);
+ void Compute(sint32 count, sint32 u0, sint32 w, sint32 kernel_width);
+};
+
+
+///////////////////////////////////////////////////////////////////////////
+//
+// filter kernels
+//
+///////////////////////////////////////////////////////////////////////////
+
+class IVDResamplerFilter {
+public:
+ virtual ~IVDResamplerFilter() {}
+
+ virtual int GetFilterWidth() const = 0;
+ virtual double EvaluateFilter(double offset) const = 0;
+ virtual void GenerateFilter(float *dst, double offset) const = 0;
+ virtual void GenerateFilterBank(float *dst) const = 0;
+};
+
+class VDResamplerLinearFilter : public IVDResamplerFilter {
+public:
+ VDResamplerLinearFilter(double twofc);
+
+ int GetFilterWidth() const;
+
+ double EvaluateFilter(double offset) const;
+ void GenerateFilter(float *dst, double offset) const;
+ void GenerateFilterBank(float *dst) const;
+
+protected:
+ double mScale;
+ unsigned mTaps;
+};
+
+class VDResamplerCubicFilter : public IVDResamplerFilter {
+public:
+ VDResamplerCubicFilter(double twofc, double A);
+
+ int GetFilterWidth() const;
+
+ double EvaluateFilter(double offset) const;
+ void GenerateFilter(float *dst, double offset) const;
+ void GenerateFilterBank(float *dst) const;
+
+protected:
+ double mScale;
+ double mA0;
+ double mA2;
+ double mA3;
+ double mB0;
+ double mB1;
+ double mB2;
+ double mB3;
+ unsigned mTaps;
+};
+
+class VDResamplerLanczos3Filter : public IVDResamplerFilter {
+public:
+ VDResamplerLanczos3Filter(double twofc);
+
+ int GetFilterWidth() const;
+
+ double EvaluateFilter(double offset) const;
+ void GenerateFilter(float *dst, double offset) const;
+ void GenerateFilterBank(float *dst) const;
+
+protected:
+ double mScale;
+ unsigned mTaps;
+};
+
+#endif
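For orientation only: VDResamplerLanczos3Filter presumably implements the usual three-lobe Lanczos window, but this header declares just the interface, so the formula below is the standard textbook definition rather than anything taken from this commit.

	#include <math.h>

	// Standard Lanczos3 kernel: sinc(x) * sinc(x/3) for |x| < 3, zero elsewhere.
	// Shown only to illustrate what a 3-lobe filter of this family looks like.
	double Lanczos3(double x) {
		if (x == 0.0)
			return 1.0;
		if (fabs(x) >= 3.0)
			return 0.0;

		const double pix = 3.14159265358979323846 * x;
		return 3.0 * sin(pix) * sin(pix / 3.0) / (pix * pix);
	}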
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/tables.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/tables.h
new file mode 100644
index 000000000..972f37036
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/tables.h
@@ -0,0 +1,41 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_TABLES_H
+#define f_VD2_KASUMI_TABLES_H
+
+///////////////////////////////////////////////////////////////////////////////
+// Cubic interpolation tables
+//
+// These tables give coefficients for 1-D cubic interpolation with 8-bit
+// subunit precision. The [0] entry is positioned exactly on top of the
+// second sample, and the [255] entry is 255/256th of the way to the third
+// sample. The cardinal spline constant is -0.75 and the output range is
+// [-0.1875, 1.1875], where the maximum overshoot and undershoot occur at
+// the midpoint.
+//
+// The first and fourth coefficients are always negative; the second and
+// third coefficients are always positive.
+//
+extern "C" const sint32 kVDCubicInterpTableFX14_075[256][4];
+
+#ifdef _M_IX86
+ extern "C" const sint16 kVDCubicInterpTableFX14_075_MMX[256][8];
+#endif
+
+#endif
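Given the description above, one entry of kVDCubicInterpTableFX14_075 holds four 2.14 fixed-point weights for a fractional position between the second and third of four source samples. A hedged sketch of how such a table is typically applied; the 14-bit shift follows from the FX14 naming, and the rounding term is my assumption.

	#include <vd2/Kasumi/tables.h>

	// Interpolate between p1 and p2 of four consecutive 8-bit samples, with
	// 'frac' in [0,255] selecting the sub-sample position (0 == exactly on p1).
	inline int CubicInterp8(int p0, int p1, int p2, int p3, unsigned frac) {
		const sint32 *w = kVDCubicInterpTableFX14_075[frac & 255];

		int v = (w[0]*p0 + w[1]*p1 + w[2]*p2 + w[3]*p3 + 0x2000) >> 14;

		// The -0.75 cardinal spline can overshoot to [-0.1875, 1.1875], so the
		// result has to be clamped back into the 8-bit range.
		if (v < 0)   v = 0;
		if (v > 255) v = 255;
		return v;
	}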
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/text.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/text.h
new file mode 100644
index 000000000..245d38f12
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/text.h
@@ -0,0 +1,62 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2007 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_TEXT_H
+#define f_VD2_KASUMI_TEXT_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vectors.h>
+
+class VDPixmapPathRasterizer;
+
+struct VDOutlineFontGlyphInfo {
+ uint16 mPointArrayStart; // start of points (encoded as 8:8)
+ uint16 mCommandArrayStart; // start of commands (encoded as 6:2 RLE).
+ sint16 mAWidth; // advance from start to character cell
+ sint16 mBWidth; // width of character cell
+ sint16 mCWidth; // advance from character cell to end
+};
+
+struct VDOutlineFontInfo {
+ const uint16 *mpPointArray;
+ const uint8 *mpCommandArray;
+ const VDOutlineFontGlyphInfo *mpGlyphArray;
+ int mStartGlyph;
+ int mEndGlyph;
+ int mMinX;
+ int mMinY;
+ int mMaxX;
+ int mMaxY;
+ int mEmSquare;
+ int mAscent;
+ int mDescent;
+ int mLineGap;
+};
+
+struct VDTextLayoutMetrics {
+ vdrect32f mExtents;
+ float mAdvance;
+};
+
+void VDPixmapGetTextExtents(const VDOutlineFontInfo *font, float size, const char *pText, VDTextLayoutMetrics& out_Metrics);
+void VDPixmapConvertTextToPath(VDPixmapPathRasterizer& rast, const VDOutlineFontInfo *font, float size, float x, float y, const char *pText, const float transform[2][2] = NULL);
+
+#endif
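These two calls chain naturally with the rasterizer from region.h: measure first, then convert the glyph outlines to a path and scan-convert it. A sketch, assuming a VDOutlineFontInfo table is available somewhere; this header does not show where the built-in font data lives.

	#include <vd2/Kasumi/text.h>
	#include <vd2/Kasumi/region.h>

	// Turn a string into a fill region at (x, y) using an outline font table.
	void TextToRegion(const VDOutlineFontInfo *font, float size,
	                  float x, float y, const char *text, VDPixmapRegion& out) {
		VDTextLayoutMetrics metrics;
		VDPixmapGetTextExtents(font, size, text, metrics);        // mExtents / mAdvance

		VDPixmapPathRasterizer rast;
		VDPixmapConvertTextToPath(rast, font, size, x, y, text);  // default transform
		rast.ScanConvert(out);
	}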
diff --git a/src/thirdparty/VirtualDub/h/vd2/Kasumi/triblt.h b/src/thirdparty/VirtualDub/h/vd2/Kasumi/triblt.h
new file mode 100644
index 000000000..4602cd883
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/Kasumi/triblt.h
@@ -0,0 +1,71 @@
+// VirtualDub - Video processing and capture application
+// Graphics support library
+// Copyright (C) 1998-2008 Avery Lee
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#ifndef f_VD2_KASUMI_TRIBLT_H
+#define f_VD2_KASUMI_TRIBLT_H
+
+#include <vd2/system/vdstl.h>
+#include <vd2/Kasumi/pixmaputils.h>
+#include <vector>
+
+struct VDTriBltVertex {
+ float x, y, z, u, v;
+};
+
+struct VDTriColorVertex {
+ float x, y, z, r, g, b, a;
+};
+
+enum VDTriBltFilterMode {
+ kTriBltFilterPoint,
+ kTriBltFilterBilinear,
+ kTriBltFilterTrilinear,
+ kTriBltFilterBicubicMipLinear,
+ kTriBltFilterCount
+};
+
+bool VDPixmapTriFill(VDPixmap& dst, uint32 c,
+ const VDTriBltVertex *pVertices, int nVertices,
+ const int *pIndices, const int nIndices,
+ const float pTransform[16] = NULL);
+
+bool VDPixmapTriFill(VDPixmap& dst,
+ const VDTriColorVertex *pVertices, int nVertices,
+ const int *pIndices, const int nIndices,
+ const float pTransform[16] = NULL);
+
+bool VDPixmapTriBlt(VDPixmap& dst, const VDPixmap *const *pSources, int nMipmaps,
+ const VDTriBltVertex *pVertices, int nVertices,
+ const int *pIndices, const int nIndices,
+ VDTriBltFilterMode filterMode,
+ float mipMapLODBias,
+ const float pTransform[16] = NULL);
+
+class VDPixmapTextureMipmapChain {
+public:
+ VDPixmapTextureMipmapChain(const VDPixmap& src, bool wrap=false, bool cubic = false, int maxlevels = 16);
+
+ const VDPixmap *const *Mips() const { return mMipMaps.data(); }
+ int Levels() const { return mMipMaps.size(); }
+
+protected:
+ std::vector<VDPixmapBuffer> mBuffers;
+ vdfastvector<const VDPixmap *> mMipMaps;
+};
+
+#endif
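VDPixmapTriFill takes an indexed triangle list much like a 3-D API would. A small sketch drawing a flat-colored quad as two triangles; whether x/y are already in destination pixels when no transform is supplied is an assumption.

	#include <vd2/Kasumi/triblt.h>

	// Fill a rectangle as two indexed triangles with a constant color.
	bool FillQuad(VDPixmap& dst, uint32 color,
	              float x0, float y0, float x1, float y1) {
		const VDTriBltVertex verts[4] = {
			{ x0, y0, 0.0f, 0.0f, 0.0f },
			{ x1, y0, 0.0f, 0.0f, 0.0f },
			{ x1, y1, 0.0f, 0.0f, 0.0f },
			{ x0, y1, 0.0f, 0.0f, 0.0f },
		};
		const int indices[6] = { 0, 1, 2,  0, 2, 3 };

		return VDPixmapTriFill(dst, color, verts, 4, indices, 6);   // identity transform
	}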
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/Error.h b/src/thirdparty/VirtualDub/h/vd2/system/Error.h
new file mode 100644
index 000000000..22f15ede3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/Error.h
@@ -0,0 +1,119 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_ERROR_H
+#define f_VD2_ERROR_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+class MyError;
+
+///////////////////////////////////////////////////////////////////////////
+// IVDAsyncErrorCallback
+//
+class IVDAsyncErrorCallback {
+public:
+ virtual bool OnAsyncError(MyError& e) = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// MyError
+//
+class MyError {
+private:
+ const MyError& operator=(const MyError&); // protect against accidents
+
+protected:
+ char *buf;
+
+public:
+ MyError();
+ MyError(const MyError& err);
+ MyError(const char *f, ...);
+ ~MyError();
+ void clear();
+ void assign(const MyError& e);
+ void assign(const char *s);
+ void setf(const char *f, ...);
+ void vsetf(const char *f, va_list val);
+ void post(struct HWND__ *hWndParent, const char *title) const;
+ char *gets() const {
+ return buf;
+ }
+ char *c_str() const {
+ return buf;
+ }
+ bool empty() const { return !buf; }
+ void discard();
+ void swap(MyError& err);
+ void TransferFrom(MyError& err);
+};
+
+class MyICError : public MyError {
+public:
+ MyICError(const char *s, uint32 icErr);
+ MyICError(uint32 icErr, const char *format, ...);
+};
+
+class MyMMIOError : public MyError {
+public:
+ MyMMIOError(const char *s, uint32 icErr);
+};
+
+class MyAVIError : public MyError {
+public:
+ MyAVIError(const char *s, uint32 aviErr);
+};
+
+class MyMemoryError : public MyError {
+public:
+ MyMemoryError();
+};
+
+class MyWin32Error : public MyError {
+public:
+ MyWin32Error(const char *format, uint32 err, ...);
+};
+
+class MyCrashError : public MyError {
+public:
+ MyCrashError(const char *format, uint32 dwExceptionCode);
+};
+
+class MyUserAbortError : public MyError {
+public:
+ MyUserAbortError();
+};
+
+class MyInternalError : public MyError {
+public:
+ MyInternalError(const char *format, ...);
+};
+
+#endif
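MyError is a value type carrying a formatted message; in VirtualDub-derived code it is conventionally thrown and caught by reference, though this header itself only defines the class. A hedged sketch of that pattern.

	#include <vd2/system/Error.h>
	#include <stdio.h>

	void MightFail(bool fail) {
		if (fail)
			throw MyError("operation failed with code %d", 42);   // printf-style ctor
	}

	void Caller() {
		try {
			MightFail(true);
		} catch(const MyError& e) {
			// gets()/c_str() expose the formatted message; empty() checks for one.
			printf("error: %s\n", e.empty() ? "(none)" : e.gets());
		}
	}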
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/Fraction.h b/src/thirdparty/VirtualDub/h/vd2/system/Fraction.h
new file mode 100644
index 000000000..742533635
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/Fraction.h
@@ -0,0 +1,95 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_FRACTION_H
+#define f_VD2_SYSTEM_FRACTION_H
+
+#include <vd2/system/vdtypes.h>
+
+class VDFraction {
+friend VDFraction operator*(unsigned long b, const VDFraction f);
+friend VDFraction operator*(int b, const VDFraction f);
+private:
+ unsigned long hi, lo;
+
+ static VDFraction reduce(uint64 hi, uint64 lo);
+
+public:
+ VDFraction() {}
+ explicit VDFraction(int i) : hi(i), lo(1) {}
+ explicit VDFraction(unsigned long i) : hi(i), lo(1) { }
+ explicit VDFraction(unsigned long i, unsigned long j) : hi(i), lo(j) {}
+ explicit VDFraction(double d);
+
+ bool operator<(VDFraction b) const;
+ bool operator<=(VDFraction b) const;
+ bool operator>(VDFraction b) const;
+ bool operator>=(VDFraction b) const;
+ bool operator==(VDFraction b) const;
+ bool operator!=(VDFraction b) const;
+
+ VDFraction operator*(VDFraction b) const;
+ VDFraction operator/(VDFraction b) const;
+
+ VDFraction operator*(unsigned long b) const;
+ VDFraction operator/(unsigned long b) const;
+
+ VDFraction& operator*=(VDFraction b);
+ VDFraction& operator/=(VDFraction b);
+ VDFraction& operator*=(unsigned long b);
+ VDFraction& operator/=(unsigned long b);
+
+ void Assign(unsigned long n, unsigned long d) {
+ hi = n;
+ lo = d;
+ }
+
+ sint64 scale64t(sint64) const;
+ sint64 scale64r(sint64) const;
+ sint64 scale64u(sint64) const;
+ sint64 scale64it(sint64) const;
+ sint64 scale64ir(sint64) const;
+ sint64 scale64iu(sint64) const;
+
+ double asDouble() const;
+ double AsInverseDouble() const;
+
+ unsigned long roundup32ul() const;
+
+ unsigned long getHi() const { return hi; }
+ unsigned long getLo() const { return lo; }
+
+ VDFraction reduce() const { return reduce(hi, lo); }
+
+ bool Parse(const char *s);
+
+ static inline VDFraction reduce64(sint64 hi, sint64 lo) { return reduce(hi, lo); }
+};
+
+inline VDFraction operator*(unsigned long b, const VDFraction f) { return f*b; }
+
+typedef VDFraction Fraction;
+
+#endif
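VDFraction keeps exact rationals such as NTSC frame rates. The scale64* members apparently scale a 64-bit value by the fraction with different rounding modes (t/r/u and their i-prefixed inverses); that reading is inferred from the names only. A sketch under that assumption:

	#include <vd2/system/Fraction.h>

	// Exact NTSC frame rate kept as 30000/1001 rather than a rounded double.
	sint64 FrameToTime100ns(sint64 frame) {
		VDFraction rate(30000UL, 1001UL);     // getHi()/getLo() == 30000/1001

		// Scale frame*10^7 by the inverse of the rate; scale64it is assumed to
		// be the truncating inverse-scale variant (inferred from the name).
		return rate.scale64it(frame * 10000000);
	}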
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDNamespace.h b/src/thirdparty/VirtualDub/h/vd2/system/VDNamespace.h
new file mode 100644
index 000000000..c0f0d4141
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDNamespace.h
@@ -0,0 +1,157 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+#ifndef f_SYSTEM_VDNAMESPACE_H
+#define f_SYSTEM_VDNAMESPACE_H
+
+#include <vd2/system/list.h>
+
+class VDNamespaceNode;
+class VDNamespaceGroup;
+class VDNamespaceItem;
+class VDNamespace;
+template <class T> class VDNamespace2;
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Node: Any item in the namespace.
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDNamespaceNode {
+public:
+ const char *pszName;
+ VDNamespaceGroup *const pParent;
+
+ VDNamespaceNode(const char *name, VDNamespaceGroup *parent) : pszName(name), pParent(parent) { }
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Group: Holds items.
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDNamespaceGroup : public VDNamespaceNode, public ListNode2<VDNamespaceGroup> {
+public:
+ ListAlloc<VDNamespaceItem> listItems;
+ ListAlloc<VDNamespaceGroup> listGroups;
+
+ const char *namedup(const char *s);
+
+ VDNamespaceGroup(const char *_pszName, VDNamespaceGroup *parent);
+ ~VDNamespaceGroup();
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Item class
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDNamespaceItem : public VDNamespaceNode, public ListNode2<VDNamespaceItem> {
+public:
+ const void *object;
+
+ VDNamespaceItem(const char *_pszName, VDNamespaceGroup *parent, const void *src);
+ ~VDNamespaceItem();
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Namespace class
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDNamespace {
+protected:
+ VDNamespaceGroup root;
+
+ VDNamespaceGroup *_lookupGroup(const char *pszName, bool fCreate, bool fIsFilter);
+ VDNamespaceItem *_findItemByObject(const VDNamespaceGroup *pGroup, const void *pObj);
+ bool _getPathByItem(const VDNamespaceNode *pEntry, char *buf, int maxlen);
+
+public:
+
+ VDNamespace();
+ ~VDNamespace();
+
+ typedef bool (*tGroupEnumerator)(VDNamespace *pThis, const char *pszName, const VDNamespaceGroup *pGroup, void *pvData);
+ typedef bool (*tItemEnumerator)(VDNamespace *pThis, const char *pszName, const void *pItem, void *pvData);
+
+ void clear();
+ void add(const char *pszGroup, const char *pszName, const void *pDef);
+ const void *lookup(const char *pszName);
+
+ bool enumerateGroups(const VDNamespaceGroup *pGroupRoot, tGroupEnumerator pEnum, void *pvData);
+ bool enumerateItems(const VDNamespaceGroup *pGroupRoot, tItemEnumerator pEnum, void *pvData);
+
+ bool getPathByItem(const void *pObj, char *buf, int maxlen);
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Templated Namespace class
+//
+///////////////////////////////////////////////////////////////////////////
+
+template <class T>
+class VDNamespace2 : public VDNamespace {
+public:
+ VDNamespace2() {}
+ ~VDNamespace2() {}
+
+ typedef bool (*tGroupEnumerator)(VDNamespace2<T> *pThis, const char *pszName, const VDNamespaceGroup *pGroup, void *pvData);
+ typedef bool (*tItemEnumerator)(VDNamespace2<T> *pThis, const char *pszName, const T *pItem, void *pvData);
+
+ void add(const char *pszGroup, const char *pszName, const T *pDef) {
+ VDNamespace::add(pszGroup, pszName, pDef);
+ }
+
+ const T *lookup(const char *pszName) {
+ return static_cast<const T *>(VDNamespace::lookup(pszName));
+ }
+
+ bool enumerateGroups(const VDNamespaceGroup *pGroupRoot, tGroupEnumerator pEnum, void *pvData) {
+ for(ListAlloc<VDNamespaceGroup>::fwit it = (pGroupRoot ? pGroupRoot : &root)->listGroups.begin(); it; ++it)
+ if (!pEnum(this, it->pszName, it, pvData))
+ return false;
+
+ return true;
+ }
+
+ bool enumerateItems(const VDNamespaceGroup *pGroupRoot, tItemEnumerator pEnum, void *pvData) {
+ for(ListAlloc<VDNamespaceItem>::fwit it = (pGroupRoot ? pGroupRoot : &root)->listItems.begin(); it; ++it)
+ if (!pEnum(this, it->pszName, static_cast<const T *>(it->object), pvData))
+ return false;
+
+ return true;
+ }
+
+ bool getPathByItem(const T *pObj, char *buf, int maxlen) {
+ return VDNamespace::getPathByItem(pObj, buf, maxlen);
+ }
+};
+
+#endif
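VDNamespace2<T> is a typed wrapper over the string-keyed registry: add() files an object under a group and name, and lookup() retrieves it. A small sketch, assuming lookup() accepts the same name string that add() was given; the exact path syntax is not shown in this header.

	#include <vd2/system/VDNamespace.h>

	struct FilterDef {
		const char *name;
		int version;
	};

	void NamespaceExample() {
		static const FilterDef blur = { "blur", 1 };

		VDNamespace2<FilterDef> registry;
		registry.add("video filters", "blur", &blur);   // group, name, object

		const FilterDef *def = registry.lookup("blur"); // NULL if not registered
		(void)def;
	}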
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDQueue.h b/src/thirdparty/VirtualDub/h/vd2/system/VDQueue.h
new file mode 100644
index 000000000..43367d287
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDQueue.h
@@ -0,0 +1,90 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDQUEUE_H
+#define f_VD2_SYSTEM_VDQUEUE_H
+
+#include <vd2/system/List.h>
+
+template<class T>
+class VDQueueNode : public ListNode2< VDQueueNode<T> > {
+public:
+ T t;
+ VDQueueNode(const T& t2) : t(t2) {}
+};
+
+template<class T>
+class VDQueue {
+public:
+ ListAlloc< VDQueueNode<T> > list;
+
+ VDQueue<T>();
+ ~VDQueue<T>();
+ T Pop();
+ T Peek();
+ void Push(const T&);
+ bool isEmpty() { return list.IsEmpty(); }
+};
+
+template<class T>
+VDQueue<T>::VDQueue<T>() {
+}
+
+template<class T>
+VDQueue<T>::~VDQueue<T>() {
+ while(!list.IsEmpty())
+ delete list.RemoveTail();
+}
+
+template<class T>
+T VDQueue<T>::Peek() {
+ return list.AtHead()->t;
+}
+
+template<class T>
+T VDQueue<T>::Pop() {
+ return list.RemoveHead()->t;
+}
+
+template<class T>
+void VDQueue<T>::Push(const T& t) {
+ list.AddTail(new VDQueueNode<T>(t));
+}
+
+/////////////
+
+template<class T>
+class VDQueueAlloc : public VDQueue<T> {
+public:
+ ~VDQueueAlloc();
+};
+
+template<class T>
+VDQueueAlloc<T>::~VDQueueAlloc() {
+ for(ListAlloc< VDQueueNode<T> >::fwit it = list.begin(); it; ++it)
+ delete &*it;
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDRingBuffer.h b/src/thirdparty/VirtualDub/h/vd2/system/VDRingBuffer.h
new file mode 100644
index 000000000..f0c7806a0
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDRingBuffer.h
@@ -0,0 +1,301 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_SYSTEM_VDRINGBUFFER_H
+#define f_SYSTEM_VDRINGBUFFER_H
+
+#include <string.h>
+#include <utility>
+
+#include <vd2/system/atomic.h>
+
+class VDRingBufferBase {
+public:
+ VDRingBufferBase()
+ : nSize(0)
+ , nReadPoint(0)
+ , nWritePoint(0)
+ {
+ }
+
+ int getSize() const { return nSize; }
+ int getReadOffset() const { return nReadPoint; }
+ int getWriteOffset() const { return nWritePoint; }
+
+protected:
+ int nSize;
+ int nReadPoint;
+ int nWritePoint;
+};
+
+template<class T, class Allocator = std::allocator<T> >
+class VDRingBuffer : public VDRingBufferBase, private Allocator {
+protected:
+ T *pBuffer;
+ VDAtomicInt nLevel;
+
+public:
+ VDRingBuffer();
+ VDRingBuffer(int size);
+ ~VDRingBuffer();
+
+ void Init(int size);
+ void Shutdown();
+
+ int getLevel() const { return nLevel; }
+ int getSpace() const { return nSize - nLevel; }
+ int getWriteSpace() const;
+ T * getWritePtr() const { return pBuffer+nWritePoint; }
+
+ int size() const { return nSize; }
+ bool empty() const { return !nLevel; }
+ bool full() const { return nLevel == nSize; }
+
+ void Flush() { nReadPoint = nWritePoint = nLevel = 0; }
+
+ int Read(T *pBuffer, int bytes);
+ const T *LockRead(int requested, int& actual);
+ const T *LockReadAll(int& actual);
+ const T *LockReadWrapped(int requested, int& actual, int& nReadPoint);
+ const T *LockReadAllWrapped(int& actual, int& nReadPoint);
+ int UnlockRead(int actual);
+
+ int Write(const T *pData, int bytes);
+ T *LockWrite(int requested, int& actual);
+ T *LockWriteAll(int& actual);
+ int UnlockWrite(int actual);
+};
+
+template<class T, class Allocator>
+VDRingBuffer<T, Allocator>::VDRingBuffer(int size)
+ : pBuffer(NULL)
+{
+ Init(size);
+}
+
+template<class T, class Allocator>
+VDRingBuffer<T, Allocator>::VDRingBuffer()
+ : pBuffer(NULL)
+ , nLevel(0)
+{
+}
+
+template<class T, class Allocator>
+VDRingBuffer<T, Allocator>::~VDRingBuffer() {
+ Shutdown();
+}
+
+template<class T, class Allocator>
+void VDRingBuffer<T, Allocator>::Init(int size) {
+ Shutdown();
+ pBuffer = allocate(nSize = size, 0);
+ nLevel = 0;
+ nReadPoint = 0;
+ nWritePoint = 0;
+}
+
+template<class T, class Allocator>
+void VDRingBuffer<T, Allocator>::Shutdown() {
+ if (pBuffer) {
+ deallocate(pBuffer, nSize);
+ pBuffer = NULL;
+ }
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::getWriteSpace() const {
+ volatile int tc = nSize - nWritePoint;
+ volatile int space = nSize - nLevel;
+
+ if (tc > space)
+ tc = space;
+
+ return tc;
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::Read(T *pBuffer, int units) {
+ VDASSERT(units >= 0);
+
+ int actual = 0;
+ const T *pSrc;
+
+ while(units) {
+ int tc;
+
+ pSrc = LockRead(units, tc);
+
+ if (!tc)
+ break;
+
+ memcpy(pBuffer, pSrc, tc * sizeof(T));
+
+ UnlockRead(tc);
+
+ actual += tc;
+ units -= tc;
+ pBuffer += tc;
+ }
+
+ return actual;
+}
+
+template<class T, class Allocator>
+const T *VDRingBuffer<T, Allocator>::LockRead(int requested, int& actual) {
+ VDASSERT(requested >= 0);
+
+ int nLevelNow = nLevel;
+
+ if (requested > nLevelNow)
+ requested = nLevelNow;
+
+ if (requested + nReadPoint > nSize)
+ requested = nSize - nReadPoint;
+
+ actual = requested;
+
+ return pBuffer + nReadPoint;
+}
+
+template<class T, class Allocator>
+const T *VDRingBuffer<T, Allocator>::LockReadAll(int& actual) {
+ int requested = nLevel;
+
+ if (requested + nReadPoint > nSize)
+ requested = nSize - nReadPoint;
+
+ actual = requested;
+
+ return pBuffer + nReadPoint;
+}
+
+template<class T, class Allocator>
+const T *VDRingBuffer<T, Allocator>::LockReadWrapped(int requested, int& actual, int& readpt) {
+ int nLevelNow = nLevel;
+
+ if (requested > nLevelNow)
+ requested = nLevelNow;
+
+ actual = requested;
+ readpt = nReadPoint;
+
+ return pBuffer;
+}
+
+template<class T, class Allocator>
+const T *VDRingBuffer<T, Allocator>::LockReadAllWrapped(int& actual, int& readpt) {
+ int requested = nLevel;
+
+ actual = requested;
+ readpt = nReadPoint;
+
+ return pBuffer;
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::UnlockRead(int actual) {
+ VDASSERT(actual >= 0);
+ VDASSERT(nLevel >= actual);
+
+ int newpt = nReadPoint + actual;
+
+ if (newpt >= nSize)
+ newpt -= nSize;
+
+ nReadPoint = newpt;
+
+ return nLevel.add(-actual);
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::Write(const T *src, int elements) {
+ VDASSERT(elements >= 0);
+
+ int actual = 0;
+ while(elements) {
+ int tc;
+ void *dst = LockWrite(elements, tc);
+
+ if (!tc)
+ break;
+
+ memcpy(dst, src, tc*sizeof(T));
+
+ UnlockWrite(tc);
+
+ actual += tc;
+ elements -= tc;
+ src += tc;
+ }
+
+ return actual;
+}
+
+template<class T, class Allocator>
+T *VDRingBuffer<T, Allocator>::LockWrite(int requested, int& actual) {
+ VDASSERT(requested >= 0);
+ int nLevelNow = nSize - nLevel;
+
+ if (requested > nLevelNow)
+ requested = nLevelNow;
+
+ if (requested + nWritePoint > nSize)
+ requested = nSize - nWritePoint;
+
+ actual = requested;
+
+ return pBuffer + nWritePoint;
+}
+
+template<class T, class Allocator>
+T *VDRingBuffer<T, Allocator>::LockWriteAll(int& actual) {
+ int requested = nSize - nLevel;
+
+ if (requested + nWritePoint > nSize)
+ requested = nSize - nWritePoint;
+
+ actual = requested;
+
+ return pBuffer + nWritePoint;
+}
+
+template<class T, class Allocator>
+int VDRingBuffer<T, Allocator>::UnlockWrite(int actual) {
+ VDASSERT(actual >= 0);
+ VDASSERT(nLevel + actual <= nSize);
+
+ int newpt = nWritePoint + actual;
+
+ if (newpt >= nSize)
+ newpt = 0;
+
+ nWritePoint = newpt;
+
+ return nLevel.add(actual);
+}
+
+
+
+#endif
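The Lock/Unlock pairs above implement the usual "reserve a contiguous span, fill it, then commit" ring-buffer protocol; Read/Write are the convenience wrappers that loop over the wrap point for you. A short single-threaded sketch using the members as declared:

	#include <vd2/system/VDRingBuffer.h>

	void RingBufferExample() {
		VDRingBuffer<sint16> rb(4096);            // capacity in elements

		// Producer side: reserve as much contiguous space as possible, fill it,
		// then commit only what was actually written.
		int avail;
		sint16 *dst = rb.LockWrite(1024, avail);
		for(int i = 0; i < avail; ++i)
			dst[i] = (sint16)i;
		rb.UnlockWrite(avail);

		// Consumer side: Read() copies out up to the requested element count,
		// handling the wrap point internally.
		sint16 out[256];
		int got = rb.Read(out, 256);
		(void)got;

		rb.Flush();                               // drop anything left
	}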
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDScheduler.h b/src/thirdparty/VirtualDub/h/vd2/system/VDScheduler.h
new file mode 100644
index 000000000..e88fb6c6f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDScheduler.h
@@ -0,0 +1,125 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDSCHEDULER_H
+#define f_VD2_SYSTEM_VDSCHEDULER_H
+
+#include <vd2/system/vdstl.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/error.h>
+
+class VDSchedulerNode;
+class VDSchedulerSuspendNode;
+class VDSignal;
+class IVDAsyncErrorCallback;
+
+class VDScheduler {
+public:
+ VDScheduler();
+ ~VDScheduler();
+
+ void setSignal(VDSignal *);
+ VDSignal *getSignal() { return pWakeupSignal; }
+ void setSchedulerNode(VDSchedulerNode *pSchedulerNode);
+
+ IVDAsyncErrorCallback *getErrorCallback() const { return mpErrorCB; }
+ void setErrorCallback(IVDAsyncErrorCallback *pCB) { mpErrorCB = pCB; }
+
+ bool isShuttingDown() const { return mbExitThreads; }
+
+ void BeginShutdown(); ///< Start signaling scheduling threads to exit.
+
+ bool Run();
+ bool IdleWait(); ///< Wait because no nodes are ready. Returns false if a thread should exit immediately.
+ void Ping(); ///< Restart a scheduler thread. This is required when a scheduler thread leaves.
+ void Lock();
+ void Unlock();
+ void Reschedule(VDSchedulerNode *); ///< Move node to Ready if Waiting.
+ void RescheduleFast(VDSchedulerNode *); ///< Same as Reschedule(), but assumes the scheduler is already locked.
+ void Add(VDSchedulerNode *pNode); ///< Add node to scheduler.
+ void Remove(VDSchedulerNode *pNode); ///< Remove node from scheduler.
+ void DumpStatus();
+
+protected:
+ void Repost(VDSchedulerNode *, bool);
+
+ VDCriticalSection csScheduler;
+ IVDAsyncErrorCallback *mpErrorCB;
+ VDSignal *pWakeupSignal;
+ volatile bool mbExitThreads;
+ VDSchedulerNode *pParentSchedulerNode;
+
+ typedef vdlist<VDSchedulerNode> tNodeList;
+ tNodeList listWaiting, listReady;
+
+ typedef vdlist<VDSchedulerSuspendNode> tSuspendList;
+ tSuspendList listSuspends;
+};
+
+class VDSchedulerNode : public vdlist<VDSchedulerNode>::node {
+friend class VDScheduler;
+public:
+ int nPriority;
+
+ VDSchedulerNode() : nPriority(0) {}
+
+ virtual bool Service()=0;
+
+ virtual void DumpStatus();
+
+ void Reschedule() { pScheduler->Reschedule(this); }
+ void RemoveFromScheduler() { pScheduler->Remove(this); }
+
+protected:
+ VDScheduler *pScheduler;
+ volatile bool bRunning;
+ volatile bool bReschedule;
+ volatile bool bReady;
+ volatile bool bCondemned;
+};
+
+class VDSchedulerSuspendNode : public vdlist<VDSchedulerSuspendNode>::node {
+public:
+ VDSchedulerSuspendNode(VDSchedulerNode *pNode) : mpNode(pNode) {}
+
+ VDSchedulerNode *mpNode;
+ VDSignal mSignal;
+};
+
+class VDSchedulerThread : public VDThread {
+public:
+ VDSchedulerThread();
+ ~VDSchedulerThread();
+
+ bool Start(VDScheduler *pScheduler);
+
+protected:
+ void ThreadRun();
+
+ VDScheduler *mpScheduler;
+ uint32 mAffinity;
+};
+
+#endif
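A node plugs into the scheduler by overriding Service(); the comments on Run/Reschedule above describe the rest of the contract. A sketch of a trivial node driven from the current thread; the meaning of the bool returned by Service() and by Run() is inferred from the names, not stated in this header.

	#include <vd2/system/VDScheduler.h>

	// A node that does one unit of work per Service() call.
	class CounterNode : public VDSchedulerNode {
	public:
		CounterNode() : mCount(0) {}

		bool Service() {
			++mCount;
			return mCount < 100;   // assumed: false once this node has nothing left
		}

	private:
		int mCount;
	};

	void SchedulerExample() {
		VDScheduler scheduler;
		CounterNode node;

		scheduler.Add(&node);

		// Drive the scheduler on this thread until Run() reports no ready work
		// (assumption about its return value; see note above).
		while(scheduler.Run())
			;

		scheduler.Remove(&node);
	}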
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/VDString.h b/src/thirdparty/VirtualDub/h/vd2/system/VDString.h
new file mode 100644
index 000000000..58955384e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/VDString.h
@@ -0,0 +1,1134 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDSTRING_H
+#define f_VD2_SYSTEM_VDSTRING_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <functional>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/text.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDStringSpanA {
+public:
+ typedef char value_type;
+ typedef uint32 size_type;
+ typedef ptrdiff_t difference_type;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef value_type * pointer;
+ typedef const value_type * const_pointer;
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+
+ static const size_type npos = (size_type)-1;
+
+ VDStringSpanA()
+ : mpBegin(const_cast<value_type *>(sNull))
+ , mpEnd(const_cast<value_type *>(sNull))
+ {
+ }
+
+ explicit VDStringSpanA(const value_type *s)
+ : mpBegin(const_cast<value_type *>(s))
+ , mpEnd(const_cast<value_type *>(s) + strlen(s))
+ {
+ }
+
+ VDStringSpanA(const value_type *s, const value_type *t)
+ : mpBegin(const_cast<value_type *>(s))
+ , mpEnd(const_cast<value_type *>(t))
+ {
+ }
+
+ // 21.3.2 iterators
+ const_iterator begin() const { return mpBegin; }
+ const_iterator end() const { return mpEnd; }
+
+ // 21.3.3 capacity
+ size_type size() const { return mpEnd - mpBegin; }
+ size_type length() const { return mpEnd - mpBegin; }
+ bool empty() const { return mpBegin == mpEnd; }
+
+ // 21.3.4 element access
+ const_reference operator[](size_type pos) const { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ const_reference at(size_type pos) const { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+
+ const_reference front() const { VDASSERT(mpBegin != mpEnd); return *mpBegin; }
+ const_reference back() const { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+
+ // 21.3.6 string operations
+ const_pointer data() const { return mpBegin; }
+
+ size_type copy(value_type *dst, size_type n, size_type pos = 0) const {
+ size_type len = (size_type)(mpEnd - mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ memcpy(dst, mpBegin + pos, n*sizeof(value_type));
+ return n;
+ }
+
+ size_type find(value_type c, size_type pos = 0) const {
+ VDASSERT(pos <= (size_type)(mpEnd - mpBegin));
+ const void *p = memchr(mpBegin + pos, c, mpEnd - (mpBegin + pos));
+
+ return p ? (const value_type *)p - mpBegin : npos;
+ }
+
+ int compare(const VDStringSpanA& s) const {
+ size_type l1 = mpEnd - mpBegin;
+ size_type l2 = s.mpEnd - s.mpBegin;
+ size_type lm = l1 < l2 ? l1 : l2;
+
+ int r = memcmp(mpBegin, s.mpBegin, lm);
+
+ if (!r)
+ r = (int)mpBegin[lm] - (int)s.mpBegin[lm];
+
+ return r;
+ }
+
+ const VDStringSpanA trim(const value_type *s) const {
+ bool flags[256]={false};
+
+ while(value_type c = *s++)
+ flags[(unsigned char)c] = true;
+
+ const value_type *p = mpBegin;
+ const value_type *q = mpEnd;
+
+ while(p != q && flags[*p])
+ ++p;
+
+ while(p != q && flags[q[-1]])
+ --q;
+
+ return VDStringSpanA(p, q);
+ }
+
+ const VDStringSpanA subspan(size_type pos = 0, size_type n = npos) const {
+
+ size_type len = (size_type)(mpEnd - mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ value_type *p = mpBegin + pos;
+ return VDStringSpanA(p, p+n);
+ }
+
+protected:
+ friend bool operator==(const VDStringSpanA& x, const VDStringSpanA& y);
+ friend bool operator==(const VDStringSpanA& x, const char *y);
+
+ value_type *mpBegin;
+ value_type *mpEnd;
+
+ static const value_type sNull[1];
+};
+
+inline bool operator==(const VDStringSpanA& x, const VDStringSpanA& y) { VDStringSpanA::size_type len = (VDStringSpanA::size_type)(x.mpEnd - x.mpBegin); return len == (VDStringSpanA::size_type)(y.mpEnd - y.mpBegin) && !memcmp(x.mpBegin, y.mpBegin, len*sizeof(char)); }
+inline bool operator==(const VDStringSpanA& x, const char *y) { size_t len = strlen(y); return len == (size_t)(x.mpEnd - x.mpBegin) && !memcmp(x.mpBegin, y, len*sizeof(char)); }
+inline bool operator==(const char *x, const VDStringSpanA& y) { return y == x; }
+
+inline bool operator!=(const VDStringSpanA& x, const VDStringSpanA& y) { return !(x == y); }
+inline bool operator!=(const VDStringSpanA& x, const char *y) { return !(x == y); }
+inline bool operator!=(const char *x, const VDStringSpanA& y) { return !(y == x); }
+
+inline bool operator<(const VDStringSpanA& x, const VDStringSpanA& y) {
+ return x.compare(y) < 0;
+}
+
+inline bool operator>(const VDStringSpanA& x, const VDStringSpanA& y) {
+ return x.compare(y) > 0;
+}
+
+inline bool operator<=(const VDStringSpanA& x, const VDStringSpanA& y) {
+ return x.compare(y) <= 0;
+}
+
+inline bool operator>=(const VDStringSpanA& x, const VDStringSpanA& y) {
+ return x.compare(y) >= 0;
+}
+
+class VDStringRefA : public VDStringSpanA {
+public:
+ typedef VDStringRefA this_type;
+
+ VDStringRefA() {
+ }
+
+ explicit VDStringRefA(const value_type *s)
+ : VDStringSpanA(s)
+ {
+ }
+
+ explicit VDStringRefA(const VDStringSpanA& s)
+ : VDStringSpanA(s)
+ {
+ }
+
+ VDStringRefA(const value_type *s, const value_type *t)
+ : VDStringSpanA(s, t)
+ {
+ }
+
+ this_type& operator=(const value_type *s) {
+ assign(s);
+ return *this;
+ }
+
+ this_type& operator=(const VDStringSpanA& str) {
+ assign(str);
+ return *this;
+ }
+
+ void assign(const value_type *s) {
+ static_cast<VDStringSpanA&>(*this) = VDStringSpanA(s);
+ }
+
+ void assign(const value_type *s, const value_type *t) {
+ static_cast<VDStringSpanA&>(*this) = VDStringSpanA(s, t);
+ }
+
+ void assign(const VDStringSpanA& s) {
+ static_cast<VDStringSpanA&>(*this) = s;
+ }
+
+ bool split(value_type c, VDStringRefA& token) {
+ size_type pos = find(c);
+
+ if (pos == npos)
+ return false;
+
+ token = subspan(0, pos);
+ mpBegin += pos+1;
+ return true;
+ }
+};
+
+class VDStringA : public VDStringSpanA {
+public:
+ typedef VDStringA this_type;
+
+ // 21.3.1 construct/copy/destroy
+
+ VDStringA()
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ }
+
+ VDStringA(const VDStringSpanA& x)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(x.begin(), x.end());
+ }
+
+ VDStringA(const this_type& x)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(x);
+ }
+
+ explicit VDStringA(const value_type *s)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s);
+ }
+
+ explicit VDStringA(size_type n)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ resize(n);
+ }
+
+ VDStringA(const value_type *s, size_type n)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s, n);
+ }
+
+ VDStringA(const value_type *s, const value_type *t)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s, t);
+ }
+
+ ~VDStringA() {
+ if (mpBegin != sNull)
+ delete[] mpBegin;
+ }
+
+ this_type& operator=(const value_type *s) {
+ assign(s);
+ return *this;
+ }
+
+ this_type& operator=(const this_type& str) {
+ assign(str);
+ return *this;
+ }
+
+ this_type& operator=(const VDStringSpanA& str) {
+ assign(str);
+ return *this;
+ }
+
+ // 21.3.2 iterators
+ using VDStringSpanA::begin;
+ using VDStringSpanA::end;
+
+ iterator begin() { return mpBegin; }
+ iterator end() { return mpEnd; }
+
+ // 21.3.3 capacity (COMPLETE)
+ void resize(size_type n) {
+ size_type current = (size_type)(mpEnd - mpBegin);
+
+ if (n < current) {
+ mpEnd = mpBegin + n;
+ mpEnd[0] = 0;
+ } else if (n > current)
+ resize_slow(n, current);
+ }
+
+ void resize(size_type n, value_type v) {
+ size_type current = (size_type)(mpEnd - mpBegin);
+
+ if (n < current) {
+ mpEnd = mpBegin + n;
+ mpEnd[0] = 0;
+ } else if (n > current)
+ resize_slow(n, current, v);
+ }
+
+ size_type capacity() const { return mpEOS - mpBegin; }
+
+ void reserve(size_t n) {
+ size_type current = (size_type)(mpEOS - mpBegin);
+
+ if (n > current)
+ reserve_slow(n, current);
+ }
+
+ void clear() {
+ if (mpEnd != mpBegin) {
+ mpEnd = mpBegin;
+ mpEnd[0] = 0;
+ }
+ }
+
+ // 21.3.4 element access
+ using VDStringSpanA::operator[];
+ using VDStringSpanA::at;
+ using VDStringSpanA::front;
+ using VDStringSpanA::back;
+
+ reference operator[](size_type pos) { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ reference at(size_type pos) { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ reference front() { VDASSERT(mpBegin != mpEnd); return *mpBegin; }
+ reference back() { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+
+ // 21.3.5 modifiers
+ this_type& operator+=(const this_type& str) {
+ return append(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& operator+=(const value_type *s) {
+ return append(s, s+strlen(s));
+ }
+
+ this_type& operator+=(value_type c) {
+ if (mpEnd == mpEOS)
+ push_back_extend();
+
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ return *this;
+ }
+
+ this_type& append(const this_type& str) {
+ return append(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& append(const this_type& str, size_type pos, size_type n) {
+ size_type len = (size_type)(str.mpEnd - str.mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ return append(str.mpBegin + pos, str.mpBegin + pos + n);
+ }
+
+ this_type& append(const value_type *s, size_type n) {
+ return append(s, s+n);
+ }
+
+ this_type& append(const value_type *s) {
+ return append(s, s+strlen(s));
+ }
+
+ this_type& append(const value_type *s, const value_type *t) {
+ if (s != t) {
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ size_type n = (size_type)(t - s);
+
+ if (current_capacity - current_size < n)
+ reserve_amortized_slow(n, current_size, current_capacity);
+
+ memcpy(mpBegin + current_size, s, n*sizeof(value_type));
+ mpEnd += n;
+ *mpEnd = 0;
+ }
+ return *this;
+ }
+
+ void push_back(const value_type c) {
+ if (mpEnd == mpEOS)
+ push_back_extend();
+
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ }
+
+ this_type& assign(const VDStringSpanA& str) {
+ return assign(str.begin(), str.end());
+ }
+
+ this_type& assign(const this_type& str) {
+ return assign(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& assign(const this_type& str, size_type pos, size_type n) {
+ size_type len = (size_type)(str.mpEnd - str.mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ return assign(str.mpBegin + pos, str.mpBegin + pos + n);
+ }
+
+ this_type& assign(const value_type *s, size_type n) {
+ return assign(s, s+n);
+ }
+
+ this_type& assign(const value_type *s) {
+ return assign(s, s+strlen(s));
+ }
+
+ this_type& assign(size_type n, value_type c) {
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+
+ if (current_capacity < n)
+ reserve_slow(n, current_capacity);
+
+ if (mpBegin != sNull) {
+ mpEnd = mpBegin;
+ while(n--)
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ }
+
+ return *this;
+ }
+
+ this_type& assign(const value_type *s, const value_type *t) {
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ size_type n = (size_type)(t - s);
+
+ if (current_capacity < n)
+ reserve_slow(n, current_capacity);
+
+ if (mpBegin != sNull) {
+ memcpy(mpBegin, s, sizeof(value_type)*n);
+ mpEnd = mpBegin + n;
+ *mpEnd = 0;
+ }
+
+ return *this;
+ }
+
+ this_type& insert(iterator it, value_type c) {
+ if (mpEnd == mpEOS) {
+ size_type pos = (size_type)(it - mpBegin);
+ push_back_extend();
+ it = mpBegin + pos;
+ }
+
+ memmove(it + 1, it, (mpEnd - it + 1)*sizeof(value_type));
+ *it = c;
+ ++mpEnd;
+ return *this;
+ }
+
+ this_type& erase(size_type pos = 0, size_type n = npos) {
+ size_type len = (size_type)(mpEnd - mpBegin);
+
+ VDASSERT(pos <= len);
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ if (n) {
+ size_type pos2 = pos + n;
+ memmove(mpBegin + pos, mpBegin + pos2, (len + 1 - n)*sizeof(value_type));
+ mpEnd -= n;
+ }
+
+ return *this;
+ }
+
+ iterator erase(iterator x) {
+ VDASSERT(x != mpEnd);
+
+ memmove(x, x+1, (mpEnd - x)*sizeof(value_type));
+ --mpEnd;
+ return x;
+ }
+
+ iterator erase(iterator first, iterator last) {
+ VDASSERT(last >= first);
+
+ memmove(first, last, ((mpEnd - last) + 1)*sizeof(value_type));
+ mpEnd -= (last - first);
+ return first;
+ }
+
+ this_type& replace(size_type pos, size_type n1, const value_type *s, size_type n2) {
+ size_type len = (size_type)(mpEnd - mpBegin);
+
+ VDASSERT(pos <= len);
+ size_type limit = len - pos;
+ if (n1 > limit)
+ n1 = limit;
+
+ size_type len2 = len - n1 + n2;
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+
+ if (current_capacity < len2)
+ reserve_slow(len2, current_capacity);
+
+ memmove(mpBegin + pos + n2, mpBegin + pos + n1, (limit - n1 + 1) * sizeof(value_type));
+ memcpy(mpBegin + pos, s, n2*sizeof(value_type));
+ mpEnd = mpBegin + len2;
+ return *this;
+ }
+
+ void swap(this_type& x) {
+ value_type *p;
+
+ p = mpBegin; mpBegin = x.mpBegin; x.mpBegin = p;
+ p = mpEnd; mpEnd = x.mpEnd; x.mpEnd = p;
+ p = mpEOS; mpEOS = x.mpEOS; x.mpEOS = p;
+ }
+
+ // 21.3.6 string operations
+ const_pointer c_str() const { return mpBegin; }
+
+ this_type& sprintf(const value_type *format, ...);
+ this_type& append_sprintf(const value_type *format, ...);
+ this_type& append_vsprintf(const value_type *format, va_list val);
+
+protected:
+ void push_back_extend();
+ void resize_slow(size_type n, size_type current_size);
+ void resize_slow(size_type n, size_type current_size, value_type c);
+ void reserve_slow(size_type n, size_type current_capacity);
+ void reserve_amortized_slow(size_type n, size_type current_size, size_type current_capacity);
+
+ char *mpEOS;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+inline VDStringA operator+(const VDStringA& str, const VDStringA& s) {
+ VDStringA result;
+ result.reserve(str.size() + s.size());
+ result.assign(str);
+ result.append(s);
+ return result;
+}
+
+inline VDStringA operator+(const VDStringA& str, const char *s) {
+ VDStringA result;
+ result.reserve(str.size() + strlen(s));
+ result.assign(str);
+ result.append(s);
+ return result;
+}
+
+inline VDStringA operator+(const VDStringA& str, char c) {
+ VDStringA result;
+ result.reserve(str.size() + 1);
+ result.assign(str);
+ result += c;
+ return result;
+}
+
+namespace std {
+ template<>
+ struct less<VDStringA> : binary_function<VDStringA, VDStringA, bool> {
+ bool operator()(const VDStringA& x, const VDStringA& y) const {
+ return x.compare(y) < 0;
+ }
+ };
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDStringSpanW {
+public:
+ typedef wchar_t value_type;
+ typedef uint32 size_type;
+ typedef ptrdiff_t difference_type;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef value_type * pointer;
+ typedef const value_type * const_pointer;
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+
+ static const size_type npos = (size_type)-1;
+
+ VDStringSpanW()
+ : mpBegin(const_cast<value_type *>(sNull))
+ , mpEnd(const_cast<value_type *>(sNull))
+ {
+ }
+
+ explicit VDStringSpanW(const value_type *s)
+ : mpBegin(const_cast<value_type *>(s))
+ , mpEnd(const_cast<value_type *>(s) + wcslen(s))
+ {
+ }
+
+ VDStringSpanW(const value_type *s, const value_type *t)
+ : mpBegin(const_cast<value_type *>(s))
+ , mpEnd(const_cast<value_type *>(t))
+ {
+ }
+
+ // 21.3.2 iterators
+ const_iterator begin() const { return mpBegin; }
+ const_iterator end() const { return mpEnd; }
+
+ // 21.3.3 capacity
+ size_type size() const { return mpEnd - mpBegin; }
+ size_type length() const { return mpEnd - mpBegin; }
+ bool empty() const { return mpBegin == mpEnd; }
+
+ // 21.3.4 element access
+ const_reference operator[](size_type pos) const { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ const_reference at(size_type pos) const { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+
+ const_reference front() const { VDASSERT(mpBegin != mpEnd); return *mpBegin; }
+ const_reference back() const { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+
+ // 21.3.6 string operations
+ const_pointer data() const { return mpBegin; }
+
+ size_type copy(value_type *dst, size_type n, size_type pos = 0) const {
+ size_type len = (size_type)(mpEnd - mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ memcpy(dst, mpBegin + pos, n*sizeof(value_type));
+ return n;
+ }
+
+ size_type find(value_type c, size_type pos = 0) const {
+ VDASSERT(pos <= (size_type)(mpEnd - mpBegin));
+ const void *p = wmemchr(mpBegin + pos, c, mpEnd - (mpBegin + pos));
+
+ return p ? (const value_type *)p - mpBegin : npos;
+ }
+
+ // extensions
+ const VDStringSpanW subspan(size_type pos, size_type n) const {
+ size_type len = (size_type)(mpEnd - mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ value_type *p = mpBegin + pos;
+ return VDStringSpanW(p, p+n);
+ }
+
+protected:
+ friend bool operator==(const VDStringSpanW& x, const VDStringSpanW& y);
+ friend bool operator==(const VDStringSpanW& x, const wchar_t *y);
+
+ value_type *mpBegin;
+ value_type *mpEnd;
+
+ static const value_type sNull[1];
+};
+
+inline bool operator==(const VDStringSpanW& x, const VDStringSpanW& y) { VDStringSpanW::size_type len = (VDStringSpanW::size_type)(x.mpEnd - x.mpBegin); return len == (VDStringSpanW::size_type)(y.mpEnd - y.mpBegin) && !memcmp(x.mpBegin, y.mpBegin, len*sizeof(wchar_t)); }
+inline bool operator==(const VDStringSpanW& x, const wchar_t *y) { size_t len = wcslen(y); return len == (size_t)(x.mpEnd - x.mpBegin) && !memcmp(x.mpBegin, y, len*sizeof(wchar_t)); }
+inline bool operator==(const wchar_t *x, const VDStringSpanW& y) { return y == x; }
+
+inline bool operator!=(const VDStringSpanW& x, const VDStringSpanW& y) { return !(x == y); }
+inline bool operator!=(const VDStringSpanW& x, const wchar_t *y) { return !(x == y); }
+inline bool operator!=(const wchar_t *x, const VDStringSpanW& y) { return !(y == x); }
+
+class VDStringRefW : public VDStringSpanW {
+public:
+ typedef VDStringRefW this_type;
+
+ VDStringRefW() {
+ }
+
+ explicit VDStringRefW(const value_type *s)
+ : VDStringSpanW(s)
+ {
+ }
+
+ explicit VDStringRefW(const VDStringSpanW& s)
+ : VDStringSpanW(s)
+ {
+ }
+
+ VDStringRefW(const value_type *s, const value_type *t)
+ : VDStringSpanW(s, t)
+ {
+ }
+
+ this_type& operator=(const value_type *s) {
+ assign(s);
+ return *this;
+ }
+
+ this_type& operator=(const VDStringSpanW& str) {
+ assign(str);
+ return *this;
+ }
+
+ void assign(const value_type *s) {
+ static_cast<VDStringSpanW&>(*this) = VDStringSpanW(s);
+ }
+
+ void assign(const value_type *s, const value_type *t) {
+ static_cast<VDStringSpanW&>(*this) = VDStringSpanW(s, t);
+ }
+
+ void assign(const VDStringSpanW& s) {
+ static_cast<VDStringSpanW&>(*this) = s;
+ }
+
+ bool split(value_type c, VDStringRefW& token) {
+ size_type pos = find(c);
+
+ if (pos == npos)
+ return false;
+
+ token = subspan(0, pos);
+ mpBegin += pos+1;
+ return true;
+ }
+};
+
+class VDStringW : public VDStringSpanW {
+public:
+ typedef VDStringW this_type;
+
+ // 21.3.1 construct/copy/destroy
+
+ VDStringW()
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ }
+
+ VDStringW(const VDStringSpanW& x)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(x.begin(), x.end());
+ }
+
+ VDStringW(const this_type& x)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(x);
+ }
+
+ explicit VDStringW(const value_type *s)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s);
+ }
+
+ explicit VDStringW(size_type n)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ resize(n);
+ }
+
+ VDStringW(const value_type *s, size_type n)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s, n);
+ }
+
+ VDStringW(const value_type *s, const value_type *t)
+ : mpEOS(const_cast<value_type *>(sNull))
+ {
+ assign(s, t);
+ }
+
+ ~VDStringW() {
+ if (mpBegin != sNull)
+ delete[] mpBegin;
+ }
+
+ this_type& operator=(const wchar_t *s) {
+ assign(s);
+ return *this;
+ }
+
+ this_type& operator=(const this_type& str) {
+ assign(str);
+ return *this;
+ }
+
+ // 21.3.2 iterators
+ using VDStringSpanW::begin;
+ using VDStringSpanW::end;
+ iterator begin() { return mpBegin; }
+ iterator end() { return mpEnd; }
+
+ // 21.3.3 capacity (COMPLETE)
+ void resize(size_type n) {
+ size_type current = (size_type)(mpEnd - mpBegin);
+
+ if (n < current) {
+ mpEnd = mpBegin + n;
+ mpEnd[0] = 0;
+ } else if (n > current)
+ resize_slow(n, current);
+ }
+
+ void resize(size_type n, value_type v) {
+ size_type current = (size_type)(mpEnd - mpBegin);
+
+ if (n < current) {
+ mpEnd = mpBegin + n;
+ mpEnd[0] = 0;
+ } else if (n > current)
+ resize_slow(n, current);
+ wmemset(mpBegin, v, n);
+ }
+
+ size_type capacity() const { return mpEOS - mpBegin; }
+
+ void reserve(size_t n) {
+ size_type current = (size_type)(mpEOS - mpBegin);
+
+ if (n > current)
+ reserve_slow(n, current);
+ }
+
+ void clear() {
+ if (mpEnd != mpBegin) {
+ mpEnd = mpBegin;
+ mpEnd[0] = 0;
+ }
+ }
+
+ // 21.3.4 element access
+ using VDStringSpanW::operator[];
+ using VDStringSpanW::at;
+ using VDStringSpanW::front;
+ using VDStringSpanW::back;
+ reference operator[](size_type pos) { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ reference at(size_type pos) { VDASSERT(pos < (size_type)(mpEnd - mpBegin)); return mpBegin[pos]; }
+ reference front() { VDASSERT(mpBegin != mpEnd); return *mpBegin; }
+ reference back() { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+
+ // 21.3.5 modifiers
+ this_type& operator+=(const this_type& str) {
+ return append(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& operator+=(const value_type *s) {
+ return append(s, s+wcslen(s));
+ }
+
+ this_type& operator+=(value_type c) {
+ if (mpEnd == mpEOS)
+ push_back_extend();
+
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ return *this;
+ }
+
+ this_type& append(const this_type& str) {
+ return append(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& append(const this_type& str, size_type pos, size_type n) {
+ size_type len = (size_type)(str.mpEnd - str.mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ return append(str.mpBegin + pos, str.mpBegin + pos + n);
+ }
+
+ this_type& append(const value_type *s, size_type n) {
+ return append(s, s+n);
+ }
+
+ this_type& append(const value_type *s) {
+ return append(s, s+wcslen(s));
+ }
+
+ this_type& append(const value_type *s, const value_type *t) {
+ if (s != t) {
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ size_type n = (size_type)(t - s);
+
+ if (current_capacity - current_size < n)
+ reserve_amortized_slow(n, current_size, current_capacity);
+
+ memcpy(mpBegin + current_size, s, n*sizeof(value_type));
+ mpEnd += n;
+ *mpEnd = 0;
+ }
+ return *this;
+ }
+
+ void push_back(const value_type c) {
+ if (mpEnd == mpEOS)
+ push_back_extend();
+
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ }
+
+ this_type& assign(const this_type& str) {
+ return assign(str.mpBegin, str.mpEnd);
+ }
+
+ this_type& assign(const this_type& str, size_type pos, size_type n) {
+ size_type len = (size_type)(str.mpEnd - str.mpBegin);
+ VDASSERT(pos <= len);
+
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ return assign(str.mpBegin + pos, str.mpBegin + pos + n);
+ }
+
+ this_type& assign(const value_type *s, size_type n) {
+ return assign(s, s+n);
+ }
+
+ this_type& assign(const value_type *s) {
+ return assign(s, s+wcslen(s));
+ }
+
+ this_type& assign(size_type n, value_type c) {
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+
+ if (current_capacity < n)
+ reserve_slow(n, current_capacity);
+
+ if (mpBegin != sNull) {
+ mpEnd = mpBegin;
+ while(n--)
+ *mpEnd++ = c;
+ *mpEnd = 0;
+ }
+
+ return *this;
+ }
+
+ this_type& assign(const value_type *s, const value_type *t) {
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ size_type n = (size_type)(t - s);
+
+ if (current_capacity < n)
+ reserve_slow(n, current_capacity);
+
+ if (mpBegin != sNull) {
+ memcpy(mpBegin, s, sizeof(value_type)*n);
+ mpEnd = mpBegin + n;
+ *mpEnd = 0;
+ }
+
+ return *this;
+ }
+
+ this_type& insert(iterator it, value_type c) {
+ if (mpEnd == mpEOS) {
+ size_type pos = (size_type)(it - mpBegin);
+ push_back_extend();
+ it = mpBegin + pos;
+ }
+
+ memmove(it + 1, it, (mpEnd - it + 1)*sizeof(value_type));
+ *it = c;
+ ++mpEnd;
+ return *this;
+ }
+
+ this_type& erase(size_type pos = 0, size_type n = npos) {
+ size_type len = (size_type)(mpEnd - mpBegin);
+
+ VDASSERT(pos <= len);
+ len -= pos;
+ if (n > len)
+ n = len;
+
+ if (n) {
+ size_type pos2 = pos + n;
+ memmove(mpBegin + pos, mpBegin + pos2, (len + 1 - n)*sizeof(value_type));
+ mpEnd -= n;
+ }
+
+ return *this;
+ }
+
+ iterator erase(iterator x) {
+ VDASSERT(x != mpEnd);
+
+ memmove(x, x+1, (mpEnd - x)*sizeof(value_type));
+ --mpEnd;
+ return x;
+ }
+
+ iterator erase(iterator first, iterator last) {
+ VDASSERT(last >= first);
+
+ memmove(first, last, ((mpEnd - last) + 1)*sizeof(value_type));
+ mpEnd -= (last - first);
+ return first;
+ }
+
+ this_type& replace(size_type pos, size_type n1, const value_type *s, size_type n2) {
+ size_type len = (size_type)(mpEnd - mpBegin);
+
+ VDASSERT(pos <= len);
+ size_type limit = len - pos;
+ if (n1 > limit)
+ n1 = limit;
+
+ size_type len2 = len - n1 + n2;
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+
+ if (current_capacity < len2)
+ reserve_slow(len2, current_capacity);
+
+ memmove(mpBegin + pos + n2, mpBegin + pos + n1, (limit - n1 + 1) * sizeof(value_type));
+ memcpy(mpBegin + pos, s, n2*sizeof(value_type));
+ mpEnd = mpBegin + len2;
+ return *this;
+ }
+
+ void swap(this_type& x) {
+ value_type *p;
+
+ p = mpBegin; mpBegin = x.mpBegin; x.mpBegin = p;
+ p = mpEnd; mpEnd = x.mpEnd; x.mpEnd = p;
+ p = mpEOS; mpEOS = x.mpEOS; x.mpEOS = p;
+ }
+
+ // 21.3.6 string operations
+ const_pointer c_str() const { return mpBegin; }
+
+ this_type& sprintf(const value_type *format, ...);
+ this_type& append_sprintf(const value_type *format, ...);
+ this_type& append_vsprintf(const value_type *format, va_list val);
+
+protected:
+ void push_back_extend();
+ void resize_slow(size_type n, size_type current_size);
+ void reserve_slow(size_type n, size_type current_capacity);
+ void reserve_amortized_slow(size_type n, size_type current_size, size_type current_capacity);
+
+ value_type *mpEOS;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+inline VDStringW operator+(const VDStringW& str, const VDStringW& s) {
+ VDStringW result;
+ result.reserve(str.size() + s.size());
+ result.assign(str);
+ result.append(s);
+ return result;
+}
+
+inline VDStringW operator+(const VDStringW& str, const wchar_t *s) {
+ VDStringW result;
+ result.reserve(str.size() + wcslen(s));
+ result.assign(str);
+ result.append(s);
+ return result;
+}
+
+inline VDStringW operator+(const VDStringW& str, wchar_t c) {
+ VDStringW result;
+ result.reserve(str.size() + 1);
+ result.assign(str);
+ result += c;
+ return result;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+typedef VDStringA VDString;
+
+
+#endif
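A minimal usage sketch of the string classes declared above; this is illustrative only, not part of the imported header, and the include path is an assumption (the hunk header naming this file lies outside this excerpt).

#include <vd2/system/VDString.h>    // assumed path

void StringSketch() {
    VDStringA name("clip");             // narrow string, kept NUL-terminated for c_str()
    name += '_';
    name.append("001");                 // growth is amortized via reserve_amortized_slow()

    VDStringA full = name + ".avi";     // operator+ reserves size() + strlen(s) up front

    VDStringW wide(L"deinterlace");
    wide.insert(wide.begin(), L'x');    // single-character insert shifts the terminator too
    wide.erase(0, 1);                   // ...and erase removes it again

    const char *cs = full.c_str();      // valid even for an empty string (points at sNull)
    (void)cs;
}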
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/atomic.h b/src/thirdparty/VirtualDub/h/vd2/system/atomic.h
new file mode 100644
index 000000000..a7c2eb532
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/atomic.h
@@ -0,0 +1,282 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_ATOMIC_H
+#define f_VD2_SYSTEM_ATOMIC_H
+
+#include <vd2/system/vdtypes.h>
+
+// Intrinsics available in VC6.0
+extern "C" long __cdecl _InterlockedDecrement(volatile long *p);
+extern "C" long __cdecl _InterlockedIncrement(volatile long *p);
+extern "C" long __cdecl _InterlockedCompareExchange(volatile long *p, long n, long p_compare);
+extern "C" long __cdecl _InterlockedExchange(volatile long *p, long n);
+extern "C" long __cdecl _InterlockedExchangeAdd(volatile long *p, long n);
+
+#pragma intrinsic(_InterlockedDecrement)
+#pragma intrinsic(_InterlockedIncrement)
+#pragma intrinsic(_InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchange)
+#pragma intrinsic(_InterlockedExchangeAdd)
+
+// Intrinsics available in VC7.1. Note that the compiler is smart enough to
+// use straight LOCK AND/OR/XOR if the return value is not needed; otherwise
+// it uses a LOCK CMPXCHG loop.
+#if _MSC_VER >= 1310
+ extern "C" long __cdecl _InterlockedAnd(volatile long *p, long n);
+ extern "C" long __cdecl _InterlockedOr(volatile long *p, long n);
+ extern "C" long __cdecl _InterlockedXor(volatile long *p, long n);
+
+ #pragma intrinsic(_InterlockedAnd)
+ #pragma intrinsic(_InterlockedOr)
+ #pragma intrinsic(_InterlockedXor)
+#endif
+
+// Intrinsics available with AMD64
+#ifdef _M_AMD64
+ extern "C" void *__cdecl _InterlockedExchangePointer(void *volatile *pp, void *p);
+ #pragma intrinsic(_InterlockedExchangePointer)
+ extern "C" void *__cdecl _InterlockedCompareExchangePointer(void *volatile *pp, void *p, void *compare);
+ #pragma intrinsic(_InterlockedCompareExchangePointer)
+#endif
+
+inline void *VDAtomicCompareExchangePointer(void *volatile *pp, void *p, void *compare) {
+#ifdef _M_AMD64
+ return _InterlockedCompareExchangePointer(pp, p, compare);
+#else
+ return (void *)(sintptr)_InterlockedCompareExchange((volatile long *)(volatile sintptr *)pp, (long)(sintptr)p, (long)(sintptr)compare);
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////
+/// \class VDAtomicInt
+/// \brief Wrapped integer supporting thread-safe atomic operations.
+///
+/// VDAtomicInt allows integer values shared between threads to be
+/// modified with several common operations in a lock-less manner and
+/// without the need for explicit barriers. This is particularly useful
+/// for thread-safe reference counting.
+///
+class VDAtomicInt {
+protected:
+ volatile int n;
+
+public:
+ VDAtomicInt() {}
+ VDAtomicInt(int v) : n(v) {}
+
+ bool operator!() const { return !n; }
+ bool operator!=(volatile int v) const { return n!=v; }
+ bool operator==(volatile int v) const { return n==v; }
+ bool operator<=(volatile int v) const { return n<=v; }
+ bool operator>=(volatile int v) const { return n>=v; }
+ bool operator<(volatile int v) const { return n<v; }
+ bool operator>(volatile int v) const { return n>v; }
+
+ ///////////////////////////////
+
+ /// Atomically exchanges a value with an integer in memory.
+ static inline int staticExchange(volatile int *dst, int v) {
+ return (int)_InterlockedExchange((volatile long *)dst, v);
+ }
+
+ /// Atomically adds one to an integer in memory.
+ static inline void staticIncrement(volatile int *dst) {
+ _InterlockedExchangeAdd((volatile long *)dst, 1);
+ }
+
+ /// Atomically subtracts one from an integer in memory.
+ static inline void staticDecrement(volatile int *dst) {
+ _InterlockedExchangeAdd((volatile long *)dst, -1);
+ }
+
+ /// Atomically subtracts one from an integer in memory and returns
+ /// true if the result is zero.
+ static inline bool staticDecrementTestZero(volatile int *dst) {
+ return 1 == _InterlockedExchangeAdd((volatile long *)dst, -1);
+ }
+
+ /// Atomically adds a value to an integer in memory and returns the
+ /// result.
+ static inline int staticAdd(volatile int *dst, int v) {
+ return (int)_InterlockedExchangeAdd((volatile long *)dst, v) + v;
+ }
+
+ /// Atomically adds a value to an integer in memory and returns the
+ /// original value from before the add (post-add semantics).
+ static inline int staticExchangeAdd(volatile int *dst, int v) {
+ return _InterlockedExchangeAdd((volatile long *)dst, v);
+ }
+
+ /// Atomically compares an integer in memory to a compare value and
+ /// swaps the memory location with a second value if the compare
+ /// succeeds. The return value is the memory value prior to the swap.
+ static inline int staticCompareExchange(volatile int *dst, int v, int compare) {
+ return _InterlockedCompareExchange((volatile long *)dst, v, compare);
+ }
+
+ ///////////////////////////////
+
+ int operator=(int v) { return n = v; }
+
+ int operator++() { return staticAdd(&n, 1); }
+ int operator--() { return staticAdd(&n, -1); }
+ int operator++(int) { return staticExchangeAdd(&n, 1); }
+ int operator--(int) { return staticExchangeAdd(&n, -1); }
+ int operator+=(int v) { return staticAdd(&n, v); }
+ int operator-=(int v) { return staticAdd(&n, -v); }
+
+#if _MSC_VER >= 1310
+ void operator&=(int v) { _InterlockedAnd((volatile long *)&n, v); } ///< Atomic bitwise AND.
+ void operator|=(int v) { _InterlockedOr((volatile long *)&n, v); } ///< Atomic bitwise OR.
+ void operator^=(int v) { _InterlockedXor((volatile long *)&n, v); } ///< Atomic bitwise XOR.
+#else
+ /// Atomic bitwise AND.
+ void operator&=(int v) {
+ __asm mov eax,v
+ __asm mov ecx,this
+ __asm lock and dword ptr [ecx],eax
+ }
+
+ /// Atomic bitwise OR.
+ void operator|=(int v) {
+ __asm mov eax,v
+ __asm mov ecx,this
+ __asm lock or dword ptr [ecx],eax
+ }
+
+ /// Atomic bitwise XOR.
+ void operator^=(int v) {
+ __asm mov eax,v
+ __asm mov ecx,this
+ __asm lock xor dword ptr [ecx],eax
+ }
+#endif
+
+ operator int() const {
+ return n;
+ }
+
+ /// Atomic exchange.
+ int xchg(int v) {
+ return staticExchange(&n, v);
+ }
+
+ /// Compare/exchange (486+).
+ int compareExchange(int newValue, int oldValue) {
+ return staticCompareExchange(&n, newValue, oldValue);
+ }
+
+ // 486 only, but much nicer. They return the actual result.
+
+ int inc() { return operator++(); } ///< Atomic increment.
+ int dec() { return operator--(); } ///< Atomic decrement.
+ int add(int v) { return operator+=(v); } ///< Atomic add.
+
+ // These return the result before the operation, which is more in line with
+ // what XADD allows us to do.
+
+ int postinc() { return operator++(0); } ///< Atomic post-increment.
+ int postdec() { return operator--(0); } ///< Atomic post-decrement.
+ int postadd(int v) { return staticExchangeAdd(&n, v); } ///< Atomic post-add.
+
+};
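The class comment above calls out thread-safe reference counting; a minimal sketch of that pattern with a hypothetical object type (not part of atomic.h):

class RefCountedThing {
public:
    RefCountedThing() : mRefCount(1) {}

    int AddRef() {
        return ++mRefCount;             // atomic increment, returns the new count
    }

    int Release() {
        int rc = --mRefCount;           // atomic decrement, returns the new count
        if (!rc)
            delete this;                // last reference released
        return rc;
    }

private:
    VDAtomicInt mRefCount;
};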
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDAtomicFloat {
+protected:
+ volatile float n;
+
+public:
+ VDAtomicFloat() {}
+ VDAtomicFloat(float v) : n(v) {}
+
+ bool operator!=(float v) const { return n!=v; }
+ bool operator==(float v) const { return n==v; }
+ bool operator<=(float v) const { return n<=v; }
+ bool operator>=(float v) const { return n>=v; }
+ bool operator<(float v) const { return n<v; }
+ bool operator>(float v) const { return n>v; }
+
+ float operator=(float v) { return n = v; }
+
+ operator float() const {
+ return n;
+ }
+
+ /// Atomic exchange.
+ float xchg(float v) {
+ union { int i; float f; } converter = {VDAtomicInt::staticExchange((volatile int *)&n, *(const int *)&v)};
+
+ return converter.f;
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////
+/// \class VDAtomicPtr
+/// \brief Wrapped pointer supporting thread-safe atomic operations.
+///
+/// VDAtomicPtr allows a shared pointer to be safely manipulated by
+/// multiple threads without locks. Note that atomicity is only guaranteed
+/// for the pointer itself, so any operations on the pointed-to object must
+/// be protected by other means, such as an inner lock or other atomic
+/// operations. An atomic pointer can serve as a single-entry queue.
+///
+template<typename T>
+class VDAtomicPtr {
+protected:
+ T *volatile ptr;
+
+public:
+ VDAtomicPtr() {}
+ VDAtomicPtr(T *p) : ptr(p) { }
+
+ operator T*() const { return ptr; }
+ T* operator->() const { return ptr; }
+
+ T* operator=(T* p) {
+ return ptr = p;
+ }
+
+ /// Atomic pointer exchange.
+ T *xchg(T* p) {
+#ifdef _M_AMD64
+ return ptr == p ? p : (T *)_InterlockedExchangePointer((void *volatile *)&ptr, p);
+#else
+ return ptr == p ? p : (T *)_InterlockedExchange((volatile long *)&ptr, (long)p);
+#endif
+ }
+
+ T *compareExchange(T *newValue, T *oldValue) {
+#ifdef _M_AMD64
+ return (T *)_InterlockedCompareExchangePointer((void *volatile *)&ptr, (void *)newValue, (void *)oldValue);
+#else
+ return (T *)_InterlockedCompareExchange((volatile long *)&ptr, (long)(size_t)newValue, (long)(size_t)oldValue);
+#endif
+ }
+};
+
+#endif
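The VDAtomicPtr comment above mentions that an atomic pointer can serve as a single-entry queue; this is a sketch of that idea with a hypothetical Frame type (not part of the header):

#include <vd2/system/atomic.h>

struct Frame { int data; };             // hypothetical payload

VDAtomicPtr<Frame> g_mailbox(NULL);     // single slot shared by producer and consumer

void PublishFrame(Frame *f) {
    // Swap the new frame in; if the consumer never picked up the previous one,
    // xchg() hands it back so it can be dropped instead of leaked.
    delete g_mailbox.xchg(f);
}

Frame *TakeFrame() {
    // Take whatever is queued, leaving the slot empty (may return NULL).
    return g_mailbox.xchg(NULL);
}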
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/binary.h b/src/thirdparty/VirtualDub/h/vd2/system/binary.h
new file mode 100644
index 000000000..66542a516
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/binary.h
@@ -0,0 +1,184 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_BINARY_H
+#define f_VD2_SYSTEM_BINARY_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+#define VDMAKEFOURCC(byte1, byte2, byte3, byte4) (((uint8)byte1) + (((uint8)byte2) << 8) + (((uint8)byte3) << 16) + (((uint8)byte4) << 24))
+
+#ifdef _MSC_VER
+ unsigned short _byteswap_ushort(unsigned short);
+ unsigned long _byteswap_ulong(unsigned long);
+ unsigned __int64 _byteswap_uint64(unsigned __int64);
+
+ #pragma intrinsic(_byteswap_ushort)
+ #pragma intrinsic(_byteswap_ulong)
+ #pragma intrinsic(_byteswap_uint64)
+
+ inline uint16 VDSwizzleU16(uint16 value) { return (uint16)_byteswap_ushort((unsigned short)value); }
+ inline sint16 VDSwizzleS16(sint16 value) { return (sint16)_byteswap_ushort((unsigned short)value); }
+ inline uint32 VDSwizzleU32(uint32 value) { return (uint32)_byteswap_ulong((unsigned long)value); }
+ inline sint32 VDSwizzleS32(sint32 value) { return (sint32)_byteswap_ulong((unsigned long)value); }
+ inline uint64 VDSwizzleU64(uint64 value) { return (uint64)_byteswap_uint64((unsigned __int64)value); }
+ inline sint64 VDSwizzleS64(sint64 value) { return (sint64)_byteswap_uint64((unsigned __int64)value); }
+#else
+ inline uint16 VDSwizzleU16(uint16 value) {
+ return (uint16)((value >> 8) + (value << 8));
+ }
+
+ inline sint16 VDSwizzleS16(sint16 value) {
+ return (sint16)(((uint16)value >> 8) + ((uint16)value << 8));
+ }
+
+ inline uint32 VDSwizzleU32(uint32 value) {
+ return (value >> 24) + (value << 24) + ((value&0xff00)<<8) + ((value&0xff0000)>>8);
+ }
+
+ inline sint32 VDSwizzleS32(sint32 value) {
+ return (sint32)(((uint32)value >> 24) + ((uint32)value << 24) + (((uint32)value&0xff00)<<8) + (((uint32)value&0xff0000)>>8));
+ }
+
+ inline uint64 VDSwizzleU64(uint64 value) {
+ return ((value & 0xFF00000000000000) >> 56) +
+ ((value & 0x00FF000000000000) >> 40) +
+ ((value & 0x0000FF0000000000) >> 24) +
+ ((value & 0x000000FF00000000) >> 8) +
+ ((value & 0x00000000FF000000) << 8) +
+ ((value & 0x0000000000FF0000) << 24) +
+ ((value & 0x000000000000FF00) << 40) +
+ ((value & 0x00000000000000FF) << 56);
+ }
+
+ inline sint64 VDSwizzleS64(sint64 value) {
+ return (sint64)((((uint64)value & 0xFF00000000000000) >> 56) +
+ (((uint64)value & 0x00FF000000000000) >> 40) +
+ (((uint64)value & 0x0000FF0000000000) >> 24) +
+ (((uint64)value & 0x000000FF00000000) >> 8) +
+ (((uint64)value & 0x00000000FF000000) << 8) +
+ (((uint64)value & 0x0000000000FF0000) << 24) +
+ (((uint64)value & 0x000000000000FF00) << 40) +
+ (((uint64)value & 0x00000000000000FF) << 56));
+ }
+#endif
+
+inline uint16 VDReadUnalignedU16(const void *p) { return *(uint16 *)p; }
+inline sint16 VDReadUnalignedS16(const void *p) { return *(sint16 *)p; }
+inline uint32 VDReadUnalignedU32(const void *p) { return *(uint32 *)p; }
+inline sint32 VDReadUnalignedS32(const void *p) { return *(sint32 *)p; }
+inline uint64 VDReadUnalignedU64(const void *p) { return *(uint64 *)p; }
+inline sint64 VDReadUnalignedS64(const void *p) { return *(sint64 *)p; }
+inline float VDReadUnalignedF(const void *p) { return *(float *)p; }
+inline double VDReadUnalignedD(const void *p) { return *(double *)p; }
+
+inline uint16 VDReadUnalignedLEU16(const void *p) { return *(uint16 *)p; }
+inline sint16 VDReadUnalignedLES16(const void *p) { return *(sint16 *)p; }
+inline uint32 VDReadUnalignedLEU32(const void *p) { return *(uint32 *)p; }
+inline sint32 VDReadUnalignedLES32(const void *p) { return *(sint32 *)p; }
+inline uint64 VDReadUnalignedLEU64(const void *p) { return *(uint64 *)p; }
+inline sint64 VDReadUnalignedLES64(const void *p) { return *(sint64 *)p; }
+inline float VDReadUnalignedLEF(const void *p) { return *(float *)p; }
+inline double VDReadUnalignedLED(const void *p) { return *(double *)p; }
+
+inline uint16 VDReadUnalignedBEU16(const void *p) { return VDSwizzleU16(*(uint16 *)p); }
+inline sint16 VDReadUnalignedBES16(const void *p) { return VDSwizzleS16(*(sint16 *)p); }
+inline uint32 VDReadUnalignedBEU32(const void *p) { return VDSwizzleU32(*(uint32 *)p); }
+inline sint32 VDReadUnalignedBES32(const void *p) { return VDSwizzleS32(*(sint32 *)p); }
+inline uint64 VDReadUnalignedBEU64(const void *p) { return VDSwizzleU64(*(uint64 *)p); }
+inline sint64 VDReadUnalignedBES64(const void *p) { return VDSwizzleS64(*(sint64 *)p); }
+inline float VDReadUnalignedBEF(const void *p) {
+ union {
+ uint32 i;
+ float f;
+ } conv = {VDSwizzleU32(*(const uint32 *)p)};
+ return conv.f;
+}
+inline double VDReadUnalignedBED(const void *p) {
+ union {
+ uint64 i;
+ double d;
+ } conv = {VDSwizzleU64(*(const uint64 *)p)};
+ return conv.d;
+}
+
+inline void VDWriteUnalignedU16 (void *p, uint16 v) { *(uint16 *)p = v; }
+inline void VDWriteUnalignedS16 (void *p, sint16 v) { *(sint16 *)p = v; }
+inline void VDWriteUnalignedU32 (void *p, uint32 v) { *(uint32 *)p = v; }
+inline void VDWriteUnalignedS32 (void *p, sint32 v) { *(sint32 *)p = v; }
+inline void VDWriteUnalignedU64 (void *p, uint64 v) { *(uint64 *)p = v; }
+inline void VDWriteUnalignedS64 (void *p, sint64 v) { *(sint64 *)p = v; }
+inline void VDWriteUnalignedF (void *p, float v) { *(float *)p = v; }
+inline void VDWriteUnalignedD (void *p, double v) { *(double *)p = v; }
+
+inline void VDWriteUnalignedLEU16(void *p, uint16 v) { *(uint16 *)p = v; }
+inline void VDWriteUnalignedLES16(void *p, sint16 v) { *(sint16 *)p = v; }
+inline void VDWriteUnalignedLEU32(void *p, uint32 v) { *(uint32 *)p = v; }
+inline void VDWriteUnalignedLES32(void *p, sint32 v) { *(sint32 *)p = v; }
+inline void VDWriteUnalignedLEU64(void *p, uint64 v) { *(uint64 *)p = v; }
+inline void VDWriteUnalignedLES64(void *p, sint64 v) { *(sint64 *)p = v; }
+inline void VDWriteUnalignedLEF (void *p, float v) { *(float *)p = v; }
+inline void VDWriteUnalignedLED (void *p, double v) { *(double *)p = v; }
+
+inline void VDWriteUnalignedBEU16(void *p, uint16 v) { *(uint16 *)p = VDSwizzleU16(v); }
+inline void VDWriteUnalignedBES16(void *p, sint16 v) { *(sint16 *)p = VDSwizzleS16(v); }
+inline void VDWriteUnalignedBEU32(void *p, uint32 v) { *(uint32 *)p = VDSwizzleU32(v); }
+inline void VDWriteUnalignedBES32(void *p, sint32 v) { *(sint32 *)p = VDSwizzleS32(v); }
+inline void VDWriteUnalignedBEU64(void *p, uint64 v) { *(uint64 *)p = VDSwizzleU64(v); }
+inline void VDWriteUnalignedBES64(void *p, sint64 v) { *(sint64 *)p = VDSwizzleS64(v); }
+inline void VDWriteUnalignedBEF(void *p, float v) {
+ union {
+ float f;
+ uint32 i;
+ } conv = {v};
+ *(uint32 *)p = VDSwizzleU32(conv.i);
+}
+inline void VDWriteUnalignedBED(void *p, double v) {
+ union {
+ double f;
+ uint64 i;
+ } conv = {v};
+ *(uint64 *)p = VDSwizzleU64(conv.i);
+}
+
+#define VDFromLE8(x) (x)
+#define VDFromLE16(x) (x)
+#define VDFromLE32(x) (x)
+#define VDFromBE8(x) VDSwizzleU8(x)
+#define VDFromBE16(x) VDSwizzleU16(x)
+#define VDFromBE32(x) VDSwizzleU32(x)
+
+#define VDToLE8(x) (x)
+#define VDToLE16(x) (x)
+#define VDToLE32(x) (x)
+#define VDToBE8(x) VDSwizzleU8(x)
+#define VDToBE16(x) VDSwizzleU16(x)
+#define VDToBE32(x) VDSwizzleU32(x)
+
+#endif
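A short sketch of the unaligned/byte-order helpers above, e.g. pulling big-endian fields out of a byte stream (illustrative only; the layout is made up):

#include <vd2/system/binary.h>

void ParseChunkHeader(const unsigned char *buf) {
    uint32 size = VDReadUnalignedBEU32(buf);        // big-endian 32-bit length
    uint32 type = VDReadUnalignedU32(buf + 4);      // raw bytes; matches VDMAKEFOURCC packing

    if (type == VDMAKEFOURCC('m', 'o', 'o', 'v')) {
        // handle the chunk
    }

    unsigned char out[4];
    VDWriteUnalignedBEU32(out, size);               // write it back out big-endian
    (void)out;
}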
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/bitmath.h b/src/thirdparty/VirtualDub/h/vd2/system/bitmath.h
new file mode 100644
index 000000000..fc1c185a7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/bitmath.h
@@ -0,0 +1,75 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_BITMATH_H
+#define f_VD2_SYSTEM_BITMATH_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#ifndef f_VD2_SYSTEM_VDTYPES_H
+ #include <vd2/system/vdtypes.h>
+#endif
+
+int VDCountBits(uint32 v);
+int VDFindLowestSetBit(uint32 v);
+int VDFindHighestSetBit(uint32 v);
+uint32 VDCeilToPow2(uint32 v);
+
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef VD_COMPILER_MSVC_VC8
+ #include <intrin.h>
+ #pragma intrinsic(_BitScanForward)
+ #pragma intrinsic(_BitScanReverse)
+
+ inline int VDFindLowestSetBit(uint32 v) {
+ unsigned long index;
+ return _BitScanForward(&index, v) ? index : 32;
+ }
+
+ inline int VDFindHighestSetBit(uint32 v) {
+ unsigned long index;
+ return _BitScanReverse(&index, v) ? index : -1;
+ }
+
+ inline int VDFindLowestSetBitFast(uint32 v) {
+ unsigned long index;
+ _BitScanForward(&index, v);
+ return index;
+ }
+
+ inline int VDFindHighestSetBitFast(uint32 v) {
+ unsigned long index;
+ _BitScanReverse(&index, v);
+ return index;
+ }
+#else
+ #define VDFindLowestSetBitFast VDFindLowestSetBit
+ #define VDFindHighestSetBitFast VDFindHighestSetBit
+#endif
+
+#endif
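A small sketch of how the bit-scan helpers above are typically used (illustrative only):

#include <vd2/system/bitmath.h>

uint32 RoundUpToPow2(uint32 bytes) {
    return VDCeilToPow2(bytes);         // e.g. for texture or ring-buffer sizing
}

int Log2OfPow2(uint32 v) {
    // For a non-zero value, the index of the highest set bit is floor(log2(v)).
    // The *Fast variants skip the zero check, so guard the input first.
    return v ? VDFindHighestSetBitFast(v) : -1;
}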
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/cache.h b/src/thirdparty/VirtualDub/h/vd2/system/cache.h
new file mode 100644
index 000000000..8fbdea7c2
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/cache.h
@@ -0,0 +1,325 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2005 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_CACHE_H
+#define f_VD2_SYSTEM_CACHE_H
+
+#include <vd2/system/thread.h>
+#include <vd2/system/vdstl.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+struct vdhashmap_node {
+ vdhashmap_node *mpHashPrev;
+ vdhashmap_node *mpHashNext;
+};
+
+template<class K>
+struct vdhash {
+ size_t operator()(const K key) const {
+ return (size_t)key;
+ }
+};
+
+template<class K, class V, class Hash = vdhash<K>, int N = 256>
+class vdhashmap_iterator {
+public:
+ typedef vdhashmap_node node;
+
+ bool operator==(vdhashmap_iterator& x) const { return mpNode == x.mpNode; }
+ bool operator!=(vdhashmap_iterator& x) const { return mpNode != x.mpNode; }
+
+ V& operator*() const { return *static_cast<V *>((node *)mpNode); }
+ V *operator->() const { return static_cast<V *>((node *)mpNode); }
+
+ vdhashmap_iterator& operator++() {
+ do {
+ mpNode = ((node *)mpNode)->mpHashNext;
+ if (mpNode != mpTableNode)
+ break;
+
+ ++mpTableNode;
+ mpNode = mpTableNode->mpHashNext;
+ } while(mpNode);
+
+ return *this;
+ }
+
+ vdhashmap_iterator operator++(int) {
+ vdhashmap_iterator it(*this);
+ ++*this;
+ return it;
+ }
+
+public:
+ vdhashmap_node *mpNode;
+ vdhashmap_node *mpTableNode;
+};
+
+template<class K, class V, class Hash = vdhash<K>, int N = 256>
+class vdhashmap {
+public:
+ typedef K key_type;
+ typedef V value_type;
+ typedef Hash hash_type;
+ typedef vdhashmap_node node;
+ typedef vdhashmap_iterator<K, V> iterator;
+
+ vdhashmap() {
+ for(int i=0; i<N; ++i)
+ m.mpTable[i].mpHashPrev = m.mpTable[i].mpHashNext = &m.mpTable[i];
+ }
+
+ iterator begin() {
+ int i;
+ // A bucket is empty when its sentinel links back to itself; skip those.
+ for(i=0; i<N && m.mpTable[i].mpHashNext == &m.mpTable[i]; ++i)
+ ;
+ if (i >= N)
+ return end();
+ iterator it = { m.mpTable[i].mpHashNext, &m.mpTable[i] };
+ return it;
+ }
+
+ iterator end() {
+ iterator it = { NULL, NULL };
+ return it;
+ }
+
+ V *operator[](const K& key) {
+ const size_t htidx = m(key) % N;
+
+ node *r = &m.mpTable[htidx];
+ for(node *p = r->mpHashNext; p != r; p = p->mpHashNext) {
+ if (static_cast<V *>(p)->mHashKey == key)
+ return static_cast<V *>(p);
+ }
+
+ return NULL;
+ }
+
+ iterator find(const K& key) {
+ const size_t htidx = m(key) % N;
+
+ node *r = &m.mpTable[htidx];
+ for(node *p = r->mpHashNext; p != r; p = p->mpHashNext) {
+ if (static_cast<V *>(p)->mHashKey == key) {
+ iterator it = { p, &m.mpTable[htidx] };
+ return it;
+ }
+ }
+
+ return end();
+ }
+
+ iterator insert(V *p) {
+ const size_t htidx = m(p->mHashKey) % N;
+
+ node *r = &m.mpTable[htidx];
+ node *n = r->mpHashNext;
+ r->mpHashNext = p;
+ p->mpHashPrev = &m.mpTable[htidx];
+ p->mpHashNext = n;
+ n->mpHashPrev = p;
+
+ iterator it = { p, &m.mpTable[htidx] };
+ return it;
+ }
+
+ void erase(V *x) {
+ node *p = x->mpHashPrev;
+ node *n = x->mpHashNext;
+
+ p->mpHashNext = n;
+ n->mpHashPrev = p;
+ }
+
+ void erase(iterator it) {
+ erase(it.mpNode);
+ }
+
+protected:
+ struct Data : public Hash {
+ vdhashmap_node mpTable[N];
+ } m;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDCachedObject;
+
+class IVDCacheAllocator {
+public:
+ virtual VDCachedObject *OnCacheAllocate() = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+enum VDCacheState {
+ kVDCacheStateFree,
+ kVDCacheStatePending,
+ kVDCacheStateReady,
+ kVDCacheStateActive,
+ kVDCacheStateComplete,
+ kVDCacheStateIdle,
+ kVDCacheStateAborting,
+ kVDCacheStateCount
+};
+
+struct VDCachedObjectNodes : public vdlist_node, public vdhashmap_node {
+ sint64 mHashKey;
+};
+
+class VDCache {
+public:
+ VDCache(IVDCacheAllocator *pAllocator);
+ ~VDCache();
+
+ void Shutdown();
+
+ int GetStateCount(int state);
+
+ void DumpListStatus(int state);
+
+ VDCachedObject *Create(sint64 key, bool& is_new);
+
+ VDCachedObject *Allocate(sint64 key);
+ void Schedule(VDCachedObject *); // Moves a Pending or Active object to Ready.
+ VDCachedObject *GetNextReady(); // Selects a Ready object and moves it to Active.
+ void MarkCompleted(VDCachedObject *); // Marks an object as completed.
+
+public:
+ void NotifyFree(VDCachedObject *pObject);
+
+protected:
+ void Evict(uint32 level);
+
+protected:
+ VDCriticalSection mLock;
+
+ IVDCacheAllocator *mpAllocator;
+ uint32 mObjectCount;
+ uint32 mObjectLimit;
+
+ typedef vdlist<VDCachedObjectNodes> ObjectList;
+ ObjectList mLists[kVDCacheStateCount];
+
+ vdhashmap<sint64, VDCachedObjectNodes> mHash;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDCachedObject : private VDCachedObjectNodes {
+ friend class VDCache;
+public:
+ VDCachedObject();
+ virtual ~VDCachedObject() {}
+
+ int AddRef();
+ int Release();
+
+ void WeakAddRef();
+ void WeakRelease();
+
+protected:
+ virtual void OnCacheEvict() {}
+ virtual void OnCacheAbortPending() {}
+ virtual void DumpStatus() {}
+
+protected:
+ int GetRefCount() const { return mRefCount; }
+ void SetCache(VDCache *pCache);
+
+ VDCacheState GetState() const { return mState; }
+ void SetState(VDCacheState state) { mState = state; }
+
+ sint64 GetCacheKey() const { return mHashKey; }
+
+ virtual bool IsValid() const { return true; }
+
+protected:
+ VDCache *mpCache;
+ VDAtomicInt mRefCount;
+ VDCacheState mState;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDPooledObject;
+
+class IVDPoolAllocator {
+public:
+ virtual VDPooledObject *OnPoolAllocate() = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+enum VDPoolState {
+ kVDPoolStateFree,
+ kVDPoolStateActive,
+ kVDPoolStateCount
+};
+
+struct VDPooledObjectNodes : public vdlist_node {};
+
+class VDPool {
+public:
+ VDPool(IVDPoolAllocator *pAllocator);
+ ~VDPool();
+
+ void Shutdown();
+
+ VDPooledObject *Allocate();
+
+public:
+ void NotifyFree(VDPooledObject *pObject);
+
+protected:
+ VDCriticalSection mLock;
+
+ IVDPoolAllocator *mpAllocator;
+ uint32 mObjectCount;
+ uint32 mObjectLimit;
+
+ typedef vdlist<VDPooledObjectNodes> ObjectList;
+ ObjectList mLists[kVDPoolStateCount];
+};
+
+class VDPooledObject : private VDPooledObjectNodes {
+ friend class VDPool;
+public:
+ VDPooledObject();
+ virtual ~VDPooledObject() {}
+
+ int AddRef();
+ int Release();
+
+protected:
+ int GetRefCount() const { return mRefCount; }
+ void SetPool(VDPool *pPool);
+
+protected:
+ VDPool *mpPool;
+ VDAtomicInt mRefCount;
+};
+
+#endif
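A sketch of the intrusive hash map above: value types embed the node and the key themselves, in the same style as VDCachedObjectNodes (the entry type below is hypothetical):

#include <vd2/system/cache.h>

struct FrameEntry : public vdhashmap_node {
    sint64 mHashKey;                    // key field that vdhashmap expects on its value type
    int    mData;
};

void HashSketch() {
    vdhashmap<sint64, FrameEntry> map;

    FrameEntry e;
    e.mHashKey = 42;
    e.mData = 7;
    map.insert(&e);                     // intrusive: the entry is linked in place, not copied

    FrameEntry *p = map[42];            // operator[] is a lookup, returns NULL when absent
    if (p)
        map.erase(p);                   // unlinks the entry without destroying it
}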
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/cmdline.h b/src/thirdparty/VirtualDub/h/vd2/system/cmdline.h
new file mode 100644
index 000000000..eb1d94480
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/cmdline.h
@@ -0,0 +1,69 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2005 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_CMDLINE_H
+#define f_VD2_SYSTEM_CMDLINE_H
+
+#include <vd2/system/vdstl.h>
+
+class VDCommandLineIterator {
+ friend class VDCommandLine;
+public:
+ VDCommandLineIterator() : mIndex(1) {}
+
+private:
+ int mIndex;
+};
+
+class VDCommandLine {
+public:
+ VDCommandLine();
+ VDCommandLine(const wchar_t *s);
+ ~VDCommandLine();
+
+ void Init(const wchar_t *s);
+
+ uint32 GetCount() const;
+ const wchar_t *operator[](int index) const;
+
+ bool GetNextArgument(VDCommandLineIterator& index, const wchar_t *& token, bool& isSwitch) const;
+ bool GetNextNonSwitchArgument(VDCommandLineIterator& index, const wchar_t *& token) const;
+ bool GetNextSwitchArgument(VDCommandLineIterator& index, const wchar_t *& token) const;
+ bool FindAndRemoveSwitch(const wchar_t *name);
+ bool FindAndRemoveSwitch(const wchar_t *name, const wchar_t *& token);
+
+protected:
+ vdfastvector<wchar_t> mLine;
+
+ struct Token {
+ int mTokenIndex;
+ bool mbIsSwitch;
+ bool mbQuoted;
+ };
+
+ vdfastvector<Token> mTokens;
+};
+
+#endif
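A usage sketch for the command-line parser above; the switch names are made up, and the value-returning FindAndRemoveSwitch overload is assumed to hand back the switch's argument:

#include <vd2/system/cmdline.h>

void ParseArgs(const wchar_t *rawCommandLine) {
    VDCommandLine cmdLine(rawCommandLine);

    bool fullscreen = cmdLine.FindAndRemoveSwitch(L"fullscreen");   // boolean switch

    const wchar_t *preset = NULL;
    if (cmdLine.FindAndRemoveSwitch(L"preset", preset)) {
        // preset now points at the switch's argument (assumed semantics)
    }

    VDCommandLineIterator it;
    const wchar_t *token;
    while(cmdLine.GetNextNonSwitchArgument(it, token)) {
        // token is the next plain argument, e.g. a file name
    }

    (void)fullscreen;
}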
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/cpuaccel.h b/src/thirdparty/VirtualDub/h/vd2/system/cpuaccel.h
new file mode 100644
index 000000000..a15bc8be9
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/cpuaccel.h
@@ -0,0 +1,49 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VIRTUALDUB_CPUACCEL_H
+#define f_VIRTUALDUB_CPUACCEL_H
+
+#define CPUF_SUPPORTS_CPUID (0x00000001L)
+#define CPUF_SUPPORTS_FPU (0x00000002L)
+#define CPUF_SUPPORTS_MMX (0x00000004L)
+#define CPUF_SUPPORTS_INTEGER_SSE (0x00000008L)
+#define CPUF_SUPPORTS_SSE (0x00000010L)
+#define CPUF_SUPPORTS_SSE2 (0x00000020L)
+#define CPUF_SUPPORTS_3DNOW (0x00000040L)
+#define CPUF_SUPPORTS_3DNOW_EXT (0x00000080L)
+#define CPUF_SUPPORTS_SSE3 (0x00000100L)
+#define CPUF_SUPPORTS_SSSE3 (0x00000200L)
+#define CPUF_SUPPORTS_SSE41 (0x00000400L)
+#define CPUF_SUPPORTS_MASK (0x000007FFL)
+
+long CPUCheckForExtensions();
+long CPUEnableExtensions(long lEnableFlags);
+long CPUGetEnabledExtensions();
+void VDCPUCleanupExtensions();
+
+extern "C" bool FPU_enabled, MMX_enabled, SSE_enabled, ISSE_enabled, SSE2_enabled;
+
+#endif
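A sketch of the intended initialization order for the CPU feature flags above; gating an SSE2 code path this way is how, for example, an SSE2 deinterlacer can be enabled only where supported:

#include <vd2/system/cpuaccel.h>

void InitCpuFeatures() {
    long detected = CPUCheckForExtensions();        // probe the CPU once
    long enabled  = CPUEnableExtensions(detected);

    if (enabled & CPUF_SUPPORTS_SSE2) {
        // safe to dispatch to SSE2 code paths (e.g. SSE2 deinterlacing)
    }

    // Later queries can call CPUGetEnabledExtensions() instead of re-probing.
}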
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/debug.h b/src/thirdparty/VirtualDub/h/vd2/system/debug.h
new file mode 100644
index 000000000..a4eb59e60
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/debug.h
@@ -0,0 +1,96 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_DEBUG_H
+#define f_VD2_SYSTEM_DEBUG_H
+
+#include <vd2/system/vdtypes.h>
+
+class IVDExternalCallTrap {
+public:
+ virtual void OnMMXTrap(const wchar_t *context, const char *file, int line) = 0;
+ virtual void OnFPUTrap(const wchar_t *context, const char *file, int line, uint16 fpucw) = 0;
+ virtual void OnSSETrap(const wchar_t *context, const char *file, int line, uint32 mxcsr) = 0;
+};
+
+void VDSetExternalCallTrap(IVDExternalCallTrap *);
+
+bool IsMMXState();
+void ClearMMXState();
+void VDClearEvilCPUStates();
+void VDPreCheckExternalCodeCall(const char *file, int line);
+void VDPostCheckExternalCodeCall(const wchar_t *mpContext, const char *mpFile, int mLine);
+
+struct VDSilentExternalCodeBracket {
+ VDSilentExternalCodeBracket() {
+ VDClearEvilCPUStates();
+ }
+
+ ~VDSilentExternalCodeBracket() {
+ VDClearEvilCPUStates();
+ }
+};
+
+struct VDExternalCodeBracketLocation {
+ VDExternalCodeBracketLocation(const wchar_t *pContext, const char *file, const int line)
+ : mpContext(pContext)
+ , mpFile(file)
+ , mLine(line)
+ {
+ }
+
+ const wchar_t *mpContext;
+ const char *mpFile;
+ const int mLine;
+};
+
+struct VDExternalCodeBracket {
+ VDExternalCodeBracket(const wchar_t *pContext, const char *file, const int line)
+ : mpContext(pContext)
+ , mpFile(file)
+ , mLine(line)
+ {
+ VDPreCheckExternalCodeCall(file, line);
+ }
+
+ VDExternalCodeBracket(const VDExternalCodeBracketLocation& loc)
+ : mpContext(loc.mpContext)
+ , mpFile(loc.mpFile)
+ , mLine(loc.mLine)
+ {
+ }
+
+ ~VDExternalCodeBracket() {
+ VDPostCheckExternalCodeCall(mpContext, mpFile, mLine);
+ }
+
+ operator bool() const { return false; }
+
+ const wchar_t *mpContext;
+ const char *mpFile;
+ const int mLine;
+};
+
+#endif
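A sketch of how the bracket helpers above are meant to wrap calls into external code such as a codec or driver; the callee is hypothetical:

#include <vd2/system/debug.h>

void CallThirdPartyCodec(void (*codecEntryPoint)()) {
    // Checks FPU/MMX/SSE state before the call and again in the destructor,
    // so a clobbering callee can be reported via the installed IVDExternalCallTrap.
    VDExternalCodeBracket bracket(L"third-party codec", __FILE__, __LINE__);
    codecEntryPoint();
}

void CallWithoutChecks(void (*fn)()) {
    VDSilentExternalCodeBracket bracket;        // just clears risky CPU state on entry and exit
    fn();
}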
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/debugx86.h b/src/thirdparty/VirtualDub/h/vd2/system/debugx86.h
new file mode 100644
index 000000000..03a4f29a3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/debugx86.h
@@ -0,0 +1,37 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+enum VDInstructionTypeX86 {
+ kX86InstUnknown,
+ kX86InstP6,
+ kX86InstMMX,
+ kX86InstMMX2,
+ kX86InstSSE,
+ kX86InstSSE2,
+ kX86Inst3DNow
+};
+
+bool VDIsValidCallX86(const char *buf, int len);
+VDInstructionTypeX86 VDGetInstructionTypeX86(const void *p);
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/event.h b/src/thirdparty/VirtualDub/h/vd2/system/event.h
new file mode 100644
index 000000000..a725f8d43
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/event.h
@@ -0,0 +1,201 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2006 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_EVENT_H
+#define f_VD2_SYSTEM_EVENT_H
+
+struct VDDelegateNode {
+ VDDelegateNode *mpNext, *mpPrev;
+};
+
+class VDDelegate;
+
+class VDEventBase {
+protected:
+ VDEventBase();
+ ~VDEventBase();
+
+ void Add(VDDelegate&);
+ void Remove(VDDelegate&);
+ void Raise(void *src, const void *info);
+
+ VDDelegateNode mAnchor;
+};
+
+// Because Visual C++ uses different pointer-to-member representations for
+// different inheritance regimes, we have to include a whole lot of stupid
+// logic to detect and switch code paths based on the inheritance used.
+// We detect the inheritance by the size of the member function pointer.
+//
+// Some have managed to make faster and more compact delegates by hacking
+// into the PMT representation and pre-folding the this pointer adjustment.
+// I'm avoiding this for now because (a) it's even less portable than what
+// we have here, and (b) that fails if the object undergoes a change in
+// virtual table status while the delegate is alive (which is possible
+// during construction/destruction).
+//
+// Note: We can't handle virtual inheritance here because on X64, MSVC uses
+// 16 bytes for both multiple and virtual inheritance cases.
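+//
+// Illustrative usage sketch (not part of the original header; the class and
+// member names are made up). A listener owns a VDDelegate and attaches it to
+// a source's VDEvent; the source later calls Raise(), which invokes the bound
+// member function:
+//
+//   class Source {
+//   public:
+//       VDEvent<Source, int> mProgressEvent;
+//       void Step(int pct) { mProgressEvent.Raise(this, pct); }
+//   };
+//
+//   class Listener {
+//   public:
+//       VDDelegate mDelegate;
+//       void Attach(Source& src) {
+//           src.mProgressEvent += mDelegate(this, &Listener::OnProgress);
+//       }
+//       void OnProgress(Source *sender, const int& pct) { /* react here */ }
+//   };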
+
+#ifdef _MSC_VER
+ class __single_inheritance VDDelegateHolderS;
+ class __multiple_inheritance VDDelegateHolderM;
+#else
+ class VDDelegateHolderS;
+#endif
+
+template<class Source, class ArgType>
+class VDDelegateBinding {
+public:
+ VDDelegate *mpBoundDelegate;
+};
+
+template<class T, class Source, class ArgType>
+struct VDDelegateAdapterS {
+ typedef void (T::*T_Fn)(Source *, const ArgType&);
+ typedef void (T::*T_Fn2)(Source *, ArgType);
+
+ static void Init(VDDelegate& dst, T_Fn fn) {
+ dst.mpCallback = Fn;
+ dst.mpFnS = reinterpret_cast<void(VDDelegateHolderS::*)()>(fn);
+ }
+
+ static void Init(VDDelegate& dst, T_Fn2 fn) {
+ dst.mpCallback = Fn2;
+ dst.mpFnS = reinterpret_cast<void(VDDelegateHolderS::*)()>(fn);
+ }
+
+ static void Fn(void *src, const void *info, VDDelegate& del) {
+ return (((T *)del.mpObj)->*reinterpret_cast<T_Fn>(del.mpFnS))(static_cast<Source *>(src), *static_cast<const ArgType *>(info));
+ }
+
+ static void Fn2(void *src, const void *info, VDDelegate& del) {
+ return (((T *)del.mpObj)->*reinterpret_cast<T_Fn2>(del.mpFnS))(static_cast<Source *>(src), *static_cast<const ArgType *>(info));
+ }
+};
+
+template<int size>
+class VDDelegateAdapter {
+public:
+ template<class T, class Source, class ArgType>
+ struct AdapterLookup {
+ typedef VDDelegateAdapterS<T, Source, ArgType> result;
+ };
+};
+
+#ifdef _MSC_VER
+template<class T, class Source, class ArgType>
+struct VDDelegateAdapterM {
+ typedef void (T::*T_Fn)(Source *, const ArgType&);
+ typedef void (T::*T_Fn2)(Source *, ArgType);
+
+ static void Init(VDDelegate& dst, T_Fn fn) {
+ dst.mpCallback = Fn;
+ dst.mpFnM = reinterpret_cast<void(VDDelegateHolderM::*)()>(fn);
+ }
+
+ static void Init(VDDelegate& dst, T_Fn2 fn) {
+ dst.mpCallback = Fn2;
+ dst.mpFnM = reinterpret_cast<void(VDDelegateHolderM::*)()>(fn);
+ }
+
+ static void Fn(void *src, const void *info, VDDelegate& del) {
+ return (((T *)del.mpObj)->*reinterpret_cast<T_Fn>(del.mpFnM))(static_cast<Source *>(src), *static_cast<const ArgType *>(info));
+ }
+
+ static void Fn2(void *src, const void *info, VDDelegate& del) {
+ return (((T *)del.mpObj)->*reinterpret_cast<T_Fn2>(del.mpFnM))(static_cast<Source *>(src), *static_cast<const ArgType *>(info));
+ }
+};
+
+
+template<>
+class VDDelegateAdapter<sizeof(void (VDDelegateHolderM::*)())> {
+public:
+ template<class T, class Source, class ArgType>
+ struct AdapterLookup {
+ typedef VDDelegateAdapterM<T, Source, ArgType> result;
+ };
+};
+#endif
+
+class VDDelegate : public VDDelegateNode {
+ friend class VDEventBase;
+public:
+ VDDelegate();
+ ~VDDelegate();
+
+ template<class T, class Source, class ArgType>
+ VDDelegateBinding<Source, ArgType> operator()(T *obj, void (T::*fn)(Source *, const ArgType&)) {
+ mpObj = obj;
+
+ VDDelegateAdapter<sizeof fn>::AdapterLookup<T, Source, ArgType>::result::Init(*this, fn);
+
+ VDDelegateBinding<Source, ArgType> binding = {this};
+ return binding;
+ }
+
+ template<class T, class Source, class ArgType>
+ VDDelegateBinding<Source, ArgType> Bind(T *obj, void (T::*fn)(Source *, ArgType)) {
+ mpObj = obj;
+
+ VDDelegateAdapter<sizeof fn>::AdapterLookup<T, Source, ArgType>::result::Init(*this, fn);
+
+ VDDelegateBinding<Source, ArgType> binding = {this};
+ return binding;
+ }
+
+public:
+ void (*mpCallback)(void *src, const void *info, VDDelegate&);
+ void *mpObj;
+
+#ifdef _MSC_VER
+ union {
+ void (VDDelegateHolderS::*mpFnS)();
+ void (VDDelegateHolderM::*mpFnM)();
+ };
+#else
+ class VDDelegateHolderS;
+ void (VDDelegateHolderS::*mpFnS)();
+#endif
+};
+
+template<class Source, class ArgType>
+class VDEvent : public VDEventBase {
+public:
+ void operator+=(const VDDelegateBinding<Source, ArgType>& binding) {
+ Add(*binding.mpBoundDelegate);
+ }
+
+ void operator-=(const VDDelegateBinding<Source, ArgType>& binding) {
+ Remove(*binding.mpBoundDelegate);
+ }
+
+ void Raise(Source *src, const ArgType& args) {
+ VDEventBase::Raise(src, &args);
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/file.h b/src/thirdparty/VirtualDub/h/vd2/system/file.h
new file mode 100644
index 000000000..bfdfab44e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/file.h
@@ -0,0 +1,323 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_FILE_H
+#define f_VD2_SYSTEM_FILE_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <limits.h>
+#include <stdarg.h>
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdalloc.h>
+#include <vd2/system/vdstl.h>
+#include <vector>
+
+#ifdef WIN32
+ typedef void *VDFileHandle; // this needs to match wtypes.h definition for HANDLE
+#else
+ #error No operating system target declared??
+#endif
+
+namespace nsVDFile {
+ enum eSeekMode {
+ kSeekStart=0, kSeekCur, kSeekEnd
+ };
+
+ enum eFlags {
+ kRead = 0x00000001,
+ kWrite = 0x00000002,
+ kReadWrite = kRead | kWrite,
+
+ kDenyNone = 0x00000000,
+ kDenyRead = 0x00000010,
+ kDenyWrite = 0x00000020,
+ kDenyAll = kDenyRead | kDenyWrite,
+
+ kOpenExisting = 0x00000100,
+ kOpenAlways = 0x00000200,
+ kCreateAlways = 0x00000300,
+ kCreateNew = 0x00000400,
+ kTruncateExisting = 0x00000500, // not particularly useful, really
+ kCreationMask = 0x0000FF00,
+
+ kSequential = 0x00010000,
+ kRandomAccess = 0x00020000,
+ kUnbuffered = 0x00040000, // much faster on Win32 thanks to the crappy cache, but possibly bad in Unix?
+ kWriteThrough = 0x00080000,
+
+ kAllFileFlags = 0xFFFFFFFF
+ };
+};
+
+class VDFile {
+protected:
+ VDFileHandle mhFile;
+ vdautoptr2<wchar_t> mpFilename;
+ sint64 mFilePosition;
+
+private:
+ VDFile(const VDFile&);
+ const VDFile& operator=(const VDFile& f);
+
+public:
+ VDFile() : mhFile(NULL) {}
+ VDFile(const char *pszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+ VDFile(const wchar_t *pwszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+ VDFile(VDFileHandle h);
+ ~VDFile();
+
+ // The "NT" functions are non-throwing and return success/failure; the regular functions throw exceptions
+ // when something bad happens.
+
+ void open(const char *pszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+ void open(const wchar_t *pwszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+
+ bool openNT(const wchar_t *pwszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting);
+
+ bool closeNT();
+ void close();
+ bool truncateNT();
+ void truncate();
+
+ // extendValid() pushes the valid threshold of a file out, so that the system allocates
+ // space for a file without ensuring that it is cleared. It is mainly useful for
+ // preallocating a file without waiting for the system to clear all of it. The caveats:
+ //
+ // - only required on NTFS
+ // - requires Windows XP or Windows Server 2003
+ // - does not work on compressed or sparse files
+ //
+ // As such, it shouldn't normally be relied upon, and extendValidNT() should be the call
+ // of choice.
+ //
+ // enableExtendValid() must be called beforehand, as SeManageVolumePrivilege must be
+ // enabled on the process before the file is opened!
+
+ bool extendValidNT(sint64 pos);
+ void extendValid(sint64 pos);
+ static bool enableExtendValid();
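+
+ // Illustrative sketch (not part of the original header; L"output.bin" and
+ // kReservedBytes are hypothetical): preallocating a large output file
+ // without waiting for the OS to zero-fill it. Per the notes above, the
+ // privilege is enabled first and the non-throwing NT variant is preferred:
+ //
+ //   VDFile::enableExtendValid();      // once per process, before opening
+ //   VDFile f(L"output.bin", nsVDFile::kWrite | nsVDFile::kDenyAll | nsVDFile::kCreateAlways);
+ //   f.seek(kReservedBytes);           // move to the desired size
+ //   f.truncate();                     // set EOF at that position
+ //   f.extendValidNT(kReservedBytes);  // best effort; failure can be ignored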
+
+ sint64 size();
+ void read(void *buffer, long length);
+ long readData(void *buffer, long length);
+ void write(const void *buffer, long length);
+ long writeData(const void *buffer, long length);
+ bool seekNT(sint64 newPos, nsVDFile::eSeekMode mode = nsVDFile::kSeekStart);
+ void seek(sint64 newPos, nsVDFile::eSeekMode mode = nsVDFile::kSeekStart);
+ bool skipNT(sint64 delta);
+ void skip(sint64 delta);
+ sint64 tell();
+
+ bool isOpen();
+ VDFileHandle getRawHandle();
+
+ const wchar_t *getFilenameForError() const { return mpFilename; }
+
+ // unbuffered I/O requires aligned buffers ("unbuffers")
+ static void *AllocUnbuffer(size_t nBytes);
+ static void FreeUnbuffer(void *p);
+
+protected:
+ bool open_internal(const char *pszFilename, const wchar_t *pwszFilename, uint32 flags, bool throwOnError);
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T>
+class VDFileUnbufferAllocator {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ template<class U> struct rebind { typedef VDFileUnbufferAllocator<U> other; };
+
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+
+ pointer allocate(size_type n, void *p = 0) { return (pointer)VDFile::AllocUnbuffer(n * sizeof(T)); }
+ void deallocate(pointer p, size_type n) { VDFile::FreeUnbuffer(p); }
+ size_type max_size() const throw() { return MAX_INT; }
+
+ void construct(pointer p, const T& val) { new((void *)p) T(val); }
+ void destroy(pointer p) { ((T*)p)->~T(); }
+
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ char * _Charalloc(size_type n) { return (char *)allocate((n + sizeof(T) - 1) / sizeof(T)); }
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class IVDStream {
+public:
+ virtual const wchar_t *GetNameForError() = 0;
+ virtual sint64 Pos() = 0;
+ virtual void Read(void *buffer, sint32 bytes) = 0;
+ virtual sint32 ReadData(void *buffer, sint32 bytes) = 0;
+ virtual void Write(const void *buffer, sint32 bytes) = 0;
+};
+
+class IVDRandomAccessStream : public IVDStream {
+public:
+ virtual sint64 Length() = 0;
+ virtual void Seek(sint64 offset) = 0;
+};
+
+class VDFileStream : public VDFile, public IVDRandomAccessStream {
+private:
+ VDFileStream(const VDFile&);
+ const VDFileStream& operator=(const VDFileStream& f);
+
+public:
+ VDFileStream() {}
+ VDFileStream(const char *pszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting)
+ : VDFile(pszFileName, flags) {}
+ VDFileStream(const wchar_t *pwszFileName, uint32 flags = nsVDFile::kRead | nsVDFile::kDenyWrite | nsVDFile::kOpenExisting)
+ : VDFile(pwszFileName, flags) {}
+ VDFileStream(VDFileHandle h) : VDFile(h) {}
+ ~VDFileStream();
+
+ const wchar_t *GetNameForError();
+ sint64 Pos();
+ void Read(void *buffer, sint32 bytes);
+ sint32 ReadData(void *buffer, sint32 bytes);
+ void Write(const void *buffer, sint32 bytes);
+ sint64 Length();
+ void Seek(sint64 offset);
+};
+
+class VDMemoryStream : public IVDRandomAccessStream {
+public:
+ VDMemoryStream(const void *pSrc, uint32 len);
+
+ const wchar_t *GetNameForError();
+ sint64 Pos();
+ void Read(void *buffer, sint32 bytes);
+ sint32 ReadData(void *buffer, sint32 bytes);
+ void Write(const void *buffer, sint32 bytes);
+ sint64 Length();
+ void Seek(sint64 offset);
+
+protected:
+ const char *mpSrc;
+ const uint32 mLength;
+ uint32 mPos;
+};
+
+class VDBufferedStream : public IVDRandomAccessStream {
+public:
+ VDBufferedStream(IVDRandomAccessStream *pSrc, uint32 bufferSize);
+ ~VDBufferedStream();
+
+ const wchar_t *GetNameForError();
+ sint64 Pos();
+ void Read(void *buffer, sint32 bytes);
+ sint32 ReadData(void *buffer, sint32 bytes);
+ void Write(const void *buffer, sint32 bytes);
+
+ sint64 Length();
+ void Seek(sint64 offset);
+
+ void Skip(sint64 size);
+
+protected:
+ IVDRandomAccessStream *mpSrc;
+ vdblock<char> mBuffer;
+ sint64 mBasePosition;
+ uint32 mBufferOffset;
+ uint32 mBufferValidSize;
+};
+
+class VDTextStream {
+public:
+ VDTextStream(IVDStream *pSrc);
+ ~VDTextStream();
+
+ const char *GetNextLine();
+
+protected:
+ IVDStream *mpSrc;
+ uint32 mBufferPos;
+ uint32 mBufferLimit;
+ enum {
+ kFetchLine,
+ kEatNextIfCR,
+ kEatNextIfLF
+ } mState;
+
+ enum {
+ kFileBufferSize = 4096
+ };
+
+ vdfastvector<char> mLineBuffer;
+ vdblock<char> mFileBuffer;
+};
+
+class VDTextInputFile {
+public:
+ VDTextInputFile(const wchar_t *filename, uint32 flags = nsVDFile::kOpenExisting);
+ ~VDTextInputFile();
+
+ inline const char *GetNextLine() {
+ return mTextStream.GetNextLine();
+ }
+
+protected:
+ VDFileStream mFileStream;
+ VDTextStream mTextStream;
+};
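+
+// Illustrative sketch (not part of the original header; assumes GetNextLine()
+// returns NULL once the file is exhausted): reading a text file line by line.
+//
+//   VDTextInputFile in(L"playlist.txt");      // hypothetical filename
+//   while(const char *line = in.GetNextLine())
+//       ProcessLine(line);                    // hypothetical consumer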
+
+class VDTextOutputStream {
+public:
+ VDTextOutputStream(IVDStream *stream);
+ ~VDTextOutputStream();
+
+ void Flush();
+
+ void Write(const char *s, int len);
+ void PutLine();
+ void PutLine(const char *s);
+ void FormatLine(const char *format, ...);
+
+protected:
+ void FormatLine2(const char *format, va_list val);
+ void PutData(const char *s, int len);
+
+ enum { kBufSize = 4096 };
+
+ int mLevel;
+ IVDStream *mpDst;
+ char mBuf[kBufSize];
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/fileasync.h b/src/thirdparty/VirtualDub/h/vd2/system/fileasync.h
new file mode 100644
index 000000000..7693aa30f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/fileasync.h
@@ -0,0 +1,64 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_FILEASYNC_H
+#define f_VD2_SYSTEM_FILEASYNC_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+class VDRTProfileChannel;
+
+class IVDFileAsync {
+public:
+ enum Mode {
+ kModeSynchronous, ///< Use synchronous I/O.
+ kModeThreaded, ///< Use multithreaded I/O.
+ kModeAsynchronous, ///< Use true asynchronous I/O (Windows NT only).
+ kModeCount
+ };
+
+ virtual ~IVDFileAsync() {}
+ virtual void SetPreemptiveExtend(bool b) = 0;
+ virtual bool IsPreemptiveExtendActive() = 0;
+ virtual bool IsOpen() = 0;
+ virtual void Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize) = 0;
+ virtual void Close() = 0;
+ virtual void FastWrite(const void *pData, uint32 bytes) = 0;
+ virtual void FastWriteEnd() = 0;
+ virtual void Write(sint64 pos, const void *pData, uint32 bytes) = 0;
+ virtual bool Extend(sint64 pos) = 0;
+ virtual void Truncate(sint64 pos) = 0;
+ virtual void SafeTruncateAndClose(sint64 pos) = 0;
+ virtual sint64 GetFastWritePos() = 0;
+ virtual sint64 GetSize() = 0;
+};
+
+IVDFileAsync *VDCreateFileAsync(IVDFileAsync::Mode);
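+
+// Illustrative sketch (not part of the original header; the filename and
+// buffer sizes are arbitrary): sequential writing through the async layer.
+//
+//   IVDFileAsync *out = VDCreateFileAsync(IVDFileAsync::kModeAsynchronous);
+//   out->Open(L"capture.avi", 4, 262144);     // 4 buffers of 256KB each
+//   out->FastWrite(pData, bytes);             // repeated by the producer
+//   out->FastWriteEnd();
+//   out->SafeTruncateAndClose(out->GetFastWritePos());
+//   delete out;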
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/filesys.h b/src/thirdparty/VirtualDub/h/vd2/system/filesys.h
new file mode 100644
index 000000000..4aa830833
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/filesys.h
@@ -0,0 +1,170 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_FILESYS_H
+#define f_VD2_SYSTEM_FILESYS_H
+
+#include <ctype.h>
+#include <vector>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/VDString.h>
+
+// VDFileSplitPath returns a pointer to the first character of the filename,
+// or the beginning of the string if the path only contains one component.
+
+const char *VDFileSplitFirstDir(const char *s);
+const wchar_t *VDFileSplitFirstDir(const wchar_t *s);
+
+static inline char *VDFileSplitFirstDir(char *s) {
+ return const_cast<char *>(VDFileSplitFirstDir(const_cast<const char *>(s)));
+}
+
+static inline wchar_t *VDFileSplitFirstDir(wchar_t *s) {
+ return const_cast<wchar_t *>(VDFileSplitFirstDir(const_cast<const wchar_t *>(s)));
+}
+
+const char *VDFileSplitPath(const char *);
+const wchar_t *VDFileSplitPath(const wchar_t *);
+
+static inline char *VDFileSplitPath(char *s) {
+ return const_cast<char *>(VDFileSplitPath(const_cast<const char *>(s)));
+}
+
+static inline wchar_t *VDFileSplitPath(wchar_t *s) {
+ return const_cast<wchar_t *>(VDFileSplitPath(const_cast<const wchar_t *>(s)));
+}
+
+VDString VDFileSplitPathLeft(const VDString&);
+VDString VDFileSplitPathRight(const VDString&);
+VDStringW VDFileSplitPathLeft(const VDStringW&);
+VDStringW VDFileSplitPathRight(const VDStringW&);
+
+// VDFileSplitRoot returns a pointer to the second component of the filename,
+// or the beginning of the string if there is no second component.
+
+const char *VDFileSplitRoot(const char *);
+const wchar_t *VDFileSplitRoot(const wchar_t *);
+
+static inline char *VDFileSplitRoot(char *s) {
+ return const_cast<char *>(VDFileSplitRoot(const_cast<const char *>(s)));
+}
+
+static inline wchar_t *VDFileSplitRoot(wchar_t *s) {
+ return const_cast<wchar_t *>(VDFileSplitRoot(const_cast<const wchar_t *>(s)));
+}
+
+VDString VDFileSplitRoot(const VDString&);
+VDStringW VDFileSplitRoot(const VDStringW&);
+
+// VDFileSplitExt returns a pointer to the extension, including the period.
+// The ending null terminator is returned if there is no extension.
+
+const char *VDFileSplitExt(const char *);
+const wchar_t *VDFileSplitExt(const wchar_t *);
+
+static inline char *VDFileSplitExt(char *s) {
+ return const_cast<char *>(VDFileSplitExt(const_cast<const char *>(s)));
+}
+
+static inline wchar_t *VDFileSplitExt(wchar_t *s) {
+ return const_cast<wchar_t *>(VDFileSplitExt(const_cast<const wchar_t *>(s)));
+}
+
+VDString VDFileSplitExtLeft(const VDString&);
+VDStringW VDFileSplitExtLeft(const VDStringW&);
+VDString VDFileSplitExtRight(const VDString&);
+VDStringW VDFileSplitExtRight(const VDStringW&);
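+
+// Illustrative examples (not part of the original header); for
+// s = "c:\projects\clip.avi" the helpers above return, roughly:
+//
+//   VDFileSplitPath(s) -> "clip.avi"           (filename component)
+//   VDFileSplitRoot(s) -> "projects\clip.avi"  (everything past the root)
+//   VDFileSplitExt(s)  -> ".avi"               (extension, including the period)
+//
+// The *Left/*Right string overloads split at the same boundaries and return
+// the corresponding half (e.g. directory vs. filename, stem vs. extension).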
+
+/////////////////////////////////////////////////////////////////////////////
+
+/// Perform a case-insensitive wildcard match against a filename; returns
+/// true if the pattern matches, false otherwise. '?' matches any single
+/// character, and '*' matches zero or more characters.
+///
+/// NOTE: This is not guaranteed or intended to perfectly match the
+/// underlying OS wildcard mechanism. In particular, we don't try to
+/// emulate MSDOS or Windows goofiness.
+bool VDFileWildMatch(const char *pattern, const char *path);
+bool VDFileWildMatch(const wchar_t *pattern, const wchar_t *path);
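+
+// Illustrative examples (not part of the original header):
+//
+//   VDFileWildMatch("*.avi",      "Movie.AVI")  -> true   (case-insensitive)
+//   VDFileWildMatch("clip??.wav", "clip01.wav") -> true
+//   VDFileWildMatch("*.avi",      "movie.mp4")  -> false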
+
+/////////////////////////////////////////////////////////////////////////////
+
+sint64 VDGetDiskFreeSpace(const wchar_t *path);
+void VDCreateDirectory(const wchar_t *path);
+
+extern bool (*VDRemoveFile)(const wchar_t *path);
+
+bool VDDoesPathExist(const wchar_t *fileName);
+
+uint64 VDFileGetLastWriteTime(const wchar_t *path);
+VDStringW VDFileGetRootPath(const wchar_t *partialPath);
+VDStringW VDGetFullPath(const wchar_t *partialPath);
+
+VDStringW VDMakePath(const wchar_t *base, const wchar_t *file);
+void VDFileFixDirPath(VDStringW& path);
+VDStringW VDGetLocalModulePath();
+VDStringW VDGetProgramPath();
+
+/////////////////////////////////////////////////////////////////////////////
+
+class VDDirectoryIterator {
+ VDDirectoryIterator(const VDDirectoryIterator&);
+ VDDirectoryIterator& operator=(VDDirectoryIterator&);
+public:
+ VDDirectoryIterator(const wchar_t *path);
+ ~VDDirectoryIterator();
+
+ bool Next();
+
+ bool IsDirectory() const {
+ return mbDirectory;
+ }
+
+ const wchar_t *GetName() const {
+ return mFilename.c_str();
+ }
+
+ const VDStringW GetFullPath() const {
+ return mBasePath + mFilename;
+ }
+
+ const sint64 GetSize() const {
+ return mFileSize;
+ }
+
+protected:
+ void *mpHandle;
+ bool mbSearchComplete;
+
+ VDStringW mSearchPath;
+ VDStringW mBasePath;
+
+ VDStringW mFilename;
+ sint64 mFileSize;
+ bool mbDirectory;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/filewatcher.h b/src/thirdparty/VirtualDub/h/vd2/system/filewatcher.h
new file mode 100644
index 000000000..db1a02312
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/filewatcher.h
@@ -0,0 +1,45 @@
+#ifndef f_VD2_SYSTEM_FILEWATCHER_H
+#define f_VD2_SYSTEM_FILEWATCHER_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/VDString.h>
+
+class VDFunctionThunk;
+
+class IVDFileWatcherCallback {
+public:
+ virtual bool OnFileUpdated(const wchar_t *path) = 0;
+};
+
+class VDFileWatcher {
+public:
+ VDFileWatcher();
+ ~VDFileWatcher();
+
+ bool IsActive() const;
+
+ void Init(const wchar_t *file, IVDFileWatcherCallback *cb);
+ void Shutdown();
+
+ bool Wait(uint32 delay = 0xFFFFFFFFU);
+
+protected:
+ void StaticTimerCallback(void *, unsigned, unsigned, unsigned long);
+
+ void *mChangeHandle;
+ uint64 mLastWriteTime;
+ VDStringW mPath;
+
+ IVDFileWatcherCallback *mpCB;
+
+ bool mbRepeatRequested;
+ bool mbThunksInited;
+ VDFunctionThunk *mpThunk;
+ uint32 mTimerId;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/halffloat.h b/src/thirdparty/VirtualDub/h/vd2/system/halffloat.h
new file mode 100644
index 000000000..e65a4109c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/halffloat.h
@@ -0,0 +1,9 @@
+#ifndef f_VD2_SYSTEM_HALFFLOAT_H
+#define f_VD2_SYSTEM_HALFFLOAT_H
+
+#include <vd2/system/vdtypes.h>
+
+uint16 VDConvertFloatToHalf(const void *f);
+void VDConvertHalfToFloat(uint16 h, void *dst);
+
+#endif // f_VD2_SYSTEM_HALFFLOAT_H
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/hash.h b/src/thirdparty/VirtualDub/h/vd2/system/hash.h
new file mode 100644
index 000000000..d5f3612e1
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/hash.h
@@ -0,0 +1,47 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_HASH_H
+#define f_VD2_SYSTEM_HASH_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#ifndef f_VD2_SYSTEM_VDTYPES_H
+ #include <vd2/system/vdtypes.h>
+#endif
+
+// Case-sensitive string hashes
+
+uint32 VDHashString32(const char *s);
+uint32 VDHashString32(const char *s, uint32 len);
+
+// Case-insensitive, culture-invariant string hashes
+
+uint32 VDHashString32I(const wchar_t *s);
+uint32 VDHashString32I(const wchar_t *s, uint32 len);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/int128.h b/src/thirdparty/VirtualDub/h/vd2/system/int128.h
new file mode 100644
index 000000000..da86c4878
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/int128.h
@@ -0,0 +1,361 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_INT128_H
+#define f_VD2_SYSTEM_INT128_H
+
+#include <vd2/system/vdtypes.h>
+
+struct vdint128;
+struct vduint128;
+
+#ifdef _M_AMD64
+ extern "C" __int64 _mul128(__int64 x, __int64 y, __int64 *hiresult);
+ extern "C" unsigned __int64 _umul128(unsigned __int64 x, unsigned __int64 y, unsigned __int64 *hiresult);
+ extern "C" unsigned __int64 __shiftleft128(unsigned __int64 low, unsigned __int64 high, unsigned char shift);
+ extern "C" unsigned __int64 __shiftright128(unsigned __int64 low, unsigned __int64 high, unsigned char shift);
+
+ #pragma intrinsic(_mul128)
+ #pragma intrinsic(_umul128)
+ #pragma intrinsic(__shiftleft128)
+ #pragma intrinsic(__shiftright128)
+
+ extern "C" {
+ void vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ void vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ void vdasm_uint128_mul(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ }
+#else
+ extern "C" {
+ void __cdecl vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ void __cdecl vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]);
+ }
+#endif
+
+struct vdint128 {
+public:
+ union {
+ sint32 d[4];
+ sint64 q[2];
+ };
+
+ vdint128() {}
+
+ vdint128(sint64 x) {
+ q[0] = x;
+ q[1] = x>>63;
+ }
+
+ vdint128(uint64 x) {
+ q[0] = (sint64)x;
+ q[1] = 0;
+ }
+
+ vdint128(int x) {
+ q[0] = x;
+ q[1] = (sint64)x >> 63;
+ }
+
+ vdint128(unsigned int x) {
+ q[0] = x;
+ q[1] = 0;
+ }
+
+ vdint128(unsigned long x) {
+ q[0] = x;
+ q[1] = 0;
+ }
+
+ vdint128(sint64 hi, uint64 lo) {
+ q[0] = lo;
+ q[1] = hi;
+ }
+
+ sint64 getHi() const { return q[1]; }
+ uint64 getLo() const { return q[0]; }
+
+ operator double() const;
+ operator sint64() const {
+ return (sint64)q[0];
+ }
+ operator uint64() const {
+ return (uint64)q[0];
+ }
+
+ bool operator==(const vdint128& x) const {
+ return q[1] == x.q[1] && q[0] == x.q[0];
+ }
+
+ bool operator!=(const vdint128& x) const {
+ return q[1] != x.q[1] || q[0] != x.q[0];
+ }
+
+ bool operator<(const vdint128& x) const {
+ return q[1] < x.q[1] || (q[1] == x.q[1] && (uint64)q[0] < (uint64)x.q[0]);
+ }
+
+ bool operator<=(const vdint128& x) const {
+ return q[1] < x.q[1] || (q[1] == x.q[1] && (uint64)q[0] <= (uint64)x.q[0]);
+ }
+
+ bool operator>(const vdint128& x) const {
+ return q[1] > x.q[1] || (q[1] == x.q[1] && (uint64)q[0] > (uint64)x.q[0]);
+ }
+
+ bool operator>=(const vdint128& x) const {
+ return q[1] > x.q[1] || (q[1] == x.q[1] && (uint64)q[0] >= (uint64)x.q[0]);
+ }
+
+ const vdint128 operator+(const vdint128& x) const {
+ vdint128 t;
+ vdasm_uint128_add((uint64 *)t.q, (const uint64 *)q, (const uint64 *)x.q);
+ return t;
+ }
+
+ const vdint128 operator-(const vdint128& x) const {
+ vdint128 t;
+ vdasm_uint128_sub((uint64 *)t.q, (const uint64 *)q, (const uint64 *)x.q);
+ return t;
+ }
+
+ const vdint128& operator+=(const vdint128& x) {
+ vdasm_uint128_add((uint64 *)q, (const uint64 *)q, (const uint64 *)x.q);
+ return *this;
+ }
+
+ const vdint128& operator-=(const vdint128& x) {
+ vdasm_uint128_sub((uint64 *)q, (const uint64 *)q, (const uint64 *)x.q);
+ return *this;
+ }
+
+ const vdint128 operator*(const vdint128& x) const;
+
+ const vdint128 operator/(int x) const;
+
+ const vdint128 operator-() const {
+ vdint128 t(0);
+ vdasm_uint128_sub((uint64 *)t.q, (const uint64 *)t.q, (const uint64 *)q);
+ return t;
+ }
+
+ const vdint128 abs() const {
+ return q[1] < 0 ? -*this : *this;
+ }
+
+#ifdef _M_AMD64
+ void setSquare(sint64 v) {
+ const vdint128 v128(v);
+ operator=(v128*v128);
+ }
+
+ const vdint128 operator<<(int count) const {
+ vdint128 t;
+
+ if (count >= 64) {
+ t.q[0] = 0;
+ t.q[1] = q[0] << (count-64);
+ } else {
+ t.q[0] = q[0] << count;
+ t.q[1] = __shiftleft128(q[0], q[1], count);
+ }
+
+ return t;
+ }
+
+ const vdint128 operator>>(int count) const {
+ vdint128 t;
+
+ if (count >= 64) {
+ t.q[0] = q[1] >> (count-64);
+ t.q[1] = q[1] >> 63;
+ } else {
+ t.q[0] = __shiftright128(q[0], q[1], count);
+ t.q[1] = q[1] >> count;
+ }
+
+ return t;
+ }
+#else
+ void setSquare(sint64 v);
+
+ const vdint128 operator<<(int v) const;
+ const vdint128 operator>>(int v) const;
+#endif
+};
+
+struct vduint128 {
+public:
+ union {
+ uint32 d[4];
+ uint64 q[2];
+ };
+
+ vduint128() {}
+
+ vduint128(sint64 x) {
+ q[0] = (sint64)x;
+ q[1] = 0;
+ }
+
+ vduint128(uint64 x) {
+ q[0] = x;
+ q[1] = 0;
+ }
+
+ vduint128(int x) {
+ q[0] = (uint64)x;
+ q[1] = 0;
+ }
+
+ vduint128(unsigned x) {
+ q[0] = x;
+ q[1] = 0;
+ }
+
+ vduint128(uint64 hi, uint64 lo) {
+ q[0] = lo;
+ q[1] = hi;
+ }
+
+ uint64 getHi() const { return q[1]; }
+ uint64 getLo() const { return q[0]; }
+
+ operator sint64() const {
+ return (sint64)q[0];
+ }
+
+ operator uint64() const {
+ return (uint64)q[0];
+ }
+
+ bool operator==(const vduint128& x) const {
+ return q[1] == x.q[1] && q[0] == x.q[0];
+ }
+
+ bool operator!=(const vduint128& x) const {
+ return q[1] != x.q[1] || q[0] != x.q[0];
+ }
+
+ bool operator<(const vduint128& x) const {
+ return q[1] < x.q[1] || (q[1] == x.q[1] && q[0] < x.q[0]);
+ }
+
+ bool operator<=(const vduint128& x) const {
+ return q[1] < x.q[1] || (q[1] == x.q[1] && q[0] <= x.q[0]);
+ }
+
+ bool operator>(const vduint128& x) const {
+ return q[1] > x.q[1] || (q[1] == x.q[1] && q[0] > x.q[0]);
+ }
+
+ bool operator>=(const vduint128& x) const {
+ return q[1] > x.q[1] || (q[1] == x.q[1] && q[0] >= x.q[0]);
+ }
+
+ const vduint128 operator+(const vduint128& x) const {
+ vduint128 t;
+ vdasm_uint128_add(t.q, q, x.q);
+ return t;
+ }
+
+ const vduint128 operator-(const vduint128& x) const {
+ vduint128 t;
+ vdasm_uint128_sub(t.q, q, x.q);
+ return t;
+ }
+
+ const vduint128& operator+=(const vduint128& x) {
+ vdasm_uint128_add(q, q, x.q);
+ return *this;
+ }
+
+ const vduint128& operator-=(const vduint128& x) {
+ vdasm_uint128_sub(q, q, x.q);
+ return *this;
+ }
+
+ const vduint128 operator*(const vduint128& x) const;
+
+ const vduint128 operator-() const {
+ vduint128 t(0U);
+ vdasm_uint128_sub((uint64 *)t.q, (const uint64 *)t.q, (const uint64 *)q);
+ return t;
+ }
+
+ vduint128& operator<<=(int count) {
+ return operator=(operator<<(count));
+ }
+
+ vduint128& operator>>=(int count) {
+ return operator=(operator>>(count));
+ }
+
+#ifdef _M_AMD64
+ const vduint128 operator<<(int count) const {
+ vduint128 t;
+
+ if (count >= 64) {
+ t.q[0] = 0;
+ t.q[1] = q[0] << (count-64);
+ } else {
+ t.q[0] = q[0] << count;
+ t.q[1] = __shiftleft128(q[0], q[1], count);
+ }
+
+ return t;
+ }
+
+ const vduint128 operator>>(int count) const {
+ vduint128 t;
+
+ if (count >= 64) {
+ t.q[0] = q[1] >> (count-64);
+ t.q[1] = 0;
+ } else {
+ t.q[0] = __shiftright128(q[0], q[1], count);
+ t.q[1] = q[1] >> count;
+ }
+
+ return t;
+ }
+#else
+ const vduint128 operator<<(int v) const;
+ const vduint128 operator>>(int v) const;
+#endif
+};
+
+#ifdef _M_AMD64
+ inline vduint128 VDUMul64x64To128(uint64 x, uint64 y) {
+ vduint128 result;
+ result.q[0] = _umul128(x, y, &result.q[1]);
+ return result;
+ }
+ uint64 VDUDiv128x64To64(const vduint128& dividend, uint64 divisor, uint64& remainder);
+#else
+ vduint128 VDUMul64x64To128(uint64 x, uint64 y);
+ uint64 VDUDiv128x64To64(const vduint128& dividend, uint64 divisor, uint64& remainder);
+#endif
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/list.h b/src/thirdparty/VirtualDub/h/vd2/system/list.h
new file mode 100644
index 000000000..e2c39b4e5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/list.h
@@ -0,0 +1,275 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_LIST_H
+#define f_LIST_H
+
+class ListNode {
+public:
+ ListNode *next, *prev;
+
+ void Remove() {
+ next->prev = prev;
+ prev->next = next;
+#ifdef _DEBUG
+ prev = next = 0;
+#endif
+ }
+
+ void InsertAfter(ListNode *node) {
+ next = node;
+ prev = node->prev;
+ if (node->prev) node->prev->next = this;
+ node->prev = this;
+ }
+
+ void InsertBefore(ListNode *node) {
+ next = node->next;
+ prev = node;
+ if (node->next) node->next->prev = this;
+ node->next = this;
+ }
+
+ ListNode *NextFromHead() const {
+ return prev;
+ }
+
+ ListNode *NextFromTail() const {
+ return next;
+ }
+};
+
+class List {
+private:
+public:
+ ListNode head, tail;
+
+ // <--- next prev --->
+ //
+ // head <-> node <-> node <-> tail
+
+ List();
+ List(int) {}
+
+ void Init();
+
+ void AddHead(ListNode *node) {
+ node->InsertAfter(&head);
+ }
+
+ void AddTail(ListNode *node) {
+ node->InsertBefore(&tail);
+ }
+
+ ListNode *RemoveHead();
+ ListNode *RemoveTail();
+
+ bool IsEmpty() const {
+ return !head.prev->prev;
+ }
+
+ ListNode *AtHead() const {
+ return head.prev;
+ }
+
+ ListNode *AtTail() const {
+ return tail.next;
+ }
+
+ void Take(List& from);
+ void Swap(List& with);
+};
+
+// Templated classes... templated classes good.
+
+template<class T> class List2;
+
+template<class T>
+class ListNode2 : public ListNode {
+friend List2<T>;
+public:
+ void InsertBefore(ListNode2<T> *node) { ListNode::InsertBefore(node); }
+ void InsertAfter(ListNode2<T> *node) { ListNode::InsertAfter(node); }
+
+ void Remove() { ListNode::Remove(); }
+ T *NextFromHead() const { return static_cast<T *>(static_cast<ListNode2<T>*>(ListNode::NextFromHead())); }
+ T *NextFromTail() const { return static_cast<T *>(static_cast<ListNode2<T>*>(ListNode::NextFromTail())); }
+};
+
+template<class T>
+class List2 : public List {
+public:
+ List2<T>() {}
+
+ // This is a really lame, stupid way to postpone initialization of the
+ // list.
+
+ List2<T>(int v) : List(v) {}
+
+ void AddHead(ListNode2<T> *node) { List::AddHead(node); }
+ void AddTail(ListNode2<T> *node) { List::AddTail(node); }
+ T *RemoveHead() { return static_cast<T *>(static_cast<ListNode2<T>*>(List::RemoveHead())); }
+ T *RemoveTail() { return static_cast<T *>(static_cast<ListNode2<T>*>(List::RemoveTail())); }
+ T *AtHead() const { return static_cast<T *>(static_cast<ListNode2<T>*>(List::AtHead())); }
+ T *AtTail() const { return static_cast<T *>(static_cast<ListNode2<T>*>(List::AtTail())); }
+
+ // I must admit to being pampered by STL (end is different though!!)
+
+ T *begin() const { return AtHead(); }
+ T *end() const { return AtTail(); }
+
+ void take(List2<T>& from) { List::Take(from); }
+
+ class iterator {
+ protected:
+ ListNode2<T> *node;
+ ListNode2<T> *next;
+
+ public:
+ iterator() {}
+ iterator(const iterator& src) throw() : node(src.node), next(src.next) {}
+
+ bool operator!() const throw() { return 0 == next; }
+ T *operator->() const throw() { return (T *)node; }
+ operator bool() const throw() { return 0 != next; }
+ operator T *() const throw() { return (T *)node; }
+ T& operator *() const throw() { return *(T *)node; }
+ };
+
+ // fwit: forward iterator (SAFE if node disappears) -- see the usage sketch after this class
+ // rvit: reverse iterator (SAFE if node disappears)
+
+ class fwit : public iterator {
+ public:
+ fwit() throw() {}
+ fwit(const fwit& src) throw() : iterator(src) {}
+ fwit(ListNode2<T> *start) throw() {
+ node = start;
+ next = start->NextFromHead();
+ }
+
+ const fwit& operator=(ListNode2<T> *start) throw() {
+ node = start;
+ next = start->NextFromHead();
+
+ return *this;
+ }
+
+ fwit& operator++() throw() {
+ node = next;
+ next = node->NextFromHead();
+
+ return *this;
+ }
+
+ const fwit& operator+=(int v) throw() {
+ while(next && v--) {
+ node = next;
+ next = node->NextFromHead();
+ }
+
+ return *this;
+ }
+
+ fwit operator+(int v) const throw() {
+ fwit t(*this);
+
+ t += v;
+
+ return t;
+ }
+
+ // This one's for my sanity.
+
+ void operator++(int) throw() {
+ ++*this;
+ }
+ };
+
+ class rvit : public iterator {
+ public:
+ rvit() throw() {}
+
+ rvit(ListNode2<T> *start) throw() {
+ node = start;
+ next = start->NextFromTail();
+ }
+
+ const rvit& operator=(ListNode2<T> *start) throw() {
+ node = start;
+ next = start->NextFromTail();
+
+ return *this;
+ }
+
+ rvit& operator--() throw() {
+ node = next;
+ next = node->NextFromTail();
+
+ return *this;
+ }
+
+ const rvit& operator-=(int v) throw() {
+ while(next && v--) {
+ node = next;
+ next = node->NextFromTail();
+ }
+
+ return *this;
+ }
+
+ rvit operator-(int v) const throw() {
+ rvit t(*this);
+
+ t -= v;
+
+ return t;
+ }
+
+ // This one's for my sanity.
+
+ void operator--(int) throw() {
+ --*this;
+ }
+ };
+};
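+
+// Illustrative usage sketch (not part of the original header; Job and IsDone()
+// are made up). Because fwit caches the next node up front, the current node
+// may be unlinked and deleted while iterating:
+//
+//   class Job : public ListNode2<Job> { /* ... */ };
+//
+//   List2<Job> jobs;
+//   for(List2<Job>::fwit it(jobs.AtHead()); it; ++it) {
+//       Job& job = *it;
+//       if (job.IsDone()) {
+//           job.Remove();     // safe: the iterator already holds the next node
+//           delete &job;
+//       }
+//   }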
+
+template<class T>
+class ListAlloc : public List2<T> {
+public:
+ ListAlloc<T>() {}
+ ~ListAlloc<T>() {
+ dispose();
+ }
+
+ void dispose() {
+ T *node;
+
+ while(node = RemoveHead())
+ delete node;
+ }
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/log.h b/src/thirdparty/VirtualDub/h/vd2/system/log.h
new file mode 100644
index 000000000..b36e36e7e
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/log.h
@@ -0,0 +1,70 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_LOG_H
+#define f_VD2_SYSTEM_LOG_H
+
+#include <vd2/system/VDString.h>
+#include <list>
+
+class IVDLogger {
+public:
+ virtual void AddLogEntry(int severity, const VDStringW& s) = 0;
+};
+
+enum {
+ kVDLogInfo, kVDLogMarker, kVDLogWarning, kVDLogError
+};
+
+void VDLog(int severity, const VDStringW& s);
+void VDLogF(int severity, const wchar_t *format, ...);
+void VDAttachLogger(IVDLogger *pLogger, bool bThisThreadOnly, bool bReplayLog);
+void VDDetachLogger(IVDLogger *pLogger);
+
+class VDAutoLogger : public IVDLogger {
+public:
+ struct Entry {
+ int severity;
+ VDStringW text;
+
+ Entry(int sev, const VDStringW& s) : severity(sev), text(s) {}
+ };
+
+ typedef std::list<Entry> tEntries;
+
+ VDAutoLogger(int min_severity);
+ ~VDAutoLogger();
+
+ void AddLogEntry(int severity, const VDStringW& s);
+
+ const tEntries& GetEntries();
+
+protected:
+ tEntries mEntries;
+ const int mMinSeverity;
+ bool mbAttached;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/math.h b/src/thirdparty/VirtualDub/h/vd2/system/math.h
new file mode 100644
index 000000000..aa4d03f77
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/math.h
@@ -0,0 +1,259 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_MATH_H
+#define f_VD2_SYSTEM_MATH_H
+
+#include <math.h>
+#include <vd2/system/vdtypes.h>
+
+// Constants
+namespace nsVDMath {
+ static const float kfPi = 3.1415926535897932384626433832795f;
+ static const double krPi = 3.1415926535897932384626433832795;
+ static const float kfTwoPi = 6.283185307179586476925286766559f;
+ static const double krTwoPi = 6.283185307179586476925286766559;
+ static const float kfLn2 = 0.69314718055994530941723212145818f;
+ static const double krLn2 = 0.69314718055994530941723212145818;
+ static const float kfLn10 = 2.3025850929940456840179914546844f;
+ static const double krLn10 = 2.3025850929940456840179914546844;
+ static const float kfOneOverLn10 = 0.43429448190325182765112891891661f;
+ static const double krOneOverLn10 = 0.43429448190325182765112891891661;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// Integer clamping functions
+//
+#ifdef _M_IX86
+ inline uint32 VDClampToUint32(sint64 v) {
+ union U {
+ __int64 v64;
+ struct {
+ unsigned lo;
+ int hi;
+ } v32;
+ };
+
+ return ((U *)&v)->v32.hi ? ~(((U *)&v)->v32.hi >> 31) : ((U *)&v)->v32.lo;
+ }
+#else
+ inline uint32 VDClampToUint32(sint64 v) {
+ uint32 r = (uint32)v;
+ return r == v ? r : (uint32)~(sint32)(v>>63);
+ }
+#endif
+
+inline sint32 VDClampToSint32(uint32 v) {
+ return (v | ((sint32)v >> 31)) & 0x7FFFFFFF;
+}
+
+inline sint32 VDClampToSint32(sint64 v) {
+ sint32 r = (sint32)v;
+ return r == v ? r : (sint32)(v >> 63) ^ 0x7FFFFFFF;
+}
+
+inline uint16 VDClampToUint16(uint32 v) {
+ if (v > 0xffff)
+ v = 0xffff;
+ return (uint16)v;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Absolute value functions
+inline sint64 VDAbs64(sint64 v) {
+ return v<0 ? -v : v;
+}
+
+inline ptrdiff_t VDAbsPtrdiff(ptrdiff_t v) {
+ return v<0 ? -v : v;
+}
+
+// Rounding functions
+//
+// Round a double to an int or a long. Behavior is not specified at
+// int(y)+0.5, if x is NaN or Inf, or if x is out of range.
+
+int VDRoundToInt(double x);
+long VDRoundToLong(double x);
+sint32 VDRoundToInt32(double x);
+sint64 VDRoundToInt64(double x);
+
+inline sint32 VDRoundToIntFast(float x) {
+ union {
+ float f;
+ sint32 i;
+ } u = {x + 12582912.0f}; // 2^22+2^23
+
+ return (sint32)u.i - 0x4B400000;
+}
+
+inline sint32 VDRoundToIntFastFullRange(double x) {
+ union {
+ double f;
+ sint32 i[2];
+ } u = {x + 6755399441055744.0f}; // 2^51+2^52
+
+ return (sint32)u.i[0];
+}
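+
+// Worked example of the bias trick above (illustrative, not part of the
+// original header): for VDRoundToIntFast(3.6f),
+//
+//   3.6f + 12582912.0f = 12582915.6, which rounds to 12582916.0f because
+//   floats in [2^23, 2^24) are spaced exactly 1.0 apart;
+//   the bit pattern of 12582916.0f is 0x4B400004, and
+//   0x4B400004 - 0x4B400000 = 4, i.e. the result is round(3.6) = 4.
+//
+// Exactly-half inputs follow the current rounding mode (round to nearest-even
+// by default), which is why behavior at .5 is left unspecified above.
+// VDRoundToIntFastFullRange applies the same idea with 2^52+2^51.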
+
+#ifdef _M_AMD64
+ inline sint32 VDFloorToInt(double x) {
+ return (sint32)floor(x);
+ }
+
+ inline sint64 VDFloorToInt64(double x) {
+ return (sint64)floor(x);
+ }
+#else
+ #pragma warning(push)
+ #pragma warning(disable: 4035) // warning C4035: 'VDFloorToInt' : no return value
+ inline sint32 VDFloorToInt(double x) {
+ sint32 temp;
+
+ __asm {
+ fld x
+ fist temp
+ fild temp
+ mov eax, temp
+ fsub
+ fstp temp
+ cmp temp, 80000001h
+ adc eax, -1
+ }
+ }
+ inline sint64 VDFloorToInt64(double x) {
+ sint64 temp;
+ sint32 temp2;
+
+ __asm {
+ fld x
+ fld st(0)
+ fistp qword ptr temp
+ fild qword ptr temp
+ mov eax, dword ptr temp
+ mov edx, dword ptr temp+4
+ fsub
+ fstp dword ptr temp2
+ cmp dword ptr temp2, 80000001h
+ adc eax, -1
+ adc edx, -1
+ }
+ }
+ #pragma warning(pop)
+#endif
+
+#ifdef _M_AMD64
+ inline sint32 VDCeilToInt(double x) {
+ return (sint32)ceil(x);
+ }
+
+ inline sint64 VDCeilToInt64(double x) {
+ return (sint64)ceil(x);
+ }
+#else
+ #pragma warning(push)
+ #pragma warning(disable: 4035) // warning C4035: 'VDCeilToInt' : no return value
+ inline sint32 VDCeilToInt(double x) {
+ sint32 temp;
+
+ __asm {
+ fld x
+ fist temp
+ fild temp
+ mov eax, temp
+ fsubr
+ fstp temp
+ cmp temp, 80000001h
+ sbb eax, -1
+ }
+ }
+
+ inline sint64 VDCeilToInt64(double x) {
+ sint64 temp;
+ sint32 temp2;
+
+ __asm {
+ fld x
+ fld st(0)
+ fistp temp
+ fild temp
+ mov eax, dword ptr temp
+ mov edx, dword ptr temp+4
+ fsubr
+ fstp temp2
+ cmp temp2, 80000001h
+ sbb eax, -1
+ sbb edx, -1
+ }
+ }
+ #pragma warning(pop)
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+inline sint16 VDClampedRoundFixedToInt16Fast(float x) {
+ union {
+ float f;
+ sint32 i;
+ } u = {x + 384.0f}; // 2^7+2^8
+
+ sint32 v = (sint32)u.i - 0x43BF8000;
+
+ if ((uint32)v >= 0x10000)
+ v = ~v >> 31;
+
+ return (sint16)(v - 0x8000);
+}
+
+inline uint8 VDClampedRoundFixedToUint8Fast(float x) {
+ union {
+ float f;
+ sint32 i;
+ } u = {x * 255.0f + 12582912.0f}; // 2^22+2^23
+
+ sint32 v = (sint32)u.i - 0x4B400000;
+
+ if ((uint32)v >= 0xFF)
+ v = ~v >> 31;
+
+ return (uint8)v;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _M_IX86
+ sint64 __stdcall VDFractionScale64(uint64 a, uint32 b, uint32 c, uint32& remainder);
+ uint64 __stdcall VDUMulDiv64x32(uint64 a, uint32 b, uint32 c);
+#else
+ extern "C" sint64 VDFractionScale64(uint64 a, uint64 b, uint64 c, uint32& remainder);
+ extern "C" uint64 VDUMulDiv64x32(uint64 a, uint32 b, uint32 c);
+#endif
+
+sint64 VDMulDiv64(sint64 a, sint64 b, sint64 c);
+
+///////////////////////////////////////////////////////////////////////////
+
+bool VDVerifyFiniteFloats(const float *p, uint32 n);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/memory.h b/src/thirdparty/VirtualDub/h/vd2/system/memory.h
new file mode 100644
index 000000000..56decc401
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/memory.h
@@ -0,0 +1,84 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_MEMORY_H
+#define f_VD2_SYSTEM_MEMORY_H
+
+#include <vd2/system/vdtypes.h>
+
+void *VDAlignedMalloc(size_t n, unsigned alignment);
+void VDAlignedFree(void *p);
+
+template<unsigned alignment>
+struct VDAlignedObject {
+ inline void *operator new(size_t n) { return VDAlignedMalloc(n, alignment); }
+ inline void operator delete(void *p) { VDAlignedFree(p); }
+};
+
+void *VDAlignedVirtualAlloc(size_t n);
+void VDAlignedVirtualFree(void *p);
+
+extern void (__cdecl *VDSwapMemory)(void *p0, void *p1, size_t bytes);
+
+void VDInvertMemory(void *p, unsigned bytes);
+
+bool VDIsValidReadRegion(const void *p, size_t bytes);
+bool VDIsValidWriteRegion(void *p, size_t bytes);
+
+bool VDCompareRect(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, size_t w, size_t h);
+
+const void *VDMemCheck8(const void *src, uint8 value, size_t count);
+
+void VDMemset8(void *dst, uint8 value, size_t count);
+void VDMemset16(void *dst, uint16 value, size_t count);
+void VDMemset24(void *dst, uint32 value, size_t count);
+void VDMemset32(void *dst, uint32 value, size_t count);
+void VDMemset64(void *dst, uint64 value, size_t count);
+void VDMemset128(void *dst, const void *value, size_t count);
+void VDMemsetPointer(void *dst, const void *value, size_t count);
+
+void VDMemset8Rect(void *dst, ptrdiff_t pitch, uint8 value, size_t w, size_t h);
+void VDMemset16Rect(void *dst, ptrdiff_t pitch, uint16 value, size_t w, size_t h);
+void VDMemset24Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h);
+void VDMemset32Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h);
+
+#if defined(_WIN32) && defined(_M_IX86)
+ extern void (__cdecl *VDFastMemcpyPartial)(void *dst, const void *src, size_t bytes);
+ extern void (__cdecl *VDFastMemcpyFinish)();
+ void VDFastMemcpyAutodetect();
+#else
+ void VDFastMemcpyPartial(void *dst, const void *src, size_t bytes);
+ void VDFastMemcpyFinish();
+ void VDFastMemcpyAutodetect();
+#endif
+
+
+void VDMemcpyRect(void *dst, ptrdiff_t dststride, const void *src, ptrdiff_t srcstride, size_t w, size_t h);
+
+/// Copy a region of memory with an access violation guard; used in cases where a sporadic
+/// AV is unavoidable (dynamic Direct3D VB under XP). The regions must not overlap.
+bool VDMemcpyGuarded(void *dst, const void *src, size_t bytes);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/profile.h b/src/thirdparty/VirtualDub/h/vd2/system/profile.h
new file mode 100644
index 000000000..ff4f1b3d7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/profile.h
@@ -0,0 +1,167 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_PROFILE_H
+#define f_VD2_SYSTEM_PROFILE_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/vdstl.h>
+#include <vector>
+
+class VDRTProfiler;
+
+void VDInitProfilingSystem();
+void VDDeinitProfilingSystem();
+VDRTProfiler *VDGetRTProfiler();
+
+//
+// VDRTProfiler Real-time profiler
+//
+// This class forms the base for a very simple real-time profiler: threads
+// record events in channels, and periodically, someone swaps the active
+// recording array with a second array, and draws the sampled events off
+// that array. In VirtualDub, this is done via RTProfileDisplay. Events
+// are sampled via the high-performance counter in Win32, but clients need
+// never know this fact.
+//
+// All methods in VDRTProfiler are thread-safe. However, it is assumed
+// that only one client will be calling Swap() and accessing the Paint
+// channel set. Swap() should be called from rather low-level code as
+// it may introduce deadlocks otherwise.
+//
+// Strings passed to VDRTProfiler must be constant data in the main EXE.
+// No dynamic strings or DLLs. The reason is that there is an
+// indefinite delay between a call to FreeChannel() and the last time
+// data from that channel is displayed.
+//
+// Channels are not restricted to a particular thread; it is permissible
+// to allocate a channel in one thread and use it in another. However,
+// channels must not be simultaneously used by two threads -- that will
+// generate interesting output.
+//
+class VDRTProfiler {
+public:
+ enum CounterType {
+ kCounterTypeUint32,
+ kCounterTypeDouble
+ };
+
+public:
+ VDRTProfiler();
+ ~VDRTProfiler();
+
+ void BeginCollection();
+ void EndCollection();
+ void Swap();
+
+ bool IsEnabled() const { return mbEnableCollection; }
+
+ int AllocChannel(const char *name);
+ void FreeChannel(int ch);
+ void BeginEvent(int channel, uint32 color, const char *name);
+ void EndEvent(int channel);
+
+ void RegisterCounterD(const char *name, const double *val);
+ void RegisterCounterU32(const char *name, const uint32 *val);
+ void RegisterCounter(const char *name, const void *val, CounterType type);
+ void UnregisterCounter(void *p);
+
+public:
+ struct Event {
+ uint64 mStartTime;
+ uint64 mEndTime; // only last 32 bits of counter
+ uint32 mColor;
+ const char *mpName;
+ };
+
+ struct Channel {
+ const char *mpName;
+ bool mbEventPending;
+ vdfastvector<Event> mEventList;
+ };
+
+ struct Counter {
+ const char *mpName;
+ const void *mpData;
+ CounterType mType;
+ union {
+ uint32 u32;
+ double d;
+ } mData, mDataLast;
+ };
+
+ struct CounterByNamePred;
+
+ typedef std::vector<Channel> tChannels;
+ typedef vdfastvector<Counter> Counters;
+
+ VDCriticalSection mLock;
+ tChannels mChannelArray;
+ tChannels mChannelArrayToPaint;
+ Counters mCounterArray;
+ Counters mCounterArrayToPaint;
+ uint64 mPerfFreq;
+ uint64 mSnapshotTime;
+
+ volatile bool mbEnableCollection;
+};
+
+//
+// VDRTProfileChannel
+//
+// This helper simply makes channel acquisition easier. It automatically
+// stubs out if no profiler is available. However, it's still advisable
+// not to call this from your inner loop!
+//
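+// A typical use (sketch; the work function and the color value are arbitrary, and
+// per the notes above the channel name must be a string literal):
+//
+//	VDRTProfileChannel chan("Video decode");
+//	chan.Begin(0xffe0c0, "Decode frame");
+//	DecodeOneFrame();
+//	chan.End();
+//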
+class VDRTProfileChannel {
+public:
+ VDRTProfileChannel(const char *name)
+ : mpProfiler(VDGetRTProfiler())
+ , mProfileChannel(mpProfiler ? mpProfiler->AllocChannel(name) : 0)
+ {
+ }
+ ~VDRTProfileChannel() {
+ if (mpProfiler)
+ mpProfiler->FreeChannel(mProfileChannel);
+ }
+
+ void Begin(uint32 color, const char *name) {
+ if (mpProfiler)
+ mpProfiler->BeginEvent(mProfileChannel, color, name);
+ }
+
+ void End() {
+ if (mpProfiler)
+ mpProfiler->EndEvent(mProfileChannel);
+ }
+
+protected:
+ VDRTProfiler *const mpProfiler;
+ int mProfileChannel;
+};
+
+#endif
+
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/progress.h b/src/thirdparty/VirtualDub/h/vd2/system/progress.h
new file mode 100644
index 000000000..976e3c6e3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/progress.h
@@ -0,0 +1,96 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_SYSTEM_PROGRESS_H
+#define f_SYSTEM_PROGRESS_H
+
+#include <vd2/system/error.h>
+
+class VDAtomicInt;
+class VDSignalPersistent;
+
+class IProgress {
+public:
+ virtual void Error(const char *)=0;
+ virtual void Warning(const char *)=0;
+ virtual bool Query(const char *query, bool fDefault)=0;
+ virtual void ProgressStart(const char *text, const char *caption, const char *progtext, long lMax)=0;
+ virtual void ProgressAdvance(long)=0;
+ virtual void ProgressEnd()=0;
+ virtual void Output(const char *text)=0;
+ virtual VDAtomicInt *ProgressGetAbortFlag()=0;
+ virtual VDSignalPersistent *ProgressGetAbortSignal()=0;
+};
+
+
+void ProgressSetHandler(IProgress *pp);
+IProgress *ProgressGetHandler();
+
+bool ProgressCheckAbort();
+void ProgressSetAbort(bool bNewValue);
+VDSignalPersistent *ProgressGetAbortSignal();
+void ProgressError(const class MyError&);
+void ProgressWarning(const char *format, ...);
+void ProgressOutput(const char *format, ...);
+bool ProgressQuery(bool fDefault, const char *format, ...);
+void ProgressStart(long lMax, const char *caption, const char *progresstext, const char *format, ...);
+void ProgressAdvance(long lNewValue);
+void ProgressEnd();
+
+
+class VDProgress {
+public:
+ VDProgress(long lMax, const char *caption, const char *progresstext, const char *format, ...) {
+ ProgressStart(lMax, caption, progresstext, format);
+ }
+
+ ~VDProgress() {
+ ProgressEnd();
+ }
+
+ void advance(long v) {
+ ProgressAdvance(v);
+ }
+};
+
+class VDProgressAbortable {
+public:
+ VDProgressAbortable(long lMax, const char *caption, const char *progresstext, const char *format, ...) {
+ ProgressStart(lMax, caption, progresstext, format);
+ ProgressSetAbort(false);
+ }
+
+ ~VDProgressAbortable() {
+ ProgressEnd();
+ }
+
+ void advance(long v) {
+ if (ProgressCheckAbort())
+ throw MyUserAbortError();
+ ProgressAdvance(v);
+ }
+};
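+
+// Usage sketch (the work loop is hypothetical). VDProgressAbortable reports through
+// the installed IProgress handler and throws MyUserAbortError on a user abort:
+//
+//	VDProgressAbortable progress(fileCount, "Copying", "files copied", "Copying files");
+//	for(long i = 0; i < fileCount; ++i) {
+//		CopyOneFile(i);
+//		progress.advance(i + 1);
+//	}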
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/protscope.h b/src/thirdparty/VirtualDub/h/vd2/system/protscope.h
new file mode 100644
index 000000000..6c22a54ad
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/protscope.h
@@ -0,0 +1,245 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_PROTSCOPE_H
+#define f_VD2_SYSTEM_PROTSCOPE_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Protected scope macros
+//
+// These macros allow you to define a scope which is known to the crash
+// handler -- that is, if the application crashes within a protected scope
+// the handler will report the scope information in the crash output.
+//
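+// For example (sketch; 'frame' is a hypothetical variable, and the action string is
+// used as a printf-style format when the crash handler writes the scope out):
+//
+//	vdprotected1("decoding frame %d", int, frame) {
+//		DecodeFrame(frame);
+//	}
+//
+// The vdprotected* macros themselves are defined at the end of this header.
+//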
+
+class VDProtectedAutoScope;
+
+typedef VDProtectedAutoScope *(*tpVDGetProtectedScopeLink)();
+typedef void (*tpVDSetProtectedScopeLink)(VDProtectedAutoScope *);
+
+extern tpVDGetProtectedScopeLink g_pVDGetProtectedScopeLink;
+extern tpVDSetProtectedScopeLink g_pVDSetProtectedScopeLink;
+
+// The reason for this function is a bug in the Intel compiler regarding
+// construction optimization -- it stores VDProtectedAutoScope::'vtable'
+// in the vtable slot instead of VDProtectedAutoScope1<T>::'vtable', thus
+// killing the printf()s. "volatile" doesn't work to fix the problem, but
+// calling an opaque global function does. Oh well.
+
+#ifdef __INTEL_COMPILER
+void VDProtectedAutoScopeICLWorkaround();
+#endif
+
+class IVDProtectedScopeOutput {
+public:
+ virtual void write(const char *s) = 0;
+ virtual void writef(const char *s, ...) = 0;
+};
+
+class VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope(const char *file, int line, const char *action) : mpFile(file), mLine(line), mpAction(action), mpLink(g_pVDGetProtectedScopeLink()) {
+ // Note that the assignment to g_protectedScopeLink cannot occur here, as the
+ // derived class has not been constructed yet. Uninitialized objects in
+ // the debugging chain are *bad*.
+ }
+
+ ~VDProtectedAutoScope() {
+ g_pVDSetProtectedScopeLink(mpLink);
+ }
+
+ operator bool() const { return false; }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.write(mpAction);
+ }
+
+ VDProtectedAutoScope *mpLink;
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+};
+
+class VDProtectedAutoScopeData0 {
+public:
+ VDProtectedAutoScopeData0(const char *file, int line, const char *action) : mpFile(file), mLine(line), mpAction(action) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+};
+
+template<class T1>
+class VDProtectedAutoScopeData1 {
+public:
+ VDProtectedAutoScopeData1(const char *file, int line, const char *action, const T1 a1) : mpFile(file), mLine(line), mpAction(action), mArg1(a1) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+ const T1 mArg1;
+};
+
+template<class T1, class T2>
+class VDProtectedAutoScopeData2 {
+public:
+ VDProtectedAutoScopeData2(const char *file, int line, const char *action, const T1 a1, const T2 a2) : mpFile(file), mLine(line), mpAction(action), mArg1(a1), mArg2(a2) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+ const T1 mArg1;
+ const T2 mArg2;
+};
+
+template<class T1, class T2, class T3>
+class VDProtectedAutoScopeData3 {
+public:
+ VDProtectedAutoScopeData3(const char *file, int line, const char *action, const T1 a1, const T2 a2, const T3 a3) : mpFile(file), mLine(line), mpAction(action), mArg1(a1), mArg2(a2), mArg3(a3) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+ const T1 mArg1;
+ const T2 mArg2;
+ const T3 mArg3;
+};
+
+template<class T1, class T2, class T3, class T4>
+class VDProtectedAutoScopeData4 {
+public:
+ VDProtectedAutoScopeData4(const char *file, int line, const char *action, const T1 a1, const T2 a2, const T3 a3, const T4 a4) : mpFile(file), mLine(line), mpAction(action), mArg1(a1), mArg2(a2), mArg3(a3), mArg4(a4) {}
+ const char *const mpFile;
+ const int mLine;
+ const char *const mpAction;
+ const T1 mArg1;
+ const T2 mArg2;
+ const T3 mArg3;
+ const T4 mArg4;
+};
+
+class VDProtectedAutoScope0 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope0(const VDProtectedAutoScopeData0& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+};
+
+template<class T1>
+class VDProtectedAutoScope1 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope1(const VDProtectedAutoScopeData1<T1>& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction), mArg1(data.mArg1) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.writef(mpAction, mArg1);
+ }
+
+ const T1 mArg1;
+};
+
+template<class T1, class T2>
+class VDProtectedAutoScope2 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope2(const VDProtectedAutoScopeData2<T1,T2>& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction), mArg1(data.mArg1), mArg2(data.mArg2) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.writef(mpAction, mArg1, mArg2);
+ }
+
+ const T1 mArg1;
+ const T2 mArg2;
+};
+
+template<class T1, class T2, class T3>
+class VDProtectedAutoScope3 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope3(const VDProtectedAutoScopeData3<T1,T2,T3>& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction), mArg1(data.mArg1), mArg2(data.mArg2), mArg3(data.mArg3) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.writef(mpAction, mArg1, mArg2, mArg3);
+ }
+
+ const T1 mArg1;
+ const T2 mArg2;
+ const T3 mArg3;
+};
+
+template<class T1, class T2, class T3, class T4>
+class VDProtectedAutoScope4 : public VDProtectedAutoScope {
+public:
+ VDProtectedAutoScope4(const VDProtectedAutoScopeData4<T1,T2,T3,T4>& data) : VDProtectedAutoScope(data.mpFile, data.mLine, data.mpAction), mArg1(data.mArg1), mArg2(data.mArg2), mArg3(data.mArg3), mArg4(data.mArg4) {
+ g_pVDSetProtectedScopeLink(this);
+#ifdef __INTEL_COMPILER
+ VDProtectedAutoScopeICLWorkaround();
+#endif
+ }
+
+ virtual void Write(IVDProtectedScopeOutput& out) {
+ out.writef(mpAction, mArg1, mArg2, mArg3, mArg4);
+ }
+
+ const T1 mArg1;
+ const T2 mArg2;
+ const T3 mArg3;
+ const T4 mArg4;
+};
+
+
+#define vdprotected(action) vdobjectscope(VDProtectedAutoScope0 autoscope = VDProtectedAutoScopeData0(__FILE__, __LINE__, action))
+#define vdprotected1(actionf, type1, arg1) vdobjectscope(VDProtectedAutoScope1<type1> autoscope = VDProtectedAutoScopeData1<type1>(__FILE__, __LINE__, actionf, arg1))
+
+// @&#(* preprocessor doesn't view template brackets as escaping commas, so we have a slight
+// problem....
+
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1400 || defined(VD_COMPILER_MSVC_VC8_DDK))
+#define vdprotected2(actionf, type1, arg1, type2, arg2) if(VDProtectedAutoScope2<type1, type2> autoscope = VDProtectedAutoScopeData2<type1, type2>(__FILE__, __LINE__, actionf, arg1, arg2)) VDNEVERHERE; else
+#define vdprotected3(actionf, type1, arg1, type2, arg2, type3, arg3) if(VDProtectedAutoScope3<type1, type2, type3> autoscope = VDProtectedAutoScopeData3<type1, type2, type3>(__FILE__, __LINE__, actionf, arg1, arg2, arg3)) VDNEVERHERE; else
+#define vdprotected4(actionf, type1, arg1, type2, arg2, type3, arg3, type4, arg4) if(VDProtectedAutoScope4<type1, type2, type3, type4> autoscope = VDProtectedAutoScopeData4<type1, type2, type3, type4>(__FILE__, __LINE__, actionf, arg1, arg2, arg3, arg4)) VDNEVERHERE; else
+#else
+#define vdprotected2(actionf, type1, arg1, type2, arg2) switch(VDProtectedAutoScope2<type1, type2> autoscope = VDProtectedAutoScopeData2<type1, type2>(__FILE__, __LINE__, actionf, arg1, arg2)) case 0: default:
+#define vdprotected3(actionf, type1, arg1, type2, arg2, type3, arg3) switch(VDProtectedAutoScope3<type1, type2, type3> autoscope = VDProtectedAutoScopeData3<type1, type2, type3>(__FILE__, __LINE__, actionf, arg1, arg2, arg3)) case 0: default:
+#define vdprotected4(actionf, type1, arg1, type2, arg2, type3, arg3, type4, arg4) switch(VDProtectedAutoScope4<type1, type2, type3, type4> autoscope = VDProtectedAutoScopeData4<type1, type2, type3, type4>(__FILE__, __LINE__, actionf, arg1, arg2, arg3, arg4)) case 0: default:
+#endif
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/refcount.h b/src/thirdparty/VirtualDub/h/vd2/system/refcount.h
new file mode 100644
index 000000000..654cbe24c
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/refcount.h
@@ -0,0 +1,282 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_REFCOUNT_H
+#define f_VD2_SYSTEM_REFCOUNT_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/atomic.h>
+
+///////////////////////////////////////////////////////////////////////////
+// IVDRefCount
+/// Base interface for reference-counted objects.
+///
+/// Reference counting is a relatively straightforward and simple method
+/// of garbage collection. The rules are:
+///
+/// 1) Objects increment their reference count on an AddRef() and
+/// decrement it on a Release().
+/// 2) Objects destroy themselves when their reference count is dropped
+/// to zero.
+/// 3) Clients create references with AddRef() and destroy them with
+/// Release().
+///
+/// One way to interact with refcounted objects is to call AddRef()
+/// whenever a pointer is created, and Release() when the pointer is
+/// nulled or changed. The vdrefptr<T> template does this automatically.
+/// Reference counting may be "combined" between pointers for optimization
+/// reasons, such that fewer reference counts are outstanding than actual
+/// pointers; this requires weak (non-refcounted) pointers and explicit
+/// refcount management.
+///
+/// Reference counting has two issues:
+///
+/// A) It is expensive. VirtualDub uses it somewhat sparingly.
+///
+/// B) Reference counting cannot cope with cycles. This issue is
+/// avoided by arranging objects in a clearly ordered tree, such that
+/// no class ever holds a pointer to another object of the same class
+/// or to a parent in the reference hierarchy. vdrefptr<T> can
+/// implicitly create cycles if you are not careful.
+///
+/// In VirtualDub, reference counting must be multithread safe, so atomic
+/// increment/decrement should be used. vdrefcounted<T> handles this
+/// automatically for the template type class.
+///
+/// Two final implementation details:
+///
+/// - Little or no code should be executed after the reference count
+/// drops to zero, preferably nothing more than the destructor implicitly
+/// generated by the compiler. The reason is that otherwise there is the
+/// potential for an object to be resurrected past its final release by
+/// temporarily creating a new reference on the object.
+///
+/// - AddRef() and Release() traditionally return the reference count on
+/// the object after increment or decrement, but this is not required.
+/// For Release builds, it is only required that the value for Release()
+/// be zero iff the object is destroyed. (The same applies for AddRef(),
+/// but since the result of AddRef() is always non-zero, the return of
+/// AddRef() is of no use unless it is the actual count.)
+///
+class VDINTERFACE IVDRefCount {
+public:
+ virtual int AddRef()=0;
+ virtual int Release()=0;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// vdrefcounted<T>
+/// Implements thread-safe reference counting on top of a base class.
+///
+/// vdrefcounted<T> is used to either add reference counting to a base
+/// class or to implement it on an interface. Use it by deriving your
+/// class from it.
+///
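+/// A minimal sketch (IVDWidget is a hypothetical interface):
+///
+///	class IVDWidget : public IVDRefCount {
+///	public:
+///		virtual void Render() = 0;
+///	};
+///
+///	class VDWidget : public vdrefcounted<IVDWidget> {
+///	public:
+///		void Render() {}
+///	};
+///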
+template<class T> class vdrefcounted : public T {
+public:
+ vdrefcounted() : mRefCount(0) {}
+ vdrefcounted(const vdrefcounted<T>& src) : mRefCount(0) {} // do not copy the refcount
+ virtual ~vdrefcounted() {}
+
+ vdrefcounted<T>& operator=(const vdrefcounted<T>&) { return *this; } // do not copy the refcount
+
+ inline virtual int AddRef() {
+ return mRefCount.inc();
+ }
+
+ inline virtual int Release() {
+ int rc = --mRefCount;
+
+ if (!rc) {
+ delete this;
+ return 0;
+ }
+
+ VDASSERT(rc > 0);
+
+ return rc;
+ }
+
+protected:
+ VDAtomicInt mRefCount;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// vdrefptr<T>
+/// Reference-counting smart pointer.
+///
+/// Maintains a strong reference on any object that supports AddRef/Release
+/// semantics. This includes any interface including IVDRefCount,
+/// IVDRefUnknown, or the IUnknown interface in Microsoft COM. Because
+/// references are automatically traded as necessary, smart pointers are
+/// very useful for maintaining exception safety.
+///
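+/// A short sketch, reusing the hypothetical IVDWidget/VDWidget example above:
+///
+///	vdrefptr<IVDWidget> widget(new VDWidget);	// refcount becomes 1
+///	vdrefptr<IVDWidget> alias(widget);		// refcount becomes 2
+///	alias.clear();					// back to 1
+///	// the last reference is released when 'widget' goes out of scope
+///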
+template<class T> class vdrefptr {
+protected:
+ T *ptr;
+
+public:
+ typedef vdrefptr<T> self_type;
+ typedef T element_type;
+
+ /// Creates a new smart pointer and obtains a new reference on the
+ /// specified object.
+ explicit vdrefptr(T *p = 0) : ptr(p) {
+ if (p)
+ p->AddRef();
+ }
+
+ /// Clones a smart pointer, duplicating any held reference.
+ vdrefptr(const self_type& src) {
+ ptr = src.ptr;
+ if (ptr)
+ ptr->AddRef();
+ }
+
+ /// Destroys the smart pointer, releasing any held reference.
+ ~vdrefptr() {
+ if (ptr)
+ ptr->Release();
+ }
+
+ /// Assigns a new object to a smart pointer. Any old object is released
+ /// and the new object is addrefed.
+ inline self_type& operator=(T *src) {
+ if (src)
+ src->AddRef();
+ if (ptr)
+ ptr->Release();
+ ptr = src;
+ return *this;
+ }
+
+ /// Assigns a new object to a smart pointer. Any old object is released
+ /// and the new object is addrefed.
+ inline self_type& operator=(const vdrefptr& src) {
+ if (src.ptr)
+ src.ptr->AddRef();
+ if (ptr)
+ ptr->Release();
+ ptr = src.ptr;
+ return *this;
+ }
+
+ operator T*() const { return ptr; }
+ T& operator*() const { return *ptr; }
+ T *operator->() const { return ptr; }
+
+ /// Removes any old reference and returns a double-pointer to the nulled
+ /// internal pointer. This is useful for passing to IUnknown-derived
+ /// interfaces that accept (T **) parameters, like QueryInterface().
+ T** operator~() {
+ if (ptr) {
+ ptr->Release();
+ ptr = NULL;
+ }
+ return &ptr;
+ }
+
+ /// Removes any held reference.
+ inline void clear() {
+ if (ptr)
+ ptr->Release();
+ ptr = NULL;
+ }
+
+ /// Removes any existing reference and moves a reference from another
+ /// smart pointer. The source pointer is cleared afterward.
+ inline void from(vdrefptr& src) {
+ if (ptr)
+ ptr->Release();
+ ptr = src.ptr;
+ src.ptr = NULL;
+ }
+
+ /// Removes any existing reference and accepts a reference to a new
+ /// object without actually obtaining one. This is useful if someone
+ /// has already addrefed an object for you.
+ inline void set(T* src) {
+ if (ptr)
+ ptr->Release();
+
+ ptr = src;
+ }
+
+ /// Returns the held reference and clears the smart pointer without
+ /// releasing the reference. This is useful for holding onto a reference
+ /// in an exception-safe manner up until the last moment.
+ inline T *release() {
+ T *p = ptr;
+ ptr = NULL;
+ return p;
+ }
+
+ /// Swaps the references between two smart pointers.
+ void swap(vdrefptr& r) {
+ T *p = ptr;
+ ptr = r.ptr;
+ r.ptr = p;
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T, class U>
+bool VDRefCountObjectFactory(U **pp) {
+ T *p = new_nothrow T;
+ if (!p)
+ return false;
+
+ *pp = static_cast<U *>(p);
+ p->AddRef();
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+struct vdsaferelease_t {};
+extern vdsaferelease_t vdsaferelease;
+
+template<class T>
+inline vdsaferelease_t& operator<<=(vdsaferelease_t& x, T *& p) {
+ if (p) {
+ p->Release();
+ p = 0;
+ }
+
+ return x;
+}
+
+template<class T>
+inline vdsaferelease_t& operator,(vdsaferelease_t& x, T *& p) {
+ if (p) {
+ p->Release();
+ p = 0;
+ }
+
+ return x;
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/registry.h b/src/thirdparty/VirtualDub/h/vd2/system/registry.h
new file mode 100644
index 000000000..c9ee119da
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/registry.h
@@ -0,0 +1,84 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_REGISTRY_H
+#define f_VD2_SYSTEM_REGISTRY_H
+
+#include <vd2/system/VDString.h>
+
+class VDRegistryKey {
+private:
+ void *pHandle;
+
+public:
+ VDRegistryKey(const char *pszKey, bool global = false, bool write = true);
+ ~VDRegistryKey();
+
+ void *getRawHandle() const { return pHandle; }
+
+ bool isReady() const { return pHandle != 0; }
+
+ bool setBool(const char *pszName, bool) const;
+ bool setInt(const char *pszName, int) const;
+ bool setString(const char *pszName, const char *pszString) const;
+ bool setString(const char *pszName, const wchar_t *pszString) const;
+ bool setBinary(const char *pszName, const char *data, int len) const;
+
+ bool getBool(const char *pszName, bool def=false) const;
+ int getInt(const char *pszName, int def=0) const;
+ int getEnumInt(const char *pszName, int maxVal, int def=0) const;
+ bool getString(const char *pszName, VDStringA& s) const;
+ bool getString(const char *pszName, VDStringW& s) const;
+
+ int getBinaryLength(const char *pszName) const;
+ bool getBinary(const char *pszName, char *buf, int maxlen) const;
+
+ bool removeValue(const char *);
+};
+
+class VDRegistryValueIterator {
+public:
+ VDRegistryValueIterator(const VDRegistryKey& key);
+
+ const char *Next();
+
+protected:
+ void *mpHandle;
+ uint32 mIndex;
+ char mName[256];
+};
+
+class VDRegistryAppKey : public VDRegistryKey {
+private:
+ static VDString s_appbase;
+
+public:
+ VDRegistryAppKey();
+ VDRegistryAppKey(const char *pszKey, bool write = true);
+
+ static void setDefaultKey(const char *pszAppName);
+};
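+
+// Usage sketch (the key path and value names are examples only, not paths the
+// application actually uses):
+//
+//	VDRegistryAppKey::setDefaultKey("Software\\ExampleApp\\");
+//
+//	VDRegistryAppKey prefs("Preferences");			// writable by default
+//	prefs.setInt("Volume", 75);
+//
+//	VDRegistryAppKey prefsRead("Preferences", false);	// read-only
+//	int volume = prefsRead.getInt("Volume", 100);		// 100 if the value is missing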
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/source/bitmath.cpp b/src/thirdparty/VirtualDub/h/vd2/system/source/bitmath.cpp
new file mode 100644
index 000000000..d8eaf47ae
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/source/bitmath.cpp
@@ -0,0 +1,67 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/bitmath.h>
+
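+// Parallel (SWAR) population count: bits are summed in 2-, 4-, then 8-bit groups,
+// and the final multiply accumulates the per-byte counts into the top byte.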
+int VDCountBits(uint32 v) {
+ v -= (v >> 1) & 0x55555555;
+ v = ((v & 0xcccccccc) >> 2) + (v & 0x33333333);
+ v = (v + (v >> 4)) & 0x0f0f0f0f;
+ return (v * 0x01010101) >> 24;
+}
+
+#ifndef VD_COMPILER_MSVC_VC8
+
+ int VDFindLowestSetBit(uint32 v) {
+ for(int i=0; i<32; ++i) {
+ if (v & 1)
+ return i;
+ v >>= 1;
+ }
+
+ return 32;
+ }
+
+ int VDFindHighestSetBit(uint32 v) {
+ for(int i=31; i>=0; --i) {
+ if ((sint32)v < 0)
+ return i;
+ v += v;
+ }
+ return -1;
+ }
+
+#endif
+
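+// Rounds v up to the nearest power of two (v is returned unchanged if it is already
+// a power of two), e.g. 5 -> 8, 8 -> 8: the loop clears the lowest set bit of 2*v-1
+// until only the highest bit remains.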
+uint32 VDCeilToPow2(uint32 v) {
+ v += v;
+ --v;
+
+ while(uint32 x = v & (v - 1))
+ v = x;
+
+ return v;
+}
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/strutil.h b/src/thirdparty/VirtualDub/h/vd2/system/strutil.h
new file mode 100644
index 000000000..2f1fdf84f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/strutil.h
@@ -0,0 +1,44 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+#ifndef f_VD2_SYSTEM_STRUTIL_H
+#define f_VD2_SYSTEM_STRUTIL_H
+
+#include <string.h>
+#include <vd2/system/vdtypes.h>
+
+char *strncpyz(char *strDest, const char *strSource, size_t count);
+wchar_t *wcsncpyz(wchar_t *strDest, const wchar_t *strSource, size_t count);
+const char *strskipspace(const char *s) throw();
+
+inline char *strskipspace(char *s) throw() {
+ return const_cast<char *>(strskipspace(s));
+}
+
+size_t vdstrlcpy(char *dst, const char *src, size_t sizeChars);
+size_t vdwcslcpy(wchar_t *dst, const wchar_t *src, size_t sizeChars);
+
+size_t vdstrlcat(char *dst, const char *src, size_t sizeChars);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/text.h b/src/thirdparty/VirtualDub/h/vd2/system/text.h
new file mode 100644
index 000000000..bc8ea93f3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/text.h
@@ -0,0 +1,60 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_TEXT_H
+#define f_VD2_SYSTEM_TEXT_H
+
+#include <ctype.h>
+#include <stdarg.h>
+
+class VDStringA;
+class VDStringW;
+
+// The max_dst value must include space for the terminating NULL. The return
+// value is the number of characters produced, not counting the null terminator.
+
+int VDTextWToA(char *dst, int max_dst, const wchar_t *src, int max_src = -1);
+int VDTextAToW(wchar_t *dst, int max_dst, const char *src, int max_src = -1);
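+
+// For example (sketch): converting a wide string into a fixed-size buffer.
+//
+//	char buf[128];
+//	int chars = VDTextWToA(buf, 128, L"some wide text");	// excludes the terminator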
+
+VDStringA VDTextWToA(const wchar_t *src, int length = -1);
+VDStringA VDTextWToA(const VDStringW& sw);
+VDStringW VDTextAToW(const char *src, int length = -1);
+VDStringW VDTextAToW(const VDStringA& sw);
+
+VDStringA VDTextWToU8(const VDStringW& s);
+VDStringA VDTextWToU8(const wchar_t *s, int length);
+VDStringW VDTextU8ToW(const VDStringA& s);
+VDStringW VDTextU8ToW(const char *s, int length);
+
+// The terminating NULL character is not included in these.
+
+int VDTextWToALength(const wchar_t *s, int length=-1);
+int VDTextAToWLength(const char *s, int length=-1);
+
+VDStringW VDaswprintf(const wchar_t *format, int args, const void *const *argv);
+VDStringW VDvswprintf(const wchar_t *format, int args, va_list val);
+VDStringW VDswprintf(const wchar_t *format, int args, ...);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/thread.h b/src/thirdparty/VirtualDub/h/vd2/system/thread.h
new file mode 100644
index 000000000..6cf1fc7a0
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/thread.h
@@ -0,0 +1,269 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_THREAD_H
+#define f_VD2_SYSTEM_THREAD_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/atomic.h>
+
+typedef void *VDThreadHandle;
+typedef uint32 VDThreadID;
+typedef uint32 VDThreadId;
+typedef uint32 VDProcessId;
+
+struct _RTL_CRITICAL_SECTION;
+
+extern "C" void __declspec(dllimport) __stdcall InitializeCriticalSection(_RTL_CRITICAL_SECTION *lpCriticalSection);
+extern "C" void __declspec(dllimport) __stdcall LeaveCriticalSection(_RTL_CRITICAL_SECTION *lpCriticalSection);
+extern "C" void __declspec(dllimport) __stdcall EnterCriticalSection(_RTL_CRITICAL_SECTION *lpCriticalSection);
+extern "C" void __declspec(dllimport) __stdcall DeleteCriticalSection(_RTL_CRITICAL_SECTION *lpCriticalSection);
+extern "C" unsigned long __declspec(dllimport) __stdcall WaitForSingleObject(void *hHandle, unsigned long dwMilliseconds);
+extern "C" int __declspec(dllimport) __stdcall ReleaseSemaphore(void *hSemaphore, long lReleaseCount, long *lpPreviousCount);
+
+VDThreadID VDGetCurrentThreadID();
+VDProcessId VDGetCurrentProcessId();
+
+void VDSetThreadDebugName(VDThreadID tid, const char *name);
+void VDThreadSleep(int milliseconds);
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDThread
+//
+// VDThread is a quick way to portably create threads -- to use it,
+// derive a subclass from it that implements the ThreadRun() function.
+//
+// Win32 notes:
+//
+// The thread startup code will attempt to notify the VC++ debugger of
+// the debug name of the thread. Only the first 9 characters are used
+// by Visual C 6.0; Visual Studio .NET will accept a few dozen.
+//
+// VDThread objects must not be WaitThread()ed or destructed from a
+// DllMain() function, TLS callback for an executable, or static
+// destructor unless the thread has been detached from the object.
+// The reason is that Win32 serializes calls to DllMain() functions.
+// If you attempt to do so, you will cause a deadlock when Win32
+// attempts to fire thread detach notifications.
+//
+///////////////////////////////////////////////////////////////////////////
+
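+// A minimal sketch (the worker class and its work loop are hypothetical):
+//
+//	class VDExampleWorker : public VDThread {
+//	public:
+//		VDExampleWorker() : VDThread("ExampleWorker") {}
+//		void ThreadRun() { /* ... do the work ... */ }
+//	};
+//
+//	VDExampleWorker worker;
+//	worker.ThreadStart();
+//	...
+//	worker.ThreadWait();		// join before the object is destroyed
+//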
+class VDThread {
+public:
+ VDThread(const char *pszDebugName = NULL); // NOTE: pszDebugName must have static duration
+ ~VDThread() throw();
+
+ // external functions
+
+ bool ThreadStart(); // start thread
+ void ThreadDetach(); // detach thread (wait() won't be called)
+ void ThreadWait(); // wait for thread to finish
+
+ bool isThreadActive();
+
+ bool isThreadAttached() const { // NOTE: Will return true if thread started, even if thread has since exited
+ return mhThread != 0;
+ }
+
+ VDThreadHandle getThreadHandle() const { // get handle to thread (Win32: HANDLE)
+ return mhThread;
+ }
+
+ VDThreadID getThreadID() const { // get ID of thread (Win32: DWORD)
+ return mThreadID;
+ }
+
+ void *ThreadLocation() const; // retrieve current EIP of thread (use only for debug purposes -- may not return reliable information on syscall, etc.)
+
+ // thread-local functions
+
+ virtual void ThreadRun() = 0; // thread, come to life
+ void ThreadFinish(); // exit thread
+
+private:
+ static unsigned __stdcall StaticThreadStart(void *pThis);
+
+ const char *mpszDebugName;
+ VDThreadHandle mhThread;
+ VDThreadID mThreadID;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDCriticalSection {
+private:
+ struct CritSec { // This is a clone of CRITICAL_SECTION.
+ void *DebugInfo;
+ sint32 LockCount;
+ sint32 RecursionCount;
+ void *OwningThread;
+ void *LockSemaphore;
+ uint32 SpinCount;
+ } csect;
+
+ VDCriticalSection(const VDCriticalSection&);
+ const VDCriticalSection& operator=(const VDCriticalSection&);
+ static void StructCheck();
+public:
+ class AutoLock {
+ private:
+ VDCriticalSection& cs;
+ public:
+ AutoLock(VDCriticalSection& csect) : cs(csect) { cs.Lock(); }
+ ~AutoLock() { cs.Unlock(); }
+
+ inline operator bool() const { return false; }
+ };
+
+ VDCriticalSection() {
+ InitializeCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ ~VDCriticalSection() {
+ DeleteCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ void operator++() {
+ EnterCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ void operator--() {
+ LeaveCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ void Lock() {
+ EnterCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+
+ void Unlock() {
+ LeaveCriticalSection((_RTL_CRITICAL_SECTION *)&csect);
+ }
+};
+
+// 'vdsynchronized' keyword
+//
+// The vdsynchronized(lock) keyword emulates Java's 'synchronized' keyword, which
+// protects the following statement or block from race conditions by obtaining a
+// lock during its execution:
+//
+// vdsynchronized(list_lock) {
+// mList.pop_back();
+// if (mList.empty())
+// return false;
+// }
+//
+// The construct is exception safe and will release the lock even if a return,
+// continue, break, or thrown exception exits the block. However, hardware
+// exceptions (access violations) may not work due to synchronous model
+// exception handling.
+//
+// There are two Visual C++ bugs we need to work around here (both are in VC6 and VC7).
+//
+// 1) Declaring an object with a non-trivial destructor in a switch() condition
+// causes a C1001 INTERNAL COMPILER ERROR.
+//
+// 2) Using __LINE__ in a macro expanded in a function with Edit and Continue (/ZI)
+// breaks the preprocessor (KB article Q199057). Shame, too, because without it
+// all the autolocks look the same.
+
+#define vdsynchronized2(lock) if(VDCriticalSection::AutoLock vd__lock=(lock))VDNEVERHERE;else
+#define vdsynchronized1(lock) vdsynchronized2(lock)
+#define vdsynchronized(lock) vdsynchronized1(lock)
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDSignalBase {
+protected:
+ void *hEvent;
+
+public:
+ ~VDSignalBase();
+
+ void signal();
+ bool check();
+ void wait();
+ int wait(VDSignalBase *second);
+ int wait(VDSignalBase *second, VDSignalBase *third);
+ static int waitMultiple(const VDSignalBase **signals, int count);
+ void *getHandle() { return hEvent; }
+
+ void operator()() { signal(); }
+};
+
+class VDSignal : public VDSignalBase {
+ VDSignal(const VDSignal&);
+ VDSignal& operator=(const VDSignal&);
+public:
+ VDSignal();
+};
+
+class VDSignalPersistent : public VDSignalBase {
+ VDSignalPersistent(const VDSignalPersistent&);
+ VDSignalPersistent& operator=(const VDSignalPersistent&);
+public:
+ VDSignalPersistent();
+
+ void unsignal();
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+class VDSemaphore {
+public:
+ VDSemaphore(int initial);
+ ~VDSemaphore();
+
+ void *GetHandle() const {
+ return mKernelSema;
+ }
+
+ void Reset(int count);
+
+ void Wait() {
+ WaitForSingleObject(mKernelSema, 0xFFFFFFFFU);
+ }
+
+ bool Wait(int timeout) {
+ return 0 == WaitForSingleObject(mKernelSema, timeout);
+ }
+
+ bool TryWait() {
+ return 0 == WaitForSingleObject(mKernelSema, 0);
+ }
+
+ void Post() {
+ ReleaseSemaphore(mKernelSema, 1, NULL);
+ }
+
+private:
+ void *mKernelSema;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/thunk.h b/src/thirdparty/VirtualDub/h/vd2/system/thunk.h
new file mode 100644
index 000000000..cf92407ac
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/thunk.h
@@ -0,0 +1,76 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_THUNK_H
+#define f_VD2_SYSTEM_THUNK_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+bool VDInitThunkAllocator();
+void VDShutdownThunkAllocator();
+
+void *VDAllocateThunkMemory(size_t len);
+void VDFreeThunkMemory(void *p, size_t len);
+void VDSetThunkMemory(void *p, const void *src, size_t len);
+void VDFlushThunkMemory(void *p, size_t len);
+
+class VDFunctionThunk;
+
+VDFunctionThunk *VDCreateFunctionThunkFromMethod(void *method, void *pThis, size_t argbytes, bool stdcall_thunk);
+void VDDestroyFunctionThunk(VDFunctionThunk *pFnThunk);
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<typename T> struct VDMetaSizeofArg { enum { value = (sizeof(T) + sizeof(void *) - 1) & ~(sizeof(void *) - 1) }; };
+
+// This doesn't work for references. Sadly, these seem to get stripped during template matching.
+template<class T, class R>
+char (&VDMetaGetMethodArgBytes(R (T::*method)()))[1];
+
+template<class T, class R, class A1>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1)))[1 + VDMetaSizeofArg<A1>::value];
+
+template<class T, class R, class A1, class A2>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1, A2)))[1 + VDMetaSizeofArg<A1>::value + VDMetaSizeofArg<A2>::value];
+
+template<class T, class R, class A1, class A2, class A3>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1, A2, A3)))[1 + VDMetaSizeofArg<A1>::value + VDMetaSizeofArg<A2>::value + VDMetaSizeofArg<A3>::value];
+
+template<class T, class R, class A1, class A2, class A3, class A4>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1, A2, A3, A4)))[1 + VDMetaSizeofArg<A1>::value + VDMetaSizeofArg<A2>::value + VDMetaSizeofArg<A3>::value + VDMetaSizeofArg<A4>::value];
+
+template<class T, class R, class A1, class A2, class A3, class A4, class A5>
+char (&VDMetaGetMethodArgBytes(R (T::*method)(A1, A2, A3, A4, A5)))[1 + VDMetaSizeofArg<A1>::value + VDMetaSizeofArg<A2>::value + VDMetaSizeofArg<A3>::value + VDMetaSizeofArg<A4>::value + VDMetaSizeofArg<A5>::value];
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T, class T_Method>
+VDFunctionThunk *VDCreateFunctionThunkFromMethod(T *pThis, T_Method method, bool stdcall_thunk) {
+ return VDCreateFunctionThunkFromMethod(*(void **)&method, pThis, sizeof VDMetaGetMethodArgBytes(method) - 1, stdcall_thunk);
+}
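+
+// Usage sketch (MyWindow and OnTimer are hypothetical): the thunk lets a non-static
+// member function stand in where a plain __stdcall callback is expected.
+//
+//	VDFunctionThunk *thunk = VDCreateFunctionThunkFromMethod(this, &MyWindow::OnTimer, true);
+//	...
+//	VDDestroyFunctionThunk(thunk);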
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/time.h b/src/thirdparty/VirtualDub/h/vd2/system/time.h
new file mode 100644
index 000000000..e2da3ce4a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/time.h
@@ -0,0 +1,118 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_TIME_H
+#define f_VD2_SYSTEM_TIME_H
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/atomic.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/win32/miniwindows.h>
+
+class VDFunctionThunk;
+
+// VDGetCurrentTick: Retrieve current process timer, in milliseconds. Should only
+// be used for spacing out updates/checks, and not for precision timing. Approximate
+// resolution is 55ms under Win9x and 10-15ms under WinNT. The advantage of this
+// call is that it is usually extremely fast (just reading from the PEB).
+uint32 VDGetCurrentTick();
+
+// VDGetPreciseTick: Retrieves high-performance timer (QueryPerformanceCounter in
+// Win32). This is very precise, often <1us, but often suffers from various bugs
+// that make it undesirable for high-accuracy requirements. On x64 Windows it
+// can run at 1/2 speed when CPU throttling is enabled, and on some older buggy
+// chipsets it can skip around occasionally.
+uint64 VDGetPreciseTick();
+uint64 VDGetPreciseTicksPerSecondI();
+double VDGetPreciseTicksPerSecond();
+double VDGetPreciseSecondsPerTick();
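+
+// Timing sketch: measuring an operation in seconds with the precise tick counter.
+//
+//	uint64 start = VDGetPreciseTick();
+//	DoWork();		// hypothetical
+//	double seconds = (double)(VDGetPreciseTick() - start) * VDGetPreciseSecondsPerTick();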
+
+// VDGetAccurateTick: Reads a timer with good precision and accuracy, in
+// milliseconds. On Win9x, it has 1ms precision; on WinNT, it may have anywhere
+// from 1ms to 10-15ms, although 1ms can be forced with timeBeginPeriod().
+uint32 VDGetAccurateTick();
+
+// VDCallbackTimer is an abstraction of the Windows multimedia timer. As such, it
+// is rather expensive to instantiate, and should only be used for critical timing
+// needs... such as multimedia. Basically, there should only really be one or two
+// of these running. Win32 typically implements these as separate threads
+// triggered off a timer, so despite the outdated documentation -- which still hasn't
+// been updated from Windows 3.1 -- you can call almost any function from the
+// callback. Execution time in the callback delays other timers, however, so the
+// callback should still execute as quickly as possible.
+
+class VDINTERFACE IVDTimerCallback {
+public:
+ virtual void TimerCallback() = 0;
+};
+
+class VDCallbackTimer : private VDThread {
+public:
+ VDCallbackTimer();
+ ~VDCallbackTimer();
+
+ bool Init(IVDTimerCallback *pCB, uint32 period_ms);
+ bool Init2(IVDTimerCallback *pCB, uint32 period_100ns);
+ bool Init3(IVDTimerCallback *pCB, uint32 period_100ns, uint32 accuracy_100ns, bool precise);
+ void Shutdown();
+
+ void SetRateDelta(int delta_100ns);
+ void AdjustRate(int adjustment_100ns);
+
+ bool IsTimerRunning() const;
+
+private:
+ void ThreadRun();
+
+ IVDTimerCallback *mpCB;
+ unsigned mTimerAccuracy;
+ uint32 mTimerPeriod;
+ VDAtomicInt mTimerPeriodDelta;
+ VDAtomicInt mTimerPeriodAdjustment;
+
+ VDSignal msigExit;
+
+ volatile bool mbExit; // this doesn't really need to be atomic -- think about it
+ bool mbPrecise;
+};
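+
+// Usage sketch (the callback body is hypothetical): a 10 ms periodic callback.
+//
+//	class VDExampleClock : public IVDTimerCallback {
+//	public:
+//		void TimerCallback() { /* runs on the timer thread once per period */ }
+//	};
+//
+//	VDExampleClock clock;
+//	VDCallbackTimer timer;
+//	timer.Init(&clock, 10);		// period in milliseconds
+//	...
+//	timer.Shutdown();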
+
+
+class VDLazyTimer {
+public:
+ VDLazyTimer();
+ ~VDLazyTimer();
+
+ void SetOneShot(IVDTimerCallback *pCB, uint32 delay);
+ void Stop();
+
+protected:
+ void StaticTimeCallback(VDZHWND hwnd, VDZUINT msg, VDZUINT_PTR id, VDZDWORD time);
+
+ uint32 mTimerId;
+ VDFunctionThunk *mpThunk;
+ IVDTimerCallback *mpCB;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/tls.h b/src/thirdparty/VirtualDub/h/vd2/system/tls.h
new file mode 100644
index 000000000..2cd2ecc70
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/tls.h
@@ -0,0 +1,38 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_SYSTEM_TLS_H
+#define f_SYSTEM_TLS_H
+
+#include <ctype.h>
+
+void VDInitThreadData(const char *pszThreadName);
+void VDDeinitThreadData();
+
+typedef void (*VDThreadInitHook)(bool init, const char *threadName);
+
+void VDSetThreadInitHook(VDThreadInitHook pHook);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/unknown.h b/src/thirdparty/VirtualDub/h/vd2/system/unknown.h
new file mode 100644
index 000000000..1a3efb71b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/unknown.h
@@ -0,0 +1,77 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_UNKNOWN_H
+#define f_VD2_SYSTEM_UNKNOWN_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+
+///////////////////////////////////////////////////////////////////////////
+// IVDUnknown
+/// Base interface for runtime type discovery.
+class IVDUnknown {
+public:
+ /// Attempt to cast to another type. Returns NULL if interface is unsupported.
+ virtual void *AsInterface(uint32 id) = 0;
+
+ inline const void *AsInterface(uint32 id) const {
+ return const_cast<IVDUnknown *>(this)->AsInterface(id);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////
+// IVDRefUnknown
+/// Base interface for runtime type discovery with reference counting.
+class IVDRefUnknown : public IVDUnknown {
+public:
+ virtual int AddRef() = 0; ///< Add strong reference to object. Returns new reference count (debug builds only).
+	virtual int Release() = 0;	///< Remove strong reference from object, and destroy it if the refcount drops to zero. Returns zero if object was destroyed.
+};
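+
+// A minimal usage sketch of the reference-counting contract (pObj is assumed
+// to point to a heap-allocated object implementing IVDRefUnknown):
+//
+//		pObj->AddRef();				// take an additional strong reference
+//		...use the object...
+//		if (!pObj->Release())		// drop the reference; zero means the object
+//			pObj = NULL;			// destroyed itself, so forget the pointer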
+
+template<class T>
+inline uint32 vdpoly_id_from_ptr(T *p) {
+ return T::kTypeID;
+}
+
+///////////////////////////////////////////////////////////////////////////
+// vdpoly_cast
+/// Performs a runtime polymorphic cast on an IUnknown-based object.
+///
+/// \param pUnk Pointer to cast. May be NULL.
+///
+/// Attempts to cast a pointer to a different type using the
+/// \c AsInterface() method. The destination type must support the
+/// \c kTypeID convention for returning the type ID.
+///
+template<class T>
+T vdpoly_cast(IVDUnknown *pUnk) {
+ return pUnk ? (T)pUnk->AsInterface(vdpoly_id_from_ptr(T(NULL))) : NULL;
+}
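+
+// A minimal usage sketch (IVDExampleSink and its kTypeID value are
+// hypothetical and not part of this header; a concrete implementation would
+// return 'this' from AsInterface() when passed kTypeID):
+//
+//		class IVDExampleSink : public IVDRefUnknown {
+//		public:
+//			enum { kTypeID = 'xsnk' };
+//		};
+//
+//		void UseSink(IVDUnknown *pUnk) {
+//			IVDExampleSink *pSink = vdpoly_cast<IVDExampleSink *>(pUnk);
+//			if (pSink) {
+//				// pUnk supports the interface; pSink is safe to use.
+//			}
+//		}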
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vdalloc.h b/src/thirdparty/VirtualDub/h/vd2/system/vdalloc.h
new file mode 100644
index 000000000..2c9fa2efd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vdalloc.h
@@ -0,0 +1,123 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDALLOC_H
+#define f_VD2_SYSTEM_VDALLOC_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <stdlib.h>
+
+// Why don't I use STL auto_ptr? Two reasons. First, auto_ptr has
+// the overhead of an ownership flag, and second, auto_ptr can't
+// be used with malloc() blocks. So think of these as auto_ptr
+// objects, but not quite....
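+//
+// A minimal usage sketch (MyFilter is a hypothetical client type):
+//
+//		vdautoptr<MyFilter> filter(new MyFilter);	// released with delete
+//		vdautoblockptr buf(malloc(4096));			// released with free()
+//		filter->Run();
+//		MyFilter *raw = filter.release();			// caller takes ownership back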
+
+#pragma warning(push)
+#pragma warning(disable: 4284) // operator-> must return pointer to UDT
+
+class vdautoblockptr {
+protected:
+ void *ptr;
+
+public:
+ explicit vdautoblockptr(void *p = 0) : ptr(p) {}
+ ~vdautoblockptr() { free(ptr); }
+
+ vdautoblockptr& operator=(void *src) { free(ptr); ptr = src; return *this; }
+
+ operator void*() const { return ptr; }
+
+	vdautoblockptr& from(vdautoblockptr& src) { free(ptr); ptr=src.ptr; src.ptr=0; return *this; }
+ void *get() const { return ptr; }
+ void *release() { void *v = ptr; ptr = NULL; return v; }
+};
+
+template<class T> class vdautoptr2 {
+protected:
+ T *ptr;
+
+public:
+ explicit vdautoptr2(T *p = 0) : ptr(p) {}
+ ~vdautoptr2() { free((void *)ptr); }
+
+ vdautoptr2<T>& operator=(T *src) { free((void *)ptr); ptr = src; return *this; }
+
+ operator T*() const { return ptr; }
+ T& operator*() const { return *ptr; }
+ T *operator->() const { return ptr; }
+
+	vdautoptr2<T>& from(vdautoptr2<T>& src) { free((void *)ptr); ptr=src.ptr; src.ptr=0; return *this; }
+ T *get() const { return ptr; }
+ T *release() { T *v = ptr; ptr = NULL; return v; }
+};
+
+template<class T> class vdautoptr {
+protected:
+ T *ptr;
+
+public:
+ explicit vdautoptr(T *p = 0) : ptr(p) {}
+ ~vdautoptr() { delete ptr; }
+
+ vdautoptr<T>& operator=(T *src) { delete ptr; ptr = src; return *this; }
+
+ operator T*() const { return ptr; }
+ T& operator*() const { return *ptr; }
+ T *operator->() const { return ptr; }
+
+	vdautoptr<T>& from(vdautoptr<T>& src) { delete ptr; ptr=src.ptr; src.ptr=0; return *this; }
+ T *get() const { return ptr; }
+ T *release() { T *v = ptr; ptr = NULL; return v; }
+
+ void swap(vdautoptr<T>& other) {
+ T *p = other.ptr;
+ other.ptr = ptr;
+ ptr = p;
+ }
+};
+
+template<class T> class vdautoarrayptr {
+protected:
+ T *ptr;
+
+public:
+ explicit vdautoarrayptr(T *p = 0) : ptr(p) {}
+ ~vdautoarrayptr() { delete[] ptr; }
+
+ vdautoarrayptr<T>& operator=(T *src) { delete[] ptr; ptr = src; return *this; }
+
+ T& operator[](int offset) const { return ptr[offset]; }
+
+	vdautoarrayptr<T>& from(vdautoarrayptr<T>& src) { delete[] ptr; ptr=src.ptr; src.ptr=0; return *this; }
+ T *get() const { return ptr; }
+ T *release() { T *v = ptr; ptr = NULL; return v; }
+};
+
+#pragma warning(pop)
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vdstl.h b/src/thirdparty/VirtualDub/h/vd2/system/vdstl.h
new file mode 100644
index 000000000..aeaaf15d6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vdstl.h
@@ -0,0 +1,1610 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef VD2_SYSTEM_VDSTL_H
+#define VD2_SYSTEM_VDSTL_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdexcept>
+#include <iterator>
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/memory.h>
+
+///////////////////////////////////////////////////////////////////////////
+//
+// glue
+//
+///////////////////////////////////////////////////////////////////////////
+
+template<class Category, class T, class Distance = ptrdiff_t, class Pointer = T*, class Reference = T&>
+struct vditerator {
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1310 || (defined(VD_COMPILER_MSVC_VC8_PSDK) || defined(VD_COMPILER_MSVC_VC8_DDK)))
+ typedef std::iterator<Category, T, Distance> type;
+#else
+ typedef std::iterator<Category, T, Distance, Pointer, Reference> type;
+#endif
+};
+
+template<class Iterator, class T>
+struct vdreverse_iterator {
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1310 || (defined(VD_COMPILER_MSVC_VC8_PSDK) || defined(VD_COMPILER_MSVC_VC8_DDK)))
+ typedef std::reverse_iterator<Iterator, T> type;
+#else
+ typedef std::reverse_iterator<Iterator> type;
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+class vdallocator_base {
+protected:
+ void VDNORETURN throw_oom();
+};
+
+template<class T>
+class vdallocator : public vdallocator_base {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ template<class U> struct rebind { typedef vdallocator<U> other; };
+
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+
+ pointer allocate(size_type n, void *p_close = 0) {
+ pointer p = (pointer)malloc(n*sizeof(T));
+
+ if (!p)
+ throw_oom();
+
+ return p;
+ }
+
+ void deallocate(pointer p, size_type n) {
+ free(p);
+ }
+
+ size_type max_size() const throw() { return ((~(size_type)0) >> 1) / sizeof(T); }
+
+ void construct(pointer p, const T& val) { new((void *)p) T(val); }
+ void destroy(pointer p) { ((T*)p)->~T(); }
+
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ char * _Charalloc(size_type n) { return rebind<char>::other::allocate(n); }
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T, unsigned kDeadZone = 16>
+class vddebug_alloc {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ template<class U> struct rebind { typedef vddebug_alloc<U, kDeadZone> other; };
+
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+
+ pointer allocate(size_type n, void *p_close = 0) {
+ pointer p = (pointer)VDAlignedMalloc(n*sizeof(T) + 2*kDeadZone, 16);
+
+ if (!p)
+ return p;
+
+ memset((char *)p, 0xa9, kDeadZone);
+ memset((char *)p + kDeadZone + n*sizeof(T), 0xa9, kDeadZone);
+
+ return (pointer)((char *)p + kDeadZone);
+ }
+
+ void deallocate(pointer p, size_type n) {
+ char *p1 = (char *)p - kDeadZone;
+ char *p2 = (char *)p + n*sizeof(T);
+
+ for(uint32 i=0; i<kDeadZone; ++i) {
+ VDASSERT(p1[i] == (char)0xa9);
+ VDASSERT(p2[i] == (char)0xa9);
+ }
+
+ VDAlignedFree(p1);
+ }
+
+	size_type		max_size() const throw()		{ return INT_MAX - 2*kDeadZone; }
+
+ void construct(pointer p, const T& val) { new((void *)p) T(val); }
+ void destroy(pointer p) { ((T*)p)->~T(); }
+
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ char * _Charalloc(size_type n) { return rebind<char>::other::allocate(n); }
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T, unsigned kAlignment = 16>
+class vdaligned_alloc {
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+ vdaligned_alloc() {}
+
+ template<class U, unsigned kAlignment2>
+ vdaligned_alloc(const vdaligned_alloc<U, kAlignment2>&) {}
+
+ template<class U> struct rebind { typedef vdaligned_alloc<U, kAlignment> other; };
+
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+
+ pointer allocate(size_type n, void *p = 0) { return (pointer)VDAlignedMalloc(n*sizeof(T), kAlignment); }
+ void deallocate(pointer p, size_type n) { VDAlignedFree(p); }
+ size_type max_size() const throw() { return INT_MAX; }
+
+ void construct(pointer p, const T& val) { new((void *)p) T(val); }
+ void destroy(pointer p) { ((T*)p)->~T(); }
+
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ char * _Charalloc(size_type n) { return rebind<char>::other::allocate(n); }
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// vdblock
+//
+// vdblock<T> is similar to vector<T>, except:
+//
+// 1) May only be used with POD types.
+// 2) No construction or destruction of elements is performed.
+// 3) Capacity is always equal to size, and reallocation is performed
+// whenever the size changes.
+// 4) Contents are undefined after a reallocation.
+// 5) No insertion or deletion operations are provided.
+//
+///////////////////////////////////////////////////////////////////////////
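+
+// A minimal usage sketch:
+//
+//		vdblock<uint32> table(256);			// size == capacity == 256, contents undefined
+//		for(uint32 i=0; i<table.size(); ++i)
+//			table[i] = i;
+//		table.resize(512);					// reallocates; previous contents are lost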
+
+template<class T, class A = vdallocator<T> >
+class vdblock : protected A {
+public:
+ typedef T value_type;
+ typedef typename A::pointer pointer;
+ typedef typename A::const_pointer const_pointer;
+ typedef typename A::reference reference;
+ typedef typename A::const_reference const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+ typedef typename vdreverse_iterator<iterator, T>::type reverse_iterator;
+ typedef typename vdreverse_iterator<const_iterator, const T>::type const_reverse_iterator;
+
+ vdblock(const A& alloc = A()) : A(alloc), mpBlock(NULL), mSize(0) {}
+ vdblock(size_type s, const A& alloc = A()) : A(alloc), mpBlock(A::allocate(s, 0)), mSize(s) {}
+ ~vdblock() {
+ if (mpBlock)
+ A::deallocate(mpBlock, mSize);
+ }
+
+ reference operator[](size_type n) { return mpBlock[n]; }
+ const_reference operator[](size_type n) const { return mpBlock[n]; }
+	reference			at(size_type n)			{ if (n >= mSize) throw std::length_error("vdblock::at"); return mpBlock[n]; }
+	const_reference		at(size_type n) const	{ if (n >= mSize) throw std::length_error("vdblock::at"); return mpBlock[n]; }
+ reference front() { return *mpBlock; }
+ const_reference front() const { return *mpBlock; }
+ reference back() { return mpBlock[mSize-1]; }
+ const_reference back() const { return mpBlock[mSize-1]; }
+
+ const_pointer data() const { return mpBlock; }
+ pointer data() { return mpBlock; }
+
+ const_iterator begin() const { return mpBlock; }
+ iterator begin() { return mpBlock; }
+ const_iterator end() const { return mpBlock + mSize; }
+ iterator end() { return mpBlock + mSize; }
+
+ const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+ reverse_iterator rbegin() { return reverse_iterator(end()); }
+ const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+ reverse_iterator rend() { return reverse_iterator(begin()); }
+
+ bool empty() const { return !mSize; }
+ size_type size() const { return mSize; }
+ size_type capacity() const { return mSize; }
+
+ void clear() {
+ if (mpBlock)
+ A::deallocate(mpBlock, mSize);
+ mpBlock = NULL;
+ mSize = 0;
+ }
+
+ void resize(size_type s) {
+ if (s != mSize) {
+ if (mpBlock) {
+ A::deallocate(mpBlock, mSize);
+ mpBlock = NULL;
+ }
+ mSize = s;
+ if (s)
+ mpBlock = A::allocate(mSize, 0);
+ }
+ }
+
+ void resize(size_type s, const T& value) {
+ if (s != mSize) {
+ if (mpBlock) {
+ A::deallocate(mpBlock, mSize);
+ mpBlock = NULL;
+ }
+ mSize = s;
+ if (s) {
+ mpBlock = A::allocate(mSize, 0);
+ std::fill(mpBlock, mpBlock+s, value);
+ }
+ }
+ }
+
+ void swap(vdblock& x) {
+ std::swap(mpBlock, x.mpBlock);
+ std::swap(mSize, x.mSize);
+ }
+
+protected:
+ typename A::pointer mpBlock;
+ typename A::size_type mSize;
+
+ union PODType {
+ T x;
+ };
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// vdstructex
+//
+// vdstructex describes an extensible format structure, such as
+// BITMAPINFOHEADER or WAVEFORMATEX, without the pain-in-the-butt
+// casting normally associated with one.
+//
+///////////////////////////////////////////////////////////////////////////
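+
+// A minimal usage sketch (cbExtra and ConsumeWaveFormat are hypothetical):
+//
+//		vdstructex<WAVEFORMATEX> wfex(sizeof(WAVEFORMATEX) + cbExtra);
+//		wfex->wFormatTag = WAVE_FORMAT_PCM;
+//		wfex->cbSize = (WORD)cbExtra;
+//		ConsumeWaveFormat(wfex.data(), (DWORD)wfex.size());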
+
+template<class T>
+class vdstructex {
+public:
+ typedef size_t size_type;
+ typedef T value_type;
+
+ vdstructex() : mpMemory(NULL), mSize(0) {}
+
+ explicit vdstructex(size_t len) : mpMemory(NULL), mSize(0) {
+ resize(len);
+ }
+
+ vdstructex(const T *pStruct, size_t len) : mSize(len), mpMemory((T*)malloc(len)) {
+ memcpy(mpMemory, pStruct, len);
+ }
+
+ vdstructex(const vdstructex<T>& src) : mSize(src.mSize), mpMemory((T*)malloc(src.mSize)) {
+ memcpy(mpMemory, src.mpMemory, mSize);
+ }
+
+ ~vdstructex() {
+ free(mpMemory);
+ }
+
+ bool empty() const { return !mpMemory; }
+ size_type size() const { return mSize; }
+ T* data() const { return mpMemory; }
+
+ T& operator *() const { return *(T *)mpMemory; }
+ T* operator->() const { return (T *)mpMemory; }
+
+ bool operator==(const vdstructex& x) const {
+ return mSize == x.mSize && (!mSize || !memcmp(mpMemory, x.mpMemory, mSize));
+ }
+
+ bool operator!=(const vdstructex& x) const {
+ return mSize != x.mSize || (mSize && memcmp(mpMemory, x.mpMemory, mSize));
+ }
+
+ vdstructex<T>& operator=(const vdstructex<T>& src) {
+ assign(src.mpMemory, src.mSize);
+ return *this;
+ }
+
+ void assign(const T *pStruct, size_type len) {
+ if (mSize != len)
+ resize(len);
+
+ memcpy(mpMemory, pStruct, len);
+ }
+
+ void clear() {
+ free(mpMemory);
+ mpMemory = NULL;
+ mSize = 0;
+ }
+
+ void resize(size_type len) {
+ if (mSize != len)
+ mpMemory = (T *)realloc(mpMemory, mSize = len);
+ }
+
+protected:
+ size_type mSize;
+ T *mpMemory;
+};
+
+///////////////////////////////////////////////////////////////////////////
+//
+// vdlist
+//
+// vdlist<T> is similar to list<T*>, except:
+//
+// 1) The node structure must be embedded as a superclass of T.
+// Thus, the client is in full control of allocation.
+// 2) Node pointers may be converted back into iterators in O(1).
+//
+///////////////////////////////////////////////////////////////////////////
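+
+// A minimal usage sketch (MyTask is a hypothetical client type; the node is
+// embedded in the object, so insertion performs no allocation):
+//
+//		class MyTask : public vdlist_node { /*...*/ };
+//
+//		vdlist<MyTask> queue;
+//		MyTask a, b;
+//		queue.push_back(&a);
+//		queue.push_back(&b);
+//		vdlist<MyTask>::iterator it = queue.fast_find(&b);	// O(1) pointer -> iterator
+//		queue.erase(it);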
+
+struct vdlist_node {
+ vdlist_node *mListNodeNext, *mListNodePrev;
+};
+
+template<class T, class T_Nonconst>
+class vdlist_iterator : public vditerator<std::bidirectional_iterator_tag, T, ptrdiff_t>::type {
+public:
+ vdlist_iterator() {}
+ vdlist_iterator(T *p) : mp(p) {}
+ vdlist_iterator(const vdlist_iterator<T_Nonconst, T_Nonconst>& src) : mp(src.mp) {}
+
+ T* operator *() const {
+ return static_cast<T*>(mp);
+ }
+
+ bool operator==(const vdlist_iterator<T, T_Nonconst>& x) const {
+ return mp == x.mp;
+ }
+
+ bool operator!=(const vdlist_iterator<T, T_Nonconst>& x) const {
+ return mp != x.mp;
+ }
+
+ vdlist_iterator& operator++() {
+ mp = mp->mListNodeNext;
+ return *this;
+ }
+
+ vdlist_iterator& operator--() {
+ mp = mp->mListNodePrev;
+ return *this;
+ }
+
+	vdlist_iterator operator++(int) {
+		vdlist_iterator tmp(*this);
+		mp = mp->mListNodeNext;
+		return tmp;
+	}
+
+	vdlist_iterator operator--(int) {
+		vdlist_iterator tmp(*this);
+		mp = mp->mListNodePrev;
+		return tmp;
+	}
+
+ vdlist_node *mp;
+};
+
+class vdlist_base {
+public:
+ typedef vdlist_node node;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+
+ bool empty() const {
+ return mAnchor.mListNodeNext == &mAnchor;
+ }
+
+ size_type size() const {
+		node *p = mAnchor.mListNodeNext;
+ size_type s = 0;
+
+ if (p != &mAnchor)
+ do {
+ ++s;
+ p = p->mListNodeNext;
+ } while(p != &mAnchor);
+
+ return s;
+ }
+
+ void clear() {
+ mAnchor.mListNodePrev = &mAnchor;
+ mAnchor.mListNodeNext = &mAnchor;
+ }
+
+ void pop_front() {
+ mAnchor.mListNodeNext = mAnchor.mListNodeNext->mListNodeNext;
+ mAnchor.mListNodeNext->mListNodePrev = &mAnchor;
+ }
+
+ void pop_back() {
+ mAnchor.mListNodePrev = mAnchor.mListNodePrev->mListNodePrev;
+ mAnchor.mListNodePrev->mListNodeNext = &mAnchor;
+ }
+
+ static void unlink(vdlist_node& node) {
+ vdlist_node& n1 = *node.mListNodePrev;
+ vdlist_node& n2 = *node.mListNodeNext;
+
+ n1.mListNodeNext = &n2;
+ n2.mListNodePrev = &n1;
+ }
+
+protected:
+ node mAnchor;
+};
+
+template<class T>
+class vdlist : public vdlist_base {
+public:
+ typedef T* value_type;
+ typedef T** pointer;
+ typedef const T** const_pointer;
+ typedef T*& reference;
+ typedef const T*& const_reference;
+ typedef vdlist_iterator<T, T> iterator;
+ typedef vdlist_iterator<const T, T> const_iterator;
+ typedef typename vdreverse_iterator<iterator, T>::type reverse_iterator;
+ typedef typename vdreverse_iterator<const_iterator, const T>::type const_reverse_iterator;
+
+ vdlist() {
+ mAnchor.mListNodePrev = &mAnchor;
+ mAnchor.mListNodeNext = &mAnchor;
+ }
+
+ iterator begin() {
+ iterator it;
+ it.mp = mAnchor.mListNodeNext;
+ return it;
+ }
+
+ const_iterator begin() const {
+ const_iterator it;
+ it.mp = mAnchor.mListNodeNext;
+ return it;
+ }
+
+ iterator end() {
+ iterator it;
+ it.mp = &mAnchor;
+ return it;
+ }
+
+ const_iterator end() const {
+ const_iterator it;
+ it.mp = &mAnchor;
+ return it;
+ }
+
+	reverse_iterator rbegin() {
+		return reverse_iterator(end());
+	}
+
+	const_reverse_iterator rbegin() const {
+		return const_reverse_iterator(end());
+	}
+
+	reverse_iterator rend() {
+		return reverse_iterator(begin());
+	}
+
+	const_reverse_iterator rend() const {
+		return const_reverse_iterator(begin());
+	}
+
+ const value_type front() const {
+ return static_cast<T *>(mAnchor.mListNodeNext);
+ }
+
+ const value_type back() const {
+ return static_cast<T *>(mAnchor.mListNodePrev);
+ }
+
+ iterator find(T *p) {
+ iterator it;
+ it.mp = mAnchor.mListNodeNext;
+
+ if (it.mp != &mAnchor)
+ do {
+ if (it.mp == static_cast<node *>(p))
+ break;
+
+ it.mp = it.mp->mListNodeNext;
+ } while(it.mp != &mAnchor);
+
+ return it;
+ }
+
+ const_iterator find(T *p) const {
+ const_iterator it;
+ it.mp = mAnchor.mListNodeNext;
+
+ if (it.mp != &mAnchor)
+ do {
+ if (it.mp == static_cast<node *>(p))
+ break;
+
+ it.mp = it.mp->mListNodeNext;
+ } while(it.mp != &mAnchor);
+
+ return it;
+ }
+
+ iterator fast_find(T *p) {
+ iterator it(p);
+ return it;
+ }
+
+ const_iterator fast_find(T *p) const {
+		const_iterator it(p);
+		return it;
+	}
+
+ void push_front(T *p) {
+ node& n = *p;
+ n.mListNodePrev = &mAnchor;
+ n.mListNodeNext = mAnchor.mListNodeNext;
+ n.mListNodeNext->mListNodePrev = &n;
+ mAnchor.mListNodeNext = &n;
+ }
+
+ void push_back(T *p) {
+ node& n = *p;
+ n.mListNodeNext = &mAnchor;
+ n.mListNodePrev = mAnchor.mListNodePrev;
+ n.mListNodePrev->mListNodeNext = &n;
+ mAnchor.mListNodePrev = &n;
+ }
+
+ iterator erase(T *p) {
+ return erase(fast_find(p));
+ }
+
+ iterator erase(iterator it) {
+ node& n = *it.mp;
+
+ n.mListNodePrev->mListNodeNext = n.mListNodeNext;
+ n.mListNodeNext->mListNodePrev = n.mListNodePrev;
+
+ it.mp = n.mListNodeNext;
+ return it;
+ }
+
+ iterator erase(iterator i1, iterator i2) {
+ node& np = *i1.mp->mListNodePrev;
+ node& nn = *i2.mp;
+
+ np.mListNodeNext = &nn;
+ nn.mListNodePrev = &np;
+
+ return i2;
+ }
+
+ void insert(iterator dst, T *src) {
+ node& ns = *src;
+ node& nd = *dst.mp;
+
+ ns.mListNodeNext = &nd;
+ ns.mListNodePrev = nd.mListNodePrev;
+ nd.mListNodePrev->mListNodeNext = &ns;
+ nd.mListNodePrev = &ns;
+ }
+
+ void insert(iterator dst, iterator i1, iterator i2) {
+ if (i1 != i2) {
+ node& np = *dst.mp->mListNodePrev;
+ node& nn = *dst.mp;
+ node& n1 = *i1.mp;
+ node& n2 = *i2.mp->mListNodePrev;
+
+ np.mListNodeNext = &n1;
+ n1.mListNodePrev = &np;
+ n2.mListNodeNext = &nn;
+ nn.mListNodePrev = &n2;
+ }
+ }
+
+ void splice(iterator dst, vdlist<T>& srclist) {
+ insert(dst, srclist.begin(), srclist.end());
+ srclist.clear();
+ }
+
+ void splice(iterator dst, vdlist<T>& srclist, iterator src) {
+ T *v = *src;
+ srclist.erase(src);
+ insert(dst, v);
+ }
+
+ void splice(iterator dst, vdlist<T>& srclist, iterator i1, iterator i2) {
+ if (dst.mp != i1.mp && dst.mp != i2.mp) {
+ srclist.erase(i1, i2);
+ insert(dst, i1, i2);
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) && defined(_MSC_VER)
+ #define VD_ACCELERATE_TEMPLATES
+#endif
+
+#ifndef VDTINLINE
+ #ifdef VD_ACCELERATE_TEMPLATES
+ #ifndef VDTEXTERN
+ #define VDTEXTERN extern
+ #endif
+
+ #define VDTINLINE
+ #else
+ #define VDTINLINE inline
+ #endif
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T>
+class vdspan {
+public:
+ typedef T value_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef pointer iterator;
+ typedef const_pointer const_iterator;
+ typedef typename vdreverse_iterator<iterator, T>::type reverse_iterator;
+ typedef typename vdreverse_iterator<const_iterator, const T>::type const_reverse_iterator;
+
+ VDTINLINE vdspan();
+ VDTINLINE vdspan(T *p1, T *p2);
+ VDTINLINE vdspan(T *p1, size_type len);
+
+public:
+ VDTINLINE bool empty() const;
+ VDTINLINE size_type size() const;
+
+ VDTINLINE pointer data();
+ VDTINLINE const_pointer data() const;
+
+ VDTINLINE iterator begin();
+ VDTINLINE const_iterator begin() const;
+ VDTINLINE iterator end();
+ VDTINLINE const_iterator end() const;
+
+ VDTINLINE reverse_iterator rbegin();
+ VDTINLINE const_reverse_iterator rbegin() const;
+ VDTINLINE reverse_iterator rend();
+ VDTINLINE const_reverse_iterator rend() const;
+
+ VDTINLINE reference front();
+ VDTINLINE const_reference front() const;
+ VDTINLINE reference back();
+ VDTINLINE const_reference back() const;
+
+ VDTINLINE reference operator[](size_type n);
+ VDTINLINE const_reference operator[](size_type n) const;
+
+protected:
+ T *mpBegin;
+ T *mpEnd;
+};
+
+#ifdef VD_ACCELERATE_TEMPLATES
+ #pragma warning(push)
+ #pragma warning(disable: 4231) // warning C4231: nonstandard extension used : 'extern' before template explicit instantiation
+ VDTEXTERN template vdspan<char>;
+ VDTEXTERN template vdspan<uint8>;
+ VDTEXTERN template vdspan<uint16>;
+ VDTEXTERN template vdspan<uint32>;
+ VDTEXTERN template vdspan<uint64>;
+ VDTEXTERN template vdspan<sint8>;
+ VDTEXTERN template vdspan<sint16>;
+ VDTEXTERN template vdspan<sint32>;
+ VDTEXTERN template vdspan<sint64>;
+ VDTEXTERN template vdspan<float>;
+ VDTEXTERN template vdspan<double>;
+ VDTEXTERN template vdspan<wchar_t>;
+ #pragma warning(pop)
+#endif
+
+template<class T> VDTINLINE vdspan<T>::vdspan() : mpBegin(NULL), mpEnd(NULL) {}
+template<class T> VDTINLINE vdspan<T>::vdspan(T *p1, T *p2) : mpBegin(p1), mpEnd(p2) {}
+template<class T> VDTINLINE vdspan<T>::vdspan(T *p, size_type len) : mpBegin(p), mpEnd(p+len) {}
+template<class T> VDTINLINE bool vdspan<T>::empty() const { return mpBegin == mpEnd; }
+template<class T> VDTINLINE typename vdspan<T>::size_type vdspan<T>::size() const { return size_type(mpEnd - mpBegin); }
+template<class T> VDTINLINE typename vdspan<T>::pointer vdspan<T>::data() { return mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::const_pointer vdspan<T>::data() const { return mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::iterator vdspan<T>::begin() { return mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::const_iterator vdspan<T>::begin() const { return mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::iterator vdspan<T>::end() { return mpEnd; }
+template<class T> VDTINLINE typename vdspan<T>::const_iterator vdspan<T>::end() const { return mpEnd; }
+template<class T> VDTINLINE typename vdspan<T>::reverse_iterator vdspan<T>::rbegin() { return reverse_iterator(mpEnd); }
+template<class T> VDTINLINE typename vdspan<T>::const_reverse_iterator vdspan<T>::rbegin() const { return const_reverse_iterator(mpEnd); }
+template<class T> VDTINLINE typename vdspan<T>::reverse_iterator vdspan<T>::rend() { return reverse_iterator(mpBegin); }
+template<class T> VDTINLINE typename vdspan<T>::const_reverse_iterator vdspan<T>::rend() const { return const_reverse_iterator(mpBegin); }
+template<class T> VDTINLINE typename vdspan<T>::reference vdspan<T>::front() { return *mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::const_reference vdspan<T>::front() const { return *mpBegin; }
+template<class T> VDTINLINE typename vdspan<T>::reference vdspan<T>::back() { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+template<class T> VDTINLINE typename vdspan<T>::const_reference vdspan<T>::back() const { VDASSERT(mpBegin != mpEnd); return mpEnd[-1]; }
+template<class T> VDTINLINE typename vdspan<T>::reference vdspan<T>::operator[](size_type n) { VDASSERT(n < size_type(mpEnd - mpBegin)); return mpBegin[n]; }
+template<class T> VDTINLINE typename vdspan<T>::const_reference vdspan<T>::operator[](size_type n) const { VDASSERT(n < size_type(mpEnd - mpBegin)); return mpBegin[n]; }
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T>
+bool operator==(const vdspan<T>& x, const vdspan<T>& y) {
+ uint32 len = x.size();
+ if (len != y.size())
+ return false;
+
+ const T *px = x.data();
+ const T *py = y.data();
+
+ for(uint32 i=0; i<len; ++i) {
+ if (px[i] != py[i])
+ return false;
+ }
+
+ return true;
+}
+
+template<class T>
+inline bool operator!=(const vdspan<T>& x, const vdspan<T>& y) { return !(x == y); }
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T, class S, class A = vdallocator<T> >
+class vdfastvector_base : public vdspan<T> {
+public:
+ ~vdfastvector_base() {
+ if (static_cast<const S&>(m).is_deallocatable_storage(mpBegin))
+ m.deallocate(mpBegin, m.eos - mpBegin);
+ }
+
+ size_type capacity() const { return size_type(m.eos - mpBegin); }
+
+public:
+ T *alloc(size_type n) {
+ size_type offset = (size_type)(mpEnd - mpBegin);
+ resize(offset + n);
+ return mpBegin + offset;
+ }
+
+ void assign(const T *p1, const T *p2) {
+ resize(p2 - p1);
+ memcpy(mpBegin, p1, (char *)p2 - (char *)p1);
+ }
+
+ void clear() {
+ mpEnd = mpBegin;
+ }
+
+ iterator erase(iterator it) {
+ VDASSERT(it - mpBegin < mpEnd - mpBegin);
+
+ memmove(it, it+1, (char *)mpEnd - (char *)(it+1));
+
+ --mpEnd;
+
+ return it;
+ }
+
+ iterator erase(iterator it1, iterator it2) {
+ VDASSERT(it1 - mpBegin <= mpEnd - mpBegin);
+ VDASSERT(it2 - mpBegin <= mpEnd - mpBegin);
+ VDASSERT(it1 <= it2);
+
+ memmove(it1, it2, (char *)mpEnd - (char *)it2);
+
+ mpEnd -= (it2 - it1);
+
+ return it1;
+ }
+
+ iterator insert(iterator it, const T& value) {
+ const T temp(value); // copy in case value is inside container.
+
+ if (mpEnd == m.eos) {
+ difference_type delta = it - mpBegin;
+ _reserve_always_add_one();
+ it = mpBegin + delta;
+ }
+
+ memmove(it+1, it, sizeof(T) * (mpEnd - it));
+ *it = temp;
+ ++mpEnd;
+ VDASSERT(mpEnd <= m.eos);
+
+ return it;
+ }
+
+ iterator insert(iterator it, size_type n, const T& value) {
+ const T temp(value); // copy in case value is inside container.
+
+ ptrdiff_t bytesToInsert = n * sizeof(T);
+
+ if ((char *)m.eos - (char *)mpEnd < bytesToInsert) {
+ difference_type delta = it - mpBegin;
+ _reserve_always_add(bytesToInsert);
+ it = mpBegin + delta;
+ }
+
+ memmove((char *)it + bytesToInsert, it, (char *)mpEnd - (char *)it);
+ for(size_t i=0; i<n; ++i)
+ *it++ = temp;
+ mpEnd += n;
+ VDASSERT(mpEnd <= m.eos);
+ return it;
+ }
+
+ iterator insert(iterator it, const T *p1, const T *p2) {
+ ptrdiff_t elementsToCopy = p2 - p1;
+ ptrdiff_t bytesToCopy = (char *)p2 - (char *)p1;
+
+ if ((char *)m.eos - (char *)mpEnd < bytesToCopy) {
+ difference_type delta = it - mpBegin;
+ _reserve_always_add(bytesToCopy);
+ it = mpBegin + delta;
+ }
+
+ memmove((char *)it + bytesToCopy, it, (char *)mpEnd - (char *)it);
+ memcpy(it, p1, bytesToCopy);
+ mpEnd += elementsToCopy;
+ VDASSERT(mpEnd <= m.eos);
+ return it;
+ }
+
+ reference push_back() {
+ if (mpEnd == m.eos)
+ _reserve_always_add_one();
+
+ return *mpEnd++;
+ }
+
+ void push_back(const T& value) {
+ const T temp(value); // copy in case value is inside container.
+
+ if (mpEnd == m.eos)
+ _reserve_always_add_one();
+
+ *mpEnd++ = temp;
+ }
+
+ void pop_back() {
+ VDASSERT(mpBegin != mpEnd);
+ --mpEnd;
+ }
+
+ void resize(size_type n) {
+ if (n*sizeof(T) > size_type((char *)m.eos - (char *)mpBegin))
+ _reserve_always_amortized(n);
+
+ mpEnd = mpBegin + n;
+ }
+
+ void resize(size_type n, const T& value) {
+ const T temp(value);
+
+ if (n*sizeof(T) > size_type((char *)m.eos - (char *)mpBegin)) {
+ _reserve_always_amortized(n);
+ }
+
+ const iterator newEnd(mpBegin + n);
+ if (newEnd > mpEnd)
+ std::fill(mpEnd, newEnd, temp);
+ mpEnd = newEnd;
+ }
+
+ void reserve(size_type n) {
+ if (n*sizeof(T) > size_type((char *)m.eos - (char *)mpBegin))
+ _reserve_always(n);
+ }
+
+protected:
+#ifdef _MSC_VER
+ __declspec(noinline)
+#endif
+ void _reserve_always_add_one() {
+ _reserve_always((m.eos - mpBegin) * 2 + 1);
+ }
+
+#ifdef _MSC_VER
+ __declspec(noinline)
+#endif
+ void _reserve_always_add(size_type n) {
+ _reserve_always((m.eos - mpBegin) * 2 + n);
+ }
+
+#ifdef _MSC_VER
+ __declspec(noinline)
+#endif
+ void _reserve_always(size_type n) {
+ size_type oldSize = mpEnd - mpBegin;
+ T *oldStorage = mpBegin;
+ T *newStorage = m.allocate(n, NULL);
+
+ memcpy(newStorage, mpBegin, (char *)mpEnd - (char *)mpBegin);
+ if (static_cast<const S&>(m).is_deallocatable_storage(oldStorage))
+ m.deallocate(oldStorage, m.eos - mpBegin);
+ mpBegin = newStorage;
+ mpEnd = newStorage + oldSize;
+ m.eos = newStorage + n;
+ }
+
+#ifdef _MSC_VER
+ __declspec(noinline)
+#endif
+ void _reserve_always_amortized(size_type n) {
+ size_type nextCapacity = (size_type)((m.eos - mpBegin)*2);
+
+ if (nextCapacity < n)
+ nextCapacity = n;
+
+ _reserve_always(nextCapacity);
+ }
+
+ struct : A, S {
+ T *eos;
+ } m;
+
+ union TrivialObjectConstraint {
+ T m;
+ };
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+struct vdfastvector_storage {
+ bool is_deallocatable_storage(void *p) const {
+ return p != 0;
+ }
+};
+
+template<class T, class A = vdallocator<T> >
+class vdfastvector : public vdfastvector_base<T, vdfastvector_storage, A> {
+public:
+ vdfastvector() {
+ m.eos = NULL;
+ }
+
+ vdfastvector(size_type len) {
+ mpBegin = m.allocate(len, NULL);
+ mpEnd = mpBegin + len;
+ m.eos = mpEnd;
+ }
+
+ vdfastvector(size_type len, const T& fill) {
+ mpBegin = m.allocate(len, NULL);
+ mpEnd = mpBegin + len;
+ m.eos = mpEnd;
+
+ std::fill(mpBegin, mpEnd, fill);
+ }
+
+ vdfastvector(const vdfastvector& x) {
+ size_type n = x.mpEnd - x.mpBegin;
+ mpBegin = m.allocate(n, NULL);
+ mpEnd = mpBegin + n;
+ m.eos = mpEnd;
+ memcpy(mpBegin, x.mpBegin, sizeof(T) * n);
+ }
+
+ vdfastvector(const value_type *p, const value_type *q) {
+ m.eos = NULL;
+
+ assign(p, q);
+ }
+
+ vdfastvector& operator=(const vdfastvector& x) {
+ if (this != &x)
+ assign(x.mpBegin, x.mpEnd);
+
+ return *this;
+ }
+
+ void swap(vdfastvector& x) {
+ T *p;
+
+ p = mpBegin; mpBegin = x.mpBegin; x.mpBegin = p;
+ p = mpEnd; mpEnd = x.mpEnd; x.mpEnd = p;
+ p = m.eos; m.eos = x.m.eos; x.m.eos = p;
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T, size_t N>
+struct vdfastfixedvector_storage {
+ T mArray[N];
+
+ bool is_deallocatable_storage(void *p) const {
+ return p != mArray;
+ }
+};
+
+template<class T, size_t N, class A = vdallocator<T> >
+class vdfastfixedvector : public vdfastvector_base<T, vdfastfixedvector_storage<T, N>, A> {
+public:
+ vdfastfixedvector() {
+ mpBegin = m.mArray;
+ mpEnd = m.mArray;
+ m.eos = m.mArray + N;
+ }
+
+ vdfastfixedvector(size_type len) {
+ if (len <= N) {
+ mpBegin = m.mArray;
+ mpEnd = m.mArray + len;
+ m.eos = m.mArray + N;
+ } else {
+ mpBegin = m.allocate(len, NULL);
+ mpEnd = mpBegin + len;
+ m.eos = mpEnd;
+ }
+ }
+
+ vdfastfixedvector(size_type len, const T& fill) {
+ mpBegin = m.allocate(len, NULL);
+ mpEnd = mpBegin + len;
+ m.eos = mpEnd;
+
+ std::fill(mpBegin, mpEnd, fill);
+ }
+
+ vdfastfixedvector(const vdfastfixedvector& x) {
+ size_type n = x.mpEnd - x.mpBegin;
+
+ if (n <= N) {
+ mpBegin = m.mArray;
+ mpEnd = m.mArray + n;
+ m.eos = m.mArray + N;
+ } else {
+ mpBegin = m.allocate(n, NULL);
+ mpEnd = mpBegin + n;
+ m.eos = mpEnd;
+ }
+
+ memcpy(mpBegin, x.mpBegin, sizeof(T) * n);
+ }
+
+ vdfastfixedvector(const value_type *p, const value_type *q) {
+ mpBegin = m.mArray;
+ mpEnd = m.mArray;
+ m.eos = m.mArray + N;
+
+ assign(p, q);
+ }
+
+ vdfastfixedvector& operator=(const vdfastfixedvector& x) {
+ if (this != &x)
+ assign(x.mpBegin, x.mpEnd);
+
+ return *this;
+ }
+
+ void swap(vdfastfixedvector& x) {
+ size_t this_bytes = (char *)mpEnd - (char *)mpBegin;
+ size_t other_bytes = (char *)x.mpEnd - (char *)x.mpBegin;
+
+ T *p;
+
+ if (mpBegin == m.mArray) {
+ if (x.mpBegin == x.m.mArray) {
+ if (this_bytes < other_bytes) {
+ VDSwapMemory(m.mArray, x.m.mArray, this_bytes);
+ memcpy((char *)m.mArray + this_bytes, (char *)x.m.mArray + this_bytes, other_bytes - this_bytes);
+ } else {
+ VDSwapMemory(m.mArray, x.m.mArray, other_bytes);
+ memcpy((char *)m.mArray + other_bytes, (char *)x.m.mArray + other_bytes, this_bytes - other_bytes);
+ }
+
+ mpEnd = (T *)((char *)mpBegin + other_bytes);
+ x.mpEnd = (T *)((char *)x.mpBegin + this_bytes);
+ } else {
+ memcpy(x.m.mArray, mpBegin, this_bytes);
+
+ mpBegin = x.mpBegin;
+ mpEnd = x.mpEnd;
+ m.eos = x.m.eos;
+
+ x.mpBegin = x.m.mArray;
+ x.mpEnd = (T *)((char *)x.m.mArray + this_bytes);
+ x.m.eos = x.m.mArray + N;
+ }
+ } else {
+ if (x.mpBegin == x.m.mArray) {
+ memcpy(x.m.mArray, mpBegin, other_bytes);
+
+ x.mpBegin = mpBegin;
+ x.mpEnd = mpEnd;
+ x.m.eos = m.eos;
+
+ mpBegin = m.mArray;
+ mpEnd = (T *)((char *)m.mArray + other_bytes);
+ m.eos = m.mArray + N;
+ } else {
+ p = mpBegin; mpBegin = x.mpBegin; x.mpBegin = p;
+ p = mpEnd; mpEnd = x.mpEnd; x.mpEnd = p;
+ p = m.eos; m.eos = x.m.eos; x.m.eos = p;
+ }
+ }
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T>
+struct vdfastdeque_block {
+ enum {
+ kBlockSize = 32,
+ kBlockSizeBits = 5
+ };
+
+ T data[kBlockSize];
+};
+
+template<class T, class T_Base>
+class vdfastdeque_iterator {
+public:
+ vdfastdeque_iterator(const vdfastdeque_iterator<T_Base, T_Base>&);
+ vdfastdeque_iterator(vdfastdeque_block<T_Base> **pMapEntry, uint32 index);
+
+ T& operator *() const;
+ T& operator ->() const;
+ vdfastdeque_iterator& operator++();
+ vdfastdeque_iterator operator++(int);
+ vdfastdeque_iterator& operator--();
+ vdfastdeque_iterator operator--(int);
+
+public:
+ vdfastdeque_block<T_Base> **mpMap;
+ vdfastdeque_block<T_Base> *mpBlock;
+ uint32 mIndex;
+};
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base>::vdfastdeque_iterator(const vdfastdeque_iterator<T_Base, T_Base>& x)
+ : mpMap(x.mpMap)
+ , mpBlock(x.mpBlock)
+ , mIndex(x.mIndex)
+{
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base>::vdfastdeque_iterator(vdfastdeque_block<T_Base> **pMapEntry, uint32 index)
+ : mpMap(pMapEntry)
+ , mpBlock(mpMap ? *mpMap : NULL)
+ , mIndex(index)
+{
+}
+
+template<class T, class T_Base>
+T& vdfastdeque_iterator<T, T_Base>::operator *() const {
+ return mpBlock->data[mIndex];
+}
+
+template<class T, class T_Base>
+T& vdfastdeque_iterator<T, T_Base>::operator ->() const {
+ return mpBlock->data[mIndex];
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base>& vdfastdeque_iterator<T, T_Base>::operator++() {
+ if (++mIndex >= vdfastdeque_block<T>::kBlockSize) {
+ mIndex = 0;
+ mpBlock = *++mpMap;
+ }
+ return *this;
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base> vdfastdeque_iterator<T, T_Base>::operator++(int) {
+ vdfastdeque_iterator r(*this);
+ operator++();
+ return r;
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base>& vdfastdeque_iterator<T, T_Base>::operator--() {
+ if (mIndex-- == 0) {
+		mIndex = vdfastdeque_block<T_Base>::kBlockSize - 1;
+ mpBlock = *--mpMap;
+ }
+ return *this;
+}
+
+template<class T, class T_Base>
+vdfastdeque_iterator<T, T_Base> vdfastdeque_iterator<T, T_Base>::operator--(int) {
+ vdfastdeque_iterator r(*this);
+ operator--();
+ return r;
+}
+
+template<class T, class U, class T_Base>
+bool operator==(const vdfastdeque_iterator<T, T_Base>& x,const vdfastdeque_iterator<U, T_Base>& y) {
+ return x.mpBlock == y.mpBlock && x.mIndex == y.mIndex;
+}
+
+template<class T, class U, class T_Base>
+bool operator!=(const vdfastdeque_iterator<T, T_Base>& x,const vdfastdeque_iterator<U, T_Base>& y) {
+ return x.mpBlock != y.mpBlock || x.mIndex != y.mIndex;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<class T, class A = vdallocator<T> >
+class vdfastdeque {
+public:
+ typedef typename A::reference reference;
+ typedef typename A::const_reference const_reference;
+ typedef typename A::pointer pointer;
+ typedef typename A::const_pointer const_pointer;
+ typedef T value_type;
+ typedef A allocator_type;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef vdfastdeque_iterator<T, T> iterator;
+ typedef vdfastdeque_iterator<const T, T> const_iterator;
+ typedef typename vdreverse_iterator<iterator, T>::type reverse_iterator;
+ typedef typename vdreverse_iterator<const_iterator, const T>::type const_reverse_iterator;
+
+ vdfastdeque();
+ ~vdfastdeque();
+
+ bool empty() const;
+ size_type size() const;
+
+ reference front();
+ const_reference front() const;
+ reference back();
+ const_reference back() const;
+
+ iterator begin();
+ const_iterator begin() const;
+ iterator end();
+ const_iterator end() const;
+
+ reference operator[](size_type n);
+ const_reference operator[](size_type n) const;
+
+ void clear();
+
+ reference push_back();
+ void push_back(const_reference x);
+
+ void pop_front();
+ void pop_back();
+
+ void swap(vdfastdeque& x);
+
+protected:
+ void push_back_extend();
+ void validate();
+
+ typedef vdfastdeque_block<T> Block;
+
+ enum {
+ kBlockSize = Block::kBlockSize,
+ kBlockSizeBits = Block::kBlockSizeBits
+ };
+
+ struct M1 : public A::rebind<Block *>::other {
+ Block **mapStartAlloc; // start of map
+ Block **mapStartCommit; // start of range of allocated blocks
+ Block **mapStart; // start of range of active blocks
+ Block **mapEnd; // end of range of active blocks
+ Block **mapEndCommit; // end of range of allocated blocks
+ Block **mapEndAlloc; // end of map
+ } m;
+
+ struct M2 : public A::rebind<Block>::other {
+ int startIndex;
+ int endIndex;
+ } mTails;
+
+ union TrivialObjectConstraint {
+ T obj;
+ };
+};
+
+template<class T, class A>
+vdfastdeque<T, A>::vdfastdeque() {
+ m.mapStartAlloc = NULL;
+ m.mapStartCommit = NULL;
+ m.mapStart = NULL;
+ m.mapEnd = NULL;
+ m.mapEndCommit = NULL;
+ m.mapEndAlloc = NULL;
+ mTails.startIndex = 0;
+ mTails.endIndex = kBlockSize - 1;
+}
+
+template<class T, class A>
+vdfastdeque<T,A>::~vdfastdeque() {
+ while(m.mapStartCommit != m.mapEndCommit) {
+ mTails.deallocate(*m.mapStartCommit++, 1);
+ }
+
+ if (m.mapStartAlloc)
+ m.deallocate(m.mapStartAlloc, m.mapEndAlloc - m.mapStartAlloc);
+}
+
+template<class T, class A>
+bool vdfastdeque<T,A>::empty() const {
+ return size() == 0;
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::size_type vdfastdeque<T,A>::size() const {
+ if (m.mapEnd == m.mapStart)
+ return 0;
+
+ return kBlockSize * ((m.mapEnd - m.mapStart) - 1) + (mTails.endIndex + 1) - mTails.startIndex;
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::reference vdfastdeque<T,A>::front() {
+ VDASSERT(m.mapStart != m.mapEnd);
+ return (*m.mapStart)->data[mTails.startIndex];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_reference vdfastdeque<T,A>::front() const {
+ VDASSERT(m.mapStart != m.mapEnd);
+ return (*m.mapStart)->data[mTails.startIndex];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::reference vdfastdeque<T,A>::back() {
+ VDASSERT(m.mapStart != m.mapEnd);
+ return m.mapEnd[-1]->data[mTails.endIndex];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_reference vdfastdeque<T,A>::back() const {
+ VDASSERT(m.mapStart != m.mapEnd);
+ return m.mapEnd[-1]->data[mTails.endIndex];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::iterator vdfastdeque<T,A>::begin() {
+ return iterator(m.mapStart, mTails.startIndex);
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_iterator vdfastdeque<T,A>::begin() const {
+ return const_iterator(m.mapStart, mTails.startIndex);
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::iterator vdfastdeque<T,A>::end() {
+ if (mTails.endIndex == kBlockSize - 1)
+ return iterator(m.mapEnd, 0);
+ else
+ return iterator(m.mapEnd - 1, mTails.endIndex + 1);
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_iterator vdfastdeque<T,A>::end() const {
+ if (mTails.endIndex == kBlockSize - 1)
+ return const_iterator(m.mapEnd, 0);
+ else
+ return const_iterator(m.mapEnd - 1, mTails.endIndex + 1);
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::reference vdfastdeque<T,A>::operator[](size_type n) {
+ n += mTails.startIndex;
+ return m.mapStart[n >> kBlockSizeBits]->data[n & (kBlockSize - 1)];
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::const_reference vdfastdeque<T,A>::operator[](size_type n) const {
+ n += mTails.startIndex;
+ return m.mapStart[n >> kBlockSizeBits]->data[n & (kBlockSize - 1)];
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::clear() {
+ m.mapEnd = m.mapStart;
+ mTails.startIndex = 0;
+ mTails.endIndex = kBlockSize - 1;
+}
+
+template<class T, class A>
+typename vdfastdeque<T,A>::reference vdfastdeque<T,A>::push_back() {
+ if (mTails.endIndex >= kBlockSize - 1) {
+ push_back_extend();
+
+ mTails.endIndex = -1;
+ }
+
+ ++mTails.endIndex;
+
+ VDASSERT(m.mapEnd[-1]);
+ reference r = m.mapEnd[-1]->data[mTails.endIndex];
+ return r;
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::push_back(const_reference x) {
+ const T x2(x);
+ push_back() = x2;
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::pop_front() {
+ if (++mTails.startIndex >= kBlockSize) {
+ VDASSERT(m.mapEnd != m.mapStart);
+ mTails.startIndex = 0;
+ ++m.mapStart;
+ }
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::pop_back() {
+ if (--mTails.endIndex < 0) {
+ VDASSERT(m.mapEnd != m.mapStart);
+ mTails.endIndex = kBlockSize - 1;
+ --m.mapEnd;
+ }
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::swap(vdfastdeque& x) {
+ std::swap(m.mapStartAlloc, x.m.mapStartAlloc);
+ std::swap(m.mapStartCommit, x.m.mapStartCommit);
+ std::swap(m.mapStart, x.m.mapStart);
+ std::swap(m.mapEnd, x.m.mapEnd);
+ std::swap(m.mapEndCommit, x.m.mapEndCommit);
+ std::swap(m.mapEndAlloc, x.m.mapEndAlloc);
+ std::swap(mTails.startIndex, x.mTails.startIndex);
+ std::swap(mTails.endIndex, x.mTails.endIndex);
+}
+
+/////////////////////////////////
+
+template<class T, class A>
+void vdfastdeque<T,A>::push_back_extend() {
+ validate();
+
+ // check if we need to extend the map itself
+ if (m.mapEnd == m.mapEndAlloc) {
+ // can we just shift the map?
+ size_type currentMapSize = m.mapEndAlloc - m.mapStartAlloc;
+ size_type freeAtStart = m.mapStartCommit - m.mapStartAlloc;
+
+ if (freeAtStart >= 2 && (freeAtStart + freeAtStart) >= currentMapSize) {
+ size_type shiftDistance = freeAtStart >> 1;
+
+ VDASSERT(!m.mapStartAlloc[0]);
+ memmove(m.mapStartAlloc, m.mapStartAlloc + shiftDistance, sizeof(Block *) * (currentMapSize - shiftDistance));
+ memset(m.mapStartAlloc + (currentMapSize - shiftDistance), 0, shiftDistance * sizeof(Block *));
+
+ // relocate pointers
+ m.mapEndCommit -= shiftDistance;
+ m.mapEnd -= shiftDistance;
+ m.mapStart -= shiftDistance;
+ m.mapStartCommit -= shiftDistance;
+ validate();
+ } else {
+ size_type newMapSize = currentMapSize*2+1;
+
+ Block **newMap = m.allocate(newMapSize);
+
+ memcpy(newMap, m.mapStartAlloc, currentMapSize * sizeof(Block *));
+ memset(newMap + currentMapSize, 0, (newMapSize - currentMapSize) * sizeof(Block *));
+
+ // relocate pointers
+ m.mapEndAlloc = newMap + newMapSize;
+ m.mapEndCommit = newMap + (m.mapEndCommit - m.mapStartAlloc);
+ m.mapEnd = newMap + (m.mapEnd - m.mapStartAlloc);
+ m.mapStart = newMap + (m.mapStart - m.mapStartAlloc);
+ m.mapStartCommit = newMap + (m.mapStartCommit - m.mapStartAlloc);
+
+ m.deallocate(m.mapStartAlloc, currentMapSize);
+ m.mapStartAlloc = newMap;
+ validate();
+ }
+ }
+
+ VDASSERT(m.mapEnd != m.mapEndAlloc);
+
+ // check if we already have a block we can use
+ if (*m.mapEnd) {
+ ++m.mapEnd;
+ validate();
+ return;
+ }
+
+ // check if we can steal a block from the beginning
+ if (m.mapStartCommit != m.mapStart) {
+ VDASSERT(*m.mapStartCommit);
+ if (m.mapStartCommit != m.mapEnd) {
+ *m.mapEnd = *m.mapStartCommit;
+ *m.mapStartCommit = NULL;
+ ++m.mapStartCommit;
+ }
+ ++m.mapEnd;
+ m.mapEndCommit = m.mapEnd;
+ validate();
+ return;
+ }
+
+ // allocate a new block
+ *m.mapEnd = mTails.allocate(1);
+ ++m.mapEnd;
+ m.mapEndCommit = m.mapEnd;
+ validate();
+}
+
+template<class T, class A>
+void vdfastdeque<T,A>::validate() {
+ VDASSERT(m.mapStartAlloc <= m.mapStartCommit);
+ VDASSERT(m.mapStartCommit <= m.mapStart);
+ VDASSERT(m.mapStart <= m.mapEnd);
+ VDASSERT(m.mapEnd <= m.mapEndCommit);
+ VDASSERT(m.mapEndCommit <= m.mapEndAlloc);
+
+ VDASSERT(m.mapStartAlloc == m.mapStartCommit || !*m.mapStartAlloc);
+ VDASSERT(m.mapStartCommit == m.mapEndCommit || m.mapStartCommit[0]);
+ VDASSERT(m.mapStart == m.mapEnd || (m.mapStart[0] && m.mapEnd[-1]));
+ VDASSERT(m.mapEndCommit == m.mapEndAlloc || !m.mapEndCommit[0]);
+}
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vdtypes.h b/src/thirdparty/VirtualDub/h/vd2/system/vdtypes.h
new file mode 100644
index 000000000..0a5a63e50
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vdtypes.h
@@ -0,0 +1,415 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VDTYPES_H
+#define f_VD2_SYSTEM_VDTYPES_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <algorithm>
+#include <stdio.h>
+#include <stdarg.h>
+#include <new>
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// compiler detection
+//
+///////////////////////////////////////////////////////////////////////////
+
+#ifndef VD_COMPILER_DETECTED
+ #define VD_COMPILER_DETECTED
+
+ #ifdef _MSC_VER
+ #define VD_COMPILER_MSVC _MSC_VER
+
+ #if _MSC_VER >= 1400
+ #define VD_COMPILER_MSVC_VC8 1
+
+ #if _MSC_FULL_VER == 140040310
+ #define VD_COMPILER_MSVC_VC8_PSDK 1
+ #elif _MSC_FULL_VER == 14002207
+ #define VD_COMPILER_MSVC_VC8_DDK 1
+ #endif
+
+ #elif _MSC_VER >= 1310
+ #define VD_COMPILER_MSVC_VC71 1
+ #elif _MSC_VER >= 1300
+ #define VD_COMPILER_MSVC_VC7 1
+ #elif _MSC_VER >= 1200
+ #define VD_COMPILER_MSVC_VC6 1
+ #endif
+
+ #endif
+#endif
+
+#ifndef VD_CPU_DETECTED
+ #define VD_CPU_DETECTED
+
+ #ifdef _M_AMD64
+ #define VD_CPU_AMD64 1
+ #elif _M_IX86
+ #define VD_CPU_X86 1
+ #endif
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// types
+//
+///////////////////////////////////////////////////////////////////////////
+
+#ifndef VD_STANDARD_TYPES_DECLARED
+ #if defined(_MSC_VER)
+ typedef signed __int64 sint64;
+ typedef unsigned __int64 uint64;
+ #elif defined(__GNUC__)
+ typedef signed long long sint64;
+ typedef unsigned long long uint64;
+ #endif
+ typedef signed int sint32;
+ typedef unsigned int uint32;
+ typedef signed short sint16;
+ typedef unsigned short uint16;
+ typedef signed char sint8;
+ typedef unsigned char uint8;
+
+ typedef sint64 int64;
+ typedef sint32 int32;
+ typedef sint16 int16;
+ typedef sint8 int8;
+
+ #ifdef _M_AMD64
+ typedef sint64 sintptr;
+ typedef uint64 uintptr;
+ #else
+ #if _MSC_VER >= 1310
+ typedef __w64 sint32 sintptr;
+ typedef __w64 uint32 uintptr;
+ #else
+ typedef sint32 sintptr;
+ typedef uint32 uintptr;
+ #endif
+ #endif
+#endif
+
+#if defined(_MSC_VER)
+ #define VD64(x) x##i64
+#elif defined(__GNUC__)
+ #define VD64(x) x##ll
+#else
+ #error Please add an entry for your compiler for 64-bit constant literals.
+#endif
+
+
+#define VDAPIENTRY __cdecl
+
+typedef int64 VDTime;
+typedef int64 VDPosition;
+typedef struct __VDGUIHandle *VDGUIHandle;
+
+// enforce wchar_t under Visual C++
+
+#if defined(_MSC_VER) && !defined(_WCHAR_T_DEFINED)
+ #include <ctype.h>
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// allocation
+//
+///////////////////////////////////////////////////////////////////////////
+
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1300 || (defined(VD_COMPILER_MSVC_VC8_PSDK) || defined(VD_COMPILER_MSVC_VC8_DDK)))
+#define new_nothrow new
+#else
+#define new_nothrow new(std::nothrow)
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// STL fixes
+//
+///////////////////////////////////////////////////////////////////////////
+
+#if defined(VD_COMPILER_MSVC_VC6) || defined(VD_COMPILER_MSVC_VC8_DDK) || defined(VD_COMPILER_MSVC_VC8_PSDK)
+ // The VC6 STL was deliberately borked to avoid conflicting with
+ // Windows min/max macros. We work around this bogosity here. Note
+ // that NOMINMAX must be defined for these to compile properly. Also,
+ // there is a bug in the VC6 compiler that sometimes causes long
+ // lvalues to "promote" to int, causing ambiguous override errors.
+ // To avoid this, always explicitly declare which type you are using,
+ // i.e. min<int>(x,0). None of this is a problem with VC7 or later.
+ namespace std {
+ template<class T>
+ inline const T& min(const T& x, const T& y) {
+ return _cpp_min(x, y);
+ }
+
+ template<class T>
+ inline const T& max(const T& x, const T& y) {
+ return _cpp_max(x, y);
+ }
+ };
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// compiler fixes
+//
+///////////////////////////////////////////////////////////////////////////
+
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1400 || (defined(VD_COMPILER_MSVC_VC8_PSDK) || defined(VD_COMPILER_MSVC_VC8_DDK)))
+ inline int vswprintf(wchar_t *dst, size_t bufsize, const wchar_t *format, va_list val) {
+ return _vsnwprintf(dst, bufsize, format, val);
+ }
+
+ inline int swprintf(wchar_t *dst, size_t bufsize, const wchar_t *format, ...) {
+ va_list val;
+
+ va_start(val, format);
+ int r = vswprintf(dst, bufsize, format, val);
+ va_end(val);
+
+ return r;
+ }
+
+ #define _strdup strdup
+ #define _stricmp stricmp
+ #define _strnicmp strnicmp
+ #define _wcsdup wcsdup
+ #define _wcsicmp wcsicmp
+ #define _wcsnicmp wcsnicmp
+#endif
+
+#if defined(VD_COMPILER_MSVC) && VD_COMPILER_MSVC < 1400
+ #define vdfor if(0);else for
+#else
+ #define vdfor for
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// attribute support
+//
+///////////////////////////////////////////////////////////////////////////
+
+#if defined(VD_COMPILER_MSVC)
+ #define VDINTERFACE __declspec(novtable)
+ #define VDNORETURN __declspec(noreturn)
+ #define VDPUREFUNC
+ #if VD_COMPILER_MSVC >= 1400
+ #define VDRESTRICT __restrict
+ #else
+ #define VDRESTRICT
+ #endif
+#elif defined(__GNUC__)
+ #define VDINTERFACE
+ #define VDNORETURN __attribute__((noreturn))
+ #define VDPUREFUNC __attribute__((pure))
+ #define VDRESTRICT
+#else
+ #define VDINTERFACE
+ #define VDNORETURN
+ #define VDPUREFUNC
+ #define VDRESTRICT
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// debug support
+//
+///////////////////////////////////////////////////////////////////////////
+
+enum VDAssertResult {
+ kVDAssertBreak,
+ kVDAssertContinue,
+ kVDAssertIgnore
+};
+
+extern VDAssertResult VDAssert(const char *exp, const char *file, int line);
+extern VDAssertResult VDAssertPtr(const char *exp, const char *file, int line);
+extern void VDDebugPrint(const char *format, ...);
+
+#if defined(_MSC_VER)
+ #if _MSC_VER >= 1300
+ #define VDBREAK __debugbreak()
+ #else
+ #define VDBREAK __asm { int 3 }
+ #endif
+#elif defined(__GNUC__)
+ #define VDBREAK __asm__ volatile ("int3" : : )
+#else
+ #define VDBREAK *(volatile char *)0 = *(volatile char *)0
+#endif
+
+
+#ifdef _DEBUG
+
+ namespace {
+ template<int line>
+ struct VDAssertHelper {
+ VDAssertHelper(const char *exp, const char *file) {
+ if (!sbAssertDisabled)
+ switch(VDAssert(exp, file, line)) {
+ case kVDAssertBreak:
+ VDBREAK;
+ break;
+ case kVDAssertIgnore:
+ sbAssertDisabled = true;
+ break;
+ }
+ }
+
+ static bool sbAssertDisabled;
+ };
+
+ template<int lineno>
+ bool VDAssertHelper<lineno>::sbAssertDisabled;
+ }
+
+ #define VDASSERT(exp) if (static bool active = true) if (exp); else switch(VDAssert (#exp, __FILE__, __LINE__)) { case kVDAssertBreak: VDBREAK; break; case kVDAssertIgnore: active = false; } else ((void)0)
+ #define VDASSERTPTR(exp) if (static bool active = true) if (exp); else switch(VDAssertPtr(#exp, __FILE__, __LINE__)) { case kVDAssertBreak: VDBREAK; break; case kVDAssertIgnore: active = false; } else ((void)0)
+ #define VDVERIFY(exp) if (exp); else if (static bool active = true) switch(VDAssert (#exp, __FILE__, __LINE__)) { case kVDAssertBreak: VDBREAK; break; case kVDAssertIgnore: active = false; } else ((void)0)
+ #define VDVERIFYPTR(exp) if (exp); else if (static bool active = true) switch(VDAssertPtr(#exp, __FILE__, __LINE__)) { case kVDAssertBreak: VDBREAK; break; case kVDAssertIgnore: active = false; } else ((void)0)
+ #define VDASSERTCT(exp) (void)sizeof(int[(exp)?1:-1])
+
+ #define VDINLINEASSERT(exp) ((exp)||(VDAssertHelper<__LINE__>(#exp, __FILE__),false))
+ #define VDINLINEASSERTFALSE(exp) ((exp)&&(VDAssertHelper<__LINE__>("!("#exp")", __FILE__),true))
+
+ #define NEVER_HERE do { if (VDAssert( "[never here]", __FILE__, __LINE__ )) VDBREAK; __assume(false); } while(false)
+ #define VDNEVERHERE do { if (VDAssert( "[never here]", __FILE__, __LINE__ )) VDBREAK; __assume(false); } while(false)
+
+ #define VDDEBUG VDDebugPrint
+
+#else
+
+ #if defined(_MSC_VER)
+ #ifndef _M_AMD64
+ #define VDASSERT(exp) __assume(!!(exp))
+ #define VDASSERTPTR(exp) __assume(!!(exp))
+ #else
+ #define VDASSERT(exp) __noop(exp)
+ #define VDASSERTPTR(exp) __noop(exp)
+ #endif
+ #elif defined(__GNUC__)
+ #define VDASSERT(exp) __builtin_expect(0 != (exp), 1)
+ #define VDASSERTPTR(exp) __builtin_expect(0 != (exp), 1)
+ #endif
+
+ #define VDVERIFY(exp) (exp)
+ #define VDVERIFYPTR(exp) (exp)
+ #define VDASSERTCT(exp)
+
+ #define VDINLINEASSERT(exp) (exp)
+ #define VDINLINEASSERTFALSE(exp) (exp)
+
+ #if defined(VD_COMPILER_MSVC)
+ #define NEVER_HERE __assume(false)
+ #define VDNEVERHERE __assume(false)
+ #else
+ #define NEVER_HERE VDASSERT(false)
+ #define VDNEVERHERE VDASSERT(false)
+ #endif
+
+ extern int VDDEBUG_Helper(const char *, ...);
+ #define VDDEBUG (void)sizeof VDDEBUG_Helper
+
+#endif
+
+#define VDDEBUG2 VDDebugPrint
+
+// TODO macros
+//
+// These produce a diagnostic during compilation that indicates a TODO for
+// later:
+//
+//		#pragma message(vdpragma_TODO0 "Fix this.")
+//		#pragma vdpragma_TODO("Fix this.")
+
+#define vdpragma_TODO2(x) #x
+#define vdpragma_TODO1(x) vdpragma_TODO2(x)
+#define vdpragma_TODO0 __FILE__ "(" vdpragma_TODO1(__LINE__) ") : TODO: "
+
+#ifdef _MSC_VER
+#define vdpragma_TODO(x) message(vdpragma_TODO0 x)
+#else
+#define vdpragma_TODO(x)
+#endif
+
+// BS macros
+//
+// These tag code that is not meant to go into a final build.
+
+#define vdpragma_BS2(x) #x
+#define vdpragma_BS1(x) vdpragma_BS2(x)
+#define vdpragma_BS0 __FILE__ "(" vdpragma_BS1(__LINE__) ") : BS: "
+
+#ifdef _MSC_VER
+#define vdpragma_BS(x) message(vdpragma_BS0 x)
+#else
+#define vdpragma_BS(x)
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Object scope macros
+//
+// vdobjectscope() allows you to define a construct where an object is
+// constructed and live only within the controlled statement. This is
+// used for vdsynchronized (thread.h) and protected scopes below.
+// It relies on a strange quirk of C++ regarding initialized objects
+// in the condition of a selection statement and also horribly abuses
+// the switch statement, generating rather good code in release builds.
+// The catch is that the controlled object must implement a conversion to
+// bool returning false and must only be initialized with one argument (C
+// syntax).
+//
+// Unfortunately, handy as this macro is, it is also damned good at
+// breaking compilers. For a start, declaring an object with a non-
+// trivial destructor in a switch() kills both VC6 and VC7 with a C1001.
+// The bug is fixed in VC8 (MSC 14.00).
+//
+// A somewhat safer alternative is the for() statement, along the lines
+// of:
+//
+// switch(bool v=false) case 0: default: for(object_def; !v; v=true)
+//
+// This avoids the conversion operator but unfortunately usually generates
+// an actual loop in the output.
+
+#if defined(VD_COMPILER_MSVC) && (VD_COMPILER_MSVC < 1400 || defined(VD_COMPILER_MSVC_VC8_DDK))
+#define vdobjectscope(object_def) if(object_def) VDNEVERHERE; else
+#else
+#define vdobjectscope(object_def) switch(object_def) case 0: default:
+#endif
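+
+// Illustrative usage sketch (not part of the original header): a guard type
+// for use with vdobjectscope() only needs a single-argument constructor and a
+// conversion to bool returning false, along the lines of:
+//
+//		struct ScopedGuard {
+//			ScopedGuard(Lock& l) : mLock(l) { mLock.Enter(); }
+//			~ScopedGuard() { mLock.Leave(); }
+//			operator bool() const { return false; }
+//			Lock& mLock;
+//		};
+//
+//		#define guarded(lock) vdobjectscope(ScopedGuard guard_ = (lock))
+//
+//		guarded(gLock) {
+//			// guard_ holds gLock only within this controlled statement
+//		}
+//
+// Lock/Enter/Leave above are hypothetical names; vdsynchronized in thread.h is
+// the real user of this macro.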
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vectors.h b/src/thirdparty/VirtualDub/h/vd2/system/vectors.h
new file mode 100644
index 000000000..6dcbe65fa
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vectors.h
@@ -0,0 +1,568 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_VECTORS_H
+#define f_VD2_SYSTEM_VECTORS_H
+
+#ifdef _MSC_VER
+ #pragma once
+#endif
+
+#include <vd2/system/vdtypes.h>
+#include <math.h>
+#include <limits>
+
+#ifndef VDFORCEINLINE
+ #define VDFORCEINLINE __forceinline
+#endif
+
+///////////////////////////////////////////////////////////////////////////
+
+bool VDSolveLinearEquation(double *src, int n, ptrdiff_t stride_elements, double *b, double tolerance = 1e-5);
+
+///////////////////////////////////////////////////////////////////////////
+
+#include <vd2/system/vectors_float.h>
+#include <vd2/system/vectors_int.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdfloat2x2 {
+public:
+ enum zero_type { zero };
+ enum identity_type { identity };
+
+ typedef float value_type;
+ typedef vdfloat2 vector_type;
+ typedef vdfloat2c vector_ctor_type;
+ typedef vdfloat2x2 self_type;
+
+ vdfloat2x2() {}
+ vdfloat2x2(zero_type) { m[0] = m[1] = vector_ctor_type(0, 0); }
+ vdfloat2x2(identity_type) {
+ m[0] = vector_ctor_type(1, 0);
+ m[1] = vector_ctor_type(0, 1);
+ }
+
+ vector_type& operator[](int k) { return m[k]; }
+ const vector_type& operator[](int k) const { return m[k]; }
+
+ self_type operator*(const self_type& v) const {
+ self_type result;
+
+#define DO(i,j) result.m[i].v[j] = m[i].v[0]*v.m[0].v[j] + m[i].v[1]*v.m[1].v[j]
+ DO(0,0);
+ DO(0,1);
+ DO(1,0);
+ DO(1,1);
+#undef DO
+
+ return result;
+ }
+
+ vector_type operator*(const vector_type& r) const {
+ return vector_ctor_type(
+ m[0].v[0]*r.v[0] + m[0].v[1]*r.v[1],
+ m[1].v[0]*r.v[0] + m[1].v[1]*r.v[1]);
+ }
+
+ self_type transpose() const {
+ self_type res;
+
+ res.m[0].v[0] = m[0].v[0];
+ res.m[0].v[1] = m[1].v[0];
+ res.m[1].v[0] = m[0].v[1];
+ res.m[1].v[1] = m[1].v[1];
+
+ return res;
+ }
+
+ self_type adjunct() const {
+ self_type res;
+
+ res.m[0].set(m[1].v[1], -m[0].v[1]);
+		res.m[1].set(-m[1].v[0], m[0].v[0]);	// adjugate of [[a,b],[c,d]] is [[d,-b],[-c,a]]
+
+ return res;
+ }
+
+ value_type det() const {
+ return m[0].v[0]*m[1].v[1] - m[1].v[0]*m[0].v[1];
+ }
+
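+	// ~M is the inverse: the adjugate divided by the determinant.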
+ self_type operator~() const {
+ return adjunct() / det();
+ }
+
+ self_type& operator*=(const value_type factor) {
+ m[0] *= factor;
+ m[1] *= factor;
+
+ return *this;
+ }
+
+ self_type& operator/=(const value_type factor) {
+ return operator*=(value_type(1)/factor);
+ }
+
+ self_type operator*(const value_type factor) const {
+ return self_type(*this) *= factor;
+ }
+
+ self_type operator/(const value_type factor) const {
+ return self_type(*this) /= factor;
+ }
+
+ vector_type m[2];
+};
+
+class vdfloat3x3 {
+public:
+ enum zero_type { zero };
+ enum identity_type { identity };
+ enum rotation_x_type { rotation_x };
+ enum rotation_y_type { rotation_y };
+ enum rotation_z_type { rotation_z };
+
+ typedef float value_type;
+ typedef vdfloat3 vector_type;
+ typedef vdfloat3c vector_ctor_type;
+ typedef vdfloat3x3 self_type;
+
+ vdfloat3x3() {}
+ vdfloat3x3(zero_type) { m[0] = m[1] = m[2] = vector_ctor_type(0, 0, 0); }
+ vdfloat3x3(identity_type) {
+ m[0].set(1, 0, 0);
+ m[1].set(0, 1, 0);
+ m[2].set(0, 0, 1);
+ }
+ vdfloat3x3(rotation_x_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( 1, 0, 0);
+ m[1].set( 0, c,-s);
+ m[2].set( 0, s, c);
+ }
+
+ vdfloat3x3(rotation_y_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( c, 0, s);
+ m[1].set( 0, 1, 0);
+ m[2].set(-s, 0, c);
+ }
+ vdfloat3x3(rotation_z_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( c,-s, 0);
+ m[1].set( s, c, 0);
+ m[2].set( 0, 0, 1);
+ }
+
+ vector_type& operator[](int k) { return m[k]; }
+ const vector_type& operator[](int k) const { return m[k]; }
+
+ self_type operator*(const self_type& v) const {
+ self_type result;
+
+#define DO(i,j) result.m[i].v[j] = m[i].v[0]*v.m[0].v[j] + m[i].v[1]*v.m[1].v[j] + m[i].v[2]*v.m[2].v[j]
+ DO(0,0);
+ DO(0,1);
+ DO(0,2);
+ DO(1,0);
+ DO(1,1);
+ DO(1,2);
+ DO(2,0);
+ DO(2,1);
+ DO(2,2);
+#undef DO
+
+ return result;
+ }
+
+ vector_type operator*(const vector_type& r) const {
+ return vector_ctor_type(
+ m[0].v[0]*r.v[0] + m[0].v[1]*r.v[1] + m[0].v[2]*r.v[2],
+ m[1].v[0]*r.v[0] + m[1].v[1]*r.v[1] + m[1].v[2]*r.v[2],
+ m[2].v[0]*r.v[0] + m[2].v[1]*r.v[1] + m[2].v[2]*r.v[2]);
+ }
+
+ self_type transpose() const {
+ self_type res;
+
+ res.m[0].v[0] = m[0].v[0];
+ res.m[0].v[1] = m[1].v[0];
+ res.m[0].v[2] = m[2].v[0];
+ res.m[1].v[0] = m[0].v[1];
+ res.m[1].v[1] = m[1].v[1];
+ res.m[1].v[2] = m[2].v[1];
+ res.m[2].v[0] = m[0].v[2];
+ res.m[2].v[1] = m[1].v[2];
+ res.m[2].v[2] = m[2].v[2];
+
+ return res;
+ }
+
+ self_type adjunct() const {
+ using namespace nsVDMath;
+
+ self_type res;
+
+ res.m[0] = cross(m[1], m[2]);
+ res.m[1] = cross(m[2], m[0]);
+ res.m[2] = cross(m[0], m[1]);
+
+ return res.transpose();
+ }
+
+ value_type det() const {
+ return + m[0].v[0] * m[1].v[1] * m[2].v[2]
+ + m[1].v[0] * m[2].v[1] * m[0].v[2]
+ + m[2].v[0] * m[0].v[1] * m[1].v[2]
+ - m[0].v[0] * m[2].v[1] * m[1].v[2]
+ - m[1].v[0] * m[0].v[1] * m[2].v[2]
+ - m[2].v[0] * m[1].v[1] * m[0].v[2];
+ }
+
+ self_type operator~() const {
+ return adjunct() / det();
+ }
+
+ self_type& operator*=(const value_type factor) {
+ m[0] *= factor;
+ m[1] *= factor;
+ m[2] *= factor;
+
+ return *this;
+ }
+
+ self_type& operator/=(const value_type factor) {
+ return operator*=(value_type(1)/factor);
+ }
+
+ self_type operator*(const value_type factor) const {
+ return self_type(*this) *= factor;
+ }
+
+ self_type operator/(const value_type factor) const {
+ return self_type(*this) /= factor;
+ }
+
+ vector_type m[3];
+};
+
+class vdfloat4x4 {
+public:
+ enum zero_type { zero };
+ enum identity_type { identity };
+ enum rotation_x_type { rotation_x };
+ enum rotation_y_type { rotation_y };
+ enum rotation_z_type { rotation_z };
+
+ typedef float value_type;
+ typedef vdfloat4 vector_type;
+ typedef vdfloat4c vector_ctor_type;
+
+ vdfloat4x4() {}
+ vdfloat4x4(const vdfloat3x3& v) {
+ m[0].set(v.m[0].x, v.m[0].y, v.m[0].z, 0.0f);
+ m[1].set(v.m[1].x, v.m[1].y, v.m[1].z, 0.0f);
+ m[2].set(v.m[2].x, v.m[2].y, v.m[2].z, 0.0f);
+ m[3].set(0, 0, 0, 1);
+ }
+
+ vdfloat4x4(zero_type) {
+ m[0].setzero();
+ m[1].setzero();
+ m[2].setzero();
+ m[3].setzero();
+ }
+
+ vdfloat4x4(identity_type) {
+ m[0].set(1, 0, 0, 0);
+ m[1].set(0, 1, 0, 0);
+ m[2].set(0, 0, 1, 0);
+ m[3].set(0, 0, 0, 1);
+ }
+ vdfloat4x4(rotation_x_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( 1, 0, 0, 0);
+ m[1].set( 0, c,-s, 0);
+ m[2].set( 0, s, c, 0);
+ m[3].set( 0, 0, 0, 1);
+ }
+ vdfloat4x4(rotation_y_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( c, 0, s, 0);
+ m[1].set( 0, 1, 0, 0);
+ m[2].set(-s, 0, c, 0);
+ m[3].set( 0, 0, 0, 1);
+ }
+ vdfloat4x4(rotation_z_type, value_type angle) {
+ const value_type s(sin(angle));
+ const value_type c(cos(angle));
+
+ m[0].set( c,-s, 0, 0);
+ m[1].set( s, c, 0, 0);
+ m[2].set( 0, 0, 1, 0);
+ m[3].set( 0, 0, 0, 1);
+ }
+
+ const value_type *data() const { return &m[0][0]; }
+
+ vector_type& operator[](int n) { return m[n]; }
+ const vector_type& operator[](int n) const { return m[n]; }
+
+ vdfloat4x4 operator*(const vdfloat4x4& v) const {
+ vdfloat4x4 result;
+
+#define DO(i,j) result.m[i].v[j] = m[i].v[0]*v.m[0].v[j] + m[i].v[1]*v.m[1].v[j] + m[i].v[2]*v.m[2].v[j] + m[i].v[3]*v.m[3].v[j]
+ DO(0,0);
+ DO(0,1);
+ DO(0,2);
+ DO(0,3);
+ DO(1,0);
+ DO(1,1);
+ DO(1,2);
+ DO(1,3);
+ DO(2,0);
+ DO(2,1);
+ DO(2,2);
+ DO(2,3);
+ DO(3,0);
+ DO(3,1);
+ DO(3,2);
+ DO(3,3);
+#undef DO
+
+ return result;
+ }
+
+ vdfloat4x4& operator*=(const vdfloat4x4& v) {
+ return operator=(operator*(v));
+ }
+
+ vector_type operator*(const vdfloat3& r) const {
+ return vector_ctor_type(
+ m[0].v[0]*r.v[0] + m[0].v[1]*r.v[1] + m[0].v[2]*r.v[2] + m[0].v[3],
+ m[1].v[0]*r.v[0] + m[1].v[1]*r.v[1] + m[1].v[2]*r.v[2] + m[1].v[3],
+ m[2].v[0]*r.v[0] + m[2].v[1]*r.v[1] + m[2].v[2]*r.v[2] + m[2].v[3],
+ m[3].v[0]*r.v[0] + m[3].v[1]*r.v[1] + m[3].v[2]*r.v[2] + m[3].v[3]);
+ }
+
+ vector_type operator*(const vector_type& r) const {
+ return vector_ctor_type(
+ m[0].v[0]*r.v[0] + m[0].v[1]*r.v[1] + m[0].v[2]*r.v[2] + m[0].v[3]*r.v[3],
+ m[1].v[0]*r.v[0] + m[1].v[1]*r.v[1] + m[1].v[2]*r.v[2] + m[1].v[3]*r.v[3],
+ m[2].v[0]*r.v[0] + m[2].v[1]*r.v[1] + m[2].v[2]*r.v[2] + m[2].v[3]*r.v[3],
+ m[3].v[0]*r.v[0] + m[3].v[1]*r.v[1] + m[3].v[2]*r.v[2] + m[3].v[3]*r.v[3]);
+ }
+
+ vector_type m[4];
+};
+
+template<class T>
+struct VDSize {
+ typedef T value_type;
+
+	T w, h;
+
+ VDSize() {}
+	VDSize(T _w, T _h) : w(_w), h(_h) {}
+
+ bool operator==(const VDSize& s) const { return w==s.w && h==s.h; }
+ bool operator!=(const VDSize& s) const { return w!=s.w || h!=s.h; }
+
+ VDSize& operator+=(const VDSize& s) {
+ w += s.w;
+ h += s.h;
+ return *this;
+ }
+
+ T area() const { return w*h; }
+
+ void include(const VDSize& s) {
+ if (w < s.w)
+ w = s.w;
+ if (h < s.h)
+ h = s.h;
+ }
+};
+
+template<class T>
+class VDRect {
+public:
+ typedef T value_type;
+
+ VDRect();
+ VDRect(T left_, T top_, T right_, T bottom_);
+
+ bool empty() const;
+ bool valid() const;
+
+ void clear();
+ void invalidate();
+ void set(T l, T t, T r, T b);
+
+ void add(T x, T y);
+ void add(const VDRect& r);
+ void translate(T x, T y);
+ void scale(T x, T y);
+	void transform(T scaleX, T scaleY, T offsetX, T offsetY);
+
+ bool operator==(const VDRect& r) const;
+ bool operator!=(const VDRect& r) const;
+
+ T width() const;
+ T height() const;
+ T area() const;
+ VDSize<T> size() const;
+
+public:
+ T left, top, right, bottom;
+};
+
+template<class T>
+VDRect<T>::VDRect() {
+}
+
+template<class T>
+VDRect<T>::VDRect(T left_, T top_, T right_, T bottom_)
+ : left(left_)
+ , top(top_)
+ , right(right_)
+ , bottom(bottom_)
+{
+}
+
+template<class T>
+bool VDRect<T>::empty() const {
+ return left >= right || top >= bottom;
+}
+
+template<class T>
+bool VDRect<T>::valid() const {
+ return left <= right;
+}
+
+template<class T>
+void VDRect<T>::clear() {
+ left = top = right = bottom = 0;
+}
+
+template<class T>
+void VDRect<T>::invalidate() {
+ left = top = (std::numeric_limits<T>::max)();
+ right = bottom = std::numeric_limits<T>::is_signed ? -(std::numeric_limits<T>::max)() : T(0);
+}
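+
+// invalidate() stores inverted extents (left/top at the maximum representable
+// value, right/bottom at the lowest usable value), so a following sequence of
+// add() calls accumulates the bounding box of everything added.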
+
+template<class T>
+void VDRect<T>::set(T l, T t, T r, T b) {
+ left = l;
+ top = t;
+ right = r;
+ bottom = b;
+}
+
+template<class T>
+void VDRect<T>::add(T x, T y) {
+ if (left > x)
+ left = x;
+ if (top > y)
+ top = y;
+ if (right < x)
+ right = x;
+ if (bottom < y)
+ bottom = y;
+}
+
+template<class T>
+void VDRect<T>::add(const VDRect& src) {
+ if (left > src.left)
+ left = src.left;
+ if (top > src.top)
+ top = src.top;
+ if (right < src.right)
+ right = src.right;
+ if (bottom < src.bottom)
+ bottom = src.bottom;
+}
+
+template<class T>
+void VDRect<T>::translate(T x, T y) {
+ left += x;
+ top += y;
+ right += x;
+ bottom += y;
+}
+
+template<class T>
+void VDRect<T>::scale(T x, T y) {
+ left *= x;
+ top *= y;
+ right *= x;
+ bottom *= y;
+}
+
+template<class T>
+void VDRect<T>::transform(T scaleX, T scaleY, T offsetX, T offsetY) {
+ left = left * scaleX + offsetX;
+ top = top * scaleY + offsetY;
+ right = right * scaleX + offsetX;
+ bottom = bottom * scaleY + offsetY;
+}
+
+template<class T>
+bool VDRect<T>::operator==(const VDRect& r) const { return left==r.left && top==r.top && right==r.right && bottom==r.bottom; }
+
+template<class T>
+bool VDRect<T>::operator!=(const VDRect& r) const { return left!=r.left || top!=r.top || right!=r.right || bottom!=r.bottom; }
+
+template<class T>
+T VDRect<T>::width() const { return right-left; }
+
+template<class T>
+T VDRect<T>::height() const { return bottom-top; }
+
+template<class T>
+T VDRect<T>::area() const { return (right-left)*(bottom-top); }
+
+template<class T>
+VDSize<T> VDRect<T>::size() const { return VDSize<T>(right-left, bottom-top); }
+
+///////////////////////////////////////////////////////////////////////////////
+typedef VDSize<sint32> vdsize32;
+typedef VDSize<float> vdsize32f;
+typedef VDRect<sint32> vdrect32;
+typedef VDRect<float> vdrect32f;
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vectors_float.h b/src/thirdparty/VirtualDub/h/vd2/system/vectors_float.h
new file mode 100644
index 000000000..3be7fb4ac
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vectors_float.h
@@ -0,0 +1,207 @@
+class vdfloat2 {
+public:
+ typedef vdfloat2 self_type;
+ typedef float value_type;
+
+ void set(float x2, float y2) { x=x2; y=y2; }
+
+ float& operator[](int k) { return v[k]; }
+ const float& operator[](int k) const { return v[k]; }
+
+ float lensq() const { return x*x + y*y; }
+
+ self_type operator-() const { self_type a = {-x, -y}; return a; }
+
+ self_type operator+(const self_type& r) const { self_type a = {x+r.x, y+r.y}; return a; }
+ self_type operator-(const self_type& r) const { self_type a = {x-r.x, y-r.y}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; return *this; }
+
+	self_type operator*(const float s) const { self_type a = {x*s, y*s}; return a; }
+ self_type& operator*=(const float s) { x*=s; y*=s; return *this; }
+
+ self_type operator/(const float s) const { const float inv(float(1)/s); self_type a = {x*inv, y*inv}; return a; }
+ self_type& operator/=(const float s) { const float inv(float(1)/s); x*=inv; y*=inv; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; return *this; }
+
+ self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; return *this; }
+
+ union {
+ struct {
+ float x;
+ float y;
+ };
+ float v[2];
+ };
+};
+
+VDFORCEINLINE vdfloat2 operator*(const float s, const vdfloat2& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdfloat3 {
+public:
+ typedef vdfloat3 self_type;
+ typedef float value_type;
+
+ void set(float x2, float y2, float z2) { x=x2; y=y2; z=z2; }
+
+ float& operator[](int k) { return v[k]; }
+ const float& operator[](int k) const { return v[k]; }
+
+ float lensq() const { return x*x + y*y + z*z; }
+
+ vdfloat2 project() const { const float inv(float(1)/z); const vdfloat2 a = {x*inv, y*inv}; return a; }
+ vdfloat2 as2d() const { const vdfloat2 a = {x, y}; return a; }
+
+ self_type operator-() const { const self_type a = {-x, -y, -z}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y, z+r.z}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y, z-r.z}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; z+=r.z; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; z-=r.z; return *this; }
+
+ self_type operator*(const float s) const { const self_type a = {x*s, y*s, z*s}; return a; }
+ self_type& operator*=(const float s) { x*=s; y*=s; z*=s; return *this; }
+
+ self_type operator/(const float s) const { const float inv(float(1)/s); const self_type a = {x*inv, y*inv, z*inv}; return a; }
+ self_type& operator/=(const float s) { const float inv(float(1)/s); x*=inv; y*=inv; z*=inv; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y, z*r.z}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; z*=r.z; return *this; }
+
+ self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y, z/r.z}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; z/=r.z; return *this; }
+
+ union {
+ struct {
+ float x;
+ float y;
+ float z;
+ };
+ float v[3];
+ };
+};
+
+VDFORCEINLINE vdfloat3 operator*(const float s, const vdfloat3& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdfloat4 {
+public:
+ typedef vdfloat4 self_type;
+ typedef float value_type;
+
+ void setzero() { x=y=z=w = 0; }
+ void set(float x2, float y2, float z2, float w2) { x=x2; y=y2; z=z2; w=w2; }
+
+ float& operator[](int i) { return v[i]; }
+ const float& operator[](int i) const { return v[i]; }
+
+ float lensq() const { return x*x + y*y + z*z + w*w; }
+
+ vdfloat3 project() const { const float inv(float(1)/w); const vdfloat3 a = {x*inv, y*inv, z*inv}; return a; }
+
+ self_type operator-() const { const self_type a = {-x, -y, -z, -w}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y, z+r.z, w+r.w}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y, z-r.z, w-r.w}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; z+=r.z; w+=r.w; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; z-=r.z; w-=r.w; return *this; }
+
+ self_type operator*(const float factor) const { const self_type a = {x*factor, y*factor, z*factor, w*factor}; return a; }
+ self_type operator/(const float factor) const { const float inv(float(1) / factor); const self_type a = {x*inv, y*inv, z*inv, w*inv}; return a; }
+
+ self_type& operator*=(const float factor) { x *= factor; y *= factor; z *= factor; w *= factor; return *this; }
+ self_type& operator/=(const float factor) { const float inv(float(1) / factor); x *= inv; y *= inv; z *= inv; w *= inv; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y, z*r.z, w*r.w}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; z*=r.z; w*=r.w; return *this; }
+
+	self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y, z/r.z, w/r.w}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; z/=r.z; w/=r.w; return *this; }
+
+ union {
+ struct {
+ float x;
+ float y;
+ float z;
+ float w;
+ };
+ float v[4];
+ };
+};
+
+VDFORCEINLINE vdfloat4 operator*(const float s, const vdfloat4& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdfloat2c : public vdfloat2 {
+public:
+ VDFORCEINLINE vdfloat2c(float x2, float y2) {x=x2; y=y2;}
+ VDFORCEINLINE vdfloat2c(const float src[2]) {x=src[0]; y=src[1];}
+};
+
+class vdfloat3c : public vdfloat3 {
+public:
+ VDFORCEINLINE vdfloat3c(float x2, float y2, float z2) { x=x2; y=y2; z=z2; }
+ VDFORCEINLINE vdfloat3c(const float src[3]) { x=src[0]; y=src[1]; z=src[2]; }
+};
+
+class vdfloat4c : public vdfloat4 {
+public:
+ VDFORCEINLINE vdfloat4c(float x2, float y2, float z2, float w2) { x=x2; y=y2; z=z2; w=w2; }
+ VDFORCEINLINE vdfloat4c(const float src[4]) { x=src[0]; y=src[1]; z=src[2]; w=src[3]; }
+};
+
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace nsVDMath {
+ VDFORCEINLINE float length(const vdfloat2& a) {
+ return sqrtf(a.x*a.x + a.y*a.y);
+ }
+
+ VDFORCEINLINE float length(const vdfloat3& a) {
+ return sqrtf(a.x*a.x + a.y*a.y + a.z*a.z);
+ }
+
+ VDFORCEINLINE float length(const vdfloat4& a) {
+ return sqrtf(a.x*a.x + a.y*a.y + a.z*a.z + a.w*a.w);
+ }
+
+ VDFORCEINLINE vdfloat2 normalize(const vdfloat2& a) {
+ return a / length(a);
+ }
+
+ VDFORCEINLINE vdfloat3 normalize(const vdfloat3& a) {
+ return a / length(a);
+ }
+
+ VDFORCEINLINE vdfloat4 normalize(const vdfloat4& a) {
+ return a / length(a);
+ }
+
+ VDFORCEINLINE float dot(const vdfloat2& a, const vdfloat2& b) {
+ return a.x*b.x + a.y*b.y;
+ }
+
+ VDFORCEINLINE float dot(const vdfloat3& a, const vdfloat3& b) {
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+ }
+
+ VDFORCEINLINE float dot(const vdfloat4& a, const vdfloat4& b) {
+ return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
+ }
+
+ VDFORCEINLINE vdfloat3 cross(const vdfloat3& a, const vdfloat3& b) {
+ const vdfloat3 r = {a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x};
+ return r;
+ }
+};
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/vectors_int.h b/src/thirdparty/VirtualDub/h/vd2/system/vectors_int.h
new file mode 100644
index 000000000..78c796761
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/vectors_int.h
@@ -0,0 +1,183 @@
+class vdint2 {
+public:
+ typedef vdint2 self_type;
+ typedef int value_type;
+
+ void set(int x2, int y2) { x=x2; y=y2; }
+
+ int& operator[](int k) { return v[k]; }
+ const int& operator[](int k) const { return v[k]; }
+
+ int lensq() const { return x*x + y*y; }
+ int len() const { return (int)sqrtf((float)(x*x + y*y)); }
+ self_type normalized() const { return *this / len(); }
+
+ self_type operator-() const { const self_type a = {-x, -y}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; return *this; }
+
+	self_type operator*(const int s) const { const self_type a = {x*s, y*s}; return a; }
+ self_type& operator*=(const int s) { x*=s; y*=s; return *this; }
+
+ self_type operator/(const int s) const { const self_type a = {x/s, y/s}; return a; }
+ self_type& operator/=(const int s) { x/=s; y/=s; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; return *this; }
+
+ self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; return *this; }
+
+ union {
+ struct {
+ int x;
+ int y;
+ };
+ int v[2];
+ };
+};
+
+VDFORCEINLINE vdint2 operator*(const int s, const vdint2& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdint3 {
+public:
+ typedef vdint3 self_type;
+ typedef int value_type;
+
+ int& operator[](int k) { return v[k]; }
+ const int& operator[](int k) const { return v[k]; }
+
+ int lensq() const { return x*x + y*y + z*z; }
+ int len() const { return (int)sqrtf((float)(x*x + y*y + z*z)); }
+ self_type normalized() const { return *this / len(); }
+
+	vdint2 project() const { const vdint2 a = {x/z, y/z}; return a; }
+ vdint2 as2d() const { const vdint2 a = {x, y}; return a; }
+
+ self_type operator-() const { const self_type a = {-x, -y, -z}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y, z+r.z}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y, z-r.z}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; z+=r.z; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; z-=r.z; return *this; }
+
+ self_type operator*(const int s) const { const self_type a = {x*s, y*s, z*s}; return a; }
+ self_type& operator*=(const int s) { x*=s; y*=s; z*=s; return *this; }
+
+ self_type operator/(const int s) const { const self_type a = {x/s, y/s, z/s}; return a; }
+ self_type& operator/=(const int s) { x /= s; y /= s; z /= s; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y, z*r.z}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; z*=r.z; return *this; }
+
+ self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y, z/r.z}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; z/=r.z; return *this; }
+
+ union {
+ struct {
+ int x;
+ int y;
+ int z;
+ };
+ int v[3];
+ };
+};
+
+VDFORCEINLINE vdint3 operator*(const int s, const vdint3& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdint4 {
+public:
+ typedef vdint4 self_type;
+ typedef int value_type;
+
+ int& operator[](int i) { return v[i]; }
+ const int& operator[](int i) const { return v[i]; }
+
+ int lensq() const { return x*x + y*y + z*z + w*w; }
+ int len() const { return (int)sqrtf((float)(x*x + y*y + z*z + w*w)); }
+ self_type normalized() const { return *this / len(); }
+
+	vdint3 project() const { const vdint3 a = {x/w, y/w, z/w}; return a; }
+
+ self_type operator-() const { const self_type a = {-x, -y, -z, -w}; return a; }
+
+ self_type operator+(const self_type& r) const { const self_type a = {x+r.x, y+r.y, z+r.z, w+r.w}; return a; }
+ self_type operator-(const self_type& r) const { const self_type a = {x-r.x, y-r.y, z-r.z, w-r.w}; return a; }
+
+ self_type& operator+=(const self_type& r) { x+=r.x; y+=r.y; z+=r.z; w+=r.w; return *this; }
+ self_type& operator-=(const self_type& r) { x-=r.x; y-=r.y; z-=r.z; w-=r.w; return *this; }
+
+ self_type operator*(const int factor) const { const self_type a = {x*factor, y*factor, z*factor, w*factor}; return a; }
+ self_type operator/(const int factor) const { const self_type a = {x/factor, y/factor, z/factor, w/factor}; return a; }
+
+ self_type& operator*=(const int factor) { x *= factor; y *= factor; z *= factor; w *= factor; return *this; }
+ self_type& operator/=(const int factor) { x /= factor; y /= factor; z /= factor; w /= factor; return *this; }
+
+ self_type operator*(const self_type& r) const { self_type a = {x*r.x, y*r.y, z*r.z, w*r.w}; return a; }
+ self_type& operator*=(const self_type& r) { x*=r.x; y*=r.y; z*=r.z; w*=r.w; return *this; }
+
+	self_type operator/(const self_type& r) const { self_type a = {x/r.x, y/r.y, z/r.z, w/r.w}; return a; }
+ self_type& operator/=(const self_type& r) { x/=r.x; y/=r.y; z/=r.z; w/=r.w; return *this; }
+
+ union {
+ struct {
+ int x;
+ int y;
+ int z;
+ int w;
+ };
+ int v[4];
+ };
+};
+
+VDFORCEINLINE vdint4 operator*(const int s, const vdint4& v) { return v*s; }
+
+///////////////////////////////////////////////////////////////////////////
+
+class vdint2c : public vdint2 {
+public:
+ VDFORCEINLINE vdint2c(int x2, int y2) {x=x2; y=y2;}
+ VDFORCEINLINE vdint2c(const int src[2]) {x=src[0]; y=src[1];}
+};
+
+class vdint3c : public vdint3 {
+public:
+ VDFORCEINLINE vdint3c(int x2, int y2, int z2) { x=x2; y=y2; z=z2; }
+ VDFORCEINLINE vdint3c(const int src[3]) { x=src[0]; y=src[1]; z=src[2]; }
+};
+
+class vdint4c : public vdint4 {
+public:
+ VDFORCEINLINE vdint4c(int x2, int y2, int z2, int w2) { x=x2; y=y2; z=z2; w=w2; }
+ VDFORCEINLINE vdint4c(const int src[4]) { x=src[0]; y=src[1]; z=src[2]; w=src[3]; }
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace nsVDMath {
+ VDFORCEINLINE int dot(const vdint2& a, const vdint2& b) {
+ return a.x*b.x + a.y*b.y;
+ }
+
+ VDFORCEINLINE int dot(const vdint3& a, const vdint3& b) {
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+ }
+
+ VDFORCEINLINE int dot(const vdint4& a, const vdint4& b) {
+ return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
+ }
+
+ VDFORCEINLINE vdint3 cross(const vdint3& a, const vdint3& b) {
+ const vdint3 r = {a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x};
+ return r;
+ }
+};
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/w32assist.h b/src/thirdparty/VirtualDub/h/vd2/system/w32assist.h
new file mode 100644
index 000000000..e47e20f52
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/w32assist.h
@@ -0,0 +1,95 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_W32ASSIST_H
+#define f_VD2_SYSTEM_W32ASSIST_H
+
+#include <windows.h>
+
+#include <vd2/system/VDString.h>
+
+inline bool VDIsWindowsNT() {
+#ifdef _M_AMD64
+ return true;
+#else
+ static bool is_nt = !(GetVersion() & 0x80000000);
+
+ return is_nt;
+#endif
+}
+
+// useful constants missing from the Platform SDK
+
+enum {
+#ifdef _M_AMD64
+ MENUITEMINFO_SIZE_VERSION_400A = sizeof(MENUITEMINFOA),
+ MENUITEMINFO_SIZE_VERSION_400W = sizeof(MENUITEMINFOW)
+#else
+ MENUITEMINFO_SIZE_VERSION_400A = (offsetof(MENUITEMINFOA, cch) + sizeof(UINT)),
+ MENUITEMINFO_SIZE_VERSION_400W = (offsetof(MENUITEMINFOW, cch) + sizeof(UINT))
+#endif
+};
+
+// helper functions
+
+bool VDIsForegroundTaskW32();
+
+LPVOID VDConvertThreadToFiberW32(LPVOID parm);
+void VDSwitchToFiberW32(LPVOID fiber);
+
+int VDGetSizeOfBitmapHeaderW32(const BITMAPINFOHEADER *pHdr);
+void VDSetWindowTextW32(HWND hwnd, const wchar_t *s);
+void VDSetWindowTextFW32(HWND hwnd, const wchar_t *format, ...);
+VDStringW VDGetWindowTextW32(HWND hwnd);
+void VDAppendMenuW32(HMENU hmenu, UINT flags, UINT id, const wchar_t *text);
+void VDCheckMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked);
+void VDCheckRadioMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked);
+void VDEnableMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked);
+VDStringW VDGetMenuItemTextByCommandW32(HMENU hmenu, UINT cmd);
+void VDSetMenuItemTextByCommandW32(HMENU hmenu, UINT cmd, const wchar_t *text);
+
+LRESULT VDDualCallWindowProcW32(WNDPROC wp, HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam);
+LRESULT VDDualDefWindowProcW32(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam);
+
+EXECUTION_STATE VDSetThreadExecutionStateW32(EXECUTION_STATE esFlags);
+
+bool VDSetFilePointerW32(HANDLE h, sint64 pos, DWORD dwMoveMethod);
+bool VDGetFileSizeW32(HANDLE h, sint64& size);
+
+#if !defined(_MSC_VER) || _MSC_VER < 1300
+ HMODULE VDGetLocalModuleHandleW32();
+#else
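+	// __ImageBase is a pseudo-symbol the MSVC linker places at the very start
+	// of the module image (its IMAGE_DOS_HEADER), so its address doubles as
+	// the module's HMODULE without a GetModuleHandle() call.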
+ extern "C" IMAGE_DOS_HEADER __ImageBase;
+ inline HMODULE VDGetLocalModuleHandleW32() {
+ return (HINSTANCE)&__ImageBase;
+ }
+#endif
+
+bool VDDrawTextW32(HDC hdc, const wchar_t *s, int nCount, LPRECT lpRect, UINT uFormat);
+
+bool VDPatchModuleImportTableW32(HMODULE hmod, const char *srcModule, const char *name, void *pCompareValue, void *pNewValue, void *volatile *ppOldValue);
+bool VDPatchModuleExportTableW32(HMODULE hmod, const char *name, void *pCompareValue, void *pNewValue, void *volatile *ppOldValue);
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/win32/miniwindows.h b/src/thirdparty/VirtualDub/h/vd2/system/win32/miniwindows.h
new file mode 100644
index 000000000..be4ee5695
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/win32/miniwindows.h
@@ -0,0 +1,53 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_VD2_SYSTEM_WIN32_MINIWINDOWS_H
+#define f_VD2_SYSTEM_WIN32_MINIWINDOWS_H
+
+#define VDZCALLBACK __stdcall
+
+#ifndef _WIN64
+ typedef __w64 int VDZINT_PTR;
+ typedef __w64 unsigned VDZUINT_PTR;
+ typedef __w64 long VDZLONG_PTR;
+#else
+ typedef __int64 VDZINT_PTR;
+ typedef unsigned __int64 VDZUINT_PTR;
+ typedef __int64 VDZLONG_PTR;
+#endif
+
+typedef struct HWND__ *VDZHWND;
+typedef unsigned VDZUINT;
+typedef unsigned short VDZWORD;
+typedef unsigned long VDZDWORD;
+typedef VDZUINT_PTR VDZWPARAM;
+typedef VDZLONG_PTR VDZLPARAM;
+typedef VDZLONG_PTR VDZLRESULT;
+typedef struct HDROP__ *VDZHDROP;
+typedef struct HACCEL__ *VDZHACCEL;
+
+typedef VDZWORD VDZATOM;
+
+#endif
diff --git a/src/thirdparty/VirtualDub/h/vd2/system/zip.h b/src/thirdparty/VirtualDub/h/vd2/system/zip.h
new file mode 100644
index 000000000..06b864ccf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/h/vd2/system/zip.h
@@ -0,0 +1,220 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#ifndef f_ZIP_H
+#define f_ZIP_H
+
+// Rest in peace, Phil Katz.
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/file.h>
+#include <vd2/system/VDString.h>
+#include <string.h>
+#include <vector>
+
+class VDDeflateBitReader {
+public:
+ VDDeflateBitReader() : mpSrc(0), mBufferPt(0), accum(0), bits(0) {}
+
+ void init(IVDStream *pSrc, uint64 limit) {
+ mpSrc = pSrc;
+ mBytesLeft = limit;
+ refill();
+ consume(0);
+ }
+
+ IVDStream *stream() const {
+ return mpSrc;
+ }
+
+ unsigned long peek() const {
+ return accum;
+ }
+
+ bool consume(unsigned n) {
+// printf("%08lx/%d\n", accum << ((-bits)&7), bits);
+ bits -= n;
+
+ if ((int)bits < 0)
+ return false;
+
+ accum >>= n;
+
+ while(bits <= 24 && (mBufferPt || refill())) {
+ accum += mBuffer[kBufferSize + mBufferPt++] << bits;
+ bits += 8;
+ }
+
+ return true;
+ }
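+
+	// consume() keeps the little-endian bit accumulator topped up with at
+	// least 25 bits while input remains (whole bytes are appended above the
+	// bits already held), so peek()/getbits() can serve any DEFLATE code
+	// length without touching the source stream directly.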
+
+ bool refill();
+
+ bool getbit() {
+ unsigned rv = accum;
+
+ consume(1);
+
+ return (rv&1) != 0;
+ }
+
+ unsigned getbits(unsigned n) {
+ unsigned rv = accum & ((1<<n)-1);
+
+ consume(n);
+
+ return rv;
+ }
+
+ bool empty() const {
+ return bits != 0;
+ }
+
+ unsigned avail() const {
+ return bits;
+ }
+
+ unsigned bitsleft() const {
+ return bits + (mBytesLeftLimited<<3);
+ }
+
+ unsigned bytesleft() const {
+ return (bits>>3) + mBytesLeftLimited;
+ }
+
+ void align() {
+ consume(bits&7);
+ }
+
+ void readbytes(void *dst, unsigned len);
+
+protected:
+ enum { kBigAvailThreshold = 16777216 };
+ enum { kBufferSize = 256 };
+
+ unsigned long accum;
+ unsigned bits;
+ int mBufferPt; // counts from -256 to 0
+ uint64 mBytesLeft;
+ unsigned mBytesLeftLimited;
+
+ IVDStream *mpSrc;
+ uint8 mBuffer[kBufferSize];
+};
+
+class VDCRCChecker {
+public:
+ enum {
+ kCRC32 = 0xEDB88320 // CRC-32 used by PKZIP, PNG (x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1)
+ };
+
+ VDCRCChecker() {}
+ VDCRCChecker(uint32 crc) { Init(crc); }
+
+ void Init(uint32 crc);
+ void Process(const void *src, sint32 len);
+
+ uint32 CRC() const { return ~mValue; }
+ uint32 CRC(uint32 crc, const void *src, sint32 len);
+
+protected:
+ uint32 mValue;
+ uint32 mTable[256];
+};
+
+class VDZipStream : public IVDStream {
+public:
+ VDZipStream();
+ VDZipStream(IVDStream *pSrc, uint64 limit, bool bStored);
+ ~VDZipStream();
+
+ void Init(IVDStream *pSrc, uint64 limit, bool bStored);
+ void EnableCRC(uint32 crc = VDCRCChecker::kCRC32) { mCRCChecker.Init(crc); mbCRCEnabled = true; }
+ uint32 CRC() { return mCRCChecker.CRC(); }
+
+ const wchar_t *GetNameForError();
+
+ sint64 Pos();
+ void Read(void *buffer, sint32 bytes);
+ sint32 ReadData(void *buffer, sint32 bytes);
+ void Write(const void *buffer, sint32 bytes);
+
+protected:
+ bool ParseBlockHeader();
+ bool Inflate();
+
+ VDDeflateBitReader mBits; // critical -- make this first!
+ uint32 mReadPt, mWritePt, mBufferLevel;
+
+ enum {
+ kNoBlock,
+ kStoredBlock,
+ kDeflatedBlock
+ } mBlockType;
+
+ uint32 mStoredBytesLeft;
+ bool mbNoMoreBlocks;
+ bool mbCRCEnabled;
+
+ sint64 mPos;
+ uint8 mBuffer[65536];
+
+ uint16 mCodeDecode[32768];
+ uint8 mCodeLengths[288 + 32];
+ uint16 mDistDecode[32768];
+
+ VDCRCChecker mCRCChecker;
+};
+
+class VDZipArchive {
+public:
+ struct FileInfo {
+ VDString mFileName;
+ uint32 mCompressedSize;
+ uint32 mUncompressedSize;
+ uint32 mCRC32;
+ bool mbPacked;
+ };
+
+ VDZipArchive();
+ ~VDZipArchive();
+
+ void Init(IVDRandomAccessStream *pSrc);
+
+ sint32 GetFileCount();
+ const FileInfo& GetFileInfo(sint32 idx);
+ IVDStream *OpenRawStream(sint32 idx);
+
+protected:
+ struct FileInfoInternal : public FileInfo {
+ uint32 mDataStart;
+ };
+
+ std::vector<FileInfoInternal> mDirectory;
+ IVDRandomAccessStream *mpStream;
+};
+
+#endif
diff --git a/src/thirdparty/VirtualDub/system/h/stdafx.h b/src/thirdparty/VirtualDub/system/h/stdafx.h
new file mode 100644
index 000000000..21373ed9f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/h/stdafx.h
@@ -0,0 +1,12 @@
+#define _WIN32_WINNT 0x0400
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/atomic.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/error.h>
+#include <windows.h>
+#include <process.h>
+#include <intrin.h>
+#include <string.h>
+#include <stdarg.h>
+#include <math.h>
+#include <ctype.h>
diff --git a/src/thirdparty/VirtualDub/system/source/Error.cpp b/src/thirdparty/VirtualDub/system/source/Error.cpp
new file mode 100644
index 000000000..727354c96
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/Error.cpp
@@ -0,0 +1,340 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <stdio.h>
+#include <stdarg.h>
+#include <crtdbg.h>
+#include <windows.h>
+#include <vfw.h>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/Error.h>
+#include <vd2/system/log.h>
+
+MyError::MyError() {
+ buf = NULL;
+}
+
+MyError::MyError(const MyError& err) {
+ buf = _strdup(err.buf);
+}
+
+MyError::MyError(const char *f, ...)
+ : buf(NULL)
+{
+ va_list val;
+
+ va_start(val, f);
+ vsetf(f, val);
+ va_end(val);
+}
+
+MyError::~MyError() {
+ free(buf);
+}
+
+void MyError::clear() {
+ if (buf) // we do this check because debug free() always does a heapchk even if buf==NULL
+ free(buf);
+ buf = NULL;
+}
+
+void MyError::assign(const MyError& e) {
+ if (buf)
+ free(buf);
+ buf = _strdup(e.buf);
+}
+
+void MyError::assign(const char *s) {
+ if (buf)
+ free(buf);
+ buf = _strdup(s);
+}
+
+void MyError::setf(const char *f, ...) {
+ va_list val;
+
+ va_start(val, f);
+ vsetf(f,val);
+ va_end(val);
+}
+
+void MyError::vsetf(const char *f, va_list val) {
+ for(int size = 1024; size <= 32768; size += size) {
+ free(buf);
+ buf = NULL;
+
+ buf = (char *)malloc(size);
+ if (!buf)
+ return;
+
+ if ((unsigned)_vsnprintf(buf, size, f, val) < (unsigned)size)
+ return;
+ }
+
+ free(buf);
+ buf = NULL;
+}
+
+void MyError::post(HWND hWndParent, const char *title) const {
+ if (!buf || !*buf)
+ return;
+
+ VDDEBUG("*** %s: %s\n", title, buf);
+ VDLog(kVDLogError, VDswprintf(L"Error: %hs", 1, &buf));
+
+ MessageBox(hWndParent, buf, title, MB_OK | MB_ICONERROR | MB_SETFOREGROUND);
+}
+
+void MyError::discard() {
+ free(buf);
+ buf = NULL;
+}
+
+void MyError::swap(MyError& err) {
+ char *s = err.buf;
+ err.buf = buf;
+ buf = s;
+}
+
+void MyError::TransferFrom(MyError& err) {
+ if (buf)
+ free(buf);
+
+ buf = err.buf;
+ err.buf = NULL;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+static const char *GetVCMErrorString(uint32 icErr) {
+ const char *err = "(unknown)";
+
+ // Does anyone have the *real* text strings for this?
+
+ switch(icErr) {
+ case ICERR_OK: err = "The operation completed successfully."; break; // sorry, couldn't resist....
+ case ICERR_UNSUPPORTED: err = "The operation is not supported."; break;
+ case ICERR_BADFORMAT: err = "The source image format is not acceptable."; break;
+ case ICERR_MEMORY: err = "Not enough memory."; break;
+ case ICERR_INTERNAL: err = "An internal error occurred."; break;
+ case ICERR_BADFLAGS: err = "An invalid flag was specified."; break;
+ case ICERR_BADPARAM: err = "An invalid parameter was specified."; break;
+ case ICERR_BADSIZE: err = "An invalid size was specified."; break;
+ case ICERR_BADHANDLE: err = "The handle is invalid."; break;
+ case ICERR_CANTUPDATE: err = "Cannot update the destination image."; break;
+ case ICERR_ABORT: err = "The operation was aborted by the user."; break;
+ case ICERR_ERROR: err = "An unknown error occurred (may be corrupt data)."; break;
+ case ICERR_BADBITDEPTH: err = "The source color depth is not acceptable."; break;
+ case ICERR_BADIMAGESIZE: err = "The source image size is not acceptable."; break;
+ default:
+ if (icErr <= ICERR_CUSTOM) err = "A codec-specific error occurred.";
+ break;
+ }
+
+ return err;
+}
+
+MyICError::MyICError(const char *s, uint32 icErr) {
+ setf("%s error: %s (error code %ld)", s, GetVCMErrorString(icErr), icErr);
+}
+
+MyICError::MyICError(uint32 icErr, const char *format, ...) {
+ char tmpbuf[1024];
+
+ va_list val;
+ va_start(val, format);
+ tmpbuf[(sizeof tmpbuf) - 1] = 0;
+ _vsnprintf(tmpbuf, (sizeof tmpbuf) - 1, format, val);
+ va_end(val);
+
+ setf(tmpbuf, GetVCMErrorString(icErr));
+}
+
+MyMMIOError::MyMMIOError(const char *s, uint32 mmioerr) {
+ const char *err = "(Unknown)";
+
+ switch(mmioerr) {
+ case MMIOERR_FILENOTFOUND: err = "file not found"; break;
+ case MMIOERR_OUTOFMEMORY: err = "out of memory"; break;
+ case MMIOERR_CANNOTOPEN: err = "couldn't open"; break;
+ case MMIOERR_CANNOTCLOSE: err = "couldn't close"; break;
+ case MMIOERR_CANNOTREAD: err = "couldn't read"; break;
+ case MMIOERR_CANNOTWRITE: err = "couldn't write"; break;
+ case MMIOERR_CANNOTSEEK: err = "couldn't seek"; break;
+ case MMIOERR_CANNOTEXPAND: err = "couldn't expand"; break;
+ case MMIOERR_CHUNKNOTFOUND: err = "chunk not found"; break;
+ case MMIOERR_UNBUFFERED: err = "unbuffered"; break;
+ case MMIOERR_PATHNOTFOUND: err = "path not found"; break;
+ case MMIOERR_ACCESSDENIED: err = "access denied"; break;
+ case MMIOERR_SHARINGVIOLATION: err = "sharing violation"; break;
+ case MMIOERR_NETWORKERROR: err = "network error"; break;
+ case MMIOERR_TOOMANYOPENFILES: err = "too many open files"; break;
+ case MMIOERR_INVALIDFILE: err = "invalid file"; break;
+ }
+
+ setf("%s error: %s (%ld)", s, err, mmioerr);
+}
+
+MyAVIError::MyAVIError(const char *s, uint32 avierr) {
+ const char *err = "(Unknown)";
+
+ switch(avierr) {
+ case AVIERR_UNSUPPORTED: err = "unsupported"; break;
+ case AVIERR_BADFORMAT: err = "bad format"; break;
+ case AVIERR_MEMORY: err = "out of memory"; break;
+ case AVIERR_INTERNAL: err = "internal error"; break;
+ case AVIERR_BADFLAGS: err = "bad flags"; break;
+ case AVIERR_BADPARAM: err = "bad parameters"; break;
+ case AVIERR_BADSIZE: err = "bad size"; break;
+ case AVIERR_BADHANDLE: err = "bad AVIFile handle"; break;
+ case AVIERR_FILEREAD: err = "file read error"; break;
+ case AVIERR_FILEWRITE: err = "file write error"; break;
+ case AVIERR_FILEOPEN: err = "file open error"; break;
+ case AVIERR_COMPRESSOR: err = "compressor error"; break;
+ case AVIERR_NOCOMPRESSOR: err = "compressor not available"; break;
+ case AVIERR_READONLY: err = "file marked read-only"; break;
+ case AVIERR_NODATA: err = "no data (?)"; break;
+ case AVIERR_BUFFERTOOSMALL: err = "buffer too small"; break;
+ case AVIERR_CANTCOMPRESS: err = "can't compress (?)"; break;
+ case AVIERR_USERABORT: err = "aborted by user"; break;
+ case AVIERR_ERROR: err = "error (?)"; break;
+ }
+
+ setf("%s error: %s (%08lx)", s, err, avierr);
+}
+
+MyMemoryError::MyMemoryError() {
+ setf("Out of memory");
+}
+
+MyWin32Error::MyWin32Error(const char *format, uint32 err, ...) {
+ char szError[1024];
+ char szTemp[1024];
+ va_list val;
+
+ va_start(val, err);
+ szError[(sizeof szError)-1] = 0;
+ _vsnprintf(szError, (sizeof szError)-1, format, val);
+ va_end(val);
+
+ // Determine the position of the last %s, and escape everything else. This doesn't
+ // track escaped % signs properly, but it works for the strings that we receive (and at
+ // worst just produces a funny message).
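+	// For example (assuming the caller writes the system-message slot as
+	// "%%s" so it survives the first _vsnprintf pass), a message expanded to
+	//		Cannot read "sales 50%.csv": %s
+	// becomes
+	//		Cannot read "sales 50%%.csv": %s
+	// so that the setf() call at the end substitutes only the FormatMessage()
+	// text for the final %s.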
+ const char *keep = strstr(szError, "%s");
+ if (keep) {
+ for(;;) {
+ const char *test = strstr(keep + 1, "%s");
+
+ if (!test)
+ break;
+
+ keep = test;
+ }
+ }
+
+ char *t = szTemp;
+ char *end = szTemp + (sizeof szTemp) - 1;
+ const char *s = szError;
+
+ while(char c = *s++) {
+ if (c == '%') {
+ // We allow one %s to go through. Everything else gets escaped.
+ if (s-1 != keep) {
+ if (t >= end)
+ break;
+
+ *t++ = '%';
+ }
+ }
+
+ if (t >= end)
+ break;
+
+ *t++ = c;
+ }
+
+ *t = 0;
+
+ if (!FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
+ 0,
+ err,
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+ szError,
+ sizeof szError,
+ NULL))
+ {
+ szError[0] = 0;
+ }
+
+ if (szError[0]) {
+ long l = strlen(szError);
+
+ if (l>1 && szError[l-2] == '\r')
+ szError[l-2] = 0;
+ else if (szError[l-1] == '\n')
+ szError[l-1] = 0;
+ }
+
+ setf(szTemp, szError);
+}
+
+MyCrashError::MyCrashError(const char *format, uint32 dwExceptionCode) {
+ const char *s = "(Unknown Exception)";
+
+ switch(dwExceptionCode) {
+ case EXCEPTION_ACCESS_VIOLATION:
+ s = "Access Violation";
+ break;
+ case EXCEPTION_PRIV_INSTRUCTION:
+ s = "Privileged Instruction";
+ break;
+ case EXCEPTION_INT_DIVIDE_BY_ZERO:
+ s = "Integer Divide By Zero";
+ break;
+ case EXCEPTION_BREAKPOINT:
+ s = "User Breakpoint";
+ break;
+ }
+
+ setf(format, s);
+}
+
+MyUserAbortError::MyUserAbortError() {
+ buf = _strdup("");
+}
+
+MyInternalError::MyInternalError(const char *format, ...) {
+ char buf[1024];
+ va_list val;
+
+ va_start(val, format);
+ _vsnprintf(buf, (sizeof buf) - 1, format, val);
+ buf[1023] = 0;
+ va_end(val);
+
+ setf("Internal error: %s", buf);
+}
diff --git a/src/thirdparty/VirtualDub/system/source/Fraction.cpp b/src/thirdparty/VirtualDub/system/source/Fraction.cpp
new file mode 100644
index 000000000..ab6693d01
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/Fraction.cpp
@@ -0,0 +1,327 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2006 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <math.h>
+
+#include <vd2/system/fraction.h>
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/math.h>
+
+VDFraction::VDFraction(double d) {
+ int xp;
+ double mant = frexp(d, &xp);
+
+ if (xp >= 33) {
+ hi = 0xFFFFFFFF;
+ lo = 1;
+ } else if (xp < -31) {
+ hi = 0;
+ lo = 1;
+ } else if (xp >= 0) {
+ *this = reduce((uint64)(0.5 + ldexp(mant, 62)), 1i64<<(62-xp));
+ } else {
+ // This is not quite accurate for very tiny numbers.
+ VDFraction t(1.0 / d);
+ lo = t.hi;
+ hi = t.lo;
+ }
+}
+
+VDFraction VDFraction::reduce(uint64 hi, uint64 lo) {
+
+ // Check for undefined.
+
+ if (!lo)
+ return VDFraction(0,0);
+
+ // Check for zero.
+
+ if (!hi) {
+ return VDFraction(0,1);
+ }
+
+ // Check for infinity.
+
+ if (!((uint64)lo>>32) && (uint64)hi > ((uint64)lo<<32)-lo)
+ return VDFraction(0xFFFFFFFFUL, 1);
+
+ // Algorithm from Wikipedia, Continued Fractions:
+ uint64 n0 = 0;
+ uint64 d0 = 1;
+ uint32 n1 = 1;
+ uint32 d1 = 0;
+ uint64 fp = 0;
+
+ uint32 n_best;
+ uint32 d_best;
+
+ for(;;) {
+ uint64 a = hi/lo; // next continued fraction term
+ uint64 f = hi%lo; // remainder
+
+ uint64 n2 = n0 + n1*a; // next convergent numerator
+ uint64 d2 = d0 + d1*a; // next convergent denominator
+
+ uint32 n_overflow = (uint32)(n2 >> 32);
+ uint32 d_overflow = (uint32)(d2 >> 32);
+
+ if (n_overflow | d_overflow) {
+ uint64 a2 = a;
+
+ // reduce last component until numerator and denominator are within range
+ if (n_overflow)
+ a2 = (0xFFFFFFFF - n0) / n1;
+
+ if (d_overflow) {
+ uint64 a3 = (0xFFFFFFFF - d0) / d1;
+ if (a2 > a3)
+ a2 = a3;
+ }
+
+ // check if new term is better
+ // 1/2a_k admissibility test
+ if (a2*2 < a || (a2*2 == a && d0*fp <= f*d1))
+ return VDFraction((uint32)n_best, (uint32)d_best);
+
+ return VDFraction((uint32)(n0 + n1*a2), (uint32)(d0 + d1*a2));
+ }
+
+ n_best = (uint32)n2;
+ d_best = (uint32)d2;
+
+ // if fraction is exact, we're done.
+ if (!f)
+ return VDFraction((uint32)n_best, (uint32)d_best);
+
+ n0 = n1;
+ n1 = (uint32)n2;
+ d0 = d1;
+ d1 = (uint32)d2;
+ fp = f;
+
+ hi = lo;
+ lo = f;
+ }
+}
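+
+// Worked example of the loop above (illustrative): reduce(23976, 1000)
+// produces the continued-fraction terms 23, 1, 40, 1, 2 and the convergents
+// 23/1, 24/1, 983/41, 1007/42, 2997/125; the remainder reaches zero on the
+// last term, so the exact lowest-terms result 2997/125 (= 23.976) is
+// returned. When a convergent would overflow 32 bits, the final term is
+// trimmed instead and the "1/2 a_k" admissibility test chooses between the
+// trimmed convergent and the previous best approximation.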
+
+// With a = aH/aL and b = bH/bL (denominators positive):
+// a (cond) b
+// a-b (cond) 0
+// aH*bL - aL*bH (cond) 0
+// aH*bL (cond) aL*bH
+
+bool VDFraction::operator==(VDFraction b) const {
+ return (uint64)hi * b.lo == (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator!=(VDFraction b) const {
+ return (uint64)hi * b.lo != (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator< (VDFraction b) const {
+ return (uint64)hi * b.lo < (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator<=(VDFraction b) const {
+ return (uint64)hi * b.lo <= (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator> (VDFraction b) const {
+ return (uint64)hi * b.lo > (uint64)lo * b.hi;
+}
+
+bool VDFraction::operator>=(VDFraction b) const {
+ return (uint64)hi * b.lo >= (uint64)lo * b.hi;
+}
+
+VDFraction VDFraction::operator*(VDFraction b) const {
+ return reduce((uint64)hi * b.hi, (uint64)lo * b.lo);
+}
+
+VDFraction VDFraction::operator/(VDFraction b) const {
+ return reduce((uint64)hi * b.lo, (uint64)lo * b.hi);
+}
+
+VDFraction VDFraction::operator*(unsigned long b) const {
+ return reduce((uint64)hi * b, lo);
+}
+
+VDFraction VDFraction::operator/(unsigned long b) const {
+ return reduce(hi, (uint64)lo * b);
+}
+
+VDFraction& VDFraction::operator*=(VDFraction b) {
+ return *this = reduce((uint64)hi * b.hi, (uint64)lo * b.lo);
+}
+
+VDFraction& VDFraction::operator/=(VDFraction b) {
+ return *this = reduce((uint64)hi * b.lo, (uint64)lo * b.hi);
+}
+
+VDFraction& VDFraction::operator*=(unsigned long b) {
+ return *this = reduce((uint64)hi * b, lo);
+}
+
+VDFraction& VDFraction::operator/=(unsigned long b) {
+ return *this = reduce(hi, (uint64)lo * b);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+sint64 VDFraction::scale64t(sint64 v) const {
+ uint32 r;
+ return v<0 ? -VDFractionScale64(-v, hi, lo, r) : VDFractionScale64(v, hi, lo, r);
+}
+
+sint64 VDFraction::scale64u(sint64 v) const {
+ uint32 r;
+ if (v<0) {
+ v = -VDFractionScale64(-v, hi, lo, r);
+ return v;
+ } else {
+ v = +VDFractionScale64(+v, hi, lo, r);
+ return v + (r > 0);
+ }
+}
+
+sint64 VDFraction::scale64r(sint64 v) const {
+ uint32 r;
+ if (v<0) {
+ v = -VDFractionScale64(-v, hi, lo, r);
+ return v - (r >= (lo>>1) + (lo&1));
+ } else {
+ v = +VDFractionScale64(+v, hi, lo, r);
+ return v + (r >= (lo>>1) + (lo&1));
+ }
+}
+
+sint64 VDFraction::scale64it(sint64 v) const {
+ uint32 r;
+ return v<0 ? -VDFractionScale64(-v, lo, hi, r) : +VDFractionScale64(+v, lo, hi, r);
+}
+
+sint64 VDFraction::scale64ir(sint64 v) const {
+ uint32 r;
+ if (v<0) {
+ v = -VDFractionScale64(-v, lo, hi, r);
+ return v - (r >= (hi>>1) + (hi&1));
+ } else {
+ v = +VDFractionScale64(+v, lo, hi, r);
+ return v + (r >= (hi>>1) + (hi&1));
+ }
+}
+
+sint64 VDFraction::scale64iu(sint64 v) const {
+ uint32 r;
+ if (v<0) {
+ v = -VDFractionScale64(-v, lo, hi, r);
+ return v;
+ } else {
+ v = +VDFractionScale64(+v, lo, hi, r);
+ return v + (r > 0);
+ }
+}
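+
+// The scale64 helpers above all evaluate v * hi / lo (the i* variants invert
+// the fraction and evaluate v * lo / hi) through VDFractionScale64, which
+// returns the truncated quotient of a 128-bit intermediate product and the
+// remainder in r. The suffix picks the rounding mode: t truncates toward
+// zero, u rounds up on any nonzero remainder, and r rounds to nearest using
+// a half-divisor threshold, (lo>>1)+(lo&1) above. Illustrative check with
+// VDFraction(30000, 1001): scale64t(1) == 29, scale64r(1) == 30, and
+// scale64r(1001) == 30000 exactly.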
+
+///////////////////////////////////////////////////////////////////////////
+
+double VDFraction::asDouble() const {
+ return (double)hi / (double)lo;
+}
+
+double VDFraction::AsInverseDouble() const {
+ return (double)lo / (double)hi;
+}
+
+unsigned long VDFraction::roundup32ul() const {
+ return (hi + (lo-1)) / lo;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+bool VDFraction::Parse(const char *s) {
+ char c;
+
+ // skip whitespace
+ while((c = *s) && (c == ' ' || c == '\t'))
+ ++s;
+
+ // accumulate integer digits
+ uint64 x = 0;
+ uint64 y = 1;
+
+ while(c = *s) {
+ uint32 offset = (uint32)c - '0';
+
+ if (offset >= 10)
+ break;
+
+ x = (x * 10) + offset;
+
+ // check for overflow
+ if (x >> 32)
+ return false;
+
+ ++s;
+ }
+
+ if (c == '.') {
+ ++s;
+
+ while(c = *s) {
+ uint32 offset = (uint32)c - '0';
+
+ if (offset >= 10)
+ break;
+
+ if (x >= 100000000000000000 ||
+ y >= 100000000000000000) {
+ if (offset >= 5)
+ ++x;
+ while((c = *s) && (unsigned)(c - '0') < 10)
+ ++s;
+ break;
+ }
+
+ x = (x * 10) + offset;
+ y *= 10;
+ ++s;
+ }
+ }
+
+ while(c == ' ' || c == '\t')
+ c = *++s;
+
+ // check for trailing garbage
+ if (c)
+ return false;
+
+ // check for overflow
+ if (!(y >> 32) && ((uint64)(uint32)y << 32) <= x)
+ return false;
+
+ // reduce fraction and return success
+ *this = reduce(x, y);
+ return true;
+}
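+
+// Parse() accepts an optionally fractional decimal string and converts it to
+// numerator x over power-of-ten denominator y before reducing. Illustrative
+// usage:
+//
+//     VDFraction fr;
+//     if (fr.Parse("23.976")) {
+//         // fr now holds 23976/1000 in lowest terms, i.e. 2997/125
+//     }
+//
+// Leading and trailing whitespace is tolerated; trailing garbage or a value
+// of 2^32 or more makes Parse() return false.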
diff --git a/src/thirdparty/VirtualDub/system/source/VDNamespace.cpp b/src/thirdparty/VirtualDub/system/source/VDNamespace.cpp
new file mode 100644
index 000000000..8ba706fa3
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/VDNamespace.cpp
@@ -0,0 +1,254 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <string.h>
+#include <ctype.h>
+#include <crtdbg.h>
+
+#include <vd2/system/list.h>
+#include <vd2/system/VDNamespace.h>
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Group
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDNamespaceGroup::VDNamespaceGroup(const char *_pszName, VDNamespaceGroup *parent)
+: VDNamespaceNode(namedup(_pszName),parent)
+{
+ const char *t = strchr(_pszName,'/');
+
+ if (t) {
+
+ } else
+ strcpy((char *)pszName, _pszName);
+}
+
+VDNamespaceGroup::~VDNamespaceGroup() {
+ delete[] (char *)pszName;
+}
+
+const char *VDNamespaceGroup::namedup(const char *s) {
+ const char *t = strchr(s,'/');
+ char *mem;
+
+ if (t) {
+ mem = new char[(t-s)+1];
+
+ memcpy(mem, s, (t-s));
+ mem[t-s] = 0;
+
+ return mem;
+ } else {
+ mem = new char[strlen(s)+1];
+
+ return strcpy(mem, s);
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Item
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDNamespaceItem::VDNamespaceItem(const char *_pszName, VDNamespaceGroup *parent, const void *src)
+: VDNamespaceNode(_pszName,parent), object(src)
+{}
+
+VDNamespaceItem::~VDNamespaceItem() {}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDNamespace
+//
+///////////////////////////////////////////////////////////////////////////
+
+bool VDNamespaceCompare(const char *psz1, const char *psz2) {
+ char c, d;
+
+ while((!!(c=toupper(*psz1++)) & !!(d=toupper(*psz2++))) && c!='/' && d!='/' && c==d)
+ ;
+
+ if (c=='/') c=0;
+ if (d=='/') d=0;
+
+ return c==d;
+}
+
+VDNamespace::VDNamespace() : root("", NULL) {
+}
+
+VDNamespace::~VDNamespace() {
+}
+
+VDNamespaceGroup *VDNamespace::_lookupGroup(const char *pszName, bool fCreate, bool fIsFilter) {
+ const char *pszNameLimit = pszName;
+ const char *slash = NULL;
+ VDNamespaceGroup *pGroup = &root, *pGroupNext;
+
+ while(*pszNameLimit) {
+ if (*pszNameLimit++ == '/')
+ slash = pszNameLimit - 1;
+ }
+
+ if (fIsFilter)
+ pszNameLimit = slash;
+
+ while(pszName < pszNameLimit) {
+ VDNamespaceGroup *pGroupParent = pGroup;
+
+ pGroup = pGroup->listGroups.AtHead();
+
+ while(pGroupNext = pGroup->NextFromHead()) {
+ if (VDNamespaceCompare(pszName, pGroup->pszName))
+ break;
+
+ pGroup = pGroupNext;
+ }
+
+ if (!pGroupNext && fCreate) {
+ pGroupNext = pGroup = new VDNamespaceGroup(pszName, pGroupParent);
+
+ pGroupParent->listGroups.AddTail(pGroup);
+ }
+
+ // group not found?
+
+ if (!pGroupNext) {
+ return NULL;
+ }
+
+ // advance to next slash
+
+ while(*pszName && *pszName++!='/')
+ ;
+ }
+
+ return pGroup;
+}
+
+void VDNamespace::clear() {
+ root.listGroups.dispose();
+ root.listItems.dispose();
+}
+
+void VDNamespace::add(const char *pszGroup, const char *pszName, const void *pDef) {
+ VDNamespaceGroup *pGroup = _lookupGroup(pszGroup, true, false);
+
+ pGroup->listItems.AddTail(new VDNamespaceItem(pszName, pGroup, pDef));
+}
+
+const void *VDNamespace::lookup(const char *pszName) {
+ VDNamespaceGroup *pGroup = _lookupGroup(pszName, false, true);
+
+ if (!pGroup)
+ return NULL;
+
+ const char *pszNameBase = pszName;
+
+ while(*pszName++)
+ if (pszName[-1]=='/')
+ pszNameBase = pszName;
+
+ for(ListAlloc<VDNamespaceItem >::fwit it = pGroup->listItems.begin(); it; ++it)
+ if (!_stricmp(it->pszName, pszNameBase))
+ return it->object;
+
+ return NULL;
+}
+
+bool VDNamespace::enumerateGroups(const VDNamespaceGroup *pGroupRoot, tGroupEnumerator pEnum, void *pvData) {
+ VDNamespaceGroup *pGroup, *pGroupNext;
+
+ pGroup = (pGroupRoot ? pGroupRoot : &root)->listGroups.AtHead();
+ while(pGroupNext = pGroup->NextFromHead()) {
+ if (!pEnum(this, pGroup->pszName, pGroup, pvData))
+ return false;
+
+ pGroup = pGroupNext;
+ }
+
+ return true;
+}
+
+bool VDNamespace::enumerateItems(const VDNamespaceGroup *pGroupRoot, tItemEnumerator pEnum, void *pvData) {
+ VDNamespaceItem *pEntry, *pEntryNext;
+
+ pEntry = pGroupRoot->listItems.AtHead();
+ while(pEntryNext = pEntry->NextFromHead()) {
+ if (!pEnum(this, pEntry->pszName, pEntry->object, pvData))
+ return false;
+
+ pEntry = pEntryNext;
+ }
+
+ return true;
+}
+
+VDNamespaceItem *VDNamespace::_findItemByObject(const VDNamespaceGroup *pGroup, const void *pObj) {
+ for(ListAlloc<VDNamespaceItem>::fwit it=pGroup->listItems.begin(); it; ++it) {
+ if (it->object == pObj) {
+ return it;
+ }
+ }
+
+ for(ListAlloc<VDNamespaceGroup>::fwit it2=pGroup->listGroups.begin(); it2; ++it2) {
+ VDNamespaceItem *v;
+
+ if (v = _findItemByObject(it2, pObj))
+ return v;
+ }
+
+ return NULL;
+}
+
+bool VDNamespace::_getPathByItem(const VDNamespaceNode *pEntry, char *buf, int maxlen) {
+ if (!pEntry)
+ return false;
+
+ if (maxlen < (int)strlen(pEntry->pszName)+2)
+ return false;
+
+ if (pEntry->pParent && pEntry->pParent->pParent) {
+ if (!_getPathByItem(pEntry->pParent, buf, maxlen))
+ return false;
+
+ while(*buf)
+ ++buf, --maxlen;
+
+ *buf++ = '/';
+ }
+
+ strcpy(buf, pEntry->pszName);
+
+ return true;
+}
+
+bool VDNamespace::getPathByItem(const void *pObj, char *buf, int maxlen) {
+ return _getPathByItem(_findItemByObject(&root, pObj), buf, maxlen);
+}
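+
+// VDNamespace keeps a tree of groups addressed by '/'-separated paths with
+// case-insensitive component matching. Illustrative usage sketch (the group
+// and item names below are arbitrary examples):
+//
+//     VDNamespace ns;
+//     ns.add("video/filters", "resize", pResizeDef);     // pResizeDef: any pointer
+//     const void *p = ns.lookup("video/filters/resize"); // returns pResizeDef
+//
+//     char path[256];
+//     ns.getPathByItem(pResizeDef, path, sizeof path);   // -> "video/filters/resize"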
diff --git a/src/thirdparty/VirtualDub/system/source/VDScheduler.cpp b/src/thirdparty/VirtualDub/system/source/VDScheduler.cpp
new file mode 100644
index 000000000..cdfc97269
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/VDScheduler.cpp
@@ -0,0 +1,261 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/VDScheduler.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/error.h>
+#include <windows.h>
+
+VDScheduler::VDScheduler()
+ : mpErrorCB(NULL)
+ , pWakeupSignal(NULL)
+ , pParentSchedulerNode(NULL)
+ , mbExitThreads(false)
+{
+}
+
+VDScheduler::~VDScheduler() {
+}
+
+void VDScheduler::setSignal(VDSignal *pSignal) {
+ pWakeupSignal = pSignal;
+}
+
+void VDScheduler::setSchedulerNode(VDSchedulerNode *pSchedulerNode) {
+ pParentSchedulerNode = pSchedulerNode;
+}
+
+void VDScheduler::BeginShutdown() {
+ mbExitThreads = true;
+ Ping();
+}
+
+void VDScheduler::Repost(VDSchedulerNode *pNode, bool bReschedule) {
+ vdsynchronized(csScheduler) {
+ if (pNode->bCondemned) {
+ tSuspendList::iterator it(listSuspends.begin()), itEnd(listSuspends.end());
+
+ while(it!=itEnd) {
+ VDSchedulerSuspendNode *pSuspendNode = *it;
+
+ if (pSuspendNode->mpNode == pNode) {
+ it = listSuspends.erase(it);
+ pSuspendNode->mSignal.signal();
+ } else
+ ++it;
+ }
+ } else {
+ pNode->bRunning = false;
+ if (bReschedule || pNode->bReschedule) {
+ pNode->bReschedule = false;
+ pNode->bReady = true;
+ listReady.push_back(pNode);
+ } else
+ listWaiting.push_back(pNode);
+ }
+ }
+}
+
+bool VDScheduler::Run() {
+ VDSchedulerNode *pNode = NULL;
+ vdsynchronized(csScheduler) {
+ if (!listReady.empty()) {
+ pNode = listReady.front();
+ listReady.pop_front();
+ pNode->bRunning = true;
+ pNode->bReady = false;
+ }
+ }
+
+ if (!pNode)
+ return false;
+
+ bool bReschedule;
+ try {
+ bReschedule = pNode->Service();
+ } catch(MyError& e) {
+ Repost(pNode, false);
+
+ vdsynchronized(csScheduler) {
+ if (mpErrorCB) {
+ if (!mpErrorCB->OnAsyncError(e))
+ throw;
+ }
+ }
+
+ return true;
+ } catch(...) {
+ Repost(pNode, false);
+ throw;
+ }
+
+ Repost(pNode, bReschedule);
+
+ return true;
+}
+
+bool VDScheduler::IdleWait() {
+ if (mbExitThreads)
+ return false;
+
+ if (pWakeupSignal) {
+#if 0
+ while(WAIT_TIMEOUT == WaitForSingleObject(pWakeupSignal->getHandle(), 1000))
+ DumpStatus();
+#else
+ pWakeupSignal->wait();
+#endif
+ }
+
+ return true;
+}
+
+void VDScheduler::Ping() {
+ if (pWakeupSignal)
+ pWakeupSignal->signal();
+}
+
+void VDScheduler::Lock() {
+ ++csScheduler;
+}
+
+void VDScheduler::Unlock() {
+ --csScheduler;
+}
+
+void VDScheduler::Reschedule(VDSchedulerNode *pNode) {
+ VDCriticalSection::AutoLock lock(csScheduler);
+
+ RescheduleFast(pNode);
+}
+
+void VDScheduler::RescheduleFast(VDSchedulerNode *pNode) {
+ if (pNode->bReady)
+ return;
+
+ pNode->bReady = true;
+
+ if (pNode->bRunning)
+ pNode->bReschedule = true;
+ else {
+ if (pWakeupSignal)
+ pWakeupSignal->signal();
+
+ if (pParentSchedulerNode)
+ pParentSchedulerNode->Reschedule();
+
+ listWaiting.erase(pNode);
+ listReady.push_back(pNode);
+ }
+}
+
+void VDScheduler::Add(VDSchedulerNode *pNode) {
+ VDASSERT(pNode);
+
+ pNode->pScheduler = this;
+ pNode->bRunning = false;
+ pNode->bReschedule = false;
+ pNode->bReady = true;
+ pNode->bCondemned = false;
+
+ vdsynchronized(csScheduler) {
+ tNodeList::iterator it(listReady.begin()), itEnd(listReady.end());
+
+ while(it != itEnd && (*it)->nPriority <= pNode->nPriority)
+ ++it;
+
+ listReady.insert(it, pNode);
+ }
+
+ if (pWakeupSignal)
+ pWakeupSignal->signal();
+
+ if (pParentSchedulerNode)
+ pParentSchedulerNode->Reschedule();
+}
+
+void VDScheduler::Remove(VDSchedulerNode *pNode) {
+ VDASSERT(pNode);
+
+ VDSchedulerSuspendNode suspendNode(pNode);
+ bool running = false;
+
+ vdsynchronized(csScheduler) {
+ pNode->bCondemned = true;
+ if (pNode->bRunning) {
+ running = true;
+ listSuspends.push_back(&suspendNode);
+ } else if (pNode->bReady)
+ listReady.erase(pNode);
+ else
+ listWaiting.erase(pNode);
+ }
+
+ if (running)
+ suspendNode.mSignal.wait();
+}
+
+void VDScheduler::DumpStatus() {
+ vdsynchronized(csScheduler) {
+ VDDEBUG2("\n Waiting nodes:\n");
+ for(tNodeList::iterator it(listWaiting.begin()), itEnd(listWaiting.end()); it!=itEnd; ++it)
+ (*it)->DumpStatus();
+ VDDEBUG2("\n Ready nodes:\n");
+ for(tNodeList::iterator it2(listReady.begin()), it2End(listReady.end()); it2!=it2End; ++it2)
+ (*it2)->DumpStatus();
+ }
+}
+
+void VDSchedulerNode::DumpStatus() {
+ VDDEBUG2(" anonymous %p\n", this);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDSchedulerThread::VDSchedulerThread()
+ : mpScheduler(NULL)
+{
+}
+
+VDSchedulerThread::~VDSchedulerThread() {
+}
+
+bool VDSchedulerThread::Start(VDScheduler *pScheduler) {
+ mpScheduler = pScheduler;
+ return VDThread::ThreadStart();
+}
+
+void VDSchedulerThread::ThreadRun() {
+ VDScheduler& scheduler = *mpScheduler;
+
+ do {
+ while(scheduler.Run())
+ ;
+ } while(scheduler.IdleWait());
+
+ scheduler.Ping();
+}
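+
+// A VDSchedulerNode supplies work through Service(): the scheduler keeps the
+// node on the ready list while Service() returns true, parks it on the
+// waiting list otherwise, and Reschedule() moves it back when new work
+// arrives. Illustrative usage sketch (MyNode and DoOneUnitOfWork are
+// hypothetical):
+//
+//     class MyNode : public VDSchedulerNode {
+//     public:
+//         bool Service() { return DoOneUnitOfWork(); }
+//     };
+//
+//     VDSignal wakeup;
+//     VDScheduler sched;
+//     sched.setSignal(&wakeup);
+//     MyNode node;
+//     sched.Add(&node);          // node is immediately ready
+//     VDSchedulerThread worker;
+//     worker.Start(&sched);      // worker loops Run()/IdleWait()
+//     ...
+//     sched.Remove(&node);       // blocks until the node is not running
+//     sched.BeginShutdown();     // makes IdleWait() return false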
diff --git a/src/thirdparty/VirtualDub/system/source/VDString.cpp b/src/thirdparty/VirtualDub/system/source/VDString.cpp
new file mode 100644
index 000000000..5877fadb5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/VDString.cpp
@@ -0,0 +1,209 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/VDString.h>
+#include <vd2/system/vdstl.h>
+
+const VDStringSpanA::value_type VDStringSpanA::sNull[1] = {0};
+
+void VDStringA::push_back_extend() {
+ VDASSERT(mpEOS == mpEnd);
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+
+ reserve_slow(current_size * 2 + 1, current_size);
+}
+
+void VDStringA::resize_slow(size_type n, size_type current_size) {
+ resize_slow(n, current_size, 0);
+}
+
+void VDStringA::resize_slow(size_type n, size_type current_size, value_type c) {
+ VDASSERT(n > current_size);
+
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ if (n > current_capacity)
+ reserve_slow(n, current_capacity);
+
+ memset(mpBegin + current_size, c, n - current_size);
+ mpEnd = mpBegin + n;
+ *mpEnd = 0;
+}
+
+void VDStringA::reserve_slow(size_type n, size_type current_capacity) {
+ VDASSERT(n > current_capacity);
+
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+ value_type *s = new value_type[n + 1];
+ memcpy(s, mpBegin, (current_size + 1) * sizeof(value_type));
+ if (mpBegin != sNull)
+ delete[] mpBegin;
+
+ mpBegin = s;
+ mpEnd = s + current_size;
+ mpEOS = s + n;
+}
+
+void VDStringA::reserve_amortized_slow(size_type n, size_type current_size, size_type current_capacity) {
+ n += current_size;
+
+ size_type doublesize = current_size * 2;
+ if (n < doublesize)
+ n = doublesize;
+
+ reserve_slow(n, current_capacity);
+}
+
+VDStringA& VDStringA::sprintf(const value_type *format, ...) {
+ clear();
+ va_list val;
+ va_start(val, format);
+ append_vsprintf(format, val);
+ va_end(val);
+ return *this;
+}
+
+VDStringA& VDStringA::append_sprintf(const value_type *format, ...) {
+ va_list val;
+ va_start(val, format);
+ append_vsprintf(format, val);
+ va_end(val);
+ return *this;
+}
+
+VDStringA& VDStringA::append_vsprintf(const value_type *format, va_list val) {
+ char buf[2048];
+
+ int len = _vsnprintf(buf, 2048, format, val);
+ if (len >= 0)
+ append(buf, buf+len);
+ else {
+ int len;
+
+ vdfastvector<char> tmp;
+ for(int siz = 8192; siz <= 65536; siz += siz) {
+ tmp.resize(siz);
+
+ char *tmpp = tmp.data();
+ len = _vsnprintf(tmp.data(), siz, format, val);
+ if (len >= 0) {
+ append(tmpp, tmpp+len);
+ break;
+ }
+ }
+ }
+
+ return *this;
+}
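+
+// append_vsprintf() formats into a 2K stack buffer first and only falls back
+// to a growing heap buffer (8K up to 64K) when _vsnprintf signals truncation
+// with a negative return. Illustrative use of the sprintf family (cur and
+// total are arbitrary unsigned integers):
+//
+//     VDStringA s;
+//     s.sprintf("frame %u of %u", cur, total);            // replaces contents
+//     s.append_sprintf(" (%.1f%%)", 100.0 * cur / total); // appends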
+
+///////////////////////////////////////////////////////////////////////////////
+
+const VDStringSpanW::value_type VDStringSpanW::sNull[1] = {0};
+
+void VDStringW::push_back_extend() {
+ VDASSERT(mpEOS == mpEnd);
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+
+ reserve_slow(current_size * 2 + 1, current_size);
+}
+
+void VDStringW::resize_slow(size_type n, size_type current_size) {
+ VDASSERT(n > current_size);
+
+ size_type current_capacity = (size_type)(mpEOS - mpBegin);
+ if (n > current_capacity)
+ reserve_slow(n, current_capacity);
+
+ mpEnd = mpBegin + n;
+ *mpEnd = 0;
+}
+
+void VDStringW::reserve_slow(size_type n, size_type current_capacity) {
+ VDASSERT(current_capacity == (size_type)(mpEOS - mpBegin));
+ VDASSERT(n > current_capacity);
+
+ size_type current_size = (size_type)(mpEnd - mpBegin);
+ value_type *s = new value_type[n + 1];
+ memcpy(s, mpBegin, (current_size + 1) * sizeof(value_type));
+ if (mpBegin != sNull)
+ delete[] mpBegin;
+
+ mpBegin = s;
+ mpEnd = s + current_size;
+ mpEOS = s + n;
+}
+
+void VDStringW::reserve_amortized_slow(size_type n, size_type current_size, size_type current_capacity) {
+ n += current_size;
+
+ size_type doublesize = current_size * 2;
+ if (n < doublesize)
+ n = doublesize;
+
+ reserve_slow(n, current_capacity);
+}
+
+VDStringW& VDStringW::sprintf(const value_type *format, ...) {
+ clear();
+ va_list val;
+ va_start(val, format);
+ append_vsprintf(format, val);
+ va_end(val);
+ return *this;
+}
+
+VDStringW& VDStringW::append_sprintf(const value_type *format, ...) {
+ va_list val;
+ va_start(val, format);
+ append_vsprintf(format, val);
+ va_end(val);
+ return *this;
+}
+
+VDStringW& VDStringW::append_vsprintf(const value_type *format, va_list val) {
+ wchar_t buf[1024];
+
+ int len = vswprintf(buf, 1024, format, val);
+ if (len >= 0)
+ append(buf, buf+len);
+ else {
+ int len;
+
+ vdfastvector<wchar_t> tmp;
+ for(int siz = 4096; siz <= 65536; siz += siz) {
+ tmp.resize(siz);
+
+ wchar_t *tmpp = tmp.data();
+ len = vswprintf(tmpp, siz, format, val);
+ if (len >= 0) {
+ append(tmpp, tmpp+len);
+ break;
+ }
+ }
+ }
+
+ return *this;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/a64_fraction.asm b/src/thirdparty/VirtualDub/system/source/a64_fraction.asm
new file mode 100644
index 000000000..43b0baddf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a64_fraction.asm
@@ -0,0 +1,58 @@
+; VirtualDub - Video processing and capture application
+; System library component
+; Copyright (C) 1998-2006 Avery Lee, All Rights Reserved.
+;
+; Beginning with 1.6.0, the VirtualDub system library is licensed
+; differently than the remainder of VirtualDub. This particular file is
+; thus licensed as follows (the "zlib" license):
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any
+; damages arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must
+; not claim that you wrote the original software. If you use this
+; software in a product, an acknowledgment in the product
+; documentation would be appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must
+; not be misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source
+; distribution.
+
+ segment .text
+
+;--------------------------------------------------------------------------
+; VDFractionScale64(
+; [rcx] uint64 a,
+; [rdx] uint64 b,
+; [r8] uint64 c,
+; [r9] uint32& remainder);
+;
+;
+ global VDFractionScale64
+VDFractionScale64:
+ mov rax, rcx
+ mul rdx
+ div r8
+ mov [r9], edx
+ ret
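+
+; Note: mul rdx leaves the full 128-bit product a*b in rdx:rax, and div r8
+; then produces the truncated quotient in rax (the return value) with the
+; remainder in rdx, whose low 32 bits are stored through the uint32&
+; argument. The quotient is assumed to fit in 64 bits; otherwise div faults.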
+
+;--------------------------------------------------------------------------
+; VDUMulDiv64x32(
+; [rcx] uint64 a,
+; [rdx] uint64 b,
+; [r8] uint64 c);
+;
+;
+ global VDUMulDiv64x32
+VDUMulDiv64x32:
+ mov rax, rcx
+ mul rdx
+ div r8
+ ret
+
+ end
diff --git a/src/thirdparty/VirtualDub/system/source/a64_int128.asm b/src/thirdparty/VirtualDub/system/source/a64_int128.asm
new file mode 100644
index 000000000..706e298f6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a64_int128.asm
@@ -0,0 +1,73 @@
+; VirtualDub - Video processing and capture application
+; System library component
+; Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+;
+; Beginning with 1.6.0, the VirtualDub system library is licensed
+; differently than the remainder of VirtualDub. This particular file is
+; thus licensed as follows (the "zlib" license):
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any
+; damages arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must
+; not claim that you wrote the original software. If you use this
+; software in a product, an acknowledgment in the product
+; documentation would be appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must
+; not be misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source
+; distribution.
+
+ segment .text
+
+ global vdasm_uint128_add
+vdasm_uint128_add:
+ mov rax, [rdx]
+ add rax, [r8]
+ mov [rcx], rax
+ mov rax, [rdx+8]
+ adc rax, [r8+8]
+ mov [rcx+8], rax
+ ret
+
+ global vdasm_uint128_sub
+vdasm_uint128_sub:
+ mov rax, [rdx]
+ sub rax, [r8]
+ mov [rcx], rax
+ mov rax, [rdx+8]
+ sbb rax, [r8+8]
+ mov [rcx+8], rax
+ ret
+
+proc_frame vdasm_uint128_mul
+ mov [rsp+8], rbx
+ [savereg rbx, 8]
+ mov [rsp+16], rsi
+ [savereg rsi, 16]
+end_prolog
+
+ mov rbx, rdx ;rbx = src1
+ mov rax, [rdx] ;rax = src1a
+ mov rsi, [r8] ;rsi = src2a
+ mul rsi ;rdx:rax = src1a*src2a
+ mov [rcx], rax ;write low result
+ mov r9, rdx ;r9 = (src1a*src2a).hi
+ mov rax, [rbx+8] ;rax = src1b
+ mul rsi ;rdx:rax = src1b*src2a
+ add r9, rax ;r9 = (src1a*src2a).hi + (src1b*src2a).lo
+ mov rax, [rbx] ;rax = src1a
+ mul qword [r8+8] ;rdx:rax = src1a*src2b
+ add rax, r9 ;rax = (src1a*src2b).lo + (src1b*src2a).lo + (src1a*src2a).hi
+ mov [rcx+8], rax ;write high result
+ mov rsi, [rsp+16]
+ mov rbx, [rsp+8]
+ ret
+endproc_frame
+
+ end
diff --git a/src/thirdparty/VirtualDub/system/source/a64_thunk.asm b/src/thirdparty/VirtualDub/system/source/a64_thunk.asm
new file mode 100644
index 000000000..b9e09e1e8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a64_thunk.asm
@@ -0,0 +1,58 @@
+ segment .text
+
+ global VDMethodToFunctionThunk64
+proc_frame VDMethodToFunctionThunk64
+ ;prolog
+ db 48h ;emit REX prefix -- first instruction must be two bytes for hot patching
+ push rbp
+ [pushreg rbp]
+
+ mov rbp, rsp ;create stack pointer
+ [setframe rbp, 0]
+
+ mov [rbp+16], rcx ;save arg1
+ [savereg rcx, 0]
+
+ mov [rbp+24], rdx ;save arg2
+ [savereg rcx, 8]
+
+ mov [rbp+32], r8 ;save arg3
+ [savereg rcx, 16]
+
+ mov [rbp+40], r9 ;save arg4
+ [savereg rcx, 24]
+
+end_prolog
+
+ ;re-copy arguments 4 and up
+ mov ecx, [rax+24]
+ or ecx, ecx
+ jz .argsdone
+ lea rdx, [rcx+48-8]
+.argsloop:
+ push qword [rsp+rdx]
+ sub ecx, 8
+ jnz .argsloop
+.argsdone:
+
+ ;load 'this' pointer
+ mov rcx, [rax+16]
+
+ ;reload arguments 1-3
+ mov rdx, [rbp+16]
+ mov r8, [rbp+24]
+ mov r9, [rbp+32]
+
+ ;reserve argument 1-4 space on stack
+ sub rsp, 32
+
+ ;call function
+ call qword [rax+8]
+
+ ;epilog
+ lea rsp, [rbp] ;pop off stack frame and any additional arg space
+ pop rbp ;restore base pointer
+ ret ;all done
+endproc_frame
+
+ end
diff --git a/src/thirdparty/VirtualDub/system/source/a_memory.asm b/src/thirdparty/VirtualDub/system/source/a_memory.asm
new file mode 100644
index 000000000..e4b6cac8b
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a_memory.asm
@@ -0,0 +1,135 @@
+; VirtualDub - Video processing and capture application
+; System library component
+; Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+;
+; Beginning with 1.6.0, the VirtualDub system library is licensed
+; differently than the remainder of VirtualDub. This particular file is
+; thus licensed as follows (the "zlib" license):
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any
+; damages arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must
+; not claim that you wrote the original software. If you use this
+; software in a product, an acknowledgment in the product
+; documentation would be appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must
+; not be misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source
+; distribution.
+
+ segment .text
+
+ global _VDFastMemcpyPartialScalarAligned8
+_VDFastMemcpyPartialScalarAligned8:
+ mov eax, [esp+12]
+ mov edx, [esp+4]
+ mov ecx, [esp+8]
+ add ecx, eax
+ add edx, eax
+ neg eax
+ jz .nobytes
+ add eax, 8
+ jz .doodd
+ jmp short .xloop
+ align 16
+.xloop:
+ fild qword [ecx+eax-8]
+ fild qword [ecx+eax]
+ fxch
+ fistp qword [edx+eax-8]
+ fistp qword [edx+eax]
+ add eax,16
+ jnc .xloop
+ jnz .nobytes
+.doodd:
+ fild qword [ecx-8]
+ fistp qword [edx-8]
+.nobytes:
+ ret
+
+ global _VDFastMemcpyPartialMMX
+_VDFastMemcpyPartialMMX:
+ push edi
+ push esi
+
+ mov edi, [esp+4+8]
+ mov esi, [esp+8+8]
+ mov ecx, [esp+12+8]
+ mov edx, ecx
+ shr ecx, 2
+ and edx, 3
+ rep movsd
+ mov ecx, edx
+ rep movsb
+ pop esi
+ pop edi
+ ret
+
+ global _VDFastMemcpyPartialMMX2
+_VDFastMemcpyPartialMMX2:
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ mov ebx, [esp+4+16]
+ mov edx, [esp+8+16]
+ mov eax, [esp+12+16]
+ neg eax
+ add eax, 63
+ jbe .skipblastloop
+.blastloop:
+ movq mm0, [edx]
+ movq mm1, [edx+8]
+ movq mm2, [edx+16]
+ movq mm3, [edx+24]
+ movq mm4, [edx+32]
+ movq mm5, [edx+40]
+ movq mm6, [edx+48]
+ movq mm7, [edx+56]
+ movntq [ebx], mm0
+ movntq [ebx+8], mm1
+ movntq [ebx+16], mm2
+ movntq [ebx+24], mm3
+ movntq [ebx+32], mm4
+ movntq [ebx+40], mm5
+ movntq [ebx+48], mm6
+ movntq [ebx+56], mm7
+ add ebx, 64
+ add edx, 64
+ add eax, 64
+ jnc .blastloop
+.skipblastloop:
+ sub eax, 63-7
+ jns .noextras
+.quadloop:
+ movq mm0, [edx]
+ movntq [ebx], mm0
+ add edx, 8
+ add ebx, 8
+ add eax, 8
+ jnc .quadloop
+.noextras:
+ sub eax, 7
+ jz .nooddballs
+ mov ecx, eax
+ neg ecx
+ mov esi, edx
+ mov edi, ebx
+ rep movsb
+.nooddballs:
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+
+ end
+
diff --git a/src/thirdparty/VirtualDub/system/source/a_thunk.asm b/src/thirdparty/VirtualDub/system/source/a_thunk.asm
new file mode 100644
index 000000000..5dcdecbbe
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/a_thunk.asm
@@ -0,0 +1,63 @@
+ segment .text
+
+ align 16
+ global _VDMethodToFunctionThunk32
+_VDMethodToFunctionThunk32:
+ pop eax ;get return address in thunk
+
+ ;re-copy arguments
+ movzx ecx, byte [eax+1]
+ mov edx, ecx
+argsloop:
+ push dword [esp+edx]
+ sub ecx, 4
+ jnz argsloop
+
+ push eax ;replace thunk return address
+
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ align 16
+ global _VDMethodToFunctionThunk32_4
+_VDMethodToFunctionThunk32_4:
+ pop eax ;get return address in thunk
+ push dword [esp+4] ;replicate 1st argument
+ push eax ;replace thunk return address
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ align 16
+ global _VDMethodToFunctionThunk32_8
+_VDMethodToFunctionThunk32_8:
+ pop eax ;get return address in thunk
+ push dword [esp+8] ;replicate 2nd argument
+ push dword [esp+8] ;replicate 1st argument
+ push eax ;replace thunk return address
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ align 16
+ global _VDMethodToFunctionThunk32_12
+_VDMethodToFunctionThunk32_12:
+ pop eax ;get return address in thunk
+ push dword [esp+12] ;replicate 3rd argument
+ push dword [esp+12] ;replicate 2nd argument
+ push dword [esp+12] ;replicate 1st argument
+ push eax ;replace thunk return address
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ align 16
+ global _VDMethodToFunctionThunk32_16
+_VDMethodToFunctionThunk32_16:
+ pop eax ;get return address in thunk
+ push dword [esp+16] ;replicate 4th argument
+ push dword [esp+16] ;replicate 3rd argument
+ push dword [esp+16] ;replicate 2nd argument
+ push dword [esp+16] ;replicate 1st argument
+ push eax ;replace thunk return address
+ mov ecx, [eax+7] ;load 'this' pointer
+ jmp dword [eax+3] ;tail-call function
+
+ end
diff --git a/src/thirdparty/VirtualDub/system/source/cache.cpp b/src/thirdparty/VirtualDub/system/source/cache.cpp
new file mode 100644
index 000000000..5da77d089
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/cache.cpp
@@ -0,0 +1,422 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2005 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/cache.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+VDCache::VDCache(IVDCacheAllocator *pAllocator)
+ : mpAllocator(pAllocator)
+ , mObjectCount(0)
+ , mObjectLimit(16)
+{
+}
+
+VDCache::~VDCache() {
+ Shutdown();
+}
+
+void VDCache::Shutdown() {
+ for(int i=0; i<kVDCacheStateCount; ++i) {
+ ObjectList& ol = mLists[i];
+
+ while(!ol.empty()) {
+ VDCachedObject *pObject = static_cast<VDCachedObject *>(ol.back());
+ ol.pop_back();
+
+ pObject->OnCacheEvict();
+ pObject->SetCache(NULL); // will release object
+
+ if (i != kVDCacheStateFree) {
+ VDASSERT((int)--mObjectCount >= 0);
+ }
+ }
+ }
+}
+
+int VDCache::GetStateCount(int state) {
+ vdsynchronized(mLock) {
+ return mLists[state].size();
+ }
+}
+
+void VDCache::DumpListStatus(int state) {
+ vdsynchronized(mLock) {
+ ObjectList& ol = mLists[state];
+
+ for(ObjectList::iterator it(ol.begin()), itEnd(ol.end()); it!=itEnd; ++it) {
+ VDCachedObject *pObj = static_cast<VDCachedObject *>(*it);
+
+ pObj->DumpStatus();
+ }
+ }
+}
+
+VDCachedObject *VDCache::Allocate(sint64 key) {
+ VDCachedObject *pObj = NULL;
+
+ vdsynchronized(mLock) {
+ if (mObjectCount >= mObjectLimit - 1)
+ Evict(mObjectLimit - 1);
+
+ ObjectList& fl = mLists[kVDCacheStateFree];
+ ObjectList& pl = mLists[kVDCacheStatePending];
+
+ if (fl.empty()) {
+ VDCachedObject *pNewObject = mpAllocator->OnCacheAllocate();
+
+ pNewObject->SetCache(this);
+ pNewObject->SetState(kVDCacheStateFree);
+
+ fl.push_front(pNewObject);
+ }
+
+ ++mObjectCount;
+
+ pObj = static_cast<VDCachedObject *>(fl.front());
+ VDASSERT(pObj->GetState() == kVDCacheStateFree);
+ pObj->AddRef();
+ pObj->SetState(kVDCacheStatePending);
+ pObj->mHashKey = key;
+ pl.splice(pl.begin(), fl, fl.fast_find(pObj));
+ mHash.insert(pObj);
+ }
+
+ return pObj;
+}
+
+VDCachedObject *VDCache::Create(sint64 key, bool& is_new) {
+ // The pending, ready, active, and complete lists are eligible for lookup.
+ // The free and aborted lists are not.
+
+ VDCachedObject *pObj = NULL;
+
+ is_new = false;
+
+ vdsynchronized(mLock) {
+ pObj = static_cast<VDCachedObject *>(mHash[key]);
+
+ if (pObj) {
+ pObj->AddRef();
+
+ VDASSERT(pObj->GetState() != kVDCacheStateFree);
+
+ if (pObj->GetState() == kVDCacheStateIdle) {
+ pObj->SetState(kVDCacheStateComplete);
+
+ ObjectList& il = mLists[kVDCacheStateIdle];
+ ObjectList& cl = mLists[kVDCacheStateComplete];
+
+ cl.splice(cl.begin(), il, il.fast_find(pObj));
+ }
+ }
+
+ if (!pObj) {
+ is_new = true;
+ pObj = Allocate(key);
+ }
+ }
+
+ return pObj;
+}
+
+void VDCache::Evict(uint32 level) {
+ if (mObjectCount <= level)
+ return;
+
+ int maxfree = mObjectCount - level;
+
+ ObjectList& il = mLists[kVDCacheStateIdle];
+ ObjectList& al = mLists[kVDCacheStateAborting];
+
+ while(maxfree-- > 0 && mObjectCount >= level && !il.empty()) {
+ VDCachedObject *pObject = static_cast<VDCachedObject *>(il.back());
+ VDASSERT(pObject->GetState() == kVDCacheStateIdle);
+
+ pObject->SetState(kVDCacheStateAborting);
+ al.splice(al.begin(), il, pObject);
+
+ pObject->WeakAddRef();
+
+ mLock.Unlock();
+
+ pObject->OnCacheEvict();
+ pObject->WeakRelease(); // Will move to free list.
+
+ mLock.Lock();
+ }
+}
+
+void VDCache::NotifyFree(VDCachedObject *pObject) {
+ vdsynchronized(mLock) {
+ int rc = pObject->GetRefCount();
+
+ // This check is required because it is possible for a call to
+ // Allocate() to sneak in before we acquire the lock.
+ if (rc < 0x10000) {
+ VDCacheState oldState = pObject->GetState();
+ VDCacheState newState = oldState;
+
+ if (rc & 0xfffe)
+ newState = kVDCacheStateAborting;
+ else if (pObject->IsValid())
+ newState = kVDCacheStateIdle;
+ else {
+ VDVERIFY((int)--mObjectCount >= 0);
+ newState = kVDCacheStateFree;
+ mHash.erase(pObject);
+ }
+
+ if (newState != oldState) {
+ pObject->SetState(newState);
+
+ ObjectList& nl = mLists[newState];
+ ObjectList& ol = mLists[oldState];
+ nl.splice(nl.begin(), ol, ol.fast_find(pObject));
+ }
+
+ if (oldState == kVDCacheStatePending || oldState == kVDCacheStateReady)
+ pObject->OnCacheAbortPending();
+ }
+ }
+}
+
+void VDCache::Schedule(VDCachedObject *pObject) {
+ vdsynchronized(mLock) {
+ VDCacheState oldState = pObject->GetState();
+
+ VDASSERT(oldState == kVDCacheStatePending || oldState == kVDCacheStateActive);
+
+ ObjectList& ol = mLists[oldState];
+ ObjectList& nl = mLists[kVDCacheStateReady];
+
+ nl.splice(nl.back(), ol, ol.fast_find(pObject));
+ pObject->SetState(kVDCacheStateReady);
+ }
+}
+
+VDCachedObject *VDCache::GetNextReady() {
+ VDCachedObject *pObject = NULL;
+
+ vdsynchronized(mLock) {
+ ObjectList& rl = mLists[kVDCacheStateReady];
+ ObjectList& al = mLists[kVDCacheStateActive];
+
+ if (!rl.empty()) {
+ pObject = static_cast<VDCachedObject *>(rl.front());
+ VDASSERT(pObject->GetState() == kVDCacheStateReady);
+
+ al.splice(al.end(), rl, rl.begin());
+
+ pObject->SetState(kVDCacheStateActive);
+ pObject->AddRef();
+ }
+ }
+
+ return pObject;
+}
+
+void VDCache::MarkCompleted(VDCachedObject *pObject) {
+ vdsynchronized(mLock) {
+ VDCacheState oldState = pObject->GetState();
+ VDASSERT(oldState == kVDCacheStatePending || oldState == kVDCacheStateActive);
+
+ ObjectList& al = mLists[oldState];
+ ObjectList& cl = mLists[kVDCacheStateComplete];
+
+ if (!al.empty()) {
+ cl.splice(cl.end(), al, al.fast_find(pObject));
+
+ pObject = static_cast<VDCachedObject *>(cl.back());
+ pObject->SetState(kVDCacheStateComplete);
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDCachedObject::VDCachedObject()
+ : mRefCount(0)
+ , mpCache(NULL)
+{
+}
+
+int VDCachedObject::AddRef() {
+ int rv = (mRefCount += 0x10000);
+
+ return rv >> 16;
+}
+
+int VDCachedObject::Release() {
+ int rv = (mRefCount -= 0x10000);
+
+ VDASSERT(rv >= 0);
+
+ if (rv < 0x10000) {
+ if (!rv)
+ delete this;
+ else if (mpCache)
+ mpCache->NotifyFree(this);
+ }
+
+ return rv >> 16;
+}
+
+void VDCachedObject::WeakAddRef() {
+ mRefCount += 2;
+}
+
+void VDCachedObject::WeakRelease() {
+ int rv = (mRefCount -= 2);
+
+ VDASSERT((rv & 0xffff) < 0x8000);
+
+ if (rv < 2) {
+ if (!rv)
+ delete this;
+ else
+ mpCache->NotifyFree(this);
+ }
+}
+
+void VDCachedObject::SetCache(VDCache *pCache) {
+ mpCache = pCache;
+ if (pCache)
+ ++mRefCount;
+ else {
+ if (!--mRefCount)
+ delete this;
+ }
+}
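+
+// Reference counting scheme used above: strong references are counted in
+// steps of 0x10000, weak references in steps of 2, and the owning cache adds
+// 1 through SetCache(). Release() therefore only frees or recycles an object
+// once the count drops below 0x10000 (no strong references left), and
+// NotifyFree() then inspects the remaining weak bits (rc & 0xfffe) to choose
+// between the aborting, idle and free lists.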
+
+///////////////////////////////////////////////////////////////////////////
+
+VDPool::VDPool(IVDPoolAllocator *pAllocator)
+ : mpAllocator(pAllocator)
+ , mObjectCount(0)
+ , mObjectLimit(16)
+{
+}
+
+VDPool::~VDPool() {
+ Shutdown();
+}
+
+void VDPool::Shutdown() {
+ for(int i=0; i<kVDPoolStateCount; ++i) {
+ ObjectList& ol = mLists[i];
+
+ while(!ol.empty()) {
+ VDPooledObject *pObject = static_cast<VDPooledObject *>(ol.back());
+ ol.pop_back();
+
+ pObject->SetPool(NULL); // will release object
+
+ VDASSERT((int)--mObjectCount >= 0);
+ }
+ }
+}
+
+VDPooledObject *VDPool::Allocate() {
+ VDPooledObject *pObj = NULL;
+
+ vdsynchronized(mLock) {
+ ObjectList& fl = mLists[kVDPoolStateFree];
+ ObjectList& pl = mLists[kVDPoolStateActive];
+
+ if (fl.empty()) {
+ VDPooledObject *pNewObject = mpAllocator->OnPoolAllocate();
+
+ pNewObject->SetPool(this);
+
+ fl.push_front(pNewObject);
+ ++mObjectCount;
+ }
+
+ pObj = static_cast<VDPooledObject *>(fl.front());
+ pObj->AddRef();
+ pl.splice(pl.begin(), fl, fl.fast_find(pObj));
+ }
+
+ return pObj;
+}
+
+void VDPool::NotifyFree(VDPooledObject *pObject) {
+ vdsynchronized(mLock) {
+ // This check is required because it is possible for a call to
+ // Allocate() to sneak in before we acquire the lock.
+
+ if (pObject->GetRefCount() < 2) {
+ VDPoolState oldState = kVDPoolStateActive;
+ VDPoolState newState = kVDPoolStateFree;
+
+ mLists[kVDPoolStateActive].erase(pObject);
+
+ if (mObjectCount > mObjectLimit) {
+ delete pObject;
+ --mObjectCount;
+ } else
+ mLists[kVDPoolStateFree].push_back(pObject);
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDPooledObject::VDPooledObject()
+ : mRefCount(0)
+ , mpPool(NULL)
+{
+}
+
+int VDPooledObject::AddRef() {
+ return (mRefCount += 2) >> 1;
+}
+
+int VDPooledObject::Release() {
+ int rv = (mRefCount -= 2);
+
+ VDASSERT(rv >= 0);
+
+ if (rv < 2) {
+ if (!rv)
+ delete this;
+ else if (mpPool)
+ mpPool->NotifyFree(this);
+ }
+
+ return rv >> 1;
+}
+
+void VDPooledObject::SetPool(VDPool *pPool) {
+ mpPool = pPool;
+ if (pPool)
+ ++mRefCount;
+ else {
+ if (!--mRefCount)
+ delete this;
+ }
+}
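+
+// VDPooledObject uses the same packed-counter idea with only two tiers:
+// client references count in steps of 2 and the owning pool holds the low
+// bit through SetPool(). When the last client reference goes away the object
+// is recycled onto the free list, or deleted outright if the pool already
+// holds more than mObjectLimit objects.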
diff --git a/src/thirdparty/VirtualDub/system/source/cmdline.cpp b/src/thirdparty/VirtualDub/system/source/cmdline.cpp
new file mode 100644
index 000000000..2bd1cbe42
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/cmdline.cpp
@@ -0,0 +1,178 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2005 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/cmdline.h>
+
+VDCommandLine::VDCommandLine() {
+}
+
+VDCommandLine::VDCommandLine(const wchar_t *s) {
+ Init(s);
+}
+
+VDCommandLine::~VDCommandLine() {
+}
+
+void VDCommandLine::Init(const wchar_t *s) {
+ for(;;) {
+ while(iswspace(*s))
+ ++s;
+
+ if (!*s)
+ break;
+
+ Token te = { (int)mLine.size(), *s == L'/', *s == L'"' };
+
+ if (te.mbIsSwitch) {
+ mLine.push_back(L'/');
+ ++s;
+ }
+
+ mTokens.push_back(te);
+
+ // special case for /?
+ if (te.mbIsSwitch && *s == L'?') {
+ mLine.push_back(L'?');
+ ++s;
+ }
+
+ while(*s && *s != L' ' && *s != L'/') {
+ if (te.mbIsSwitch) {
+ if (!isalnum((unsigned char)*s))
+ break;
+
+ mLine.push_back(*s++);
+ } else if (*s == L'"') {
+ ++s;
+ while(*s && *s != L'"')
+ mLine.push_back(*s++);
+
+ if (*s) {
+ ++s;
+
+ if (*s == ',') {
+ ++s;
+ break;
+ }
+ }
+ } else
+ mLine.push_back(*s++);
+ }
+
+ mLine.push_back(0);
+ }
+}
+
+uint32 VDCommandLine::GetCount() const {
+ return mTokens.size();
+}
+
+const wchar_t *VDCommandLine::operator[](int index) const {
+ return (uint32)index < mTokens.size() ? mLine.data() + mTokens[index].mTokenIndex : NULL;
+}
+
+bool VDCommandLine::GetNextArgument(VDCommandLineIterator& it, const wchar_t *& token, bool& isSwitch) const {
+ int count = (int)mTokens.size();
+
+ if (it.mIndex >= count)
+ return false;
+
+ token = mLine.data() + mTokens[it.mIndex].mTokenIndex;
+ isSwitch = mTokens[it.mIndex].mbIsSwitch;
+
+ ++it.mIndex;
+ return true;
+}
+
+bool VDCommandLine::GetNextNonSwitchArgument(VDCommandLineIterator& it, const wchar_t *& token) const {
+ int count = (int)mTokens.size();
+
+ if (it.mIndex >= count)
+ return false;
+
+ if (mTokens[it.mIndex].mbIsSwitch)
+ return false;
+
+ token = mLine.data() + mTokens[it.mIndex++].mTokenIndex;
+ return true;
+}
+
+bool VDCommandLine::GetNextSwitchArgument(VDCommandLineIterator& it, const wchar_t *& token) const {
+ int count = (int)mTokens.size();
+
+ if (it.mIndex >= count)
+ return false;
+
+ if (!mTokens[it.mIndex].mbIsSwitch)
+ return false;
+
+ token = mLine.data() + mTokens[it.mIndex++].mTokenIndex;
+ return true;
+}
+
+bool VDCommandLine::FindAndRemoveSwitch(const wchar_t *name) {
+ int count = (int)mTokens.size();
+
+ for(int i=1; i<count; ++i) {
+ if (mTokens[i].mbIsSwitch && !_wcsicmp(name, mLine.data() + mTokens[i].mTokenIndex + 1)) {
+ mTokens.erase(mTokens.begin() + i);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool VDCommandLine::FindAndRemoveSwitch(const wchar_t *name, const wchar_t *& token) {
+ int count = (int)mTokens.size();
+ size_t namelen = wcslen(name);
+
+ for(int i=1; i<count; ++i) {
+ if (!mTokens[i].mbIsSwitch)
+ continue;
+
+ const wchar_t *s = mLine.data() + mTokens[i].mTokenIndex + 1;
+
+ if (!_wcsnicmp(name, s, namelen)) {
+ token = s+namelen;
+
+ switch(*token) {
+ case L':':
+ ++token;
+ break;
+ case 0:
+ break;
+ default:
+ continue;
+ }
+
+ mTokens.erase(mTokens.begin() + i);
+ return true;
+ }
+ }
+
+ return false;
+}
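+
+// VDCommandLine splits a command line into switches (leading '/') and plain
+// or quoted arguments; token 0 is the program name and is skipped by the
+// switch lookups. Illustrative usage sketch (switch names are arbitrary):
+//
+//     VDCommandLine cmd(L"prog.exe /fullscreen \"C:\\clips\\in.avi\"");
+//     bool fs = cmd.FindAndRemoveSwitch(L"fullscreen");   // true, switch removed
+//     const wchar_t *path = cmd[1];                       // the quoted path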
diff --git a/src/thirdparty/VirtualDub/system/source/cpuaccel.cpp b/src/thirdparty/VirtualDub/system/source/cpuaccel.cpp
new file mode 100644
index 000000000..eb326e9ae
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/cpuaccel.cpp
@@ -0,0 +1,251 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <wtypes.h>
+#include <winnt.h>
+#include <intrin.h>
+#include <vd2/system/cpuaccel.h>
+
+static long g_lCPUExtensionsEnabled;
+static long g_lCPUExtensionsAvailable;
+
+extern "C" {
+ bool FPU_enabled, MMX_enabled, SSE_enabled, ISSE_enabled, SSE2_enabled;
+};
+
+
+#ifdef _M_AMD64
+
+ long CPUCheckForExtensions() {
+ long flags = CPUF_SUPPORTS_FPU;
+
+ // This code used to use IsProcessorFeaturePresent(), but this function is somewhat
+ // suboptimal in Win64 -- for one thing, it doesn't return true for MMX, at least
+ // on Vista 64.
+
+ // check CPUID function 1 feature bits for MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1
+ int cpuInfo[4];
+ __cpuid(cpuInfo, 1);
+
+ if (cpuInfo[3] & (1 << 23))
+ flags |= CPUF_SUPPORTS_MMX;
+
+ if (cpuInfo[3] & (1 << 25))
+ flags |= CPUF_SUPPORTS_SSE | CPUF_SUPPORTS_INTEGER_SSE;
+
+ if (cpuInfo[3] & (1 << 26))
+ flags |= CPUF_SUPPORTS_SSE2;
+
+ if (cpuInfo[2] & 0x00000001)
+ flags |= CPUF_SUPPORTS_SSE3;
+
+ if (cpuInfo[2] & 0x00000200)
+ flags |= CPUF_SUPPORTS_SSSE3;
+
+ if (cpuInfo[2] & 0x00080000)
+ flags |= CPUF_SUPPORTS_SSE41;
+
+ // check for 3DNow!, 3DNow! extensions
+ __cpuid(cpuInfo, 0x80000000);
+ if (cpuInfo[0] >= 0x80000001) {
+ __cpuid(cpuInfo, 0x80000001);
+
+ if (cpuInfo[3] & (1 << 31))
+ flags |= CPUF_SUPPORTS_3DNOW;
+
+ if (cpuInfo[3] & (1 << 30))
+ flags |= CPUF_SUPPORTS_3DNOW_EXT;
+
+ if (cpuInfo[3] & (1 << 22))
+ flags |= CPUF_SUPPORTS_INTEGER_SSE;
+ }
+
+ return flags;
+ }
+
+#else
+
+ // This is ridiculous.
+
+ static long CPUCheckForSSESupport() {
+ __try {
+ // __asm andps xmm0,xmm0
+
+ __asm _emit 0x0f
+ __asm _emit 0x54
+ __asm _emit 0xc0
+
+ } __except(EXCEPTION_EXECUTE_HANDLER) {
+ if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
+ g_lCPUExtensionsAvailable &= ~(CPUF_SUPPORTS_SSE|CPUF_SUPPORTS_SSE2|CPUF_SUPPORTS_SSE3|CPUF_SUPPORTS_SSSE3);
+ }
+
+ return g_lCPUExtensionsAvailable;
+ }
+
+ long __declspec(naked) CPUCheckForExtensions() {
+ __asm {
+ push ebp
+ push edi
+ push esi
+ push ebx
+
+ xor ebp,ebp ;cpu flags - if we don't have CPUID, we probably
+ ;won't want to try FPU optimizations.
+
+ ;check for CPUID.
+
+ pushfd ;flags -> EAX
+ pop eax
+ or eax,00200000h ;set the ID bit
+ push eax ;EAX -> flags
+ popfd
+ pushfd ;flags -> EAX
+ pop eax
+ and eax,00200000h ;ID bit set?
+ jz done ;nope...
+
+ ;CPUID exists, check for features register.
+
+ mov ebp,00000003h
+ xor eax,eax
+ cpuid
+ or eax,eax
+ jz done ;no features register?!?
+
+ ;features register exists, look for MMX, SSE, SSE2.
+
+ mov eax,1
+ cpuid
+ mov ebx,edx
+ and ebx,00800000h ;MMX is bit 23 of EDX
+ shr ebx,21
+ or ebp,ebx ;set bit 2 if MMX exists
+
+ mov ebx,edx
+ and edx,02000000h ;SSE is bit 25 of EDX
+ shr edx,25
+ neg edx
+ and edx,00000018h ;set bits 3 and 4 if SSE exists
+ or ebp,edx
+
+ and ebx,04000000h ;SSE2 is bit 26 of EDX
+ shr ebx,21
+ and ebx,00000020h ;set bit 5
+ or ebp,ebx
+
+ test ecx, 1 ;SSE3 is bit 0 of ECX
+ jz no_sse3
+ or ebp, 100h
+no_sse3:
+
+ test ecx, 200h ;SSSE3 is bit 9 of ECX
+ jz no_ssse3
+ or ebp, 200h
+no_ssse3:
+
+ test ecx, 80000h ;SSE4_1 is bit 19 of ECX
+ jz no_sse4_1
+ or ebp, 400h
+no_sse4_1:
+
+ ;check for vendor feature register (K6/Athlon).
+
+ mov eax,80000000h
+ cpuid
+ mov ecx,80000001h
+ cmp eax,ecx
+ jb done
+
+ ;vendor feature register exists, look for 3DNow! and Athlon extensions
+
+ mov eax,ecx
+ cpuid
+
+ mov eax,edx
+ and edx,80000000h ;3DNow! is bit 31
+ shr edx,25
+ or ebp,edx ;set bit 6
+
+ mov edx,eax
+ and eax,40000000h ;3DNow!2 is bit 30
+ shr eax,23
+ or ebp,eax ;set bit 7
+
+ and edx,00400000h ;AMD MMX extensions (integer SSE) is bit 22
+ shr edx,19
+ or ebp,edx
+
+ done:
+ mov eax,ebp
+ mov g_lCPUExtensionsAvailable, ebp
+
+ ;Full SSE and SSE-2 require OS support for the xmm* registers.
+
+ test eax,00000030h
+ jz nocheck
+ call CPUCheckForSSESupport
+ nocheck:
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+ }
+ }
+
+#endif
+
+long CPUEnableExtensions(long lEnableFlags) {
+ g_lCPUExtensionsEnabled = lEnableFlags;
+
+ MMX_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_MMX);
+ FPU_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_FPU);
+ SSE_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_SSE);
+ ISSE_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_INTEGER_SSE);
+ SSE2_enabled = !!(g_lCPUExtensionsEnabled & CPUF_SUPPORTS_SSE2);
+
+ return g_lCPUExtensionsEnabled;
+}
+
+long CPUGetAvailableExtensions() {
+ return g_lCPUExtensionsAvailable;
+}
+
+long CPUGetEnabledExtensions() {
+ return g_lCPUExtensionsEnabled;
+}
+
+void VDCPUCleanupExtensions() {
+#ifndef _M_AMD64
+ if (ISSE_enabled)
+ __asm sfence
+ if (MMX_enabled)
+ __asm emms
+#else
+ _mm_sfence();
+#endif
+}
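+
+// Typical startup sequence (illustrative): detect the available extensions
+// once, then enable some or all of them; the *_enabled globals and
+// CPUGetEnabledExtensions() drive runtime dispatch elsewhere.
+//
+//     CPUEnableExtensions(CPUCheckForExtensions());   // or a masked subset
+//     if (SSE2_enabled) {
+//         // ... run an SSE2/MMX code path ...
+//         VDCPUCleanupExtensions();                    // sfence/emms afterwards
+//     }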
diff --git a/src/thirdparty/VirtualDub/system/source/debug.cpp b/src/thirdparty/VirtualDub/system/source/debug.cpp
new file mode 100644
index 000000000..9bb6a3dc6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/debug.cpp
@@ -0,0 +1,290 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <stdio.h>
+
+#include <windows.h>
+#include <intrin.h>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/cpuaccel.h>
+#include <vd2/system/debug.h>
+#include <vd2/system/thread.h>
+
+#ifdef _DEBUG
+
+class VDSafeMessageBoxThreadW32 : public VDThread {
+public:
+ VDSafeMessageBoxThreadW32(HWND hwndParent, const char *pszText, const char *pszCaption, DWORD dwFlags)
+ : mhwndParent(hwndParent)
+ , mpszText(pszText)
+ , mpszCaption(pszCaption)
+ , mdwFlags(dwFlags)
+ {
+ }
+
+ DWORD GetResult() const { return mdwResult; }
+
+protected:
+ void ThreadRun() {
+ mdwResult = MessageBox(mhwndParent, mpszText, mpszCaption, mdwFlags);
+ }
+
+ HWND mhwndParent;
+ const char *const mpszText;
+ const char *const mpszCaption;
+ const DWORD mdwFlags;
+ DWORD mdwResult;
+};
+
+UINT VDSafeMessageBoxW32(HWND hwndParent, const char *pszText, const char *pszCaption, DWORD dwFlags) {
+ VDSafeMessageBoxThreadW32 mbox(hwndParent, pszText, pszCaption, dwFlags);
+
+ mbox.ThreadStart();
+ mbox.ThreadWait();
+ return mbox.GetResult();
+}
+
+VDAssertResult VDAssert(const char *exp, const char *file, int line) {
+ DWORD dwOldError = GetLastError();
+ char szText[1024];
+
+ VDDEBUG("%s(%d): Assert failed: %s\n", file, line, exp);
+
+ wsprintf(szText,
+ "Assert failed in module %s, line %d:\n"
+ "\n"
+ "\t%s\n"
+ "\n"
+ "Break into debugger?", file, line, exp);
+
+ UINT result = VDSafeMessageBoxW32(NULL, szText, "Assert failure", MB_ABORTRETRYIGNORE|MB_ICONWARNING|MB_TASKMODAL);
+
+ SetLastError(dwOldError);
+
+ switch(result) {
+ case IDABORT:
+ ::Sleep(250); // Pause for a moment so the VC6 debugger doesn't freeze.
+ return kVDAssertBreak;
+ case IDRETRY:
+ return kVDAssertContinue;
+ default:
+ VDNEVERHERE;
+ case IDIGNORE:
+ return kVDAssertIgnore;
+ }
+}
+
+VDAssertResult VDAssertPtr(const char *exp, const char *file, int line) {
+ DWORD dwOldError = GetLastError();
+ char szText[1024];
+
+ VDDEBUG("%s(%d): Assert failed: %s is not a valid pointer\n", file, line, exp);
+
+ wsprintf(szText,
+ "Assert failed in module %s, line %d:\n"
+ "\n"
+ "\t(%s) not a valid pointer\n"
+ "\n"
+ "Break into debugger?", file, line, exp);
+
+ UINT result = VDSafeMessageBoxW32(NULL, szText, "Assert failure", MB_ABORTRETRYIGNORE|MB_ICONWARNING|MB_TASKMODAL);
+
+ SetLastError(dwOldError);
+
+ switch(result) {
+ case IDABORT:
+ return kVDAssertBreak;
+ case IDRETRY:
+ return kVDAssertContinue;
+ default:
+ VDNEVERHERE;
+ case IDIGNORE:
+ return kVDAssertIgnore;
+ }
+}
+
+#endif
+
+void VDProtectedAutoScopeICLWorkaround() {}
+
+void VDDebugPrint(const char *format, ...) {
+ char buf[4096];
+
+ va_list val;
+ va_start(val, format);
+ _vsnprintf(buf, sizeof buf, format, val);
+ va_end(val);
+ Sleep(0);
+ OutputDebugString(buf);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
+ IVDExternalCallTrap *g_pExCallTrap;
+}
+
+void VDSetExternalCallTrap(IVDExternalCallTrap *trap) {
+ g_pExCallTrap = trap;
+}
+
+#if defined(WIN32) && defined(_M_IX86)
+ namespace {
+ bool IsFPUStateOK(unsigned& ctlword) {
+ ctlword = 0;
+
+ __asm mov eax, ctlword
+ __asm fnstcw [eax]
+
+ ctlword &= 0x0f3f;
+
+ return ctlword == 0x023f;
+ }
+
+ void ResetFPUState() {
+ static const unsigned ctlword = 0x027f;
+
+ __asm fnclex
+ __asm fldcw ctlword
+ }
+
+ bool IsSSEStateOK(uint32& ctlword) {
+ ctlword = _mm_getcsr();
+
+ // Intel C/C++ flips FTZ and DAZ. :(
+ return (ctlword & 0x7f80) == 0x1f80;
+ }
+
+ void ResetSSEState() {
+ _mm_setcsr(0x1f80);
+ }
+ }
+
+ bool IsMMXState() {
+ char buf[28];
+ unsigned short tagword;
+
+ __asm fnstenv buf // this resets the FPU control word somehow!?
+
+ tagword = *(unsigned short *)(buf + 8);
+
+ return (tagword != 0xffff);
+ }
+ void ClearMMXState() {
+ if (MMX_enabled)
+ __asm emms
+ else {
+ __asm {
+ ffree st(0)
+ ffree st(1)
+ ffree st(2)
+ ffree st(3)
+ ffree st(4)
+ ffree st(5)
+ ffree st(6)
+ ffree st(7)
+ }
+ }
+ }
+
+ void VDClearEvilCPUStates() {
+ ResetFPUState();
+ ClearMMXState();
+ }
+
+ void VDPreCheckExternalCodeCall(const char *file, int line) {
+ unsigned fpucw;
+ uint32 mxcsr;
+ bool bFPUStateBad = !IsFPUStateOK(fpucw);
+ bool bSSEStateBad = SSE_enabled && !IsSSEStateOK(mxcsr);
+ bool bMMXStateBad = IsMMXState();
+
+ if (bMMXStateBad || bFPUStateBad || bSSEStateBad) {
+ ClearMMXState();
+ ResetFPUState();
+ if (SSE_enabled)
+ ResetSSEState();
+ }
+
+ if (g_pExCallTrap) {
+ if (bMMXStateBad)
+ g_pExCallTrap->OnMMXTrap(NULL, file, line);
+
+ if (bFPUStateBad)
+ g_pExCallTrap->OnFPUTrap(NULL, file, line, fpucw);
+
+ if (bSSEStateBad)
+ g_pExCallTrap->OnSSETrap(NULL, file, line, mxcsr);
+ }
+ }
+
+ void VDPostCheckExternalCodeCall(const wchar_t *mpContext, const char *mpFile, int mLine) {
+ unsigned fpucw;
+ uint32 mxcsr;
+ bool bFPUStateBad = !IsFPUStateOK(fpucw);
+ bool bSSEStateBad = SSE_enabled && !IsSSEStateOK(mxcsr);
+ bool bMMXStateBad = IsMMXState();
+ bool bBadState = bMMXStateBad || bFPUStateBad || bSSEStateBad;
+
+ if (bBadState) {
+ ClearMMXState();
+ ResetFPUState();
+ if (SSE_enabled)
+ ResetSSEState();
+ }
+
+ if (g_pExCallTrap) {
+ if (bMMXStateBad)
+ g_pExCallTrap->OnMMXTrap(mpContext, mpFile, mLine);
+
+ if (bFPUStateBad)
+ g_pExCallTrap->OnFPUTrap(mpContext, mpFile, mLine, fpucw);
+
+ if (bSSEStateBad)
+ g_pExCallTrap->OnSSETrap(mpContext, mpFile, mLine, mxcsr);
+ }
+ }
+
+#else
+
+ bool IsMMXState() {
+ return false;
+ }
+
+ void ClearMMXState() {
+ }
+
+ void VDClearEvilCPUStates() {
+ }
+
+ void VDPreCheckExternalCodeCall(const char *file, int line) {
+ }
+
+ void VDPostCheckExternalCodeCall(const wchar_t *mpContext, const char *mpFile, int mLine) {
+ }
+
+#endif
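
The pre/post check functions above detect when external code (codecs, drivers) leaves the FPU, MMX, or SSE state dirty and then repair it. A minimal RAII sketch in the same spirit, not part of the commit: it snapshots MXCSR around a call into foreign code and puts it back if the callee changed it, whereas the commit's own ResetSSEState() forces the canonical 0x1f80 value instead. The class name is illustrative.

#include <xmmintrin.h>

class ScopedMxcsrGuardSketch {
public:
	ScopedMxcsrGuardSketch() : mSavedCsr(_mm_getcsr()) {}

	~ScopedMxcsrGuardSketch() {
		// Restore MXCSR if the guarded call changed rounding, FTZ/DAZ, or
		// left exception flags set; the commit resets to 0x1f80 instead.
		if (_mm_getcsr() != mSavedCsr)
			_mm_setcsr(mSavedCsr);
	}

private:
	unsigned int mSavedCsr;
};

// Usage: construct the guard, call the external code, and let the destructor
// clean up even if the call throws.
//   ScopedMxcsrGuardSketch guard;
//   pCodec->Decode(frame);		// hypothetical external call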
diff --git a/src/thirdparty/VirtualDub/system/source/debugx86.cpp b/src/thirdparty/VirtualDub/system/source/debugx86.cpp
new file mode 100644
index 000000000..bbbd5e180
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/debugx86.cpp
@@ -0,0 +1,154 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/debugx86.h>
+
+bool VDIsValidCallX86(const char *buf, int len) {
+ // Permissible CALL sequences that we care about:
+ //
+ // E8 xx xx xx xx CALL near relative
+ // FF (group 2) CALL near absolute indirect
+ //
+ // Minimum sequence is 2 bytes (call eax).
+ // Maximum sequence is 7 bytes (call dword ptr [eax+disp32]).
+
+ if (len >= 5 && buf[-5] == (char)0xE8)
+ return true;
+
+ // FF 14 xx CALL [reg32+reg32*scale]
+
+ if (len >= 3 && buf[-3] == (char)0xFF && buf[-2]==0x14)
+ return true;
+
+ // FF 15 xx xx xx xx CALL disp32
+
+ if (len >= 6 && buf[-6] == (char)0xFF && buf[-5]==0x15)
+ return true;
+
+ // FF 00-3F(!14/15) CALL [reg32]
+
+ if (len >= 2 && buf[-2] == (char)0xFF && (unsigned char)buf[-1] < 0x40)
+ return true;
+
+ // FF D0-D7 CALL reg32
+
+ if (len >= 2 && buf[-2] == (char)0xFF && (buf[-1]&0xF8) == 0xD0)
+ return true;
+
+ // FF 50-57 xx CALL [reg32+reg32*scale+disp8]
+
+ if (len >= 3 && buf[-3] == (char)0xFF && (buf[-2]&0xF8) == 0x50)
+ return true;
+
+ // FF 90-97 xx xx xx xx xx CALL [reg32+reg32*scale+disp32]
+
+ if (len >= 7 && buf[-7] == (char)0xFF && (buf[-6]&0xF8) == 0x90)
+ return true;
+
+ return false;
+}
+
+VDInstructionTypeX86 VDGetInstructionTypeX86(const void *p) {
+ struct local {
+ static bool RangeHitTest(const uint8 *range, uint8 c) {
+ while(*range) {
+ if (c>=range[0] && c<=range[1])
+ return true;
+ range += 2;
+ }
+
+ return false;
+ }
+ };
+
+ VDInstructionTypeX86 type = kX86InstUnknown;
+
+ __try {
+ unsigned char buf[8];
+
+ memcpy(buf, p, 8);
+
+ if (buf[0] == 0x0f && buf[1] == 0x0f)
+ type = kX86Inst3DNow; // Conveniently, all 3DNow! instructions begin 0F 0F
+ else if ((buf[0] == 0xdb || buf[0] == 0xdf) && (buf[1]>=0xe8 && buf[1]<=0xf7))
+ type = kX86InstP6; // DB/DF E8-F7: FCOMI/FCOMIP/FUCOMI/FUCOMIP (P6)
+ else if ((buf[0]&0xfe)==0xda && (buf[1]&0xe0)==0xc0)
+ type = kX86InstP6; // DA/DB C0-DF: FCMOVcc (P6)
+ else if (buf[0] == 0x0f && (buf[1]&0xf0)==0x40)
+ type = kX86InstP6; // 0F 40-4F: CMOVcc (P6)
+ else {
+ const unsigned char *s = buf;
+ bool bWide = false;
+ bool bRepF2 = false;
+ bool bRepF3 = false;
+
+ // At this point we're down to MMX, SSE, SSE2 -- which makes things simpler
+ // as we must see F2 0F, F3 0F, or 0F next. MMX ops use 0F exclusively,
+			// scalar SSE ops use F3, and SSE2 ops use F2 or F3. If we see 66 on an
+ // MMX or SSE op it's automatically SSE2 as it's either a 128-bit MMX op
+ // or a double-precision version of an SSE one.
+
+ if (*s == 0x66) { // 66h override used by SSE2 and is supposed to be ahead of F2/F3 in encodings
+ ++s;
+ bWide = true;
+ }
+
+ if (*s == 0xf2) {
+ ++s;
+ bRepF2 = true;
+ }
+
+ if (*s == 0xf3) {
+ ++s;
+ bRepF3 = true;
+ }
+
+ if (*s++ == 0x0f) {
+ // SSE - 1x, 28-2F, 5x, C2, AE
+ // MMX2 - 70, C4-C6, D7, DA, DE, E0, E3, E4, E7, EA, EE, F6, F7
+ // MMX - 6x, 7x, Dx, Ex, and Fx except for MMX2
+ // SSE2 - C3, SSE ops with 66 or F2, MMX/MMX2 ops with 66/F2/F3
+
+ static const uint8 sse_ranges[]={0x10,0x1f,0x28,0x2f,0x50,0x5f,0xc2,0xc2,0xae,0xae,0};
+ static const uint8 sse2_ranges[]={0xc3,0xc3,0};
+ static const uint8 mmx2_ranges[]={0x70,0x70,0xc4,0xc6,0xd7,0xd7,0xda,0xda,0xde,0xde,0xe0,0xe0,0xe3,0xe4,0xe7,0xe7,0xea,0xea,0xee,0xee,0xf6,0xf7,0};
+ static const uint8 mmx_ranges[]={0x60,0x7f,0xd0,0xff,0};
+
+ if (local::RangeHitTest(sse_ranges, *s))
+ type = (bWide||bRepF2) ? kX86InstSSE2 : kX86InstSSE;
+ else if (local::RangeHitTest(sse2_ranges, *s))
+ type = kX86InstSSE2;
+ else if (local::RangeHitTest(mmx2_ranges, *s))
+ type = (bWide||bRepF2||bRepF3) ? kX86InstSSE2 : kX86InstMMX2;
+ else if (local::RangeHitTest(mmx_ranges, *s))
+ type = (bWide||bRepF2||bRepF3) ? kX86InstSSE2 : kX86InstMMX;
+ }
+ }
+ } __except(1) {
+ }
+ return type;
+}
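
VDGetInstructionTypeX86() reads up to eight bytes at an arbitrary address under __try/__except, so it can safely be pointed at a fault address. A hedged usage sketch, not part of the commit: a structured-exception filter that reports which instruction-set extension the faulting instruction belongs to. The function names are illustrative; the enum values and helpers come from vd2/system/debugx86.h as shown above, and installing the filter via SetUnhandledExceptionFilter() is left to the caller.

#include <windows.h>
#include <vd2/system/debugx86.h>

static const char *DescribeInstructionSketch(const void *addr) {
	switch(VDGetInstructionTypeX86(addr)) {
	case kX86Inst3DNow:	return "3DNow! instruction";
	case kX86InstMMX:	return "MMX instruction";
	case kX86InstMMX2:	return "MMX2 (integer SSE) instruction";
	case kX86InstSSE:	return "SSE instruction";
	case kX86InstSSE2:	return "SSE2 instruction";
	case kX86InstP6:	return "P6 (CMOV/FCOMI) instruction";
	default:		return "unrecognized instruction";
	}
}

static LONG WINAPI CrashFilterSketch(EXCEPTION_POINTERS *ep) {
	// Classify the instruction that raised the exception, e.g. to flag an
	// illegal-instruction fault caused by running SSE2 code on an older CPU.
	OutputDebugStringA(DescribeInstructionSketch(ep->ExceptionRecord->ExceptionAddress));
	return EXCEPTION_EXECUTE_HANDLER;
}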
diff --git a/src/thirdparty/VirtualDub/system/source/event.cpp b/src/thirdparty/VirtualDub/system/source/event.cpp
new file mode 100644
index 000000000..368f03cb1
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/event.cpp
@@ -0,0 +1,81 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2006 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/event.h>
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDDelegate::VDDelegate() {
+ mpPrev = mpNext = this;
+}
+
+VDDelegate::~VDDelegate() {
+ VDDelegateNode *next = mpNext;
+ VDDelegateNode *prev = mpPrev;
+ prev->mpNext = next;
+ next->mpPrev = prev;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDEventBase::VDEventBase() {
+ mAnchor.mpPrev = mAnchor.mpNext = &mAnchor;
+}
+
+VDEventBase::~VDEventBase() {
+ while(mAnchor.mpPrev != &mAnchor)
+ Remove(static_cast<VDDelegate&>(*mAnchor.mpPrev));
+}
+
+void VDEventBase::Add(VDDelegate& dbase) {
+ VDDelegateNode *next = mAnchor.mpNext;
+
+ VDASSERT(dbase.mpPrev == &dbase);
+
+ mAnchor.mpNext = &dbase;
+ dbase.mpPrev = &mAnchor;
+ dbase.mpNext = next;
+ next->mpPrev = &dbase;
+}
+
+void VDEventBase::Remove(VDDelegate& dbase) {
+ VDASSERT(dbase.mpPrev != &dbase);
+
+ VDDelegateNode *next = dbase.mpNext;
+ VDDelegateNode *prev = dbase.mpPrev;
+ prev->mpNext = next;
+ next->mpPrev = prev;
+ dbase.mpPrev = dbase.mpNext = &dbase;
+}
+
+void VDEventBase::Raise(void *src, const void *info) {
+ for(VDDelegateNode *node = mAnchor.mpNext; node != &mAnchor; node = node->mpNext) {
+ VDDelegate& dbase = static_cast<VDDelegate&>(*node);
+
+ dbase.mpCallback(src, info, dbase);
+ }
+}
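
VDDelegate and VDEventBase implement the listener list as an intrusive, circular, doubly linked list with the anchor acting as sentinel: a detached node points at itself, so the destructor and Remove() can unlink unconditionally. A reduced sketch of that pattern, not part of the commit and with illustrative names:

struct IntrusiveNodeSketch {
	IntrusiveNodeSketch *prev, *next;

	IntrusiveNodeSketch() { prev = next = this; }	// detached node points at itself
	~IntrusiveNodeSketch() { Unlink(); }		// always safe, even if never linked

	void InsertAfter(IntrusiveNodeSketch& anchor) {
		next = anchor.next;
		prev = &anchor;
		anchor.next->prev = this;
		anchor.next = this;
	}

	void Unlink() {
		prev->next = next;
		next->prev = prev;
		prev = next = this;			// restore the detached invariant
	}
};

Raise() above walks this list from the anchor and invokes each delegate's stored callback; because nodes unlink themselves on destruction, the event never holds a pointer to a destroyed listener.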
diff --git a/src/thirdparty/VirtualDub/system/source/file.cpp b/src/thirdparty/VirtualDub/system/source/file.cpp
new file mode 100644
index 000000000..11ab82eeb
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/file.cpp
@@ -0,0 +1,795 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <windows.h>
+
+#include <vd2/system/error.h>
+#include <vd2/system/filesys.h>
+#include <vd2/system/VDString.h>
+#include <vd2/system/file.h>
+
+namespace {
+ bool IsWindowsNT() {
+ static bool sbIsNT = (LONG)GetVersion()>=0;
+ return sbIsNT;
+ }
+
+ bool IsHardDrivePath(const wchar_t *path) {
+ const VDStringW rootPath(VDFileGetRootPath(path));
+
+ UINT type = GetDriveTypeW(rootPath.c_str());
+
+ return type == DRIVE_FIXED || type == DRIVE_UNKNOWN || type == DRIVE_REMOVABLE;
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// VDFile
+//
+///////////////////////////////////////////////////////////////////////////////
+
+using namespace nsVDFile;
+
+VDFile::VDFile(const char *pszFileName, uint32 flags)
+ : mhFile(NULL)
+{
+ open_internal(pszFileName, NULL, flags, true);
+}
+
+VDFile::VDFile(const wchar_t *pwszFileName, uint32 flags)
+ : mhFile(NULL)
+{
+ open_internal(NULL, pwszFileName, flags, true);
+}
+
+VDFile::VDFile(HANDLE h)
+ : mhFile(h)
+{
+ LONG lo, hi = 0;
+
+ lo = SetFilePointer(h, 0, &hi, FILE_CURRENT);
+
+ mFilePosition = (uint32)lo + ((uint64)(uint32)hi << 32);
+}
+
+VDFile::~VDFile() {
+ closeNT();
+}
+
+void VDFile::open(const char *pszFilename, uint32 flags) {
+ open_internal(pszFilename, NULL, flags, true);
+}
+
+void VDFile::open(const wchar_t *pwszFilename, uint32 flags) {
+ open_internal(NULL, pwszFilename, flags, true);
+}
+
+bool VDFile::openNT(const wchar_t *pwszFilename, uint32 flags) {
+ return open_internal(NULL, pwszFilename, flags, false);
+}
+
+bool VDFile::open_internal(const char *pszFilename, const wchar_t *pwszFilename, uint32 flags, bool throwOnError) {
+ close();
+
+ mpFilename = _wcsdup(VDFileSplitPath(pszFilename ? VDTextAToW(pszFilename).c_str() : pwszFilename));
+ if (!mpFilename) {
+ if (!throwOnError)
+ return false;
+ throw MyMemoryError();
+ }
+
+ // At least one of the read/write flags must be set.
+ VDASSERT(flags & (kRead | kWrite));
+
+ DWORD dwDesiredAccess = 0;
+
+ if (flags & kRead) dwDesiredAccess = GENERIC_READ;
+ if (flags & kWrite) dwDesiredAccess |= GENERIC_WRITE;
+
+ // Win32 docs are screwed here -- FILE_SHARE_xxx is the inverse of a deny flag.
+
+ DWORD dwShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+ if (flags & kDenyRead) dwShareMode = FILE_SHARE_WRITE;
+ if (flags & kDenyWrite) dwShareMode &= ~FILE_SHARE_WRITE;
+
+ // One of the creation flags must be set.
+ VDASSERT(flags & kCreationMask);
+
+ DWORD dwCreationDisposition;
+
+ uint32 creationType = flags & kCreationMask;
+
+ switch(creationType) {
+ case kOpenExisting: dwCreationDisposition = OPEN_EXISTING; break;
+ case kOpenAlways: dwCreationDisposition = OPEN_ALWAYS; break;
+ case kCreateAlways: dwCreationDisposition = CREATE_ALWAYS; break;
+ case kCreateNew: dwCreationDisposition = CREATE_NEW; break;
+ case kTruncateExisting: dwCreationDisposition = TRUNCATE_EXISTING; break;
+ default:
+ VDNEVERHERE;
+ return false;
+ }
+
+ VDASSERT((flags & (kSequential | kRandomAccess)) != (kSequential | kRandomAccess));
+
+ DWORD dwAttributes = FILE_ATTRIBUTE_NORMAL;
+
+ if (flags & kSequential) dwAttributes |= FILE_FLAG_SEQUENTIAL_SCAN;
+ if (flags & kRandomAccess) dwAttributes |= FILE_FLAG_RANDOM_ACCESS;
+ if (flags & kWriteThrough) dwAttributes |= FILE_FLAG_WRITE_THROUGH;
+ if (flags & kUnbuffered) dwAttributes |= FILE_FLAG_NO_BUFFERING;
+
+ VDStringA tempFilenameA;
+ VDStringW tempFilenameW;
+
+ if (IsWindowsNT()) {
+ if (pszFilename) {
+ tempFilenameW = VDTextAToW(pszFilename);
+ pwszFilename = tempFilenameW.c_str();
+ pszFilename = NULL;
+ }
+ } else {
+ if (pwszFilename) {
+ tempFilenameA = VDTextWToA(pwszFilename);
+ pszFilename = tempFilenameA.c_str();
+ pwszFilename = NULL;
+ }
+ }
+
+ if (pszFilename)
+ mhFile = CreateFileA(pszFilename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwAttributes, NULL);
+ else {
+ if (!IsHardDrivePath(pwszFilename))
+			dwAttributes &= ~FILE_FLAG_NO_BUFFERING;	// unbuffered I/O only helps on local fixed disks
+
+ mhFile = CreateFileW(pwszFilename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwAttributes, NULL);
+ }
+
+ DWORD err = GetLastError();
+
+ // If we failed and FILE_FLAG_NO_BUFFERING was set, strip it and try again.
+ // VPC and Novell shares sometimes do this....
+ if (mhFile == INVALID_HANDLE_VALUE && err != ERROR_FILE_NOT_FOUND && err != ERROR_PATH_NOT_FOUND) {
+ if (dwAttributes & FILE_FLAG_NO_BUFFERING) {
+ dwAttributes &= ~FILE_FLAG_NO_BUFFERING;
+ dwAttributes |= FILE_FLAG_WRITE_THROUGH;
+
+ if (pszFilename)
+ mhFile = CreateFileA(pszFilename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwAttributes, NULL);
+ else
+ mhFile = CreateFileW(pwszFilename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwAttributes, NULL);
+
+ err = GetLastError();
+ }
+ }
+
+ // INVALID_HANDLE_VALUE isn't NULL. *sigh*
+
+ if (mhFile == INVALID_HANDLE_VALUE) {
+ mhFile = NULL;
+
+ if (!throwOnError)
+ return false;
+
+ throw MyWin32Error("Cannot open file \"%ls\":\n%%s", err, mpFilename.get());
+ }
+
+ mFilePosition = 0;
+ return true;
+}
+
+bool VDFile::closeNT() {
+ if (mhFile) {
+ HANDLE h = mhFile;
+ mhFile = NULL;
+ if (!CloseHandle(h))
+ return false;
+ }
+
+ return true;
+}
+
+void VDFile::close() {
+ if (!closeNT())
+ throw MyWin32Error("Cannot complete file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::truncateNT() {
+ return 0 != SetEndOfFile(mhFile);
+}
+
+void VDFile::truncate() {
+ if (!truncateNT())
+ throw MyWin32Error("Cannot truncate file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::extendValidNT(sint64 pos) {
+ if (GetVersion() & 0x80000000)
+ return true; // No need, Windows 95/98/ME do this automatically anyway.
+
+ // The SetFileValidData() API is only available on XP and Server 2003.
+
+ typedef BOOL (APIENTRY *tpSetFileValidData)(HANDLE hFile, LONGLONG ValidDataLength); // Windows XP, Server 2003
+ static tpSetFileValidData pSetFileValidData = (tpSetFileValidData)GetProcAddress(GetModuleHandle("kernel32"), "SetFileValidData");
+
+ if (!pSetFileValidData) {
+ SetLastError(ERROR_CALL_NOT_IMPLEMENTED);
+ return false;
+ }
+
+ return 0 != pSetFileValidData(mhFile, pos);
+}
+
+void VDFile::extendValid(sint64 pos) {
+ if (!extendValidNT(pos))
+ throw MyWin32Error("Cannot extend file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::enableExtendValid() {
+ if (GetVersion() & 0x80000000)
+ return true; // Not Windows NT, no privileges involved
+
+ // SetFileValidData() requires the SE_MANAGE_VOLUME_NAME privilege, so we must enable it
+ // on the process token. We don't attempt to strip the privilege afterward as that would
+ // introduce race conditions.
+ bool bSuccessful = false;
+ DWORD err = 0;
+
+ SetLastError(0);
+
+ HANDLE h;
+ if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES|TOKEN_QUERY, &h)) {
+ LUID luid;
+
+ if (LookupPrivilegeValue(NULL, SE_MANAGE_VOLUME_NAME, &luid)) {
+ TOKEN_PRIVILEGES tp;
+ tp.PrivilegeCount = 1;
+ tp.Privileges[0].Luid = luid;
+ tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+ if (AdjustTokenPrivileges(h, FALSE, &tp, 0, NULL, NULL))
+ bSuccessful = true;
+ else
+ err = GetLastError();
+ }
+
+ CloseHandle(h);
+ }
+
+ if (!bSuccessful && err)
+ SetLastError(err);
+
+ return bSuccessful;
+}
+
+long VDFile::readData(void *buffer, long length) {
+ DWORD dwActual;
+
+ if (!ReadFile(mhFile, buffer, (DWORD)length, &dwActual, NULL))
+ throw MyWin32Error("Cannot read from file \"%ls\": %%s", GetLastError(), mpFilename.get());
+
+ mFilePosition += dwActual;
+
+ return dwActual;
+}
+
+void VDFile::read(void *buffer, long length) {
+ if (length != readData(buffer, length))
+ throw MyWin32Error("Cannot read from file \"%ls\": Premature end of file.", GetLastError(), mpFilename.get());
+}
+
+long VDFile::writeData(const void *buffer, long length) {
+ DWORD dwActual;
+ bool success = false;
+
+ if (!WriteFile(mhFile, buffer, (DWORD)length, &dwActual, NULL) || dwActual != (DWORD)length)
+ goto found_error;
+
+ mFilePosition += dwActual;
+
+ return dwActual;
+
+found_error:
+ throw MyWin32Error("Cannot write to file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+void VDFile::write(const void *buffer, long length) {
+ if (length != writeData(buffer, length))
+ throw MyWin32Error("Cannot write to file \"%ls\": Unable to write all data.", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::seekNT(sint64 newPos, eSeekMode mode) {
+ DWORD dwMode;
+
+ switch(mode) {
+ case kSeekStart:
+ dwMode = FILE_BEGIN;
+ break;
+ case kSeekCur:
+ dwMode = FILE_CURRENT;
+ break;
+ case kSeekEnd:
+ dwMode = FILE_END;
+ break;
+ default:
+ VDNEVERHERE;
+ return false;
+ }
+
+ union {
+ sint64 pos;
+ LONG l[2];
+ } u = { newPos };
+
+ u.l[0] = SetFilePointer(mhFile, u.l[0], &u.l[1], dwMode);
+
+ if (u.l[0] == -1 && GetLastError() != NO_ERROR)
+ return false;
+
+ mFilePosition = u.pos;
+ return true;
+}
+
+void VDFile::seek(sint64 newPos, eSeekMode mode) {
+ if (!seekNT(newPos, mode))
+ throw MyWin32Error("Cannot seek within file \"%ls\": %%s", GetLastError(), mpFilename.get());
+}
+
+bool VDFile::skipNT(sint64 delta) {
+ if (!delta)
+ return true;
+
+ char buf[1024];
+
+ if (delta <= sizeof buf) {
+ return (long)delta == readData(buf, (long)delta);
+ } else
+ return seekNT(delta, kSeekCur);
+}
+
+void VDFile::skip(sint64 delta) {
+ if (!delta)
+ return;
+
+ char buf[1024];
+
+ if (delta > 0 && delta <= sizeof buf) {
+ if ((long)delta != readData(buf, (long)delta))
+ throw MyWin32Error("Cannot seek within file \"%ls\": %%s", GetLastError(), mpFilename.get());
+ } else
+ seek(delta, kSeekCur);
+}
+
+sint64 VDFile::size() {
+ union {
+ uint64 siz;
+ DWORD l[2];
+ } u;
+
+ u.l[0] = GetFileSize(mhFile, &u.l[1]);
+
+ DWORD err;
+
+ if (u.l[0] == (DWORD)-1L && (err = GetLastError()) != NO_ERROR)
+ throw MyWin32Error("Cannot retrieve size of file \"%ls\": %%s", GetLastError(), mpFilename.get());
+
+ return (sint64)u.siz;
+}
+
+sint64 VDFile::tell() {
+ return mFilePosition;
+}
+
+bool VDFile::isOpen() {
+ return mhFile != 0;
+}
+
+VDFileHandle VDFile::getRawHandle() {
+ return mhFile;
+}
+
+void *VDFile::AllocUnbuffer(size_t nBytes) {
+ return VirtualAlloc(NULL, nBytes, MEM_COMMIT, PAGE_READWRITE);
+}
+
+void VDFile::FreeUnbuffer(void *p) {
+ VirtualFree(p, 0, MEM_RELEASE);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDFileStream::~VDFileStream() {
+}
+
+const wchar_t *VDFileStream::GetNameForError() {
+ return getFilenameForError();
+}
+
+sint64 VDFileStream::Pos() {
+ return tell();
+}
+
+void VDFileStream::Read(void *buffer, sint32 bytes) {
+ read(buffer, bytes);
+}
+
+sint32 VDFileStream::ReadData(void *buffer, sint32 bytes) {
+ return readData(buffer, bytes);
+}
+
+void VDFileStream::Write(const void *buffer, sint32 bytes) {
+ write(buffer, bytes);
+}
+
+sint64 VDFileStream::Length() {
+ return size();
+}
+
+void VDFileStream::Seek(sint64 offset) {
+ seek(offset);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDMemoryStream::VDMemoryStream(const void *pSrc, uint32 len)
+ : mpSrc((const char *)pSrc)
+ , mPos(0)
+ , mLength(len)
+{
+}
+
+const wchar_t *VDMemoryStream::GetNameForError() {
+ return L"memory stream";
+}
+
+sint64 VDMemoryStream::Pos() {
+ return mPos;
+}
+
+void VDMemoryStream::Read(void *buffer, sint32 bytes) {
+ if (bytes != ReadData(buffer, bytes))
+ throw MyError("Attempt to read beyond stream.");
+}
+
+sint32 VDMemoryStream::ReadData(void *buffer, sint32 bytes) {
+ if (bytes <= 0)
+ return 0;
+
+ if (bytes + mPos > mLength)
+ bytes = mLength - mPos;
+
+ if (bytes > 0) {
+ memcpy(buffer, mpSrc+mPos, bytes);
+ mPos += bytes;
+ }
+
+ return bytes;
+}
+
+void VDMemoryStream::Write(const void *buffer, sint32 bytes) {
+ throw MyError("Memory streams are read-only.");
+}
+
+sint64 VDMemoryStream::Length() {
+ return mLength;
+}
+
+void VDMemoryStream::Seek(sint64 offset) {
+ if (offset < 0 || offset > mLength)
+ throw MyError("Invalid seek position");
+
+ mPos = (uint32)offset;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDBufferedStream::VDBufferedStream(IVDRandomAccessStream *pSrc, uint32 bufferSize)
+ : mpSrc(pSrc)
+ , mBuffer(bufferSize)
+ , mBasePosition(0)
+ , mBufferOffset(0)
+ , mBufferValidSize(0)
+{
+}
+
+VDBufferedStream::~VDBufferedStream() {
+}
+
+const wchar_t *VDBufferedStream::GetNameForError() {
+ return mpSrc->GetNameForError();
+}
+
+sint64 VDBufferedStream::Pos() {
+ return mBasePosition + mBufferOffset;
+}
+
+void VDBufferedStream::Read(void *buffer, sint32 bytes) {
+ if (bytes != ReadData(buffer, bytes))
+ throw MyError("Cannot read %d bytes at location %08llx from %ls", bytes, mBasePosition + mBufferOffset, mpSrc->GetNameForError());
+}
+
+sint32 VDBufferedStream::ReadData(void *buffer, sint32 bytes) {
+ if (bytes <= 0)
+ return 0;
+
+ uint32 actual = 0;
+ for(;;) {
+ uint32 tc = mBufferValidSize - mBufferOffset;
+
+ if (tc > (uint32)bytes)
+ tc = (uint32)bytes;
+
+ if (tc) {
+ if (buffer) {
+ memcpy(buffer, mBuffer.data() + mBufferOffset, tc);
+ buffer = (char *)buffer + tc;
+ }
+
+ mBufferOffset += tc;
+ bytes -= tc;
+ actual += tc;
+
+ if (!bytes)
+ break;
+ }
+
+ // At this point, the buffer is empty.
+ if (mBufferValidSize) {
+ VDASSERT(mBufferOffset >= mBufferValidSize);
+
+ mBasePosition += mBufferValidSize;
+ mBufferOffset = 0;
+ mBufferValidSize = 0;
+ }
+
+ // If the remaining read is large, issue it directly to the underlying stream.
+ if (buffer && (uint32)bytes >= mBuffer.size() * 2) {
+ sint32 localActual = mpSrc->ReadData(buffer, bytes);
+ mBasePosition += localActual;
+ actual += localActual;
+ break;
+ }
+
+ // Refill the buffer.
+ mBufferValidSize = mpSrc->ReadData(mBuffer.data(), mBuffer.size());
+ mBufferOffset = 0;
+ if (!mBufferValidSize)
+ break;
+ }
+
+ return actual;
+}
+
+void VDBufferedStream::Write(const void *buffer, sint32 bytes) {
+ throw MyError("Buffered streams are read-only.");
+}
+
+sint64 VDBufferedStream::Length() {
+ return mpSrc->Length();
+}
+
+void VDBufferedStream::Seek(sint64 offset) {
+ // check if an in-buffer skip is possible
+ sint64 relativeOffset = offset - mBasePosition;
+ if (relativeOffset >= 0 && relativeOffset <= (sint64)mBufferValidSize) {
+ mBufferOffset = (uint32)relativeOffset;
+ return;
+ }
+
+ // flush buffer
+ mBufferOffset = 0;
+ mBufferValidSize = 0;
+
+ // issue seek
+ mpSrc->Seek(offset);
+ mBasePosition = offset;
+}
+
+void VDBufferedStream::Skip(sint64 size) {
+ sint64 targetPos = mBasePosition + mBufferOffset + size;
+ sint64 bufferEnd = mBasePosition + mBufferValidSize;
+
+ // check if we can do a buffered skip
+ if (targetPos >= bufferEnd && targetPos < bufferEnd + (sint64)mBuffer.size()) {
+ Read(NULL, (sint32)size);
+ return;
+ }
+
+ // issue a seek
+ Seek(targetPos);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDTextStream::VDTextStream(IVDStream *pSrc)
+ : mpSrc(pSrc)
+ , mBufferPos(0)
+ , mBufferLimit(0)
+ , mState(kFetchLine)
+ , mFileBuffer(kFileBufferSize)
+{
+}
+
+VDTextStream::~VDTextStream() {
+}
+
+const char *VDTextStream::GetNextLine() {
+ if (!mpSrc)
+ return NULL;
+
+ mLineBuffer.clear();
+
+ for(;;) {
+ if (mBufferPos >= mBufferLimit) {
+ mBufferPos = 0;
+ mBufferLimit = mpSrc->ReadData(mFileBuffer.data(), mFileBuffer.size());
+
+ if (!mBufferLimit) {
+ mpSrc = NULL;
+
+ if (mLineBuffer.empty())
+ return NULL;
+
+ mLineBuffer.push_back(0);
+
+ return mLineBuffer.data();
+ }
+ }
+
+ switch(mState) {
+
+ case kEatNextIfCR:
+ mState = kFetchLine;
+ if (mFileBuffer[mBufferPos] == '\r')
+ ++mBufferPos;
+ continue;
+
+ case kEatNextIfLF:
+ mState = kFetchLine;
+ if (mFileBuffer[mBufferPos] == '\n')
+ ++mBufferPos;
+ continue;
+
+ case kFetchLine:
+ uint32 base = mBufferPos;
+
+ do {
+ const char c = mFileBuffer[mBufferPos++];
+
+ if (c == '\r') {
+ mState = kEatNextIfLF;
+ mLineBuffer.insert(mLineBuffer.end(), mFileBuffer.begin() + base, mFileBuffer.begin() + (mBufferPos-1));
+ mLineBuffer.push_back(0);
+ return mLineBuffer.data();
+ }
+ if (c == '\n') {
+ mState = kEatNextIfCR;
+ mLineBuffer.insert(mLineBuffer.end(), mFileBuffer.begin() + base, mFileBuffer.begin() + (mBufferPos-1));
+ mLineBuffer.push_back(0);
+ return mLineBuffer.data();
+ }
+ } while(mBufferPos < mBufferLimit);
+ mLineBuffer.insert(mLineBuffer.end(), mFileBuffer.begin() + base, mFileBuffer.begin() + mBufferLimit);
+ break;
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDTextInputFile::VDTextInputFile(const wchar_t *filename, uint32 flags)
+ : mFileStream(filename, flags | nsVDFile::kRead)
+ , mTextStream(&mFileStream)
+{
+}
+
+VDTextInputFile::~VDTextInputFile() {
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDTextOutputStream::VDTextOutputStream(IVDStream *stream)
+ : mpDst(stream)
+ , mLevel(0)
+{
+}
+
+VDTextOutputStream::~VDTextOutputStream() {
+ try {
+ Flush();
+ } catch(const MyError&) {
+ // ignore errors in destructor
+ }
+}
+
+void VDTextOutputStream::Flush() {
+ if (mLevel) {
+ mpDst->Write(mBuf, mLevel);
+ mLevel = 0;
+ }
+}
+
+void VDTextOutputStream::Write(const char *s, int len) {
+ PutData(s, len);
+}
+
+void VDTextOutputStream::PutLine() {
+ PutData("\r\n", 2);
+}
+
+void VDTextOutputStream::PutLine(const char *s) {
+ PutData(s, strlen(s));
+ PutData("\r\n", 2);
+}
+
+void VDTextOutputStream::FormatLine(const char *format, ...) {
+ va_list val;
+
+ va_start(val, format);
+
+ int rv = -1;
+ if (mLevel < kBufSize-4)
+ rv = _vsnprintf(mBuf+mLevel, kBufSize-mLevel, format, val);
+
+ if (rv >= 0)
+ mLevel += rv;
+ else
+ FormatLine2(format, val);
+
+ PutData("\r\n", 2);
+ va_end(val);
+}
+
+void VDTextOutputStream::FormatLine2(const char *format, va_list val) {
+ char buf[3072];
+
+ int rv = _vsnprintf(buf, 3072, format, val);
+ if (rv > 0)
+ PutData(buf, rv);
+}
+
+void VDTextOutputStream::PutData(const char *s, int len) {
+ while(len > 0) {
+ int left = kBufSize - mLevel;
+ if (!left) {
+ mpDst->Write(mBuf, kBufSize);
+ mLevel = 0;
+ left = kBufSize;
+ }
+
+ int tc = len;
+
+ if (tc > left)
+ tc = left;
+
+ memcpy(mBuf + mLevel, s, tc);
+
+ s += tc;
+ len -= tc;
+ mLevel += tc;
+ }
+}
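
VDTextStream turns any IVDStream into a line reader, handling CR, LF, CR LF, and LF CR endings with the small state machine above. A hedged usage sketch, not part of the commit: it pairs VDTextStream with the read-only VDMemoryStream defined in this same file (whose method set matches the stream interface VDTextStream expects) to split an in-memory buffer into lines. The function name is illustrative.

#include <stdio.h>
#include <vd2/system/file.h>

static void DumpLinesSketch(const char *text, uint32 len) {
	VDMemoryStream mem(text, len);		// read-only stream over the buffer
	VDTextStream lines(&mem);

	// GetNextLine() returns each line NUL-terminated and stripped of its
	// line ending, then NULL once the stream is exhausted.
	while(const char *line = lines.GetNextLine())
		puts(line);
}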
diff --git a/src/thirdparty/VirtualDub/system/source/fileasync.cpp b/src/thirdparty/VirtualDub/system/source/fileasync.cpp
new file mode 100644
index 000000000..18f97afc8
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/fileasync.cpp
@@ -0,0 +1,832 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <windows.h>
+#include <malloc.h>
+#include <vd2/system/error.h>
+#include <vd2/system/file.h>
+#include <vd2/system/fileasync.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/VDString.h>
+#include <vd2/system/VDRingBuffer.h>
+#include <vd2/system/w32assist.h>
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDFileAsync - Windows 9x implementation
+//
+///////////////////////////////////////////////////////////////////////////
+
+class VDFileAsync9x : public IVDFileAsync, protected VDThread {
+public:
+ VDFileAsync9x(bool useFastMode);
+ ~VDFileAsync9x();
+
+ void SetPreemptiveExtend(bool b) { mbPreemptiveExtend = b; }
+ bool IsPreemptiveExtendActive() { return mbPreemptiveExtend; }
+
+ bool IsOpen() { return mhFileSlow != INVALID_HANDLE_VALUE; }
+
+ void Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize);
+ void Close();
+ void FastWrite(const void *pData, uint32 bytes);
+ void FastWriteEnd();
+ void Write(sint64 pos, const void *pData, uint32 bytes);
+ bool Extend(sint64 pos);
+ void Truncate(sint64 pos);
+ void SafeTruncateAndClose(sint64 pos);
+ sint64 GetSize();
+ sint64 GetFastWritePos() { return mClientFastPointer; }
+
+protected:
+ void WriteZero(sint64 pos, uint32 bytes);
+ void Seek(sint64 pos);
+ bool SeekNT(sint64 pos);
+ void ThrowError();
+ void ThreadRun();
+
+ HANDLE mhFileSlow;
+ HANDLE mhFileFast;
+ uint32 mBlockSize;
+ uint32 mBlockCount;
+ uint32 mSectorSize;
+ sint64 mClientFastPointer;
+
+ const bool mbUseFastMode;
+
+ volatile bool mbPreemptiveExtend;
+
+ enum {
+ kStateNormal,
+ kStateFlush,
+ kStateAbort
+ };
+ VDAtomicInt mState;
+
+ VDSignal mReadOccurred;
+ VDSignal mWriteOccurred;
+
+ VDRingBuffer<char, VDFileUnbufferAllocator<char> > mBuffer;
+
+ VDStringA mFilename;
+ VDAtomicPtr<MyError> mpError;
+};
+
+///////////////////////////////////////////////////////////////////////////
+
+VDFileAsync9x::VDFileAsync9x(bool useFastMode)
+ : mhFileSlow(INVALID_HANDLE_VALUE)
+ , mhFileFast(INVALID_HANDLE_VALUE)
+ , mClientFastPointer(0)
+ , mbUseFastMode(useFastMode)
+ , mbPreemptiveExtend(false)
+ , mpError(NULL)
+{
+}
+
+VDFileAsync9x::~VDFileAsync9x() {
+ Close();
+}
+
+void VDFileAsync9x::Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize) {
+ try {
+ mFilename = VDTextWToA(pszFilename);
+
+ mhFileSlow = CreateFile(mFilename.c_str(), GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_WRITE_THROUGH, NULL);
+ if (mhFileSlow == INVALID_HANDLE_VALUE)
+ throw MyWin32Error("Unable to open file \"%s\" for write: %%s", GetLastError(), mFilename.c_str());
+
+ if (mbUseFastMode)
+ mhFileFast = CreateFile(mFilename.c_str(), GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_NO_BUFFERING, NULL);
+
+ mSectorSize = 4096; // guess for now... proper way would be GetVolumeMountPoint() followed by GetDiskFreeSpace().
+
+ mBlockSize = bufferSize;
+ mBlockCount = count;
+ mBuffer.Init(count * bufferSize);
+
+ mState = kStateNormal;
+ } catch(const MyError&) {
+ Close();
+ throw;
+ }
+
+ ThreadStart();
+}
+
+void VDFileAsync9x::Close() {
+ mState = kStateAbort;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ if (mhFileSlow != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileSlow);
+ mhFileSlow = INVALID_HANDLE_VALUE;
+ }
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileFast);
+ mhFileFast = INVALID_HANDLE_VALUE;
+ }
+}
+
+void VDFileAsync9x::FastWrite(const void *pData, uint32 bytes) {
+ if (mhFileFast == INVALID_HANDLE_VALUE) {
+ if (pData)
+ Write(mClientFastPointer, pData, bytes);
+ else
+ WriteZero(mClientFastPointer, bytes);
+ } else {
+ if (mpError)
+ ThrowError();
+
+ uint32 bytesLeft = bytes;
+ while(bytesLeft) {
+ int actual;
+ void *p = mBuffer.LockWrite(bytesLeft, actual);
+
+ if (!actual) {
+ mReadOccurred.wait();
+ if (mpError)
+ ThrowError();
+ continue;
+ }
+
+ if (pData) {
+ memcpy(p, pData, actual);
+ pData = (const char *)pData + actual;
+ } else {
+ memset(p, 0, actual);
+ }
+ mBuffer.UnlockWrite(actual);
+ mWriteOccurred.signal();
+ bytesLeft -= actual;
+ }
+ }
+
+ mClientFastPointer += bytes;
+}
+
+void VDFileAsync9x::FastWriteEnd() {
+ FastWrite(NULL, mSectorSize - 1);
+
+ mState = kStateFlush;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ if (mpError)
+ ThrowError();
+}
+
+void VDFileAsync9x::Write(sint64 pos, const void *p, uint32 bytes) {
+ Seek(pos);
+
+ DWORD dwActual;
+ if (!WriteFile(mhFileSlow, p, bytes, &dwActual, NULL) || dwActual != bytes)
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s\n", GetLastError(), mFilename.c_str());
+}
+
+void VDFileAsync9x::WriteZero(sint64 pos, uint32 bytes) {
+ uint32 bufsize = bytes > 2048 ? 2048 : bytes;
+ void *p = _alloca(bufsize);
+ memset(p, 0, bufsize);
+
+ while(bytes > 0) {
+ uint32 tc = bytes > 2048 ? 2048 : bytes;
+
+ Write(pos, p, tc);
+ pos += tc;
+ bytes -= tc;
+ }
+}
+
+bool VDFileAsync9x::Extend(sint64 pos) {
+ return SeekNT(pos) && SetEndOfFile(mhFileSlow);
+}
+
+void VDFileAsync9x::Truncate(sint64 pos) {
+ Seek(pos);
+ if (!SetEndOfFile(mhFileSlow))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+void VDFileAsync9x::SafeTruncateAndClose(sint64 pos) {
+ if (mhFileSlow != INVALID_HANDLE_VALUE) {
+ FastWrite(NULL, mSectorSize - 1);
+
+ mState = kStateFlush;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ Extend(pos);
+ Close();
+ }
+}
+
+sint64 VDFileAsync9x::GetSize() {
+ DWORD dwSizeHigh;
+ DWORD dwSizeLow = GetFileSize(mhFileSlow, &dwSizeHigh);
+
+ if (dwSizeLow == (DWORD)-1 && GetLastError() != NO_ERROR)
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ return dwSizeLow + ((sint64)dwSizeHigh << 32);
+}
+
+void VDFileAsync9x::Seek(sint64 pos) {
+ if (!SeekNT(pos))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+bool VDFileAsync9x::SeekNT(sint64 pos) {
+ LONG posHi = (LONG)(pos >> 32);
+ DWORD result = SetFilePointer(mhFileSlow, (LONG)pos, &posHi, FILE_BEGIN);
+
+ if (result == INVALID_SET_FILE_POINTER) {
+ DWORD dwError = GetLastError();
+
+ if (dwError != NO_ERROR)
+ return false;
+ }
+
+ return true;
+}
+
+void VDFileAsync9x::ThrowError() {
+ MyError *e = mpError.xchg(NULL);
+
+ if (e) {
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileFast);
+ mhFileFast = INVALID_HANDLE_VALUE;
+ }
+
+ MyError tmp;
+ tmp.TransferFrom(*e);
+ delete e;
+ throw tmp;
+ }
+}
+
+void VDFileAsync9x::ThreadRun() {
+ bool bPreemptiveExtend = mbPreemptiveExtend;
+ sint64 currentSize;
+ sint64 pos = 0;
+ uint32 bufferSize = mBlockCount * mBlockSize;
+ HANDLE hFile = mhFileFast != INVALID_HANDLE_VALUE ? mhFileFast : mhFileSlow;
+
+ try {
+ if (!VDGetFileSizeW32(hFile, currentSize))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ for(;;) {
+ int state = mState;
+
+ if (state == kStateAbort)
+ break;
+
+ int actual;
+ const void *p = mBuffer.LockRead(mBlockSize, actual);
+
+ if ((uint32)actual < mBlockSize) {
+ if (state == kStateNormal) {
+ mWriteOccurred.wait();
+ continue;
+ }
+
+ VDASSERT(state == kStateFlush);
+
+ actual &= ~(mSectorSize-1);
+ if (!actual)
+ break;
+ } else {
+ if (bPreemptiveExtend) {
+ sint64 checkpt = pos + mBlockSize + bufferSize;
+
+ if (checkpt > currentSize) {
+ currentSize += bufferSize;
+ if (currentSize < checkpt)
+ currentSize = checkpt;
+
+ if (!VDSetFilePointerW32(hFile, currentSize, FILE_BEGIN)
+ || !SetEndOfFile(hFile))
+ mbPreemptiveExtend = bPreemptiveExtend = false;
+
+ if (!VDSetFilePointerW32(hFile, pos, FILE_BEGIN))
+ throw MyWin32Error("Seek error occurred on file \"%s\": %%s\n", GetLastError(), mFilename.c_str());
+ }
+ }
+ }
+
+ DWORD dwActual;
+ if (!WriteFile(hFile, p, actual, &dwActual, NULL) || dwActual != actual) {
+ DWORD dwError = GetLastError();
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s\n", dwError, mFilename.c_str());
+ }
+
+ pos += actual;
+
+ mBuffer.UnlockRead(actual);
+
+ mReadOccurred.signal();
+ }
+ } catch(MyError& e) {
+ MyError *p = new MyError;
+
+ p->TransferFrom(e);
+ delete mpError.xchg(p);
+ mReadOccurred.signal();
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VDFileAsync - Windows NT implementation
+//
+///////////////////////////////////////////////////////////////////////////
+
+struct VDFileAsyncNTBuffer : public OVERLAPPED {
+ bool mbActive;
+ bool mbPending;
+ uint32 mLength;
+
+ VDFileAsyncNTBuffer() : mbActive(false), mbPending(false) { hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); }
+ ~VDFileAsyncNTBuffer() { if (hEvent) CloseHandle(hEvent); }
+};
+
+class VDFileAsyncNT : public IVDFileAsync, private VDThread {
+public:
+ VDFileAsyncNT();
+ ~VDFileAsyncNT();
+
+ void SetPreemptiveExtend(bool b) { mbPreemptiveExtend = b; }
+ bool IsPreemptiveExtendActive() { return mbPreemptiveExtend; }
+
+ bool IsOpen() { return mhFileSlow != INVALID_HANDLE_VALUE; }
+
+ void Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize);
+ void Close();
+ void FastWrite(const void *pData, uint32 bytes);
+ void FastWriteEnd();
+ void Write(sint64 pos, const void *pData, uint32 bytes);
+ bool Extend(sint64 pos);
+ void Truncate(sint64 pos);
+ void SafeTruncateAndClose(sint64 pos);
+ sint64 GetSize();
+ sint64 GetFastWritePos() { return mClientFastPointer; }
+
+protected:
+ void WriteZero(sint64 pos, uint32 bytes);
+ void Seek(sint64 pos);
+ bool SeekNT(sint64 pos);
+ void ThrowError();
+ void ThreadRun();
+
+ HANDLE mhFileSlow;
+ HANDLE mhFileFast;
+ uint32 mBlockSize;
+ uint32 mBlockCount;
+ uint32 mBufferSize;
+ uint32 mSectorSize;
+
+ enum {
+ kStateNormal,
+ kStateFlush,
+ kStateAbort
+ };
+ VDAtomicInt mState;
+
+ VDSignal mReadOccurred;
+ VDSignal mWriteOccurred;
+
+ uint32 mWriteOffset;
+ VDAtomicInt mBufferLevel;
+ sint64 mClientFastPointer;
+ sint64 mFastPointer;
+
+ volatile bool mbPreemptiveExtend;
+
+ vdautoarrayptr<VDFileAsyncNTBuffer> mpBlocks;
+
+ vdblock<char, VDFileUnbufferAllocator<char> > mBuffer;
+
+ VDAtomicPtr<MyError> mpError;
+ VDStringA mFilename;
+};
+
+VDFileAsyncNT::VDFileAsyncNT()
+ : mhFileSlow(INVALID_HANDLE_VALUE)
+ , mhFileFast(INVALID_HANDLE_VALUE)
+ , mFastPointer(0)
+ , mClientFastPointer(0)
+ , mbPreemptiveExtend(false)
+ , mpError(NULL)
+{
+}
+
+VDFileAsyncNT::~VDFileAsyncNT() {
+ Close();
+}
+
+void VDFileAsyncNT::Open(const wchar_t *pszFilename, uint32 count, uint32 bufferSize) {
+ try {
+ mFilename = VDTextWToA(pszFilename);
+
+ mhFileSlow = CreateFileW(pszFilename, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+ if (mhFileSlow == INVALID_HANDLE_VALUE)
+ throw MyWin32Error("Unable to open file \"%s\" for write: %%s", GetLastError(), mFilename.c_str());
+
+ mhFileFast = CreateFileW(pszFilename, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_NO_BUFFERING | FILE_FLAG_OVERLAPPED, NULL);
+ if (mhFileFast == INVALID_HANDLE_VALUE)
+ mhFileFast = CreateFileW(pszFilename, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_WRITE_THROUGH | FILE_FLAG_OVERLAPPED, NULL);
+
+ mSectorSize = 4096; // guess for now... proper way would be GetVolumeMountPoint() followed by GetDiskFreeSpace().
+
+ mBlockSize = bufferSize;
+ mBlockCount = count;
+ mBufferSize = mBlockSize * mBlockCount;
+
+ mWriteOffset = 0;
+ mBufferLevel = 0;
+
+ mState = kStateNormal;
+
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ mpBlocks = new VDFileAsyncNTBuffer[count];
+ mBuffer.resize(count * bufferSize);
+ ThreadStart();
+ }
+ } catch(const MyError&) {
+ Close();
+ throw;
+ }
+}
+
+void VDFileAsyncNT::Close() {
+ mState = kStateAbort;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ if (mpError) {
+ delete mpError;
+ mpError = NULL;
+ }
+
+ if (mhFileSlow != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileSlow);
+ mhFileSlow = INVALID_HANDLE_VALUE;
+ }
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileFast);
+ mhFileFast = INVALID_HANDLE_VALUE;
+ }
+
+ mpBlocks = NULL;
+}
+
+void VDFileAsyncNT::FastWrite(const void *pData, uint32 bytes) {
+ if (mhFileFast == INVALID_HANDLE_VALUE) {
+ if (pData)
+ Write(mClientFastPointer, pData, bytes);
+ else
+ WriteZero(mClientFastPointer, bytes);
+ } else {
+ if (mpError)
+ ThrowError();
+
+ uint32 bytesLeft = bytes;
+ while(bytesLeft) {
+ uint32 actual = mBufferSize - mBufferLevel;
+
+ if (actual > bytesLeft)
+ actual = bytesLeft;
+
+ if (mWriteOffset + actual > mBufferSize)
+ actual = mBufferSize - mWriteOffset;
+
+ if (!actual) {
+ mReadOccurred.wait();
+ if (mpError)
+ ThrowError();
+ continue;
+ }
+
+ if (pData) {
+ memcpy(&mBuffer[mWriteOffset], pData, actual);
+ pData = (const char *)pData + actual;
+ } else {
+ memset(&mBuffer[mWriteOffset], 0, actual);
+ }
+
+ uint32 oldWriteOffset = mWriteOffset;
+ mWriteOffset += actual;
+ if (mWriteOffset >= mBufferSize)
+ mWriteOffset = 0;
+ mBufferLevel += actual;
+
+ // only bother signaling if the write offset crossed a block boundary
+ if (oldWriteOffset % mBlockSize + actual >= mBlockSize) {
+ mWriteOccurred.signal();
+ if (mpError)
+ ThrowError();
+ }
+
+ bytesLeft -= actual;
+ }
+ }
+
+ mClientFastPointer += bytes;
+}
+
+void VDFileAsyncNT::FastWriteEnd() {
+ FastWrite(NULL, mSectorSize - 1);
+ mState = kStateFlush;
+ mWriteOccurred.signal();
+ ThreadWait();
+ if (mpError)
+ ThrowError();
+}
+
+void VDFileAsyncNT::Write(sint64 pos, const void *p, uint32 bytes) {
+ Seek(pos);
+
+ DWORD dwActual;
+ if (!WriteFile(mhFileSlow, p, bytes, &dwActual, NULL) || dwActual != bytes)
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+void VDFileAsyncNT::WriteZero(sint64 pos, uint32 bytes) {
+ uint32 bufsize = bytes > 2048 ? 2048 : bytes;
+ void *p = _alloca(bufsize);
+ memset(p, 0, bufsize);
+
+ while(bytes > 0) {
+ uint32 tc = bytes > 2048 ? 2048 : bytes;
+
+ Write(pos, p, tc);
+ pos += tc;
+ bytes -= tc;
+ }
+}
+
+bool VDFileAsyncNT::Extend(sint64 pos) {
+ return SeekNT(pos) && SetEndOfFile(mhFileSlow);
+}
+
+void VDFileAsyncNT::Truncate(sint64 pos) {
+ Seek(pos);
+ if (!SetEndOfFile(mhFileSlow))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+void VDFileAsyncNT::SafeTruncateAndClose(sint64 pos) {
+ if (isThreadAttached()) {
+ mState = kStateAbort;
+ mWriteOccurred.signal();
+ ThreadWait();
+
+ if (mpError) {
+ delete mpError;
+ mpError = NULL;
+ }
+ }
+
+ if (mhFileSlow != INVALID_HANDLE_VALUE) {
+ Extend(pos);
+ Close();
+ }
+}
+
+sint64 VDFileAsyncNT::GetSize() {
+ DWORD dwSizeHigh;
+ DWORD dwSizeLow = GetFileSize(mhFileSlow, &dwSizeHigh);
+
+ if (dwSizeLow == (DWORD)-1 && GetLastError() != NO_ERROR)
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ return dwSizeLow + ((sint64)dwSizeHigh << 32);
+}
+
+void VDFileAsyncNT::Seek(sint64 pos) {
+ if (!SeekNT(pos))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+}
+
+bool VDFileAsyncNT::SeekNT(sint64 pos) {
+ LONG posHi = (LONG)(pos >> 32);
+ DWORD result = SetFilePointer(mhFileSlow, (LONG)pos, &posHi, FILE_BEGIN);
+
+ if (result == INVALID_SET_FILE_POINTER) {
+ DWORD dwError = GetLastError();
+
+ if (dwError != NO_ERROR)
+ return false;
+ }
+
+ return true;
+}
+
+void VDFileAsyncNT::ThrowError() {
+ MyError *e = mpError.xchg(NULL);
+
+ if (e) {
+ if (mhFileFast != INVALID_HANDLE_VALUE) {
+ CloseHandle(mhFileFast);
+ mhFileFast = INVALID_HANDLE_VALUE;
+ }
+
+ MyError tmp;
+ tmp.TransferFrom(*e);
+ delete e;
+ throw tmp;
+ }
+}
+
+void VDFileAsyncNT::ThreadRun() {
+ int requestHead = 0;
+ int requestTail = 0;
+ int requestCount = mBlockCount;
+ uint32 pendingLevel = 0;
+ uint32 readOffset = 0;
+ bool bPreemptiveExtend = mbPreemptiveExtend;
+ sint64 currentSize;
+
+ try {
+ if (!VDGetFileSizeW32(mhFileFast, currentSize))
+ throw MyWin32Error("I/O error on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ for(;;) {
+ int state = mState;
+
+ if (state == kStateAbort) {
+ typedef BOOL (WINAPI *tpCancelIo)(HANDLE);
+ static const tpCancelIo pCancelIo = (tpCancelIo)GetProcAddress(GetModuleHandle("kernel32"), "CancelIo");
+ pCancelIo(mhFileFast);
+ break;
+ }
+
+ uint32 actual = mBufferLevel - pendingLevel;
+ VDASSERT((int)actual >= 0);
+ if (readOffset + actual > mBufferSize)
+ actual = mBufferSize - readOffset;
+
+ if (actual < mBlockSize) {
+ if (state == kStateNormal || actual < mSectorSize) {
+ // check for blocks that have completed
+ bool blocksCompleted = false;
+ for(;;) {
+ VDFileAsyncNTBuffer& buf = mpBlocks[requestTail];
+
+ if (!buf.mbActive) {
+ if (state == kStateFlush)
+ goto all_done;
+
+ if (!blocksCompleted) {
+ // wait for further writes
+ mWriteOccurred.wait();
+ }
+ break;
+ }
+
+ if (buf.mbPending) {
+ HANDLE h[2] = {buf.hEvent, mWriteOccurred.getHandle()};
+ DWORD waitResult = WaitForMultipleObjects(2, h, FALSE, INFINITE);
+
+ if (waitResult == WAIT_OBJECT_0+1) // write pending
+ break;
+
+ DWORD dwActual;
+ if (!GetOverlappedResult(mhFileFast, &buf, &dwActual, TRUE))
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+ }
+
+ buf.mbActive = false;
+
+ blocksCompleted = true;
+
+ if (++requestTail >= requestCount)
+ requestTail = 0;
+
+ mBufferLevel -= buf.mLength;
+ pendingLevel -= buf.mLength;
+ VDASSERT((int)mBufferLevel >= 0);
+ VDASSERT((int)pendingLevel >= 0);
+
+ mReadOccurred.signal();
+
+ }
+
+ if (state == kStateNormal)
+ continue;
+ }
+
+ VDASSERT(state == kStateFlush);
+
+ actual &= ~(mSectorSize-1);
+
+ VDASSERT(actual > 0);
+ } else {
+ actual = mBlockSize;
+
+ if (bPreemptiveExtend) {
+ sint64 checkpt = mFastPointer + mBlockSize + mBufferSize;
+
+ if (checkpt > currentSize) {
+ currentSize += mBufferSize;
+ if (currentSize < checkpt)
+ currentSize = checkpt;
+
+ if (!VDSetFilePointerW32(mhFileFast, currentSize, FILE_BEGIN)
+ || !SetEndOfFile(mhFileFast))
+ mbPreemptiveExtend = bPreemptiveExtend = false;
+ }
+ }
+ }
+
+ // Issue a write to OS
+ VDFileAsyncNTBuffer& buf = mpBlocks[requestHead];
+
+ VDASSERT(!buf.mbActive);
+
+ DWORD dwActual;
+
+ buf.Offset = (DWORD)mFastPointer;
+ buf.OffsetHigh = (DWORD)((uint64)mFastPointer >> 32);
+ buf.Internal = 0;
+ buf.InternalHigh = 0;
+ buf.mLength = actual;
+ buf.mbPending = false;
+
+ if (!WriteFile(mhFileFast, &mBuffer[readOffset], actual, &dwActual, &buf)) {
+ if (GetLastError() != ERROR_IO_PENDING)
+ throw MyWin32Error("Write error occurred on file \"%s\": %%s", GetLastError(), mFilename.c_str());
+
+ buf.mbPending = true;
+ }
+
+ buf.mbActive = true;
+
+ pendingLevel += actual;
+ VDASSERT(pendingLevel <= (uint32)mBufferLevel);
+
+ readOffset += actual;
+ VDASSERT(readOffset <= mBufferSize);
+ if (readOffset >= mBufferSize)
+ readOffset = 0;
+
+ mFastPointer += actual;
+
+ if (++requestHead >= requestCount)
+ requestHead = 0;
+ }
+all_done:
+ ;
+
+ } catch(MyError& e) {
+ MyError *p = new MyError;
+
+ p->TransferFrom(e);
+ delete mpError.xchg(p);
+ mReadOccurred.signal();
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+IVDFileAsync *VDCreateFileAsync(IVDFileAsync::Mode mode) {
+ switch(mode) {
+
+ case IVDFileAsync::kModeAsynchronous:
+ if (VDIsWindowsNT())
+ return new VDFileAsyncNT;
+ // Can't do async I/O. Fall-through to 9x method.
+ case IVDFileAsync::kModeThreaded:
+ return new VDFileAsync9x(true);
+
+ default:
+ return new VDFileAsync9x(false);
+ }
+}
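
VDFileAsyncNT keeps a ring of VDFileAsyncNTBuffer blocks and issues overlapped, unbuffered writes from a worker thread, falling back to the slow handle when the fast one cannot be opened; VDFileAsync9x does the same job with a background thread and write-through I/O. A hedged usage sketch of the interface, not part of the commit: the buffer geometry (16 blocks of 64 KiB) is arbitrary, the function name is illustrative, and it assumes fileasync.h declares a virtual destructor on IVDFileAsync so the factory result can be deleted directly. Open/FastWrite may throw MyError on I/O failure.

#include <vd2/system/fileasync.h>

static void WriteFileAsyncSketch(const wchar_t *path, const void *data, uint32 bytes) {
	IVDFileAsync *out = VDCreateFileAsync(IVDFileAsync::kModeAsynchronous);

	out->SetPreemptiveExtend(true);		// pre-extend to reduce fragmentation
	out->Open(path, 16, 65536);		// 16 buffers of 64 KiB each

	out->FastWrite(data, bytes);		// queue into the ring; blocks when full
	out->FastWriteEnd();			// pad with zeros and drain the writer thread
	out->Truncate(bytes);			// trim the zero padding back off
	out->Close();

	delete out;				// assumes a virtual destructor in fileasync.h
}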
diff --git a/src/thirdparty/VirtualDub/system/source/filesys.cpp b/src/thirdparty/VirtualDub/system/source/filesys.cpp
new file mode 100644
index 000000000..a85c0f5c7
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/filesys.cpp
@@ -0,0 +1,663 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <ctype.h>
+#include <string.h>
+
+#include <vd2/system/VDString.h>
+#include <vd2/system/filesys.h>
+#include <vd2/system/Error.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/w32assist.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+template<class T, class U>
+static inline T splitimpL(const T& string, const U *s) {
+ const U *p = string.c_str();
+ return T(p, s - p);
+}
+
+template<class T, class U>
+static inline T splitimpR(const T& string, const U *s) {
+ const U *p = string.c_str();
+ return T(s);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+const char *VDFileSplitFirstDir(const char *s) {
+ const char *start = s;
+
+ while(*s++)
+ if (s[-1] == ':' || s[-1] == '\\' || s[-1] == '/')
+ return s;
+
+ return start;
+}
+
+const wchar_t *VDFileSplitFirstDir(const wchar_t *s) {
+ const wchar_t *start = s;
+
+ while(*s++)
+ if (s[-1] == L':' || s[-1] == L'\\' || s[-1] == L'/')
+ return s;
+
+ return start;
+}
+
+const char *VDFileSplitPath(const char *s) {
+ const char *lastsep = s;
+
+ while(*s++)
+ if (s[-1] == ':' || s[-1] == '\\' || s[-1] == '/')
+ lastsep = s;
+
+ return lastsep;
+}
+
+const wchar_t *VDFileSplitPath(const wchar_t *s) {
+ const wchar_t *lastsep = s;
+
+ while(*s++)
+ if (s[-1] == L':' || s[-1] == L'\\' || s[-1] == L'/')
+ lastsep = s;
+
+ return lastsep;
+}
+
+VDString VDFileSplitPathLeft (const VDString& s) { return splitimpL(s, VDFileSplitPath(s.c_str())); }
+VDStringW VDFileSplitPathLeft (const VDStringW& s) { return splitimpL(s, VDFileSplitPath(s.c_str())); }
+VDString VDFileSplitPathRight(const VDString& s) { return splitimpR(s, VDFileSplitPath(s.c_str())); }
+VDStringW VDFileSplitPathRight(const VDStringW& s) { return splitimpR(s, VDFileSplitPath(s.c_str())); }
+
+const char *VDFileSplitRoot(const char *s) {
+ // Test for a UNC path.
+ if (s[0] == '\\' && s[1] == '\\') {
+ // For these, we scan for the fourth backslash.
+ s += 2;
+ for(int i=0; i<2; ++i) {
+ while(*s && *s != '\\')
+ ++s;
+ if (*s == '\\')
+ ++s;
+ }
+ return s;
+ }
+
+ const char *const t = s;
+
+ while(*s && *s != ':' && *s != '/' && *s != '\\')
+ ++s;
+
+ return *s ? *s == ':' && (s[1]=='/' || s[1]=='\\') ? s+2 : s+1 : t;
+}
+
+const wchar_t *VDFileSplitRoot(const wchar_t *s) {
+ // Test for a UNC path.
+ if (s[0] == '\\' && s[1] == '\\') {
+ // For these, we scan for the fourth backslash.
+ s += 2;
+ for(int i=0; i<2; ++i) {
+ while(*s && *s != '\\')
+ ++s;
+ if (*s == '\\')
+ ++s;
+ }
+ return s;
+ }
+
+ const wchar_t *const t = s;
+
+ while(*s && *s != L':' && *s != L'/' && *s != L'\\')
+ ++s;
+
+ return *s ? *s == L':' && (s[1]==L'/' || s[1]==L'\\') ? s+2 : s+1 : t;
+}
+
+VDString VDFileSplitRoot(const VDString& s) { return splitimpL(s, VDFileSplitRoot(s.c_str())); }
+VDStringW VDFileSplitRoot(const VDStringW& s) { return splitimpL(s, VDFileSplitRoot(s.c_str())); }
+
+const char *VDFileSplitExt(const char *s) {
+ const char *t = s;
+
+ while(*t)
+ ++t;
+
+ const char *const end = t;
+
+ while(t>s) {
+ --t;
+
+ if (*t == '.')
+ return t;
+
+ if (*t == ':' || *t == '\\' || *t == '/')
+ break;
+ }
+
+	return end;
+}
+
+const wchar_t *VDFileSplitExt(const wchar_t *s) {
+ const wchar_t *t = s;
+
+ while(*t)
+ ++t;
+
+ const wchar_t *const end = t;
+
+ while(t>s) {
+ --t;
+
+ if (*t == L'.')
+ return t;
+
+ if (*t == L':' || *t == L'\\' || *t == L'/')
+ break;
+ }
+
+ return end;
+}
+
+VDString VDFileSplitExtLeft (const VDString& s) { return splitimpL(s, VDFileSplitExt(s.c_str())); }
+VDStringW VDFileSplitExtLeft (const VDStringW& s) { return splitimpL(s, VDFileSplitExt(s.c_str())); }
+VDString VDFileSplitExtRight(const VDString& s) { return splitimpR(s, VDFileSplitExt(s.c_str())); }
+VDStringW VDFileSplitExtRight(const VDStringW& s) { return splitimpR(s, VDFileSplitExt(s.c_str())); }
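+// Illustrative examples (editorial note, derived from the splitters above; not
+// part of the original source): the split point is the last '.' in the final
+// path component, so
+//   VDFileSplitExtLeft(VDString("clip.avi"))        == "clip"
+//   VDFileSplitExtRight(VDString("clip.avi"))       == ".avi"
+//   VDFileSplitExtRight(VDString("archive.tar.gz")) == ".gz"
+// and a dotless name yields the whole string on the left and "" on the right.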
+
+/////////////////////////////////////////////////////////////////////////////
+
+bool VDFileWildMatch(const char *pattern, const char *path) {
+	// What we do here is split the pattern into segments that are bracketed
+	// by sequences of asterisks. The trick is that the first match for a
+	// segment is as good as the best possible match, so we can continue. So
+	// we just take one segment at a time and walk it forward until we find
+	// the first match or we fail.
+ //
+ // Time complexity is O(NM), where N=length of string and M=length of
+ // the pattern. In practice, it's rather fast.
+
+ bool star = false;
+ int i = 0;
+ for(;;) {
+ char c = (char)tolower((unsigned char)pattern[i]);
+ if (c == '*') {
+ star = true;
+ pattern += i+1;
+ if (!*pattern)
+ return true;
+ path += i;
+ i = 0;
+ continue;
+ }
+
+ char d = (char)tolower((unsigned char)path[i]);
+ ++i;
+
+ if (c == '?') { // ? matches any character but null.
+ if (!d)
+ return false;
+ } else if (c != d) { // Literal character must match itself.
+ // If we're at the end of the string or there is no
+ // previous asterisk (anchored search), there's no other
+ // match to find.
+ if (!star || !d || !i)
+ return false;
+
+ // Restart segment search at next position in path.
+ ++path;
+ i = 0;
+ continue;
+ }
+
+ if (!c)
+ return true;
+ }
+}
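+// Illustrative matches (editorial sketch, not part of the original source);
+// comparison is case-insensitive via tolower():
+//   VDFileWildMatch("*.av?", "clip.avi")  -> true   ('*' spans "clip", '?' eats 'i')
+//   VDFileWildMatch("a*b*c", "axxbyyc")   -> true
+//   VDFileWildMatch("a?c",   "ac")        -> false  ('?' must consume one character)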
+
+bool VDFileWildMatch(const wchar_t *pattern, const wchar_t *path) {
+	// What we do here is split the pattern into segments that are bracketed
+	// by sequences of asterisks. The trick is that the first match for a
+	// segment is as good as the best possible match, so we can continue. So
+	// we just take one segment at a time and walk it forward until we find
+	// the first match or we fail.
+ //
+ // Time complexity is O(NM), where N=length of string and M=length of
+ // the pattern. In practice, it's rather fast.
+
+ bool star = false;
+ int i = 0;
+ for(;;) {
+ wchar_t c = towlower(pattern[i]);
+ if (c == L'*') {
+ star = true;
+ pattern += i+1;
+ if (!*pattern)
+ return true;
+ path += i;
+ i = 0;
+ continue;
+ }
+
+ wchar_t d = towlower(path[i]);
+ ++i;
+
+ if (c == L'?') { // ? matches any character but null.
+ if (!d)
+ return false;
+ } else if (c != d) { // Literal character must match itself.
+ // If we're at the end of the string or there is no
+ // previous asterisk (anchored search), there's no other
+ // match to find.
+ if (!star || !d || !i)
+ return false;
+
+ // Restart segment search at next position in path.
+ ++path;
+ i = 0;
+ continue;
+ }
+
+ if (!c)
+ return true;
+ }
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+#include <windows.h>
+#include <vd2/system/w32assist.h>
+
+sint64 VDGetDiskFreeSpace(const wchar_t *path) {
+ typedef BOOL (WINAPI *tpGetDiskFreeSpaceExA)(LPCSTR lpDirectoryName, PULARGE_INTEGER lpFreeBytesAvailable, PULARGE_INTEGER lpTotalNumberOfBytes, PULARGE_INTEGER lpTotalNumberOfFreeBytes);
+ typedef BOOL (WINAPI *tpGetDiskFreeSpaceExW)(LPCWSTR lpDirectoryName, PULARGE_INTEGER lpFreeBytesAvailable, PULARGE_INTEGER lpTotalNumberOfBytes, PULARGE_INTEGER lpTotalNumberOfFreeBytes);
+
+ static bool sbChecked = false;
+ static tpGetDiskFreeSpaceExA spGetDiskFreeSpaceExA;
+ static tpGetDiskFreeSpaceExW spGetDiskFreeSpaceExW;
+
+ if (!sbChecked) {
+ HMODULE hmodKernel = GetModuleHandle("kernel32.dll");
+ spGetDiskFreeSpaceExA = (tpGetDiskFreeSpaceExA)GetProcAddress(hmodKernel, "GetDiskFreeSpaceExA");
+ spGetDiskFreeSpaceExW = (tpGetDiskFreeSpaceExW)GetProcAddress(hmodKernel, "GetDiskFreeSpaceExW");
+
+ sbChecked = true;
+ }
+
+ if (spGetDiskFreeSpaceExA) {
+ BOOL success;
+ uint64 freeClient, totalBytes, totalFreeBytes;
+ VDStringW directoryName(path);
+
+ if (!directoryName.empty()) {
+ wchar_t c = directoryName[directoryName.length()-1];
+
+ if (c != L'/' && c != L'\\')
+ directoryName += L'\\';
+ }
+
+ if ((LONG)GetVersion() < 0)
+ success = spGetDiskFreeSpaceExA(VDTextWToA(directoryName).c_str(), (PULARGE_INTEGER)&freeClient, (PULARGE_INTEGER)&totalBytes, (PULARGE_INTEGER)&totalFreeBytes);
+ else
+ success = spGetDiskFreeSpaceExW(directoryName.c_str(), (PULARGE_INTEGER)&freeClient, (PULARGE_INTEGER)&totalBytes, (PULARGE_INTEGER)&totalFreeBytes);
+
+ return success ? (sint64)freeClient : -1;
+ } else {
+ DWORD sectorsPerCluster, bytesPerSector, freeClusters, totalClusters;
+ BOOL success;
+
+ VDStringW rootPath(VDFileGetRootPath(path));
+
+ if ((LONG)GetVersion() < 0)
+ success = GetDiskFreeSpaceA(rootPath.empty() ? NULL : VDTextWToA(rootPath).c_str(), &sectorsPerCluster, &bytesPerSector, &freeClusters, &totalClusters);
+ else
+ success = GetDiskFreeSpaceW(rootPath.empty() ? NULL : rootPath.c_str(), &sectorsPerCluster, &bytesPerSector, &freeClusters, &totalClusters);
+
+ return success ? (sint64)((uint64)sectorsPerCluster * bytesPerSector * freeClusters) : -1;
+ }
+}
+
+bool VDDoesPathExist(const wchar_t *fileName) {
+ bool bExists;
+
+ if (!(GetVersion() & 0x80000000)) {
+ bExists = ((DWORD)-1 != GetFileAttributesW(fileName));
+ } else {
+ bExists = ((DWORD)-1 != GetFileAttributesA(VDTextWToA(fileName).c_str()));
+ }
+
+ return bExists;
+}
+
+void VDCreateDirectory(const wchar_t *path) {
+ // can't create dir with trailing slash
+ VDStringW::size_type l(wcslen(path));
+
+ if (l) {
+ const wchar_t c = path[l-1];
+
+ if (c == L'/' || c == L'\\') {
+ VDCreateDirectory(VDStringW(path, l-1).c_str());
+ return;
+ }
+ }
+
+ BOOL succeeded;
+
+ if (!(GetVersion() & 0x80000000)) {
+ succeeded = CreateDirectoryW(path, NULL);
+ } else {
+ succeeded = CreateDirectoryA(VDTextWToA(path).c_str(), NULL);
+ }
+
+ if (!succeeded)
+ throw MyWin32Error("Cannot create directory: %%s", GetLastError());
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+bool VDDeletePathAutodetect(const wchar_t *path);
+bool (*VDRemoveFile)(const wchar_t *path) = VDDeletePathAutodetect;
+
+namespace {
+ typedef BOOL (APIENTRY *tpDeleteFileW)(LPCWSTR path);
+ tpDeleteFileW spDeleteFileW;
+}
+
+bool VDDeleteFile9x(const wchar_t *path) {
+ return !!DeleteFileA(VDTextWToA(path).c_str());
+}
+
+bool VDDeleteFileNT(const wchar_t *path) {
+ return !!spDeleteFileW(path);
+}
+
+bool VDDeletePathAutodetect(const wchar_t *path) {
+ if (VDIsWindowsNT()) {
+ spDeleteFileW = (tpDeleteFileW)GetProcAddress(GetModuleHandle("kernel32"), "DeleteFileW");
+ VDRemoveFile = VDDeleteFileNT;
+ } else
+ VDRemoveFile = VDDeleteFile9x;
+
+ return VDRemoveFile(path);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+namespace {
+ typedef BOOL (WINAPI *tpGetVolumePathNameW)(LPCWSTR lpszPathName, LPWSTR lpszVolumePathName, DWORD cchBufferLength);
+ typedef BOOL (WINAPI *tpGetFullPathNameW)(LPCWSTR lpFileName, DWORD nBufferLength, LPWSTR lpBuffer, LPWSTR *lpFilePart);
+}
+
+uint64 VDFileGetLastWriteTime(const wchar_t *path) {
+ if (VDIsWindowsNT()) {
+ WIN32_FIND_DATAW fdw;
+ HANDLE h = FindFirstFileW(path, &fdw);
+ if (h == INVALID_HANDLE_VALUE)
+ return 0;
+
+ FindClose(h);
+
+ return ((uint64)fdw.ftLastWriteTime.dwHighDateTime << 32) + fdw.ftLastWriteTime.dwLowDateTime;
+ } else {
+ WIN32_FIND_DATAA fda;
+ HANDLE h = FindFirstFileA(VDTextWToA(path).c_str(), &fda);
+ if (h == INVALID_HANDLE_VALUE)
+ return 0;
+
+ FindClose(h);
+
+ return ((uint64)fda.ftLastWriteTime.dwHighDateTime << 32) + fda.ftLastWriteTime.dwLowDateTime;
+ }
+}
+
+VDStringW VDFileGetRootPath(const wchar_t *path) {
+ static tpGetVolumePathNameW spGetVolumePathNameW = (tpGetVolumePathNameW)GetProcAddress(GetModuleHandle("kernel32.dll"), "GetVolumePathNameW");
+ static tpGetFullPathNameW spGetFullPathNameW = (tpGetFullPathNameW)GetProcAddress(GetModuleHandle("kernel32.dll"), "GetFullPathNameW");
+
+ VDStringW fullPath(VDGetFullPath(path));
+
+ // Windows 2000/XP path
+ if (spGetVolumePathNameW) {
+ vdblock<wchar_t> buf(std::max<size_t>(fullPath.size() + 1, MAX_PATH));
+
+ if (spGetVolumePathNameW(path, buf.data(), buf.size()))
+ return VDStringW(buf.data());
+ }
+
+ // Windows 95/98/ME/NT4 path
+ const wchar_t *s = fullPath.c_str();
+ VDStringW root(s, VDFileSplitRoot(s) - s);
+ VDFileFixDirPath(root);
+ return root;
+}
+
+VDStringW VDGetFullPath(const wchar_t *partialPath) {
+ static tpGetFullPathNameW spGetFullPathNameW = (tpGetFullPathNameW)GetProcAddress(GetModuleHandle("kernel32.dll"), "GetFullPathNameW");
+
+ union {
+ char a[MAX_PATH];
+ wchar_t w[MAX_PATH];
+ } tmpBuf;
+
+ if (spGetFullPathNameW && !(GetVersion() & 0x80000000)) {
+ LPWSTR p;
+
+ tmpBuf.w[0] = 0;
+ DWORD count = spGetFullPathNameW(partialPath, MAX_PATH, tmpBuf.w, &p);
+
+ if (count < MAX_PATH)
+ return VDStringW(tmpBuf.w);
+
+ VDStringW tmp(count);
+
+ DWORD newCount = spGetFullPathNameW(partialPath, count, (wchar_t *)tmp.data(), &p);
+ if (newCount < count)
+ return tmp;
+
+ return VDStringW(partialPath);
+ } else {
+ LPSTR p;
+ VDStringA pathA(VDTextWToA(partialPath));
+
+ tmpBuf.a[0] = 0;
+ DWORD count = GetFullPathNameA(pathA.c_str(), MAX_PATH, tmpBuf.a, &p);
+
+ if (count < MAX_PATH)
+ return VDStringW(VDTextAToW(tmpBuf.a));
+
+ VDStringA tmpA(count);
+
+ DWORD newCount = GetFullPathNameA(pathA.c_str(), count, (char *)tmpA.data(), &p);
+ if (newCount < count)
+ return VDTextAToW(tmpA);
+
+ return VDStringW(partialPath);
+ }
+}
+
+VDStringW VDMakePath(const wchar_t *base, const wchar_t *file) {
+ if (!*base)
+ return VDStringW(file);
+
+ VDStringW result(base);
+
+ const wchar_t c = result[result.size() - 1];
+
+ if (c != L'/' && c != L'\\' && c != L':')
+ result += L'\\';
+
+ result.append(file);
+
+ return result;
+}
+
+void VDFileFixDirPath(VDStringW& path) {
+ if (!path.empty()) {
+ wchar_t c = path[path.size()-1];
+
+ if (c != L'/' && c != L'\\' && c != L':')
+ path += L'\\';
+ }
+}
+
+namespace {
+ VDStringW VDGetModulePathW32(HINSTANCE hInst) {
+ union {
+ wchar_t w[MAX_PATH];
+ char a[MAX_PATH];
+ } buf;
+
+ VDStringW wstr;
+
+ if (VDIsWindowsNT()) {
+ wcscpy(buf.w, L".");
+ if (GetModuleFileNameW(hInst, buf.w, MAX_PATH))
+ *VDFileSplitPath(buf.w) = 0;
+ wstr = buf.w;
+ } else {
+ strcpy(buf.a, ".");
+ if (GetModuleFileNameA(hInst, buf.a, MAX_PATH))
+ *VDFileSplitPath(buf.a) = 0;
+ wstr = VDTextAToW(buf.a, -1);
+ }
+
+ VDStringW wstr2(VDGetFullPath(wstr.c_str()));
+
+ return wstr2;
+ }
+}
+
+VDStringW VDGetLocalModulePath() {
+ return VDGetModulePathW32(VDGetLocalModuleHandleW32());
+}
+
+VDStringW VDGetProgramPath() {
+ return VDGetModulePathW32(NULL);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDDirectoryIterator::VDDirectoryIterator(const wchar_t *path)
+ : mSearchPath(path)
+ , mpHandle(NULL)
+ , mbSearchComplete(false)
+{
+ mBasePath = VDFileSplitPathLeft(mSearchPath);
+ VDFileFixDirPath(mBasePath);
+}
+
+VDDirectoryIterator::~VDDirectoryIterator() {
+ if (mpHandle)
+ FindClose((HANDLE)mpHandle);
+}
+
+bool VDDirectoryIterator::Next() {
+ if (mbSearchComplete)
+ return false;
+
+ union {
+ WIN32_FIND_DATAA a;
+ WIN32_FIND_DATAW w;
+ } wfd;
+
+ if (GetVersion() & 0x80000000) {
+ if (mpHandle)
+ mbSearchComplete = !FindNextFileA((HANDLE)mpHandle, &wfd.a);
+ else {
+ mpHandle = FindFirstFileA(VDTextWToA(mSearchPath).c_str(), &wfd.a);
+ mbSearchComplete = (INVALID_HANDLE_VALUE == mpHandle);
+ }
+ if (mbSearchComplete)
+ return false;
+
+ mbDirectory = (wfd.a.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+ mFilename = VDTextAToW(wfd.a.cFileName);
+		mFileSize = wfd.a.nFileSizeLow + ((sint64)wfd.a.nFileSizeHigh << 32);
+ } else {
+ if (mpHandle)
+ mbSearchComplete = !FindNextFileW((HANDLE)mpHandle, &wfd.w);
+ else {
+ mpHandle = FindFirstFileW(mSearchPath.c_str(), &wfd.w);
+ mbSearchComplete = (INVALID_HANDLE_VALUE == mpHandle);
+ }
+ if (mbSearchComplete)
+ return false;
+
+ mbDirectory = (wfd.w.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+ mFilename = wfd.w.cFileName;
+ mFileSize = wfd.w.nFileSizeLow + ((sint64)wfd.w.nFileSizeHigh << 32);
+ }
+
+ return true;
+}
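+// Illustrative usage (editorial note): construct the iterator with a wildcard
+// search path and pump Next(); each successful call fills the filename,
+// directory-flag and size members exposed through <vd2/system/filesys.h>.
+//
+//   VDDirectoryIterator it(L"C:\\clips\\*.avi");   // hypothetical search path
+//   while(it.Next()) {
+//       // examine the current entry via the iterator's accessors
+//   }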
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _DEBUG
+
+struct VDSystemFilesysTestObject {
+ VDSystemFilesysTestObject() {
+#define TEST(fn, x, y1, y2) VDASSERT(!strcmp(fn(x), y2)); VDASSERT(!wcscmp(fn(L##x), L##y2)); VDASSERT(fn##Left(VDStringA(x))==y1); VDASSERT(fn##Right(VDStringA(x))==y2); VDASSERT(fn##Left(VDStringW(L##x))==L##y1); VDASSERT(fn##Right(VDStringW(L##x))==L##y2)
+ TEST(VDFileSplitPath, "", "", "");
+ TEST(VDFileSplitPath, "x", "", "x");
+ TEST(VDFileSplitPath, "x\\y", "x\\", "y");
+ TEST(VDFileSplitPath, "x\\y\\z", "x\\y\\", "z");
+ TEST(VDFileSplitPath, "x\\", "x\\", "");
+ TEST(VDFileSplitPath, "x\\y\\z\\", "x\\y\\z\\", "");
+ TEST(VDFileSplitPath, "c:", "c:", "");
+ TEST(VDFileSplitPath, "c:x", "c:", "x");
+ TEST(VDFileSplitPath, "c:\\", "c:\\", "");
+ TEST(VDFileSplitPath, "c:\\x", "c:\\", "x");
+ TEST(VDFileSplitPath, "c:\\x\\", "c:\\x\\", "");
+ TEST(VDFileSplitPath, "c:\\x\\", "c:\\x\\", "");
+ TEST(VDFileSplitPath, "c:x\\y", "c:x\\", "y");
+ TEST(VDFileSplitPath, "\\\\server\\share\\", "\\\\server\\share\\", "");
+ TEST(VDFileSplitPath, "\\\\server\\share\\x", "\\\\server\\share\\", "x");
+#undef TEST
+#define TEST(fn, x, y1, y2) VDASSERT(!strcmp(fn(x), y2)); VDASSERT(!wcscmp(fn(L##x), L##y2)); VDASSERT(fn(VDStringA(x))==y1); VDASSERT(fn(VDStringW(L##x))==L##y1)
+ TEST(VDFileSplitRoot, "", "", "");
+ TEST(VDFileSplitRoot, "c:", "c:", "");
+ TEST(VDFileSplitRoot, "c:x", "c:", "x");
+ TEST(VDFileSplitRoot, "c:x\\", "c:", "x\\");
+ TEST(VDFileSplitRoot, "c:x\\y", "c:", "x\\y");
+ TEST(VDFileSplitRoot, "c:\\", "c:\\", "");
+ TEST(VDFileSplitRoot, "c:\\x", "c:\\", "x");
+ TEST(VDFileSplitRoot, "c:\\x\\", "c:\\", "x\\");
+ TEST(VDFileSplitRoot, "\\", "\\", "");
+ TEST(VDFileSplitRoot, "\\x", "\\", "x");
+ TEST(VDFileSplitRoot, "\\x\\", "\\", "x\\");
+ TEST(VDFileSplitRoot, "\\x\\y", "\\", "x\\y");
+ TEST(VDFileSplitRoot, "\\\\server\\share", "\\\\server\\share", "");
+ TEST(VDFileSplitRoot, "\\\\server\\share\\", "\\\\server\\share\\", "");
+ TEST(VDFileSplitRoot, "\\\\server\\share\\x", "\\\\server\\share\\", "x");
+ TEST(VDFileSplitRoot, "\\\\server\\share\\x\\", "\\\\server\\share\\", "x\\");
+ TEST(VDFileSplitRoot, "\\\\server\\share\\x\\y", "\\\\server\\share\\", "x\\y");
+#undef TEST
+ }
+} g_VDSystemFilesysTestObject;
+
+#endif
diff --git a/src/thirdparty/VirtualDub/system/source/filewatcher.cpp b/src/thirdparty/VirtualDub/system/source/filewatcher.cpp
new file mode 100644
index 000000000..3d32150fd
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/filewatcher.cpp
@@ -0,0 +1,117 @@
+#include "stdafx.h"
+#include <windows.h>
+#include <vd2/system/filesys.h>
+#include <vd2/system/filewatcher.h>
+#include <vd2/system/thunk.h>
+#include <vd2/system/w32assist.h>
+
+VDFileWatcher::VDFileWatcher()
+ : mChangeHandle(INVALID_HANDLE_VALUE)
+ , mLastWriteTime(0)
+ , mbRepeatRequested(false)
+ , mbThunksInited(false)
+ , mpThunk(NULL)
+ , mTimerId(0)
+{
+}
+
+VDFileWatcher::~VDFileWatcher() {
+ Shutdown();
+}
+
+bool VDFileWatcher::IsActive() const {
+ return mChangeHandle != INVALID_HANDLE_VALUE;
+}
+
+void VDFileWatcher::Init(const wchar_t *file, IVDFileWatcherCallback *callback) {
+ Shutdown();
+
+ const wchar_t *pathEnd = VDFileSplitPath(file);
+
+ VDStringW basePath(file, pathEnd);
+
+ if (basePath.empty())
+ basePath = L".";
+
+ if (VDIsWindowsNT())
+ mChangeHandle = FindFirstChangeNotificationW(basePath.c_str(), FALSE, FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE);
+ else
+ mChangeHandle = FindFirstChangeNotificationA(VDTextWToA(basePath).c_str(), FALSE, FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE);
+
+ if (mChangeHandle == INVALID_HANDLE_VALUE)
+ throw MyError("Unable to monitor file: %ls", file);
+
+ mPath = file;
+ mLastWriteTime = VDFileGetLastWriteTime(mPath.c_str());
+ mpCB = callback;
+ mbRepeatRequested = false;
+
+ if (callback) {
+ if (!mbThunksInited)
+ mbThunksInited = VDInitThunkAllocator();
+
+ if (mbThunksInited) {
+ mpThunk = VDCreateFunctionThunkFromMethod(this, &VDFileWatcher::StaticTimerCallback, true);
+
+ if (mpThunk) {
+ mTimerId = SetTimer(NULL, 0, 1000, (TIMERPROC)mpThunk);
+ }
+ }
+ }
+}
+
+void VDFileWatcher::Shutdown() {
+ if (mChangeHandle != INVALID_HANDLE_VALUE) {
+ FindCloseChangeNotification(mChangeHandle);
+ mChangeHandle = INVALID_HANDLE_VALUE;
+ }
+
+ if (mTimerId) {
+ KillTimer(NULL, mTimerId);
+ mTimerId = 0;
+ }
+
+ if (mpThunk) {
+ VDDestroyFunctionThunk(mpThunk);
+ mpThunk = NULL;
+ }
+
+ if (mbThunksInited) {
+ mbThunksInited = false;
+
+ VDShutdownThunkAllocator();
+ }
+}
+
+bool VDFileWatcher::Wait(uint32 delay) {
+ if (mChangeHandle == INVALID_HANDLE_VALUE)
+ return false;
+
+ if (WAIT_OBJECT_0 != WaitForSingleObject(mChangeHandle, delay))
+ return false;
+
+ FindNextChangeNotification(mChangeHandle);
+
+ uint64 t = VDFileGetLastWriteTime(mPath.c_str());
+
+ if (mLastWriteTime == t)
+ return false;
+
+ mLastWriteTime = t;
+ return true;
+}
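+// Illustrative usage (editorial note): with a null callback the watcher is
+// polled manually; Wait() returns true once the file's last-write time changes.
+//
+//   VDFileWatcher watcher;
+//   watcher.Init(L"C:\\path\\to\\script.sub", NULL);   // hypothetical path
+//   if (watcher.Wait(0)) {
+//       // the watched file was modified externally
+//   }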
+
+void VDFileWatcher::StaticTimerCallback(void *, unsigned, unsigned, unsigned long) {
+ if (mbRepeatRequested) {
+ if (mpCB)
+ mbRepeatRequested = !mpCB->OnFileUpdated(mPath.c_str());
+ else
+ mbRepeatRequested = false;
+ return;
+ }
+
+ if (Wait(0)) {
+ if (mpCB)
+ mbRepeatRequested = !mpCB->OnFileUpdated(mPath.c_str());
+ }
+}
diff --git a/src/thirdparty/VirtualDub/system/source/halffloat.cpp b/src/thirdparty/VirtualDub/system/source/halffloat.cpp
new file mode 100644
index 000000000..9875a3003
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/halffloat.cpp
@@ -0,0 +1,79 @@
+#include "stdafx.h"
+#include <vd2/system/halffloat.h>
+
+uint16 VDConvertFloatToHalf(const void *f) {
+ uint32 v = *(const uint32 *)f;
+
+ uint32 sign = (v >> 16) & 0x8000;
+ sint32 exmant = v & 0x7fffffff;
+
+ if (exmant > 0x7f800000) {
+ // convert NaNs directly
+ exmant = (exmant & 0x00400000) + 0x47a00000;
+ } else if (exmant > 0x47800000) {
+ // clamp large numbers to infinity
+ exmant = 0x47800000;
+ } else if (exmant < 0x33800000) {
+ // clamp very tiny numbers to zero
+ exmant = 0x38000000;
+ } else if (exmant < 0x38800000) {
+ // normalized finite converting to denormal
+ uint32 ex = exmant & 0x7f800000;
+ uint32 mant = (exmant & 0x007fffff) | 0x800000;
+ uint32 sticky = 0;
+
+ while(ex < 0x38800000) {
+ ex += 0x00800000;
+ sticky |= mant;
+ mant >>= 1;
+ }
+
+ // round to nearest even
+ sticky |= mant >> 13;
+
+ // round up with sticky bits
+ mant += (sticky & 1);
+
+ // round up with round bit
+ mant += 0x0fff;
+
+ exmant = ex + mant - 0x800000;
+ } else {
+ // round normal numbers using round to nearest even
+ exmant |= (exmant & 0x00002000) >> 13;
+ exmant += 0x00000fff;
+ }
+
+ // shift and rebias exponent
+ exmant -= 0x38000000;
+ exmant >>= 13;
+
+ return (uint16)(sign + exmant);
+}
+
+void VDConvertHalfToFloat(uint16 h, void *dst) {
+ uint32 sign = ((uint32)h << 16) & 0x80000000;
+ uint32 exmant = (uint32)h & 0x7fff;
+ uint32 v = 0;
+
+ if (exmant >= 0x7c00) {
+ // infinity or NaN
+ v = (exmant << 13) + 0x70000000;
+ } else if (exmant >= 0x0400) {
+ // normalized finite
+ v = (exmant << 13) + 0x38000000;
+ } else if (exmant) {
+ // denormal
+ uint32 ex32 = 0x38000000;
+ uint32 mant32 = (exmant & 0x3ff) << 13;
+
+ while(!(mant32 & 0x800000)) {
+ mant32 <<= 1;
+ ex32 -= 0x800000;
+ }
+
+ v = ex32 + mant32;
+ }
+
+ *(uint32 *)dst = v + sign;
+}
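+// Round-trip examples (editorial note, standard IEEE binary16 encodings):
+//   1.0f <-> 0x3C00        -2.5f <-> 0xC100        65504.0f <-> 0x7BFF
+// Larger magnitudes clamp to infinity (0x7C00 / 0xFC00) on the float-to-half path.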
diff --git a/src/thirdparty/VirtualDub/system/source/hash.cpp b/src/thirdparty/VirtualDub/system/source/hash.cpp
new file mode 100644
index 000000000..8962a511d
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/hash.cpp
@@ -0,0 +1,98 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/hash.h>
+#include <vd2/system/binary.h>
+
+// Based on: SuperFastHash by Paul Hsieh
+// http://www.azillionmonkeys.com/qed/hash.html
+
+uint32 VDHashString32(const char *s) {
+ uint32 len = (uint32)strlen(s);
+
+ return VDHashString32(s, len);
+}
+
+uint32 VDHashString32(const char *s, uint32 len) {
+ uint32 hash = len;
+
+ uint32 rem = len & 3;
+ len >>= 2;
+
+ uint32 tmp;
+ for(uint32 i=0; i<len; ++i) {
+ hash += VDReadUnalignedU16(s);
+ tmp = (VDReadUnalignedU16(s + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ s += 4;
+ hash += hash >> 11;
+ }
+
+ switch(rem) {
+ case 3:
+ hash += VDReadUnalignedU16(s);
+ hash ^= hash << 16;
+ hash ^= (uint32)(uint8)s[2] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += VDReadUnalignedU16(s);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += (uint8)s[0];
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ break;
+ }
+
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+
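+// Editorial note: the case-insensitive variants below use a simple FNV-style
+// multiplicative hash (offset basis 2166136261, prime 16777619) over
+// towlower()'ed characters, rather than the SuperFastHash routine above.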
+uint32 VDHashString32I(const wchar_t *s) {
+ uint32 len = (uint32)wcslen(s);
+
+ return VDHashString32I(s, len);
+}
+
+uint32 VDHashString32I(const wchar_t *s, uint32 len) {
+ uint32 hash = 2166136261;
+
+ for(uint32 i=0; i<len; ++i) {
+ hash *= 16777619;
+ hash ^= (uint32)towlower(*s++);
+ }
+
+ return hash;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/int128.cpp b/src/thirdparty/VirtualDub/system/source/int128.cpp
new file mode 100644
index 000000000..fbc8ece86
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/int128.cpp
@@ -0,0 +1,478 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <math.h>
+
+#include <vd2/system/int128.h>
+
+#ifndef _M_AMD64
+ void __declspec(naked) __cdecl vdasm_uint128_add(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
+ __asm {
+ push ebx
+
+ mov ebx, [esp+16]
+ mov ecx, [esp+12]
+ mov edx, [esp+8]
+
+ mov eax, [ecx+0]
+ add eax, [ebx+0]
+ mov [edx+0],eax
+ mov eax, [ecx+4]
+ adc eax, [ebx+4]
+ mov [edx+4],eax
+ mov eax, [ecx+8]
+ adc eax, [ebx+8]
+ mov [edx+8],eax
+ mov eax, [ecx+12]
+ adc eax, [ebx+12]
+ mov [edx+12],eax
+
+ pop ebx
+ ret
+ }
+ }
+
+ void __declspec(naked) __cdecl vdasm_uint128_sub(uint64 dst[2], const uint64 x[2], const uint64 y[2]) {
+ __asm {
+ push ebx
+
+ mov ebx, [esp+16]
+ mov ecx, [esp+12]
+ mov edx, [esp+8]
+
+ mov eax, [ecx+0]
+ sub eax, [ebx+0]
+ mov [edx+0],eax
+ mov eax, [ecx+4]
+ sbb eax, [ebx+4]
+ mov [edx+4],eax
+ mov eax, [ecx+8]
+ sbb eax, [ebx+8]
+ mov [edx+8],eax
+ mov eax, [ecx+12]
+ sbb eax, [ebx+12]
+ mov [edx+12],eax
+
+ pop ebx
+ ret
+ }
+ }
+
+ void __declspec(naked) vdint128::setSquare(sint64 v) {
+ __asm {
+ push edi
+ push esi
+ push ebx
+ mov eax, [esp+20]
+ cdq
+ mov esi, eax
+ mov eax, [esp+16]
+ xor eax, edx
+ xor esi, edx
+ sub eax, edx
+ sbb esi, edx
+ mov ebx, eax
+ mul eax
+ mov [ecx], eax
+ mov edi, edx
+ mov eax, ebx
+ mul esi
+ mov ebx, 0
+ add eax, eax
+ adc edx, edx
+ add eax, edi
+ adc edx, 0
+ mov edi, edx
+ adc ebx, 0
+ mov [ecx+4], eax
+ mov eax, esi
+ mul esi
+ add eax, edi
+ adc edx, ebx
+ mov [ecx+8], eax
+ mov [ecx+12], edx
+ pop ebx
+ pop esi
+ pop edi
+ ret 8
+ }
+ }
+
+ const vdint128 __declspec(naked) vdint128::operator<<(int v) const {
+ __asm {
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,ecx
+ mov edx,[esp+20]
+
+ mov ecx,[esp+24]
+ cmp ecx,128
+ jae zeroit
+
+ mov eax,[esi+12]
+ mov ebx,[esi+8]
+ mov edi,[esi+4]
+ mov ebp,[esi]
+
+ dwordloop:
+ cmp ecx,32
+ jb bits
+
+ mov eax,ebx
+ mov ebx,edi
+ mov edi,ebp
+ xor ebp,ebp
+ sub ecx,32
+ jmp short dwordloop
+
+ bits:
+ shld eax,ebx,cl
+ shld ebx,edi,cl
+ mov [edx+12],eax
+ mov [edx+8],ebx
+ shld edi,ebp,cl
+
+ shl ebp,cl
+ mov [edx+4],edi
+ mov [edx],ebp
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+
+ zeroit:
+ xor eax,eax
+ mov [edx+0],eax
+ mov [edx+4],eax
+ mov [edx+8],eax
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+ }
+ }
+
+ const vdint128 __declspec(naked) vdint128::operator>>(int v) const {
+ __asm {
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,ecx
+ mov edx,[esp+20]
+
+ mov eax,[esi+12]
+ mov ecx,[esp+24]
+ cmp ecx,127
+ jae clearit
+
+ mov ebx,[esi+8]
+ mov edi,[esi+4]
+ mov ebp,[esi]
+
+ dwordloop:
+ cmp ecx,32
+ jb bits
+
+ mov ebp,edi
+ mov edi,ebx
+ mov ebx,eax
+ sar eax,31
+ sub ecx,32
+ jmp short dwordloop
+
+ bits:
+ shrd ebp,edi,cl
+ shrd edi,ebx,cl
+ mov [edx],ebp
+ mov [edx+4],edi
+ shrd ebx,eax,cl
+
+ sar eax,cl
+ mov [edx+8],ebx
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+
+ clearit:
+ sar eax, 31
+ mov [edx+0],eax
+ mov [edx+4],eax
+ mov [edx+8],eax
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+ }
+ }
+
+ const vduint128 __declspec(naked) vduint128::operator<<(int v) const {
+ __asm {
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,ecx
+ mov edx,[esp+20]
+
+ mov ecx,[esp+24]
+ cmp ecx,128
+ jae zeroit
+
+ mov eax,[esi+12]
+ mov ebx,[esi+8]
+ mov edi,[esi+4]
+ mov ebp,[esi]
+
+ dwordloop:
+ cmp ecx,32
+ jb bits
+
+ mov eax,ebx
+ mov ebx,edi
+ mov edi,ebp
+ xor ebp,ebp
+ sub ecx,32
+ jmp short dwordloop
+
+ bits:
+ shld eax,ebx,cl
+ shld ebx,edi,cl
+ mov [edx+12],eax
+ mov [edx+8],ebx
+ shld edi,ebp,cl
+
+ shl ebp,cl
+ mov [edx+4],edi
+ mov [edx],ebp
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+
+ zeroit:
+ xor eax,eax
+ mov [edx+0],eax
+ mov [edx+4],eax
+ mov [edx+8],eax
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+ }
+ }
+
+ const vduint128 __declspec(naked) vduint128::operator>>(int v) const {
+ __asm {
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,ecx
+ mov edx,[esp+20]
+
+ mov eax,[esi+12]
+ mov ecx,[esp+24]
+ cmp ecx,127
+ jae clearit
+
+ mov ebx,[esi+8]
+ mov edi,[esi+4]
+ mov ebp,[esi]
+
+ dwordloop:
+ cmp ecx,32
+ jb bits
+
+ mov ebp,edi
+ mov edi,ebx
+ mov ebx,eax
+ xor eax,eax
+ sub ecx,32
+ jmp short dwordloop
+
+ bits:
+ shrd ebp,edi,cl
+ shrd edi,ebx,cl
+ mov [edx],ebp
+ mov [edx+4],edi
+ shrd ebx,eax,cl
+
+ shr eax,cl
+ mov [edx+8],ebx
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+
+ clearit:
+ sar eax, 31
+ mov [edx+0],eax
+ mov [edx+4],eax
+ mov [edx+8],eax
+ mov [edx+12],eax
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ mov eax,[esp+4]
+ ret 8
+ }
+ }
+#endif
+
+const vdint128 vdint128::operator*(const vdint128& x) const {
+ vdint128 X = x.abs();
+ vdint128 Y = abs();
+
+ vduint128 bd(VDUMul64x64To128(X.q[0], Y.q[0]));
+
+ bd.q[1] += X.q[0]*Y.q[1] + X.q[1]*Y.q[0];
+
+ return (q[1]^x.q[1])<0 ? -(const vdint128&)bd : (const vdint128&)bd;
+}
+
+const vdint128 vdint128::operator/(int x) const {
+ vdint128 r;
+ sint64 accum;
+
+ r.d[3] = d[3] / x;
+
+ accum = ((sint64)(d[3] % x) << 32) + d[2];
+ r.d[2] = (sint32)(accum / x);
+
+ accum = ((accum % x) << 32) + d[1];
+ r.d[1] = (sint32)(accum / x);
+
+ accum = ((accum % x) << 32) + d[0];
+ r.d[0] = (sint32)(accum / x);
+
+ return r;
+}
+
+vdint128::operator double() const {
+ return (double)(unsigned long)q[0]
+ + ldexp((double)(unsigned long)((unsigned __int64)q[0]>>32), 32)
+ + ldexp((double)q[1], 64);
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+const vduint128 vduint128::operator*(const vduint128& x) const {
+ vduint128 result(VDUMul64x64To128(q[0], x.q[0]));
+
+ result.q[1] += q[0]*x.q[1] + q[1]*x.q[0];
+
+ return result;
+}
+
+#ifdef _M_IX86
+ vduint128 __declspec(naked) __cdecl VDUMul64x64To128(uint64 x, uint64 y) {
+ __asm {
+ mov ecx,[esp+4]
+
+ mov eax,[esp+8]
+ mul dword ptr [esp+16] ;EDX:EAX = BD
+ mov [ecx+0],eax
+ mov [ecx+4],edx
+
+ mov eax,[esp+12]
+ mul dword ptr [esp+20] ;EDX:EAX = AC
+ mov [ecx+8],eax
+ mov [ecx+12],edx
+
+ mov eax,[esp+8]
+ mul dword ptr [esp+20] ;EDX:EAX = BC
+ add [ecx+4],eax
+ adc [ecx+8],edx
+ adc dword ptr [ecx+12], 0
+
+ mov eax,[esp+12]
+ mul dword ptr [esp+16] ;EDX:EAX = AD
+ add [ecx+4],eax
+ adc [ecx+8],edx
+ adc dword ptr [ecx+12], 0
+
+ mov eax, ecx
+ ret
+ }
+ }
+#endif
+
+uint64 VDUDiv128x64To64(const vduint128& dividend, uint64 divisor, uint64& remainder) {
+ vduint128 temp(dividend);
+ vduint128 divisor2(divisor);
+
+ divisor2 <<= 63;
+
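+	// Editorial note: plain restoring shift-subtract long division; each of the
+	// 64 iterations produces one quotient bit, and the remainder ends up in the
+	// upper 64 bits of the working value (temp.q[1]).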
+ uint64 result = 0;
+ for(int i=0; i<64; ++i) {
+ result += result;
+ if (temp >= divisor2) {
+ temp -= divisor2;
+ ++result;
+ }
+ temp += temp;
+ }
+
+ remainder = temp.q[1];
+
+ return result;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/list.cpp b/src/thirdparty/VirtualDub/system/source/list.cpp
new file mode 100644
index 000000000..bf443b6a6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/list.cpp
@@ -0,0 +1,97 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+///////////////////////////////////////////////////////////////////////////
+//
+// For those of you who say this looks familiar... it should. This is
+// the same linked-list style that the Amiga Exec uses, with dummy head
+// and tail nodes. It's really a very convenient way to implement
+// doubly-linked lists.
+//
+
+#include "stdafx.h"
+#include <algorithm>
+#include <vd2/system/list.h>
+
+List::List() {
+ Init();
+}
+
+void List::Init() {
+ head.next = tail.prev = 0;
+ head.prev = &tail;
+ tail.next = &head;
+}
+
+ListNode *List::RemoveHead() {
+ if (head.prev->prev) {
+ ListNode *t = head.prev;
+
+ head.prev->Remove();
+ return t;
+ }
+
+ return 0;
+}
+
+ListNode *List::RemoveTail() {
+ if (tail.next->next) {
+ ListNode *t = tail.next;
+
+ tail.next->Remove();
+ return t;
+ }
+
+ return 0;
+}
+
+void List::Take(List &from) {
+ if (from.IsEmpty())
+ return;
+
+ head.prev = from.head.prev;
+ tail.next = from.tail.next;
+ head.prev->next = &head;
+ tail.next->prev = &tail;
+
+ from.Init();
+}
+
+void List::Swap(List &dst) {
+ if (IsEmpty())
+ Take(dst);
+ else if (dst.IsEmpty())
+ dst.Take(*this);
+ else {
+ std::swap(head.prev, dst.head.prev);
+ std::swap(tail.next, dst.tail.next);
+
+ head.prev->next = &head;
+ tail.next->prev = &tail;
+
+ dst.head.prev->next = &dst.head;
+ dst.tail.next->prev = &dst.tail;
+ }
+}
diff --git a/src/thirdparty/VirtualDub/system/source/log.cpp b/src/thirdparty/VirtualDub/system/source/log.cpp
new file mode 100644
index 000000000..fce3df920
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/log.cpp
@@ -0,0 +1,171 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdtypes.h>
+#include <list>
+#include <utility>
+#include <vd2/system/log.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/VDString.h>
+
+namespace {
+ wchar_t g_log[16384]; // 32K log
+ int g_logHead, g_logTail;
+ VDCriticalSection g_csLog;
+
+ typedef std::list<std::pair<IVDLogger *, VDThreadID> > tVDLoggers;
+ tVDLoggers g_loggers;
+}
+
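+// Editorial note on the layout: g_log is a 16384-slot wchar_t ring buffer. Each
+// entry is stored as [severity][message characters][NUL]; g_logHead and
+// g_logTail are indices masked to 16383, and old entries are dropped from the
+// head until a new entry fits.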
+void VDLog(int severity, const VDStringW& s) {
+ int strSize = s.size() + 1;
+
+ if (strSize >= 16384) {
+ VDASSERT(false);
+ return;
+ }
+
+ vdsynchronized(g_csLog) {
+ for(;;) {
+ int currentSize = (g_logTail - g_logHead) & 16383;
+
+ if (currentSize + strSize < 16384) // NOTE: This means that the last byte in the ring buffer can never be used.
+ break;
+
+ while(g_log[g_logHead++ & 16383])
+ ;
+
+ g_logHead &= 16383;
+ }
+
+ const wchar_t *ps = s.data();
+
+ g_log[g_logTail++] = severity;
+
+ for(int i=1; i<strSize; ++i)
+ g_log[g_logTail++ & 16383] = *ps++;
+
+ g_log[g_logTail++ & 16383] = 0;
+
+ g_logTail &= 16383;
+
+ VDThreadID currentThread = VDGetCurrentThreadID();
+ for(tVDLoggers::const_iterator it(g_loggers.begin()), itEnd(g_loggers.end()); it!=itEnd; ++it) {
+ if (!(*it).second || currentThread == (*it).second)
+ (*it).first->AddLogEntry(severity, s);
+ }
+ }
+}
+
+void VDLogF(int severity, const wchar_t *format, ...) {
+ va_list val;
+ va_start(val, format);
+ VDStringW s;
+ s.append_vsprintf(format, val);
+ va_end(val);
+
+ VDLog(severity, s);
+}
+
+void VDAttachLogger(IVDLogger *pLogger, bool bThisThreadOnly, bool bReplayLog) {
+ vdsynchronized(g_csLog) {
+ g_loggers.push_back(tVDLoggers::value_type(pLogger, bThisThreadOnly ? VDGetCurrentThreadID() : 0));
+
+ if (bReplayLog) {
+ int idx = g_logHead;
+
+ while(idx != g_logTail) {
+ int severity = g_log[idx++];
+ int headidx = idx;
+
+ idx &= 16383;
+
+ for(;;) {
+ wchar_t c = g_log[idx];
+
+ idx = (idx+1) & 16383;
+
+ if (!c)
+ break;
+ }
+
+ if (idx > headidx) {
+ pLogger->AddLogEntry(severity, VDStringW(g_log + headidx, idx-headidx-1));
+ } else {
+ VDStringW t(idx+16383-headidx);
+
+ std::copy(g_log + headidx, g_log + 16384, const_cast<wchar_t *>(t.data()));
+ std::copy(g_log, g_log + idx - 1, const_cast<wchar_t *>(t.data() + (16384 - headidx)));
+ pLogger->AddLogEntry(severity, t);
+ }
+ }
+ }
+ }
+}
+
+void VDDetachLogger(IVDLogger *pLogger) {
+ vdsynchronized(g_csLog) {
+ for(tVDLoggers::iterator it(g_loggers.begin()), itEnd(g_loggers.end()); it!=itEnd; ++it) {
+ if (pLogger == (*it).first) {
+ g_loggers.erase(it);
+ break;
+ }
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// autologger
+//
+///////////////////////////////////////////////////////////////////////////
+
+VDAutoLogger::VDAutoLogger(int min_severity)
+ : mbAttached(true)
+ , mMinSeverity(min_severity)
+{
+ VDAttachLogger(this, false, false);
+}
+
+VDAutoLogger::~VDAutoLogger() {
+ if (mbAttached)
+ VDDetachLogger(this);
+}
+
+void VDAutoLogger::AddLogEntry(int severity, const VDStringW& s) {
+ if (severity >= mMinSeverity)
+ mEntries.push_back(Entry(severity, s));
+}
+
+const VDAutoLogger::tEntries& VDAutoLogger::GetEntries() {
+ if (mbAttached) {
+ VDDetachLogger(this);
+ mbAttached = false;
+ }
+
+ return mEntries;
+}
+
diff --git a/src/thirdparty/VirtualDub/system/source/math.cpp b/src/thirdparty/VirtualDub/system/source/math.cpp
new file mode 100644
index 000000000..5368b13dc
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/math.cpp
@@ -0,0 +1,146 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <math.h>
+#include <vd2/system/math.h>
+#include <vd2/system/int128.h>
+
+int VDRoundToInt(double x) {
+ return (int)floor(x + 0.5);
+}
+
+long VDRoundToLong(double x) {
+ return (long)floor(x + 0.5);
+}
+
+sint32 VDRoundToInt32(double x) {
+ return (sint32)floor(x + 0.5);
+}
+
+sint64 VDRoundToInt64(double x) {
+ return (sint64)floor(x + 0.5);
+}
+
+#ifdef _M_IX86
+ sint64 __declspec(naked) __stdcall VDFractionScale64(uint64 a, uint32 b, uint32 c, uint32& remainder) {
+ __asm {
+ push edi
+ push ebx
+ mov edi, [esp+12+8] ;edi = b
+ mov eax, [esp+4+8] ;eax = a[lo]
+ mul edi ;edx:eax = a[lo]*b
+ mov ecx, eax ;ecx = (a*b)[lo]
+ mov eax, [esp+8+8] ;eax = a[hi]
+ mov ebx, edx ;ebx = (a*b)[mid]
+ mul edi ;edx:eax = a[hi]*b
+ add eax, ebx
+ mov ebx, [esp+16+8] ;ebx = c
+ adc edx, 0
+ div ebx ;eax = (a*b)/c [hi], edx = (a[hi]*b)%c
+ mov edi, eax ;edi = (a[hi]*b)/c
+ mov eax, ecx ;eax = (a*b)[lo]
+ mov ecx, [esp+20+8]
+ div ebx ;eax = (a*b)/c [lo], edx = (a*b)%c
+ mov [ecx], edx
+ mov edx, edi
+ pop ebx
+ pop edi
+ ret 20
+ }
+ }
+
+ uint64 __declspec(naked) __stdcall VDUMulDiv64x32(uint64 a, uint32 b, uint32 c) {
+ __asm {
+ mov eax, [esp+4] ;eax = a0
+ mul dword ptr [esp+12] ;edx:eax = a0*b
+ mov dword ptr [esp+4], eax ;tmp = a0*b[0:31]
+ mov ecx, edx ;ecx = a0*b[32:63]
+ mov eax, [esp+8] ;eax = a1
+ mul dword ptr [esp+12] ;edx:eax = a1*b
+ add eax, ecx ;edx:eax += a0*b[32:95]
+ adc edx, 0 ;(cont.)
+ cmp edx, [esp+16] ;test if a*b[64:95] >= c; equiv to a*b >= (c<<64)
+ jae invalid ;abort if so (overflow)
+ div dword ptr [esp+16] ;edx,eax = ((a*b)[32:95]/c, (a*b)[32:95]%c)
+ mov ecx, eax
+ mov eax, [esp+4]
+ div dword ptr [esp+16]
+ mov edx, ecx
+ ret 16
+invalid:
+ mov eax, -1 ;return FFFFFFFF'FFFFFFFF
+ mov edx, -1
+ ret 16
+ }
+ }
+#endif
+
+sint64 VDMulDiv64(sint64 a, sint64 b, sint64 c) {
+ bool flip = false;
+
+ if (a < 0) {
+ a = -a;
+ flip = true;
+ }
+
+ if (b < 0) {
+ b = -b;
+ flip = !flip;
+ }
+
+ if (c < 0) {
+ c = -c;
+ flip = !flip;
+ }
+
+ uint64 rem;
+ uint64 v = VDUDiv128x64To64(VDUMul64x64To128((uint64)a, (uint64)b), (uint64)c, rem);
+
+ if ((rem+rem) >= (uint64)c)
+ ++v;
+
+ return flip ? -(sint64)v : (sint64)v;
+}
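+// Worked examples (editorial note): VDMulDiv64(5, 7, 3) == 12 and
+// VDMulDiv64(-5, 7, 3) == -12, since 35/3 rounds to nearest; the 128-bit
+// intermediate keeps a*b from overflowing before the division.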
+
+bool VDVerifyFiniteFloats(const float *p0, uint32 n) {
+ const uint32 *p = (const uint32 *)p0;
+
+ while(n--) {
+ uint32 v = *p++;
+
+ // 00000000 zero
+ // 00000001-007FFFFF denormal
+ // 00800000-7F7FFFFF finite
+ // 7F800000 infinity
+ // 7F800001-7FBFFFFF SNaN
+ // 7FC00000-7FFFFFFF QNaN
+
+ if ((v & 0x7FFFFFFF) >= 0x7F800000)
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/memory.cpp b/src/thirdparty/VirtualDub/system/source/memory.cpp
new file mode 100644
index 000000000..3e03b5d34
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/memory.cpp
@@ -0,0 +1,456 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <malloc.h>
+#include <windows.h>
+#include <vd2/system/atomic.h>
+#include <vd2/system/memory.h>
+#include <vd2/system/cpuaccel.h>
+
+void *VDAlignedMalloc(size_t n, unsigned alignment) {
+ return _aligned_malloc(n, alignment);
+}
+
+void VDAlignedFree(void *p) {
+ _aligned_free(p);
+}
+
+void *VDAlignedVirtualAlloc(size_t n) {
+ return VirtualAlloc(NULL, n, MEM_COMMIT, PAGE_READWRITE);
+}
+
+void VDAlignedVirtualFree(void *p) {
+ VirtualFree(p, 0, MEM_RELEASE);
+}
+
+void VDSwapMemoryScalar(void *p0, void *p1, size_t bytes) {
+ uint32 *dst0 = (uint32 *)p0;
+ uint32 *dst1 = (uint32 *)p1;
+
+ while(bytes >= 4) {
+ uint32 a = *dst0;
+ uint32 b = *dst1;
+
+ *dst0++ = b;
+ *dst1++ = a;
+
+ bytes -= 4;
+ }
+
+ char *dstb0 = (char *)dst0;
+ char *dstb1 = (char *)dst1;
+
+ while(bytes--) {
+ char a = *dstb0;
+ char b = *dstb1;
+
+ *dstb0++ = b;
+ *dstb1++ = a;
+ }
+}
+
+#if defined(VD_CPU_AMD64) || defined(VD_CPU_X86)
+ void VDSwapMemorySSE(void *p0, void *p1, size_t bytes) {
+ if (((uint32)(size_t)p0 | (uint32)(size_t)p1) & 15)
+ return VDSwapMemoryScalar(p0, p1, bytes);
+
+ __m128 *pv0 = (__m128 *)p0;
+ __m128 *pv1 = (__m128 *)p1;
+
+ size_t veccount = bytes >> 4;
+ if (veccount) {
+ do {
+ __m128 v0 = *pv0;
+ __m128 v1 = *pv1;
+
+ *pv0++ = v1;
+ *pv1++ = v0;
+ } while(--veccount);
+ }
+
+ uint32 left = bytes & 15;
+ if (left) {
+ uint8 *pb0 = (uint8 *)pv0;
+ uint8 *pb1 = (uint8 *)pv1;
+ do {
+ uint8 b0 = *pb0;
+ uint8 b1 = *pb1;
+
+ *pb0++ = b1;
+ *pb1++ = b0;
+ } while(--left);
+ }
+ }
+#endif
+
+void (__cdecl *VDSwapMemory)(void *p0, void *p1, size_t bytes) = VDSwapMemoryScalar;
+
+void VDInvertMemory(void *p, unsigned bytes) {
+ char *dst = (char *)p;
+
+ if (!bytes)
+ return;
+
+ while((int)dst & 3) {
+ *dst = ~*dst;
+ ++dst;
+
+ if (!--bytes)
+ return;
+ }
+
+ unsigned lcount = bytes >> 2;
+
+ if (lcount)
+ do {
+ *(long *)dst = ~*(long *)dst;
+ dst += 4;
+ } while(--lcount);
+
+ bytes &= 3;
+
+ while(bytes--) {
+ *dst = ~*dst;
+ ++dst;
+ }
+}
+
+namespace {
+ uintptr VDGetSystemPageSizeW32() {
+ SYSTEM_INFO sysInfo;
+ GetSystemInfo(&sysInfo);
+
+ return sysInfo.dwPageSize;
+ }
+
+ uintptr VDGetSystemPageSize() {
+ static uintptr pageSize = VDGetSystemPageSizeW32();
+
+ return pageSize;
+ }
+}
+
+bool VDIsValidReadRegion(const void *p0, size_t bytes) {
+ if (!bytes)
+ return true;
+
+ if (!p0)
+ return false;
+
+ uintptr pageSize = VDGetSystemPageSize();
+ uintptr p = (uintptr)p0;
+ uintptr pLimit = p + (bytes-1);
+
+ __try {
+ for(;;) {
+ *(volatile char *)p;
+
+ if (pLimit - p < pageSize)
+ break;
+
+ p += pageSize;
+ }
+ } __except(1) {
+ return false;
+ }
+
+ return true;
+}
+
+bool VDIsValidWriteRegion(void *p0, size_t bytes) {
+ if (!bytes)
+ return true;
+
+ if (!p0)
+ return false;
+
+ // Note: Unlike IsValidWritePtr(), this is threadsafe.
+
+ uintptr pageSize = VDGetSystemPageSize();
+ uintptr p = (uintptr)p0;
+ uintptr pLimit = p + (bytes-1);
+ p &= ~(uintptr)3;
+
+ __try {
+ for(;;) {
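+			// Editorial note: comparing and exchanging the same value (0xa5) forces
+			// a write-access probe on the page without changing its contents.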
+ VDAtomicInt::staticCompareExchange((volatile int *)p, 0xa5, 0xa5);
+
+ if (pLimit - p < pageSize)
+ break;
+
+ p += pageSize;
+ }
+ } __except(1) {
+ return false;
+ }
+
+ return true;
+}
+
+bool VDCompareRect(void *dst, ptrdiff_t dstpitch, const void *src, ptrdiff_t srcpitch, size_t w, size_t h) {
+ if (!w || !h)
+ return false;
+
+ do {
+ if (memcmp(dst, src, w))
+ return true;
+
+ dst = (char *)dst + dstpitch;
+ src = (const char *)src + srcpitch;
+ } while(--h);
+
+ return false;
+}
+
+const void *VDMemCheck8(const void *src, uint8 value, size_t count) {
+ if (count) {
+ const uint8 *src8 = (const uint8 *)src;
+
+ do {
+ if (*src8 != value)
+ return src8;
+
+ ++src8;
+ } while(--count);
+ }
+
+ return NULL;
+}
+
+void VDMemset8(void *dst, uint8 value, size_t count) {
+ if (count) {
+ uint8 *dst2 = (uint8 *)dst;
+
+ do {
+ *dst2++ = value;
+ } while(--count);
+ }
+}
+
+void VDMemset16(void *dst, uint16 value, size_t count) {
+ if (count) {
+ uint16 *dst2 = (uint16 *)dst;
+
+ do {
+ *dst2++ = value;
+ } while(--count);
+ }
+}
+
+void VDMemset24(void *dst, uint32 value, size_t count) {
+ if (count) {
+ uint8 *dst2 = (uint8 *)dst;
+ uint8 c0 = (uint8)value;
+ uint8 c1 = (uint8)(value >> 8);
+ uint8 c2 = (uint8)(value >> 16);
+
+ do {
+ *dst2++ = c0;
+ *dst2++ = c1;
+ *dst2++ = c2;
+ } while(--count);
+ }
+}
+
+void VDMemset32(void *dst, uint32 value, size_t count) {
+ if (count) {
+ uint32 *dst2 = (uint32 *)dst;
+
+ do {
+ *dst2++ = value;
+ } while(--count);
+ }
+}
+
+void VDMemset64(void *dst, uint64 value, size_t count) {
+ if (count) {
+ uint64 *dst2 = (uint64 *)dst;
+
+ do {
+ *dst2++ = value;
+ } while(--count);
+ }
+}
+
+void VDMemset128(void *dst, const void *src0, size_t count) {
+ if (count) {
+ const uint32 *src = (const uint32 *)src0;
+ uint32 a0 = src[0];
+ uint32 a1 = src[1];
+ uint32 a2 = src[2];
+ uint32 a3 = src[3];
+
+ uint32 *dst2 = (uint32 *)dst;
+
+ do {
+ dst2[0] = a0;
+ dst2[1] = a1;
+ dst2[2] = a2;
+ dst2[3] = a3;
+ dst2 += 4;
+ } while(--count);
+ }
+}
+
+void VDMemsetPointer(void *dst, const void *value, size_t count) {
+#if defined(_M_IX86)
+ VDMemset32(dst, (uint32)(size_t)value, count);
+#elif defined(_M_AMD64)
+ VDMemset64(dst, (uint64)(size_t)value, count);
+#else
+ #error Unknown pointer size
+#endif
+}
+
+void VDMemset8Rect(void *dst, ptrdiff_t pitch, uint8 value, size_t w, size_t h) {
+ if (w>0 && h>0) {
+ do {
+ memset(dst, value, w);
+ dst = (char *)dst + pitch;
+ } while(--h);
+ }
+}
+
+void VDMemset16Rect(void *dst, ptrdiff_t pitch, uint16 value, size_t w, size_t h) {
+ if (w>0 && h>0) {
+ do {
+ VDMemset16(dst, value, w);
+ dst = (char *)dst + pitch;
+ } while(--h);
+ }
+}
+
+void VDMemset24Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h) {
+ if (w>0 && h>0) {
+ do {
+ VDMemset24(dst, value, w);
+ dst = (char *)dst + pitch;
+ } while(--h);
+ }
+}
+
+void VDMemset32Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h) {
+ if (w>0 && h>0) {
+ do {
+ VDMemset32(dst, value, w);
+ dst = (char *)dst + pitch;
+ } while(--h);
+ }
+}
+
+#if defined(_WIN32) && defined(_M_IX86)
+ extern "C" void __cdecl VDFastMemcpyPartialScalarAligned8(void *dst, const void *src, size_t bytes);
+ extern "C" void __cdecl VDFastMemcpyPartialMMX(void *dst, const void *src, size_t bytes);
+ extern "C" void __cdecl VDFastMemcpyPartialMMX2(void *dst, const void *src, size_t bytes);
+
+ void VDFastMemcpyPartialScalar(void *dst, const void *src, size_t bytes) {
+ if (!(((int)dst | (int)src | bytes) & 7))
+ VDFastMemcpyPartialScalarAligned8(dst, src, bytes);
+ else
+ memcpy(dst, src, bytes);
+ }
+
+ void VDFastMemcpyFinishScalar() {
+ }
+
+ void __cdecl VDFastMemcpyFinishMMX() {
+ __asm emms
+ }
+
+ void __cdecl VDFastMemcpyFinishMMX2() {
+ __asm emms
+ __asm sfence
+ }
+
+ void (__cdecl *VDFastMemcpyPartial)(void *dst, const void *src, size_t bytes) = VDFastMemcpyPartialScalar;
+ void (__cdecl *VDFastMemcpyFinish)() = VDFastMemcpyFinishScalar;
+
+ void VDFastMemcpyAutodetect() {
+ long exts = CPUGetEnabledExtensions();
+
+ if (exts & CPUF_SUPPORTS_SSE) {
+ VDFastMemcpyPartial = VDFastMemcpyPartialMMX2;
+ VDFastMemcpyFinish = VDFastMemcpyFinishMMX2;
+ VDSwapMemory = VDSwapMemorySSE;
+ } else if (exts & CPUF_SUPPORTS_INTEGER_SSE) {
+ VDFastMemcpyPartial = VDFastMemcpyPartialMMX2;
+ VDFastMemcpyFinish = VDFastMemcpyFinishMMX2;
+ VDSwapMemory = VDSwapMemoryScalar;
+ } else if (exts & CPUF_SUPPORTS_MMX) {
+ VDFastMemcpyPartial = VDFastMemcpyPartialMMX;
+ VDFastMemcpyFinish = VDFastMemcpyFinishMMX;
+ VDSwapMemory = VDSwapMemoryScalar;
+ } else {
+ VDFastMemcpyPartial = VDFastMemcpyPartialScalar;
+ VDFastMemcpyFinish = VDFastMemcpyFinishScalar;
+ VDSwapMemory = VDSwapMemoryScalar;
+ }
+ }
+
+#else
+ void VDFastMemcpyPartial(void *dst, const void *src, size_t bytes) {
+ memcpy(dst, src, bytes);
+ }
+
+ void VDFastMemcpyFinish() {
+ }
+
+ void VDFastMemcpyAutodetect() {
+ }
+#endif
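+// Illustrative usage (editorial note, hypothetical buffer names): pick the
+// fastest copy routine once at startup, then batch row copies and flush the
+// CPU state afterwards; the Finish call executes emms/sfence when an MMX/SSE
+// path was selected.
+//
+//   VDFastMemcpyAutodetect();
+//   for(size_t y = 0; y < rows; ++y)
+//       VDFastMemcpyPartial(dstRows[y], srcRows[y], rowBytes);
+//   VDFastMemcpyFinish();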
+
+void VDMemcpyRect(void *dst, ptrdiff_t dststride, const void *src, ptrdiff_t srcstride, size_t w, size_t h) {
+ if (w <= 0 || h <= 0)
+ return;
+
+ if (w == srcstride && w == dststride)
+ VDFastMemcpyPartial(dst, src, w*h);
+ // MPC custom code (begin)
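+	// Editorial note: matching negative strides describe a contiguous bottom-up
+	// image, so it can likewise be copied as one block starting at its last
+	// (lowest-addressed) row.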
+ else if (w == -srcstride && w == -dststride)
+ VDFastMemcpyPartial((char *)dst + dststride * (h - 1), (char *)src + srcstride * (h - 1), w*h);
+ // MPC custom code (end)
+ else {
+ char *dst2 = (char *)dst;
+ const char *src2 = (const char *)src;
+
+ do {
+ VDFastMemcpyPartial(dst2, src2, w);
+ dst2 += dststride;
+ src2 += srcstride;
+ } while(--h);
+ }
+ VDFastMemcpyFinish();
+}
+
+bool VDMemcpyGuarded(void *dst, const void *src, size_t bytes) {
+ __try {
+ memcpy(dst, src, bytes);
+ } __except(GetExceptionCode() == STATUS_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/profile.cpp b/src/thirdparty/VirtualDub/system/source/profile.cpp
new file mode 100644
index 000000000..3c91adb07
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/profile.cpp
@@ -0,0 +1,234 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <windows.h>
+#include <vd2/system/profile.h>
+
+///////////////////////////////////////////////////////////////////////////
+
+VDRTProfiler *g_pCentralProfiler;
+
+void VDInitProfilingSystem() {
+ if (!g_pCentralProfiler)
+ g_pCentralProfiler = new VDRTProfiler;
+}
+
+void VDDeinitProfilingSystem() {
+ delete g_pCentralProfiler;
+ g_pCentralProfiler = 0;
+}
+
+VDRTProfiler *VDGetRTProfiler() {
+ return g_pCentralProfiler;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDRTProfiler::VDRTProfiler()
+ : mbEnableCollection(false)
+{
+ LARGE_INTEGER freq;
+ QueryPerformanceFrequency(&freq);
+ mPerfFreq = freq.QuadPart;
+}
+
+VDRTProfiler::~VDRTProfiler() {
+}
+
+void VDRTProfiler::BeginCollection() {
+ mbEnableCollection = true;
+}
+
+void VDRTProfiler::EndCollection() {
+ mbEnableCollection = false;
+}
+
+void VDRTProfiler::Swap() {
+ vdsynchronized(mLock) {
+ LARGE_INTEGER tim;
+ QueryPerformanceCounter(&tim);
+
+ mSnapshotTime = tim.QuadPart;
+
+ // update channels
+ uint32 channelCount = mChannelArray.size();
+ mChannelArrayToPaint.resize(channelCount);
+
+ for(uint32 i=0; i<channelCount; ++i) {
+ Channel& src = mChannelArray[i];
+ Channel& dst = mChannelArrayToPaint[i];
+
+ dst.mpName = src.mpName;
+
+ dst.mEventList.clear();
+ dst.mEventList.swap(src.mEventList);
+ if (src.mbEventPending) {
+ src.mEventList.push_back(dst.mEventList.back());
+ src.mEventList.back().mEndTime = mSnapshotTime;
+ }
+ }
+
+ // update counters
+ Counters::iterator itC(mCounterArray.begin()), itCEnd(mCounterArray.end());
+ for(; itC != itCEnd; ++itC) {
+ Counter& ctr = *itC;
+
+ ctr.mDataLast = ctr.mData;
+
+ switch(ctr.mType) {
+ case kCounterTypeUint32:
+ ctr.mData.u32 = *(const uint32 *)ctr.mpData;
+ break;
+ case kCounterTypeDouble:
+ ctr.mData.d = *(const double *)ctr.mpData;
+ break;
+ }
+ }
+
+ mCounterArrayToPaint = mCounterArray;
+ }
+}
+
+int VDRTProfiler::AllocChannel(const char *name) {
+ uint32 i;
+
+ vdsynchronized(mLock) {
+ const uint32 nChannels = mChannelArray.size();
+
+ for(i=0; i<nChannels; ++i)
+ if (!mChannelArray[i].mpName)
+ break;
+
+ if (mChannelArray.size() <= i)
+ mChannelArray.resize(i + 1);
+
+ mChannelArray[i].mpName = name;
+ mChannelArray[i].mbEventPending = false;
+ }
+
+ return (int)i;
+}
+
+void VDRTProfiler::FreeChannel(int ch) {
+ vdsynchronized(mLock) {
+ mChannelArray[ch].mpName = 0;
+ mChannelArray[ch].mEventList.clear();
+ }
+}
+
+void VDRTProfiler::BeginEvent(int channel, uint32 color, const char *name) {
+ if (mbEnableCollection) {
+ LARGE_INTEGER tim;
+ QueryPerformanceCounter(&tim);
+ vdsynchronized(mLock) {
+ Channel& chan = mChannelArray[channel];
+
+ if (!chan.mbEventPending) {
+ chan.mbEventPending = true;
+ chan.mEventList.push_back(Event());
+ Event& ev = chan.mEventList.back();
+ ev.mpName = name;
+ ev.mColor = color;
+ ev.mStartTime = tim.QuadPart;
+ ev.mEndTime = tim.QuadPart;
+ }
+ }
+ }
+}
+
+void VDRTProfiler::EndEvent(int channel) {
+ if (mbEnableCollection) {
+ LARGE_INTEGER tim;
+
+ QueryPerformanceCounter(&tim);
+ vdsynchronized(mLock) {
+ Channel& chan = mChannelArray[channel];
+
+ if (chan.mbEventPending) {
+ chan.mEventList.back().mEndTime = tim.QuadPart;
+ chan.mbEventPending = false;
+ }
+ }
+ }
+}
+
+void VDRTProfiler::RegisterCounterU32(const char *name, const uint32 *val) {
+ RegisterCounter(name, val, kCounterTypeUint32);
+}
+
+void VDRTProfiler::RegisterCounterD(const char *name, const double *val) {
+ RegisterCounter(name, val, kCounterTypeDouble);
+}
+
+struct VDRTProfiler::CounterByNamePred {
+ bool operator()(const char *name1, const char *name2) const {
+ return strcmp(name1, name2) < 0;
+ }
+
+ bool operator()(const char *name1, const Counter& ctr) const {
+ return strcmp(name1, ctr.mpName) < 0;
+ }
+
+ bool operator()(const Counter& ctr, const char *name2) const {
+ return strcmp(ctr.mpName, name2) < 0;
+ }
+
+ bool operator()(const Counter& ctr1, const Counter& ctr2) const {
+ return strcmp(ctr1.mpName, ctr2.mpName) < 0;
+ }
+};
+
+void VDRTProfiler::RegisterCounter(const char *name, const void *val, CounterType type) {
+ VDASSERT(val);
+
+ vdsynchronized(mLock) {
+ Counters::iterator itBegin(mCounterArray.begin());
+ Counters::iterator itEnd(mCounterArray.end());
+ Counters::iterator it(std::upper_bound(itBegin, itEnd, name, CounterByNamePred()));
+
+ it = mCounterArray.insert(it, Counter());
+ Counter& ctr = *it;
+
+ memset(&ctr.mData, 0, sizeof ctr.mData);
+ memset(&ctr.mDataLast, 0, sizeof ctr.mDataLast);
+ ctr.mpData = val;
+ ctr.mpName = name;
+ ctr.mType = type;
+ }
+}
+
+void VDRTProfiler::UnregisterCounter(void *p) {
+ vdsynchronized(mLock) {
+ Counters::iterator it(mCounterArray.begin()), itEnd(mCounterArray.end());
+ for(; it!=itEnd; ++it) {
+ const Counter& counter = *it;
+ if (counter.mpData == p) {
+ mCounterArray.erase(it);
+ return;
+ }
+ }
+ }
+}
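
A short usage sketch of the realtime profiler API above (illustrative only; declarations come from <vd2/system/profile.h> as included by this file):

    #include <vd2/system/profile.h>

    void DecodeOneFrame(); // placeholder for real work

    void ProfiledDecode() {
        VDInitProfilingSystem();                        // creates the central profiler on first use
        VDRTProfiler *prof = VDGetRTProfiler();

        int chan = prof->AllocChannel("Video decode");  // claims a free channel slot
        prof->BeginCollection();

        prof->BeginEvent(chan, 0xFFE0E0, "decode");     // color value; exact interpretation is assumed here
        DecodeOneFrame();
        prof->EndEvent(chan);

        prof->Swap();                                   // publishes collected events for painting
        prof->EndCollection();
        prof->FreeChannel(chan);
    }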
diff --git a/src/thirdparty/VirtualDub/system/source/progress.cpp b/src/thirdparty/VirtualDub/system/source/progress.cpp
new file mode 100644
index 000000000..1ac26a0f6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/progress.cpp
@@ -0,0 +1,35 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <vd2/system/tls.h>
+#include <vd2/system/progress.h>
+#include <vd2/system/error.h>
+#include <vd2/system/atomic.h>
+#include <vd2/system/thread.h>
+
diff --git a/src/thirdparty/VirtualDub/system/source/protscope.cpp b/src/thirdparty/VirtualDub/system/source/protscope.cpp
new file mode 100644
index 000000000..612082824
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/protscope.cpp
@@ -0,0 +1,37 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/protscope.h>
+
+VDProtectedAutoScope *VDGetProtectedScopeLinkNull() {
+ return NULL;
+}
+
+void VDSetProtectedScopeLinkNull(VDProtectedAutoScope *) {
+}
+
+tpVDGetProtectedScopeLink g_pVDGetProtectedScopeLink = VDGetProtectedScopeLinkNull;
+tpVDSetProtectedScopeLink g_pVDSetProtectedScopeLink = VDSetProtectedScopeLinkNull;
diff --git a/src/thirdparty/VirtualDub/system/source/refcount.cpp b/src/thirdparty/VirtualDub/system/source/refcount.cpp
new file mode 100644
index 000000000..f0d82760a
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/refcount.cpp
@@ -0,0 +1,29 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2009 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/refcount.h>
+
+vdsaferelease_t vdsaferelease;
diff --git a/src/thirdparty/VirtualDub/system/source/registry.cpp b/src/thirdparty/VirtualDub/system/source/registry.cpp
new file mode 100644
index 000000000..18506e7f6
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/registry.cpp
@@ -0,0 +1,243 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <windows.h>
+
+#include <vd2/system/VDString.h>
+#include <vd2/system/registry.h>
+
+VDRegistryKey::VDRegistryKey(const char *keyName, bool global, bool write) {
+ const HKEY rootKey = global ? HKEY_LOCAL_MACHINE : HKEY_CURRENT_USER;
+
+ if (write) {
+ if (RegCreateKeyEx(rootKey, keyName, 0, NULL, REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, (PHKEY)&pHandle, NULL))
+ pHandle = NULL;
+ } else {
+ if (RegOpenKeyEx(rootKey, keyName, 0, KEY_READ, (PHKEY)&pHandle))
+ pHandle = NULL;
+ }
+}
+
+VDRegistryKey::~VDRegistryKey() {
+ if (pHandle)
+ RegCloseKey((HKEY)pHandle);
+}
+
+bool VDRegistryKey::setBool(const char *pszName, bool v) const {
+ if (pHandle) {
+ DWORD dw = v;
+
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_DWORD, (const BYTE *)&dw, sizeof dw))
+ return true;
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::setInt(const char *pszName, int i) const {
+ if (pHandle) {
+ DWORD dw = i;
+
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_DWORD, (const BYTE *)&dw, sizeof dw))
+ return true;
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::setString(const char *pszName, const char *pszString) const {
+ if (pHandle) {
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_SZ, (const BYTE *)pszString, strlen(pszString)))
+ return true;
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::setString(const char *pszName, const wchar_t *pszString) const {
+ if (pHandle) {
+ if (GetVersion() & 0x80000000) {
+ VDStringA s(VDTextWToA(pszString));
+
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_SZ, (const BYTE *)s.data(), s.size()))
+ return true;
+ } else {
+ if (RegSetValueExW((HKEY)pHandle, VDTextAToW(pszName).c_str(), 0, REG_SZ, (const BYTE *)pszString, sizeof(wchar_t) * wcslen(pszString)))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::setBinary(const char *pszName, const char *data, int len) const {
+ if (pHandle) {
+ if (RegSetValueEx((HKEY)pHandle, pszName, 0, REG_BINARY, (const BYTE *)data, len))
+ return true;
+ }
+
+ return false;
+}
+
+bool VDRegistryKey::getBool(const char *pszName, bool def) const {
+ DWORD type, v, s=sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, (BYTE *)&v, &s)
+ || type != REG_DWORD)
+ return def;
+
+ return v != 0;
+}
+
+int VDRegistryKey::getInt(const char *pszName, int def) const {
+ DWORD type, v, s=sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, (BYTE *)&v, &s)
+ || type != REG_DWORD)
+ return def;
+
+ return (int)v;
+}
+
+int VDRegistryKey::getEnumInt(const char *pszName, int maxVal, int def) const {
+ int v = getInt(pszName, def);
+
+ if (v<0 || v>=maxVal)
+ v = def;
+
+ return v;
+}
+
+bool VDRegistryKey::getString(const char *pszName, VDStringA& str) const {
+ DWORD type, s = sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, NULL, &s) || type != REG_SZ)
+ return false;
+
+ str.resize(s);
+ if (RegQueryValueEx((HKEY)pHandle, pszName, 0, NULL, (BYTE *)str.data(), &s))
+ return false;
+
+ if (!s)
+ str.clear();
+ else
+ str.resize(strlen(str.c_str())); // Trim off pesky terminating NULLs.
+
+ return true;
+}
+
+bool VDRegistryKey::getString(const char *pszName, VDStringW& str) const {
+ if (!pHandle)
+ return false;
+
+ if (GetVersion() & 0x80000000) {
+ VDStringA v;
+ if (!getString(pszName, v))
+ return false;
+ str = VDTextAToW(v);
+ return true;
+ }
+
+ const VDStringW wsName(VDTextAToW(pszName));
+ DWORD type, s = sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueExW((HKEY)pHandle, wsName.c_str(), 0, &type, NULL, &s) || type != REG_SZ)
+ return false;
+
+ if (s <= 0)
+ str.clear();
+ else {
+ str.resize((s + sizeof(wchar_t) - 1) / sizeof(wchar_t));
+
+ if (RegQueryValueExW((HKEY)pHandle, wsName.c_str(), 0, NULL, (BYTE *)&str[0], &s))
+ return false;
+
+ str.resize(wcslen(str.c_str())); // Trim off pesky terminating NULLs.
+ }
+
+ return true;
+}
+
+int VDRegistryKey::getBinaryLength(const char *pszName) const {
+ DWORD type, s = sizeof(DWORD);
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, NULL, &s)
+ || type != REG_BINARY)
+ return -1;
+
+ return s;
+}
+
+bool VDRegistryKey::getBinary(const char *pszName, char *buf, int maxlen) const {
+ DWORD type, s = maxlen;
+
+ if (!pHandle || RegQueryValueEx((HKEY)pHandle, pszName, 0, &type, (BYTE *)buf, &s) || maxlen < (int)s || type != REG_BINARY)
+ return false;
+
+ return true;
+}
+
+bool VDRegistryKey::removeValue(const char *name) {
+ if (!pHandle || RegDeleteValue((HKEY)pHandle, name))
+ return false;
+
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDRegistryValueIterator::VDRegistryValueIterator(const VDRegistryKey& key)
+ : mpHandle(key.getRawHandle())
+ , mIndex(0)
+{
+}
+
+const char *VDRegistryValueIterator::Next() {
+ DWORD len = sizeof(mName)/sizeof(mName[0]);
+ LONG error = RegEnumValueA((HKEY)mpHandle, mIndex, mName, &len, NULL, NULL, NULL, NULL);
+
+ if (error)
+ return NULL;
+
+ ++mIndex;
+ return mName;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+VDString VDRegistryAppKey::s_appbase;
+
+VDRegistryAppKey::VDRegistryAppKey() : VDRegistryKey(s_appbase.c_str()) {
+}
+
+VDRegistryAppKey::VDRegistryAppKey(const char *pszKey, bool write)
+ : VDRegistryKey((s_appbase + pszKey).c_str(), false, write)
+{
+}
+
+void VDRegistryAppKey::setDefaultKey(const char *pszAppName) {
+ s_appbase = pszAppName;
+}
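
A usage sketch of VDRegistryKey above (illustrative only; the key path and value names are hypothetical):

    #include <vd2/system/VDString.h>
    #include <vd2/system/registry.h>

    void SaveAndLoadSettings() {
        // Write a few values under HKEY_CURRENT_USER (global = false), opened for writing.
        VDRegistryKey wkey("Software\\Example\\App\\Video", false, true);
        wkey.setInt("Brightness", 128);
        wkey.setBool("Deinterlace", true);
        wkey.setString("Renderer", "EVR");

        // Read them back through a read-only key; the second argument is the fallback default.
        VDRegistryKey rkey("Software\\Example\\App\\Video", false, false);
        int brightness = rkey.getInt("Brightness", 100);
        VDStringA renderer;
        rkey.getString("Renderer", renderer);

        // Enumerate value names under the key.
        VDRegistryValueIterator it(rkey);
        while(const char *name = it.Next()) {
            // ...
        }
        (void)brightness;
    }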
diff --git a/src/thirdparty/VirtualDub/system/source/stdaccel.cpp b/src/thirdparty/VirtualDub/system/source/stdaccel.cpp
new file mode 100644
index 000000000..4cbfdcd18
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/stdaccel.cpp
@@ -0,0 +1,42 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2007 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#define VDTEXTERN
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+
+template vdspan<char>;
+template vdspan<uint8>;
+template vdspan<uint16>;
+template vdspan<uint32>;
+template vdspan<uint64>;
+template vdspan<sint8>;
+template vdspan<sint16>;
+template vdspan<sint32>;
+template vdspan<sint64>;
+template vdspan<float>;
+template vdspan<double>;
+template vdspan<wchar_t>;
diff --git a/src/thirdparty/VirtualDub/system/source/stdafx.cpp b/src/thirdparty/VirtualDub/system/source/stdafx.cpp
new file mode 100644
index 000000000..acf0b47e4
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/stdafx.cpp
@@ -0,0 +1,46 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include <stdafx.h>
+
+#ifdef _MSC_VER
+ #pragma hdrstop
+#endif
+
+// compiler/setup checks
+
+#if defined(_MSC_VER)
+ #if _MSC_VER < 1300
+ #include <windows.h>
+
+ #line 1 " \n \n \n***** You do not have the correct version of the Microsoft Platform SDK installed *****\nPlease see Docs\\index.html for details.\n \n \n"
+ namespace { const DWORD PlatformSDKTest = INVALID_SET_FILE_POINTER; }
+ #line 1 ""
+
+ #line 1 " \n \n \n***** You do not have the Visual C++ Processor Pack installed *****\nPlease see Docs\\index.html for details.\n \n \n"
+ namespace { void VCPPCheck() { __asm { sfence } } }
+ #line 1 ""
+ #endif
+#endif
diff --git a/src/thirdparty/VirtualDub/system/source/strutil.cpp b/src/thirdparty/VirtualDub/system/source/strutil.cpp
new file mode 100644
index 000000000..2d9becc85
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/strutil.cpp
@@ -0,0 +1,99 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <string.h>
+#include <ctype.h>
+
+#include <vd2/system/strutil.h>
+
+char *strncpyz(char *strDest, const char *strSource, size_t count) {
+ char *s;
+
+ s = strncpy(strDest, strSource, count);
+ strDest[count-1] = 0;
+
+ return s;
+}
+
+wchar_t *wcsncpyz(wchar_t *strDest, const wchar_t *strSource, size_t count) {
+ wchar_t *s;
+
+ s = wcsncpy(strDest, strSource, count);
+ strDest[count-1] = 0;
+
+ return s;
+}
+
+const char *strskipspace(const char *s) {
+ while(isspace((unsigned char)*s++))
+ ;
+
+ return s-1;
+}
+
+size_t vdstrlcpy(char *dst, const char *src, size_t size) {
+ size_t len = strlen(src);
+
+ if (size) {
+ if (size > len)
+ size = len;
+ else
+ size = size - 1; // truncating copy: leave room for the terminator
+
+ memcpy(dst, src, size);
+ dst[size] = 0;
+ }
+ return len;
+}
+
+size_t vdwcslcpy(wchar_t *dst, const wchar_t *src, size_t size) {
+ size_t len = wcslen(src);
+
+ if (size) {
+ if (size > len)
+ size = len;
+ else
+ size = size - 1; // truncating copy: leave room for the terminator
+
+ memcpy(dst, src, size * sizeof(wchar_t));
+ dst[size] = 0;
+ }
+ return len;
+}
+
+size_t vdstrlcat(char *dst, const char *src, size_t size) {
+ size_t dlen = strlen(dst);
+ size_t slen = strlen(src);
+
+ if (dlen < size) {
+ size_t maxappend = size - dlen - 1;
+ if (maxappend > slen)
+ maxappend = slen;
+
+ if (maxappend) {
+ memcpy(dst + dlen, src, maxappend);
+ dst[dlen+maxappend] = 0;
+ }
+ }
+
+ return dlen+slen;
+}
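
A usage sketch of the bounded string helpers above (illustrative only; declarations come from <vd2/system/strutil.h> as included by this file):

    #include <stdio.h>
    #include <vd2/system/strutil.h>

    void CopyExamples() {
        char buf[8];

        // strlcpy-style copy: the return value is the full source length,
        // so a result >= sizeof buf means the copy was truncated.
        size_t needed = vdstrlcpy(buf, "this string is too long", sizeof buf);
        if (needed >= sizeof buf)
            printf("truncated; %u bytes would be needed\n", (unsigned)(needed + 1));

        // Bounded append in the same style.
        char path[64] = "C:\\Temp";
        vdstrlcat(path, "\\output.avi", sizeof path);

        // Skip leading whitespace without modifying the string.
        const char *p = strskipspace("   trimmed");
        (void)p;
    }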
diff --git a/src/thirdparty/VirtualDub/system/source/text.cpp b/src/thirdparty/VirtualDub/system/source/text.cpp
new file mode 100644
index 000000000..64f263d88
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/text.cpp
@@ -0,0 +1,652 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vector>
+#include <algorithm>
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include <windows.h>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/vdstl.h>
+#include <vd2/system/text.h>
+#include <vd2/system/tls.h>
+#include <vd2/system/VDString.h>
+
+int VDTextWToA(char *dst, int max_dst, const wchar_t *src, int max_src) {
+ VDASSERTPTR(dst);
+ VDASSERTPTR(src);
+ VDASSERT(max_dst>0);
+
+ *dst = 0;
+
+ int len = WideCharToMultiByte(CP_ACP, 0, src, max_src, dst, max_dst, NULL, NULL);
+
+ // drop the null terminator from the count when the source length was
+ // not provided (max_src < 0 implies the source is null-terminated)
+ return max_src<0 && len>0 ? len-1 : len;
+}
+
+int VDTextAToW(wchar_t *dst, int max_dst, const char *src, int max_src) {
+ VDASSERTPTR(dst);
+ VDASSERTPTR(src);
+ VDASSERT(max_dst>0);
+
+ *dst = 0;
+
+ int len = MultiByteToWideChar(CP_ACP, 0, src, max_src, dst, max_dst);
+
+ // drop the null terminator from the count when the source length was
+ // not provided (max_src < 0 implies the source is null-terminated)
+ return max_src<0 && len>0 ? len-1 : len;
+}
+
+VDStringA VDTextWToA(const VDStringW& sw) {
+ return VDTextWToA(sw.data(), sw.length());
+}
+
+VDStringA VDTextWToA(const wchar_t *src, int srclen) {
+ VDStringA s;
+
+ if (src) {
+ int l = VDTextWToALength(src, srclen);
+
+ if (l) {
+ s.resize(l);
+ VDTextWToA((char *)s.data(), l+1, src, srclen);
+ }
+ }
+
+ return s;
+}
+
+VDStringW VDTextAToW(const VDStringA& s) {
+ return VDTextAToW(s.data(), s.length());
+}
+
+VDStringW VDTextAToW(const char *src, int srclen) {
+ VDStringW sw;
+
+ if (src) {
+ int l = VDTextAToWLength(src, srclen);
+
+ if (l) {
+ sw.resize(l);
+ VDTextAToW(&sw[0], sw.length()+1, src, srclen);
+ }
+ }
+
+ return sw;
+}
+
+int VDTextWToALength(const wchar_t *s, int length) {
+ SetLastError(0);
+ int rv = WideCharToMultiByte(CP_ACP, 0, s, length, NULL, 0, NULL, 0);
+
+ if (length < 0 && rv>0)
+ --rv;
+
+ return rv;
+}
+
+int VDTextAToWLength(const char *s, int length) {
+ SetLastError(0);
+ int rv = MultiByteToWideChar(CP_ACP, 0, s, length, NULL, 0);
+
+ if (length < 0 && rv > 0)
+ --rv;
+
+ return rv;
+}
+
+namespace {
+ // UTF8:
+ // 000000000xxxxxxx -> 0xxxxxxx
+ // 00000yyyyyxxxxxx -> 110yyyyy 10xxxxxx
+ // zzzzyyyyyyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx
+ // uuuuuzzzzyyyyyyxxxxxx -> 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
+ // (UTF16) -> 110110wwwwzzzzyy (uuuuu = wwww+1)
+ // 110111yyyyxxxxxx
+ int VDGetCharLengthInUTF8(wchar_t c) {
+ if (c < 0x0080) // 7 bits
+ return 1;
+ else if (c < 0x0800) // 11 bits
+ return 2;
+ else if (c < 0x10000) // 16 bits
+ return 3;
+ else if (c < 0x200000) // 21 bits
+ return 4;
+ else {
+ VDASSERT(false);
+ return 1; // Uh oh. Well, we're screwed.
+ }
+ }
+
+ bool VDIsUnicodeSurrogateFirst(wchar_t c) {
+ return (c >= 0xD800 && c < 0xDC00);
+ }
+
+ bool VDIsUnicodeSurrogateSecond(wchar_t c) {
+ return (c >= 0xDC00 && c < 0xE000);
+ }
+};
+
+VDStringA VDTextWToU8(const VDStringW& s) {
+ return VDTextWToU8(s.data(), s.length());
+}
+
+VDStringA VDTextWToU8(const wchar_t *s, int length) {
+ vdfastvector<char> temp;
+
+ if (length<0) {
+ const wchar_t *t = s;
+ do {
+ ++length;
+ } while(*t++);
+ }
+
+ while(length--) {
+ uint32 c = *s++;
+
+ if (VDIsUnicodeSurrogateFirst(c)) {
+ if (!length || !VDIsUnicodeSurrogateSecond(*s)) {
+ VDASSERT(false);
+ c = '?';
+ } else {
+ c = 0x10000 + ((c & 0x3ff)<<10) + (*s++ & 0x3ff);
+ --length;
+ }
+ }
+
+ if (c < 0x0080) {
+ temp.push_back((char)c);
+ } else {
+ if (c < 0x0800)
+ temp.push_back((char)(0xc0 + (c>>6)));
+ else {
+ if (c < 0x10000)
+ temp.push_back((char)(0xe0 + (c>>12)));
+ else {
+ temp.push_back((char)(0xf0 + ((c>>18) & 0x07)));
+ temp.push_back((char)(0x80 + ((c>>12) & 0x3f)));
+ }
+ temp.push_back((char)(0x80 + ((c>>6) & 0x3f)));
+ }
+ temp.push_back((char)(0x80 + (c & 0x3f)));
+ }
+ }
+
+ VDStringA a(temp.data(), temp.size());
+
+ return a;
+}
+
+VDStringW VDTextU8ToW(const VDStringA& s) {
+ return VDTextU8ToW(s.data(), s.length());
+}
+
+VDStringW VDTextU8ToW(const char *s, int length) {
+ vdfastvector<wchar_t> temp;
+
+ if (length<0) {
+ const char *t = s;
+ VDASSERT(length == -1);
+ do {
+ ++length;
+ } while(*t++);
+ }
+
+ while(length--) {
+ unsigned char c = (char)*s++;
+ uint32 wc = c; // we reconstruct UTF-32 first and then split to UTF-16 if necessary
+
+ if (c >= 0x80) {
+ int required_extra = 0;
+
+ if (c < 0xc0 || c >= 0xf7) {
+ VDASSERT(false);
+ break;
+ }
+
+ while(c >= 0xc0) {
+ c <<= 1;
+ ++required_extra;
+ }
+
+ wc = (c&0x3f) >> required_extra;
+
+ do {
+ char d;
+
+ if (!length-- || (((d=*s++)&0xc0)!=0x80))
+ goto bad_sequence_exit;
+
+ wc = (wc<<6) + (d&0x3f);
+ } while(--required_extra);
+ }
+
+ // Two cases here. If we are using UTF-16, surrogates need to be split in half. If we are using
+ // UTF-32, surrogates need to be combined.
+
+ if (sizeof(wchar_t) > 2) {
+ if (VDIsUnicodeSurrogateSecond(wc)) {
+ if (temp.empty() || !VDIsUnicodeSurrogateFirst(temp.back())) {
+ VDASSERT(false);
+ break;
+ }
+
+ temp.back() = 0x10000 + ((temp.back()&0x3ff) << 10) + (wc & 0x3ff);
+ continue;
+ }
+ } else {
+ if (wc >= 0x10000) {
+ wc -= 0x10000;
+ temp.push_back(0xD800 + ((wc & 0x3ff) >> 10));
+ wc = 0xDC00 + (wc&0x3ff);
+ }
+ }
+ temp.push_back(wc);
+ }
+bad_sequence_exit:
+
+ VDStringW w(temp.data(), temp.size());
+
+ return w;
+}
+
+///////////////////////////////////////////////////////////////////////////
+//
+// VirtualDub's very own printf() functions.
+//
+// VD[v|a]swprintf() differs from wsprintf() in the following ways:
+//
+// * The output is a string.
+// * All parameters must be passed by pointer instead of by value.
+// * The 'll' modifier permits long long / __int64 integers.
+// * [n] allows picking parameters out of order.
+// * %lc/%ls forces Unicode; %hc/%hs forces ANSI.
+
+VDStringW VDaswprintf(const wchar_t *format, int args, const void *const *argv) {
+ const void *const *argv0 = argv;
+ vdfastfixedvector<wchar_t, 256> out;
+ wchar_t c;
+
+ VDStringW tempConv;
+
+ while(c = *format) {
+ if (c != L'%') {
+ const wchar_t *s = format;
+
+ while(*s && *s != L'%')
+ ++s;
+
+ int len = s - format;
+ int clen = out.size();
+
+ out.resize(clen + len);
+
+ std::copy(format, s, &out[clen]);
+
+ format = s;
+ } else {
+ ++format;
+
+ // check for %%
+
+ if (*format == L'%') {
+ ++format;
+ out.push_back(L'%');
+ continue;
+ }
+
+ // Check for a renumbering identifier.
+
+ if (*format == L'[') {
+ ++format;
+
+ int newid = wcstol(format, const_cast<wchar_t **>(&format), 0);
+
+ VDASSERT(newid >= 0 && newid < args);
+
+ argv = argv0 + newid;
+
+ VDVERIFY(*format++ == L']');
+ }
+
+ // process flags
+
+ struct {
+ bool bLeftAlign:1, // pad right with spaces (priority over zero pad)
+ bZeroPad:1, // pad left with zeroes
+ bPositiveSign:1, // prefix with + or -; priority over bPositiveBlank
+ bPositiveBlank:1, // prefix with space for nonnegative
+ bPrefix:1; // prefix with 0, 0x, 0X, or force decimal point
+ } flags={false};
+ int width = 0;
+ int precision = -1;
+
+ for(;;) {
+ c = *format;
+
+ if (c == L'0')
+ flags.bZeroPad = true;
+ else if (c == L' ')
+ flags.bPositiveBlank = true;
+ else if (c == L'#')
+ flags.bPrefix = true;
+ else if (c == L'-')
+ flags.bLeftAlign = true;
+ else if (c == L'+')
+ flags.bPositiveSign = true;
+ else
+ break;
+
+ ++format;
+ }
+
+ // process width
+
+ c = *format;
+ if (c == L'*') {
+ ++format;
+ width = *(int *)*argv++;
+ } else if (iswdigit(c))
+ width = (int)wcstol(format, const_cast<wchar_t **>(&format), 0);
+
+ // process precision
+
+ if (*format == L'.') {
+ c = *++format;
+
+ if (c == L'*') {
+ ++format;
+ precision = *(int *)*argv++;
+ } else if (iswdigit(c))
+ precision = (int)wcstol(format, const_cast<wchar_t **>(&format), 0);
+ }
+
+ // process flags
+
+ enum { kDefault, kLong, kLongLong, kShort } size = kDefault;
+
+ c = *format;
+
+ if (c == L'l') {
+ ++format;
+ size = kLong;
+
+ if (*format == L'l') {
+ ++format;
+ size = kLongLong;
+ }
+
+ } else if (c == L'h') {
+ ++format;
+ size = kShort;
+ }
+
+ // process format character
+
+ wchar_t xf[32], buf[32], *pxf = xf, *pbuf0 = buf, *pbuf = buf;
+ int zero_pad = 0;
+
+ switch(*format++) {
+ case L'd':
+ case L'i':
+ case L'o':
+ case L'u':
+ case L'x':
+ case L'X':
+ *pxf++ = '%';
+ if (flags.bPrefix)
+ *pxf++ = '#';
+ if (flags.bPositiveBlank)
+ *pxf++ = ' ';
+ if (flags.bPositiveSign)
+ *pxf++ = '+';
+
+ switch(size) {
+ case kShort:
+ *pxf++ = 'h';
+ *pxf++ = format[-1];
+ *pxf = 0;
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], xf, *(const short *)*argv++);
+ break;
+ case kDefault:
+ *pxf++ = format[-1];
+ *pxf = 0;
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], xf, *(const int *)*argv++);
+ break;
+ case kLong:
+ *pxf++ = 'l';
+ *pxf++ = format[-1];
+ *pxf = 0;
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], xf, *(const long *)*argv++);
+ break;
+ case kLongLong:
+#if defined(_MSC_VER)
+ *pxf++ = 'I';
+ *pxf++ = '6';
+ *pxf++ = '4';
+#elif defined(__GNUC__)
+ *pxf++ = 'l';
+ *pxf++ = 'l';
+#else
+#error Please insert the appropriate 64-bit printf format for your platform.
+#endif
+ *pxf++ = format[-1];
+ *pxf = 0;
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], xf, *(const int64 *)*argv++);
+ break;
+ default:
+ VDNEVERHERE;
+ }
+
+ if (pbuf - pbuf0 < precision)
+ zero_pad = precision - (pbuf - pbuf0);
+
+ break;
+
+ case L'c':
+ if (size == kShort) {
+ char buf[2] = {*(const char *)*argv++, 0};
+ pbuf += VDTextAToW(pbuf, 4, buf);
+ } else
+ *pbuf++ = *(const wchar_t *)*argv++;
+ break;
+
+ case L's':
+ if (size == kShort) {
+ const char *s = *(const char *const *)*argv++;
+ int maxsrc = strlen(s);
+
+ if (precision >= 0 && precision < maxsrc)
+ maxsrc = precision;
+
+ tempConv = VDTextAToW(s, maxsrc);
+ pbuf0 = const_cast<wchar_t *>(tempConv.c_str());
+
+ pbuf = pbuf0 + tempConv.size();
+ } else {
+ pbuf = pbuf0 = *(wchar_t *const *)*argv++;
+
+ while(*pbuf && precision) {
+ ++pbuf;
+ --precision;
+ }
+ }
+ break;
+
+ case L'e':
+ case L'E':
+ case L'f':
+ case L'F':
+ case L'g':
+ case L'G':
+ // We place an artificial limit of 256 characters on the precision value.
+ {
+ if (precision > 256)
+ precision = 256;
+
+ tempConv.resize(256);
+ pbuf0 = pbuf = const_cast<wchar_t *>(tempConv.data());
+
+ *pxf++ = '%';
+ if (flags.bPrefix)
+ *pxf++ = '#';
+ if (flags.bPositiveBlank)
+ *pxf++ = ' ';
+ if (flags.bPositiveSign)
+ *pxf++ = '+';
+ if (precision>=0) {
+ *pxf++ = '.';
+ *pxf++ = '*';
+ }
+ *pxf++ = format[-1];
+ *pxf = 0;
+
+ if (precision >= 0)
+ pbuf += swprintf(pbuf, 256, xf, precision, *(const double *)*argv++);
+ else
+ pbuf += swprintf(pbuf, 256, xf, *(const double *)*argv++);
+ }
+ break;
+
+ case L'n': // no flags honored; precision ignored
+ *(int *)(*argv++) = out.size();
+ continue;
+ case L'p': // no flags honored; precision ignored
+ pbuf += swprintf(pbuf, sizeof buf / sizeof buf[0], L"%p", *(void *const *)*argv++);
+ break;
+
+ case L'z':
+ switch(*format++) {
+ case L's':
+ {
+ int64 value;
+
+ switch(size) {
+ case kShort: value = *(const short *)*argv++; break;
+ case kDefault: value = *(const int *)*argv++; break;
+ case kLong: value = *(const long *)*argv++; break;
+ case kLongLong: value = *(const int64 *)*argv++; break;
+ break;
+ default:
+ VDNEVERHERE;
+ }
+
+ if (value < 0)
+ *pbuf++ = L'-';
+ else if (flags.bPositiveSign)
+ *pbuf++ = L'+';
+ else if (flags.bPositiveBlank)
+ *pbuf++ = L' ';
+
+ if (value < (VD64(10) << 10))
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d bytes", (int)value);
+ else if (value < (VD64(10) << 20))
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d KB", (int)((sint32)value >> 10));
+ else if (value < (VD64(10) << 30))
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d MB", (int)((sint32)value >> 20));
+ else if (value < (VD64(10) << 40))
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d GB", (int)(value >> 30));
+ else
+ pbuf += swprintf(pbuf, (buf + sizeof(buf) / sizeof(buf[0])) - pbuf, L"%d TB", (int)(value >> 40));
+ }
+
+ break;
+ }
+ break;
+
+ }
+
+ int string_width = (pbuf - pbuf0) + zero_pad;
+ int string_delta = width - string_width;
+
+ if (!flags.bLeftAlign && string_delta > 0) {
+ int siz = out.size();
+ out.resize(siz + string_delta, flags.bZeroPad ? L'0' : L' ');
+ }
+
+ if (zero_pad) {
+ int siz = out.size();
+ out.resize(siz + zero_pad);
+ std::fill(&out[siz], &out[siz+zero_pad], L'0');
+ }
+
+ if (pbuf != pbuf0) {
+ int siz = out.size();
+ out.resize(siz + (pbuf - pbuf0));
+
+ std::copy(pbuf0, pbuf, &out[siz]);
+ }
+
+ if (flags.bLeftAlign && string_delta > 0) {
+ int siz = out.size();
+ out.resize(siz + string_delta);
+ std::fill(&out[siz], &out[siz+string_delta], L' ');
+ }
+ }
+ }
+
+ out.push_back(0);
+
+ return VDStringW(out.data());
+}
+
+VDStringW VDvswprintf(const wchar_t *format, int args, va_list val) {
+ if (args < 16) {
+ const void *argv[16];
+
+ for(int i=0; i<args; ++i)
+ argv[i] = va_arg(val, const void *);
+
+ va_end(val);
+
+ return VDaswprintf(format, args, argv);
+ } else {
+ vdblock<const void *> argv(args);
+
+ for(int i=0; i<args; ++i)
+ argv[i] = va_arg(val, const void *);
+
+ va_end(val);
+
+ return VDaswprintf(format, args, argv.data());
+ }
+}
+
+VDStringW VDswprintf(const wchar_t *format, int args, ...) {
+ va_list val;
+
+ va_start(val, args);
+ VDStringW r = VDvswprintf(format, args, val);
+ va_end(val);
+
+ return r;
+}
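
A usage sketch of the conversion and formatting routines above (illustrative only). Note the calling convention documented in the comment block: the argument count is passed explicitly and every argument is passed by pointer.

    #include <vd2/system/text.h>
    #include <vd2/system/VDString.h>

    VDStringW FormatStatus(int frame, double fps, const wchar_t *codec) {
        // %d and %.2f read through int*/double*; %ls reads the wchar_t* through a wchar_t**.
        return VDswprintf(L"frame %d at %.2f fps (%ls)", 3, &frame, &fps, &codec);
    }

    VDStringW RoundTripUtf8(const wchar_t *text) {
        // UTF-16 -> UTF-8 -> UTF-16 using the converters above.
        VDStringA utf8 = VDTextWToU8(VDStringW(text));
        return VDTextU8ToW(utf8);
    }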
diff --git a/src/thirdparty/VirtualDub/system/source/thread.cpp b/src/thirdparty/VirtualDub/system/source/thread.cpp
new file mode 100644
index 000000000..910678bc4
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/thread.cpp
@@ -0,0 +1,274 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <process.h>
+
+#include <windows.h>
+
+#include <vd2/system/vdtypes.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/tls.h>
+#include <vd2/system/protscope.h>
+
+namespace {
+ //
+ // This apparently came from a talk by one of the Visual Studio
+ // developers, i.e. I didn't write it.
+ //
+ #define MS_VC_EXCEPTION 0x406d1388
+
+ typedef struct tagTHREADNAME_INFO
+ {
+ DWORD dwType; // must be 0x1000
+ LPCSTR szName; // pointer to name (in same addr space)
+ DWORD dwThreadID; // thread ID (-1 = caller thread)
+ DWORD dwFlags; // reserved for future use, must be zero
+ } THREADNAME_INFO;
+}
+
+VDThreadID VDGetCurrentThreadID() {
+ return (VDThreadID)GetCurrentThreadId();
+}
+
+VDProcessId VDGetCurrentProcessId() {
+ return (VDProcessId)GetCurrentProcessId();
+}
+
+void VDSetThreadDebugName(VDThreadID tid, const char *name) {
+ THREADNAME_INFO info;
+ info.dwType = 0x1000;
+ info.szName = name;
+ info.dwThreadID = tid;
+ info.dwFlags = 0;
+
+ __try {
+ RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(DWORD), (ULONG_PTR *)&info);
+ } __except (EXCEPTION_CONTINUE_EXECUTION) {
+ }
+}
+
+void VDThreadSleep(int milliseconds) {
+ if (milliseconds > 0)
+ ::Sleep(milliseconds);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDThread::VDThread(const char *pszDebugName)
+ : mpszDebugName(pszDebugName)
+ , mhThread(0)
+ , mThreadID(0)
+{
+}
+
+VDThread::~VDThread() throw() {
+ if (isThreadAttached())
+ ThreadWait();
+}
+
+bool VDThread::ThreadStart() {
+ VDASSERT(!isThreadAttached());
+
+ if (!isThreadAttached())
+ mhThread = (void *)_beginthreadex(NULL, 0, StaticThreadStart, this, 0, &mThreadID);
+
+ return mhThread != 0;
+}
+
+void VDThread::ThreadDetach() {
+ if (isThreadAttached()) {
+ CloseHandle((HANDLE)mhThread);
+ mhThread = NULL;
+ mThreadID = 0;
+ }
+}
+
+void VDThread::ThreadWait() {
+ if (isThreadAttached()) {
+ WaitForSingleObject((HANDLE)mhThread, INFINITE);
+ ThreadDetach();
+ mThreadID = 0;
+ }
+}
+
+bool VDThread::isThreadActive() {
+ if (isThreadAttached()) {
+ if (WAIT_TIMEOUT == WaitForSingleObject((HANDLE)mhThread, 0))
+ return true;
+
+ ThreadDetach();
+ mThreadID = 0;
+ }
+ return false;
+}
+
+void VDThread::ThreadFinish() {
+ _endthreadex(0);
+}
+
+void *VDThread::ThreadLocation() const {
+ if (!isThreadAttached())
+ return NULL;
+
+ CONTEXT ctx;
+
+ ctx.ContextFlags = CONTEXT_CONTROL;
+
+ SuspendThread(mhThread);
+ GetThreadContext(mhThread, &ctx);
+ ResumeThread(mhThread);
+
+#ifdef _M_AMD64
+ return (void *)ctx.Rip;
+#else
+ return (void *)ctx.Eip;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+unsigned __stdcall VDThread::StaticThreadStart(void *pThisAsVoid) {
+ VDThread *pThis = static_cast<VDThread *>(pThisAsVoid);
+
+ // We cannot use mThreadID here because it might already have been
+ // invalidated by a detach in the main thread.
+ if (pThis->mpszDebugName)
+ VDSetThreadDebugName(GetCurrentThreadId(), pThis->mpszDebugName);
+
+ VDInitThreadData(pThis->mpszDebugName);
+
+ vdprotected1("running thread \"%.64s\"", const char *, pThis->mpszDebugName) {
+ pThis->ThreadRun();
+ }
+
+ // NOTE: Do not put anything referencing this here, since our object
+ // may have been destroyed by the threaded code.
+
+ VDDeinitThreadData();
+
+ return 0;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+void VDCriticalSection::StructCheck() {
+ VDASSERTCT(sizeof(CritSec) == sizeof(CRITICAL_SECTION));
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDSignal::VDSignal() {
+ hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+}
+
+VDSignalPersistent::VDSignalPersistent() {
+ hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+}
+
+VDSignalBase::~VDSignalBase() {
+ CloseHandle(hEvent);
+}
+
+void VDSignalBase::signal() {
+ SetEvent(hEvent);
+}
+
+void VDSignalBase::wait() {
+ WaitForSingleObject(hEvent, INFINITE);
+}
+
+bool VDSignalBase::check() {
+ return WAIT_OBJECT_0 == WaitForSingleObject(hEvent, 0);
+}
+
+int VDSignalBase::wait(VDSignalBase *second) {
+ HANDLE hArray[16];
+ DWORD dwRet;
+
+ hArray[0] = hEvent;
+ hArray[1] = second->hEvent;
+
+ dwRet = WaitForMultipleObjects(2, hArray, FALSE, INFINITE);
+
+ return dwRet == WAIT_FAILED ? -1 : dwRet - WAIT_OBJECT_0;
+}
+
+int VDSignalBase::wait(VDSignalBase *second, VDSignalBase *third) {
+ HANDLE hArray[3];
+ DWORD dwRet;
+
+ hArray[0] = hEvent;
+ hArray[1] = second->hEvent;
+ hArray[2] = third->hEvent;
+
+ dwRet = WaitForMultipleObjects(3, hArray, FALSE, INFINITE);
+
+ return dwRet == WAIT_FAILED ? -1 : dwRet - WAIT_OBJECT_0;
+}
+
+int VDSignalBase::waitMultiple(const VDSignalBase **signals, int count) {
+ VDASSERT(count <= 16);
+
+ HANDLE handles[16];
+ int active = 0;
+
+ for(int i=0; i<count; ++i) {
+ HANDLE h = signals[i]->hEvent;
+
+ if (h)
+ handles[active++] = h;
+ }
+
+ if (!active)
+ return -1;
+
+ DWORD dwRet = WaitForMultipleObjects(active, handles, FALSE, INFINITE);
+
+ return dwRet == WAIT_FAILED ? -1 : dwRet - WAIT_OBJECT_0;
+}
+
+void VDSignalPersistent::unsignal() {
+ ResetEvent(hEvent);
+}
+
+VDSemaphore::VDSemaphore(int initial)
+ : mKernelSema(CreateSemaphore(NULL, initial, 0x0fffffff, NULL))
+{
+}
+
+VDSemaphore::~VDSemaphore() {
+ if (mKernelSema)
+ CloseHandle(mKernelSema);
+}
+
+void VDSemaphore::Reset(int count) {
+ // reset semaphore to zero
+ while(WAIT_OBJECT_0 == WaitForSingleObject(mKernelSema, 0))
+ ;
+
+ if (count)
+ ReleaseSemaphore(mKernelSema, count, NULL);
+}
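
A usage sketch of the threading primitives above (illustrative only): a worker derived from VDThread that sleeps on a VDSignal until work is kicked or shutdown is requested.

    #include <vd2/system/thread.h>

    class RenderWorker : public VDThread {
    public:
        RenderWorker() : VDThread("RenderWorker"), mbQuit(false) {}
        ~RenderWorker() { Stop(); }

        void Start() { ThreadStart(); }
        void Kick()  { mSignal.signal(); }
        void Stop()  {
            mbQuit = true;
            mSignal.signal();
            ThreadWait();           // joins and detaches the worker thread
        }

    protected:
        void ThreadRun() {          // runs on the worker thread (see StaticThreadStart above)
            while(!mbQuit) {
                mSignal.wait();     // auto-reset event created by the VDSignal constructor
                if (mbQuit)
                    break;
                // ... perform one unit of work ...
            }
        }

    private:
        VDSignal mSignal;
        volatile bool mbQuit;       // simplified flag; real code might prefer VDAtomicInt
    };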
diff --git a/src/thirdparty/VirtualDub/system/source/thunk.cpp b/src/thirdparty/VirtualDub/system/source/thunk.cpp
new file mode 100644
index 000000000..b39089116
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/thunk.cpp
@@ -0,0 +1,306 @@
+#include "stdafx.h"
+#include <windows.h>
+#include <map>
+#include <vd2/system/atomic.h>
+#include <vd2/system/refcount.h>
+#include <vd2/system/thunk.h>
+#include <vd2/system/binary.h>
+
+class IVDJITAllocator {};
+
+class VDJITAllocator : public vdrefcounted<IVDJITAllocator> {
+public:
+ VDJITAllocator();
+ ~VDJITAllocator();
+
+ void *Allocate(size_t len);
+ void Free(void *p, size_t len);
+
+ void EndUpdate(void *p, size_t len);
+
+protected:
+ typedef std::map<void *, size_t> FreeChunks;
+ FreeChunks mFreeChunks;
+ FreeChunks::iterator mNextChunk;
+
+ typedef std::map<void *, size_t> Allocations;
+ Allocations mAllocations;
+
+ uintptr mAllocationGranularity;
+};
+
+VDJITAllocator::VDJITAllocator()
+ : mNextChunk(mFreeChunks.end())
+{
+ SYSTEM_INFO si;
+ GetSystemInfo(&si);
+
+ mAllocationGranularity = si.dwAllocationGranularity;
+}
+
+VDJITAllocator::~VDJITAllocator() {
+ for(Allocations::iterator it(mAllocations.begin()), itEnd(mAllocations.end()); it!=itEnd; ++it) {
+ VirtualFree(it->first, 0, MEM_RELEASE);
+ }
+}
+
+void *VDJITAllocator::Allocate(size_t len) {
+ len = (len + 15) & ~(size_t)15;
+
+ FreeChunks::iterator itMark(mNextChunk), itEnd(mFreeChunks.end()), it(itMark);
+
+ if (it == itEnd)
+ it = mFreeChunks.begin();
+
+ for(;;) {
+ for(; it!=itEnd; ++it) {
+ if (it->second >= len) {
+ it->second -= len;
+
+ void *p = (char *)it->first + it->second;
+
+ if (!it->second) {
+ if (mNextChunk == it)
+ ++mNextChunk;
+
+ mFreeChunks.erase(it);
+ }
+
+ return p;
+ }
+ }
+
+ if (itEnd == itMark)
+ break;
+
+ it = mFreeChunks.begin();
+ itEnd = itMark;
+ }
+
+ size_t alloclen = (len + mAllocationGranularity - 1) & ~(mAllocationGranularity - 1);
+
+ void *p = VirtualAlloc(NULL, alloclen, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+ if (p) {
+ try {
+ Allocations::iterator itA(mAllocations.insert(Allocations::value_type(p, alloclen)).first);
+
+ try {
+ if (len < alloclen)
+ mFreeChunks.insert(FreeChunks::value_type((char *)p + len, alloclen - len));
+
+ } catch(...) {
+ mAllocations.erase(itA);
+ throw;
+ }
+ } catch(...) {
+ VirtualFree(p, 0, MEM_RELEASE);
+ p = NULL;
+ }
+ }
+
+ return p;
+}
+
+void VDJITAllocator::Free(void *p, size_t len) {
+ VDASSERT(p);
+ VDASSERT(len < 0x10000);
+
+ FreeChunks::iterator cur(mFreeChunks.lower_bound(p));
+ if (cur != mFreeChunks.end() && (char *)p + len == cur->first) {
+ len += cur->second;
+ if (mNextChunk == cur)
+ ++mNextChunk;
+ cur = mFreeChunks.erase(cur);
+ }
+
+ if (cur != mFreeChunks.begin()) {
+ FreeChunks::iterator prev(cur);
+
+ --prev;
+ if ((char *)prev->first + prev->second == p) {
+ p = prev->first;
+ len += prev->second;
+ if (mNextChunk == prev)
+ ++mNextChunk;
+ mFreeChunks.erase(prev);
+ }
+ }
+
+ uintptr start = (size_t)p;
+ uintptr end = start + len;
+
+ if (!((start | end) & (mAllocationGranularity - 1))) {
+ Allocations::iterator it(mAllocations.find(p));
+
+ if (it != mAllocations.end()) {
+ VirtualFree((void *)start, 0, MEM_RELEASE);
+ mAllocations.erase(it);
+ return;
+ }
+ }
+
+ mFreeChunks.insert(FreeChunks::value_type((void *)start, end-start));
+}
+
+void VDJITAllocator::EndUpdate(void *p, size_t len) {
+ FlushInstructionCache(GetCurrentProcess(), p, len);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDJITAllocator *g_pVDJITAllocator;
+VDAtomicInt g_VDJITAllocatorLock;
+
+bool VDInitThunkAllocator() {
+ bool success = true;
+
+ while(g_VDJITAllocatorLock.xchg(1))
+ ::Sleep(1);
+
+ if (!g_pVDJITAllocator) {
+ g_pVDJITAllocator = new_nothrow VDJITAllocator;
+ if (!g_pVDJITAllocator)
+ success = false;
+ }
+
+ if (success)
+ g_pVDJITAllocator->AddRef();
+
+ VDVERIFY(1 == g_VDJITAllocatorLock.xchg(0));
+
+ return success;
+}
+
+void VDShutdownThunkAllocator() {
+ while(g_VDJITAllocatorLock.xchg(1))
+ ::Sleep(1);
+
+ VDASSERT(g_pVDJITAllocator);
+
+ if (!g_pVDJITAllocator->Release())
+ g_pVDJITAllocator = NULL;
+
+ VDVERIFY(1 == g_VDJITAllocatorLock.xchg(0));
+}
+
+void *VDAllocateThunkMemory(size_t len) {
+ return g_pVDJITAllocator->Allocate(len);
+}
+
+void VDFreeThunkMemory(void *p, size_t len) {
+ g_pVDJITAllocator->Free(p, len);
+}
+
+void VDSetThunkMemory(void *p, const void *src, size_t len) {
+ memcpy(p, src, len);
+ g_pVDJITAllocator->EndUpdate(p, len);
+}
+
+void VDFlushThunkMemory(void *p, size_t len) {
+ g_pVDJITAllocator->EndUpdate(p, len);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+#ifdef _M_AMD64
+ extern "C" void VDMethodToFunctionThunk64();
+#else
+ extern "C" void VDMethodToFunctionThunk32();
+ extern "C" void VDMethodToFunctionThunk32_4();
+ extern "C" void VDMethodToFunctionThunk32_8();
+ extern "C" void VDMethodToFunctionThunk32_12();
+ extern "C" void VDMethodToFunctionThunk32_16();
+#endif
+
+VDFunctionThunk *VDCreateFunctionThunkFromMethod(void *method, void *pThis, size_t argbytes, bool stdcall_thunk) {
+#if defined(_M_IX86)
+ void *pThunk = VDAllocateThunkMemory(16);
+
+ if (!pThunk)
+ return NULL;
+
+ if (stdcall_thunk || !argbytes) { // thiscall -> stdcall (easy case)
+ uint8 thunkbytes[16]={
+ 0xB9, 0x00, 0x00, 0x00, 0x00, // mov ecx, this
+ 0xE9, 0x00, 0x00, 0x00, 0x00 // jmp fn
+ };
+
+
+ VDWriteUnalignedLEU32(thunkbytes+1, (uint32)(uintptr)pThis);
+ VDWriteUnalignedLEU32(thunkbytes+6, (uint32)method - ((uint32)pThunk + 10));
+
+ VDSetThunkMemory(pThunk, thunkbytes, 15);
+ } else { // thiscall -> cdecl (hard case)
+ uint8 thunkbytes[16]={
+ 0xE8, 0x00, 0x00, 0x00, 0x00, // call VDFunctionThunk32
+ 0xC3, // ret
+ argbytes, // db argbytes
+ 0, // db 0
+ 0x00, 0x00, 0x00, 0x00, // dd method
+ 0x00, 0x00, 0x00, 0x00, // dd this
+ };
+
+ void *adapter;
+
+ switch(argbytes) {
+ case 4: adapter = VDMethodToFunctionThunk32_4; break;
+ case 8: adapter = VDMethodToFunctionThunk32_8; break;
+ case 12: adapter = VDMethodToFunctionThunk32_12; break;
+ case 16: adapter = VDMethodToFunctionThunk32_16; break;
+ default: adapter = VDMethodToFunctionThunk32; break;
+ }
+
+ VDWriteUnalignedLEU32(thunkbytes+1, (uint32)(uintptr)adapter - ((uint32)pThunk + 5));
+ VDWriteUnalignedLEU32(thunkbytes+8, (uint32)(uintptr)method);
+ VDWriteUnalignedLEU32(thunkbytes+12, (uint32)(uintptr)pThis);
+
+ VDSetThunkMemory(pThunk, thunkbytes, 16);
+ }
+
+ return (VDFunctionThunk *)pThunk;
+#elif defined(_M_AMD64)
+ void *pThunk = VDAllocateThunkMemory(44);
+ if (!pThunk)
+ return NULL;
+
+ uint8 thunkbytes[44]={
+ 0x48, 0x8D, 0x04, 0x25, 0x10, 0x00, 0x00, // lea rax, [eip+16]
+ 0x00,
+ 0xFF, 0x24, 0x25, 0x08, 0x00, 0x00, 0x00, // jmp qword ptr [rip+8]
+ 0x90, // nop
+ 0, 0, 0, 0, 0, 0, 0, 0, // dq VDFunctionThunk64
+ 0, 0, 0, 0, 0, 0, 0, 0, // dq method
+ 0, 0, 0, 0, 0, 0, 0, 0, // dq this
+ 0, 0, 0, 0 // dd argspillbytes
+ };
+
+ VDWriteUnalignedLEU64(thunkbytes+16, (uint64)(uintptr)VDMethodToFunctionThunk64);
+ VDWriteUnalignedLEU64(thunkbytes+24, (uint64)(uintptr)method);
+ VDWriteUnalignedLEU64(thunkbytes+32, (uint64)(uintptr)pThis);
+
+ // The stack must be aligned to a 16 byte boundary when the CALL
+ // instruction occurs. On entry to VDFunctionThunk64(), the stack is misaligned
+ // to 16n+8. Therefore, the number of argbytes must be 16m+8 and the number of
+ // argspillbytes must be 16m+8-24.
+ VDWriteUnalignedLEU32(thunkbytes+40, argbytes < 32 ? 0 : ((argbytes - 16 + 15) & ~15));
+
+ VDSetThunkMemory(pThunk, thunkbytes, 44);
+
+ return (VDFunctionThunk *)pThunk;
+#else
+ return NULL;
+#endif
+}
+
+void VDDestroyFunctionThunk(VDFunctionThunk *pFnThunk) {
+ // validate thunk
+#if defined(_M_IX86)
+ VDASSERT(((const uint8 *)pFnThunk)[0] == 0xB9 || ((const uint8 *)pFnThunk)[0] == 0xE8);
+ VDFreeThunkMemory(pFnThunk, 16);
+#elif defined(_M_AMD64)
+ VDFreeThunkMemory(pFnThunk, 44);
+#else
+ VDASSERT(false);
+#endif
+
+}
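
The raw entry point above takes the target method as a void pointer plus an explicit argument-byte count; callers normally go through a type-safe wrapper in <vd2/system/thunk.h> (that is how the VDLazyTimer constructor in time.cpp below invokes it). A hedged sketch assuming that wrapper:

    // Sketch: exposing a member function to Win32 as a flat TIMERPROC-style callback.
    #include <windows.h>
    #include <vd2/system/thunk.h>

    class Poller {
    public:
        Poller() : mpThunk(NULL) {
            VDInitThunkAllocator();
            // Assumed template overload, mirroring the VDLazyTimer call below: binds
            // 'this' so the timer can reach OnTimer() through a plain function pointer.
            mpThunk = VDCreateFunctionThunkFromMethod(this, &Poller::OnTimer, true);
        }
        ~Poller() {
            if (mpThunk)
                VDDestroyFunctionThunk(mpThunk);
            VDShutdownThunkAllocator();
        }
        void Arm(UINT ms) { SetTimer(NULL, 0, ms, (TIMERPROC)mpThunk); }

    private:
        void OnTimer(HWND, UINT, UINT_PTR, DWORD) { /* periodic work */ }
        VDFunctionThunk *mpThunk;
    };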
diff --git a/src/thirdparty/VirtualDub/system/source/time.cpp b/src/thirdparty/VirtualDub/system/source/time.cpp
new file mode 100644
index 000000000..ae0c3e4bf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/time.cpp
@@ -0,0 +1,270 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <new>
+
+#include <windows.h>
+#include <mmsystem.h>
+
+#include <vd2/system/time.h>
+#include <vd2/system/thread.h>
+#include <vd2/system/thunk.h>
+
+#ifdef _MSC_VER
+ #pragma comment(lib, "winmm")
+#endif
+
+uint32 VDGetCurrentTick() {
+ return (uint32)GetTickCount();
+}
+
+uint64 VDGetPreciseTick() {
+ LARGE_INTEGER li;
+ QueryPerformanceCounter(&li);
+ return li.QuadPart;
+}
+
+namespace {
+ uint64 VDGetPreciseTicksPerSecondNowI() {
+ LARGE_INTEGER freq;
+ QueryPerformanceFrequency(&freq);
+ return freq.QuadPart;
+ }
+
+ double VDGetPreciseTicksPerSecondNow() {
+ LARGE_INTEGER freq;
+ QueryPerformanceFrequency(&freq);
+ return (double)freq.QuadPart;
+ }
+}
+
+uint64 VDGetPreciseTicksPerSecondI() {
+ static uint64 ticksPerSecond = VDGetPreciseTicksPerSecondNowI();
+
+ return ticksPerSecond;
+}
+
+double VDGetPreciseTicksPerSecond() {
+ static double ticksPerSecond = VDGetPreciseTicksPerSecondNow();
+
+ return ticksPerSecond;
+}
+
+double VDGetPreciseSecondsPerTick() {
+ static double secondsPerTick = 1.0 / VDGetPreciseTicksPerSecondNow();
+
+ return secondsPerTick;
+}
+
+uint32 VDGetAccurateTick() {
+ return timeGetTime();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+VDCallbackTimer::VDCallbackTimer()
+ : mTimerAccuracy(0)
+{
+}
+
+VDCallbackTimer::~VDCallbackTimer() {
+ Shutdown();
+}
+
+bool VDCallbackTimer::Init(IVDTimerCallback *pCB, uint32 period_ms) {
+ return Init2(pCB, period_ms * 10000);
+}
+
+bool VDCallbackTimer::Init2(IVDTimerCallback *pCB, uint32 period_100ns) {
+ return Init3(pCB, period_100ns, period_100ns >> 1, true);
+}
+
+bool VDCallbackTimer::Init3(IVDTimerCallback *pCB, uint32 period_100ns, uint32 accuracy_100ns, bool precise) {
+ Shutdown();
+
+ mpCB = pCB;
+ mbExit = false;
+ mbPrecise = precise;
+
+ UINT accuracy = accuracy_100ns / 10000;
+ if (accuracy > 10)
+ accuracy = 10;
+
+ TIMECAPS tc;
+ if (TIMERR_NOERROR == timeGetDevCaps(&tc, sizeof tc)) {
+ if (accuracy < tc.wPeriodMin)
+ accuracy = tc.wPeriodMin;
+ if (accuracy > tc.wPeriodMax)
+ accuracy = tc.wPeriodMax;
+ }
+
+ if (TIMERR_NOERROR == timeBeginPeriod(accuracy)) {
+ mTimerAccuracy = accuracy;
+ mTimerPeriod = period_100ns;
+ mTimerPeriodAdjustment = 0;
+ mTimerPeriodDelta = 0;
+
+ if (ThreadStart())
+ return true;
+ }
+
+ Shutdown();
+
+ return false;
+}
+
+void VDCallbackTimer::Shutdown() {
+ if (isThreadActive()) {
+ mbExit = true;
+ msigExit.signal();
+ ThreadWait();
+ }
+
+ if (mTimerAccuracy) {
+ timeEndPeriod(mTimerAccuracy);
+ mTimerAccuracy = 0;
+ }
+}
+
+void VDCallbackTimer::SetRateDelta(int delta_100ns) {
+ mTimerPeriodDelta = delta_100ns;
+}
+
+void VDCallbackTimer::AdjustRate(int adjustment_100ns) {
+ mTimerPeriodAdjustment += adjustment_100ns;
+}
+
+bool VDCallbackTimer::IsTimerRunning() const {
+ return const_cast<VDCallbackTimer *>(this)->isThreadActive();
+}
+
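+// The timer period is tracked in 100ns units and split into a millisecond part
+// (periodHi) and a sub-millisecond remainder (periodLo). The remainder is
+// accumulated in nextTimeLo and carried into nextTimeHi once it reaches 10000
+// (one full millisecond), which keeps long-term drift bounded even though the
+// wait itself only has millisecond resolution.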
+void VDCallbackTimer::ThreadRun() {
+ uint32 timerPeriod = mTimerPeriod;
+ uint32 periodHi = timerPeriod / 10000;
+ uint32 periodLo = timerPeriod % 10000;
+ uint32 nextTimeHi = VDGetAccurateTick() + periodHi;
+ uint32 nextTimeLo = periodLo;
+
+ uint32 maxDelay = mTimerPeriod / 2000;
+
+ SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
+
+ HANDLE hExit = msigExit.getHandle();
+
+ if (!mbPrecise) {
+ while(!mbExit) {
+ DWORD res = ::WaitForSingleObject(hExit, periodHi);
+
+ if (res != WAIT_TIMEOUT)
+ break;
+
+ mpCB->TimerCallback();
+ }
+ } else {
+ while(!mbExit) {
+ uint32 currentTime = VDGetAccurateTick();
+ sint32 delta = nextTimeHi - currentTime;
+
+ if (delta > 0) {
+ // safety guard against the clock going nuts
+ DWORD res;
+ if ((uint32)delta > maxDelay)
+ res = ::WaitForSingleObject(hExit, maxDelay);
+ else
+ res = ::WaitForSingleObject(hExit, nextTimeHi - currentTime);
+
+ if (res != WAIT_TIMEOUT)
+ break;
+ }
+
+ if ((uint32)abs(delta) > maxDelay) {
+ nextTimeHi = currentTime + periodHi;
+ nextTimeLo = periodLo;
+ } else {
+ nextTimeLo += periodLo;
+ nextTimeHi += periodHi;
+ if (nextTimeLo >= 10000) {
+ nextTimeLo -= 10000;
+ ++nextTimeHi;
+ }
+ }
+
+ mpCB->TimerCallback();
+
+ int adjust = mTimerPeriodAdjustment.xchg(0);
+ int perdelta = mTimerPeriodDelta;
+
+ if (adjust || perdelta) {
+ timerPeriod += adjust;
+ periodHi = (timerPeriod+perdelta) / 10000;
+ periodLo = (timerPeriod+perdelta) % 10000;
+ }
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
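+// VDLazyTimer wraps a one-shot Win32 SetTimer(). A TIMERPROC cannot carry a
+// 'this' pointer, so the constructor builds a call thunk that forwards the
+// callback to StaticTimeCallback() on this instance.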
+VDLazyTimer::VDLazyTimer()
+ : mTimerId(NULL)
+ , mpCB(NULL)
+{
+ if (!VDInitThunkAllocator())
+ throw MyError("Unable to initialize thunk allocator.");
+
+ mpThunk = VDCreateFunctionThunkFromMethod(this, &VDLazyTimer::StaticTimeCallback, true);
+ if (!mpThunk) {
+ VDShutdownThunkAllocator();
+ throw MyError("Unable to create timer thunk.");
+ }
+}
+
+VDLazyTimer::~VDLazyTimer() {
+ Stop();
+
+ VDDestroyFunctionThunk(mpThunk);
+ VDShutdownThunkAllocator();
+}
+
+void VDLazyTimer::SetOneShot(IVDTimerCallback *pCB, uint32 delay) {
+ Stop();
+
+ mpCB = pCB;
+ mTimerId = SetTimer(NULL, 0, delay, (TIMERPROC)mpThunk);
+}
+
+void VDLazyTimer::Stop() {
+ if (mTimerId) {
+ KillTimer(NULL, mTimerId);
+ mTimerId = 0;
+ }
+}
+
+void VDLazyTimer::StaticTimeCallback(VDZHWND hwnd, VDZUINT msg, VDZUINT_PTR id, VDZDWORD time) {
+ Stop();
+
+ if (mpCB)
+ mpCB->TimerCallback();
+}
diff --git a/src/thirdparty/VirtualDub/system/source/tls.cpp b/src/thirdparty/VirtualDub/system/source/tls.cpp
new file mode 100644
index 000000000..71044d1e5
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/tls.cpp
@@ -0,0 +1,43 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/tls.h>
+
+VDThreadInitHook g_pInitHook;
+
+void VDInitThreadData(const char *pszThreadName) {
+ if (g_pInitHook)
+ g_pInitHook(true, pszThreadName);
+}
+
+void VDDeinitThreadData() {
+ if (g_pInitHook)
+ g_pInitHook(false, NULL);
+}
+
+void VDSetThreadInitHook(VDThreadInitHook pHook) {
+ g_pInitHook = pHook;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/vdstl.cpp b/src/thirdparty/VirtualDub/system/source/vdstl.cpp
new file mode 100644
index 000000000..cabfee02f
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/vdstl.cpp
@@ -0,0 +1,32 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2008 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/error.h>
+#include <vd2/system/vdstl.h>
+
+void VDNORETURN vdallocator_base::throw_oom() {
+ throw MyMemoryError();
+}
diff --git a/src/thirdparty/VirtualDub/system/source/vectors.cpp b/src/thirdparty/VirtualDub/system/source/vectors.cpp
new file mode 100644
index 000000000..c54885c45
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/vectors.cpp
@@ -0,0 +1,77 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/vdstl.h>
+#include <vd2/system/vectors.h>
+
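+// Solves the n x n system src*x = b in place with Gaussian elimination and
+// partial pivoting; the solution is written back into b, and false is returned
+// if a pivot falls below 'tolerance'. Illustrative call with rows stored
+// contiguously (stride_elements == n):
+//
+//     double m[4] = { 2, 1,
+//                     1, 3 };
+//     double b[2] = { 3, 5 };
+//     VDSolveLinearEquation(m, 2, 2, b, 1e-12);   // b becomes { 0.8, 1.4 }
+//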
+bool VDSolveLinearEquation(double *src, int n, ptrdiff_t stride_elements, double *b, double tolerance) {
+ vdfastvector<double *> array(n);
+ double **m = &array[0];
+ int i, j, k;
+
+ for(i=0; i<n; ++i) {
+ m[i] = src;
+ src += stride_elements;
+ }
+
+ // factor U
+ for(i=0; i<n; ++i) {
+ int best = i;
+
+ for(j=i+1; j<n; ++j) {
+ if (fabs(m[best][i]) < fabs(m[j][i]))
+ best = j;
+ }
+
+ std::swap(m[i], m[best]);
+ std::swap(b[i], b[best]);
+
+ if (fabs(m[i][i]) < tolerance)
+ return false;
+
+ double f = 1.0 / m[i][i];
+
+ m[i][i] = 1.0;
+
+ for(j=i+1; j<n; ++j)
+ m[i][j] *= f;
+
+ b[i] *= f;
+
+ for(j=i+1; j<n; ++j) {
+ b[j] -= b[i] * m[j][i];
+ for(k=n-1; k>=i; --k)
+ m[j][k] -= m[i][k] * m[j][i];
+ }
+ }
+
+ // factor L
+ for(i=n-1; i>=0; --i)
+ for(j=i-1; j>=0; --j)
+ b[j] -= b[i] * m[j][i];
+
+ return true;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/w32assist.cpp b/src/thirdparty/VirtualDub/system/source/w32assist.cpp
new file mode 100644
index 000000000..1faf527ed
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/w32assist.cpp
@@ -0,0 +1,580 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/w32assist.h>
+#include <vd2/system/text.h>
+#include <vd2/system/vdstl.h>
+
+bool VDIsForegroundTaskW32() {
+ HWND hwndFore = GetForegroundWindow();
+
+ if (!hwndFore)
+ return false;
+
+ DWORD dwProcessId = 0;
+ GetWindowThreadProcessId(hwndFore, &dwProcessId);
+
+ return dwProcessId == GetCurrentProcessId();
+}
+
+LPVOID VDConvertThreadToFiberW32(LPVOID parm) {
+ typedef LPVOID (WINAPI *tpConvertThreadToFiber)(LPVOID p);
+ static tpConvertThreadToFiber ctof = (tpConvertThreadToFiber)GetProcAddress(GetModuleHandle("kernel32"), "ConvertThreadToFiber");
+
+ if (!ctof)
+ return NULL;
+
+ return ctof(parm);
+}
+
+void VDSwitchToFiberW32(LPVOID fiber) {
+ typedef void (WINAPI *tpSwitchToFiber)(LPVOID p);
+ static tpSwitchToFiber stof = (tpSwitchToFiber)GetProcAddress(GetModuleHandle("kernel32"), "SwitchToFiber");
+
+ if (stof)
+ stof(fiber);
+}
+
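+// Computes the number of bytes occupied by a BITMAPINFOHEADER plus any palette
+// or bitfield masks that follow it. Illustrative arithmetic: an 8bpp BI_RGB
+// header with biClrUsed == 0 implies a full 256-entry palette, giving
+// 40 + 256*4 = 1064 bytes.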
+int VDGetSizeOfBitmapHeaderW32(const BITMAPINFOHEADER *pHdr) {
+ int palents = 0;
+
+ if ((pHdr->biCompression == BI_RGB || pHdr->biCompression == BI_RLE4 || pHdr->biCompression == BI_RLE8) && pHdr->biBitCount <= 8) {
+ palents = pHdr->biClrUsed;
+ if (!palents)
+ palents = 1 << pHdr->biBitCount;
+ }
+ int size = pHdr->biSize + palents * sizeof(RGBQUAD);
+
+ if (pHdr->biSize < sizeof(BITMAPV4HEADER) && pHdr->biCompression == BI_BITFIELDS)
+ size += sizeof(DWORD) * 3;
+
+ return size;
+}
+
+void VDSetWindowTextW32(HWND hwnd, const wchar_t *s) {
+ if (VDIsWindowsNT()) {
+ SetWindowTextW(hwnd, s);
+ } else {
+ SetWindowTextA(hwnd, VDTextWToA(s).c_str());
+ }
+}
+
+void VDSetWindowTextFW32(HWND hwnd, const wchar_t *format, ...) {
+ va_list val;
+
+ va_start(val, format);
+ {
+ wchar_t buf[512];
+ int r = vswprintf(buf, 512, format, val);
+
+ if ((unsigned)r < 512) {
+ VDSetWindowTextW32(hwnd, buf);
+ va_end(val);
+ return;
+ }
+ }
+
+ VDStringW s;
+ s.append_vsprintf(format, val);
+ VDSetWindowTextW32(hwnd, s.c_str());
+
+ va_end(val);
+}
+
+VDStringW VDGetWindowTextW32(HWND hwnd) {
+ union {
+ wchar_t w[256];
+ char a[512];
+ } buf;
+
+ if (VDIsWindowsNT()) {
+ int len = GetWindowTextLengthW(hwnd);
+
+ if (len > 255) {
+ vdblock<wchar_t> tmp(len + 1);
+ len = GetWindowTextW(hwnd, tmp.data(), tmp.size());
+
+ VDStringW text(tmp.data(), len);
+ return text;
+ } else if (len > 0) {
+ len = GetWindowTextW(hwnd, buf.w, 256);
+
+ VDStringW text(buf.w, len);
+ return text;
+ }
+ } else {
+ int len = GetWindowTextLengthA(hwnd);
+
+ if (len > 511) {
+ vdblock<char> tmp(len + 1);
+ len = GetWindowTextA(hwnd, tmp.data(), tmp.size());
+
+ VDStringW text(VDTextAToW(tmp.data(), len));
+ return text;
+ } else if (len > 0) {
+ len = GetWindowTextA(hwnd, buf.a, 512);
+
+ VDStringW text(VDTextAToW(buf.a, len));
+ return text;
+ }
+ }
+
+ return VDStringW();
+}
+
+void VDAppendMenuW32(HMENU hmenu, UINT flags, UINT id, const wchar_t *text){
+ if (VDIsWindowsNT()) {
+ AppendMenuW(hmenu, flags, id, text);
+ } else {
+ AppendMenuA(hmenu, flags, id, VDTextWToA(text).c_str());
+ }
+}
+
+void VDCheckMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked) {
+ CheckMenuItem(hmenu, cmd, checked ? MF_BYCOMMAND|MF_CHECKED : MF_BYCOMMAND|MF_UNCHECKED);
+}
+
+void VDCheckRadioMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool checked) {
+ MENUITEMINFOA mii;
+
+ mii.cbSize = sizeof(MENUITEMINFOA);
+ mii.fMask = MIIM_FTYPE | MIIM_STATE;
+ if (GetMenuItemInfo(hmenu, cmd, FALSE, &mii)) {
+ mii.fType |= MFT_RADIOCHECK;
+ mii.fState &= ~MFS_CHECKED;
+ if (checked)
+ mii.fState |= MFS_CHECKED;
+ SetMenuItemInfo(hmenu, cmd, FALSE, &mii);
+ }
+}
+
+void VDEnableMenuItemByCommandW32(HMENU hmenu, UINT cmd, bool enabled) {
+ EnableMenuItem(hmenu, cmd, enabled ? MF_BYCOMMAND|MF_ENABLED : MF_BYCOMMAND|MF_GRAYED);
+}
+
+VDStringW VDGetMenuItemTextByCommandW32(HMENU hmenu, UINT cmd) {
+ VDStringW s;
+
+ if (VDIsWindowsNT()) {
+ MENUITEMINFOW mmiW;
+ vdfastfixedvector<wchar_t, 256> bufW;
+
+ mmiW.cbSize = MENUITEMINFO_SIZE_VERSION_400W;
+ mmiW.fMask = MIIM_TYPE;
+ mmiW.fType = MFT_STRING;
+ mmiW.dwTypeData = NULL;
+ mmiW.cch = 0; // required to avoid crash on NT4
+
+ if (GetMenuItemInfoW(hmenu, cmd, FALSE, &mmiW)) {
+ bufW.resize(mmiW.cch + 1, 0);
+ ++mmiW.cch;
+ mmiW.dwTypeData = bufW.data();
+
+ if (GetMenuItemInfoW(hmenu, cmd, FALSE, &mmiW))
+ s = bufW.data();
+ }
+ } else {
+ MENUITEMINFOA mmiA;
+ vdfastfixedvector<char, 256> bufA;
+
+ mmiA.cbSize = MENUITEMINFO_SIZE_VERSION_400A;
+ mmiA.fMask = MIIM_TYPE;
+ mmiA.fType = MFT_STRING;
+ mmiA.dwTypeData = NULL;
+
+ if (GetMenuItemInfoA(hmenu, cmd, FALSE, &mmiA)) {
+ bufA.resize(mmiA.cch + 1, 0);
+ ++mmiA.cch;
+ mmiA.dwTypeData = bufA.data();
+
+ if (GetMenuItemInfoA(hmenu, cmd, FALSE, &mmiA))
+ s = VDTextAToW(bufA.data());
+ }
+ }
+
+ return s;
+}
+
+void VDSetMenuItemTextByCommandW32(HMENU hmenu, UINT cmd, const wchar_t *text) {
+ if (VDIsWindowsNT()) {
+ MENUITEMINFOW mmiW;
+
+ mmiW.cbSize = MENUITEMINFO_SIZE_VERSION_400W;
+ mmiW.fMask = MIIM_TYPE;
+ mmiW.fType = MFT_STRING;
+ mmiW.dwTypeData = (LPWSTR)text;
+
+ SetMenuItemInfoW(hmenu, cmd, FALSE, &mmiW);
+ } else {
+ MENUITEMINFOA mmiA;
+ VDStringA textA(VDTextWToA(text));
+
+ mmiA.cbSize = MENUITEMINFO_SIZE_VERSION_400A;
+ mmiA.fMask = MIIM_TYPE;
+ mmiA.fType = MFT_STRING;
+ mmiA.dwTypeData = (LPSTR)textA.c_str();
+
+ SetMenuItemInfoA(hmenu, cmd, FALSE, &mmiA);
+ }
+}
+
+LRESULT VDDualCallWindowProcW32(WNDPROC wp, HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) {
+ return (IsWindowUnicode(hwnd) ? CallWindowProcW : CallWindowProcA)(wp, hwnd, msg, wParam, lParam);
+}
+
+LRESULT VDDualDefWindowProcW32(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) {
+ return IsWindowUnicode(hwnd) ? DefWindowProcW(hwnd, msg, wParam, lParam) : DefWindowProcA(hwnd, msg, wParam, lParam);
+}
+
+EXECUTION_STATE VDSetThreadExecutionStateW32(EXECUTION_STATE esFlags) {
+ EXECUTION_STATE es = 0;
+
+ // SetThreadExecutionState(): requires Windows 98+/2000+.
+ typedef EXECUTION_STATE (WINAPI *tSetThreadExecutionState)(EXECUTION_STATE);
+ static tSetThreadExecutionState pFunc = (tSetThreadExecutionState)GetProcAddress(GetModuleHandle("kernel32"), "SetThreadExecutionState");
+
+ if (pFunc)
+ es = pFunc(esFlags);
+
+ return es;
+}
+
+bool VDSetFilePointerW32(HANDLE h, sint64 pos, DWORD dwMoveMethod) {
+ LONG posHi = (LONG)(pos >> 32);
+ DWORD result = SetFilePointer(h, (LONG)pos, &posHi, dwMoveMethod);
+
+ if (result != INVALID_SET_FILE_POINTER)
+ return true;
+
+ DWORD dwError = GetLastError();
+
+ return (dwError == NO_ERROR);
+}
+
+bool VDGetFileSizeW32(HANDLE h, sint64& size) {
+ DWORD dwSizeHigh;
+ DWORD dwSizeLow = GetFileSize(h, &dwSizeHigh);
+
+ if (dwSizeLow == (DWORD)-1 && GetLastError() != NO_ERROR)
+ return false;
+
+ size = dwSizeLow + ((sint64)dwSizeHigh << 32);
+ return true;
+}
+
+#if !defined(_MSC_VER) || _MSC_VER < 1300
+HMODULE VDGetLocalModuleHandleW32() {
+ MEMORY_BASIC_INFORMATION meminfo;
+ static HMODULE shmod = (VirtualQuery(&VDGetLocalModuleHandleW32, &meminfo, sizeof meminfo), (HMODULE)meminfo.AllocationBase);
+
+ return shmod;
+}
+#endif
+
+bool VDDrawTextW32(HDC hdc, const wchar_t *s, int nCount, LPRECT lpRect, UINT uFormat) {
+ RECT r;
+ if (VDIsWindowsNT()) {
+ // If multiline and vcentered (not normally supported...)
+ if (!((uFormat ^ DT_VCENTER) & (DT_VCENTER|DT_SINGLELINE))) {
+ uFormat &= ~DT_VCENTER;
+
+ r = *lpRect;
+ if (!DrawTextW(hdc, s, nCount, &r, uFormat | DT_CALCRECT))
+ return false;
+
+ int dx = ((lpRect->right - lpRect->left) - (r.right - r.left)) >> 1;
+ int dy = ((lpRect->bottom - lpRect->top) - (r.bottom - r.top)) >> 1;
+
+ r.left += dx;
+ r.right += dx;
+ r.top += dy;
+ r.bottom += dy;
+ lpRect = &r;
+ }
+
+ return !!DrawTextW(hdc, s, nCount, lpRect, uFormat);
+ } else {
+ VDStringA strA(VDTextWToA(s, nCount));
+
+ // If multiline and vcentered (not normally supported...)
+ if (!((uFormat ^ DT_VCENTER) & (DT_VCENTER|DT_SINGLELINE))) {
+ uFormat &= ~DT_VCENTER;
+
+ r = *lpRect;
+ if (!DrawTextA(hdc, strA.data(), strA.size(), &r, uFormat | DT_CALCRECT))
+ return false;
+
+ int dx = ((lpRect->right - lpRect->left) - (r.right - r.left)) >> 1;
+ int dy = ((lpRect->bottom - lpRect->top) - (r.bottom - r.top)) >> 1;
+
+ r.left += dx;
+ r.right += dx;
+ r.top += dy;
+ r.bottom += dy;
+ lpRect = &r;
+ }
+
+ return !!DrawTextA(hdc, strA.data(), strA.size(), lpRect, uFormat);
+ }
+}
+
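+// Patches a single import address table entry of hmod: the import descriptors
+// are scanned for srcModule, that module's thunks are scanned for 'name', and
+// the matching IAT slot is rewritten to pNewValue -- optionally only when it
+// still holds pCompareValue, with the previous value returned via ppOldValue.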
+bool VDPatchModuleImportTableW32(HMODULE hmod, const char *srcModule, const char *name, void *pCompareValue, void *pNewValue, void *volatile *ppOldValue) {
+ char *pBase = (char *)hmod;
+
+ __try {
+ // The PE header offset is stored at hmod+0x3c. Read the signature there,
+ // then step into the optional header, which holds the data directories.
+
+ const uint32 peoffset = ((const long *)pBase)[15];
+ const uint32 signature = *(uint32 *)(pBase + peoffset);
+
+ if (signature != IMAGE_NT_SIGNATURE)
+ return false;
+
+ const IMAGE_FILE_HEADER *pHeader = (const IMAGE_FILE_HEADER *)(pBase + peoffset + 4);
+
+ // Verify the PE optional structure.
+
+ if (pHeader->SizeOfOptionalHeader < 104)
+ return false;
+
+ // Find import header.
+
+ const IMAGE_IMPORT_DESCRIPTOR *pImportDir;
+ int nImports;
+
+ switch(*(short *)((char *)pHeader + IMAGE_SIZEOF_FILE_HEADER)) {
+
+#ifdef _M_AMD64
+ case IMAGE_NT_OPTIONAL_HDR64_MAGIC:
+ {
+ const IMAGE_OPTIONAL_HEADER64 *pOpt = (IMAGE_OPTIONAL_HEADER64 *)((const char *)pHeader + sizeof(IMAGE_FILE_HEADER));
+
+ if (pOpt->NumberOfRvaAndSizes < 2)
+ return false;
+
+ pImportDir = (const IMAGE_IMPORT_DESCRIPTOR *)(pBase + pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
+ nImports = pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size / sizeof(IMAGE_IMPORT_DESCRIPTOR);
+ }
+ break;
+#else
+ case IMAGE_NT_OPTIONAL_HDR32_MAGIC:
+ {
+ const IMAGE_OPTIONAL_HEADER32 *pOpt = (IMAGE_OPTIONAL_HEADER32 *)((const char *)pHeader + sizeof(IMAGE_FILE_HEADER));
+
+ if (pOpt->NumberOfRvaAndSizes < 2)
+ return false;
+
+ pImportDir = (const IMAGE_IMPORT_DESCRIPTOR *)(pBase + pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
+ nImports = pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size / sizeof(IMAGE_IMPORT_DESCRIPTOR);
+ }
+ break;
+#endif
+
+ default: // unrecognized optional header magic
+ return false;
+ }
+
+ // Hmmm... no imports?
+
+ if ((const char *)pImportDir == pBase)
+ return false;
+
+ // Scan down the import descriptors, looking for the requested source module.
+
+ int i;
+
+ for(i=0; i<nImports; ++i) {
+ if (!_stricmp(pBase + pImportDir[i].Name, srcModule))
+ break;
+ }
+
+ if (i >= nImports)
+ return false;
+
+ // Found it. Scan that module's import thunks until we find the requested function.
+
+ const long *pImports = (const long *)(pBase + pImportDir[i].OriginalFirstThunk);
+ void * volatile *pVector = (void * volatile *)(pBase + pImportDir[i].FirstThunk);
+
+ while(*pImports) {
+ if (*pImports >= 0) {
+ const char *pName = pBase + *pImports + 2;
+
+ if (!strcmp(pName, name)) {
+
+ // Found it! Reset the protection.
+
+ DWORD dwOldProtect;
+
+ if (VirtualProtect((void *)pVector, sizeof(void *), PAGE_EXECUTE_READWRITE, &dwOldProtect)) {
+ if (ppOldValue) {
+ for(;;) {
+ void *old = *pVector;
+ if (pCompareValue && pCompareValue != old)
+ return false;
+
+ *ppOldValue = old;
+ if (old == VDAtomicCompareExchangePointer(pVector, pNewValue, old))
+ break;
+ }
+ } else {
+ *pVector = pNewValue;
+ }
+
+ VirtualProtect((void *)pVector, sizeof(void *), dwOldProtect, &dwOldProtect);
+
+ return true;
+ }
+
+ break;
+ }
+ }
+
+ ++pImports;
+ ++pVector;
+ }
+ } __except(1) {
+ }
+
+ return false;
+}
+
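+// Export-side counterpart: the export directory's name table is searched for
+// 'name' and the matching entry in the export address table is redirected to
+// pNewValue, expressed as an RVA relative to the module base.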
+bool VDPatchModuleExportTableW32(HMODULE hmod, const char *name, void *pCompareValue, void *pNewValue, void *volatile *ppOldValue) {
+ char *pBase = (char *)hmod;
+
+ __try {
+ // The PE header offset is stored at hmod+0x3c. Read the signature there,
+ // then step into the optional header, which holds the data directories.
+
+ const uint32 peoffset = ((const long *)pBase)[15];
+ const uint32 signature = *(uint32 *)(pBase + peoffset);
+
+ if (signature != IMAGE_NT_SIGNATURE)
+ return false;
+
+ const IMAGE_FILE_HEADER *pHeader = (const IMAGE_FILE_HEADER *)(pBase + peoffset + 4);
+
+ // Verify the PE optional structure.
+
+ if (pHeader->SizeOfOptionalHeader < 104)
+ return false;
+
+ // Find export directory.
+
+ const IMAGE_EXPORT_DIRECTORY *pExportDir;
+
+ switch(*(short *)((char *)pHeader + IMAGE_SIZEOF_FILE_HEADER)) {
+
+#ifdef _M_AMD64
+ case IMAGE_NT_OPTIONAL_HDR64_MAGIC:
+ {
+ const IMAGE_OPTIONAL_HEADER64 *pOpt = (IMAGE_OPTIONAL_HEADER64 *)((const char *)pHeader + sizeof(IMAGE_FILE_HEADER));
+
+ if (pOpt->NumberOfRvaAndSizes < 1)
+ return false;
+
+ DWORD exportDirRVA = pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
+
+ if (!exportDirRVA)
+ return false;
+
+ pExportDir = (const IMAGE_EXPORT_DIRECTORY *)(pBase + exportDirRVA);
+ }
+ break;
+#else
+ case IMAGE_NT_OPTIONAL_HDR32_MAGIC:
+ {
+ const IMAGE_OPTIONAL_HEADER32 *pOpt = (IMAGE_OPTIONAL_HEADER32 *)((const char *)pHeader + sizeof(IMAGE_FILE_HEADER));
+
+ if (pOpt->NumberOfRvaAndSizes < 1)
+ return false;
+
+ DWORD exportDirRVA = pOpt->DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
+
+ if (!exportDirRVA)
+ return false;
+
+ pExportDir = (const IMAGE_EXPORT_DIRECTORY *)(pBase + exportDirRVA);
+ }
+ break;
+#endif
+
+ default: // unrecognized optional header magic
+ return false;
+ }
+
+ // Scan for the export name.
+ DWORD nameCount = pExportDir->NumberOfNames;
+ const DWORD *nameRVAs = (const DWORD *)(pBase + pExportDir->AddressOfNames);
+ const WORD *nameOrdinals = (const WORD *)(pBase + pExportDir->AddressOfNameOrdinals);
+ DWORD *functionTable = (DWORD *)(pBase + pExportDir->AddressOfFunctions);
+
+ for(DWORD i=0; i<nameCount; ++i) {
+ DWORD nameRVA = nameRVAs[i];
+ const char *pName = (const char *)(pBase + nameRVA);
+
+ // compare names
+ if (!strcmp(pName, name)) {
+
+ // name matches -- look up the function entry
+ WORD ordinal = nameOrdinals[i];
+ DWORD *pRVA = &functionTable[ordinal];
+
+ // Reset the protection.
+
+ DWORD newRVA = (DWORD)pNewValue - (DWORD)pBase;
+
+ DWORD dwOldProtect;
+ if (VirtualProtect((void *)pRVA, sizeof(DWORD), PAGE_EXECUTE_READWRITE, &dwOldProtect)) {
+ if (ppOldValue) {
+ for(;;) {
+ DWORD oldRVA = *pRVA;
+ void *old = pBase + oldRVA;
+ if (pCompareValue && pCompareValue != old)
+ return false;
+
+ *ppOldValue = pBase + oldRVA;
+ if (oldRVA == VDAtomicInt::staticCompareExchange((volatile int *)pRVA, newRVA, oldRVA))
+ break;
+ }
+ } else {
+ *pRVA = newRVA;
+ }
+
+ VirtualProtect((void *)pRVA, sizeof(DWORD), dwOldProtect, &dwOldProtect);
+
+ return true;
+ }
+
+ break;
+ }
+ }
+ } __except(1) {
+ }
+
+ return false;
+}
diff --git a/src/thirdparty/VirtualDub/system/source/zip.cpp b/src/thirdparty/VirtualDub/system/source/zip.cpp
new file mode 100644
index 000000000..8ea2ce7bf
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/source/zip.cpp
@@ -0,0 +1,603 @@
+// VirtualDub - Video processing and capture application
+// System library component
+// Copyright (C) 1998-2004 Avery Lee, All Rights Reserved.
+//
+// Beginning with 1.6.0, the VirtualDub system library is licensed
+// differently than the remainder of VirtualDub. This particular file is
+// thus licensed as follows (the "zlib" license):
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any
+// damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source
+// distribution.
+
+#include "stdafx.h"
+#include <vd2/system/zip.h>
+#include <vd2/system/error.h>
+
+bool VDDeflateBitReader::refill() {
+ sint32 tc = mBytesLeft > kBufferSize ? kBufferSize : (sint32)mBytesLeft;
+
+ if (!tc)
+ return false;
+
+ mpSrc->Read(mBuffer+kBufferSize-tc, tc); // might throw
+
+ mBufferPt = -tc;
+
+ mBytesLeftLimited = mBytesLeft > kBigAvailThreshold ? kBigAvailThreshold : (unsigned)mBytesLeft;
+ mBytesLeft -= tc;
+
+ return true;
+}
+
+void VDDeflateBitReader::readbytes(void *dst, unsigned len) {
+ // LAME: OPTIMIZE LATER
+ uint8 *dst2 = (uint8 *)dst;
+ while(len-->0)
+ *dst2++ = getbits(8);
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+void VDCRCChecker::Init(uint32 crc) {
+ mValue = 0xFFFFFFFF;
+
+ for(int i=0; i<256; ++i) {
+ unsigned v = i;
+ for(int j=0; j<8; ++j)
+ v = (v>>1) ^ (crc & -(sint32)(v&1));
+
+ mTable[i] = v;
+ }
+}
+
+void VDCRCChecker::Process(const void *src0, sint32 count) {
+ const uint8 *src = (const uint8 *)src0;
+
+ uint32 v = mValue;
+
+ // This code is from the PNG spec.
+ if (count > 0)
+ do {
+ v = mTable[(uint8)v ^ *src++] ^ (v >> 8);
+ } while(--count);
+
+ mValue = v;
+}
+
+uint32 VDCRCChecker::CRC(uint32 crc, const void *src, sint32 len) {
+ Init(crc);
+ Process(src, len);
+ return CRC();
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+VDZipStream::VDZipStream()
+ : mPos(0)
+ , mbCRCEnabled(false)
+{
+}
+
+VDZipStream::VDZipStream(IVDStream *pSrc, uint64 limit, bool bStored)
+ : mPos(0)
+ , mbCRCEnabled(false)
+{
+ Init(pSrc, limit, bStored);
+}
+
+VDZipStream::~VDZipStream() {
+}
+
+
+void VDZipStream::Init(IVDStream *pSrc, uint64 limit, bool bStored) {
+ mBits.init(pSrc, limit);
+ mBlockType = kNoBlock;
+ mReadPt = mWritePt = mBufferLevel = 0;
+ mStoredBytesLeft = 0;
+ mbNoMoreBlocks = false;
+
+ if (bStored) {
+ mStoredBytesLeft = (uint32)limit;
+ mbNoMoreBlocks = true;
+ mBlockType = kStoredBlock;
+ }
+}
+
+const wchar_t *VDZipStream::GetNameForError() {
+ return mBits.stream()->GetNameForError();
+}
+
+sint64 VDZipStream::Pos() {
+ return mPos;
+}
+
+void VDZipStream::Read(void *buffer, sint32 bytes) {
+ if (bytes != ReadData(buffer, bytes))
+ throw MyError("Read error on compressed data");
+}
+
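+// Decompressed data is staged in the 64K ring buffer mBuffer: mReadPt chases
+// mWritePt and mBufferLevel counts the bytes pending. ReadData() drains the
+// ring, calling Inflate() to refill it when it runs dry, and feeds any newly
+// produced bytes to the CRC checker when CRC checking is enabled.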
+sint32 VDZipStream::ReadData(void *dst0, sint32 bytes) {
+ sint32 actual = 0;
+
+ uint8 *dst = (uint8 *)dst0;
+
+ while(bytes > 0) {
+ if (mBufferLevel > 0) {
+ unsigned tc = std::min<unsigned>(mBufferLevel, bytes);
+ unsigned bp = 65536 - mReadPt;
+
+ if (bp < tc) {
+ memcpy(dst, mBuffer+mReadPt, bp);
+ memcpy(dst+bp, mBuffer, tc-bp);
+ mReadPt = tc-bp;
+ } else {
+ memcpy(dst, mBuffer+mReadPt, tc);
+ mReadPt += tc;
+ }
+ mBufferLevel -= tc;
+ dst += tc;
+ bytes -= tc;
+ actual += tc;
+ } else {
+ uint32 origWritePt = mWritePt;
+ uint32 origBufferLevel = mBufferLevel;
+
+ if (!Inflate())
+ break;
+
+ if (mbCRCEnabled && mBufferLevel != origBufferLevel) {
+ if (mWritePt <= origWritePt) {
+ mCRCChecker.Process(mBuffer+origWritePt, 65536 - origWritePt);
+ mCRCChecker.Process(mBuffer, mWritePt);
+ } else {
+ mCRCChecker.Process(mBuffer+origWritePt, mWritePt - origWritePt);
+ }
+ }
+ }
+ }
+
+ mPos += actual;
+ return actual;
+}
+
+void VDZipStream::Write(const void *buffer, sint32 bytes) {
+ throw MyError("Zip streams are read-only.");
+}
+
+bool VDZipStream::Inflate() {
+ if (mBlockType == kNoBlock)
+ if (mbNoMoreBlocks || !ParseBlockHeader())
+ return false;
+
+ if (mBlockType == kStoredBlock) {
+ while(mBufferLevel < 65536) {
+ if (mStoredBytesLeft <= 0) {
+ mBlockType = kNoBlock;
+ break;
+ }
+ uint32 tc = std::min<uint32>(65536 - mWritePt, std::min<uint32>(65536 - mBufferLevel, mStoredBytesLeft));
+
+ mBits.readbytes(mBuffer + mWritePt, tc);
+
+ mWritePt = (mWritePt + tc) & 65535;
+ mStoredBytesLeft -= tc;
+ mBufferLevel += tc;
+ }
+ } else {
+ static const unsigned len_tbl[32]={
+ 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,
+ 131,163,195,227,258
+ };
+
+ static const unsigned char len_bits_tbl[32]={
+ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
+ };
+
+ static const unsigned char dist_bits_tbl[]={
+ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
+ };
+
+ static const unsigned dist_tbl[]={
+ 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,
+ 6145,8193,12289,16385,24577
+ };
+
+ while(mBufferLevel < 65024) {
+ unsigned code, bits;
+
+ code = mCodeDecode[mBits.peek() & 0x7fff];
+ bits = mCodeLengths[code];
+
+ if (!mBits.consume(bits))
+ return false;
+
+ if (code == 256) {
+ mBlockType = kNoBlock;
+ break;
+ } else if (code >= 257) {
+ unsigned dist, len;
+
+ code -= 257;
+
+ len = len_tbl[code] + mBits.getbits(len_bits_tbl[code]);
+
+ if (len < 3)
+ return false; // can happen with a bad static block
+
+ code = mDistDecode[mBits.peek() & 0x7fff];
+ bits = mCodeLengths[code + 288];
+
+ if (!mBits.consume(bits))
+ return false;
+
+ dist = dist_tbl[code] + mBits.getbits(dist_bits_tbl[code]);
+
+ unsigned copysrc = (mWritePt - dist) & 65535;
+
+ mBufferLevel += len;
+
+ // NOTE: This can be a self-replicating copy. It must be ascending and it must
+ // be by bytes.
+// printf("%08lx: distance %04x count %d\n", mWritePt, dist, len);
+ do {
+ mBuffer[mWritePt++] = mBuffer[copysrc++];
+ mWritePt &= 65535;
+ copysrc &= 65535;
+ } while(--len);
+ } else {
+// printf("%08lx: literal %02x\n", mWritePt, code);
+ mBuffer[mWritePt++] = code;
+ mWritePt &= 65535;
+ ++mBufferLevel;
+ }
+ }
+ }
+
+ return true;
+}
+
+namespace {
+ static unsigned revword8(unsigned x) {
+ x = (unsigned char )((x << 4) + (x >> 4));
+ x = (unsigned char)((x << 4) + (x >> 4));
+ return ((x << 1) & 0xaa) + ((x >> 1) & 0x55);
+ }
+
+ static unsigned revword15(unsigned x) {
+ x = ((x << 8) & 0xff00) + ((x >> 8) & 0x00ff);
+ x = ((x << 4) & 0xf0f0) + ((x >> 4) & 0x0f0f);
+ x = ((x << 2) & 0xcccc) + ((x >> 2) & 0x3333);
+ return (x & 0x5555) + ((x >> 2) & 0x2aaa);
+ }
+
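+ // The decode tables map the next 8 (or 15) input bits directly to a symbol.
+ // Canonical Huffman codes are assigned in order of increasing length, and
+ // because DEFLATE transmits codes LSB-first the running code is incremented
+ // on its bit-reversed value -- that is what revword8()/revword15() provide.
+ // Each code of length i fills every table slot whose low i bits match it;
+ // a false return means the lengths did not form a complete code.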
+ static bool InflateExpandTable256(unsigned char *dst, unsigned char *lens, unsigned codes) {
+ unsigned k;
+ unsigned ki;
+ unsigned base=0;
+
+ for(unsigned i=1; i<16; ++i) {
+ ki = 1<<i;
+
+ for(unsigned j=0; j<codes; ++j) {
+ if (lens[j] == i) {
+ for(k=base; k<0x100; k+=ki)
+ dst[k] = j;
+
+ base = revword8((revword8(base)+(0x100 >> i)) & 0xff);
+ }
+ }
+ }
+
+ return !base;
+ }
+
+ static bool InflateExpandTable32K(unsigned short *dst, unsigned char *lens, unsigned codes) {
+ unsigned k;
+ unsigned ki;
+ unsigned base=0;
+
+ for(int i=1; i<16; ++i) {
+ ki = 1<<i;
+
+ for(unsigned j=0; j<codes; ++j) {
+ if (lens[j] == i) {
+ for(k=base; k<0x8000; k+=ki)
+ dst[k] = j;
+
+ base = revword15(revword15(base)+(0x8000 >> i));
+ }
+ }
+ }
+
+ return !base;
+ }
+}
+
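+// Reads one DEFLATE block header (RFC 1951): a final-block bit followed by a
+// 2-bit block type -- 0 = stored, 1 = fixed Huffman trees, 2 = dynamic trees.
+// For dynamic blocks the code-length alphabet is decoded first and then used
+// to expand the literal/length and distance tables consumed by Inflate().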
+bool VDZipStream::ParseBlockHeader() {
+ unsigned char ltbl_lengths[20];
+ unsigned char ltbl_decode[256];
+
+ if (mBits.getbit())
+ mbNoMoreBlocks = true;
+
+ unsigned type = mBits.getbits(2);
+
+ switch(type) {
+ case 0: // stored
+ {
+ mBits.align();
+ if (mBits.avail() < 32)
+ return false;
+
+ mStoredBytesLeft = mBits.getbits(16);
+
+ uint32 invCount = mBits.getbits(16);
+
+ if ((uint16)~invCount != mStoredBytesLeft)
+ return false;
+
+ if (mBits.bytesleft() < mStoredBytesLeft)
+ return false;
+
+ mBlockType = kStoredBlock;
+ }
+ break;
+ case 1: // static trees
+ {
+ int i;
+
+ for(i=0; i<144; ++i) mCodeLengths[i] = 8;
+ for( ; i<256; ++i) mCodeLengths[i] = 9;
+ for( ; i<280; ++i) mCodeLengths[i] = 7;
+ for( ; i<288; ++i) mCodeLengths[i] = 8;
+ for(i=0; i< 32; ++i) mCodeLengths[i+288] = 5;
+
+ if (!InflateExpandTable32K(mCodeDecode, mCodeLengths, 288)) {
+ VDASSERT(false); // code table bad
+ return false;
+ }
+ if (!InflateExpandTable32K(mDistDecode, mCodeLengths+288, 32)) {
+ VDASSERT(false); // distance table bad
+ return false;
+ }
+
+ mBlockType = kDeflatedBlock;
+ }
+ break;
+ case 2: // dynamic trees
+ {
+ if (mBits.avail() < 16)
+ return false;
+
+ const unsigned code_count = mBits.getbits(5) + 257;
+ const unsigned dist_count = mBits.getbits(5) + 1;
+ const unsigned total_count = code_count + dist_count;
+ const unsigned ltbl_count = mBits.getbits(4) + 4;
+
+ // decompress length table tree
+
+ if (mBits.bitsleft() < 3*ltbl_count)
+ return false;
+
+ memset(ltbl_lengths, 0, sizeof ltbl_lengths);
+
+ static const unsigned char hclen_tbl[]={
+ 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
+ };
+
+ for(unsigned i=0; i<ltbl_count; ++i) {
+ ltbl_lengths[hclen_tbl[i]] = mBits.getbits(3);
+ }
+
+ if (!InflateExpandTable256(ltbl_decode, ltbl_lengths, 20)) {
+ VDASSERT(false); // tree table bad
+ return false;
+ }
+
+ // decompress length table
+
+ unsigned j=0;
+ unsigned last = 0;
+ while(j < total_count) {
+ unsigned k = ltbl_decode[0xff & mBits.peek()];
+ unsigned run = 1;
+
+ if (!mBits.consume(ltbl_lengths[k]))
+ return false;
+
+ switch(k) {
+ case 16: // last run of 3-6
+ if (mBits.avail() < 2)
+ return false;
+ run = mBits.getbits(2) + 3;
+ break;
+ case 17: // zero run of 3-10
+ if (mBits.avail() < 3)
+ return false;
+ run = mBits.getbits(3) + 3;
+ last = 0;
+ break;
+ case 18: // zero run of 11-138
+ if (mBits.avail() < 7)
+ return false;
+ run = mBits.getbits(7) + 11;
+ last = 0;
+ break;
+ default:
+ last = k;
+ }
+
+ if (run+j > total_count) {
+ VDASSERT(false); // tree table bad
+ return false;
+ }
+
+ do {
+ mCodeLengths[j++] = last;
+ } while(--run);
+ }
+
+ memmove(mCodeLengths + 288, mCodeLengths + code_count, dist_count);
+
+ if (!InflateExpandTable32K(mCodeDecode, mCodeLengths, code_count)) {
+ VDASSERT(false); // code table bad
+ return false;
+ }
+ if (!InflateExpandTable32K(mDistDecode, mCodeLengths+288, dist_count)) {
+ VDASSERT(false); // data table bad
+ return false;
+ }
+ mBlockType = kDeflatedBlock;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////
+
+#pragma pack(push, 2)
+
+namespace {
+ enum {
+ kZipMethodStore = 0,
+ kZipMethodDeflate = 8
+ };
+
+ struct ZipFileHeader {
+ enum { kSignature = 0x04034b50 };
+ uint32 signature;
+ uint16 version_required;
+ uint16 flags;
+ uint16 method;
+ uint16 mod_time;
+ uint16 mod_date;
+ uint32 crc32;
+ uint32 compressed_size;
+ uint32 uncompressed_size;
+ uint16 filename_len;
+ uint16 extrafield_len;
+ };
+
+ struct ZipFileEntry {
+ enum { kSignature = 0x02014b50 };
+ uint32 signature;
+ uint16 version_create;
+ uint16 version_required;
+ uint16 flags;
+ uint16 method;
+ uint16 mod_time;
+ uint16 mod_date;
+ uint32 crc32;
+ uint32 compressed_size;
+ uint32 uncompressed_size;
+ uint16 filename_len;
+ uint16 extrafield_len;
+ uint16 comment_len;
+ uint16 diskno;
+ uint16 internal_attrib;
+ uint32 external_attrib;
+ uint32 reloff_localhdr;
+ };
+
+ struct ZipCentralDir {
+ enum { kSignature = 0x06054b50 };
+
+ uint32 signature;
+ uint16 diskno;
+ uint16 diskno_dir;
+ uint16 dirents;
+ uint16 dirents_total;
+ uint32 dirsize;
+ uint32 diroffset;
+ uint16 comment_len;
+ };
+}
+
+#pragma pack(pop)
+
+VDZipArchive::VDZipArchive() {
+}
+
+VDZipArchive::~VDZipArchive() {
+}
+
+void VDZipArchive::Init(IVDRandomAccessStream *pSrc) {
+ mpStream = pSrc;
+
+ // This seek is wrong for files with zip comments, but we aren't creating
+ // a general purpose Unzip utility anyway.
+ mpStream->Seek(mpStream->Length() - sizeof(ZipCentralDir));
+
+ ZipCentralDir cdirhdr;
+
+ mpStream->Read(&cdirhdr, sizeof cdirhdr);
+ if (cdirhdr.signature != ZipCentralDir::kSignature)
+ throw MyError("Zip file has missing or bad central directory");
+
+ mDirectory.resize(cdirhdr.dirents_total);
+
+ mpStream->Seek(cdirhdr.diroffset);
+
+ for(int i=0; i<cdirhdr.dirents_total; ++i) {
+ FileInfoInternal& fii = mDirectory[i];
+ ZipFileEntry ent;
+
+ mpStream->Read(&ent, sizeof ent);
+ if (ent.signature != ZipFileEntry::kSignature)
+ throw MyError("Zip directory is bad");
+
+ if (ent.method != kZipMethodStore && ent.method != kZipMethodDeflate)
+ throw MyError("Unsupported compression method in zip archive");
+
+ fii.mDataStart = ent.reloff_localhdr;
+ fii.mCompressedSize = ent.compressed_size;
+ fii.mUncompressedSize = ent.uncompressed_size;
+ fii.mCRC32 = ent.crc32;
+ fii.mbPacked = ent.method == kZipMethodDeflate;
+ fii.mFileName.resize(ent.filename_len);
+
+ mpStream->Read(&*fii.mFileName.begin(), ent.filename_len);
+
+ mpStream->Seek(mpStream->Pos() + ent.extrafield_len + ent.comment_len);
+ }
+}
+
+sint32 VDZipArchive::GetFileCount() {
+ return mDirectory.size();
+}
+
+const VDZipArchive::FileInfo& VDZipArchive::GetFileInfo(sint32 idx) {
+ VDASSERT((size_t)idx < mDirectory.size());
+ return mDirectory[idx];
+}
+
+IVDStream *VDZipArchive::OpenRawStream(sint32 idx) {
+ const FileInfoInternal& fi = mDirectory[idx];
+
+ mpStream->Seek(fi.mDataStart);
+
+ ZipFileHeader hdr;
+ mpStream->Read(&hdr, sizeof hdr);
+
+ if (hdr.signature != ZipFileHeader::kSignature)
+ throw MyError("Bad header for file in zip archive");
+
+ mpStream->Seek(fi.mDataStart + sizeof(hdr) + hdr.filename_len + hdr.extrafield_len);
+
+ return mpStream;
+}
diff --git a/src/thirdparty/VirtualDub/system/system.vcproj b/src/thirdparty/VirtualDub/system/system.vcproj
new file mode 100644
index 000000000..2744ccea9
--- /dev/null
+++ b/src/thirdparty/VirtualDub/system/system.vcproj
@@ -0,0 +1,1906 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9,00"
+ Name="system"
+ ProjectGUID="{C2082189-3ECB-4079-91FA-89D3C8A305C0}"
+ RootNamespace="system"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ <Platform
+ Name="x64"
+ />
+ </Platforms>
+ <ToolFiles>
+ <ToolFile
+ RelativePath="..\..\..\YASM.rules"
+ />
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="1"
+ InlineFunctionExpansion="2"
+ AdditionalIncludeDirectories="..\h,.\h"
+ PreprocessorDefinitions="NDEBUG;_LIB;WIN32;NOMINMAX;WIN32_LEAN_AND_MEAN"
+ StringPooling="true"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="true"
+ UsePrecompiledHeader="2"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267;4996"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Release/system.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="1"
+ InlineFunctionExpansion="2"
+ AdditionalIncludeDirectories="..\h,.\h"
+ PreprocessorDefinitions="NDEBUG;_LIB;WIN32;NOMINMAX;WIN32_LEAN_AND_MEAN"
+ StringPooling="true"
+ RuntimeLibrary="0"
+ EnableFunctionLevelLinking="true"
+ UsePrecompiledHeader="2"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267;4996"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Release/system.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="../h,.\h"
+ PreprocessorDefinitions="_DEBUG;_LIB;WIN32;NOMINMAX;WIN32_LEAN_AND_MEAN"
+ StringPooling="true"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ UsePrecompiledHeader="2"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267;4996"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Debug/system.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops"
+ UseOfMFC="0"
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="YASM"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="../h,.\h"
+ PreprocessorDefinitions="_DEBUG;_LIB;WIN32;NOMINMAX;WIN32_LEAN_AND_MEAN"
+ StringPooling="true"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ UsePrecompiledHeader="2"
+ WarningLevel="3"
+ SuppressStartupBanner="true"
+ DebugInformationFormat="3"
+ DisableSpecificWarnings="4244;4267;4996"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="_DEBUG"
+ Culture="1033"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ SuppressStartupBanner="true"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ SuppressStartupBanner="true"
+ OutputFile=".\..\lib\Debug/system.bsc"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+ >
+ <File
+ RelativePath="..\h\vd2\system\source\bitmath.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\cache.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\cmdline.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\cpuaccel.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\debug.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\debugx86.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\Error.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\event.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\file.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\fileasync.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\filesys.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\filewatcher.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\Fraction.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\halffloat.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\hash.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\int128.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\list.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\log.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\math.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\memory.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\profile.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\progress.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\protscope.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\source\refcount.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\registry.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\strutil.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\text.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\thread.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\thunk.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\time.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\tls.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\VDNamespace.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\VDScheduler.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\vdstl.cpp"
+ >
+ </File>
+ <File
+ RelativePath="source\VDString.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\vectors.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\w32assist.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\zip.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl"
+ >
+ <File
+ RelativePath="..\h\vd2\system\atomic.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\binary.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\bitmath.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\cache.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\cmdline.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\cpuaccel.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\debug.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\debugx86.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\Error.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\event.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\file.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\fileasync.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\filesys.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\filewatcher.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\Fraction.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\halffloat.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\hash.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\int128.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\list.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\log.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\math.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\memory.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\profile.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\progress.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\protscope.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\refcount.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\registry.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\strutil.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\text.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\thread.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\thunk.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\time.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\tls.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\unknown.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\vdalloc.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDNamespace.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDQueue.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDRingBuffer.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDScheduler.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\vdstl.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\VDString.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\VD2\system\vdtypes.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\vectors.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\vectors_float.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\vectors_int.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\w32assist.h"
+ >
+ </File>
+ <File
+ RelativePath="..\h\vd2\system\zip.h"
+ >
+ </File>
+ <Filter
+ Name="win32"
+ >
+ <File
+ RelativePath="..\h\vd2\system\win32\miniwindows.h"
+ >
+ </File>
+ </Filter>
+ </Filter>
+ <Filter
+ Name="Assembly Files (x86)"
+ Filter="asm"
+ >
+ <File
+ RelativePath="source\a_memory.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a_thunk.asm"
+ >
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Assembly Files (AMD64)"
+ >
+ <File
+ RelativePath="source\a64_fraction.asm"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\a64_int128.asm"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath=".\source\a64_thunk.asm"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="YASM"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Precompiled Header Support"
+ >
+ <File
+ RelativePath=".\source\stdaccel.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="3"
+ InlineFunctionExpansion="0"
+ EnableIntrinsicFunctions="false"
+ BasicRuntimeChecks="0"
+ UsePrecompiledHeader="0"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="3"
+ BasicRuntimeChecks="0"
+ UsePrecompiledHeader="0"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="source\stdafx.cpp"
+ >
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories=""
+ PreprocessorDefinitions=""
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="h\stdafx.h"
+ >
+ </File>
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>