13 files changed, 851 insertions, 398 deletions
diff --git a/src/DSUtil/MediaTypes.cpp b/src/DSUtil/MediaTypes.cpp
index 47a45c607..5edb08674 100644
--- a/src/DSUtil/MediaTypes.cpp
+++ b/src/DSUtil/MediaTypes.cpp
@@ -319,7 +319,7 @@ VIH2 vih2s[] =
 	},
 };
 
-int VIHSIZE = countof(vihs);
+UINT VIHSIZE = countof(vihs);
 
 CString VIH2String(int i)
 {
diff --git a/src/DSUtil/MediaTypes.h b/src/DSUtil/MediaTypes.h
index 609191d9d..e4beb1733 100644
--- a/src/DSUtil/MediaTypes.h
+++ b/src/DSUtil/MediaTypes.h
@@ -41,7 +41,7 @@ struct VIH2
 extern VIH vihs[];
 extern VIH2 vih2s[];
 
-extern int VIHSIZE;
+extern UINT VIHSIZE;
 
 extern CString VIH2String(int i), Subtype2String(const GUID& subtype);
 extern void CorrectMediaType(AM_MEDIA_TYPE* pmt);
diff --git a/src/filters/filters.sln b/src/filters/filters.sln
index 872103ec0..edeb293a1 100644
--- a/src/filters/filters.sln
+++ b/src/filters/filters.sln
@@ -404,6 +404,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "VSFilter", "transform\vsfil
 	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DSUtil", "..\DSUtil\dsutil.vcproj", "{FC70988B-1AE5-4381-866D-4F405E28AC42}"
+	ProjectSection(ProjectDependencies) = postProject
+		{0D252872-7542-4232-8D02-53F9182AEE15} = {0D252872-7542-4232-8D02-53F9182AEE15}
+	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "subtitles", "..\subtitles\subtitles.vcproj", "{5E56335F-0FB1-4EEA-B240-D8DC5E0608E4}"
 EndProject
@@ -421,6 +424,9 @@ EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "VD", "VD", "{0D3ED276-19CD-4385-9F47-99082ED2CE24}"
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Kasumi", "..\thirdparty\VirtualDub\Kasumi\Kasumi.vcproj", "{0D252872-7542-4232-8D02-53F9182AEE15}"
+	ProjectSection(ProjectDependencies) = postProject
+		{C2082189-3ECB-4079-91FA-89D3C8A305C0} = {C2082189-3ECB-4079-91FA-89D3C8A305C0}
+	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "system", "..\thirdparty\VirtualDub\system\system.vcproj", "{C2082189-3ECB-4079-91FA-89D3C8A305C0}"
 EndProject
diff --git a/src/filters/transform/vsfilter/Copy.cpp b/src/filters/transform/vsfilter/Copy.cpp
index 27153a3bf..c16f85a89 100644
--- a/src/filters/transform/vsfilter/Copy.cpp
+++ b/src/filters/transform/vsfilter/Copy.cpp
@@ -19,6 +19,7 @@
 #include "stdafx.h"
 #include <math.h>
 #include "DirectVobSubFilter.h"
+#include "Scale2x.h"
 #include "../../../DSUtil/DSUtil.h"
 #include "../../../DSUtil/MediaTypes.h"
 
@@ -112,384 +113,6 @@ void BltLineRGB32(DWORD* d, BYTE* sub, int w, const GUID& subtype)
 	}
 }
 
-#ifdef WIN64
-// For CPUID usage
-#include "../../../dsutil/vd.h"
-#include <emmintrin.h>
-#endif
-/* ResX2 */
-void Scale2x(const GUID& subtype, BYTE* d, int dpitch, BYTE* s, int spitch, int w, int h)
-{
-#ifdef WIN64
-	// CPUID from VDub
-	bool fSSE2 = !!(g_cpuid.m_flags & CCpuID::sse2);
-#endif
-
-	if(subtype == MEDIASUBTYPE_YV12 || subtype == MEDIASUBTYPE_I420 || subtype == MEDIASUBTYPE_IYUV)
-	{
-		BYTE* s1;
-		BYTE* s2;
-		BYTE* d1;
-
-		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch) // TODO: replace this mess with mmx code
-		{
-			BYTE* stmp = s1 + spitch;
-			BYTE* dtmp = d1 + dpitch;
-
-			for(BYTE* s3 = s1 + (w-1); s1 < s3; s1 += 1, d1 += 2)
-			{
-				d1[0] = s1[0]; 
-				d1[1] = (s1[0]+s1[1])>>1;
-			}
-
-			d1[0] = d1[1] = s1[0]; 
-
-			s1 += 1;
-			d1 += 2;
-
-			s1 = stmp;
-			d1 = dtmp;
-		}
-
-		AvgLines8(d, h*2, dpitch);
-	}
-	else if(subtype == MEDIASUBTYPE_YUY2)
-	{
-		unsigned __int64 __0xffffffff00000000 = 0xffffffff00000000;
-		unsigned __int64 __0x00000000ffffffff = 0x00000000ffffffff;
-		unsigned __int64 __0x00ff00ff00ff00ff = 0x00ff00ff00ff00ff;
-
-		BYTE* s1;
-		BYTE* s2;
-		BYTE* d1;
-
-		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch)
-		{
-			BYTE* stmp = s1 + spitch;
-			BYTE* dtmp = d1 + dpitch;
-
-			// row0, 4 pixels: y1|u1|y2|v1|y3|u2|y4|v2
-			// ->
-			// row0, 8 pixels: y1|u1|(y1+y2)/2|v1|y2|(u1+u2)/2|(y2+y3)/2|(v1+v2)/2
-
-#ifdef WIN64
-			if(fSSE2)
-			{
-				__m128i mm4 = _mm_cvtsi64_si128(__0x00ff00ff00ff00ff);
-				__m128i mm5 = _mm_cvtsi64_si128(__0x00000000ffffffff);
-				__m128i mm6 = _mm_cvtsi64_si128(__0xffffffff00000000);
-				for(BYTE* s3 = s1 + ((w>>1)-1)*4; s1 < s3; s1 += 4, d1 += 8)
-				{
-					__m128i mm0 = _mm_cvtsi64_si128(*(size_t*)s1); //movq	mm0, [esi]
-					__m128i mm2 = _mm_move_epi64(mm0);			//movq	mm2, mm0
-					mm0 = _mm_and_si128(mm0, mm4);				//pand	mm0, mm4	// mm0 = 00y400y300y200y1
-					mm2 = _mm_srli_epi16(mm2, 8);				//psrlw	mm2, 8		// mm2 = 00u200v200u100v1
-					__m128i mm1 = _mm_move_epi64(mm0);			//movq	mm1, mm0
-					mm0 = _mm_and_si128(mm0, mm5);				//pand	mm0, mm5	// mm0 = 0000000000y200y1
-					mm1 = _mm_slli_epi64(mm1, 16);				//psllq	mm1, 16
-					mm1 = _mm_and_si128(mm1, mm6);				//pand	mm1, mm6	// mm1 = 00y300y200000000
-					mm1 = _mm_or_si128(mm1, mm0);				//por	mm1, mm0	// mm1 = 00y300y200y200y1
-					mm0 = _mm_unpacklo_epi8(mm0, mm0);			//punpcklwd mm0, mm0	// mm0 = 00y200y200y100y1
-					mm0 = _mm_adds_epi16(mm0,mm1);				//paddw	mm0, mm1
-					mm0 = _mm_srli_epi16(mm0, 1);				//psrlw	mm0, 1		// mm0 = (mm0 + mm1) / 2
-					mm1 = _mm_move_epi64(mm2);					//movq	mm1, mm2
-					mm1 = _mm_unpacklo_epi32(mm1, mm1);			//punpckldq	mm1, mm1 // mm1 = 00u100v100u100v1
-					mm1 = _mm_adds_epi16(mm1,mm2);				//paddw	mm1, mm2
-					mm1 = _mm_srli_epi16(mm1, 1);				//psrlw	mm1, 1		// mm1 = (mm1 + mm2) / 2
-					mm1 = _mm_slli_epi64(mm1, 8);				//psllw	mm1, 8
-					mm1 = _mm_or_si128(mm0, mm1);				//por		mm0, mm1	// mm0 = (v1+v2)/2|(y2+y3)/2|(u1+u2)/2|y2|v1|(y1+y2)/2|u1|y1
-					*(size_t*)d1 = (size_t)_mm_cvtsi128_si64(mm0);		//movq	[edi], mm0
-				}
-			}
-			else
-			{
-				for(BYTE* s3 = s1 + ((w>>1)-1)*4; s1 < s3; s1 += 4, d1 += 8)
-				{
-					d1[0] = s1[0]; 
-					d1[1] = s1[1]; 
-					d1[2] = (s1[0]+s1[2])>>1;
-					d1[3] = s1[3];
-
-					d1[4] = s1[2];
-					d1[5] = (s1[1]+s1[5])>>1;
-					d1[6] = (s1[2]+s1[4])>>1;
-					d1[7] = (s1[3]+s1[7])>>1;
-				}
-			}
-#else
-			__asm
-			{
-				mov		esi, s1
-				mov		edi, d1
-
-				mov		ecx, w
-				shr		ecx, 1
-				dec		ecx
-
-				movq	mm4, __0x00ff00ff00ff00ff
-				movq	mm5, __0x00000000ffffffff
-				movq	mm6, __0xffffffff00000000
-row_loop1:
-				movq	mm0, [esi]
-				movq	mm2, mm0
-
-				pand	mm0, mm4	// mm0 = 00y400y300y200y1
-				psrlw	mm2, 8		// mm2 = 00u200v200u100v1
-
-
-				movq	mm1, mm0
-
-				pand	mm0, mm5	// mm0 = 0000000000y200y1
-
-				psllq	mm1, 16
-				pand	mm1, mm6	// mm1 = 00y300y200000000
-
-				por		mm1, mm0	// mm1 = 00y300y200y200y1
-
-				punpcklwd mm0, mm0	// mm0 = 00y200y200y100y1
-
-				paddw	mm0, mm1
-				psrlw	mm0, 1		// mm0 = (mm0 + mm1) / 2
-
-
-				movq	mm1, mm2
-				punpckldq	mm1, mm1 // mm1 = 00u100v100u100v1
-
-				paddw	mm1, mm2
-				psrlw	mm1, 1		// mm1 = (mm1 + mm2) / 2
-
-
-				psllw	mm1, 8
-				por		mm0, mm1	// mm0 = (v1+v2)/2|(y2+y3)/2|(u1+u2)/2|y2|v1|(y1+y2)/2|u1|y1
-
-				movq	[edi], mm0
-
-				lea		esi, [esi+4]
-				lea		edi, [edi+8]
-
-				dec		ecx
-				jnz		row_loop1
-
-				mov		s1, esi
-				mov		d1, edi
-			};
-#endif
-			*d1++ = s1[0];
-			*d1++ = s1[1];
-			*d1++ =(s1[0]+s1[2])>>1;
-			*d1++ = s1[3];
-
-			*d1++ = s1[2];
-			*d1++ = s1[1];
-			*d1++ = s1[2];
-			*d1++ = s1[3];
-
-			s1 += 4;
-
-			s1 = stmp;
-			d1 = dtmp;
-		}
-
-		AvgLines8(d, h*2, dpitch);
-	}
-	else if(subtype == MEDIASUBTYPE_RGB555)
-	{
-		BYTE* s1;
-		BYTE* s2;
-		BYTE* d1;
-
-		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch) // TODO: replace this mess with mmx code
-		{
-			BYTE* stmp = s1 + spitch;
-			BYTE* dtmp = d1 + dpitch;
-
-			for(BYTE* s3 = s1 + (w-1)*2; s1 < s3; s1 += 2, d1 += 4)
-			{
-				*((WORD*)d1) = *((WORD*)s1);
-				*((WORD*)d1+1) = 
-					((((*((WORD*)s1)&0x7c00) + (*((WORD*)s1+1)&0x7c00)) >> 1)&0x7c00)|
-					((((*((WORD*)s1)&0x03e0) + (*((WORD*)s1+1)&0x03e0)) >> 1)&0x03e0)|
-					((((*((WORD*)s1)&0x001f) + (*((WORD*)s1+1)&0x001f)) >> 1)&0x001f);
-			}
-
-			*((WORD*)d1) = *((WORD*)s1);
-			*((WORD*)d1+1) = *((WORD*)s1);
-
-			s1 += 2;
-			d1 += 4;
-
-			s1 = stmp;
-			d1 = dtmp;
-		}
-
-		AvgLines555(d, h*2, dpitch);
-	}
-	else if(subtype == MEDIASUBTYPE_RGB565)
-	{
-		BYTE* s1;
-		BYTE* s2;
-		BYTE* d1;
-
-		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch) // TODO: replace this mess with mmx code
-		{
-			BYTE* stmp = s1 + spitch;
-			BYTE* dtmp = d1 + dpitch;
-
-			for(BYTE* s3 = s1 + (w-1)*2; s1 < s3; s1 += 2, d1 += 4)
-			{
-				*((WORD*)d1) = *((WORD*)s1);
-				*((WORD*)d1+1) = 
-					((((*((WORD*)s1)&0xf800) + (*((WORD*)s1+1)&0xf800)) >> 1)&0xf800)|
-					((((*((WORD*)s1)&0x07e0) + (*((WORD*)s1+1)&0x07e0)) >> 1)&0x07e0)|
-					((((*((WORD*)s1)&0x001f) + (*((WORD*)s1+1)&0x001f)) >> 1)&0x001f);
-			}
-
-			*((WORD*)d1) = *((WORD*)s1);
-			*((WORD*)d1+1) = *((WORD*)s1);
-
-			s1 += 2;
-			d1 += 4;
-
-			s1 = stmp;
-			d1 = dtmp;
-		}
-
-		AvgLines565(d, h*2, dpitch);
-	}
-	else if(subtype == MEDIASUBTYPE_RGB24)
-	{
-		BYTE* s1;
-		BYTE* s2;
-		BYTE* d1;
-
-		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch) // TODO: replace this mess with mmx code
-		{
-			BYTE* stmp = s1 + spitch;
-			BYTE* dtmp = d1 + dpitch;
-
-			for(BYTE* s3 = s1 + (w-1)*3; s1 < s3; s1 += 3, d1 += 6)
-			{
-				d1[0] = s1[0]; 
-				d1[1] = s1[1]; 
-				d1[2] = s1[2];
-				d1[3] = (s1[0]+s1[3])>>1;
-				d1[4] = (s1[1]+s1[4])>>1;
-				d1[5] = (s1[2]+s1[5])>>1;
-			}
-
-			d1[0] = d1[3] = s1[0]; 
-			d1[1] = d1[4] = s1[1]; 
-			d1[2] = d1[5] = s1[2];
-
-			s1 += 3;
-			d1 += 6;
-
-			s1 = stmp;
-			d1 = dtmp;
-		}
-
-		AvgLines8(d, h*2, dpitch);
-	}
-	else if(subtype == MEDIASUBTYPE_RGB32 || subtype == MEDIASUBTYPE_ARGB32)
-	{
-		BYTE* s1;
-		BYTE* s2;
-		BYTE* d1;
-
-		for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch)
-		{
-			BYTE* stmp = s1 + spitch;
-			BYTE* dtmp = d1 + dpitch;
-
-#ifdef WIN64
-			if(fSSE2) // SSE2 code
-			{
-				__m128i mm_zero = _mm_setzero_si128();//pxor	mm0, mm0
-				for(BYTE* s3 = s1 + (w-1)*4; s1 < s3; s1 += 4, d1 += 8)
-				{
-					
-					__m128i mm1 = _mm_cvtsi64_si128(*(size_t*)s1); //movq	mm1, [esi]
-					__m128i mm2 = _mm_move_epi64(mm1);			//movq	mm2, mm1
-
-					mm1 = _mm_unpacklo_epi8(mm1,mm_zero);//punpcklbw mm1, mm0	// mm1 = 00xx00r100g100b1
-					mm2 = _mm_unpacklo_epi8(mm2,mm_zero);//punpckhbw mm2, mm0	// mm2 = 00xx00r200g200b2
-
-					mm2 = _mm_adds_epi16(mm2,mm1);		//paddw	mm2, mm1
-					mm2 = _mm_srli_epi16(mm2, 1);		//psrlw	mm2, 1		// mm2 = (mm1 + mm2) / 2
-
-					mm1 = _mm_packus_epi16(mm1,mm2);	//packuswb	mm1, mm2
-
-					*(size_t*)d1=(size_t)_mm_cvtsi128_si64(mm1);//movq	[edi], mm1
-				}
-			}
-			else
-			{
-				for(BYTE* s3 = s1 + (w-1)*4; s1 < s3; s1 += 3, d1 += 6)
-				{
-					d1[0] = s1[0]; 
-					d1[1] = s1[1]; 
-					d1[2] = s1[2];
-					d1[3] = s1[3];
-
-					d1[4] = (s1[0]+s1[4])>>1;
-					d1[5] = (s1[1]+s1[5])>>1;
-					d1[6] = (s1[2]+s1[6])>>1;
-					d1[7] = (s1[4]+s1[7])>>1;
-				}
-			}
-#else
-			__asm
-			{
-				mov		esi, s1
-				mov		edi, d1
-
-				mov		ecx, w
-				dec		ecx
-
-				pxor	mm0, mm0
-row_loop3:
-				movq	mm1, [esi]
-				movq	mm2, mm1
-
-				punpcklbw mm1, mm0	// mm1 = 00xx00r100g100b1
-				punpckhbw mm2, mm0	// mm2 = 00xx00r200g200b2
-
-				paddw	mm2, mm1
-				psrlw	mm2, 1		// mm2 = (mm1 + mm2) / 2
-
-				packuswb	mm1, mm2
-
-				movq	[edi], mm1
-
-				lea		esi, [esi+4]
-				lea		edi, [edi+8]
-
-				dec		ecx
-				jnz		row_loop3
-
-				mov		s1, esi
-				mov		d1, edi
-			};
-#endif
-
-			*((DWORD*)d1) = *((DWORD*)s1);
-			*((DWORD*)d1+1) = *((DWORD*)s1);
-
-			s1 += 4;
-			d1 += 8;
-
-			s1 = stmp;
-			d1 = dtmp;
-		}
-
-		AvgLines8(d, h*2, dpitch);
-	}
-
-#ifndef WIN64
-	__asm emms;
-#endif
-}
-
 HRESULT CDirectVobSubFilter::Copy(BYTE* pSub, BYTE* pIn, CSize sub, CSize in, int bpp, const GUID& subtype, DWORD black)
 {
 	int wIn = in.cx, hIn = in.cy, pitchIn = wIn*bpp>>3;
diff --git a/src/filters/transform/vsfilter/DirectVobSubFilter.cpp b/src/filters/transform/vsfilter/DirectVobSubFilter.cpp
index 2a8ead2f7..0be1c7426 100644
--- a/src/filters/transform/vsfilter/DirectVobSubFilter.cpp
+++ b/src/filters/transform/vsfilter/DirectVobSubFilter.cpp
@@ -46,7 +46,7 @@ bool g_RegOK = true;//false; // doesn't work with the dvd graph builder
 
 CDirectVobSubFilter::CDirectVobSubFilter(LPUNKNOWN punk, HRESULT* phr, const GUID& clsid)
 	: CBaseVideoFilter(NAME("CDirectVobSubFilter"), punk, phr, clsid)
-	, m_nSubtitleId(-1)
+	, m_nSubtitleId((DWORD_PTR)-1)
 	, m_fMSMpeg4Fix(false)
 	, m_fps(25)
 {
@@ -356,7 +356,9 @@ STDMETHODIMP CDirectVobSubFilter::QueryFilterInfo(FILTER_INFO* pInfo)
 		return __super::QueryFilterInfo(pInfo);
 
 	wcscpy(pInfo->achName, L"DirectVobSub (forced auto-loading version)");
-	if(pInfo->pGraph = m_pGraph) m_pGraph->AddRef();
+	pInfo->pGraph = m_pGraph;
+	if(m_pGraph)
+		m_pGraph->AddRef();
 	
 	return S_OK;
 }
diff --git a/src/filters/transform/vsfilter/DirectVobSubPropPage.cpp b/src/filters/transform/vsfilter/DirectVobSubPropPage.cpp
index 06ab43906..c9db8e964 100644
--- a/src/filters/transform/vsfilter/DirectVobSubPropPage.cpp
+++ b/src/filters/transform/vsfilter/DirectVobSubPropPage.cpp
@@ -987,7 +987,7 @@ void CDVSColorPPage::UpdateControlData(bool fSave)
 {
 	if(fSave)
 	{
-		if(m_preflist.GetCount() == VIHSIZE)
+		if((UINT)m_preflist.GetCount() == VIHSIZE)
 		{
 			BYTE* pData = new BYTE[VIHSIZE];
 
@@ -1104,8 +1104,8 @@ bool CDVSPathsPPage::OnMessage(UINT uMsg, WPARAM wParam, LPARAM lParam)
 							bi.lParam = 0;
 							bi.iImage = 0; 
 
-							LPITEMIDLIST iil;
-							if(iil = SHBrowseForFolder(&bi))
+							LPITEMIDLIST iil = SHBrowseForFolder(&bi);
+							if(iil)
 							{
 								SHGetPathFromIDList(iil, pathbuff);
 								m_path.SetWindowText(pathbuff);
diff --git a/src/filters/transform/vsfilter/Scale2x.cpp b/src/filters/transform/vsfilter/Scale2x.cpp
new file mode 100644
index 000000000..ef88d7cec
--- /dev/null
+++ b/src/filters/transform/vsfilter/Scale2x.cpp
@@ -0,0 +1,435 @@
+// Copyright 2003-2006 Gabest
+// http://www.gabest.org
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
+// http://www.gnu.org/copyleft/gpl.html
+
+#include "stdafx.h"
+#include <moreuuids.h>
+
+// For CPUID usage
+#include "vd.h"
+#include <emmintrin.h>
+
+// 2x upscale of a w x h plane of 8-bit samples: every source row is widened into every second
+// destination row (each sample followed by the mean of it and its right neighbour), then
+// AvgLines8() averages in the destination rows that were left out.
+void Scale2x_YV( int w, int h, BYTE* d, int dpitch, BYTE* s, int spitch ) 
+{
+	BYTE* const srcEnd = s + h*spitch;
+	BYTE* dstRow = d;
+
+	for(BYTE* srcRow = s; srcRow < srcEnd; srcRow += spitch, dstRow += dpitch*2)
+	{
+		BYTE* src = srcRow;
+		BYTE* dst = dstRow;
+
+		// every sample but the last one: copy it, then emit the mean of it and the next sample
+		for(BYTE* last = srcRow + (w-1); src < last; src++, dst += 2)
+		{
+			dst[0] = src[0];
+			dst[1] = (src[0]+src[1])>>1;
+		}
+
+		// the last sample has no right neighbour, so it is written twice
+		dst[0] = dst[1] = src[0];
+	}
+
+	// fill the odd destination rows from the even ones written above
+	AvgLines8(d, h*2, dpitch);
+
+#ifndef WIN64
+	// AvgLines8 has an MMX code path on 32-bit builds; reset the FPU/MMX state before returning
+	__asm emms;
+#endif
+}
+
+// 2x upscale of a packed YUY2 image: every source row is widened into every second destination row
+// (luma and chroma interpolated horizontally), then AvgLines8() averages in the rows between them.
+void Scale2x_YUY2( int w, int h, BYTE* d, int dpitch, BYTE* s, int spitch ) 
+{
+#ifdef WIN64
+	// CPUID from VDub
+	bool fSSE2 = !!(g_cpuid.m_flags & CCpuID::sse2);
+#endif
+
+	unsigned __int64 __0xffffffff00000000 = 0xffffffff00000000;
+	unsigned __int64 __0x00000000ffffffff = 0x00000000ffffffff;
+	unsigned __int64 __0x00ff00ff00ff00ff = 0x00ff00ff00ff00ff;
+
+	BYTE* s1;
+	BYTE* s2;
+	BYTE* d1;
+
+	for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch)
+	{
+		BYTE* stmp = s1 + spitch;
+		BYTE* dtmp = d1 + dpitch;
+
+		// row0, 4 pixels: y1|u1|y2|v1|y3|u2|y4|v2
+		// ->
+		// row0, 8 pixels: y1|u1|(y1+y2)/2|v1|y2|(u1+u2)/2|(y2+y3)/2|(v1+v2)/2
+
+#ifdef WIN64
+		if(fSSE2)
+		{
+			__m128i mm4 = _mm_cvtsi64_si128(__0x00ff00ff00ff00ff);
+			__m128i mm5 = _mm_cvtsi64_si128(__0x00000000ffffffff);
+			__m128i mm6 = _mm_cvtsi64_si128(__0xffffffff00000000);
+			for(BYTE* s3 = s1 + ((w>>1)-1)*4; s1 < s3; s1 += 4, d1 += 8)
+			{
+				__m128i mm0 = _mm_cvtsi64_si128(*(size_t*)s1); //movq	mm0, [esi]
+				__m128i mm2 = _mm_move_epi64(mm0);			//movq	mm2, mm0
+				mm0 = _mm_and_si128(mm0, mm4);				//pand	mm0, mm4	// mm0 = 00y400y300y200y1
+				mm2 = _mm_srli_epi16(mm2, 8);				//psrlw	mm2, 8		// mm2 = 00u200v200u100v1
+				__m128i mm1 = _mm_move_epi64(mm0);			//movq	mm1, mm0
+				mm0 = _mm_and_si128(mm0, mm5);				//pand	mm0, mm5	// mm0 = 0000000000y200y1
+				mm1 = _mm_slli_epi64(mm1, 16);				//psllq	mm1, 16
+				mm1 = _mm_and_si128(mm1, mm6);				//pand	mm1, mm6	// mm1 = 00y300y200000000
+				mm1 = _mm_or_si128(mm1, mm0);				//por	mm1, mm0	// mm1 = 00y300y200y200y1
+				mm0 = _mm_unpacklo_epi16(mm0, mm0);			//punpcklwd mm0, mm0	// mm0 = 00y200y200y100y1 (word, not byte, interleave)
+				mm0 = _mm_adds_epi16(mm0,mm1);				//paddw	mm0, mm1
+				mm0 = _mm_srli_epi16(mm0, 1);				//psrlw	mm0, 1		// mm0 = (mm0 + mm1) / 2
+				mm1 = _mm_move_epi64(mm2);					//movq	mm1, mm2
+				mm1 = _mm_unpacklo_epi32(mm1, mm1);			//punpckldq	mm1, mm1 // mm1 = 00u100v100u100v1
+				mm1 = _mm_adds_epi16(mm1,mm2);				//paddw	mm1, mm2
+				mm1 = _mm_srli_epi16(mm1, 1);				//psrlw	mm1, 1		// mm1 = (mm1 + mm2) / 2
+				mm1 = _mm_slli_epi16(mm1, 8);				//psllw	mm1, 8
+				mm0 = _mm_or_si128(mm0, mm1);				//por		mm0, mm1	// mm0 = (v1+v2)/2|(y2+y3)/2|(u1+u2)/2|y2|v1|(y1+y2)/2|u1|y1
+				*(size_t*)d1 = (size_t)_mm_cvtsi128_si64(mm0);		//movq	[edi], mm0	// the merged luma+chroma result must be what is stored
+			}
+		}
+		else
+		{
+			for(BYTE* s3 = s1 + ((w>>1)-1)*4; s1 < s3; s1 += 4, d1 += 8)
+			{
+				d1[0] = s1[0]; 
+				d1[1] = s1[1]; 
+				d1[2] = (s1[0]+s1[2])>>1;
+				d1[3] = s1[3];
+
+				d1[4] = s1[2];
+				d1[5] = (s1[1]+s1[5])>>1;
+				d1[6] = (s1[2]+s1[4])>>1;
+				d1[7] = (s1[3]+s1[7])>>1;
+			}
+		}
+#else
+		__asm
+		{
+			mov		esi, s1
+			mov		edi, d1
+
+			mov		ecx, w
+			shr		ecx, 1
+			dec		ecx
+
+			movq	mm4, __0x00ff00ff00ff00ff
+			movq	mm5, __0x00000000ffffffff
+			movq	mm6, __0xffffffff00000000
+row_loop1:
+			movq	mm0, [esi]
+			movq	mm2, mm0
+
+			pand	mm0, mm4	// mm0 = 00y400y300y200y1
+			psrlw	mm2, 8		// mm2 = 00u200v200u100v1
+
+
+			movq	mm1, mm0
+
+			pand	mm0, mm5	// mm0 = 0000000000y200y1
+
+			psllq	mm1, 16
+			pand	mm1, mm6	// mm1 = 00y300y200000000
+
+			por		mm1, mm0	// mm1 = 00y300y200y200y1
+
+			punpcklwd mm0, mm0	// mm0 = 00y200y200y100y1
+
+			paddw	mm0, mm1
+			psrlw	mm0, 1		// mm0 = (mm0 + mm1) / 2
+
+
+			movq	mm1, mm2
+			punpckldq	mm1, mm1 // mm1 = 00u100v100u100v1
+
+			paddw	mm1, mm2
+			psrlw	mm1, 1		// mm1 = (mm1 + mm2) / 2
+
+
+			psllw	mm1, 8
+			por		mm0, mm1	// mm0 = (v1+v2)/2|(y2+y3)/2|(u1+u2)/2|y2|v1|(y1+y2)/2|u1|y1
+
+			movq	[edi], mm0
+
+			lea		esi, [esi+4]
+			lea		edi, [edi+8]
+
+			dec		ecx
+			jnz		row_loop1
+
+			mov		s1, esi
+			mov		d1, edi
+		};
+#endif
+		*d1++ = s1[0];	// last pixel pair of the row: there is no right neighbour to blend with,
+		*d1++ = s1[1];	// so only (y1+y2)/2 is interpolated and y2/u/v are repeated
+		*d1++ =(s1[0]+s1[2])>>1;
+		*d1++ = s1[3];
+
+		*d1++ = s1[2];
+		*d1++ = s1[1];
+		*d1++ = s1[2];
+		*d1++ = s1[3];
+
+		s1 = stmp;
+		d1 = dtmp;
+	}
+
+	AvgLines8(d, h*2, dpitch);
+
+#ifndef WIN64
+	__asm emms;
+#endif
+}
+
+// 2x upscale of a 16-bit RGB555 image: every source row is widened into every second destination
+// row (each pixel followed by the per-field mean of it and its right neighbour), then
+// AvgLines555() is run over the h*2 destination rows (presumably to fill the rows in between).
+void Scale2x_RGB555( int w, int h, BYTE* d, int dpitch, BYTE* s, int spitch ) 
+{
+	BYTE* const srcEnd = s + h*spitch;
+	BYTE* dstRow = d;
+
+	for(BYTE* srcRow = s; srcRow < srcEnd; srcRow += spitch, dstRow += dpitch*2)
+	{
+		WORD* src = (WORD*)srcRow;
+		WORD* dst = (WORD*)dstRow;
+
+		// every pixel but the last one: copy it, then blend it with the next pixel field by field
+		for(WORD* last = src + (w-1); src < last; src++, dst += 2)
+		{
+			dst[0] = src[0];
+			dst[1] = 
+				((((src[0]&0x7c00) + (src[1]&0x7c00)) >> 1)&0x7c00)|	// bits 10-14
+				((((src[0]&0x03e0) + (src[1]&0x03e0)) >> 1)&0x03e0)|	// bits 5-9
+				((((src[0]&0x001f) + (src[1]&0x001f)) >> 1)&0x001f);	// bits 0-4
+		}
+
+		// the last pixel has no right neighbour, so it is written twice
+		dst[0] = src[0];
+		dst[1] = src[0];
+	}
+
+	// vertical pass over the doubled image
+	AvgLines555(d, h*2, dpitch);
+
+#ifndef WIN64
+	// reset the FPU/MMX state on 32-bit builds before returning
+	__asm emms;
+#endif
+}
+
+// 2x upscale of a 16-bit RGB565 image: every source row is widened into every second destination
+// row (each pixel followed by the per-field mean of it and its right neighbour), then
+// AvgLines565() is run over the h*2 destination rows (presumably to fill the rows in between).
+void Scale2x_RGB565( int w, int h, BYTE* d, int dpitch, BYTE* s, int spitch ) 
+{
+	BYTE* const srcEnd = s + h*spitch;
+	BYTE* dstRow = d;
+
+	for(BYTE* srcRow = s; srcRow < srcEnd; srcRow += spitch, dstRow += dpitch*2)
+	{
+		WORD* src = (WORD*)srcRow;
+		WORD* dst = (WORD*)dstRow;
+
+		// every pixel but the last one: copy it, then blend it with the next pixel field by field
+		for(WORD* last = src + (w-1); src < last; src++, dst += 2)
+		{
+			dst[0] = src[0];
+			dst[1] = 
+				((((src[0]&0xf800) + (src[1]&0xf800)) >> 1)&0xf800)|	// bits 11-15
+				((((src[0]&0x07e0) + (src[1]&0x07e0)) >> 1)&0x07e0)|	// bits 5-10
+				((((src[0]&0x001f) + (src[1]&0x001f)) >> 1)&0x001f);	// bits 0-4
+		}
+
+		// the last pixel has no right neighbour, so it is written twice
+		dst[0] = src[0];
+		dst[1] = src[0];
+	}
+
+	// vertical pass over the doubled image
+	AvgLines565(d, h*2, dpitch);
+
+#ifndef WIN64
+	// reset the FPU/MMX state on 32-bit builds before returning
+	__asm emms;
+#endif
+}
+
+// 2x upscale of a 24-bit RGB image: every source row is widened into every second destination row
+// (each 3-byte pixel followed by the per-byte mean of it and its right neighbour), then
+// AvgLines8() averages in the destination rows that were left out.
+void Scale2x_RGB24( int w, int h, BYTE* d, int dpitch, BYTE* s, int spitch ) 
+{
+	BYTE* const srcEnd = s + h*spitch;
+	BYTE* dstRow = d;
+
+	for(BYTE* srcRow = s; srcRow < srcEnd; srcRow += spitch, dstRow += dpitch*2)
+	{
+		BYTE* src = srcRow;
+		BYTE* dst = dstRow;
+
+		// every pixel but the last one: copy it, then emit the mean of it and the next pixel
+		for(BYTE* last = srcRow + (w-1)*3; src < last; src += 3, dst += 6)
+		{
+			for(int c = 0; c < 3; c++)
+			{
+				dst[c] = src[c];
+				dst[c+3] = (src[c]+src[c+3])>>1;
+			}
+		}
+
+		// the last pixel has no right neighbour, so it is written twice
+		for(int c = 0; c < 3; c++)
+		{
+			dst[c] = dst[c+3] = src[c];
+		}
+	}
+
+	// fill the odd destination rows from the even ones written above
+	AvgLines8(d, h*2, dpitch);
+
+#ifndef WIN64
+	// AvgLines8 has an MMX code path on 32-bit builds; reset the FPU/MMX state before returning
+	__asm emms;
+#endif
+}
+
+// 2x upscale of a 32-bit (RGB32/ARGB32) image: every source row is widened into every second
+// destination row, each pixel followed by the per-byte mean of it and its right neighbour (the last
+// pixel of a row is duplicated); AvgLines8() then averages in the rows between them.
+void Scale2x_XRGB32( int w, int h, BYTE* d, int dpitch, BYTE* s, int spitch ) 
+{
+	BYTE* s1;
+	BYTE* s2;
+	BYTE* d1;
+
+	for(s1 = s, s2 = s + h*spitch, d1 = d; s1 < s2; d1 += dpitch)
+	{
+		BYTE* stmp = s1 + spitch;
+		BYTE* dtmp = d1 + dpitch;
+
+#ifdef WIN64
+		if(g_cpuid.m_flags & CCpuID::sse2) // SSE2 code (CPUID from VDub; no fSSE2 local exists in this function)
+		{
+			__m128i mm_zero = _mm_setzero_si128();//pxor	mm0, mm0
+			for(BYTE* s3 = s1 + (w-1)*4; s1 < s3; s1 += 4, d1 += 8)
+			{
+				// load pixel p1 (low dword) together with its right neighbour p2 (high dword)
+				__m128i mm1 = _mm_loadl_epi64((const __m128i*)s1);	//movq	mm1, [esi]
+				mm1 = _mm_unpacklo_epi8(mm1, mm_zero);	// words: p1 in the low 64 bits, p2 in the high 64 bits
+
+				// a 128-bit register holds both widened pixels, so p2 is shifted down instead of using punpckhbw
+				__m128i mm2 = _mm_srli_si128(mm1, 8);	// low 64 bits = 00xx00r200g200b2
+
+				mm2 = _mm_adds_epi16(mm2,mm1);		//paddw	mm2, mm1
+				mm2 = _mm_srli_epi16(mm2, 1);		//psrlw	mm2, 1		// low 64 bits = (p1 + p2) / 2
+
+				mm1 = _mm_unpacklo_epi64(mm1, mm2);	// words: p1 | (p1 + p2) / 2
+				mm1 = _mm_packus_epi16(mm1, mm1);	//packuswb	// low 64 bits = p1, (p1 + p2) / 2 as bytes
+				_mm_storel_epi64((__m128i*)d1, mm1);	//movq	[edi], mm1
+			}
+		}
+		else
+		{
+			// scalar equivalent of the MMX loop below: 4 source bytes in, 8 destination bytes out
+			for(BYTE* s3 = s1 + (w-1)*4; s1 < s3; s1 += 4, d1 += 8)
+			{
+				d1[0] = s1[0]; 
+				d1[1] = s1[1]; 
+				d1[2] = s1[2];
+				d1[3] = s1[3];
+
+				d1[4] = (s1[0]+s1[4])>>1;
+				d1[5] = (s1[1]+s1[5])>>1;
+				d1[6] = (s1[2]+s1[6])>>1;
+				d1[7] = (s1[3]+s1[7])>>1;
+			}
+		}
+#else
+		__asm
+		{
+			mov		esi, s1
+			mov		edi, d1
+
+			mov		ecx, w
+			dec		ecx
+
+			pxor	mm0, mm0
+row_loop3:
+			movq	mm1, [esi]
+			movq	mm2, mm1
+
+			punpcklbw mm1, mm0	// mm1 = 00xx00r100g100b1
+			punpckhbw mm2, mm0	// mm2 = 00xx00r200g200b2
+
+			paddw	mm2, mm1
+			psrlw	mm2, 1		// mm2 = (mm1 + mm2) / 2
+
+			packuswb	mm1, mm2
+
+			movq	[edi], mm1
+
+			lea		esi, [esi+4]
+			lea		edi, [edi+8]
+
+			dec		ecx
+			jnz		row_loop3
+
+			mov		s1, esi
+			mov		d1, edi
+		};
+#endif
+
+		*((DWORD*)d1) = *((DWORD*)s1);	// last pixel of the row has no right neighbour: duplicate it
+		*((DWORD*)d1+1) = *((DWORD*)s1);
+
+		s1 = stmp;
+		d1 = dtmp;
+	}
+
+	AvgLines8(d, h*2, dpitch);
+
+#ifndef WIN64
+	__asm emms;
+#endif
+}
+
+/* ResX2: forwards to the Scale2x_* routine matching subtype; any other subtype is a no-op */
+void Scale2x(const GUID& subtype, BYTE* d, int dpitch, BYTE* s, int spitch, int w, int h)
+{
+	// upscaler selected for this pixel format; stays 0 when the format is not handled here
+	void (*scale)(int, int, BYTE*, int, BYTE*, int) = 0;
+
+	if(subtype == MEDIASUBTYPE_YV12 || subtype == MEDIASUBTYPE_I420 || subtype == MEDIASUBTYPE_IYUV) scale = Scale2x_YV;
+	else if(subtype == MEDIASUBTYPE_YUY2) scale = Scale2x_YUY2;
+	else if(subtype == MEDIASUBTYPE_RGB555) scale = Scale2x_RGB555;
+	else if(subtype == MEDIASUBTYPE_RGB565) scale = Scale2x_RGB565;
+	else if(subtype == MEDIASUBTYPE_RGB24) scale = Scale2x_RGB24;
+	else if(subtype == MEDIASUBTYPE_RGB32 || subtype == MEDIASUBTYPE_ARGB32) scale = Scale2x_XRGB32;
+
+	if(scale)
+		scale(w, h, d, dpitch, s, spitch);
+}
diff --git a/src/filters/transform/vsfilter/Scale2x.h b/src/filters/transform/vsfilter/Scale2x.h
new file mode 100644
index 000000000..fbd3a2801
--- /dev/null
+++ b/src/filters/transform/vsfilter/Scale2x.h
@@ -0,0 +1,24 @@
+/* 
+ *	Copyright (C) 2003-2006 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+extern void Scale2x(const GUID& subtype, BYTE* d, int dpitch, BYTE* s, int spitch, int w, int h);
+\ No newline at end of file
diff --git a/src/filters/transform/vsfilter/Systray.cpp b/src/filters/transform/vsfilter/Systray.cpp
index 00a25bf33..31726f173 100644
--- a/src/filters/transform/vsfilter/Systray.cpp
+++ b/src/filters/transform/vsfilter/Systray.cpp
@@ -280,7 +280,7 @@ LRESULT CSystrayWindow::OnNotifyIcon(WPARAM wParam, LPARAM lParam)
 				DWORD cStreams = 0;
 				pStreams[j]->Count(&cStreams);
 
-				DWORD flags, group, prevgroup = -1;
+				DWORD flags, group, prevgroup = (DWORD)-1;
 				
 				for(UINT i = 0; i < cStreams; i++)
 				{
@@ -306,15 +306,19 @@ LRESULT CSystrayWindow::OnNotifyIcon(WPARAM wParam, LPARAM lParam)
 				if(cStreams > 0) popup.AppendMenu(MF_SEPARATOR);
 			}
 
-			int i;
+			int i = 0;
 
 			TCHAR* str;
-			for(i = 0; str = CallPPage(m_tbid->graph, i, (HWND)INVALID_HANDLE_VALUE); i++)
+			str = CallPPage(m_tbid->graph, i, (HWND)INVALID_HANDLE_VALUE);
+			while(str)
 			{
 				if(_tcsncmp(str, _T("DivX MPEG"), 9) || m_tbid->fRunOnce) // divx3's ppage will crash if the graph hasn't been run at least once yet
 					popup.AppendMenu(MF_ENABLED|MF_STRING|MF_UNCHECKED, (1<<14)|(i), str);
 
 				delete [] str;
+
+				i++;
+				str = CallPPage(m_tbid->graph, i, (HWND)INVALID_HANDLE_VALUE);
 			}
 
 			SetForegroundWindow();
@@ -355,7 +359,7 @@ DWORD CALLBACK SystrayThreadProc(void* pParam)
 
 	CSystrayWindow wnd((SystrayIconData*)pParam);
 	if(!wnd.CreateEx(0, AfxRegisterWndClass(0), _T("DVSWND"), WS_OVERLAPPED, CRect(0, 0, 0, 0), NULL, 0, NULL))
-		return -1;
+		return (DWORD)-1;
 
 	((SystrayIconData*)pParam)->hSystrayWnd = wnd.m_hWnd;
 
@@ -409,7 +413,8 @@ static TCHAR* CallPPage(IFilterGraph* pGraph, int idx, HWND hWnd)
 		}
 		else
 		{
-			if(ret = new TCHAR[wcslen(wstr)+1])
+			ret = new TCHAR[wcslen(wstr)+1];
+			if(ret)
 				_tcscpy(ret, CString(wstr));
 		}
 	}
diff --git a/src/filters/transform/vsfilter/VSFilter.vcproj b/src/filters/transform/vsfilter/VSFilter.vcproj
index 4ff6a0c5b..bca194e4e 100644
--- a/src/filters/transform/vsfilter/VSFilter.vcproj
+++ b/src/filters/transform/vsfilter/VSFilter.vcproj
@@ -28,7 +28,7 @@
 			>
 			<Tool
 				Name="VCPreBuildEventTool"
-				CommandLine="cd ..\..\..\..\&#x0D;&#x0A;update_version.bat"
+				CommandLine="cd ..\..\..\..\&#x0D;&#x0A;update_version.bat&#x0D;&#x0A;"
 			/>
 			<Tool
 				Name="VCCustomBuildTool"
@@ -102,7 +102,7 @@
 			>
 			<Tool
 				Name="VCPreBuildEventTool"
-				CommandLine="cd ..\..\..\..\&#x0D;&#x0A;update_version.bat"
+				CommandLine="cd ..\..\..\..\&#x0D;&#x0A;update_version.bat&#x0D;&#x0A;"
 			/>
 			<Tool
 				Name="VCCustomBuildTool"
@@ -174,7 +174,7 @@
 			>
 			<Tool
 				Name="VCPreBuildEventTool"
-				CommandLine="cd ..\..\..\..\&#x0D;&#x0A;update_version.bat"
+				CommandLine="cd ..\..\..\..\&#x0D;&#x0A;update_version.bat&#x0D;&#x0A;"
 			/>
 			<Tool
 				Name="VCCustomBuildTool"
@@ -256,7 +256,7 @@
 			>
 			<Tool
 				Name="VCPreBuildEventTool"
-				CommandLine="cd ..\..\..\..\&#x0D;&#x0A;update_version.bat"
+				CommandLine="cd ..\..\..\..\&#x0D;&#x0A;update_version.bat&#x0D;&#x0A;"
 			/>
 			<Tool
 				Name="VCCustomBuildTool"
@@ -360,6 +360,10 @@
 				>
 			</File>
 			<File
+				RelativePath=".\Scale2x.cpp"
+				>
+			</File>
+			<File
 				RelativePath=".\StdAfx.cpp"
 				>
 			</File>
@@ -376,6 +380,10 @@
 				>
 			</File>
 			<File
+				RelativePath=".\vd.cpp"
+				>
+			</File>
+			<File
 				RelativePath=".\vfr.cpp"
 				>
 			</File>
@@ -413,6 +421,10 @@
 				>
 			</File>
 			<File
+				RelativePath=".\Scale2x.h"
+				>
+			</File>
+			<File
 				RelativePath=".\StdAfx.h"
 				>
 			</File>
@@ -429,6 +441,10 @@
 				>
 			</File>
 			<File
+				RelativePath=".\vd.h"
+				>
+			</File>
+			<File
 				RelativePath=".\vfr.h"
 				>
 			</File>
diff --git a/src/filters/transform/vsfilter/plugins.cpp b/src/filters/transform/vsfilter/plugins.cpp
index b62c67d7b..b36c14244 100644
--- a/src/filters/transform/vsfilter/plugins.cpp
+++ b/src/filters/transform/vsfilter/plugins.cpp
@@ -356,12 +356,14 @@ namespace VirtualDub
 
 	int vobsubInitProc(FilterActivation* fa, const FilterFunctions* ff)
 	{
-		return !(*(CVirtualDubFilter**)fa->filter_data = new CVobSubVirtualDubFilter());
+		*(CVirtualDubFilter**)fa->filter_data = new CVobSubVirtualDubFilter();
+		return !(*(CVirtualDubFilter**)fa->filter_data);
 	}
 
 	int textsubInitProc(FilterActivation* fa, const FilterFunctions* ff)
 	{
-		return !(*(CVirtualDubFilter**)fa->filter_data = new CTextSubVirtualDubFilter());
+		*(CVirtualDubFilter**)fa->filter_data = new CTextSubVirtualDubFilter();
+		return !(*(CVirtualDubFilter**)fa->filter_data);
 	}
 
 	void baseDeinitProc(FilterActivation* fa, const FilterFunctions* ff)
@@ -485,8 +487,11 @@ namespace VirtualDub
 
 	extern "C" __declspec(dllexport) int __cdecl VirtualdubFilterModuleInit2(FilterModule *fm, const FilterFunctions *ff, int& vdfd_ver, int& vdfd_compat)
 	{
-		if(!(fd_vobsub = ff->addFilter(fm, &filterDef_vobsub, sizeof(FilterDefinition)))
-		|| !(fd_textsub = ff->addFilter(fm, &filterDef_textsub, sizeof(FilterDefinition))))
+		fd_vobsub = ff->addFilter(fm, &filterDef_vobsub, sizeof(FilterDefinition));
+		if(!fd_vobsub)
+			return 1;
+		fd_textsub = ff->addFilter(fm, &filterDef_textsub, sizeof(FilterDefinition));
+		if(!fd_textsub)
 			return 1;
 
 		vdfd_ver = VIRTUALDUB_FILTERDEF_VERSION;
diff --git a/src/filters/transform/vsfilter/vd.cpp b/src/filters/transform/vsfilter/vd.cpp
new file mode 100644
index 000000000..4e3e79501
--- /dev/null
+++ b/src/filters/transform/vsfilter/vd.cpp
@@ -0,0 +1,315 @@
+//	VirtualDub - Video processing and capture application
+//	Copyright (C) 1998-2001 Avery Lee
+//
+//	This program is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	This program is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with this program; if not, write to the Free Software
+//	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#include "stdafx.h"
+
+
+// Fills every odd row of an 8-bit image in place with the average of the rows
+// directly above and below it; even rows are only read. dst points at the first
+// row, h is the number of rows and pitch the row stride in bytes (all pitch
+// bytes of a row are averaged). When h is even the last row has no row below
+// it and receives a copy of the row above. The SSE2 and plain C paths round
+// halves up while the MMX path truncates, so the result can differ by one per
+// byte depending on the CPU. NOTE(review): confirm whether that is intended.
+void AvgLines8(BYTE* dst, DWORD h, DWORD pitch)
+{
+	if(h <= 1)
+		return;
+
+	BYTE* s = dst;
+	BYTE* d = dst + (h-2)*pitch;
+
+	for(; s < d; s += pitch*2)
+	{
+		BYTE* tmp = s;
+
+#ifndef _WIN64
+		if((g_cpuid.m_flags & CCpuID::sse2) && !((DWORD)tmp&0xf) && !((DWORD)pitch&0xf))	// movdqa needs 16-byte alignment
+		{
+			__asm
+			{
+				mov		esi, tmp
+				mov		ebx, pitch
+
+				mov		ecx, ebx
+				shr		ecx, 4				// 16 bytes per iteration; pitch is a multiple of 16 here, so no tail is left
+
+AvgLines8_sse2_loop:
+				movdqa	xmm0, [esi]
+				pavgb	xmm0, [esi+ebx*2]
+				movdqa	[esi+ebx], xmm0
+				add		esi, 16
+
+				dec		ecx
+				jnz		AvgLines8_sse2_loop
+			}
+		}
+		else if((g_cpuid.m_flags & CCpuID::mmx) && pitch >= 8)	// below 8 bytes ecx would start at 0 and wrap in dec/jnz
+		{
+			__asm
+			{
+				mov		esi, tmp
+				mov		ebx, pitch
+
+				mov		ecx, ebx
+				shr		ecx, 3				// 8 bytes per iteration
+
+				pxor	mm7, mm7			// zero, used to widen bytes to words
+AvgLines8_mmx_loop:
+				movq	mm0, [esi]
+				movq	mm1, mm0
+
+				punpcklbw	mm0, mm7
+				punpckhbw	mm1, mm7
+
+				movq	mm2, [esi+ebx*2]
+				movq	mm3, mm2
+
+				punpcklbw	mm2, mm7
+				punpckhbw	mm3, mm7
+
+				paddw	mm0, mm2
+				psrlw	mm0, 1				// truncating halve (no +1 before the shift)
+
+				paddw	mm1, mm3
+				psrlw	mm1, 1
+
+				packuswb	mm0, mm1
+
+				movq	[esi+ebx], mm0
+
+				lea		esi, [esi+8]
+
+				dec		ecx
+				jnz		AvgLines8_mmx_loop
+
+				mov		tmp, esi
+			}
+
+			for(ptrdiff_t i = pitch&7; i--; tmp++)	// bytes left after the last full 8-byte block
+			{
+				tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
+			}
+		}
+		else
+#endif
+		{
+			for(ptrdiff_t i = pitch; i--; tmp++)
+			{
+				tmp[pitch] = (tmp[0] + tmp[pitch<<1] + 1) >> 1;
+			}
+		}
+	}
+
+	if(!(h&1) && h >= 2)	// even h: the last row has no row below it, so duplicate the row above
+	{
+		dst += (h-2)*pitch;
+		memcpy(dst + pitch, dst, pitch);
+	}
+
+#ifndef _WIN64
+	__asm emms;
+#endif
+}
+
+// Fills every odd row of a 16-bit 5-5-5 image (masks 0x7c00/0x03e0/0x001f) in place with
+// the per-channel truncated average of the rows above and below; dst, h and pitch (row
+// stride in bytes) describe the image. With an even h the last row copies the row above it.
+// Pixels outside the 8-byte MMX blocks, and all pixels on _WIN64, are done by the C loop.
+void AvgLines555(BYTE* dst, DWORD h, DWORD pitch)
+{
+	if(h <= 1)
+		return;
+
+	unsigned __int64 __0x03e003e003e003e0 = 0x03e003e003e003e0;
+	unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;
+
+	BYTE* s = dst;
+	BYTE* d = dst + (h-2)*pitch;
+
+	for(; s < d; s += pitch*2)
+	{
+		WORD* tmp = (WORD*)s;	// walks 16-bit pixels; row offsets below are still applied in bytes
+
+#ifndef _WIN64
+		__asm
+		{
+			mov		esi, tmp
+			mov		ebx, pitch
+			mov		ecx, ebx
+			shr		ecx, 3
+			jz		AvgLines555_done	// pitch < 8: no full block, and ecx = 0 would wrap in dec/jnz
+			movq	mm6, __0x03e003e003e003e0
+			movq	mm7, __0x001f001f001f001f
+
+AvgLines555_loop:
+			movq	mm0, [esi]
+			movq	mm1, mm0
+			movq	mm2, mm0
+
+			psrlw	mm0, 10				// red1 bits: mm0 = 001f001f001f001f
+			pand	mm1, mm6			// green1 bits: mm1 = 03e003e003e003e0
+			pand	mm2, mm7			// blue1 bits: mm2 = 001f001f001f001f
+
+			movq	mm3, [esi+ebx*2]
+			movq	mm4, mm3
+			movq	mm5, mm3
+
+			psrlw	mm3, 10				// red2 bits: mm3 = 001f001f001f001f
+			pand	mm4, mm6			// green2 bits: mm4 = 03e003e003e003e0
+			pand	mm5, mm7			// blue2 bits: mm5 = 001f001f001f001f
+
+			paddw	mm0, mm3
+			psrlw	mm0, 1				// (red1+red2)/2
+			psllw	mm0, 10				// red bits at 7c007c007c007c00
+
+			paddw	mm1, mm4
+			psrlw	mm1, 1				// (green1+green2)/2
+			pand	mm1, mm6			// green bits at 03e003e003e003e0
+
+			paddw	mm2, mm5
+			psrlw	mm2, 1				// (blue1+blue2)/2
+			// blue bits at 001f001f001f001f (no need to pand, lower bits were discarded)
+
+			por		mm0, mm1
+			por		mm0, mm2
+			movq	[esi+ebx], mm0
+			lea		esi, [esi+8]
+			dec		ecx
+			jnz		AvgLines555_loop
+AvgLines555_done:
+			mov		tmp, esi
+		}
+#endif
+
+		for(WORD* end = (WORD*)s + (pitch>>1); tmp < end; tmp++)	// whatever the asm above did not cover
+		{
+			const DWORD a = *tmp, b = *(WORD*)((BYTE*)tmp + pitch*2);	// pixels in the rows above and below
+			*(WORD*)((BYTE*)tmp + pitch) = (WORD)(				// pixel in the row being filled
+				((((a&0x7c00) + (b&0x7c00)) >> 1)&0x7c00) | ((((a&0x03e0) + (b&0x03e0)) >> 1)&0x03e0) |
+				((((a&0x001f) + (b&0x001f)) >> 1)&0x001f));
+		}
+	}
+
+	if(!(h&1) && h >= 2)	// even h: the last row has no row below it, so duplicate the row above
+	{
+		dst += (h-2)*pitch;
+		memcpy(dst + pitch, dst, pitch);
+	}
+
+#ifndef _WIN64
+	__asm emms;
+#endif
+}
+
+// Fills every odd row of a 16-bit 5-6-5 image (masks 0xf800/0x07e0/0x001f) in place
+// with the per-channel truncated average of the rows directly above and below it.
+// dst points at the first row, h is the number of rows and pitch the row stride in
+// bytes; even rows are only read. When h is even the last row has no row below it
+// and receives a copy of the row above.
+void AvgLines565(BYTE* dst, DWORD h, DWORD pitch)
+{
+	if(h <= 1)
+		return;
+
+	unsigned __int64 __0x07e007e007e007e0 = 0x07e007e007e007e0;
+	unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;
+
+	BYTE* s = dst;
+	BYTE* d = dst + (h-2)*pitch;
+
+	for(; s < d; s += pitch*2)
+	{
+		WORD* tmp = (WORD*)s;
+
+#ifndef _WIN64
+		__asm
+		{
+			mov		esi, tmp
+			mov		ebx, pitch
+
+			mov		ecx, ebx
+			shr		ecx, 3
+			jz		AvgLines565_done	// pitch < 8: no full block, and ecx = 0 would wrap in dec/jnz
+			movq	mm6, __0x07e007e007e007e0
+			movq	mm7, __0x001f001f001f001f
+
+AvgLines565_loop:
+			movq	mm0, [esi]
+			movq	mm1, mm0
+			movq	mm2, mm0
+
+			psrlw	mm0, 11				// red1 bits: mm0 = 001f001f001f001f
+			pand	mm1, mm6			// green1 bits: mm1 = 07e007e007e007e0
+			pand	mm2, mm7			// blue1 bits: mm2 = 001f001f001f001f
+
+			movq	mm3, [esi+ebx*2]
+			movq	mm4, mm3
+			movq	mm5, mm3
+
+			psrlw	mm3, 11				// red2 bits: mm3 = 001f001f001f001f
+			pand	mm4, mm6			// green2 bits: mm4 = 07e007e007e007e0
+			pand	mm5, mm7			// blue2 bits: mm5 = 001f001f001f001f
+
+			paddw	mm0, mm3
+			psrlw	mm0, 1				// (red1+red2)/2
+			psllw	mm0, 11				// red bits at f800f800f800f800
+
+			paddw	mm1, mm4
+			psrlw	mm1, 1				// (green1+green2)/2
+			pand	mm1, mm6			// green bits at 07e007e007e007e0
+
+			paddw	mm2, mm5
+			psrlw	mm2, 1				// (blue1+blue2)/2
+			// blue bits at 001f001f001f001f (no need to pand, lower bits were discarded)
+
+			por		mm0, mm1
+			por		mm0, mm2
+
+			movq	[esi+ebx], mm0
+
+			lea		esi, [esi+8]
+
+			dec		ecx
+			jnz		AvgLines565_loop
+AvgLines565_done:
+			mov		tmp, esi
+		}
+#endif
+
+		// Finish the row in C: the pixels after the last full 8-byte block on x86,
+		// or the whole row on _WIN64 where there is no inline assembly. tmp is a
+		// WORD pointer while pitch counts bytes, so row offsets go through BYTE*.
+		for(WORD* end = (WORD*)s + (pitch>>1); tmp < end; tmp++)
+		{
+			const DWORD a = *tmp, b = *(WORD*)((BYTE*)tmp + pitch*2);	// pixels in the rows above and below
+			*(WORD*)((BYTE*)tmp + pitch) = (WORD)(				// pixel in the row being filled
+				((((a&0xf800) + (b&0xf800)) >> 1)&0xf800) | ((((a&0x07e0) + (b&0x07e0)) >> 1)&0x07e0) |
+				((((a&0x001f) + (b&0x001f)) >> 1)&0x001f));
+		}
+	}
+
+	if(!(h&1) && h >= 2)	// even h: the last row has no row below it, so duplicate the row above
+	{
+		dst += (h-2)*pitch;
+		memcpy(dst + pitch, dst, pitch);
+	}
+
+#ifndef _WIN64
+	__asm emms;
+#endif
+}
diff --git a/src/filters/transform/vsfilter/vd.h b/src/filters/transform/vsfilter/vd.h
new file mode 100644
index 000000000..af769fbb3
--- /dev/null
+++ b/src/filters/transform/vsfilter/vd.h
@@ -0,0 +1,22 @@
+//	VirtualDub - Video processing and capture application
+//	Copyright (C) 1998-2001 Avery Lee
+//
+//	This program is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	This program is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with this program; if not, write to the Free Software
+//	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#pragma once
+
+extern void AvgLines8(BYTE* dst, DWORD h, DWORD pitch);	// in-place row averaging over h rows of pitch bytes, one byte per sample
+extern void AvgLines555(BYTE* dst, DWORD h, DWORD pitch);	// same arguments, 16-bit pixels split with masks 0x7c00/0x03e0/0x001f
+extern void AvgLines565(BYTE* dst, DWORD h, DWORD pitch);	// same arguments, 16-bit pixels split with masks 0xf800/0x07e0/0x001f