Rename several directories to use MixedCase instead of lowercase.

They now mostly match the case used in #includes, and they're consistent with the names of the .h files they contain. git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1648 10f7b99b-c216-0410-bff0-8a66a9350fd8
author: povaddict <povaddict@users.sourceforge.net> 2010-02-10 02:16:44 +0300
committer: povaddict <povaddict@users.sourceforge.net> 2010-02-10 02:16:44 +0300
commit: 726a91b12a7524e45e7a901c9e4883af5b1bffe6 (patch)
tree: f5d25e3b2e84c92f4901280c73d5d3d7e6c3cd19 /src/filters/source/D2VSource
parent: 02183f6e47ad4ea1057de9950482f291f2ae4290 (diff)
13 files changed, 7819 insertions, 0 deletions
diff --git a/src/filters/source/D2VSource/D2VSource.cpp b/src/filters/source/D2VSource/D2VSource.cpp
new file mode 100644
index 000000000..8984bf360
--- /dev/null
+++ b/src/filters/source/D2VSource/D2VSource.cpp
@@ -0,0 +1,283 @@
+/* 
+ *	Copyright (C) 2003-2006 Gabest
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *
+ */
+
+#include "stdafx.h"
+#include "D2VSource.h"
+#include "mpeg2dec.h"
+#include "../../../DSUtil/DSUtil.h"
+
+#ifdef REGISTER_FILTER
+
+// Media types advertised for the output pin: YUY2 video only.
+const AMOVIESETUP_MEDIATYPE sudPinTypesOut[] =
+{
+	{&MEDIATYPE_Video, &MEDIASUBTYPE_YUY2}
+};
+
+// The filter's single pin, named "Output". The initializer follows the
+// AMOVIESETUP_PIN field order (strName, bRendered, bOutput, bZero, bMany,
+// clsConnectsToFilter, strConnectsToPin, nMediaTypes, lpMediaType), so the
+// lone TRUE is the bOutput flag.
+const AMOVIESETUP_PIN sudOpPin[] =
+{
+	{L"Output", FALSE, TRUE, FALSE, FALSE, &CLSID_NULL, NULL, countof(sudPinTypesOut), sudPinTypesOut}
+};
+
+// Filter registration record: CLSID of CD2VSource, display name, merit and pin list.
+const AMOVIESETUP_FILTER sudFilter[] =
+{
+	{&__uuidof(CD2VSource), L"MPC - D2VSource", MERIT_NORMAL, countof(sudOpPin), sudOpPin, CLSID_LegacyAmFilterCategory}
+};
+
+// Class factory table for the DLL; it reuses the name and CLSID stored in sudFilter[0].
+CFactoryTemplate g_Templates[] =
+{
+	{sudFilter[0].strName, sudFilter[0].clsID, CreateInstance<CD2VSource>, NULL, &sudFilter[0]}
+};
+
+// Number of entries in g_Templates.
+int g_cTemplates = countof(g_Templates);
+
+// Registers the filter and associates it with DVD2AVI project files, both by
+// content signature and by the ".d2v" extension.
+STDAPI DllRegisterServer()
+{
+	// Key of the file-stream major type ({e436eb83-...} is MEDIATYPE_Stream) and
+	// the CLSID of this filter; both are written more than once below.
+	LPCTSTR pszStreamTypeKey = _T("Media Type\\{e436eb83-524f-11ce-9f53-0020af0ba770}");
+	LPCTSTR pszFilterClsid = _T("{47CE0591-C4D5-4b41-BED7-28F59AD76228}");
+	LPCTSTR pszSourceFilter = _T("Source Filter");
+
+	// Content check entry "offset,length,mask,value": 18 bytes at offset 0 that
+	// spell "DVD2AVIProjectFile".
+	SetRegKeyValue(pszStreamTypeKey, pszFilterClsid,
+		_T("0"), _T("0,18,,4456443241564950726F6A65637446696C65"));
+
+	// Source filter to load when the signature above matches.
+	SetRegKeyValue(pszStreamTypeKey, pszFilterClsid,
+		pszSourceFilter, pszFilterClsid);
+
+	// Extension-based association.
+	SetRegKeyValue(_T("Media Type\\Extensions"), _T(".d2v"),
+		pszSourceFilter, pszFilterClsid);
+
+	return AMovieDllRegisterServer2(TRUE);
+}
+
+// Removes the registry entries written by DllRegisterServer and unregisters the filter.
+STDAPI DllUnregisterServer()
+{
+	// Signature-based association under the file-stream major type.
+	DeleteRegKey(
+		_T("Media Type\\{e436eb83-524f-11ce-9f53-0020af0ba770}"),
+		_T("{47CE0591-C4D5-4b41-BED7-28F59AD76228}"));
+
+	// Extension-based association.
+	DeleteRegKey(
+		_T("Media Type\\Extensions"),
+		_T(".d2v"));
+
+	return AMovieDllRegisterServer2(FALSE);
+}
+
+#include "../../FilterApp.h"
+
+CFilterApp theApp;
+
+#endif
+
+//
+// CD2VSource
+//
+
+// Constructs the source filter; all work is delegated to CBaseSource<CD2VStream>.
+// If the caller supplied phr it is set to S_OK once the base class is constructed.
+CD2VSource::CD2VSource(LPUNKNOWN lpunk, HRESULT* phr)
+	: CBaseSource<CD2VStream>(NAME("CD2VSource"), lpunk, phr, __uuidof(this))
+{
+	if(phr)
+	{
+		*phr = S_OK;
+	}
+}
+
+// Nothing to release here.
+CD2VSource::~CD2VSource()
+{
+}
+
+//
+// CD2VStream
+//
+
+// Opens the D2V project file `fn` with a new CMPEG2Dec instance, allocates the
+// intermediate frame buffer and derives the stream timing from the decoder.
+//
+// fn      - path of the project file.
+// pParent - owning source filter, forwarded to CBaseStream.
+// phr     - optional result: E_OUTOFMEMORY if the decoder or frame buffer cannot
+//           be allocated, E_FAIL if the file cannot be opened, reports a
+//           non-positive frame rate or yields a non-positive duration; S_OK otherwise.
+CD2VStream::CD2VStream(const WCHAR* fn, CSource* pParent, HRESULT* phr) 
+	: CBaseStream(NAME("D2VSourceStream"), pParent, phr)
+	, m_pFrameBuffer(NULL)
+{
+	CAutoLock cAutoLock(&m_cSharedState);
+
+	m_pDecoder.Attach(DNew CMPEG2Dec());
+	if(!m_pDecoder)
+	{
+		if(phr) *phr = E_OUTOFMEMORY;
+		return;
+	}
+
+	if(!m_pDecoder->Open(CString(fn), CMPEG2Dec::YUY2))
+	{
+		if(phr) *phr = E_FAIL;
+		return;
+	}
+
+	// VF_FrameRate is the divisor used below; a project file that yields zero
+	// (or a negative value) must be rejected instead of dividing by zero.
+	if(m_pDecoder->VF_FrameRate <= 0)
+	{
+		if(phr) *phr = E_FAIL;
+		return;
+	}
+
+	// 4 bytes per pixel, i.e. room for up to 32 bits per pixel.
+	if(!m_pFrameBuffer.Allocate(m_pDecoder->Clip_Width*m_pDecoder->Clip_Height*4))
+	{
+		if(phr) *phr = E_OUTOFMEMORY;
+		return;
+	}
+
+	// NOTE(review): the 10^10 numerator presumably means VF_FrameRate is stored as
+	// frames per second * 1000, giving 100 ns units - confirm against CMPEG2Dec.
+	m_AvgTimePerFrame = 10000000000i64/m_pDecoder->VF_FrameRate;
+	m_rtDuration = m_rtStop = m_AvgTimePerFrame*m_pDecoder->VF_FrameLimit;
+
+	if(phr) *phr = m_rtDuration > 0 ? S_OK : E_FAIL;
+}
+
+// Takes the shared-state lock for the duration of the destructor body;
+// m_pDecoder and m_pFrameBuffer are smart pointers and release themselves.
+CD2VStream::~CD2VStream()
+{
+	CAutoLock cAutoLock(&m_cSharedState);
+}
+
+// Asks the allocator for a single buffer large enough to hold one frame of the
+// currently selected media type.
+//
+// Returns E_FAIL if the current media type has no usable dimensions or the
+// allocator grants a smaller buffer than requested, the allocator's own error
+// if SetProperties fails, NOERROR otherwise.
+HRESULT CD2VStream::DecideBufferSize(IMemAllocator* pAlloc, ALLOCATOR_PROPERTIES* pProperties)
+{
+	ASSERT(pAlloc);
+	ASSERT(pProperties);
+
+	int width, height, bitsPerPixel;
+	if(!GetDim(width, height, bitsPerPixel))
+	{
+		return E_FAIL;
+	}
+
+	// One frame: width * height * bits-per-pixel, converted from bits to bytes.
+	pProperties->cBuffers = 1;
+	pProperties->cbBuffer = (width*height*bitsPerPixel) >> 3;
+
+	ALLOCATOR_PROPERTIES granted;
+	HRESULT hr = pAlloc->SetProperties(pProperties, &granted);
+	if(FAILED(hr))
+	{
+		return hr;
+	}
+
+	if(granted.cbBuffer < pProperties->cbBuffer)
+	{
+		return E_FAIL;
+	}
+
+	ASSERT(granted.cBuffers == pProperties->cBuffers);
+
+	return NOERROR;
+}
+
+// Decodes frame `nFrame` into the intermediate frame buffer and copies it row
+// by row into the sample buffer `pOut`.
+//
+// pSample - sample being filled; a media type attached to it is applied first.
+// nFrame  - index of the frame to decode.
+// pOut    - destination buffer of the sample.
+// len     - receives the number of bytes the output frame occupies (pitch * height).
+//
+// Returns S_FALSE if there is no decoder or the current media type has no
+// usable dimensions, S_OK otherwise.
+HRESULT CD2VStream::FillBuffer(IMediaSample* pSample, int nFrame, BYTE* pOut, long& len)
+{
+	if(!m_pDecoder)
+		return S_FALSE;
+
+	// A media type attached to the sample announces a format change; adopt it
+	// before computing the output layout.
+	AM_MEDIA_TYPE* pmt = NULL;
+	if(SUCCEEDED(pSample->GetMediaType(&pmt)) && pmt)
+	{
+		CMediaType mt(*pmt);
+		SetMediaType(&mt);
+
+		DeleteMediaType(pmt);
+	}
+
+	int w, h, bpp;
+	if(!GetDim(w, h, bpp))
+		return S_FALSE;
+
+	BYTE* pIn = m_pFrameBuffer;
+
+	// Source rows are as wide as the decoded clip, destination rows as wide as
+	// the negotiated media type; both pitches are in bytes.
+	const int pitchIn = m_pDecoder->Clip_Width*bpp>>3;
+	const int pitchOut = w*bpp>>3;
+
+	m_pDecoder->Decode(pIn, (unsigned long)(nFrame), pitchIn);
+
+	// The decoder only produced Clip_Height rows. A negotiated type that is
+	// taller than the clip must not make the copy loop walk past the decoded
+	// picture (and eventually past the end of m_pFrameBuffer).
+	int rows = h;
+	if(rows > m_pDecoder->Clip_Height)
+		rows = m_pDecoder->Clip_Height;
+
+	const int bytesPerRow = min(pitchIn, pitchOut);
+
+	for(int y = 0; y < rows; y++, pIn += pitchIn, pOut += pitchOut)
+	{
+		memcpy(pOut, pIn, bytesPerRow);
+	}
+
+	len = pitchOut*h;
+
+	return S_OK;
+}
+
+// Returns the one media type this pin offers: YUY2 video described by a
+// VIDEOINFOHEADER whose dimensions and frame duration come from the decoder.
+//
+// iPosition - index of the requested type; only 0 is valid.
+// pmt       - receives the media type.
+//
+// Returns E_INVALIDARG for a negative index, VFW_S_NO_MORE_ITEMS for an index
+// above 0, E_FAIL if there is no decoder, E_OUTOFMEMORY if the format block
+// cannot be allocated, NOERROR otherwise.
+HRESULT CD2VStream::GetMediaType(int iPosition, CMediaType* pmt)
+{
+	CAutoLock cAutoLock(m_pFilter->pStateLock());
+
+	if(iPosition < 0) return E_INVALIDARG;
+	if(iPosition > 0) return VFW_S_NO_MORE_ITEMS;
+
+	// The dimensions below are read from the decoder; the other methods of this
+	// class tolerate a missing decoder, so do the same here.
+	if(!m_pDecoder) return E_FAIL;
+
+	pmt->SetType(&MEDIATYPE_Video);
+	pmt->SetSubtype(&MEDIASUBTYPE_YUY2);
+	pmt->SetFormatType(&FORMAT_VideoInfo);
+	pmt->SetTemporalCompression(FALSE);
+
+	// AllocFormatBuffer returns NULL when the allocation fails.
+	VIDEOINFOHEADER* vih = (VIDEOINFOHEADER*)pmt->AllocFormatBuffer(sizeof(VIDEOINFOHEADER));
+	if(!vih) return E_OUTOFMEMORY;
+
+	memset(vih, 0, sizeof(VIDEOINFOHEADER));
+	vih->AvgTimePerFrame = m_AvgTimePerFrame;
+	vih->bmiHeader.biSize = sizeof(vih->bmiHeader);
+	vih->bmiHeader.biWidth = m_pDecoder->Clip_Width;
+	vih->bmiHeader.biHeight = m_pDecoder->Clip_Height;
+	vih->bmiHeader.biPlanes = 1;
+	vih->bmiHeader.biBitCount = 16;
+	// Multi-character constant: MSVC stores it so that the bytes read "YUY2" in memory.
+	vih->bmiHeader.biCompression = '2YUY';
+	vih->bmiHeader.biSizeImage = vih->bmiHeader.biWidth*abs(vih->bmiHeader.biHeight)*vih->bmiHeader.biBitCount>>3;
+
+	pmt->SetSampleSize(vih->bmiHeader.biSizeImage);
+
+	return NOERROR;
+}
+
+// Applies a new media type. When a decoder exists only the YUY2 subtype is
+// accepted (E_FAIL otherwise) and the decoder's destination format is switched
+// to YUY2 before the type is handed to CSourceStream::SetMediaType.
+HRESULT CD2VStream::SetMediaType(const CMediaType* pmt)
+{
+	if(m_pDecoder)
+	{
+		if(!(pmt->subtype == MEDIASUBTYPE_YUY2))
+		{
+			return E_FAIL;
+		}
+
+		m_pDecoder->m_dstFormat = CMPEG2Dec::YUY2;
+	}
+
+	return CSourceStream::SetMediaType(pmt);
+}
+
+// Accepts exactly one combination: video / YUY2 / VIDEOINFOHEADER.
+// Returns S_OK for it and E_INVALIDARG for anything else.
+HRESULT CD2VStream::CheckMediaType(const CMediaType* pmt)
+{
+	if(pmt->majortype == MEDIATYPE_Video
+	&& pmt->subtype == MEDIASUBTYPE_YUY2
+	&& pmt->formattype == FORMAT_VideoInfo)
+	{
+		return S_OK;
+	}
+
+	return E_INVALIDARG;
+}
+
+// Quality-control callback. When the reported lateness is positive and below
+// 100000000, the sample time and the stream position are both advanced by the
+// lateness rounded down to a whole number of frame durations. Always returns S_OK.
+STDMETHODIMP CD2VStream::Notify(IBaseFilter* pSender, Quality q)
+{
+	// Lateness outside the (0, 100000000) window is ignored.
+	if(q.Late <= 0 || q.Late >= 100000000)
+	{
+		return S_OK;
+	}
+
+	CAutoLock cAutoLockShared(&m_cSharedState);
+
+	// Integer division drops the partial frame, so only whole frames are skipped.
+	m_rtSampleTime += (q.Late/m_AvgTimePerFrame)*m_AvgTimePerFrame;
+	m_rtPosition += (q.Late/m_AvgTimePerFrame)*m_AvgTimePerFrame;
+
+	return S_OK;
+}
+
+//
+
+// Extracts width, absolute height and bits per pixel from the current media
+// type (m_mt). Both VIDEOINFOHEADER and VIDEOINFOHEADER2 format blocks are
+// understood.
+//
+// Returns false, leaving the outputs untouched, when the format type is
+// neither of the two or the format block is missing or too small to hold the
+// corresponding header.
+bool CD2VStream::GetDim(int& w, int& h, int& bpp)
+{
+	const BITMAPINFOHEADER* bih = NULL;
+
+	// Only trust the format block if it is present and large enough for the
+	// structure it is about to be read as.
+	if(m_mt.pbFormat)
+	{
+		if(m_mt.formattype == FORMAT_VideoInfo)
+		{
+			if(m_mt.cbFormat >= sizeof(VIDEOINFOHEADER))
+				bih = &((const VIDEOINFOHEADER*)m_mt.pbFormat)->bmiHeader;
+		}
+		else if(m_mt.formattype == FORMAT_VideoInfo2)
+		{
+			if(m_mt.cbFormat >= sizeof(VIDEOINFOHEADER2))
+				bih = &((const VIDEOINFOHEADER2*)m_mt.pbFormat)->bmiHeader;
+		}
+	}
+
+	if(!bih)
+	{
+		return(false);
+	}
+
+	w = bih->biWidth;
+	h = abs(bih->biHeight);	// biHeight may be negative
+	bpp = bih->biBitCount;
+
+	return(true);
+}
diff --git a/src/filters/source/D2VSource/D2VSource.def b/src/filters/source/D2VSource/D2VSource.def
new file mode 100644
index 000000000..465cb0f61
--- /dev/null
+++ b/src/filters/source/D2VSource/D2VSource.def
@@ -0,0 +1,7 @@
+LIBRARY      "D2VSource.ax"
+
+EXPORTS
+	DllCanUnloadNow			PRIVATE
+	DllGetClassObject		PRIVATE
+	DllRegisterServer		PRIVATE
+	DllUnregisterServer		PRIVATE
diff --git a/src/filters/source/D2VSource/D2VSource.h b/src/filters/source/D2VSource/D2VSource.h
new file mode 100644
index 000000000..bf2e18f9a
--- /dev/null
+++ b/src/filters/source/D2VSource/D2VSource.h
@@ -0,0 +1,56 @@
+/* 
+ *	Copyright (C) 2003-2006 Gabest
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *
+ */
+
+#pragma once
+#include <atlbase.h>
+#include "../BaseSource/BaseSource.h"
+
+class CD2VStream;
+
+// Source filter for D2V project files. It adds nothing to
+// CBaseSource<CD2VStream> beyond its own CLSID, supplied by the uuid attribute.
+[uuid("47CE0591-C4D5-4b41-BED7-28F59AD76228")]
+class CD2VSource : public CBaseSource<CD2VStream>
+{
+public:
+	// lpunk and phr are forwarded to the base class.
+	CD2VSource(LPUNKNOWN lpunk, HRESULT* phr);
+	virtual ~CD2VSource();
+};
+
+class CMPEG2Dec;
+
+// Output stream (pin) of CD2VSource: owns the decoder and an intermediate
+// frame buffer, and delivers frames through FillBuffer.
+class CD2VStream : public CBaseStream
+{
+private:
+	// Decoder instance; owned by this stream.
+	CAutoPtr<CMPEG2Dec> m_pDecoder;
+	// Intermediate buffer the decoder writes into before rows are copied to the sample.
+	CAutoVectorPtr<BYTE> m_pFrameBuffer;
+
+	// Reads width, height and bits per pixel from the current media type;
+	// returns false if the format type is not supported.
+	bool GetDim(int& w, int& h, int& bpp);
+
+public:
+	// fn is the project file to open; failure is reported through phr.
+    CD2VStream(const WCHAR* fn, CSource* pParent, HRESULT* phr);
+	virtual ~CD2VStream();
+
+	// Decodes frame nFrame into pOut and stores the frame's byte count in len.
+    HRESULT FillBuffer(IMediaSample* pSample, int nFrame, BYTE* pOut, long& len /*in+out*/);
+
+	// Buffer and media type negotiation.
+    HRESULT DecideBufferSize(IMemAllocator* pIMemAlloc, ALLOCATOR_PROPERTIES* pProperties);
+    HRESULT CheckMediaType(const CMediaType* pMediaType);
+    HRESULT GetMediaType(int iPosition, CMediaType* pmt);
+    HRESULT SetMediaType(const CMediaType* pmt);
+
+	// Quality-control notification from downstream.
+	STDMETHODIMP Notify(IBaseFilter* pSender, Quality q);
+};
diff --git a/src/filters/source/D2VSource/MPEG2Dec.cpp b/src/filters/source/D2VSource/MPEG2Dec.cpp
new file mode 100644
index 000000000..4be664c00
--- /dev/null
+++ b/src/filters/source/D2VSource/MPEG2Dec.cpp
@@ -0,0 +1,4490 @@
+#include "stdafx.h"
+#include "MPEG2Dec.h"
+
+int testint;
+
+// CPU feature flags, filled in by CheckCPU(). The single instance `cpu` is a
+// global and therefore starts out zeroed.
+struct CPU {
+	BOOL					mmx;		// set when CPUID leaf 1 reports MMX
+	BOOL					_3dnow;		// set when CPUID leaf 0x80000001 reports 3DNow!
+	BOOL					ssemmx;		// set for SSE, or for the "SSE MMX" bit of leaf 0x80000001
+	BOOL					ssefpu;		// set when CPUID leaf 1 reports SSE
+} cpu;
+
+// Queries CPUID and sets the matching flags in the global `cpu`.
+// Flags are only ever set to 1 here, never cleared.
+// NOTE(review): leaf 0x80000001 is queried without first checking that the
+// processor supports extended leaves - confirm this is acceptable for all targets.
+void CheckCPU()
+{
+	__asm
+	{
+		// Leaf 1: standard feature bits are returned in EDX.
+		mov			eax, 1
+		cpuid
+		test		edx, 0x00800000		// STD MMX
+		jz			TEST_SSE
+		mov			[cpu.mmx], 1
+TEST_SSE:
+		test		edx, 0x02000000		// STD SSE
+		jz			TEST_3DNOW
+		mov			[cpu.ssemmx], 1
+		mov			[cpu.ssefpu], 1
+TEST_3DNOW:
+		// Leaf 0x80000001: extended feature bits are returned in EDX.
+		mov			eax, 0x80000001
+		cpuid
+		test		edx, 0x80000000		// 3D NOW
+		jz			TEST_SSEMMX
+		mov			[cpu._3dnow], 1
+TEST_SSEMMX:
+		test		edx, 0x00400000		// SSE MMX
+		jz			TEST_END
+		mov			[cpu.ssemmx], 1
+TEST_END:
+	}
+}
+
+#pragma warning(disable:4799)	// no EMMS
+#pragma warning(disable:4731)	// ebp modified
+// idct
+extern "C" void __fastcall MMX_IDCT(short *block);
+extern "C" void __fastcall SSEMMX_IDCT(short *block);
+extern void Initialize_FPU_IDCT(void);
+extern void FPU_IDCT(short *block);
+extern void Initialize_REF_IDCT(void);
+extern void REF_IDCT(short *block);
+
+/* default intra quantization matrix */
+static unsigned char default_intra_quantizer_matrix[64] =
+{
+	8, 16, 19, 22, 26, 27, 29, 34,
+	16, 16, 22, 24, 27, 29, 34, 37,
+	19, 22, 26, 27, 29, 34, 34, 38,
+	22, 22, 26, 27, 29, 34, 37, 40,
+	22, 26, 27, 29, 32, 35, 40, 48,
+	26, 27, 29, 32, 35, 40, 48, 58,
+	26, 27, 29, 34, 38, 46, 56, 69,
+	27, 29, 35, 38, 46, 56, 69, 83
+};
+
+/* zig-zag and alternate scan patterns */
+static unsigned char scan[2][64] =
+{
+	{ /* Zig-Zag scan pattern  */
+		0,  1,  8, 16,  9,  2,  3, 10,
+	   17, 24, 32, 25, 18, 11,  4,  5,
+	   12, 19, 26, 33, 40, 48, 41, 34,
+	   27, 20, 13,  6,  7, 14, 21, 28,
+	   35, 42, 49, 56, 57, 50, 43, 36,
+	   29, 22, 15, 23, 30, 37, 44, 51,
+	   58, 59, 52, 45, 38, 31, 39, 46,
+	   53, 60, 61, 54, 47, 55, 62, 63
+	}
+	,
+	{ /* Alternate scan pattern */
+		0,  8, 16, 24,  1,  9,  2, 10,
+	   17, 25, 32, 40, 48, 56, 57, 49,
+	   41, 33, 26, 18,  3, 11, 4,  12,
+	   19, 27, 34, 42, 50, 58, 35, 43,
+	   51, 59, 20, 28,  5, 13,  6, 14,
+	   21, 29, 36, 44, 52, 60, 37, 45,
+	   53, 61, 22, 30,  7, 15, 23, 31,
+	   38, 46, 54, 62, 39, 47, 55, 63
+	}
+};
+
+/* non-linear quantization coefficient table */
+static unsigned char Non_Linear_quantizer_scale[32] =
+{
+	0, 1, 2, 3, 4, 5, 6, 7,
+	8, 10, 12, 14, 16, 18, 20, 22,
+	24, 28, 32, 36, 40, 44, 48, 52,
+	56, 64, 72, 80, 88, 96, 104, 112
+};
+
+#define ERROR_VALUE	(-1)
+
+typedef struct {
+	char run, level, len;
+} DCTtab;
+
+typedef struct {
+	char val, len;
+} VLCtab;
+
+/* Table B-10, motion_code, codes 0001 ... 01xx */
+static VLCtab MVtab0[8] =
+{
+	{ERROR_VALUE,0}, {3,3}, {2,2}, {2,2}, {1,1}, {1,1}, {1,1}, {1,1}
+};
+
+/* Table B-10, motion_code, codes 0000011 ... 000011x */
+static VLCtab MVtab1[8] =
+{
+	{ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {7,6}, {6,6}, {5,6}, {4,5}, {4,5}
+};
+
+/* Table B-10, motion_code, codes 0000001100 ... 000001011x */
+static VLCtab MVtab2[12] =
+{
+	{16,9}, {15,9}, {14,9}, {13,9},
+	{12,9}, {11,9}, {10,8}, {10,8},
+	{9,8},  {9,8},  {8,8},  {8,8}
+};
+
+/* Table B-9, coded_block_pattern, codes 01000 ... 111xx */
+static VLCtab CBPtab0[32] =
+{
+	{ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
+	{ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
+	{62,5}, {2,5},  {61,5}, {1,5},  {56,5}, {52,5}, {44,5}, {28,5},
+	{40,5}, {20,5}, {48,5}, {12,5}, {32,4}, {32,4}, {16,4}, {16,4},
+	{8,4},  {8,4},  {4,4},  {4,4},  {60,3}, {60,3}, {60,3}, {60,3}
+};
+
+/* Table B-9, coded_block_pattern, codes 00000100 ... 001111xx */
+static VLCtab CBPtab1[64] =
+{
+	{ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
+	{58,8}, {54,8}, {46,8}, {30,8},
+	{57,8}, {53,8}, {45,8}, {29,8}, {38,8}, {26,8}, {37,8}, {25,8},
+	{43,8}, {23,8}, {51,8}, {15,8}, {42,8}, {22,8}, {50,8}, {14,8},
+	{41,8}, {21,8}, {49,8}, {13,8}, {35,8}, {19,8}, {11,8}, {7,8},
+	{34,7}, {34,7}, {18,7}, {18,7}, {10,7}, {10,7}, {6,7},  {6,7},
+	{33,7}, {33,7}, {17,7}, {17,7}, {9,7},  {9,7},  {5,7},  {5,7},
+	{63,6}, {63,6}, {63,6}, {63,6}, {3,6},  {3,6},  {3,6},  {3,6},
+	{36,6}, {36,6}, {36,6}, {36,6}, {24,6}, {24,6}, {24,6}, {24,6}
+};
+
+/* Table B-9, coded_block_pattern, codes 000000001 ... 000000111 */
+static VLCtab CBPtab2[8] =
+{
+	{ERROR_VALUE,0}, {0,9}, {39,9}, {27,9}, {59,9}, {55,9}, {47,9}, {31,9}
+};
+
+/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */
+static VLCtab MBAtab1[16] =
+{
+	{ERROR_VALUE,0}, {ERROR_VALUE,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4},
+	{4,4}, {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3}
+};
+
+/* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx */
+static VLCtab MBAtab2[104] =
+{
+	{33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11},
+	{25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10},
+	{19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10},
+	{15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},
+	{14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},
+	{13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},
+	{12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},
+	{11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},
+	{10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},
+	{9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},
+	{9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},
+	{8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},
+	{8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7}
+};
+
+/* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
+static VLCtab DClumtab0[32] =
+{
+	{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+	{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+	{0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+	{4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {ERROR_VALUE, 0}
+};
+
+/* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
+static VLCtab DClumtab1[16] =
+{
+	{7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
+	{8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9}
+};
+
+/* Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */
+static VLCtab DCchromtab0[32] =
+{
+	{0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+	{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+	{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+	{3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {ERROR_VALUE, 0}
+};
+
+/* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
+static VLCtab DCchromtab1[32] =
+{
+	{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
+	{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
+	{7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
+	{8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0100 ... 1xxx (used for first (DC) coefficient)
+ */
+static DCTtab DCTtabfirst[12] =
+{
+	{0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
+	{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
+	{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0100 ... 1xxx (used for all other coefficients)
+ */
+static DCTtab DCTtabnext[12] =
+{
+	{0,2,4},  {2,1,4},  {1,1,3},  {1,1,3},
+	{64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
+	{0,1,2},  {0,1,2},  {0,1,2},  {0,1,2}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 000001xx ... 00111xxx
+ */
+static DCTtab DCTtab0[60] =
+{
+	{65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
+	{2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
+	{0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
+	{7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
+	{6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
+	{1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
+	{5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
+	{13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
+	{3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
+	{0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
+	{0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
+	{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
+	{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
+	{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+	{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}
+};
+
+/* Table B-15, DCT coefficients table one,
+ * codes 000001xx ... 11111111
+*/
+static DCTtab DCTtab0a[252] =
+{
+	{65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
+	{7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
+	{6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
+	{0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
+	{0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
+	{4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
+	{5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
+	{1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
+	{13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
+	{2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
+	{2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
+	{1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
+	{1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
+	{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+	{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+	{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+	{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+	{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+	{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+	{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+	{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+	{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+	{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+	{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
+	{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+	{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+	{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+	{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+	{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+	{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+	{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+	{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+	{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+	{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+	{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+	{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+	{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+	{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+	{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+	{0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
+	{0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
+	{0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
+	{0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
+	{9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
+	{10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
+	{0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
+	{2,3,8}, {4,2,8}, {0,14,8}, {0,15,8}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0000001000 ... 0000001111
+ */
+static DCTtab DCTtab1[8] =
+{
+	{16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
+	{1,4,10}, {15,1,10}, {14,1,10}, {4,2,10}
+};
+
+/* Table B-15, DCT coefficients table one,
+ * codes 000000100x ... 000000111x
+ */
+static DCTtab DCTtab1a[8] =
+{
+	{5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
+	{2,4,10}, {16,1,10}, {15,1,9}, {15,1,9}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 000000010000 ... 000000011111
+ */
+static DCTtab DCTtab2[16] =
+{
+	{0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
+	{2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
+	{0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
+	{3,3,12}, {0,8,12}, {6,2,12}, {17,1,12}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 0000000010000 ... 0000000011111
+ */
+static DCTtab DCTtab3[16] =
+{
+	{10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
+	{2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
+	{0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
+	{25,1,13}, {24,1,13}, {23,1,13}, {22,1,13}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 00000000010000 ... 00000000011111
+ */
+static DCTtab DCTtab4[16] =
+{
+	{0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
+	{0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
+	{0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
+	{0,19,14}, {0,18,14}, {0,17,14}, {0,16,14}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 000000000010000 ... 000000000011111
+ */
+static DCTtab DCTtab5[16] =
+{
+	{0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
+	{0,36,15}, {0,35,15}, {0,34,15}, {0,33,15},
+	{0,32,15}, {1,14,15}, {1,13,15}, {1,12,15},
+	{1,11,15}, {1,10,15}, {1,9,15}, {1,8,15}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 0000000000010000 ... 0000000000011111
+ */
+static DCTtab DCTtab6[16] =
+{
+	{1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
+	{6,3,16}, {16,2,16}, {15,2,16}, {14,2,16},
+	{13,2,16}, {12,2,16}, {11,2,16}, {31,1,16},
+	{30,1,16}, {29,1,16}, {28,1,16}, {27,1,16}
+};
+
+/* Table B-3, macroblock_type in P-pictures, codes 001..1xx */
+static VLCtab PMBtab0[8] =
+{
+	{ERROR_VALUE,0},
+	{MACROBLOCK_MOTION_FORWARD,3},
+	{MACROBLOCK_PATTERN,2}, {MACROBLOCK_PATTERN,2},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,1}, 
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,1},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,1}, 
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,1}
+};
+
+/* Table B-3, macroblock_type in P-pictures, codes 000001..00011x */
+static VLCtab PMBtab1[8] =
+{
+	{ERROR_VALUE,0},
+	{MACROBLOCK_QUANT|MACROBLOCK_INTRA,6},
+	{MACROBLOCK_QUANT|MACROBLOCK_PATTERN,5}, {MACROBLOCK_QUANT|MACROBLOCK_PATTERN,5},
+	{MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,5}, {MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,5},
+	{MACROBLOCK_INTRA,5}, {MACROBLOCK_INTRA,5}
+};
+
+/* Table B-4, macroblock_type in B-pictures, codes 0010..11xx */
+static VLCtab BMBtab0[16] =
+{
+	{ERROR_VALUE,0}, 
+	{ERROR_VALUE,0},
+	{MACROBLOCK_MOTION_FORWARD,4},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,4},
+	{MACROBLOCK_MOTION_BACKWARD,3}, 
+	{MACROBLOCK_MOTION_BACKWARD,3},
+	{MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,3}, 
+	{MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,3},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD,2}, 
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD,2},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD,2}, 
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD,2},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,2},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,2},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,2},
+	{MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,2}
+};
+
+/* Table B-4, macroblock_type in B-pictures, codes 000001..00011x */
+static VLCtab BMBtab1[8] =
+{
+	{ERROR_VALUE,0},
+	{MACROBLOCK_QUANT|MACROBLOCK_INTRA,6},
+	{MACROBLOCK_QUANT|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,6},
+	{MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,6},
+	{MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,5},
+	{MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,5},
+	{MACROBLOCK_INTRA,5}, 
+	{MACROBLOCK_INTRA,5}
+};
+
+//
+// getbit
+//
+
+// Resets the bit reader: marks the read buffer as exhausted, then preloads
+// CurrentBfr and NextBfr with the first eight bytes of the stream and sets
+// BitsLeft to 32.
+void CMPEG2Dec::Initialize_Buffer()
+{
+	// Pointing both at the end of the buffer forces the first read to refill it.
+	Rdptr = Rdbfr + BUFFER_SIZE;
+	Rdmax = Rdptr;
+
+	if (SystemStream_Flag)
+	{
+		// Next_Packet() may leave Rdptr beyond the end of Rdbfr after skipping
+		// a packet header. Get_Byte() refills the buffer in that case, which a
+		// plain *Rdptr++ does not; Fill_Next() reads the same way.
+		if (Rdptr >= Rdmax)
+			Next_Packet();
+		CurrentBfr = Get_Byte() << 24;
+
+		if (Rdptr >= Rdmax)
+			Next_Packet();
+		CurrentBfr += Get_Byte() << 16;
+
+		if (Rdptr >= Rdmax)
+			Next_Packet();
+		CurrentBfr += Get_Byte() << 8;
+
+		if (Rdptr >= Rdmax)
+			Next_Packet();
+		CurrentBfr += Get_Byte();
+
+		Fill_Next();
+	}
+	else
+	{
+		Fill_Buffer();
+
+		// Fill_Buffer() leaves Rdptr at the start of a freshly read buffer, so
+		// the first four bytes can be read directly, most significant first.
+		CurrentBfr = (*Rdptr << 24) + (*(Rdptr+1) << 16) + (*(Rdptr+2) << 8) + *(Rdptr+3);
+		Rdptr += 4;
+
+		Fill_Next();
+	}
+
+	BitsLeft = 32;
+}
+
+// Slow path of Get_Bits(): returns N bits when the request uses up everything
+// left in CurrentBfr (Get_Bits calls this only when N >= BitsLeft).
+// The result is also stored in the member Val.
+unsigned int CMPEG2Dec::Get_Bits_All(unsigned int N)
+{
+	// From here on N is the number of bits still needed from NextBfr.
+	N -= BitsLeft;
+	// Keep only the BitsLeft unread low bits of CurrentBfr.
+	Val = (CurrentBfr << (32 - BitsLeft)) >> (32 - BitsLeft);
+
+	// The guard skips the case N == 0, where the shift count below would be 32.
+	if (N != 0)
+		Val = (Val << N) + (NextBfr >> (32 - N));
+
+	// NextBfr becomes the current word, with N of its bits already consumed.
+	CurrentBfr = NextBfr;
+	BitsLeft = 32 - N;
+	Fill_Next();
+
+	return Val;
+}
+
+// Slow path of Flush_Buffer(): discards N bits when that uses up everything
+// left in CurrentBfr. NextBfr becomes the current word and a new look-ahead
+// word is fetched.
+void CMPEG2Dec::Flush_Buffer_All(unsigned int N)
+{
+	// Bits remaining after the flush, counted within the new current word.
+	BitsLeft += 32 - N;
+	CurrentBfr = NextBfr;
+
+	Fill_Next();
+}
+
+// Advances through the system stream until the payload of the next video
+// packet is reached. On return Rdptr points at the payload and Rdmax at the
+// end of that packet. Loops until such a packet is found.
+void CMPEG2Dec::Next_Packet()
+{
+	unsigned int code, Packet_Length, Packet_Header_Length;
+
+	for (;;)
+	{
+		// Read a 32-bit code, high half first.
+		code = Get_Short();
+		code = (code<<16) + Get_Short();
+
+		// remove system layer byte stuffing
+		// (shift in one byte at a time until the upper 24 bits are 0x000001)
+		while ((code & 0xffffff00) != 0x00000100)
+			code = (code<<8) + Get_Byte();
+
+		switch (code)
+		{
+			case PACK_START_CODE:
+				// Skip 8 bytes following the pack start code.
+				Rdptr += 8;
+				break;
+
+			case VIDEO_ELEMENTARY_STREAM:   
+				// 16-bit packet length; the packet ends Packet_Length bytes from here.
+				Packet_Length = Get_Short();
+				Rdmax = Rdptr + Packet_Length;
+
+				code = Get_Byte();
+
+				// NOTE(review): a leading '10' bit pair presumably marks an MPEG-2 PES header - confirm.
+				if ((code & 0xc0)==0x80)
+				{
+					// The second header byte is read and discarded; the third
+					// one gives the number of remaining header bytes to skip.
+					code = Get_Byte();
+					Packet_Header_Length = Get_Byte();
+
+					Rdptr += Packet_Header_Length;
+					return;
+				}
+				else
+					// Any other packet is skipped entirely (one byte of it was already read).
+					Rdptr += Packet_Length-1;
+				break;
+
+			default:
+				// Other start codes at or above SYSTEM_START_CODE carry a
+				// 16-bit length; skip that many bytes.
+				if (code>=SYSTEM_START_CODE)
+				{
+					code = Get_Short();
+					Rdptr += code;
+				}
+				break;
+		}
+	}
+}
+
+// Reads the next BUFFER_SIZE bytes of the current input file into Rdbfr and
+// resets Rdptr to the start of the buffer.
+void CMPEG2Dec::Fill_Buffer()
+{
+	Read = _read(Infile[File_Flag], Rdbfr, BUFFER_SIZE);
+
+	// A short read hands over to Next_File(), which tops the buffer up.
+	if (Read < BUFFER_SIZE)
+		Next_File();
+
+	// When KeyOp_Flag is set and bit 0x10 of byte 20 is set, the buffer is
+	// passed through BufferOp() and that bit is cleared afterwards.
+	if (KeyOp_Flag && (Rdbfr[20] & 0x10))
+	{
+		BufferOp(Rdbfr, lfsr0, lfsr1);
+		Rdbfr[20] &= ~0x10;
+	}
+
+	Rdptr = Rdbfr;
+
+	// Keep the packet end marker consistent with the rewound read pointer.
+	if (SystemStream_Flag)
+		Rdmax -= BUFFER_SIZE;
+}
+
+// Called after a short read: moves to the next input file if there is one,
+// rewinds the selected file and reads into the unfilled tail of Rdbfr.
+// When the current file is already the last one, that same file is rewound.
+// The result of this second read is not checked.
+void CMPEG2Dec::Next_File()
+{
+	if (File_Flag < File_Limit-1)
+		File_Flag ++;
+
+	_lseeki64(Infile[File_Flag], 0, SEEK_SET);
+	// `Read` is the byte count of the preceding short read.
+	_read(Infile[File_Flag], Rdbfr + Read, BUFFER_SIZE - Read);
+}
+
+
+// Returns the next N bits of the stream without consuming them.
+// NOTE(review): with N == 0 the first branch shifts by 32 - callers in view
+// always pass a non-zero N; confirm for the rest of the file.
+unsigned int CMPEG2Dec::Show_Bits(unsigned int N)
+{
+	if (N <= BitsLeft)
+		// All requested bits are in CurrentBfr: drop the consumed high bits,
+		// then keep the top N of what remains.
+		return (CurrentBfr << (32 - BitsLeft)) >> (32 - N);
+	else
+	{
+		// The remaining BitsLeft bits of CurrentBfr form the high part; the
+		// rest comes from the top of NextBfr.
+		N -= BitsLeft;
+		return (((CurrentBfr << (32 - BitsLeft)) >> (32 - BitsLeft)) << N) + (NextBfr >> (32 - N));
+	}
+}
+
+// Reads and consumes the next N bits of the stream. The result is also left
+// in the member Val.
+unsigned int CMPEG2Dec::Get_Bits(unsigned int N)
+{
+	// The request uses up the current word: take the slow path.
+	if (N >= BitsLeft)
+	{
+		return Get_Bits_All(N);
+	}
+
+	// Fast path: drop the consumed high bits, keep the top N of the rest.
+	Val = (CurrentBfr << (32 - BitsLeft)) >> (32 - N);
+	BitsLeft -= N;
+
+	return Val;
+}
+
+// Discards the next N bits of the stream.
+void CMPEG2Dec::Flush_Buffer(unsigned int N)
+{
+	// The flush uses up the current word: take the slow path.
+	if (N >= BitsLeft)
+	{
+		Flush_Buffer_All(N);
+		return;
+	}
+
+	BitsLeft -= N;
+}
+
+// Loads the next four stream bytes into NextBfr, most significant byte first.
+// Three cases: near a packet boundary of a system stream, well inside the
+// read buffer, or within the last four bytes of the read buffer.
+void CMPEG2Dec::Fill_Next()
+{
+	if (SystemStream_Flag && Rdptr>=Rdmax-4)
+	{
+		// Fewer than four bytes may remain in the current packet, so the
+		// packet end is checked before every byte.
+		if (Rdptr >= Rdmax)
+			Next_Packet();
+		NextBfr = Get_Byte() << 24;
+
+		if (Rdptr >= Rdmax)
+			Next_Packet();
+		NextBfr += Get_Byte() << 16;
+
+		if (Rdptr >= Rdmax)
+			Next_Packet();
+		NextBfr += Get_Byte() << 8;
+
+		if (Rdptr >= Rdmax)
+			Next_Packet();
+		NextBfr += Get_Byte();
+	}
+	else if (Rdptr < Rdbfr+BUFFER_SIZE-4)
+	{
+		// Common case: all four bytes are available in the buffer.
+		NextBfr = (*Rdptr << 24) + (*(Rdptr+1) << 16) + (*(Rdptr+2) << 8) + *(Rdptr+3);
+		Rdptr += 4;
+	}
+	else
+	{
+		// The word may straddle the end of the buffer, so a refill is
+		// considered before every byte.
+		if (Rdptr >= Rdbfr+BUFFER_SIZE)
+			Fill_Buffer();
+		NextBfr = *Rdptr++ << 24;
+
+		if (Rdptr >= Rdbfr+BUFFER_SIZE)
+			Fill_Buffer();
+		NextBfr += *Rdptr++ << 16;
+
+		if (Rdptr >= Rdbfr+BUFFER_SIZE)
+			Fill_Buffer();
+		NextBfr += *Rdptr++ << 8;
+
+		if (Rdptr >= Rdbfr+BUFFER_SIZE)
+			Fill_Buffer();
+		NextBfr += *Rdptr++;
+	}
+}
+
+// Returns the byte at Rdptr and advances Rdptr, refilling Rdbfr first if
+// Rdptr has reached or passed the end of the buffer.
+unsigned int CMPEG2Dec::Get_Byte()
+{
+	// A loop rather than a single test: Rdptr may be more than one buffer
+	// length past the end (Next_Packet() advances it by whole packet lengths).
+	while (Rdptr >= (Rdbfr + BUFFER_SIZE))
+	{
+		Read = _read(Infile[File_Flag], Rdbfr, BUFFER_SIZE);
+
+		// A short read hands over to Next_File(), which tops the buffer up.
+		if (Read < BUFFER_SIZE)
+			Next_File();
+
+		// Same per-buffer BufferOp() pass as in Fill_Buffer().
+		if (KeyOp_Flag && (Rdbfr[20] & 0x10))
+		{
+			BufferOp(Rdbfr, lfsr0, lfsr1);
+			Rdbfr[20] &= ~0x10;
+		}
+
+		// Both pointers move back by one buffer length so they keep their
+		// position relative to the data just read.
+		Rdptr -= BUFFER_SIZE;
+		Rdmax -= BUFFER_SIZE;
+	}
+
+	return *Rdptr++;
+}
+
+// Reads two bytes and combines them into a 16-bit value, first byte in the
+// high position.
+unsigned int CMPEG2Dec::Get_Short()
+{
+	const unsigned int highByte = Get_Byte();
+	const unsigned int lowByte = Get_Byte();
+
+	return (highByte<<8) + lowByte;
+}
+
+// Positions the bit reader on the next start code prefix (the 24-bit value 1),
+// without consuming it.
+void CMPEG2Dec::next_start_code()
+{
+	// Drop the bits left over from the current byte.
+	Flush_Buffer(BitsLeft & 7);
+
+	// Step forward one byte at a time until the prefix is next in the stream.
+	for (;;)
+	{
+		if (Show_Bits(24) == 1)
+			break;
+
+		Flush_Buffer(8);
+	}
+}
+
+//
+// gethdr
+//
+
+// Parses headers until a picture header has been read, then returns 1.
+// Sequence and group-of-pictures headers met on the way are parsed as well;
+// every other start code is skipped.
+int CMPEG2Dec::Get_Hdr()
+{
+	for (;;)
+	{
+		/* look for next_start_code */
+		next_start_code();
+
+		const unsigned int start_code = Get_Bits(32);
+
+		if (start_code == PICTURE_START_CODE)
+		{
+			picture_header();
+			return 1;
+		}
+
+		if (start_code == SEQUENCE_HEADER_CODE)
+		{
+			sequence_header();
+		}
+		else if (start_code == GROUP_START_CODE)
+		{
+			group_of_pictures_header();
+		}
+	}
+}
+
+/* decode group of pictures header */
+/* ISO/IEC 13818-2 section 6.2.2.6 */
+// Consumes a group of pictures header. None of its fields are kept; the bits
+// are read only to advance the stream (27 bits in total), after which any
+// extension and user data is processed.
+void CMPEG2Dec::group_of_pictures_header()
+{
+	Get_Bits(1);		// drop_flag
+	Get_Bits(5);		// gop_hour
+	Get_Bits(6);		// gop_minute
+	Flush_Buffer(1);	// marker bit
+	Get_Bits(6);		// gop_sec
+	Get_Bits(6);		// gop_frame
+	Get_Bits(1);		// closed_gop
+	Get_Bits(1);		// broken_link
+
+	extension_and_user_data();
+}
+
+/* decode picture header */
+/* ISO/IEC 13818-2 section 6.2.3 */
+// Parses a picture header. Only temporal_reference and picture_coding_type
+// are kept; the remaining fields are read to advance the stream and then
+// discarded. Extra information bits and any extension and user data are
+// processed afterwards.
+void CMPEG2Dec::picture_header()
+{
+	temporal_reference  = Get_Bits(10);
+	picture_coding_type = Get_Bits(3);
+	Get_Bits(16);		// vbv_delay
+
+	// Forward vector fields are present in P and B pictures.
+	if (picture_coding_type==P_TYPE || picture_coding_type==B_TYPE)
+	{
+		Get_Bits(1);	// full_pel_forward_vector
+		Get_Bits(3);	// forward_f_code
+	}
+
+	// Backward vector fields are present in B pictures only.
+	if (picture_coding_type==B_TYPE)
+	{
+		Get_Bits(1);	// full_pel_backward_vector
+		Get_Bits(3);	// backward_f_code
+	}
+
+	// The returned count is not needed here.
+	extra_bit_information();
+	extension_and_user_data();
+}
+
+/* Parse a sequence header.
+   Stores horizontal_size and vertical_size, loads (or defaults) the intra and
+   non-intra quantizer matrices, mirrors them into the chroma matrices and
+   finally parses any extension/user data that follows. The other header
+   fields are read only to advance the bitstream. */
+void CMPEG2Dec::sequence_header()
+{
+	int n;
+
+	horizontal_size = Get_Bits(12);
+	vertical_size   = Get_Bits(12);
+	Get_Bits(4);		// aspect_ratio_information
+	Get_Bits(4);		// frame_rate_code
+	Get_Bits(18);		// bit_rate_value
+	Flush_Buffer(1);	// marker bit
+	Get_Bits(10);		// vbv_buffer_size
+	Get_Bits(1);		// constrained_parameters_flag
+
+	/* intra matrix: transmitted in zig-zag order, otherwise the default table */
+	load_intra_quantizer_matrix = Get_Bits(1);
+	if (load_intra_quantizer_matrix)
+	{
+		for (n=0; n<64; n++)
+			intra_quantizer_matrix[scan[ZIG_ZAG][n]] = Get_Bits(8);
+	}
+	else
+	{
+		for (n=0; n<64; n++)
+			intra_quantizer_matrix[n] = default_intra_quantizer_matrix[n];
+	}
+
+	/* non-intra matrix: transmitted in zig-zag order, otherwise flat 16 */
+	load_non_intra_quantizer_matrix = Get_Bits(1);
+	if (load_non_intra_quantizer_matrix)
+	{
+		for (n=0; n<64; n++)
+			non_intra_quantizer_matrix[scan[ZIG_ZAG][n]] = Get_Bits(8);
+	}
+	else
+	{
+		for (n=0; n<64; n++)
+			non_intra_quantizer_matrix[n] = 16;
+	}
+
+	/* chrominance matrices start out as copies of the luminance ones */
+	for (n=0; n<64; n++)
+	{
+		chroma_intra_quantizer_matrix[n] = intra_quantizer_matrix[n];
+		chroma_non_intra_quantizer_matrix[n] = non_intra_quantizer_matrix[n];
+	}
+
+	extension_and_user_data();
+}
+
+/* Parse a slice header (ISO/IEC 13818-2 section 6.2.4).
+   Updates quantizer_scale from the transmitted quantizer_scale_code and
+   returns slice_vertical_position_extension (0 when it is not transmitted,
+   i.e. when vertical_size is not greater than 2800). */
+int CMPEG2Dec::slice_header()
+{
+	int vertical_position_extension = 0;
+
+	if (vertical_size>2800)
+		vertical_position_extension = Get_Bits(3);
+
+	const int scale_code = Get_Bits(5);
+
+	if (q_scale_type)
+		quantizer_scale = Non_Linear_quantizer_scale[scale_code];
+	else
+		quantizer_scale = scale_code<<1;
+
+	/* slice_id introduced in March 1995 as part of the video corrigendum
+	   (after the IS was drafted in November 1994) */
+	if (Get_Bits(1))
+	{
+		Get_Bits(1);	// intra slice
+		Get_Bits(1);	// slice_picture_id_enable
+		Get_Bits(6);	// slice_picture_id
+
+		extra_bit_information();	// extra_information_slice, not used
+	}
+
+	return vertical_position_extension;
+}
+
+/* Parse extension and user data (ISO/IEC 13818-2 section 6.2.2.2).
+   Starting at the next start code, keeps consuming extension and user data
+   start codes. Extensions with a known ID are dispatched to their parser;
+   unknown extensions and all user data are skipped by seeking to the next
+   start code. Returns with the stream positioned on the first start code
+   that is neither of the two. */
+void CMPEG2Dec::extension_and_user_data()
+{
+	int code;
+
+	next_start_code();
+
+	for (;;)
+	{
+		code = Show_Bits(32);
+
+		if (code==EXTENSION_START_CODE)
+		{
+			Flush_Buffer(32);
+
+			/* 4-bit extension ID selects the parser */
+			switch (Get_Bits(4))
+			{
+				case SEQUENCE_EXTENSION_ID:
+					sequence_extension();
+					break;
+
+				case SEQUENCE_DISPLAY_EXTENSION_ID:
+					sequence_display_extension();
+					break;
+
+				case QUANT_MATRIX_EXTENSION_ID:
+					quant_matrix_extension();
+					break;
+
+				case PICTURE_DISPLAY_EXTENSION_ID:
+					picture_display_extension();
+					break;
+
+				case PICTURE_CODING_EXTENSION_ID:
+					picture_coding_extension();
+					break;
+
+				case COPYRIGHT_EXTENSION_ID:
+					copyright_extension();
+					break;
+			}
+		}
+		else if (code==USER_DATA_START_CODE)
+		{
+			/* user data payload is ignored */
+			Flush_Buffer(32);
+		}
+		else
+			break;
+
+		next_start_code();
+	}
+}
+
+/* Parse a sequence extension (ISO/IEC 13818-2 section 6.2.2.3).
+   Stores progressive_sequence and chroma_format, and merges the two-bit size
+   extensions into bits 12-13 of horizontal_size / vertical_size. All other
+   fields are read only to advance the bitstream. */
+void CMPEG2Dec::sequence_extension()
+{
+	Get_Bits(8);		// profile_and_level_indication
+	progressive_sequence = Get_Bits(1);
+	chroma_format        = Get_Bits(2);
+
+	const int width_high_bits  = Get_Bits(2);	// horizontal_size_extension
+	const int height_high_bits = Get_Bits(2);	// vertical_size_extension
+
+	Get_Bits(12);		// bit_rate_extension
+	Flush_Buffer(1);	// marker bit
+	Get_Bits(8);		// vbv_buffer_size_extension
+	Get_Bits(1);		// low_delay
+	Get_Bits(2);		// frame_rate_extension_n
+	Get_Bits(5);		// frame_rate_extension_d
+
+	horizontal_size = (horizontal_size&0x0fff) | (width_high_bits<<12);
+	vertical_size   = (vertical_size&0x0fff) | (height_high_bits<<12);
+}
+
+/* Parse a sequence display extension.
+   Nothing is stored: every field is read only to advance the bitstream. */
+void CMPEG2Dec::sequence_display_extension()
+{
+	Get_Bits(3);	// video_format
+
+	/* color_description flag: three 8-bit fields follow when it is set */
+	if (Get_Bits(1))
+	{
+		Get_Bits(8);	// color_primaries
+		Get_Bits(8);	// transfer_characteristics
+		Get_Bits(8);	// matrix_coefficients
+	}
+
+	Get_Bits(14);		// display_horizontal_size
+	Flush_Buffer(1);	// marker bit
+	Get_Bits(14);		// display_vertical_size
+}
+
+/* Parse a quant matrix extension (ISO/IEC 13818-2 section 6.2.3.2).
+   Up to four matrices may follow, each preceded by a one-bit load flag and
+   transmitted as 64 eight-bit values in zig-zag order. A luminance matrix
+   is also copied into the matching chrominance matrix; an explicit
+   chrominance matrix later in the extension then overrides that copy. */
+void CMPEG2Dec::quant_matrix_extension()
+{
+	int n;
+
+	load_intra_quantizer_matrix = Get_Bits(1);
+	if (load_intra_quantizer_matrix)
+	{
+		for (n=0; n<64; n++)
+		{
+			const int pos = scan[ZIG_ZAG][n];
+
+			intra_quantizer_matrix[pos] = Get_Bits(8);
+			chroma_intra_quantizer_matrix[pos] = intra_quantizer_matrix[pos];
+		}
+	}
+
+	load_non_intra_quantizer_matrix = Get_Bits(1);
+	if (load_non_intra_quantizer_matrix)
+	{
+		for (n=0; n<64; n++)
+		{
+			const int pos = scan[ZIG_ZAG][n];
+
+			non_intra_quantizer_matrix[pos] = Get_Bits(8);
+			chroma_non_intra_quantizer_matrix[pos] = non_intra_quantizer_matrix[pos];
+		}
+	}
+
+	load_chroma_intra_quantizer_matrix = Get_Bits(1);
+	if (load_chroma_intra_quantizer_matrix)
+	{
+		for (n=0; n<64; n++)
+			chroma_intra_quantizer_matrix[scan[ZIG_ZAG][n]] = Get_Bits(8);
+	}
+
+	load_chroma_non_intra_quantizer_matrix = Get_Bits(1);
+	if (load_chroma_non_intra_quantizer_matrix)
+	{
+		for (n=0; n<64; n++)
+			chroma_non_intra_quantizer_matrix[scan[ZIG_ZAG][n]] = Get_Bits(8);
+	}
+}
+
+/* Parse a picture display extension (ISO/IEC 13818-2 section 6.2.3.3).
+   The number of frame centre offset pairs (1 to 3) is derived from
+   progressive_sequence, repeat_first_field, top_field_first and
+   picture_structure, following section 6.3.12. The offsets themselves are
+   read only to advance the bitstream. */
+void CMPEG2Dec::picture_display_extension()
+{
+	int offsets_left;
+
+	/* derive number_of_frame_center_offsets */
+	if (progressive_sequence)
+	{
+		if (!repeat_first_field)
+			offsets_left = 1;
+		else
+			offsets_left = top_field_first ? 3 : 2;
+	}
+	else if (picture_structure!=FRAME_PICTURE)
+		offsets_left = 1;
+	else
+		offsets_left = repeat_first_field ? 3 : 2;
+
+	/* each pair is two 16-bit offsets, each followed by a marker bit */
+	for (; offsets_left>0; offsets_left--)
+	{
+		Get_Bits(16);		// frame_center_horizontal_offset
+		Flush_Buffer(1);	// marker bit
+
+		Get_Bits(16);		// frame_center_vertical_offset
+		Flush_Buffer(1);	// marker bit
+	}
+}
+
+/* Parse a picture coding extension.
+   Stores the four f_code values and the per-picture coding flags in the
+   decoder state, and records progressive_frame in pf_current. The
+   chroma_420_type bit and the optional composite display fields are read
+   only to advance the bitstream. */
+void CMPEG2Dec::picture_coding_extension()
+{
+	/* f_code[s][t], transmitted in the order [0][0], [0][1], [1][0], [1][1] */
+	for (int s=0; s<2; s++)
+	{
+		for (int t=0; t<2; t++)
+			f_code[s][t] = Get_Bits(4);
+	}
+
+	intra_dc_precision         = Get_Bits(2);
+	picture_structure          = Get_Bits(2);
+	top_field_first            = Get_Bits(1);
+	frame_pred_frame_dct       = Get_Bits(1);
+	concealment_motion_vectors = Get_Bits(1);
+	q_scale_type               = Get_Bits(1);
+	intra_vlc_format           = Get_Bits(1);
+	alternate_scan             = Get_Bits(1);
+	repeat_first_field         = Get_Bits(1);
+	Get_Bits(1);	// chroma_420_type
+	progressive_frame          = Get_Bits(1);
+
+	const int has_composite_display = Get_Bits(1);	// composite_display_flag
+
+	pf_current = progressive_frame;
+
+	if (has_composite_display)
+	{
+		Get_Bits(1);	// v_axis
+		Get_Bits(3);	// field_sequence
+		Get_Bits(1);	// sub_carrier
+		Get_Bits(7);	// burst_amplitude
+		Get_Bits(8);	// sub_carrier_phase
+	}
+}
+
+/* Skip extra bit information (ISO/IEC 13818-2 section 6.2.3.4).
+   While the next flag bit is 1, one following byte is discarded.
+   Returns the number of bytes skipped. */
+int CMPEG2Dec::extra_bit_information()
+{
+	int skipped = 0;
+
+	for (; Get_Bits(1); skipped++)
+		Flush_Buffer(8);
+
+	return skipped;
+}
+
+/* Parse a copyright extension (ISO/IEC 13818-2 section 6.2.3.6; header
+   added to the IS document in November 1994).
+   Nothing is stored: every field is read only to advance the bitstream. */
+void CMPEG2Dec::copyright_extension()
+{
+	Get_Bits(1);		// copyright_flag
+	Get_Bits(8);		// copyright_identifier
+	Get_Bits(1);		// original_or_copy
+	Get_Bits(7);		// reserved
+
+	Flush_Buffer(1);	// marker bit
+	Get_Bits(20);		// copyright_number_1
+	Flush_Buffer(1);	// marker bit
+	Get_Bits(22);		// copyright_number_2
+	Flush_Buffer(1);	// marker bit
+	Get_Bits(22);		// copyright_number_3
+}
+
+//
+// getpic
+//
+
+/* Colour component index for each block number within a macroblock:
+   0 selects the luma plane/predictor, 1 and 2 the two chroma ones. */
+static const unsigned char cc_table[12] = {
+	0, 0, 0, 0,		/* blocks 0-3 */
+	1, 2,			/* blocks 4-5 */
+	1, 2,			/* blocks 6-7 */
+	1, 2, 1, 2		/* blocks 8-11 */
+};
+
+// Decodes the picture whose headers have just been parsed.
+//   ref   - when non-zero, a finished frame is written out through
+//           assembleFrame() once a frame picture, or the second field of a
+//           field pair, has been decoded
+//   dst   - output buffer handed to assembleFrame()
+//   pitch - output pitch handed to assembleFrame()
+// B pictures are output from auxframe with the current progressive flag;
+// after an I/P picture the forward reference frame is output instead.
+void CMPEG2Dec::Decode_Picture(int ref, unsigned char *dst, int pitch)
+{
+	/* a frame picture always starts a fresh frame */
+	if (picture_structure==FRAME_PICTURE)
+	{
+		if (Second_Field)
+			Second_Field = 0;
+	}
+
+	/* reference pictures shift the progressive-frame history */
+	if (picture_coding_type!=B_TYPE)
+	{
+		pf_forward = pf_backward;
+		pf_backward = pf_current;
+	}
+
+	Update_Picture_Buffers();
+
+	picture_data();
+
+	if (ref)
+	{
+		if (picture_structure==FRAME_PICTURE || Second_Field)
+		{
+			if (picture_coding_type!=B_TYPE)
+				assembleFrame(forward_reference_frame, pf_forward, dst, pitch);
+			else
+				assembleFrame(auxframe, pf_current, dst, pitch);
+		}
+	}
+
+	/* field pictures alternate between first and second field */
+	if (picture_structure!=FRAME_PICTURE)
+		Second_Field = !Second_Field;
+}
+
+/* Select the buffers the current picture is decoded into, reusing old
+   picture buffers as soon as they are no longer needed.
+   B pictures go to auxframe since they are never used for reference.
+   For I/P pictures the forward and backward reference buffers are swapped
+   at the start of a coded frame (not for a second field) and decoding then
+   targets the backward reference buffer. For a bottom field the plane
+   pointers are advanced by one line. */
+void CMPEG2Dec::Update_Picture_Buffers()
+{
+	for (int plane=0; plane<3; plane++)
+	{
+		if (picture_coding_type==B_TYPE)
+		{
+			/* B pictures do not need to be saved for future reference */
+			current_frame[plane] = auxframe[plane];
+		}
+		else
+		{
+			if (!Second_Field)
+			{
+				/* only swap at the beginning of the coded frame: the previously
+				   decoded reference becomes the forward reference, and the old
+				   forward buffer is recycled as the new backward reference
+				   (backward prediction is not needed in P pictures) */
+				unsigned char *recycled = forward_reference_frame[plane];
+
+				forward_reference_frame[plane] = backward_reference_frame[plane];
+				backward_reference_frame[plane] = recycled;
+			}
+
+			/* safe to overwrite: any later B pictures will use the picture
+			   decoded here as their backward reference */
+			current_frame[plane] = backward_reference_frame[plane];
+		}
+
+		if (picture_structure==BOTTOM_FIELD)
+		{
+			/* plane 0 is luma, the others use the chroma line width */
+			if (plane==0)
+				current_frame[plane] += Coded_Picture_Width;
+			else
+				current_frame[plane] += Chroma_Width;
+		}
+	}
+}
+
+/* Decode all macroblocks of the current picture
+   (stages described in ISO/IEC 13818-2 section 7).
+   Slices are decoded one after another until slice() reports a negative
+   value. Field pictures contain half as many macroblocks as a frame. */
+void CMPEG2Dec::picture_data()
+{
+	/* number of macroblocks per picture */
+	int macroblock_total = mb_width*mb_height;
+
+	if (picture_structure!=FRAME_PICTURE)
+		macroblock_total >>= 1;
+
+	while (slice(macroblock_total)>=0)
+	{
+		/* keep going: next slice */
+	}
+}
+
+/* Decode the macroblocks of one slice (ISO/IEC 13818-2 section 6.3.16).
+   MBAmax is the number of macroblocks in the picture.
+   return 0 : go to next slice (end of slice reached, or a decoding fault,
+              in which case Fault_Flag is cleared before returning)
+   return -1: go to next picture (all macroblocks decoded, or
+              start_of_slice() refused the slice) */
+int CMPEG2Dec::slice(int MBAmax)
+{
+	int MBA = 0, MBAinc = 0, macroblock_type, motion_type, dct_type;
+	int dc_dct_pred[3], PMV[2][2][2], motion_vertical_field_select[2][2], dmvector[2];
+
+	const int start_result = start_of_slice(&MBA, &MBAinc, dc_dct_pred, PMV);
+	if (start_result!=1)
+		return start_result;
+
+	for (;;)
+	{
+		/* this is how we properly exit out of picture */
+		if (MBA>=MBAmax)
+			return -1;		// all macroblocks decoded
+
+		if (MBAinc==0)
+		{
+			/* 23 zero bits announce the next start code */
+			if (!Show_Bits(23) || Fault_Flag)
+			{
+				Fault_Flag = 0;
+				return 0;	// trigger: go to next slice
+			}
+
+			/* decode macroblock address increment */
+			MBAinc = Get_macroblock_address_increment();
+			if (Fault_Flag)
+			{
+				Fault_Flag = 0;
+				return 0;	// trigger: go to next slice
+			}
+		}
+
+		if (MBAinc!=1)
+		{
+			/* skipped macroblock, ISO/IEC 13818-2 section 7.6.6 */
+			skipped_macroblock(dc_dct_pred, PMV, &motion_type, motion_vertical_field_select, &macroblock_type);
+		}
+		else if (!decode_macroblock(&macroblock_type, &motion_type, &dct_type, PMV,
+			dc_dct_pred, motion_vertical_field_select, dmvector))
+		{
+			/* coded macroblock failed to decode: resynchronise */
+			Fault_Flag = 0;
+			return 0;	// trigger: go to next slice
+		}
+
+		/* ISO/IEC 13818-2 section 7.6 */
+		motion_compensation(MBA, macroblock_type, motion_type, PMV,
+							motion_vertical_field_select, dmvector, dct_type);
+
+		/* advance to next macroblock */
+		MBA++;
+		MBAinc--;
+
+		if (MBA>=MBAmax)
+			return -1;		// all macroblocks decoded
+	}
+}
+
+/* Macroblock modes (ISO/IEC 13818-2 section 6.3.17.1).
+   Reads macroblock_type and, where transmitted, the motion type and
+   dct_type, then derives the motion vector count, vector format, dual
+   prime flag and field-vector scaling flag. All results are returned
+   through the pointer arguments. If Get_macroblock_type() raised
+   Fault_Flag, the function returns without writing any output. */
+void CMPEG2Dec::macroblock_modes(int *pmacroblock_type, int *pmotion_type,
+							 int *pmotion_vector_count, int *pmv_format,
+							 int *pdmv, int *pmvscale, int *pdct_type)
+{
+	const int mb_type = Get_macroblock_type();
+	if (Fault_Flag)
+		return;
+
+	const bool frame_picture = (picture_structure==FRAME_PICTURE);
+
+	/* frame/field motion type: transmitted, implied, or left at 0 */
+	int mc_type = 0;
+
+	if (mb_type & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD))
+	{
+		if (frame_picture && frame_pred_frame_dct)
+			mc_type = MC_FRAME;
+		else
+			mc_type = Get_Bits(2);
+	}
+	else if ((mb_type & MACROBLOCK_INTRA) && concealment_motion_vectors)
+	{
+		/* concealment vectors use the prediction type of the picture */
+		mc_type = frame_picture ? MC_FRAME : MC_FIELD;
+	}
+
+	/* derive motion_vector_count and mv_format (table 6-17, 6-18) */
+	int vector_count;
+	int vector_format;
+
+	if (frame_picture)
+	{
+		vector_count = (mc_type==MC_FIELD) ? 2 : 1;
+		vector_format = (mc_type==MC_FRAME) ? MV_FRAME : MV_FIELD;
+	}
+	else
+	{
+		vector_count = (mc_type==MC_16X8) ? 2 : 1;
+		vector_format = MV_FIELD;
+	}
+
+	/* dct_type (frame DCT / field DCT) is only transmitted for coded
+	   macroblocks of frame pictures without frame_pred_frame_dct */
+	int field_dct = 0;
+
+	if (frame_picture && !frame_pred_frame_dct
+		&& (mb_type & (MACROBLOCK_PATTERN|MACROBLOCK_INTRA)))
+		field_dct = Get_Bits(1);
+
+	/* return values */
+	*pmacroblock_type = mb_type;
+	*pmotion_type = mc_type;
+	*pmotion_vector_count = vector_count;
+	*pmv_format = vector_format;
+	*pdmv = (mc_type==MC_DMV);	/* dual prime */
+
+	/* field mv predictions in frame pictures have to be scaled
+	   (ISO/IEC 13818-2 section 7.6.3.1 Decoding the motion vectors) */
+	*pmvscale = (vector_format==MV_FIELD && frame_picture);
+	*pdct_type = field_dct;
+}
+
+/* move/add 8x8-Block from block[comp] to the picture addressed by current_frame[] */
+/* copy reconstructed 8x8 block from block[comp] to current_frame[]
+   ISO/IEC 13818-2 section 7.6.8: Adding prediction and coefficient data
+   This stage also embodies some of the operations implied by:
+   - ISO/IEC 13818-2 section 7.6.7: Combining predictions
+   - ISO/IEC 13818-2 section 6.1.3: Macroblock
+
+   count    : number of entries of block[] to process (comp = 0..count-1)
+   bx, by   : macroblock position in luma samples; halved for the chroma
+              planes as required by chroma_format
+   dct_type : non-zero selects field DCT row addressing in frame pictures
+              (rows interleaved, line increment doubled)
+   addflag  : non-zero adds the block to the pixels already in the frame
+              (saturating); zero overwrites them with block value + 128
+
+   The inline assembly uses MMX registers and does not execute emms.
+*/
+void CMPEG2Dec::Add_Block(int count, int bx, int by, int dct_type, int addflag)
+{
+	static const __int64 mmmask_128 = 0x0080008000800080; // four 16-bit words of 128
+
+	int comp, cc, iincr, bxh, byh;
+	unsigned char *rfp; // destination of the first row of the 8x8 block
+	short *Block_Ptr;
+
+	for (comp=0; comp<count; comp++)
+	{
+		Block_Ptr = block[comp];
+		cc = cc_table[comp]; // colour component: 0 = luma, 1/2 = chroma planes
+
+		bxh = bx; byh = by;
+
+		if (cc==0)
+		{
+			if (picture_structure==FRAME_PICTURE)
+			{
+				if (dct_type)
+				{
+					rfp = current_frame[0] + Coded_Picture_Width*(by+((comp&2)>>1)) + bx + ((comp&1)<<3);
+					iincr = Coded_Picture_Width<<1;
+				}
+				else
+				{
+					rfp = current_frame[0] + Coded_Picture_Width*(by+((comp&2)<<2)) + bx + ((comp&1)<<3);
+					iincr = Coded_Picture_Width;
+				}
+			}
+			else
+			{
+				rfp = current_frame[0] + (Coded_Picture_Width<<1)*(by+((comp&2)<<2)) + bx + ((comp&1)<<3);
+				iincr = Coded_Picture_Width<<1;
+			}
+		}
+		else
+		{
+			if (chroma_format!=CHROMA444)
+				bxh >>= 1;
+			if (chroma_format==CHROMA420)
+				byh >>= 1;
+
+			if (picture_structure==FRAME_PICTURE)
+			{
+				if (dct_type && chroma_format!=CHROMA420)
+				{
+					/* field DCT coding */
+					rfp = current_frame[cc] + Chroma_Width*(byh+((comp&2)>>1)) + bxh + (comp&8);
+					iincr = Chroma_Width<<1;
+				}
+				else
+				{
+					/* frame DCT coding */
+					rfp = current_frame[cc] + Chroma_Width*(byh+((comp&2)<<2)) + bxh + (comp&8);
+					iincr = Chroma_Width;
+				}
+			}
+			else
+			{
+				/* field picture */
+				rfp = current_frame[cc] + (Chroma_Width<<1)*(byh+((comp&2)<<2)) + bxh + (comp&8);
+				iincr = Chroma_Width<<1;
+			}
+		}
+
+		if (addflag)
+		{
+			__asm
+			{
+				pxor		mm0, mm0
+				mov			eax, [rfp]
+				mov			ebx, [Block_Ptr]
+				mov			edi, 8 // row counter
+addon:
+				movq		mm2, [ebx+8] // coefficients 4..7 of this row
+
+				movq		mm3, [eax] // 8 destination pixels
+				movq		mm4, mm3
+
+				movq		mm1, [ebx] // coefficients 0..3 of this row
+				punpckhbw	mm3, mm0 // widen pixels 4..7 to words
+
+				paddsw		mm3, mm2
+				packuswb	mm3, mm0 // back to bytes with unsigned saturation
+
+				punpcklbw	mm4, mm0 // widen pixels 0..3 to words
+				psllq		mm3, 32
+
+				paddsw		mm4, mm1
+				packuswb	mm4, mm0
+
+				por			mm3, mm4			
+				add			ebx, 16 // next row of 8 shorts
+
+				dec			edi
+				movq		[eax], mm3
+
+				add			eax, [iincr] // next destination line
+				cmp			edi, 0x00
+				jg			addon
+			}
+		}
+		else
+		{
+			__asm
+			{
+				mov			eax, [rfp]
+				mov			ebx, [Block_Ptr]
+				mov			edi, 8 // row counter
+
+				pxor		mm0, mm0
+				movq		mm7, [mmmask_128]
+addoff:
+				movq		mm3, [ebx+8] // coefficients 4..7 of this row
+				movq		mm4, [ebx] // coefficients 0..3 of this row
+
+				paddsw		mm3, mm7 // add 128 to every coefficient
+				paddsw		mm4, mm7
+
+				packuswb	mm3, mm0 // to bytes with unsigned saturation
+				packuswb	mm4, mm0
+
+				psllq		mm3, 32
+				por			mm3, mm4
+			
+				add			ebx, 16 // next row of 8 shorts
+				dec			edi
+
+				movq		[eax], mm3
+
+				add			eax, [iincr] // next destination line
+				cmp			edi, 0x00
+				jg			addoff
+			}
+		}
+	}
+}
+
+/* set scratch pad macroblock to zero
+   For each of the first `count` entries of block[], the MMX code below
+   stores sixteen zero quadwords, i.e. it clears 128 bytes per entry.
+   No emms is executed here. */
+void CMPEG2Dec::Clear_Block(int count)
+{
+	int comp;
+	short *Block_Ptr;
+
+	for (comp=0; comp<count; comp++)
+	{
+		Block_Ptr = block[comp];
+
+		__asm
+		{
+			mov			eax, [Block_Ptr];
+			pxor		mm0, mm0; // mm0 = 0
+			movq		[eax+0 ], mm0;
+			movq		[eax+8 ], mm0;
+			movq		[eax+16], mm0;
+			movq		[eax+24], mm0;
+			movq		[eax+32], mm0;
+			movq		[eax+40], mm0;
+			movq		[eax+48], mm0;
+			movq		[eax+56], mm0;
+			movq		[eax+64], mm0;
+			movq		[eax+72], mm0;
+			movq		[eax+80], mm0;
+			movq		[eax+88], mm0;
+			movq		[eax+96], mm0;
+			movq		[eax+104],mm0;
+			movq		[eax+112],mm0;
+			movq		[eax+120],mm0;
+		}
+	}
+}
+
+/* Reconstruct one macroblock (ISO/IEC 13818-2 section 7.6).
+   Non-intra macroblocks first get their prediction formed. The coefficient
+   blocks are then run through the inverse DCT selected by IDCT_Flag and
+   finally written (intra) or added (non-intra) to the current frame. */
+void CMPEG2Dec::motion_compensation(int MBA, int macroblock_type, int motion_type, 
+								int PMV[2][2][2], int motion_vertical_field_select[2][2],
+								int dmvector[2], int dct_type)
+{
+	/* derive current macroblock position within picture */
+	/* ISO/IEC 13818-2 section 6.3.1.6 and 6.3.1.7 */
+	const int mb_column = MBA%mb_width;
+	const int mb_row = MBA/mb_width;
+	const int bx = 16*mb_column;
+	const int by = 16*mb_row;
+
+	const bool predicted = (macroblock_type & MACROBLOCK_INTRA)==0;
+	int n;
+
+	/* motion compensation */
+	if (predicted)
+		form_predictions(bx, by, macroblock_type, motion_type, PMV, 
+			motion_vertical_field_select, dmvector);
+
+	/* inverse DCT of every block; the non-MMX variants need emms first */
+	if (IDCT_Flag==IDCT_MMX)
+	{
+		for (n=0; n<block_count; n++)
+			MMX_IDCT(block[n]);
+	}
+	else if (IDCT_Flag==IDCT_SSEMMX)
+	{
+		for (n=0; n<block_count; n++)
+			SSEMMX_IDCT(block[n]);
+	}
+	else if (IDCT_Flag==IDCT_FPU)
+	{
+		__asm emms;
+		for (n=0; n<block_count; n++)
+			FPU_IDCT(block[n]);
+	}
+	else if (IDCT_Flag==IDCT_REF)
+	{
+		__asm emms;
+		for (n=0; n<block_count; n++)
+			REF_IDCT(block[n]);
+	}
+
+	Add_Block(block_count, bx, by, dct_type, predicted);
+}
+
+/* Set up decoder state for a skipped macroblock (ISO/IEC 13818-2 section
+   7.6.6): no coefficients, DC predictors reset, motion vector predictors
+   reset in P pictures, prediction type implied by the picture structure,
+   and the INTRA bit of *macroblock_type cleared. */
+void CMPEG2Dec::skipped_macroblock(int dc_dct_pred[3], int PMV[2][2][2], int *motion_type, 
+							   int motion_vertical_field_select[2][2], int *macroblock_type)
+{
+	Clear_Block(block_count);
+
+	/* reset intra_dc predictors */
+	/* ISO/IEC 13818-2 section 7.2.1: DC coefficients in intra blocks */
+	for (int cc=0; cc<3; cc++)
+		dc_dct_pred[cc] = 0;
+
+	/* reset motion vector predictors */
+	/* ISO/IEC 13818-2 section 7.6.3.4: Resetting motion vector predictors */
+	if (picture_coding_type==P_TYPE)
+	{
+		PMV[0][0][0] = 0;
+		PMV[0][0][1] = 0;
+		PMV[1][0][0] = 0;
+		PMV[1][0][1] = 0;
+	}
+
+	/* derive motion_type */
+	if (picture_structure!=FRAME_PICTURE)
+	{
+		const int bottom = (picture_structure==BOTTOM_FIELD);
+
+		*motion_type = MC_FIELD;
+		motion_vertical_field_select[0][1] = bottom;
+		motion_vertical_field_select[0][0] = bottom;
+	}
+	else
+		*motion_type = MC_FRAME;
+
+	/* clear MACROBLOCK_INTRA */
+	*macroblock_type &= ~MACROBLOCK_INTRA;
+}
+
+/* Locate and open the next slice ("start of slice" is the expression used
+   throughout the normative body of the MPEG specification).
+   On success returns 1 with *MBA set to the address of the first macroblock,
+   *MBAinc set to 1 and all DC / motion vector predictors reset.
+   Returns -1 (go to next picture) when the next start code is not a slice
+   start code (Fault_Flag is set to 10) or when the first macroblock address
+   increment could not be decoded. */
+int CMPEG2Dec::start_of_slice(int *MBA, int *MBAinc,
+						  int dc_dct_pred[3], int PMV[2][2][2])
+{
+	next_start_code();
+	const unsigned int code = Get_Bits(32);
+
+	const bool is_slice_code = (code>=SLICE_START_CODE_MIN && code<=SLICE_START_CODE_MAX);
+	if (!is_slice_code)
+	{
+		// only slice headers are allowed in picture_data
+		Fault_Flag = 10;
+		return -1;
+	}
+
+	/* decode slice header (may change quantizer_scale) */
+	const int slice_vert_pos_ext = slice_header();
+
+	/* decode macroblock address increment */
+	*MBAinc = Get_macroblock_address_increment();
+	if (Fault_Flag)
+		return -1;
+
+	/* set current location */
+	/* NOTE: the arithmetic used to derive macroblock_address below is
+	   equivalent to ISO/IEC 13818-2 section 6.3.17: Macroblock */
+	*MBA = ((slice_vert_pos_ext<<7) + (code&255) - 1)*mb_width + *MBAinc - 1;
+	*MBAinc = 1;	// first macroblock in slice: not skipped
+
+	/* reset all DC coefficient predictors */
+	/* ISO/IEC 13818-2 section 7.2.1: DC coefficients in intra blocks */
+	for (int cc=0; cc<3; cc++)
+		dc_dct_pred[cc] = 0;
+
+	/* reset all motion vector predictors */
+	/* ISO/IEC 13818-2 section 7.6.3.4: Resetting motion vector predictors */
+	for (int r=0; r<2; r++)
+		for (int s=0; s<2; s++)
+			for (int t=0; t<2; t++)
+				PMV[r][s][t] = 0;
+
+	/* successful: trigger decode macroblocks in slice */
+	return 1;
+}
+
+/* Decode one coded macroblock (ISO/IEC 13818-2 sections 7.2 through 7.5):
+   modes, optional quantizer scale, motion vectors, coded block pattern and
+   the coefficient blocks, followed by the predictor resets the standard
+   requires. Returns 1 on success, 0 as soon as Fault_Flag is found set
+   (the caller then resynchronises on the next slice). */
+int CMPEG2Dec::decode_macroblock(int *macroblock_type, int *motion_type, int *dct_type,
+							 int PMV[2][2][2], int dc_dct_pred[3], 
+							 int motion_vertical_field_select[2][2], int dmvector[2])
+{
+	int motion_vector_count, mv_format, dmv, mvscale;
+
+	/* ISO/IEC 13818-2 section 6.3.17.1: Macroblock modes */
+	macroblock_modes(macroblock_type, motion_type, &motion_vector_count, &mv_format,
+					 &dmv, &mvscale, dct_type);
+	if (Fault_Flag)
+		return 0;	// trigger: go to next slice
+
+	const int mb_type = *macroblock_type;
+	const bool is_intra = (mb_type & MACROBLOCK_INTRA)!=0;
+	const bool intra_with_concealment = is_intra && concealment_motion_vectors;
+
+	if (mb_type & MACROBLOCK_QUANT)
+	{
+		const int scale_code = Get_Bits(5);
+
+		/* ISO/IEC 13818-2 section 7.4.2.2: Quantizer scale factor */
+		if (q_scale_type)
+			quantizer_scale = Non_Linear_quantizer_scale[scale_code];
+		else
+			quantizer_scale = scale_code << 1;
+	}
+
+	/* ISO/IEC 13818-2 section 6.3.17.2: Motion vectors */
+	/* forward vectors (also used to carry concealment vectors) */
+	if ((mb_type & MACROBLOCK_MOTION_FORWARD) || intra_with_concealment)
+	{
+		motion_vectors(PMV, dmvector, motion_vertical_field_select, 0,
+			motion_vector_count, mv_format, f_code[0][0]-1, f_code[0][1]-1, dmv, mvscale);
+	}
+	if (Fault_Flag)
+		return 0;	// trigger: go to next slice
+
+	/* backward vectors */
+	if (mb_type & MACROBLOCK_MOTION_BACKWARD)
+	{
+		motion_vectors(PMV, dmvector, motion_vertical_field_select, 1,
+			motion_vector_count, mv_format, f_code[1][0]-1, f_code[1][1]-1, 0, mvscale);
+	}
+	if (Fault_Flag)
+		return 0;	// trigger: go to next slice
+
+	if (intra_with_concealment)
+		Flush_Buffer(1);	// marker bit
+
+	/* macroblock_pattern */
+	/* ISO/IEC 13818-2 section 6.3.17.4: Coded block pattern */
+	int coded_block_pattern;
+
+	if (mb_type & MACROBLOCK_PATTERN)
+	{
+		coded_block_pattern = Get_coded_block_pattern();
+
+		/* 4:2:2 and 4:4:4 carry extra pattern bits for the extra blocks */
+		if (chroma_format==CHROMA422)
+			coded_block_pattern = (coded_block_pattern<<2) | Get_Bits(2);
+		else if (chroma_format==CHROMA444)
+			coded_block_pattern = (coded_block_pattern<<6) | Get_Bits(6);
+	}
+	else if (is_intra)
+		coded_block_pattern = (1<<block_count)-1;	/* every block coded */
+	else
+		coded_block_pattern = 0;
+
+	if (Fault_Flag)
+		return 0;	// trigger: go to next slice
+
+	Clear_Block(block_count);
+
+	/* decode blocks; the pattern's most significant bit is block 0 */
+	for (int comp=0; comp<block_count; comp++)
+	{
+		if (!(coded_block_pattern & (1<<(block_count-1-comp))))
+			continue;
+
+		if (is_intra)
+			Decode_MPEG2_Intra_Block(comp, dc_dct_pred);
+		else
+			Decode_MPEG2_Non_Intra_Block(comp);
+
+		if (Fault_Flag)
+			return 0;	// trigger: go to next slice
+	}
+
+	/* reset intra_dc predictors */
+	/* ISO/IEC 13818-2 section 7.2.1: DC coefficients in intra blocks */
+	if (!is_intra)
+	{
+		dc_dct_pred[0] = 0;
+		dc_dct_pred[1] = 0;
+		dc_dct_pred[2] = 0;
+	}
+
+	/* reset motion vector predictors */
+	if (is_intra && !concealment_motion_vectors)
+	{
+		/* intra mb without concealment motion vectors */
+		/* ISO/IEC 13818-2 section 7.6.3.4: Resetting motion vector predictors */
+		for (int r=0; r<2; r++)
+			for (int s=0; s<2; s++)
+				for (int t=0; t<2; t++)
+					PMV[r][s][t] = 0;
+	}
+
+	/* special "No_MC" macroblock_type case */
+	/* ISO/IEC 13818-2 section 7.6.3.5: Prediction in P pictures */
+	if (picture_coding_type==P_TYPE
+		&& !(mb_type & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_INTRA)))
+	{
+		/* non-intra mb without forward mv in a P picture */
+		/* ISO/IEC 13818-2 section 7.6.3.4: Resetting motion vector predictors */
+		PMV[0][0][0] = 0;
+		PMV[0][0][1] = 0;
+		PMV[1][0][0] = 0;
+		PMV[1][0][1] = 0;
+
+		/* derive motion_type */
+		/* ISO/IEC 13818-2 section 6.3.17.1: Macroblock modes, frame_motion_type */
+		if (picture_structure!=FRAME_PICTURE)
+		{
+			*motion_type = MC_FIELD;
+			motion_vertical_field_select[0][0] = (picture_structure==BOTTOM_FIELD);
+		}
+		else
+			*motion_type = MC_FRAME;
+	}
+
+	/* successfully decoded macroblock */
+	return 1;
+}
+
+/* decode one intra coded MPEG-2 block
+   comp        : index into block[] of the block to fill
+   dc_dct_pred : DC predictors, one per colour component; the entry selected
+                 by cc_table[comp] is updated with the decoded differential
+   The DC coefficient is decoded first, then run/level pairs are read until
+   the end_of_block code. Each AC coefficient is dequantised with
+   quantizer_scale and the intra (or chroma intra) matrix.
+   On an invalid code, or a coefficient index outside the 8x8 block,
+   Fault_Flag is set to 1 and decoding of the block stops. */
+void CMPEG2Dec::Decode_MPEG2_Intra_Block(int comp, int dc_dct_pred[])
+{
+	int val, i, j, sign, *qmat;
+	unsigned int code;
+	DCTtab *tab;
+	short *bp;
+
+	bp = block[comp];
+
+	/* the chroma matrix is only distinct outside 4:2:0 */
+	qmat = (comp<4 || chroma_format==CHROMA420) 
+		? intra_quantizer_matrix : chroma_intra_quantizer_matrix;
+
+	/* ISO/IEC 13818-2 section 7.2.1: decode DC coefficients */
+	switch (cc_table[comp])
+	{
+		case 0:
+			val = (dc_dct_pred[0]+= Get_Luma_DC_dct_diff());
+			break;
+
+		case 1:
+			val = (dc_dct_pred[1]+= Get_Chroma_DC_dct_diff());
+			break;
+
+		case 2:
+			val = (dc_dct_pred[2]+= Get_Chroma_DC_dct_diff());
+			break;
+	}
+
+	bp[0] = val << (3-intra_dc_precision);
+
+	/* decode AC coefficients */
+	for (i=1; ; i++)
+	{
+		code = Show_Bits(16);
+
+		/* pick the VLC table from the number of leading zero bits */
+		if (code>=16384 && !intra_vlc_format)
+			tab = &DCTtabnext[(code>>12)-4];
+		else if (code>=1024)
+		{
+			if (intra_vlc_format)
+				tab = &DCTtab0a[(code>>8)-4];
+			else
+				tab = &DCTtab0[(code>>8)-4];
+		}
+		else if (code>=512)
+		{
+			if (intra_vlc_format)
+				tab = &DCTtab1a[(code>>6)-8];
+			else
+				tab = &DCTtab1[(code>>6)-8];
+		}
+		else if (code>=256)
+			tab = &DCTtab2[(code>>4)-16];
+		else if (code>=128)
+			tab = &DCTtab3[(code>>3)-16];
+		else if (code>=64)
+			tab = &DCTtab4[(code>>2)-16];
+		else if (code>=32)
+			tab = &DCTtab5[(code>>1)-16];
+		else if (code>=16)
+			tab = &DCTtab6[code-16];
+		else
+		{
+			/* invalid Huffman code */
+			Fault_Flag = 1;
+			return;
+		}
+
+		Flush_Buffer(tab->len);
+
+		if (tab->run<64)
+		{
+			i+= tab->run;
+			val = tab->level;
+			sign = Get_Bits(1);
+		}
+		else if (tab->run==64) /* end_of_block */
+			return;
+		else /* escape */
+		{
+			i+= Get_Bits(6);
+			val = Get_Bits(12);
+
+			/* 12-bit two's complement level: split into sign and magnitude */
+			if (sign = (val>=2048))
+				val = 4096 - val;
+		}
+
+		/* a corrupt stream can run the coefficient index past the end of
+		   the 8x8 block; reject it before it indexes scan[] and bp[] */
+		if (i>=64)
+		{
+			Fault_Flag = 1;
+			return;
+		}
+
+		j = scan[alternate_scan][i];
+
+		val = (val * quantizer_scale * qmat[j]) >> 4;
+		bp[j] = sign ? -val : val;
+	}
+}
+
+/* decode one non-intra coded MPEG-2 block
+   comp : index into block[] of the block to fill
+   Run/level pairs are read until the end_of_block code; each coefficient is
+   dequantised with quantizer_scale and the non-intra (or chroma non-intra)
+   matrix. On an invalid code, or a coefficient index outside the 8x8 block,
+   Fault_Flag is set to 1 and decoding of the block stops. */
+void CMPEG2Dec::Decode_MPEG2_Non_Intra_Block(int comp)
+{
+	int val, i, j, sign, *qmat;
+	unsigned int code;
+	DCTtab *tab;
+	short *bp;
+
+	bp = block[comp];
+
+	/* the chroma matrix is only distinct outside 4:2:0 */
+	qmat = (comp<4 || chroma_format==CHROMA420) 
+		? non_intra_quantizer_matrix : chroma_non_intra_quantizer_matrix;
+
+	/* decode AC coefficients */
+	for (i=0; ; i++)
+	{
+		code = Show_Bits(16);
+
+		/* pick the VLC table from the number of leading zero bits;
+		   the very first coefficient uses its own table */
+		if (code>=16384)
+		{
+			if (i==0)
+				tab = &DCTtabfirst[(code>>12)-4];
+			else
+				tab = &DCTtabnext[(code>>12)-4];
+		}
+		else if (code>=1024)
+			tab = &DCTtab0[(code>>8)-4];
+		else if (code>=512)
+			tab = &DCTtab1[(code>>6)-8];
+		else if (code>=256)
+			tab = &DCTtab2[(code>>4)-16];
+		else if (code>=128)
+			tab = &DCTtab3[(code>>3)-16];
+		else if (code>=64)
+			tab = &DCTtab4[(code>>2)-16];
+		else if (code>=32)
+			tab = &DCTtab5[(code>>1)-16];
+		else if (code>=16)
+			tab = &DCTtab6[code-16];
+		else
+		{
+			/* invalid Huffman code */
+			Fault_Flag = 1;
+			return;
+		}
+
+		Flush_Buffer(tab->len);
+
+		if (tab->run<64)
+		{
+			i+= tab->run;
+			val = tab->level;
+			sign = Get_Bits(1);
+		}
+		else if (tab->run==64) /* end_of_block */
+			return;
+		else /* escape */
+		{
+			i+= Get_Bits(6);
+			val = Get_Bits(12);
+
+			/* 12-bit two's complement level: split into sign and magnitude */
+			if (sign = (val>=2048))
+				val = 4096 - val;
+		}
+
+		/* a corrupt stream can run the coefficient index past the end of
+		   the 8x8 block; reject it before it indexes scan[] and bp[] */
+		if (i>=64)
+		{
+			Fault_Flag = 1;
+			return;
+		}
+
+		j = scan[alternate_scan][i];
+
+		val = (((val<<1)+1) * quantizer_scale * qmat[j]) >> 5;
+		bp[j] = sign ? -val : val;
+	}
+}
+
+// Decodes macroblock_type with the VLC table that belongs to the current
+// picture_coding_type (I, P or B) and returns it.
+// For any other picture_coding_type there is no table to decode with:
+// Fault_Flag is set to 2 (the value the per-type decoders use for an
+// invalid macroblock_type) and 0 is returned, instead of returning an
+// uninitialised value.
+int CMPEG2Dec::Get_macroblock_type()
+{
+	int macroblock_type = 0;
+
+	switch (picture_coding_type)
+	{
+		case I_TYPE:
+			macroblock_type = Get_I_macroblock_type();
+			break;
+
+		case P_TYPE:
+			macroblock_type = Get_P_macroblock_type();
+			break;
+
+		case B_TYPE:
+			macroblock_type = Get_B_macroblock_type();
+			break;
+
+		default:
+			/* unsupported or corrupt picture_coding_type */
+			Fault_Flag = 2;
+			break;
+	}
+
+	return macroblock_type;
+}
+
+// Decodes macroblock_type in an I picture.
+// Code '1' yields 1, code '01' yields 17. For '00' Fault_Flag is set to 2
+// and 17 is still returned.
+int CMPEG2Dec::Get_I_macroblock_type()
+{
+	const bool first_bit_set = (Get_Bits(1)!=0);
+
+	if (!first_bit_set)
+	{
+		if (Get_Bits(1)==0)
+			Fault_Flag = 2;
+
+		return 17;
+	}
+
+	return 1;
+}
+
+// Decodes macroblock_type in a P picture from a 6-bit lookahead.
+// Lookahead values of 8 and above are resolved through PMBtab0 (indexed by
+// the top three bits), values 1..7 through PMBtab1. A lookahead of 0 is
+// invalid: Fault_Flag is set to 2 and 0 is returned without consuming bits.
+int CMPEG2Dec::Get_P_macroblock_type()
+{
+	int lookahead = Show_Bits(6);
+
+	if (lookahead==0)
+	{
+		Fault_Flag = 2;
+		return 0;
+	}
+
+	if (lookahead<8)
+	{
+		Flush_Buffer(PMBtab1[lookahead].len);
+		return PMBtab1[lookahead].val;
+	}
+
+	lookahead >>= 3;
+	Flush_Buffer(PMBtab0[lookahead].len);
+
+	return PMBtab0[lookahead].val;
+}
+
+// Decodes macroblock_type in a B picture from a 6-bit lookahead.
+// Lookahead values of 8 and above are resolved through BMBtab0 (indexed by
+// the top four bits), values 1..7 through BMBtab1. A lookahead of 0 is
+// invalid: Fault_Flag is set to 2 and 0 is returned without consuming bits.
+int CMPEG2Dec::Get_B_macroblock_type()
+{
+	int lookahead = Show_Bits(6);
+
+	if (lookahead==0)
+	{
+		Fault_Flag = 2;
+		return 0;
+	}
+
+	if (lookahead<8)
+	{
+		Flush_Buffer(BMBtab1[lookahead].len);
+		return BMBtab1[lookahead].val;
+	}
+
+	lookahead >>= 2;
+	Flush_Buffer(BMBtab0[lookahead].len);
+
+	return BMBtab0[lookahead].val;
+}
+
+// Decodes coded_block_pattern from a 9-bit lookahead using one of three
+// tables chosen by magnitude: CBPtab0 (>=128, top five bits), CBPtab1
+// (8..127, top eight bits) or CBPtab2 (1..7). A lookahead below 1 is
+// invalid: Fault_Flag is set to 3 and 0 is returned without consuming bits.
+int CMPEG2Dec::Get_coded_block_pattern()
+{
+	int lookahead = Show_Bits(9);
+
+	if (lookahead<1)
+	{
+		Fault_Flag = 3;
+		return 0;
+	}
+
+	if (lookahead<8)
+	{
+		Flush_Buffer(CBPtab2[lookahead].len);
+		return CBPtab2[lookahead].val;
+	}
+
+	if (lookahead<128)
+	{
+		lookahead >>= 1;
+		Flush_Buffer(CBPtab1[lookahead].len);
+
+		return CBPtab1[lookahead].val;
+	}
+
+	lookahead >>= 4;
+	Flush_Buffer(CBPtab0[lookahead].len);
+
+	return CBPtab0[lookahead].val;
+}
+
+// Decodes macroblock_address_increment from an 11-bit lookahead.
+// macroblock_stuffing codes (15) are skipped, every macroblock_escape code
+// (8) adds 33 to the result, and the terminating VLC supplies the rest.
+// Any other lookahead value below 24 is invalid: Fault_Flag is set to 4
+// and 1 is returned.
+int CMPEG2Dec::Get_macroblock_address_increment()
+{
+	int lookahead;
+	int increment = 0;
+
+	for (;;)
+	{
+		lookahead = Show_Bits(11);
+		if (lookahead>=24)
+			break;
+
+		if (lookahead==8)
+		{
+			/* macroblock_escape */
+			increment += 33;
+		}
+		else if (lookahead!=15)
+		{
+			/* neither escape nor macroblock_stuffing */
+			Fault_Flag = 4;
+			return 1;
+		}
+
+		Flush_Buffer(11);
+	}
+
+	/* macroblock_address_increment == 1 */
+	/* ('1' is in the MSB position of the lookahead) */
+	if (lookahead>=1024)
+	{
+		Flush_Buffer(1);
+		return increment + 1;
+	}
+
+	/* codes 00010 ... 011xx */
+	if (lookahead>=128)
+	{
+		/* remove leading zeros */
+		lookahead >>= 6;
+		Flush_Buffer(MBAtab1[lookahead].len);
+
+		return increment + MBAtab1[lookahead].val;
+	}
+
+	/* codes 00000011000 ... 0000111xxxx */
+	lookahead -= 24;	/* remove common base */
+	Flush_Buffer(MBAtab2[lookahead].len);
+
+	return increment + MBAtab2[lookahead].val;
+}
+
+/*
+   parse VLC and perform dct_diff arithmetic.
+   MPEG-2:  ISO/IEC 13818-2 section 7.2.1 
+
+   Note: the arithmetic here is presented more elegantly than
+   the spec, yet the results, dct_diff, are the same.
+*/
+int CMPEG2Dec::Get_Luma_DC_dct_diff()
+{
+	int size;
+
+	/* decode the size VLC: 5-bit prefix first, 9-bit lookup for the all-ones prefix */
+	const int prefix = Show_Bits(5);
+
+	if (prefix<31)
+	{
+		size = DClumtab0[prefix].val;
+		Flush_Buffer(DClumtab0[prefix].len);
+	}
+	else
+	{
+		const int index = Show_Bits(9) - 0x1f0;
+		size = DClumtab1[index].val;
+		Flush_Buffer(DClumtab1[index].len);
+	}
+
+	/* a size of zero carries no differential bits */
+	if (size==0)
+		return 0;
+
+	int diff = Get_Bits(size);
+
+	/* a clear top bit marks a negative differential */
+	if ((diff & (1<<(size-1)))==0)
+		diff -= (1<<size) - 1;
+
+	return diff;
+}
+
+/*
+   Chroma counterpart of Get_Luma_DC_dct_diff: decodes the size VLC through
+   DCchromtab0 / DCchromtab1, then reads `size` bits and maps them to a
+   signed differential.
+*/
+int CMPEG2Dec::Get_Chroma_DC_dct_diff()
+{
+	int size;
+
+	/* decode the size VLC: 5-bit prefix first, 10-bit lookup for the all-ones prefix */
+	const int prefix = Show_Bits(5);
+
+	if (prefix<31)
+	{
+		size = DCchromtab0[prefix].val;
+		Flush_Buffer(DCchromtab0[prefix].len);
+	}
+	else
+	{
+		const int index = Show_Bits(10) - 0x3e0;
+		size = DCchromtab1[index].val;
+		Flush_Buffer(DCchromtab1[index].len);
+	}
+
+	/* a size of zero carries no differential bits */
+	if (size==0)
+		return 0;
+
+	int diff = Get_Bits(size);
+
+	/* a clear top bit marks a negative differential */
+	if ((diff & (1<<(size-1)))==0)
+		diff -= (1<<size) - 1;
+
+	return diff;
+}
+
+/*
+static int currentfield;
+static unsigned char **predframe;
+static int DMV[2][2];
+static int stw;
+*/
+
+/*
+   Form the prediction for the macroblock at (bx, by) in current_frame.
+
+   A forward prediction is written when the macroblock has forward motion
+   or the picture is a P picture; a backward prediction follows when the
+   macroblock has backward motion and is averaged with the forward one if
+   both are present. The prediction mode is chosen from picture_structure
+   and motion_type; an unsupported combination sets Fault_Flag to 5.
+*/
+void CMPEG2Dec::form_predictions(int bx, int by, int macroblock_type, int motion_type,
+					  int PMV[2][2][2], int motion_vertical_field_select[2][2],
+					  int dmvector[2])
+{
+	const int frame_pitch = Coded_Picture_Width;
+	const int field_pitch = Coded_Picture_Width<<1;
+	const bool forward_coded = (macroblock_type & MACROBLOCK_MOTION_FORWARD)!=0;
+	int derived_mv[2][2];
+	int parity;
+	int average = 0;	/* set to 1 once a forward prediction has been written */
+
+	if (forward_coded || picture_coding_type==P_TYPE)
+	{
+		if (picture_structure==FRAME_PICTURE)
+		{
+			if (motion_type==MC_FRAME || !forward_coded)
+			{
+				/* frame-based prediction, done as top and bottom halves */
+				for (parity=0; parity<2; parity++)
+				{
+					form_prediction(forward_reference_frame, parity, current_frame, parity,
+						frame_pitch, field_pitch, 16, 8, bx, by,
+						PMV[0][0][0], PMV[0][0][1], average);
+				}
+			}
+			else if (motion_type==MC_FIELD)
+			{
+				/* field-based prediction: top field first, then bottom field */
+				for (parity=0; parity<2; parity++)
+				{
+					form_prediction(forward_reference_frame, motion_vertical_field_select[parity][0],
+						current_frame, parity, field_pitch, field_pitch, 16, 8,
+						bx, by>>1, PMV[parity][0][0], PMV[parity][0][1]>>1, average);
+				}
+			}
+			else if (motion_type==MC_DMV)
+			{
+				/* dual prime prediction */
+				const int mvx = PMV[0][0][0];
+				const int mvy = PMV[0][0][1]>>1;
+
+				/* calculate derived motion vectors */
+				Dual_Prime_Arithmetic(derived_mv, dmvector, mvx, mvy);
+
+				/* top field from top field */
+				form_prediction(forward_reference_frame, 0, current_frame, 0,
+					field_pitch, field_pitch, 16, 8, bx, by>>1, mvx, mvy, 0);
+
+				/* add to top field from bottom field */
+				form_prediction(forward_reference_frame, 1, current_frame, 0,
+					field_pitch, field_pitch, 16, 8, bx, by>>1,
+					derived_mv[0][0], derived_mv[0][1], 1);
+
+				/* bottom field from bottom field */
+				form_prediction(forward_reference_frame, 1, current_frame, 1,
+					field_pitch, field_pitch, 16, 8, bx, by>>1, mvx, mvy, 0);
+
+				/* add to bottom field from top field */
+				form_prediction(forward_reference_frame, 0, current_frame, 1,
+					field_pitch, field_pitch, 16, 8, bx, by>>1,
+					derived_mv[1][0], derived_mv[1][1], 1);
+			}
+			else
+			{
+				Fault_Flag = 5;
+			}
+		}
+		else
+		{
+			/* field picture */
+			const int currentfield = (picture_structure==BOTTOM_FIELD);
+			unsigned char **predframe;
+
+			/* determine which frame to use for prediction */
+			if (picture_coding_type==P_TYPE && Second_Field && currentfield!=motion_vertical_field_select[0][0])
+				predframe = backward_reference_frame;
+			else
+				predframe = forward_reference_frame;
+
+			if (motion_type==MC_FIELD || !forward_coded)
+			{
+				form_prediction(predframe, motion_vertical_field_select[0][0], current_frame, 0,
+					field_pitch, field_pitch, 16, 16, bx, by,
+					PMV[0][0][0], PMV[0][0][1], average);
+			}
+			else if (motion_type==MC_16X8)
+			{
+				/* upper then lower 16x8 half; the reference is re-selected per half */
+				for (parity=0; parity<2; parity++)
+				{
+					if (picture_coding_type==P_TYPE && Second_Field && currentfield!=motion_vertical_field_select[parity][0])
+						predframe = backward_reference_frame;
+					else
+						predframe = forward_reference_frame;
+
+					form_prediction(predframe, motion_vertical_field_select[parity][0], current_frame, 0,
+						field_pitch, field_pitch, 16, 8, bx, by + (parity<<3),
+						PMV[parity][0][0], PMV[parity][0][1], average);
+				}
+			}
+			else if (motion_type==MC_DMV)
+			{
+				if (Second_Field)
+					predframe = backward_reference_frame;
+				else
+					predframe = forward_reference_frame;
+
+				/* calculate derived motion vectors */
+				Dual_Prime_Arithmetic(derived_mv, dmvector, PMV[0][0][0], PMV[0][0][1]);
+
+				/* predict from field of same parity */
+				form_prediction(forward_reference_frame, currentfield, current_frame, 0,
+					field_pitch, field_pitch, 16, 16, bx, by,
+					PMV[0][0][0], PMV[0][0][1], 0);
+
+				/* predict from field of opposite parity */
+				form_prediction(predframe, !currentfield, current_frame, 0,
+					field_pitch, field_pitch, 16, 16, bx, by,
+					derived_mv[0][0], derived_mv[0][1], 1);
+			}
+			else
+			{
+				Fault_Flag = 5;
+			}
+		}
+
+		/* any backward prediction below is averaged with what was just written */
+		average = 1;
+	}
+
+	if (macroblock_type & MACROBLOCK_MOTION_BACKWARD)
+	{
+		if (picture_structure==FRAME_PICTURE)
+		{
+			if (motion_type==MC_FRAME)
+			{
+				/* frame-based prediction, done as top and bottom halves */
+				for (parity=0; parity<2; parity++)
+				{
+					form_prediction(backward_reference_frame, parity, current_frame, parity,
+						frame_pitch, field_pitch, 16, 8, bx, by,
+						PMV[0][1][0], PMV[0][1][1], average);
+				}
+			}
+			else
+			{
+				/* field-based prediction: top field first, then bottom field */
+				for (parity=0; parity<2; parity++)
+				{
+					form_prediction(backward_reference_frame, motion_vertical_field_select[parity][1],
+						current_frame, parity, field_pitch, field_pitch, 16, 8,
+						bx, by>>1, PMV[parity][1][0], PMV[parity][1][1]>>1, average);
+				}
+			}
+		}
+		else if (motion_type==MC_FIELD)
+		{
+			/* field picture, field-based prediction */
+			form_prediction(backward_reference_frame, motion_vertical_field_select[0][1],
+				current_frame, 0, field_pitch, field_pitch, 16, 16,
+				bx, by, PMV[0][1][0], PMV[0][1][1], average);
+		}
+		else if (motion_type==MC_16X8)
+		{
+			/* field picture, upper then lower 16x8 half */
+			for (parity=0; parity<2; parity++)
+			{
+				form_prediction(backward_reference_frame, motion_vertical_field_select[parity][1],
+					current_frame, 0, field_pitch, field_pitch, 16, 8,
+					bx, by + (parity<<3), PMV[parity][1][0], PMV[parity][1][1], average);
+			}
+		}
+		else
+		{
+			Fault_Flag = 5;
+		}
+	}
+}
+
+/*
+   Predict one block in all three planes.
+
+   src / dst hold the Y, Cb and Cr plane pointers. sfield / dfield select
+   the second field of the source / destination by offsetting the plane
+   pointer by half of lx2. Luma is predicted with the arguments as given;
+   for chroma the horizontal quantities are halved unless chroma_format is
+   CHROMA444, and the vertical quantities are halved as well for CHROMA420.
+*/
+void CMPEG2Dec::form_prediction(unsigned char *src[], int sfield, unsigned char *dst[],
+							int dfield, int lx, int lx2, int w, int h, int x, int y,
+							int dx, int dy, int average_flag)
+{
+	int plane;
+	int src_offset = sfield ? (lx2>>1) : 0;
+	int dst_offset = dfield ? (lx2>>1) : 0;
+
+	/* Y */
+	form_component_prediction(src[0]+src_offset, dst[0]+dst_offset,
+		lx, lx2, w, h, x, y, dx, dy, average_flag);
+
+	if (chroma_format!=CHROMA444)
+	{
+		/* horizontally subsampled chroma */
+		lx >>= 1;
+		lx2 >>= 1;
+		w >>= 1;
+		x >>= 1;
+		dx /= 2;
+	}
+
+	if (chroma_format==CHROMA420)
+	{
+		/* vertically subsampled chroma */
+		h >>= 1;
+		y >>= 1;
+		dy /= 2;
+	}
+
+	/* field offsets follow the (possibly halved) chroma pitch */
+	src_offset = sfield ? (lx2>>1) : 0;
+	dst_offset = dfield ? (lx2>>1) : 0;
+
+	/* Cb, then Cr */
+	for (plane=1; plane<=2; plane++)
+	{
+		form_component_prediction(src[plane]+src_offset, dst[plane]+dst_offset,
+			lx, lx2, w, h, x, y, dx, dy, average_flag);
+	}
+}
+
+/* ISO/IEC 13818-2 section 7.6.4: Forming predictions
+
+   Predicts one block of a single plane with MMX code.
+
+   src, dst     - plane base pointers (any field offset is already applied by the caller)
+   lx           - pitch used to locate the block and to reach the row below for
+                  vertical interpolation
+   lx2          - pitch used to step from one processed row to the next
+   w, h         - block width and height; rows are processed 8 bytes at a time
+   x, y         - block position
+   dx, dy       - motion vector; bit 0 of each selects interpolation between
+                  neighbouring samples, the remaining bits (dx>>1, dy>>1) are
+                  the whole-sample displacement
+   average_flag - non-zero to average the result with what is already in dst
+
+   The three flags are packed into `flag` and select one of eight kernels;
+   the scalar formula each kernel implements is given at the top of its case.
+   In every kernel eax = source row, ebx = destination row, ecx = source row
+   plus lx (when used), esi = byte offset within the row, edi = rows left and
+   mm0 = 0 for byte<->word unpacking.
+
+   NOTE(review): no emms is issued here; presumably the MMX state is cleared
+   elsewhere before floating-point code runs - confirm. */
+void CMPEG2Dec::form_component_prediction(unsigned char *src, unsigned char *dst,
+										  int lx, int lx2, int w, int h, int x, int y,
+										  int dx, int dy, int average_flag)
+{
+	static const __int64 mmmask_0001 = 0x0001000100010001;	// rounding term for /2
+	static const __int64 mmmask_0002 = 0x0002000200020002;	// rounding term for /4
+	static const __int64 mmmask_0003 = 0x0003000300030003;	// rounding term for the averaged /4 kernels
+	static const __int64 mmmask_0006 = 0x0006000600060006;	// rounding term for the averaged /8 kernel
+
+	unsigned char *s = src + lx * (y + (dy>>1)) + x + (dx>>1);	// top-left source sample, displaced by the whole-sample part of the vector
+	unsigned char *d = dst + lx * y + x;	// top-left destination sample
+	int flag = (average_flag<<2) + ((dx & 1)<<1) + (dy & 1);	// bit2 = average, bit1 = horizontal interpolation, bit0 = vertical interpolation
+
+	switch (flag)
+	{
+		case 0:
+			// d[i] = s[i];
+			__asm
+			{
+				mov			eax, [s]
+				mov			ebx, [d]
+				mov			esi, 0x00
+				mov			edi, [h]
+mc0:
+				movq		mm1, [eax+esi]	// copy 8 samples
+				add			esi, 0x08
+				cmp			esi, [w]
+				movq		[ebx+esi-8], mm1	// esi was already advanced, hence -8
+				jl			mc0
+
+				add			eax, [lx2]	// next row
+				add			ebx, [lx2]
+				dec			edi
+				mov			esi, 0x00
+				cmp			edi, 0x00
+				jg			mc0
+			}
+			break;
+
+		case 1:
+			// d[i] = (s[i]+s[i+lx]+1)>>1;
+			__asm
+			{
+				pxor		mm0, mm0
+				movq		mm7, [mmmask_0001]
+				mov			eax, [s]
+				mov			ebx, [d]
+				mov			ecx, eax
+				add			ecx, [lx]	// ecx = row below
+				mov			esi, 0x00
+				mov			edi, [h]
+mc1:
+				movq		mm1, [eax+esi]
+				movq		mm2, [ecx+esi]
+
+				movq		mm3, mm1
+				movq		mm4, mm2
+
+				punpcklbw	mm1, mm0	// low 4 samples widened to words
+				punpckhbw	mm3, mm0	// high 4 samples widened to words
+				punpcklbw	mm2, mm0
+				punpckhbw	mm4, mm0
+
+				paddsw		mm1, mm2
+				paddsw		mm3, mm4
+
+				paddsw		mm1, mm7	// + 1 for rounding
+				paddsw		mm3, mm7
+
+				psrlw		mm1, 1
+				psrlw		mm3, 1
+
+				packuswb	mm1, mm0	// back to bytes; upper dword becomes zero
+				packuswb	mm3, mm0
+
+				psllq		mm3, 32	// move the high 4 results into the upper dword
+				por			mm1, mm3
+
+				add			esi, 0x08
+				cmp			esi, [w]
+				movq		[ebx+esi-8], mm1
+				jl			mc1
+
+				add			eax, [lx2]
+				add			ebx, [lx2]
+				add			ecx, [lx2]
+				dec			edi
+				mov			esi, 0x00
+				cmp			edi, 0x00
+				jg			mc1
+			}
+			break;
+
+		case 2:
+			// d[i] = (s[i]+s[i+1]+1)>>1;
+			__asm
+			{
+				pxor		mm0, mm0
+				movq		mm7, [mmmask_0001]
+				mov			eax, [s]
+				mov			ebx, [d]
+				mov			esi, 0x00
+				mov			edi, [h]
+mc2:
+				movq		mm1, [eax+esi]
+				movq		mm2, [eax+esi+1]	// right-hand neighbours
+
+				movq		mm3, mm1
+				movq		mm4, mm2
+
+				punpcklbw	mm1, mm0
+				punpckhbw	mm3, mm0
+
+				punpcklbw	mm2, mm0
+				punpckhbw	mm4, mm0
+
+				paddsw		mm1, mm2
+				paddsw		mm3, mm4
+
+				paddsw		mm1, mm7
+				paddsw		mm3, mm7
+
+				psrlw		mm1, 1
+				psrlw		mm3, 1
+
+				packuswb	mm1, mm0
+				packuswb	mm3, mm0
+
+				psllq		mm3, 32
+				por			mm1, mm3
+
+				add			esi, 0x08
+				cmp			esi, [w]
+				movq		[ebx+esi-8], mm1
+				jl			mc2
+
+				add			eax, [lx2]
+				add			ebx, [lx2]
+				dec			edi
+				mov			esi, 0x00
+				cmp			edi, 0x00
+				jg			mc2
+			}
+			break;
+
+		case 3:
+			// d[i] = (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2;
+			__asm
+			{
+				pxor		mm0, mm0
+				movq		mm7, [mmmask_0002]
+				mov			eax, [s]
+				mov			ebx, [d]
+				mov			ecx, eax
+				add			ecx, [lx]	// ecx = row below
+				mov			esi, 0x00
+				mov			edi, [h]
+mc3:
+				movq		mm1, [eax+esi]
+				movq		mm2, [eax+esi+1]
+				movq		mm3, mm1
+				movq		mm4, mm2
+
+				punpcklbw	mm1, mm0
+				punpckhbw	mm3, mm0
+
+				punpcklbw	mm2, mm0
+				punpckhbw	mm4, mm0
+
+				paddsw		mm1, mm2	// upper row: s[i]+s[i+1]
+				paddsw		mm3, mm4
+
+				movq		mm5, [ecx+esi]
+				paddsw		mm1, mm7	// + 2 for rounding
+
+				movq		mm6, [ecx+esi+1]
+				paddsw		mm3, mm7
+
+				movq		mm2, mm5
+				movq		mm4, mm6
+
+				punpcklbw	mm2, mm0
+				punpckhbw	mm5, mm0
+
+				punpcklbw	mm4, mm0
+				punpckhbw	mm6, mm0
+				
+				paddsw		mm2, mm4	// lower row: s[i+lx]+s[i+lx+1]
+				paddsw		mm5, mm6
+
+				paddsw		mm1, mm2
+				paddsw		mm3, mm5
+
+				psrlw		mm1, 2
+				psrlw		mm3, 2
+
+				packuswb	mm1, mm0
+				packuswb	mm3, mm0
+
+				psllq		mm3, 32
+				por			mm1, mm3
+
+				add			esi, 0x08
+				cmp			esi, [w]
+				movq		[ebx+esi-8], mm1
+				jl			mc3
+
+				add			eax, [lx2]
+				add			ebx, [lx2]
+				add			ecx, [lx2]
+				dec			edi
+				mov			esi, 0x00
+				cmp			edi, 0x00
+				jg			mc3
+			}
+			break;
+
+		case 4:
+			// d[i] = (s[i]+d[i]+1)>>1;
+			__asm
+			{
+				pxor		mm0, mm0
+				movq		mm7, [mmmask_0001]
+				mov			eax, [s]
+				mov			ebx, [d]
+				mov			esi, 0x00
+				mov			edi, [h]
+mc4:
+				movq		mm1, [eax+esi]
+				movq		mm2, [ebx+esi]	// existing destination samples
+				movq		mm3, mm1
+				movq		mm4, mm2
+
+				punpcklbw	mm1, mm0
+				punpckhbw	mm3, mm0
+
+				punpcklbw	mm2, mm0
+				punpckhbw	mm4, mm0
+
+				paddsw		mm1, mm2
+				paddsw		mm3, mm4
+
+				paddsw		mm1, mm7
+				paddsw		mm3, mm7
+
+				psrlw		mm1, 1
+				psrlw		mm3, 1
+
+				packuswb	mm1, mm0
+				packuswb	mm3, mm0
+
+				psllq		mm3, 32
+				por			mm1, mm3
+
+				add			esi, 0x08
+				cmp			esi, [w]
+				movq		[ebx+esi-8], mm1
+				jl			mc4
+
+				add			eax, [lx2]
+				add			ebx, [lx2]
+				dec			edi
+				mov			esi, 0x00
+				cmp			edi, 0x00
+				jg			mc4
+			}
+			break;
+
+		case 5:
+			// d[i] = ((d[i]<<1) + s[i]+s[i+lx] + 3)>>2;
+			__asm
+			{
+				pxor		mm0, mm0
+				movq		mm7, [mmmask_0003]
+				mov			eax, [s]
+				mov			ebx, [d]
+				mov			ecx, eax
+				add			ecx, [lx]	// ecx = row below
+				mov			esi, 0x00
+				mov			edi, [h]
+mc5:
+				movq		mm1, [eax+esi]
+				movq		mm2, [ecx+esi]
+				movq		mm3, mm1
+				movq		mm4, mm2
+
+				punpcklbw	mm1, mm0
+				punpckhbw	mm3, mm0
+
+				punpcklbw	mm2, mm0
+				punpckhbw	mm4, mm0
+
+				paddsw		mm1, mm2
+				paddsw		mm3, mm4
+
+				movq		mm5, [ebx+esi]	// existing destination samples
+
+				paddsw		mm1, mm7
+				paddsw		mm3, mm7
+
+				movq		mm6, mm5
+				punpcklbw	mm5, mm0
+				punpckhbw	mm6, mm0
+
+				psllw		mm5, 1	// d[i]<<1
+				psllw		mm6, 1
+
+				paddsw		mm1, mm5
+				paddsw		mm3, mm6
+
+				psrlw		mm1, 2
+				psrlw		mm3, 2
+
+				packuswb	mm1, mm0
+				packuswb	mm3, mm0
+
+				psllq		mm3, 32
+				por			mm1, mm3
+
+				add			esi, 0x08
+				cmp			esi, [w]
+				movq		[ebx+esi-8], mm1
+				jl			mc5
+
+				add			eax, [lx2]
+				add			ebx, [lx2]
+				add			ecx, [lx2]
+				dec			edi
+				mov			esi, 0x00
+				cmp			edi, 0x00
+				jg			mc5
+			}
+			break;
+
+		case 6:
+			// d[i] = ((d[i]<<1) + s[i]+s[i+1] + 3) >> 2;
+			__asm
+			{
+				pxor		mm0, mm0
+				movq		mm7, [mmmask_0003]
+				mov			eax, [s]
+				mov			ebx, [d]
+				mov			esi, 0x00
+				mov			edi, [h]
+mc6:
+				movq		mm1, [eax+esi]
+				movq		mm2, [eax+esi+1]	// right-hand neighbours
+				movq		mm3, mm1
+				movq		mm4, mm2
+
+				punpcklbw	mm1, mm0
+				punpckhbw	mm3, mm0
+
+				punpcklbw	mm2, mm0
+				punpckhbw	mm4, mm0
+
+				paddsw		mm1, mm2
+				paddsw		mm3, mm4
+
+				movq		mm5, [ebx+esi]	// existing destination samples
+
+				paddsw		mm1, mm7
+				paddsw		mm3, mm7
+
+				movq		mm6, mm5
+				punpcklbw	mm5, mm0
+				punpckhbw	mm6, mm0
+
+				psllw		mm5, 1	// d[i]<<1
+				psllw		mm6, 1
+
+				paddsw		mm1, mm5
+				paddsw		mm3, mm6
+
+				psrlw		mm1, 2
+				psrlw		mm3, 2
+
+				packuswb	mm1, mm0
+				packuswb	mm3, mm0
+
+				psllq		mm3, 32
+				por			mm1, mm3
+
+				add			esi, 0x08
+				cmp			esi, [w]
+				movq		[ebx+esi-8], mm1
+				jl			mc6
+
+				add			eax, [lx2]
+				add			ebx, [lx2]
+				dec			edi
+				mov			esi, 0x00
+				cmp			edi, 0x00
+				jg			mc6
+			}
+			break;
+
+		case 7:
+			// d[i] = ((d[i]<<2) + s[i]+s[i+1]+s[i+lx]+s[i+lx+1] + 6)>>3;
+			__asm
+			{
+				pxor		mm0, mm0
+				movq		mm7, [mmmask_0006]
+				mov			eax, [s]
+				mov			ebx, [d]
+				mov			ecx, eax
+				add			ecx, [lx]	// ecx = row below
+				mov			esi, 0x00
+				mov			edi, [h]
+mc7:
+				movq		mm1, [eax+esi]
+				movq		mm2, [eax+esi+1]
+				movq		mm3, mm1
+				movq		mm4, mm2
+
+				punpcklbw	mm1, mm0
+				punpckhbw	mm3, mm0
+
+				punpcklbw	mm2, mm0
+				punpckhbw	mm4, mm0
+
+				paddsw		mm1, mm2	// upper row: s[i]+s[i+1]
+				paddsw		mm3, mm4
+
+				movq		mm5, [ecx+esi]
+				paddsw		mm1, mm7	// + 6 for rounding
+
+				movq		mm6, [ecx+esi+1]
+				paddsw		mm3, mm7
+
+				movq		mm2, mm5
+				movq		mm4, mm6
+
+				punpcklbw	mm2, mm0
+				punpckhbw	mm5, mm0
+
+				punpcklbw	mm4, mm0
+				punpckhbw	mm6, mm0
+				
+				paddsw		mm2, mm4	// lower row: s[i+lx]+s[i+lx+1]
+				paddsw		mm5, mm6
+
+				paddsw		mm1, mm2
+				paddsw		mm3, mm5
+
+				movq		mm6, [ebx+esi]	// existing destination samples
+
+				movq		mm4, mm6
+				punpcklbw	mm4, mm0
+				punpckhbw	mm6, mm0
+
+				psllw		mm4, 2	// d[i]<<2
+				psllw		mm6, 2
+
+				paddsw		mm1, mm4
+				paddsw		mm3, mm6
+
+				psrlw		mm1, 3
+				psrlw		mm3, 3
+
+				packuswb	mm1, mm0
+				packuswb	mm3, mm0
+
+				psllq		mm3, 32
+				por			mm1, mm3
+
+				add			esi, 0x08
+				cmp			esi, [w]
+				movq		[ebx+esi-8], mm1
+				jl			mc7
+
+				add			eax, [lx2]
+				add			ebx, [lx2]
+				add			ecx, [lx2]
+				dec			edi
+				mov			esi, 0x00
+				cmp			edi, 0x00
+				jg			mc7
+			}
+			break;
+	}
+}
+
+//
+// motion
+//
+
+/* ISO/IEC 13818-2 sections 6.2.5.2, 6.3.17.2, and 7.6.3: Motion vectors */
+void CMPEG2Dec::motion_vectors(int PMV[2][2][2],int dmvector[2],
+					int motion_vertical_field_select[2][2], int s,
+					int motion_vector_count, int mv_format, int h_r_size,
+					int v_r_size, int dmv, int mvscale)
+{
+	/* s selects the direction slot of PMV / motion_vertical_field_select to fill */
+	if (motion_vector_count!=1)
+	{
+		/* two vectors: each one is preceded by its own field-select bit */
+		for (int which=0; which<2; which++)
+		{
+			motion_vertical_field_select[which][s] = Get_Bits(1);
+			motion_vector(PMV[which][s],dmvector,h_r_size,v_r_size,dmv,mvscale,0);
+		}
+		return;
+	}
+
+	/* single vector: the field-select bit is only present for field vectors without dual prime */
+	if (mv_format==MV_FIELD && !dmv)
+	{
+		const int field_select = Get_Bits(1);
+		motion_vertical_field_select[0][s] = field_select;
+		motion_vertical_field_select[1][s] = field_select;
+	}
+
+	motion_vector(PMV[0][s],dmvector,h_r_size,v_r_size,dmv,mvscale,0);
+
+	/* update other motion vector predictors */
+	for (int component=0; component<2; component++)
+		PMV[1][s][component] = PMV[0][s][component];
+}
+
+
+/* ISO/IEC 13818-2 section 7.6.3.6: Dual prime additional arithmetic */
+void CMPEG2Dec::Dual_Prime_Arithmetic(int DMV[][2],int *dmvector, int mvx,int mvy)
+{
+	/* the vector scaled by 1/2, rounded away from zero for positive values */
+	const int half_x = (mvx + (mvx>0))>>1;
+	const int half_y = (mvy + (mvy>0))>>1;
+
+	if (picture_structure!=FRAME_PICTURE)
+	{
+		/* field picture: one vector for prediction from the field of opposite 'parity' */
+		DMV[0][0] = half_x + dmvector[0];
+		DMV[0][1] = half_y + dmvector[1];
+
+		/* correct for vertical field shift */
+		if (picture_structure==TOP_FIELD)
+			DMV[0][1]--;
+		else
+			DMV[0][1]++;
+
+		return;
+	}
+
+	/* frame picture: the vector scaled by 3/2 is needed as well */
+	const int three_half_x = (3*mvx + (mvx>0))>>1;
+	const int three_half_y = (3*mvy + (mvy>0))>>1;
+
+	/* the 1/2 scaling goes to DMV[0] when the top field is first, to DMV[1] otherwise */
+	const int half_slot = top_field_first ? 0 : 1;
+	const int three_half_slot = 1 - half_slot;
+
+	DMV[half_slot][0] = half_x + dmvector[0];
+	DMV[half_slot][1] = half_y + dmvector[1];
+	DMV[three_half_slot][0] = three_half_x + dmvector[0];
+	DMV[three_half_slot][1] = three_half_y + dmvector[1];
+
+	/* DMV[0] predicts the top field from the bottom field, DMV[1] the reverse */
+	DMV[0][1] -= 1;
+	DMV[1][1] += 1;
+}
+
+/* get and decode motion vector and differential motion vector for one prediction */
+void CMPEG2Dec::motion_vector(int *PMV, int *dmvector, int h_r_size, int v_r_size,
+				   int dmv, int mvscale, int full_pel_vector)
+{
+	int code;
+	int residual;
+
+	/* horizontal component */
+	/* ISO/IEC 13818-2 Table B-10 */
+	code = Get_motion_code();
+	residual = 0;
+	if (h_r_size!=0 && code!=0)
+		residual = Get_Bits(h_r_size);
+
+	decode_motion_vector(PMV,h_r_size,code,residual,full_pel_vector);
+
+	if (dmv)
+		dmvector[0] = Get_dmvector();
+
+	/* vertical component */
+	code = Get_motion_code();
+	residual = 0;
+	if (v_r_size!=0 && code!=0)
+		residual = Get_Bits(v_r_size);
+
+	if (mvscale)
+	{
+		/* the predictor is halved for decoding and doubled again afterwards */
+		PMV[1] >>= 1; /* DIV 2 */
+		decode_motion_vector(PMV+1,v_r_size,code,residual,full_pel_vector);
+		PMV[1] <<= 1;
+	}
+	else
+	{
+		decode_motion_vector(PMV+1,v_r_size,code,residual,full_pel_vector);
+	}
+
+	if (dmv)
+		dmvector[1] = Get_dmvector();
+}
+
+/* calculate motion vector component */
+/* ISO/IEC 13818-2 section 7.6.3.1: Decoding the motion vectors */
+/* Note: the arithmetic here is more elegant than that which is shown 
+   in 7.6.3.1.  The end results (PMV[][][]) should, however, be the same.  */
+void CMPEG2Dec::decode_motion_vector(int *pred, int r_size, int motion_code,
+								 int motion_residual, int full_pel_vector)
+{
+	/* results are wrapped into [-range, range) */
+	const int range = 16<<r_size;
+	int value = *pred;
+
+	if (full_pel_vector)
+		value >>= 1;
+
+	if (motion_code>0)
+	{
+		value += ((motion_code-1)<<r_size) + motion_residual + 1;
+		if (value>=range)
+			value -= range + range;
+	}
+	else if (motion_code<0)
+	{
+		value -= ((-motion_code-1)<<r_size) + motion_residual + 1;
+		if (value<-range)
+			value += range + range;
+	}
+
+	/* full-pel vectors are stored back in half-pel units */
+	if (full_pel_vector)
+		value <<= 1;
+
+	*pred = value;
+}
+
+/*
+   Decode a motion_code VLC.
+
+   A leading '1' bit means zero. Otherwise the magnitude is looked up from a
+   9-bit lookahead through MVtab0, MVtab1 or MVtab2, and the bit that
+   follows the code gives the sign (1 = negative). A lookahead below 12 sets
+   Fault_Flag to 10 and returns 0.
+*/
+int CMPEG2Dec::Get_motion_code()
+{
+	if (Get_Bits(1))
+		return 0;
+
+	const int lookahead = Show_Bits(9);
+	int magnitude;
+
+	if (lookahead>=64)
+	{
+		const int index0 = lookahead>>6;
+		Flush_Buffer(MVtab0[index0].len);
+		magnitude = MVtab0[index0].val;
+	}
+	else if (lookahead>=24)
+	{
+		const int index1 = lookahead>>3;
+		Flush_Buffer(MVtab1[index1].len);
+		magnitude = MVtab1[index1].val;
+	}
+	else
+	{
+		const int index2 = lookahead - 12;
+		if (index2<0)
+		{
+			Fault_Flag = 10;
+			return 0;
+		}
+
+		Flush_Buffer(MVtab2[index2].len);
+		magnitude = MVtab2[index2].val;
+	}
+
+	/* sign bit follows the magnitude code */
+	if (Get_Bits(1))
+		return -magnitude;
+
+	return magnitude;
+}
+
+/* get differential motion vector (for dual prime prediction) */
+int CMPEG2Dec::Get_dmvector()
+{
+	/* first bit clear: zero, and no sign bit is read */
+	if (!Get_Bits(1))
+		return 0;
+
+	/* second bit is the sign: set means -1, clear means +1 */
+	if (Get_Bits(1))
+		return -1;
+
+	return 1;
+}
+
+//
+// store
+//
+
+static const __int64 mmmask_0001 = 0x0001000100010001;	// four 16-bit lanes, each holding 1
+static const __int64 mmmask_0002 = 0x0002000200020002;	// four lanes of 2
+static const __int64 mmmask_0003 = 0x0003000300030003;	// four lanes of 3
+static const __int64 mmmask_0004 = 0x0004000400040004;	// four lanes of 4
+static const __int64 mmmask_0005 = 0x0005000500050005;	// four lanes of 5
+static const __int64 mmmask_0007 = 0x0007000700070007;	// four lanes of 7
+static const __int64 mmmask_0016 = 0x0010001000100010;	// four lanes of 16 (name is decimal, literal is hex)
+static const __int64 mmmask_0040 = 0x0040004000400040;	// four lanes of 0x40 = 64; note this name follows the hex literal, unlike _0016 and _0128
+static const __int64 mmmask_0128 = 0x0080008000800080;	// four lanes of 128 (name is decimal, literal is hex)
+
+/*
+   Convert one decoded picture into the output buffer.
+
+   src   - Y, Cb and Cr planes of the decoded picture
+   pf    - passed to conv420to422 as its frame_type argument
+   dst   - output buffer
+   pitch - passed through to the final conversion
+
+   Clears Fault_Flag, optionally runs the luminance filter, brings 4:2:0
+   chroma up to 4:2:2 and then emits either YUY2 (dstYUY2()) or RGB24 (after
+   a further 4:2:2 -> 4:4:4 chroma conversion).
+
+   When the source chroma is not 4:2:0 the source planes themselves are the
+   4:2:2 data. They are now passed to conv422toYUY2 directly; previously the
+   u422 / v422 scratch planes were used on that path even though nothing had
+   filled them for the current picture.
+
+   NOTE(review): a 4:4:4 source is still treated as 4:2:2 here, as before.
+*/
+void CMPEG2Dec::assembleFrame(unsigned char *src[], int pf, unsigned char *dst, int pitch)
+{
+	unsigned char *y444;
+	unsigned char *cb422 = src[1];
+	unsigned char *cr422 = src[2];
+
+	if (Fault_Flag)
+		Fault_Flag = 0;
+
+	if (Luminance_Flag)
+	{
+		Luminance_Filter(src[0], lum);
+		y444 = lum;
+	}
+	else
+		y444 = src[0];
+
+	if (chroma_format==CHROMA420)
+	{
+		/* vertical chroma upsampling into the scratch planes */
+		conv420to422(src[1], u422, pf);
+		conv420to422(src[2], v422, pf);
+		cb422 = u422;
+		cr422 = v422;
+	}
+
+	if (dstYUY2())
+	{
+		conv422toYUY2(y444, cb422, cr422, dst, pitch);
+	}
+	else
+	{
+		/* RGB output needs full-resolution chroma */
+		conv422to444(cb422, u444);
+		conv422to444(cr422, v444);
+		conv444toRGB24(y444, u444, v444, dst, pitch);
+	}
+}
+
+/*
+   Apply the luminance gain/offset adjustment to a luma plane.
+
+   Both pointers are first advanced by CLIP_AREA; LUM_AREA bytes are then
+   processed 8 at a time. Each sample is widened to 16 bits and transformed as
+
+       out = saturate_u8((((in * gain) + 64) >> 7) + offset)
+
+   where gain and offset are the per-lane words of LumGainMask and
+   LumOffsetMask (16-bit wrap-around arithmetic before the final saturation).
+
+   The eight results are merged into one register and written with a single
+   8-byte store. The previous code stored the two halves with two overlapping
+   8-byte stores, so the final iteration wrote four zero bytes beyond the
+   processed area.
+
+   NOTE(review): no emms is issued here; presumably the MMX state is cleared
+   elsewhere - confirm.
+*/
+void CMPEG2Dec::Luminance_Filter(unsigned char *src, unsigned char *dst)
+{
+	src += CLIP_AREA;
+	dst += CLIP_AREA;
+
+	__asm
+	{
+		mov			edx, this
+		mov			eax, [src]
+		mov			ebx, [dst]
+		mov			esi, 0x00				// byte offset into the plane
+		mov			edi, [edx].LUM_AREA		// number of bytes to process
+		pxor		mm0, mm0				// zero, for byte<->word conversion
+		movq		mm5, [edx].LumOffsetMask
+		movq		mm6, [edx].LumGainMask
+		movq		mm7, mmmask_0040		// rounding term for the >>7 below
+
+lumconv:
+		movq		mm1, [eax+esi]
+		movq		mm2, mm1
+
+		punpcklbw	mm1, mm0				// low 4 samples as words
+		punpckhbw	mm2, mm0				// high 4 samples as words
+
+		pmullw		mm1, mm6
+		pmullw		mm2, mm6
+
+		paddw		mm1, mm7
+		paddw		mm2, mm7
+
+		psrlw		mm1, 7
+		psrlw		mm2, 7
+
+		paddw		mm1, mm5
+		paddw		mm2, mm5
+
+		packuswb	mm1, mm0				// results in the low dword, upper dword zero
+		packuswb	mm2, mm0
+
+		psllq		mm2, 32					// high 4 results into the upper dword
+		por			mm1, mm2
+
+		add			esi, 0x08
+		cmp			esi, edi
+		movq		[ebx+esi-8], mm1		// esi was already advanced, hence -8
+		jl			lumconv
+	}
+}
+
+void CMPEG2Dec::conv422to444(unsigned char *src, unsigned char *dst)
+{	/* Doubles the horizontal resolution of one chroma plane with MMX code.
+	   For every input sample s[i] two output bytes are written: s[i] itself
+	   and the rounded average (s[i]+s[i+1]+1)>>1. The last 8 input samples
+	   of each row have no right-hand neighbour in the loop and are simply
+	   written twice. Clip_Height rows are processed; the input advances by
+	   HALF_WIDTH and the output by Coded_Picture_Width per row.
+	   NOTE(review): no emms is issued here; presumably done elsewhere - confirm. */
+	src += HALF_CLIP_AREA;	// skip the clipped region of the half-width plane
+	dst += CLIP_AREA;	// skip the clipped region of the full-width plane
+
+	__asm
+	{
+		mov			edx, this
+		mov			eax, [src]
+		mov			ebx, [dst]
+		mov			edi, [edx].Clip_Height	// rows left
+
+		movq		mm1, [mmmask_0001]	// rounding term
+		pxor		mm0, mm0
+
+convyuv444init:
+		movq		mm7, [eax]	// prime mm7 with the first 8 samples of the row
+		mov			esi, 0x00
+
+convyuv444:
+		movq		mm2, mm7	// current 8 samples
+		movq		mm7, [eax+esi+8]	// following 8 samples
+		movq		mm3, mm2
+		movq		mm4, mm7
+
+		psrlq		mm3, 8
+		psllq		mm4, 56
+		por			mm3, mm4	// mm3 = current samples shifted by one, i.e. s[i+1]
+
+		movq		mm4, mm2
+		movq		mm5, mm3
+
+		punpcklbw	mm4, mm0
+		punpcklbw	mm5, mm0
+
+		movq		mm6, mm4
+		paddusw		mm4, mm1
+		paddusw		mm4, mm5
+		psrlw		mm4, 1	// (s[i]+s[i+1]+1)>>1
+		psllq		mm4, 8	// average goes to the high byte of each word
+		por			mm4, mm6	// low byte keeps s[i]: bytes are s[i], avg
+
+		punpckhbw	mm2, mm0
+		punpckhbw	mm3, mm0
+
+		movq		mm6, mm2
+		paddusw		mm2, mm1
+		paddusw		mm2, mm3
+
+		movq		[ebx+esi*2], mm4	// output for input samples 0..3
+
+		psrlw		mm2, 1
+		psllq		mm2, 8
+		por			mm2, mm6
+
+		add			esi, 0x08
+		cmp			esi, [edx].HALF_WIDTH_D8
+		movq		[ebx+esi*2-8], mm2	// output for input samples 4..7 (esi already advanced)
+		jl			convyuv444
+
+		movq		mm2, mm7	// tail: last 8 samples, each one duplicated
+		punpcklbw	mm2, mm0
+		movq		mm3, mm2
+
+		psllq		mm2, 8
+		por			mm2, mm3
+
+		movq		[ebx+esi*2], mm2
+
+		punpckhbw	mm7, mm0
+		movq		mm6, mm7
+
+		psllq		mm6, 8
+		por			mm6, mm7
+
+		movq		[ebx+esi*2+8], mm6
+
+		add			eax, [edx].HALF_WIDTH	// next input row
+		add			ebx, [edx].Coded_Picture_Width	// next output row
+		dec			edi
+		cmp			edi, 0x00
+		jg			convyuv444init
+	}
+}
+
+/*
+   Double the vertical resolution of one chroma plane (4:2:0 -> 4:2:2) with
+   MMX code. Rows are HALF_WIDTH bytes wide in both the input and the output
+   and are processed 4 bytes at a time.
+
+   frame_type != 0 selects the first path (labels ending in 'p'), which uses
+   3:1 weights between vertically adjacent input rows:
+       out[2j]   = (in[j-1] + 3*in[j] + 2) >> 2
+       out[2j+1] = (3*in[j] + in[j+1] + 2) >> 2
+   with the very first and very last output rows copied from the input.
+   frame_type == 0 selects the second path (labels ending in 'i'), which
+   uses 3/5 and 7/1 eighth weights with +4 rounding.
+   NOTE(review): presumably these are the progressive and interlaced cases
+   respectively - confirm against the caller.
+
+   In both paths ebp is saved and then loaded with `this`, so members are
+   addressed as ds:[ebp].member; parameters are read before ebp is replaced.
+
+   Fix: in the middle loop of the first path the second output row was
+   computed from the previous input row again (both loads used the same
+   address) and the next-row pointer was calculated only after the loads,
+   where it had no effect. The second load now reads the next input row,
+   matching the formula above and the top/bottom edge handling.
+
+   NOTE(review): the second path contains two `movq` loads where only the
+   low 4 bytes are used; left untouched.
+*/
+void CMPEG2Dec::conv420to422(unsigned char *src, unsigned char *dst, int frame_type)
+{
+	if (frame_type)
+	{
+		__asm
+		{
+			push		ebp
+			mov			eax, [src]
+			mov			ebx, [dst]
+			mov			ebp, this
+			mov			ecx, ebx
+			add			ecx, ds:[ebp].HALF_WIDTH		// ecx = second output row of the pair
+			mov			esi, 0x00
+			movq		mm3, [mmmask_0003]
+			pxor		mm0, mm0
+			movq		mm4, [mmmask_0002]
+
+			mov			edx, eax
+			add			edx, ds:[ebp].HALF_WIDTH		// edx = next input row
+convyuv422topp:
+			movd		mm1, [eax+esi]
+			movd		mm2, [edx+esi]
+			movd		[ebx+esi], mm1					// first output row: plain copy
+			punpcklbw	mm1, mm0
+			pmullw		mm1, mm3
+			paddusw		mm1, mm4
+			punpcklbw	mm2, mm0
+			paddusw		mm2, mm1
+			psrlw		mm2, 0x02						// (3*cur + next + 2) >> 2
+			packuswb	mm2, mm0
+
+			add			esi, 0x04
+			cmp			esi, ds:[ebp].HALF_WIDTH
+			movd		[ecx+esi-4], mm2
+			jl			convyuv422topp
+
+			add			eax, ds:[ebp].HALF_WIDTH
+			add			ebx, ds:[ebp].Coded_Picture_Width
+			add			ecx, ds:[ebp].Coded_Picture_Width
+			mov			esi, 0x00
+
+			mov			edi, ds:[ebp].PROGRESSIVE_HEIGHT
+convyuv422p:
+			movd		mm1, [eax+esi]
+
+			punpcklbw	mm1, mm0
+			mov			edx, eax
+
+			pmullw		mm1, mm3						// 3 * current row
+			sub			edx, ds:[ebp].HALF_WIDTH
+
+			movd		mm5, [edx+esi]					// previous input row
+
+			mov			edx, eax
+			add			edx, ds:[ebp].HALF_WIDTH
+			movd		mm2, [edx+esi]					// next input row
+
+			punpcklbw	mm5, mm0
+			punpcklbw	mm2, mm0
+			paddusw		mm5, mm1
+			paddusw		mm2, mm1
+			paddusw		mm5, mm4
+			paddusw		mm2, mm4
+			psrlw		mm5, 0x02						// (prev + 3*cur + 2) >> 2
+			psrlw		mm2, 0x02						// (3*cur + next + 2) >> 2
+			packuswb	mm5, mm0
+			packuswb	mm2, mm0
+
+			add			esi, 0x04
+			cmp			esi, ds:[ebp].HALF_WIDTH
+			movd		[ebx+esi-4], mm5
+			movd		[ecx+esi-4], mm2
+
+			jl			convyuv422p
+
+			add			eax, ds:[ebp].HALF_WIDTH
+			add			ebx, ds:[ebp].Coded_Picture_Width
+			add			ecx, ds:[ebp].Coded_Picture_Width
+			mov			esi, 0x00
+			dec			edi
+			cmp			edi, 0x00
+			jg			convyuv422p
+
+			mov			edx, eax
+			sub			edx, ds:[ebp].HALF_WIDTH		// edx = previous input row
+convyuv422bottomp:
+			movd		mm1, [eax+esi]
+			movd		mm5, [edx+esi]
+			punpcklbw	mm5, mm0
+			movd		[ecx+esi], mm1					// last output row: plain copy
+
+			punpcklbw	mm1, mm0
+			pmullw		mm1, mm3
+			paddusw		mm5, mm1
+			paddusw		mm5, mm4
+			psrlw		mm5, 0x02
+			packuswb	mm5, mm0
+
+			add			esi, 0x04
+			cmp			esi, ds:[ebp].HALF_WIDTH
+			movd		[ebx+esi-4], mm5
+			jl			convyuv422bottomp
+			pop			ebp
+		}
+	}
+	else
+	{
+		__asm
+		{
+			push		ebp
+			mov			eax, [src]
+			mov			ecx, [dst]
+			mov			ebp, this
+			mov			esi, 0x00
+			pxor		mm0, mm0
+			movq		mm3, [mmmask_0003]
+			movq		mm4, [mmmask_0004]				// rounding term for >>3
+			movq		mm5, [mmmask_0005]
+
+convyuv422topi:
+			movd		mm1, [eax+esi]
+			mov			ebx, eax
+			add			ebx, ds:[ebp].HALF_WIDTH
+			movd		mm2, [ebx+esi]
+			movd		[ecx+esi], mm1					// first output row: plain copy
+			punpcklbw	mm1, mm0
+			movq		mm6, mm1
+			pmullw		mm1, mm3
+
+			punpcklbw	mm2, mm0
+			movq		mm7, mm2
+			pmullw		mm2, mm5
+			paddusw		mm2, mm1
+			paddusw		mm2, mm4
+			psrlw		mm2, 0x03
+			packuswb	mm2, mm0
+
+			mov			edx, ecx
+			add			edx, ds:[ebp].HALF_WIDTH
+			pmullw		mm6, mm5
+			movd		[edx+esi], mm2
+
+			add			ebx, ds:[ebp].HALF_WIDTH
+			movd		mm2, [ebx+esi]
+			punpcklbw	mm2, mm0
+			pmullw		mm2, mm3
+			paddusw		mm2, mm6
+			paddusw		mm2, mm4
+			psrlw		mm2, 0x03
+			packuswb	mm2, mm0
+
+			add			edx, ds:[ebp].HALF_WIDTH
+			add			ebx, ds:[ebp].HALF_WIDTH
+			pmullw		mm7, [mmmask_0007]
+			movd		[edx+esi], mm2
+
+			movd		mm2, [ebx+esi]
+			punpcklbw	mm2, mm0
+			paddusw		mm2, mm7
+			paddusw		mm2, mm4
+			psrlw		mm2, 0x03
+			packuswb	mm2, mm0
+
+			add			edx, ds:[ebp].HALF_WIDTH
+			add			esi, 0x04
+			cmp			esi, ds:[ebp].HALF_WIDTH
+			movd		[edx+esi-4], mm2
+
+			jl			convyuv422topi
+
+			add			eax, ds:[ebp].Coded_Picture_Width
+			add			ecx, ds:[ebp].DOUBLE_WIDTH
+			mov			esi, 0x00
+
+			mov			edi, ds:[ebp].INTERLACED_HEIGHT
+convyuv422i:
+			movd		mm1, [eax+esi]
+			punpcklbw	mm1, mm0
+			movq		mm6, mm1
+			mov			ebx, eax
+			sub			ebx, ds:[ebp].Coded_Picture_Width
+			movd		mm3, [ebx+esi]
+			pmullw		mm1, [mmmask_0007]
+			punpcklbw	mm3, mm0
+			paddusw		mm3, mm1
+			paddusw		mm3, mm4
+			psrlw		mm3, 0x03
+			packuswb	mm3, mm0
+
+			add			ebx, ds:[ebp].HALF_WIDTH
+			movq		mm1, [ebx+esi]
+			add			ebx, ds:[ebp].Coded_Picture_Width
+			movd		[ecx+esi], mm3
+
+			movq		mm3, [mmmask_0003]				// mm3 was used as scratch above; reload the weight
+			movd		mm2, [ebx+esi]
+
+			punpcklbw	mm1, mm0
+			pmullw		mm1, mm3
+			punpcklbw	mm2, mm0
+			movq		mm7, mm2
+			pmullw		mm2, mm5
+			paddusw		mm2, mm1
+			paddusw		mm2, mm4
+			psrlw		mm2, 0x03
+			packuswb	mm2, mm0
+
+			pmullw		mm6, mm5
+			mov			edx, ecx
+			add			edx, ds:[ebp].HALF_WIDTH
+			movd		[edx+esi], mm2
+
+			add			ebx, ds:[ebp].HALF_WIDTH
+			movd		mm2, [ebx+esi]
+			punpcklbw	mm2, mm0
+			pmullw		mm2, mm3
+			paddusw		mm2, mm6
+			paddusw		mm2, mm4
+			psrlw		mm2, 0x03
+			packuswb	mm2, mm0
+
+			pmullw		mm7, [mmmask_0007]
+			add			edx, ds:[ebp].HALF_WIDTH
+			add			ebx, ds:[ebp].HALF_WIDTH
+ 			movd		[edx+esi], mm2
+
+			movd		mm2, [ebx+esi]
+			punpcklbw	mm2, mm0
+			paddusw		mm2, mm7
+			paddusw		mm2, mm4
+			psrlw		mm2, 0x03
+			packuswb	mm2, mm0
+
+			add			edx, ds:[ebp].HALF_WIDTH
+			add			esi, 0x04
+			cmp			esi, ds:[ebp].HALF_WIDTH
+			movd		[edx+esi-4], mm2
+
+			jl			convyuv422i
+			add			eax, ds:[ebp].Coded_Picture_Width
+			add			ecx, ds:[ebp].DOUBLE_WIDTH
+			mov			esi, 0x00
+			dec			edi
+			cmp			edi, 0x00
+			jg			convyuv422i
+
+convyuv422bottomi:
+			movd		mm1, [eax+esi]
+			movq		mm6, mm1
+			punpcklbw	mm1, mm0
+			mov			ebx, eax
+			sub			ebx, ds:[ebp].Coded_Picture_Width
+			movd		mm3, [ebx+esi]
+			punpcklbw	mm3, mm0
+			pmullw		mm1, [mmmask_0007]
+			paddusw		mm3, mm1
+			paddusw		mm3, mm4
+			psrlw		mm3, 0x03
+			packuswb	mm3, mm0
+
+			add			ebx, ds:[ebp].HALF_WIDTH
+			movq		mm1, [ebx+esi]
+			punpcklbw	mm1, mm0
+			movd		[ecx+esi], mm3
+
+			pmullw		mm1, [mmmask_0003]
+			add			ebx, ds:[ebp].Coded_Picture_Width
+			movd		mm2, [ebx+esi]
+			punpcklbw	mm2, mm0
+			movq		mm7, mm2
+			pmullw		mm2, mm5
+			paddusw		mm2, mm1
+			paddusw		mm2, mm4
+			psrlw		mm2, 0x03
+			packuswb	mm2, mm0
+
+			mov			edx, ecx
+			add			edx, ds:[ebp].HALF_WIDTH
+			pmullw		mm7, [mmmask_0007]
+			movd		[edx+esi], mm2
+
+			add			edx, ds:[ebp].HALF_WIDTH
+ 			movd		[edx+esi], mm6
+
+			punpcklbw	mm6, mm0
+			paddusw		mm6, mm7
+			paddusw		mm6, mm4
+			psrlw		mm6, 0x03
+			packuswb	mm6, mm0
+
+			add			edx, ds:[ebp].HALF_WIDTH
+			add			esi, 0x04
+			cmp			esi, ds:[ebp].HALF_WIDTH
+			movd		[edx+esi-4], mm6
+
+			jl			convyuv422bottomi
+			pop			ebp
+		}
+	}
+}
+
+// Converts three planar inputs with one chroma sample per luma sample (py, pu and pv are
+// all addressed with the same index) into packed 3-byte pixels in dst, using MMX.
+//
+//  py, pu, pv  source planes; rows are Coded_Picture_Width bytes apart
+//  dst         output buffer; 12 bytes are written for every 4 source pixels
+//  pitch       distance in bytes between two output rows
+//
+// Clip_Height rows are converted. The inner loop handles 4 pixels per pass and runs while
+// the column index is below Clip_Width, so a width that is not a multiple of 4 is rounded
+// up to the next multiple of 4.
+// As read from the shuffle sequence at the end of the loop, each pixel is stored as the
+// RGB_CBU result, then the RGB_CGX result, then the RGB_CRV result.
+void CMPEG2Dec::conv444toRGB24(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int pitch)
+{
+	// Bytes to skip after the last pixel of a row to reach the start of the next output row
+	// (Open() sets DSTBYTES to Clip_Width * 3 when the destination format is RGB24).
+	int PWIDTH = pitch - DSTBYTES;
+
+	// Skip to the first sample inside the clipping rectangle.
+	py += CLIP_STEP;
+	pu += CLIP_STEP;
+	pv += CLIP_STEP;
+
+	// Local copies of the members used below, so the __asm block can name them directly.
+	int Clip_Height = this->Clip_Height;
+	__int64 RGB_Offset = this->RGB_Offset;
+	__int64 RGB_Scale = this->RGB_Scale;
+	__int64 RGB_CBU = this->RGB_CBU;
+	__int64 RGB_CRV = this->RGB_CRV;
+	__int64 RGB_CGX = this->RGB_CGX;
+	int Clip_Width = this->Clip_Width;
+	int Coded_Picture_Width = this->Coded_Picture_Width;
+
+	__asm
+	{
+		; eax/ebx/ecx = current row of py/pu/pv, edx = output cursor,
+		; edi = rows left, esi = column index, mm0 = 0 for the unpack instructions
+		mov			eax, [py]
+		mov			ebx, [pu]
+		mov			ecx, [pv]
+		mov			edx, [dst]
+		mov			edi, Clip_Height
+		mov			esi, 0x00
+		pxor		mm0, mm0
+
+convRGB24:
+		; load 4 bytes from each plane and widen them to 16-bit words
+		movd		mm1, [eax+esi]
+		movd		mm3, [ebx+esi]
+		punpcklbw	mm1, mm0
+		punpcklbw	mm3, mm0
+		movd		mm5, [ecx+esi]
+		punpcklbw	mm5, mm0
+		; remove the chroma bias (mmmask_0128 is defined elsewhere; presumably 128 in every word)
+		movq		mm7, [mmmask_0128]
+		psubw		mm3, mm7
+		psubw		mm5, mm7
+
+		; luma term: (Y - RGB_Offset) interleaved with the words of mmmask_0001 and
+		; multiply-added with RGB_Scale gives one 32-bit value per pixel
+		; (mm1 = pixels 0-1, mm2 = pixels 2-3); it is added to every channel below
+		psubw		mm1, RGB_Offset
+		movq		mm2, mm1
+		movq		mm7, [mmmask_0001]
+		punpcklwd	mm1, mm7
+		punpckhwd	mm2, mm7
+		movq		mm7, RGB_Scale
+		pmaddwd		mm1, mm7
+		pmaddwd		mm2, mm7
+
+		; channel from pu: pu * RGB_CBU + luma term, shifted right by 13, packed with saturation
+		movq		mm4, mm3
+		punpcklwd	mm3, mm0
+		punpckhwd	mm4, mm0
+		movq		mm7, RGB_CBU
+		pmaddwd		mm3, mm7
+		pmaddwd		mm4, mm7
+		paddd		mm3, mm1
+		paddd		mm4, mm2
+		psrld		mm3, 13
+		psrld		mm4, 13
+		packuswb	mm3, mm0
+		packuswb	mm4, mm0
+
+		; channel from pv: pv * RGB_CRV + luma term, same shift and pack
+		movq		mm6, mm5
+		punpcklwd	mm5, mm0
+		punpckhwd	mm6, mm0
+		movq		mm7, RGB_CRV
+		pmaddwd		mm5, mm7
+		pmaddwd		mm6, mm7
+		paddd		mm5, mm1
+		paddd		mm6, mm2
+		psrld		mm5, 13
+		psrld		mm6, 13
+		packuswb	mm5, mm0
+		packuswb	mm6, mm0
+
+		; merge the two channels computed so far into adjacent bytes per pixel
+		punpcklbw	mm3, mm5
+		punpcklbw	mm4, mm6
+		movq		mm5, mm3
+		movq		mm6, mm4
+		psrlq		mm5, 16
+		psrlq		mm6, 16
+		por			mm3, mm5
+		por			mm4, mm6
+
+		; remaining channel: both chroma planes are reloaded, interleaved as (pv, pu)
+		; word pairs and multiply-added with RGB_CGX, then the luma term is added
+		movd		mm5, [ebx+esi]
+		movd		mm6, [ecx+esi]
+		punpcklbw	mm5, mm0
+		punpcklbw	mm6, mm0
+		movq		mm7, [mmmask_0128]
+		psubw		mm5, mm7
+		psubw		mm6, mm7
+
+		movq		mm7, mm6
+		punpcklwd	mm6, mm5
+		punpckhwd	mm7, mm5		
+		movq		mm5, RGB_CGX
+		pmaddwd		mm6, mm5
+		pmaddwd		mm7, mm5
+		paddd		mm6, mm1
+		paddd		mm7, mm2
+
+		psrld		mm6, 13
+		psrld		mm7, 13
+		packuswb	mm6, mm0
+		packuswb	mm7, mm0
+
+		; insert the third channel between the other two: 4 bytes per pixel, top byte unused
+		punpcklbw	mm3, mm6
+		punpcklbw	mm4, mm7
+
+		; squeeze the four 4-byte pixels into 12 consecutive output bytes
+		movq		mm1, mm3
+		movq		mm5, mm4
+		movq		mm6, mm4
+
+		psrlq		mm1, 32
+		psllq		mm1, 24
+		por			mm1, mm3
+
+		psrlq		mm3, 40
+		psllq		mm6, 16
+		por			mm3, mm6
+		movd		[edx], mm1
+
+		psrld		mm4, 16
+		psrlq		mm5, 24
+		por			mm5, mm4
+		movd		[edx+4], mm3
+
+		; advance: 12 output bytes, 4 source columns; the last store sits between
+		; cmp and jl (movd leaves the flags untouched)
+		add			edx, 0x0c
+		add			esi, 0x04
+		cmp			esi, Clip_Width
+		movd		[edx-4], mm5
+
+		jl			convRGB24
+
+		; next row: step all three source planes and skip the output row padding
+		add			eax, Coded_Picture_Width
+		add			ebx, Coded_Picture_Width
+		add			ecx, Coded_Picture_Width
+		add			edx, PWIDTH
+		mov			esi, 0x00
+		dec			edi
+		cmp			edi, 0x00
+		jg			convRGB24
+
+		; leave MMX state so that x87 code can run afterwards
+		emms
+	}
+}
+
+// YUV 4:2:2 Format:
+// YUYV YUYV ...
+// Interleaves a luma plane and two half-width chroma planes into packed output
+// (byte order in memory: Y0 U0 Y1 V0 ...), using MMX.
+//
+//  py          luma plane, rows Coded_Picture_Width bytes apart
+//  pu, pv      chroma planes, rows Coded_Picture_Width/2 bytes apart
+//  dst         output buffer, 2 bytes per pixel
+//  pitch       distance in bytes between two output rows
+//
+// Clip_Height rows are written. Each inner pass consumes 8 luma and 4+4 chroma bytes and
+// writes 16 bytes; the loop runs while the chroma column index is below Clip_Width/2, so
+// the width is effectively rounded up to a multiple of 8 pixels.
+// NOTE(review): all three pointers are advanced by the same CLIP_STEP although the chroma
+// planes are half as wide - confirm this is intended when clipping is active.
+void CMPEG2Dec::conv422toYUY2(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int pitch)
+{
+	py += CLIP_STEP;
+	pu += CLIP_STEP;
+	pv += CLIP_STEP;
+
+	// Locals so the __asm block can name them; y is the row countdown.
+	int y = this->Clip_Height;
+	int Clip_Width_2 = this->Clip_Width / 2;
+	int Coded_Picture_Width = this->Coded_Picture_Width;
+	int Coded_Picture_Width_2 = this->Coded_Picture_Width / 2;
+
+	__asm
+	{
+		emms
+		; eax/ebx/ecx = current row of py/pu/pv, edx = current output row,
+		; edi = number of chroma columns per row, esi = chroma column index
+		mov			eax, [py]
+		mov			ebx, [pu]
+		mov			ecx, [pv]
+		mov			edx, [dst]
+		mov			edi, Clip_Width_2
+	yloop:
+		xor			esi, esi
+	xloop:
+		; register contents in the comments below are written most significant byte first
+		movd		mm1, [eax+esi*2]		;0000YYYY
+		movd		mm2, [ebx+esi]			;0000UUUU
+		movd		mm3, [ecx+esi]			;0000VVVV
+		;interleave this to VYUYVYUY
+		punpcklbw	mm2, mm3				;VUVUVUVU
+		punpcklbw	mm1, mm2				;VYUYVYUY
+		movq		[edx+esi*4], mm1
+		; second half: next 4 luma bytes with the upper two U/V pairs
+		movd		mm1, [eax+esi*2+4]		;0000YYYY
+		punpckhdq	mm2, mm2				;xxxxVUVU
+		punpcklbw	mm1, mm2				;VYUYVYUY
+		movq		[edx+esi*4+8], mm1
+		add			esi, 4
+		cmp			esi, edi
+		jb			xloop
+		; next row of output, luma and both chroma planes
+		add			edx, pitch
+		add			eax, Coded_Picture_Width
+		add			ebx, Coded_Picture_Width_2
+		add			ecx, Coded_Picture_Width_2
+		dec			y
+		jnz			yloop
+		emms
+	}
+}
+
+//
+// codec
+//
+
+// Lookup table indexed by chroma_format; Open() copies the selected entry into block_count.
+static const int ChromaFormat[4] =
+{
+	0,		// index 0 (no CHROMA* constant has this value)
+	6,		// CHROMA420
+	8,		// CHROMA422
+	12		// CHROMA444
+};
+
+// Puts the decoder into an idle state: counters and flags are zeroed, the read buffer and
+// quantizer matrices are cleared, every owned pointer is set to NULL, and CheckCPU() is run.
+CMPEG2Dec::CMPEG2Dec()
+{
+	int n;
+
+	// project file handle and frame/GOP bookkeeping
+	VF_File = 0;
+	VF_OldRef = 0;
+	VF_OldFrame = 0;
+	VF_GOPSize = 0;
+	VF_GOPNow = 0;
+	VF_GOPLimit = 0;
+	VF_FrameBound = 0;
+	VF_FrameLimit = 0;
+	VF_FrameRate = 0;
+	VF_FrameSize = 0;
+
+	// bitstream reader state
+	memset(Rdbfr, 0, sizeof(Rdbfr));
+	Rdmax = 0;
+	Rdptr = 0;
+	Read = 0;
+	Val = 0;
+	BitsLeft = 0;
+	NextBfr = 0;
+	CurrentBfr = 0;
+
+	// operation control flags
+	SystemStream_Flag = 0;
+	IDCT_Flag = 0;
+	FO_Flag = 0;
+	File_Limit = 0;
+	File_Flag = 0;
+	Fault_Flag = 0;
+	lfsr1 = 0;
+	lfsr0 = 0;
+	KeyOp_Flag = 0;
+	Resize_Flag = 0;
+	Luminance_Flag = 0;
+	BufferOp = 0;
+
+	// quantizer state
+	memset(intra_quantizer_matrix, 0, sizeof(intra_quantizer_matrix));
+	memset(non_intra_quantizer_matrix, 0, sizeof(non_intra_quantizer_matrix));
+	memset(chroma_intra_quantizer_matrix, 0, sizeof(chroma_intra_quantizer_matrix));
+	memset(chroma_non_intra_quantizer_matrix, 0, sizeof(chroma_non_intra_quantizer_matrix));
+	load_chroma_non_intra_quantizer_matrix = 0;
+	load_chroma_intra_quantizer_matrix = 0;
+	load_non_intra_quantizer_matrix = 0;
+	load_intra_quantizer_matrix = 0;
+	quantizer_scale = 0;
+	alternate_scan = 0;
+	q_scale_type = 0;
+
+	// owned buffers: nothing is allocated until Open()
+	for (n = 0; n < MAX_FILE_NUMBER; n++)
+		Infilename[n] = NULL;
+
+	for (n = 0; n < 8; n++)
+	{
+		block[n] = NULL;
+		p_block[n] = NULL;
+	}
+
+	fTempArray = NULL;
+	p_fTempArray = NULL;
+
+	for (n = 0; n < 3; n++)
+	{
+		auxframe[n] = NULL;
+		forward_reference_frame[n] = NULL;
+		backward_reference_frame[n] = NULL;
+	}
+
+	lum = NULL;
+	dstFrame = NULL;
+	v444 = NULL;
+	u444 = NULL;
+	v422 = NULL;
+	u422 = NULL;
+	hLibrary = NULL;
+
+	CheckCPU();
+}
+
+// Reads lines from file into buff (at most len-1 characters each, as fgets does) until one
+// contains a non-whitespace character.
+//
+// Returns a pointer into buff at the first non-whitespace character of that line.
+// When the end of the file (or a read error) is reached first, buff is set to the empty
+// string and buff itself is returned; NULL is only returned when buff is NULL. Callers
+// hand the result straight to sscanf(), so an empty string makes the parse fail cleanly
+// instead of dereferencing a null pointer.
+static char* myfgets(char* buff, int len, FILE* file)
+{
+	if(buff == NULL || len <= 0)
+		return(buff);
+
+	buff[0] = 0;
+
+	if(file == NULL)
+		return(buff);
+
+	while(fgets(buff, len, file))
+	{
+		char* ret = buff;
+
+		// isspace() requires a value representable as unsigned char
+		while(isspace((unsigned char)*ret)) ret++;
+		if(*ret) return(ret);
+	}
+
+	// fgets() may leave the last (blank) line in the buffer when it fails
+	buff[0] = 0;
+	return(buff);
+}
+
+// Returns the next non-blank line of the project file (see myfgets), or an empty string
+// stored in buffer when no further line is available, so that the result can always be
+// passed to sscanf().
+static const char* NextD2VLine(char* buffer, int len, FILE* file)
+{
+	char* line = myfgets(buffer, len, file);
+
+	if (line == NULL)
+	{
+		buffer[0] = 0;
+		line = buffer;
+	}
+
+	return line;
+}
+
+// Opens a DVD2AVI project file and prepares the decoder for Decode().
+//
+//  path        project file to read
+//  dstFormat   output format; stored in m_dstFormat and used to size an output row
+//
+// Steps: verify the "DVD2AVIProjectFile" signature, optionally load OpenDVD.dll (its
+// folder is taken from HKCU\Software\VFPlugin, value "DVD2AVI"), open the listed source
+// files, read the first sequence header, allocate the work buffers, parse the settings
+// lines and finally build GOPList / FrameList from the frame index.
+//
+// Returns 1 on success and 0 on any failure. Resources acquired before a failure stay
+// owned by the object and are released by Close().
+int CMPEG2Dec::Open(LPCTSTR path, DstFormat dstFormat)
+{
+	m_dstFormat = dstFormat;
+	char ID[19], PASS[19] = "DVD2AVIProjectFile";
+	DWORD i, j, size, code, type, tff, rff, film, ntsc, gop, top, bottom, mapping;
+	int repeat_on, repeat_off, repeat_init;
+	int Clip_Top, Clip_Bottom, Clip_Left, Clip_Right, Squeeze_Width, Squeeze_Height;
+	int fileCount = 0;
+
+	HKEY key; DWORD value = REG_SZ; DWORD length;
+	char *ext, buffer[256];
+	bool haveDllPath = false;
+
+	CMPEG2Dec* out = this;
+
+	tff = rff = 0;
+
+	out->VF_File = _tfopen(path, _T("r"));
+	if (out->VF_File==NULL)
+		return 0;
+	if (fgets(ID, 19, out->VF_File)==NULL)
+		return 0;
+	if (strcmp(ID, PASS))
+		return 0;
+
+	// load DLL
+	// Only read access is needed; the path is used only when the value was actually read,
+	// is a string, and has room for the DLL name.
+	buffer[0] = 0;
+	if (RegOpenKeyExA(HKEY_CURRENT_USER, "Software\\VFPlugin", 0, KEY_QUERY_VALUE, &key)==ERROR_SUCCESS)
+	{
+		length = sizeof(buffer) - 1;
+		if (RegQueryValueExA(key, "DVD2AVI", NULL, &value, (unsigned char*)buffer, &length)==ERROR_SUCCESS
+			&& value==REG_SZ && length < sizeof(buffer))
+		{
+			// registry strings are not guaranteed to be stored with a terminator
+			buffer[length] = 0;
+
+			ext = strrchr(buffer, '\\');
+			if (ext != NULL && (size_t)(ext - buffer) + 1 + sizeof("OpenDVD.dll") <= sizeof(buffer))
+			{
+				strcpy(ext + 1, "OpenDVD.dll");
+				haveDllPath = true;
+			}
+		}
+		RegCloseKey(key);
+	}
+
+	if (haveDllPath && (hLibrary = LoadLibraryA(buffer)) != NULL)
+		BufferOp = (PBufferOp) GetProcAddress(hLibrary, "BufferOp");
+
+	for (i=0; i<MAX_FILE_NUMBER; i++)
+		Infilename[i] = DNew char[_MAX_PATH];
+
+	// Source file list: a count, then "<name length> <name>" for every file.
+	if(1 != fscanf(out->VF_File, "%d", &fileCount))
+		return 0;
+	if (fileCount < 1 || fileCount > MAX_FILE_NUMBER)
+		return 0;
+
+	// File_Limit always equals the number of descriptors opened so far, so that Close()
+	// never closes an entry of Infile[] that was not opened.
+	File_Limit = 0;
+	for (i=0; i<(DWORD)fileCount; i++)
+	{
+		if(1 != fscanf(out->VF_File, "%d ", &j))
+			return 0;
+		if (j >= _MAX_PATH)		// name (plus terminator) must fit into Infilename[i]
+			return 0;
+		if (fgets(Infilename[i], j+1, out->VF_File)==NULL)
+			return 0;
+		if ((Infile[i] = _open(Infilename[i], _O_RDONLY | _O_BINARY))==-1)
+			return 0;
+		File_Limit = i + 1;
+	}
+
+	if(3 != sscanf(NextD2VLine(buffer, sizeof(buffer), out->VF_File), "\nStream_Type=%d,%X,%X\n", &SystemStream_Flag, &lfsr0, &lfsr1))
+		return 0;
+	if (lfsr0 || lfsr1)
+		KeyOp_Flag = 1;
+	else
+		KeyOp_Flag = 0;
+
+	if(1 != sscanf(NextD2VLine(buffer, sizeof(buffer), out->VF_File), "iDCT_Algorithm=%d\n", &IDCT_Flag))
+		return 0;
+
+	switch (IDCT_Flag)
+	{
+		case IDCT_SSEMMX:
+			if (!cpu.ssemmx)
+				IDCT_Flag = IDCT_MMX;
+			break;
+
+		case IDCT_FPU:
+			Initialize_FPU_IDCT();
+			break;
+
+		case IDCT_REF:
+			Initialize_REF_IDCT();
+			break;
+	}
+
+	// Read the first sequence header of the first source file to learn the picture geometry.
+	File_Flag = 0;
+	_lseeki64(Infile[0], 0, SEEK_SET);
+	Initialize_Buffer();
+
+	do
+	{
+		next_start_code();
+		code = Get_Bits(32);
+	}
+	while (code!=SEQUENCE_HEADER_CODE);
+
+	sequence_header();
+
+	mb_width = (horizontal_size+15)/16;
+	mb_height = progressive_sequence ? (vertical_size+15)/16 : 2*((vertical_size+31)/32);
+
+	Coded_Picture_Width = 16 * mb_width;
+	Coded_Picture_Height = 16 * mb_height;
+
+	Chroma_Width = (chroma_format==CHROMA444) ? Coded_Picture_Width : Coded_Picture_Width>>1;
+	Chroma_Height = (chroma_format!=CHROMA420) ? Coded_Picture_Height : Coded_Picture_Height>>1;
+
+	block_count = ChromaFormat[chroma_format];
+
+	// Work blocks are over-allocated by 64 bytes and the usable pointer is moved up to the
+	// next 64-byte boundary. The arithmetic is unsigned so that addresses with the top
+	// bit set cannot yield a negative remainder and push the pointer past the allocation.
+	for (i=0; i<8; i++)
+	{
+		p_block[i] = (short *)DNew BYTE[sizeof(short)*64 + 64];
+		block[i]   = (short *)((DWORD_PTR)p_block[i] + 64 - (DWORD_PTR)p_block[i]%64);
+	}
+
+	p_fTempArray = (void *)DNew BYTE[sizeof(float)*128 + 64];
+	fTempArray = (void *)((DWORD_PTR)p_fTempArray + 64 - (DWORD_PTR)p_fTempArray%64);
+
+	for (i=0; i<3; i++)
+	{
+		if (i==0)
+			size = Coded_Picture_Width * Coded_Picture_Height;
+		else
+			size = Chroma_Width * Chroma_Height;
+
+		backward_reference_frame[i] = DNew unsigned char[size];
+		forward_reference_frame[i] = DNew unsigned char[size];
+		auxframe[i] = DNew unsigned char[size];
+	}
+
+	if(1 != sscanf(NextD2VLine(buffer, sizeof(buffer), out->VF_File), "YUVRGB_Scale=%d\n", &i))
+		return 0;
+
+	// Two sets of packed 16-bit conversion coefficients, selected by YUVRGB_Scale.
+	if (i)
+	{
+		RGB_Scale = 0x1000254310002543;
+		RGB_Offset = 0x0010001000100010;
+		RGB_CBU = 0x0000408D0000408D;
+		RGB_CGX = 0xF377E5FCF377E5FC;
+		RGB_CRV = 0x0000331300003313;
+	}
+	else
+	{
+		RGB_Scale = 0x1000200010002000;
+		RGB_Offset = 0x0000000000000000;
+		RGB_CBU = 0x000038B4000038B4;
+		RGB_CGX = 0xF4FDE926F4FDE926;
+		RGB_CRV = 0x00002CDD00002CDD;
+	}
+
+	// Either "Luminance=" or "Luminance_Filter=" is accepted; the values of the latter
+	// are ignored and the neutral setting (128, 0) is used instead.
+	const char* tmp = NextD2VLine(buffer, sizeof(buffer), out->VF_File);
+	if(2 != sscanf(tmp, "Luminance=%d,%d\n", &i, &j))
+	{
+		if(2 != sscanf(tmp, "Luminance_Filter=%d,%d\n", &i, &j))
+			return 0;
+		i=128; j=0;
+	}
+
+	if (i==128 && j==0)
+		Luminance_Flag = 0;
+	else
+	{
+		Luminance_Flag = 1;
+		LumGainMask = ((__int64)i<<48) + ((__int64)i<<32) + ((__int64)i<<16) + (__int64)i;
+		LumOffsetMask = ((__int64)j<<48) + ((__int64)j<<32) + ((__int64)j<<16) + (__int64)j;
+
+		lum = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height];
+	}
+
+	if(6 != sscanf(NextD2VLine(buffer, sizeof(buffer), out->VF_File), "Picture_Size=%d,%d,%d,%d,%d,%d\n", 
+		&Clip_Top, &Clip_Bottom, &Clip_Left, &Clip_Right, &Squeeze_Width, &Squeeze_Height))
+		return 0;
+
+	// Reject cropping values that would leave an empty or negative picture; the clipped
+	// size is used below to size buffers.
+	if (Clip_Top < 0 || Clip_Bottom < 0 || Clip_Left < 0 || Clip_Right < 0 ||
+		Clip_Left >= Coded_Picture_Width || Clip_Right >= Coded_Picture_Width - Clip_Left ||
+		Clip_Top >= Coded_Picture_Height || Clip_Bottom >= Coded_Picture_Height - Clip_Top)
+		return 0;
+
+	Resize_Flag = 0;
+	Resize_Width = Clip_Width = Coded_Picture_Width;
+	Resize_Height = Clip_Height = Coded_Picture_Height;
+	CLIP_AREA = HALF_CLIP_AREA = CLIP_STEP = 0;
+
+	if (Clip_Top || Clip_Bottom || Clip_Left || Clip_Right)
+	{
+		Clip_Width -= Clip_Left+Clip_Right;
+		Clip_Height -= Clip_Top+Clip_Bottom;
+		Resize_Width = Clip_Width;
+		Resize_Height = Clip_Height;
+
+		CLIP_AREA = Coded_Picture_Width * Clip_Top;
+		HALF_CLIP_AREA = (Coded_Picture_Width>>1) * Clip_Top;
+		CLIP_STEP = Coded_Picture_Width * Clip_Top + Clip_Left;
+	}
+
+	if (Squeeze_Width || Squeeze_Height)
+	{
+		Resize_Flag = 1;
+		Resize_Width -= Squeeze_Width;
+		Resize_Height -= Squeeze_Height;
+	}
+
+	// Derived sizes used by the conversion and copy routines.
+	DSTBYTES = Clip_Width * (dstRGB24() ? 3 : 2);
+	DSTBYTES2 = DSTBYTES * 2;
+	LUM_AREA = Coded_Picture_Width * Clip_Height;
+	PROGRESSIVE_HEIGHT = (Coded_Picture_Height>>1) - 2;
+	INTERLACED_HEIGHT = (Coded_Picture_Height>>2) - 2;
+	HALF_WIDTH = Coded_Picture_Width>>1;
+	HALF_WIDTH_D8 = (Coded_Picture_Width>>1) - 8;
+	DOUBLE_WIDTH = Coded_Picture_Width<<1;
+
+	u422 = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height / 2];
+	v422 = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height / 2];
+	u444 = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height];
+	v444 = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height];
+	dstFrame = DNew unsigned char[Clip_Width * Clip_Height * 4];  // max value (super set)
+
+	if(1 != sscanf(NextD2VLine(buffer, sizeof(buffer), out->VF_File), "Field_Operation=%d\n", &FO_Flag))
+		return 0;
+	if(1 != sscanf(NextD2VLine(buffer, sizeof(buffer), out->VF_File), "Frame_Rate=%d\n", &(out->VF_FrameRate)))
+		return 0;
+	// the Location values are parsed only to validate the line; they are not kept
+	if(4 != sscanf(NextD2VLine(buffer, sizeof(buffer), out->VF_File), "Location=%d,%X,%d,%X\n", &i, &j, &i, &j))
+		return 0;
+
+	ntsc = film = top = bottom = gop = mapping = repeat_on = repeat_off = repeat_init = 0;
+
+	// Frame index: one number per coded frame. 7 introduces a GOP entry (file number,
+	// position in BUFFER_SIZE units, then the flags of its first frame); any other value
+	// below 9 is the flags of one frame: bit 1 -> tff, bit 0 -> rff.
+	while (1 == fscanf(out->VF_File, "%d", &type) && type<9)
+	{
+		// One pass adds at most one GOP entry and touches at most two new frame slots;
+		// stop before the fixed-size GOPList / FrameList tables would overflow.
+		if (gop >= MAX_FRAME_NUMBER || mapping + 2 >= MAX_FRAME_NUMBER || ntsc + 2 >= MAX_FRAME_NUMBER)
+			break;
+
+		if (type==7)	// I frame
+		{
+			GOPList[gop] = reinterpret_cast<GOPLIST*>(calloc(1, sizeof(GOPLIST)));
+			GOPList[gop]->number = film;
+			if(2 != fscanf(out->VF_File, "%d %X", &(GOPList[gop]->file), &j))
+			{
+				// entry was never counted in gop, so Close() would not release it
+				free(GOPList[gop]);
+				GOPList[gop] = NULL;
+				break;
+			}
+
+			GOPList[gop]->position = (__int64)j*BUFFER_SIZE;
+			gop ++;
+
+			if(1 != fscanf(out->VF_File, "%d", &j))
+				break;
+
+			tff = j>>1;
+			rff = j & 1;
+		}
+		else	// P, B frame
+		{
+			tff = type>>1;
+			rff = type & 1;
+		}
+
+		// the field order is taken from the very first frame
+		if (!film)
+		{
+			if (tff)
+				Field_Order = 1;
+			else
+				Field_Order = 0;
+		}
+
+		if (FO_Flag==FO_FILM)
+		{
+			if (rff)
+				repeat_on++;
+			else
+				repeat_off++;
+
+			if (repeat_init)
+			{
+				if (repeat_off-repeat_on == 5)
+				{
+					repeat_on = repeat_off = 0;
+				}
+				else
+				{
+					FrameList[mapping] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+					FrameList[mapping]->top = FrameList[mapping]->bottom = film;
+					mapping ++;
+				}
+
+				if (repeat_on-repeat_off == 5)
+				{
+					repeat_on = repeat_off = 0;
+					FrameList[mapping] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+					FrameList[mapping]->top = FrameList[mapping]->bottom = film;
+					mapping ++;
+				}
+			}
+			else
+			{
+				if (repeat_off-repeat_on == 3)
+				{
+					repeat_on = repeat_off = 0;
+					repeat_init = 1;
+				}
+				else
+				{
+					FrameList[mapping] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+					FrameList[mapping]->top = FrameList[mapping]->bottom = film;
+					mapping ++;
+				}
+
+				if (repeat_on-repeat_off == 3)
+				{
+					repeat_on = repeat_off = 0;
+					repeat_init = 1;
+
+					FrameList[mapping] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+					FrameList[mapping]->top = FrameList[mapping]->bottom = film;
+					mapping ++;
+				}
+			}
+		}
+		else
+		{
+			// top / bottom: an output frame at FrameList[ntsc] is waiting for its other field
+			if (top)
+			{
+				FrameList[ntsc]->bottom = film;
+				ntsc ++;
+				FrameList[ntsc] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+				FrameList[ntsc]->top = film;
+			}
+			else if (bottom)
+			{
+				FrameList[ntsc]->top = film;
+				ntsc ++;
+				FrameList[ntsc] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+				FrameList[ntsc]->bottom = film;
+			}
+			else
+			{
+				FrameList[ntsc] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+				FrameList[ntsc]->top = film;
+				FrameList[ntsc]->bottom = film;
+				ntsc ++;
+			}
+
+			if (rff)
+			{
+				if (!top && !bottom)
+					FrameList[ntsc] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+
+				if (tff)
+				{
+					FrameList[ntsc]->top = film;
+					top = 1;
+				}
+				else
+				{
+					FrameList[ntsc]->bottom = film;
+					bottom = 1;
+				}
+
+				if (top && bottom)
+				{
+					top = bottom = 0;
+					ntsc ++;
+				}
+			}
+		}
+
+		film ++;
+	}
+
+	out->VF_FrameBound = film;
+	film -= 2;
+
+	// Drop trailing output frames that refer to the last two coded frames. The loops stop
+	// at index 0 instead of running off the front of FrameList.
+	if (FO_Flag==FO_FILM)
+	{
+		while (mapping > 0 && FrameList[mapping-1]->top >= film)
+			mapping --;
+
+		out->VF_FrameLimit = mapping;
+	}
+	else
+	{
+		if (FO_Flag==FO_SWAP)
+		{
+			Field_Order = !Field_Order;
+
+			if (Field_Order)
+				for (i=0; i+1<ntsc; i++)
+					FrameList[i]->bottom = FrameList[i+1]->bottom;
+			else
+				for (i=0; i+1<ntsc; i++)
+					FrameList[i]->top = FrameList[i+1]->top;
+		}
+
+		while (ntsc > 0 && ((FrameList[ntsc-1]->top >= film) || (FrameList[ntsc-1]->bottom >= film)))
+			ntsc --;
+
+		out->VF_FrameLimit = ntsc;
+
+		// neighbouring output frames that share a coded frame are linked
+		for (i=0; i+1<out->VF_FrameLimit; i++)
+			if (FrameList[i]->top==FrameList[i+1]->top || FrameList[i]->top==FrameList[i+1]->bottom ||
+				FrameList[i]->bottom==FrameList[i+1]->top || FrameList[i]->bottom==FrameList[i+1]->bottom)
+			{
+				FrameList[i]->forward = 1;
+				FrameList[i+1]->backward = 1;
+			}
+	}
+
+	// Full_Frame: every output frame takes both fields from the same coded frame
+	Full_Frame = 1;
+	for (i=0; i<out->VF_FrameLimit; i++)
+		if (FrameList[i]->top!=FrameList[i]->bottom)
+		{
+			Full_Frame = 0;
+			break;
+		}
+
+	out->VF_GOPNow = out->VF_GOPLimit = gop;
+	out->VF_OldFrame = out->VF_FrameLimit;
+	out->VF_FrameSize = Clip_Width * Clip_Height * 3;
+
+	// A project without any GOP entry or output frame cannot be decoded.
+	if (gop == 0 || out->VF_FrameLimit == 0)
+		return 0;
+
+	return 1;
+}
+
+// Produces output frame number `frame` in dst.
+//
+//  dst     destination picture
+//  frame   output frame index, must be below VF_FrameLimit
+//  pitch   distance in bytes between two rows of dst
+//
+// The output frame is mapped through FrameList to a coded frame (and, when its two fields
+// come from different coded frames, to a field combination `fo`). Three paths exist:
+//  - the frame directly follows the previous request across a GOP boundary: decoding
+//    simply continues and the GOP cache is dropped;
+//  - the frame lies in another GOP than the cached one: seek to that GOP, decode from its
+//    start up to the requested frame and cache every decoded picture in GOPBuffer;
+//  - the frame lies in the cached GOP: decode only the pictures not cached yet.
+// In the last two cases the result is copied from GOPBuffer into dst.
+//
+// The call returns without touching dst when the request cannot be served (no index,
+// frame out of range, frame before the first indexed GOP, GOP larger than the cache, or
+// out of memory).
+void CMPEG2Dec::Decode(unsigned char *dst, DWORD frame, int pitch)
+{
+	DWORD i, now, size, origin, ref, fo;
+	int remain;
+
+	CMPEG2Dec* in = this;
+
+	// FrameList / GOPList are only valid for indexes below the limits set by Open()
+	if (dst == NULL || in->VF_GOPLimit == 0 || frame >= in->VF_FrameLimit)
+		return;
+
+	fo = 0;
+	now = 0;
+	size = 0;	// stays 0 when no GOP contains the frame
+
+	if (FO_Flag==FO_FILM)
+	{
+		fo = 0;
+		frame = FrameList[frame]->top;
+	}
+
+	origin = frame;
+
+	if (FO_Flag!=FO_FILM)
+	{
+		// fo: 0 = both fields from one coded frame, 1 = top field from the earlier coded
+		// frame, 2 = bottom field from the earlier coded frame
+		if (FrameList[frame]->top == FrameList[frame]->bottom)
+		{
+			fo = 0;
+			frame = FrameList[frame]->top;
+		}
+		else if (FrameList[frame]->top < FrameList[frame]->bottom)
+		{
+			fo = 1;
+			frame = FrameList[frame]->top;
+		}
+		else
+		{
+			fo = 2;
+			frame = FrameList[frame]->bottom;
+		}
+	}
+
+	ref = frame;
+
+	// Find the GOP containing the coded frame; ref becomes the offset inside that GOP and
+	// size the number of cache slots the GOP needs.
+	if (frame >= GOPList[in->VF_GOPLimit-1]->number)
+	{
+		now = in->VF_GOPLimit-1;
+		ref -= GOPList[in->VF_GOPLimit-1]->number;
+		size = in->VF_FrameBound - GOPList[in->VF_GOPLimit-1]->number + 1;
+	}
+	else
+		for (now = 0; now < (in->VF_GOPLimit-1); now++)
+		{
+			if (frame>=GOPList[now]->number && frame<GOPList[now+1]->number)
+			{
+				ref -= GOPList[now]->number;
+				size = GOPList[now+1]->number - GOPList[now]->number + 1;
+				break;
+			}
+		}
+
+	// the coded frame precedes the first indexed GOP: nothing to seek to
+	if (size == 0)
+		return;
+
+	if (fo)
+		ref ++;
+
+	if (now != in->VF_GOPNow)
+	{
+		if ((in->VF_OldFrame + 1)==origin)
+		{
+			// sequential playback: keep decoding from the current stream position
+			if (Full_Frame)
+			{
+				Get_Hdr();
+				Decode_Picture(1, dst, pitch);
+
+				if (picture_structure!=FRAME_PICTURE)
+				{
+					Get_Hdr();
+					Decode_Picture(1, dst, pitch);
+				}
+			}
+			else
+				switch (fo)
+				{
+					case 0:
+						if (!FrameList[origin]->backward)
+						{
+							Get_Hdr();
+							Decode_Picture(1, dst, pitch);
+
+							if (picture_structure!=FRAME_PICTURE)
+							{
+								Get_Hdr();
+								Decode_Picture(1, dst, pitch);
+							}
+
+							// keep the field the next output frame will reuse
+							if (FrameList[origin]->forward)
+							{
+								if (Field_Order)
+									Copyodd(dst, dstFrame, pitch, 1);
+								else
+									Copyeven(dst, dstFrame, pitch, 1);
+							}
+						}
+						else
+						{
+							Copyodd(dstFrame, dst, pitch, 0);
+							Copyeven(dstFrame, dst, pitch, 0);
+						}
+						break;
+
+					case 1:
+						Copyodd(dstFrame, dst, pitch, 0);
+
+						Get_Hdr();
+						Decode_Picture(1, dstFrame, DSTBYTES);
+
+						if (picture_structure!=FRAME_PICTURE)
+						{
+							Get_Hdr();
+							Decode_Picture(1, dstFrame, DSTBYTES);
+						}
+
+						Copyeven(dstFrame, dst, pitch, 0);
+						break;
+
+					case 2:	
+						Copyeven(dstFrame, dst, pitch, 0);
+
+						Get_Hdr();
+						Decode_Picture(1, dstFrame, DSTBYTES);
+
+						if (picture_structure!=FRAME_PICTURE)
+						{
+							Get_Hdr();
+							Decode_Picture(1, dstFrame, DSTBYTES);
+						}
+
+						Copyodd(dstFrame, dst, pitch, 0);
+						break;
+				}
+
+			if (in->VF_GOPSize)
+			{
+				for (i=0; i < in->VF_GOPSize; i++)
+					free(GOPBuffer[i]);
+
+				in->VF_GOPSize = 0;
+			}
+
+			// VF_GOPLimit is never a valid GOP index, so the next call cannot take the
+			// "cached GOP" path
+			in->VF_GOPNow = in->VF_GOPLimit;
+			in->VF_OldFrame = origin;
+			return;
+		}
+
+		// GOPBuffer has MAX_GOP_SIZE slots; a larger GOP cannot be cached
+		if (size > MAX_GOP_SIZE)
+			return;
+
+		// Resize the cache before any decoder state is changed, so that a failed
+		// allocation leaves the object consistent (VF_GOPSize == number of valid buffers).
+		if (size < in->VF_GOPSize)
+			for (i=size; i < in->VF_GOPSize; i++)
+				free(GOPBuffer[i]);
+		else if (size > in->VF_GOPSize)
+			for (i=in->VF_GOPSize; i < size; i++)
+			{
+				GOPBuffer[i] = reinterpret_cast<unsigned char*>(malloc(in->VF_FrameSize));
+				if (GOPBuffer[i] == NULL)
+				{
+					in->VF_GOPSize = i;
+					return;
+				}
+			}
+
+		in->VF_GOPSize = size;
+
+		remain = ref;
+		in->VF_OldRef = ref;
+		in->VF_GOPNow = now;
+		Second_Field = 0;
+
+		File_Flag = GOPList[now]->file;
+		_lseeki64(Infile[GOPList[now]->file], GOPList[now]->position, SEEK_SET);
+		Initialize_Buffer();
+
+		// decode the I picture, skip the B pictures that follow it, then decode the
+		// next picture into slot 0
+		while (Get_Hdr() && picture_coding_type!=I_TYPE);
+
+		Decode_Picture(0, dst, pitch);
+
+		while (Get_Hdr() && picture_coding_type==B_TYPE);
+
+		if (picture_structure!=FRAME_PICTURE)
+		{
+			Decode_Picture(0, dst, pitch);
+			Get_Hdr();
+		}
+
+		Decode_Picture(1, dst, pitch);
+
+		if (picture_structure!=FRAME_PICTURE)
+		{
+			Get_Hdr();
+			Decode_Picture(1, dst, pitch);
+		}
+
+		Copyodd(dst, GOPBuffer[0], pitch, 1);
+		Copyeven(dst, GOPBuffer[0], pitch, 1);
+
+		// decode and cache the pictures up to the requested one
+		while (remain && Get_Hdr())
+		{
+			Decode_Picture(1, dst, pitch);
+
+			if (picture_structure!=FRAME_PICTURE)
+			{
+				Get_Hdr();
+				Decode_Picture(1, dst, pitch);
+			}
+
+			Copyodd(dst, GOPBuffer[ref - remain + 1], pitch, 1);
+			Copyeven(dst, GOPBuffer[ref - remain + 1], pitch, 1);
+
+			remain--;
+		}
+
+		if (!Full_Frame && ref>=(size-2))
+		{
+			Copyodd(dst, dstFrame, pitch, 1);
+			Copyeven(dst, dstFrame, pitch, 1);
+		}
+	}
+	else
+	{
+		// same GOP as last time: only decode what is not cached yet
+		remain = ref - in->VF_OldRef;
+
+		if (remain > 0)
+		{
+			in->VF_OldRef = ref;
+
+			while (remain && Get_Hdr())
+			{
+				Decode_Picture(1, dst, pitch);
+
+				if (picture_structure!=FRAME_PICTURE)
+				{
+					Get_Hdr();
+					Decode_Picture(1, dst, pitch);
+				}
+
+				Copyodd(dst, GOPBuffer[ref - remain + 1], pitch, 1);
+				Copyeven(dst, GOPBuffer[ref - remain + 1], pitch, 1);
+
+				remain--;
+			}
+
+			if (!Full_Frame && ref>=(size-2))
+			{
+				Copyodd(dst, dstFrame, pitch, 1);
+				Copyeven(dst, dstFrame, pitch, 1);
+			}
+		}
+	}
+
+	// assemble the output frame from the cache; for fo 1 and 2 one field comes from the
+	// previous cached picture
+	switch (fo)
+	{
+		case 0:
+			Copyodd(GOPBuffer[ref], dst, pitch, 0);
+			Copyeven(GOPBuffer[ref], dst, pitch, 0);
+			break;
+
+		case 1:
+			Copyodd(GOPBuffer[ref-1], dst, pitch, 0);
+			Copyeven(GOPBuffer[ref], dst, pitch, 0);
+			break;
+
+		case 2:
+			Copyodd(GOPBuffer[ref], dst, pitch, 0);
+			Copyeven(GOPBuffer[ref-1], dst, pitch, 0);
+			break;
+	}
+
+	in->VF_OldFrame = origin;
+}
+
+// Releases everything Open() and Decode() acquired: the GOP and frame index entries, the
+// project file, the cached GOP pictures, the source file descriptors, all work buffers
+// and the optional OpenDVD library.
+//
+// Every counter is reset and every pointer is cleared after its resource is released, so
+// the function may be called more than once; the destructor calls it again after an
+// explicit Close().
+void CMPEG2Dec::Close()
+{
+	DWORD n;
+	int i;
+
+	for (n = 0; n < VF_GOPLimit; n++)
+	{
+		free(GOPList[n]);
+		GOPList[n] = NULL;
+	}
+	VF_GOPLimit = 0;
+
+	for (n = 0; n < VF_FrameLimit; n++)
+	{
+		free(FrameList[n]);
+		FrameList[n] = NULL;
+	}
+	VF_FrameLimit = 0;
+
+	if (VF_File != NULL)
+	{
+		fclose(VF_File);
+		VF_File = NULL;
+	}
+
+	while (VF_GOPSize)
+	{
+		VF_GOPSize--;
+		free(GOPBuffer[VF_GOPSize]);
+		GOPBuffer[VF_GOPSize] = NULL;
+	}
+
+	while (File_Limit > 0)
+	{
+		File_Limit--;
+		_close(Infile[File_Limit]);
+	}
+
+	for (i=0; i<MAX_FILE_NUMBER; i++)
+	{
+		delete [] Infilename[i];
+		Infilename[i] = NULL;
+	}
+
+	for (i=0; i<3; i++)
+	{
+		delete [] backward_reference_frame[i];
+		delete [] forward_reference_frame[i];
+		delete [] auxframe[i];
+		backward_reference_frame[i] = forward_reference_frame[i] = auxframe[i] = NULL;
+	}
+
+	delete [] u422;
+	delete [] v422;
+	delete [] u444;
+	delete [] v444;
+	delete [] dstFrame;
+	u422 = v422 = u444 = v444 = dstFrame = NULL;
+
+	// lum is either NULL or a buffer allocated by Open(), so it can always be deleted
+	delete [] lum;
+	lum = NULL;
+
+	// p_block[] and p_fTempArray were allocated as BYTE arrays and must be deleted as such;
+	// block[] and fTempArray only point into them
+	for (i=0; i<8; i++)
+	{
+		delete [] reinterpret_cast<BYTE*>(p_block[i]);
+		p_block[i] = block[i] = NULL;
+	}
+
+	delete [] static_cast<BYTE*>(p_fTempArray);
+	p_fTempArray = fTempArray = NULL;
+
+	if (hLibrary)
+	{
+		FreeLibrary(hLibrary);
+		hLibrary = NULL;
+		BufferOp = 0;	// pointed into the library that was just unloaded
+	}
+}
+
+// Copies every second row, beginning with the first row of both buffers.
+// Clip_Height/2 rows of DSTBYTES bytes are copied.
+//
+//  forward != 0: src rows are `pitch` bytes apart, dst rows DSTBYTES bytes apart
+//  forward == 0: src rows are DSTBYTES bytes apart, dst rows `pitch` bytes apart
+void CMPEG2Dec::Copyodd(unsigned char *src, unsigned char *dst, int pitch, int forward)
+{
+	int srcStep, dstStep;
+
+	// two rows are skipped per step because only every other row is copied
+	if (forward)
+	{
+		srcStep = pitch<<1;
+		dstStep = DSTBYTES2;
+	}
+	else
+	{
+		srcStep = DSTBYTES2;
+		dstStep = pitch<<1;
+	}
+
+	int rowsLeft = Clip_Height>>1;
+	while (rowsLeft-- > 0)
+	{
+		memcpy (dst, src, DSTBYTES);
+		dst += dstStep;
+		src += srcStep;
+	}
+}
+
+// Copies every second row, beginning with the second row of both buffers.
+// Clip_Height/2 rows of DSTBYTES bytes are copied.
+//
+//  forward != 0: src rows are `pitch` bytes apart, dst rows DSTBYTES bytes apart
+//  forward == 0: src rows are DSTBYTES bytes apart, dst rows `pitch` bytes apart
+void CMPEG2Dec::Copyeven(unsigned char *src, unsigned char *dst, int pitch, int forward)
+{
+	int srcStep, dstStep;
+
+	if (forward)
+	{
+		// start at the second row of each buffer
+		src += pitch;
+		dst += DSTBYTES;
+		srcStep = pitch<<1;
+		dstStep = DSTBYTES2;
+	}
+	else
+	{
+		src += DSTBYTES;
+		dst += pitch;
+		srcStep = DSTBYTES2;
+		dstStep = pitch<<1;
+	}
+
+	int rowsLeft = Clip_Height>>1;
+	while (rowsLeft-- > 0)
+	{
+		memcpy (dst, src, DSTBYTES);
+		dst += dstStep;
+		src += srcStep;
+	}
+}
diff --git a/src/filters/source/D2VSource/MPEG2Dec.h b/src/filters/source/D2VSource/MPEG2Dec.h
new file mode 100644
index 000000000..34368208b
--- /dev/null
+++ b/src/filters/source/D2VSource/MPEG2Dec.h
@@ -0,0 +1,304 @@
+#pragma once
+
+#include <windows.h>
+#include <winreg.h>
+#include <stdio.h>
+#include <io.h>
+#include <fcntl.h>
+
+/* code definition */
+// 32-bit start code values. CMPEG2Dec::Open() reads start codes until it finds
+// SEQUENCE_HEADER_CODE.
+#define PICTURE_START_CODE			0x100
+#define SLICE_START_CODE_MIN		0x101
+#define SLICE_START_CODE_MAX		0x1AF
+#define USER_DATA_START_CODE		0x1B2
+#define SEQUENCE_HEADER_CODE		0x1B3
+#define EXTENSION_START_CODE		0x1B5
+#define SEQUENCE_END_CODE			0x1B7
+#define GROUP_START_CODE			0x1B8
+
+#define SYSTEM_END_CODE				0x1B9
+#define PACK_START_CODE				0x1BA
+#define SYSTEM_START_CODE			0x1BB
+#define PRIVATE_STREAM_1			0x1BD
+#define VIDEO_ELEMENTARY_STREAM		0x1E0
+
+/* extension start code IDs */
+#define SEQUENCE_EXTENSION_ID					1
+#define SEQUENCE_DISPLAY_EXTENSION_ID			2
+#define QUANT_MATRIX_EXTENSION_ID				3
+#define COPYRIGHT_EXTENSION_ID					4
+#define PICTURE_DISPLAY_EXTENSION_ID			7
+#define PICTURE_CODING_EXTENSION_ID				8
+
+#define ZIG_ZAG									0
+#define MB_WEIGHT								32
+#define MB_CLASS4								64
+
+// values of CMPEG2Dec::picture_coding_type
+#define I_TYPE			1
+#define P_TYPE			2
+#define B_TYPE			3
+
+// values of CMPEG2Dec::picture_structure
+#define TOP_FIELD		1
+#define BOTTOM_FIELD	2
+#define FRAME_PICTURE	3
+
+// macroblock type flags; the values are distinct bits
+#define MACROBLOCK_INTRA				1
+#define MACROBLOCK_PATTERN				2
+#define MACROBLOCK_MOTION_BACKWARD		4
+#define MACROBLOCK_MOTION_FORWARD		8
+#define MACROBLOCK_QUANT				16
+
+// motion types; MC_FRAME and MC_16X8 share the value 2
+// NOTE(review): presumably one applies to frame pictures and the other to field
+// pictures - confirm against the users of these constants
+#define MC_FIELD		1
+#define MC_FRAME		2
+#define MC_16X8			2
+#define MC_DMV			3
+
+#define MV_FIELD		0
+#define MV_FRAME		1
+
+// values of CMPEG2Dec::chroma_format; also used as the index into ChromaFormat[]
+#define CHROMA420		1
+#define CHROMA422		2
+#define CHROMA444		3
+
+// BUFFER_SIZE: size of the read buffer Rdbfr and the unit of the GOP positions stored in
+// the project file. MAX_FILE_NUMBER: capacity of Infile[] / Infilename[].
+#define BUFFER_SIZE			2048
+#define MAX_FILE_NUMBER		256
+
+// values of CMPEG2Dec::IDCT_Flag (project file entry "iDCT_Algorithm=")
+#define IDCT_MMX		1
+#define IDCT_SSEMMX		2
+#define	IDCT_FPU		3
+#define IDCT_REF		4
+
+// values of CMPEG2Dec::FO_Flag (project file entry "Field_Operation=")
+#define FO_NONE			0
+#define FO_FILM			1
+#define FO_SWAP			2
+
+
+// Signature of the "BufferOp" function that CMPEG2Dec::Open() looks up in OpenDVD.dll.
+typedef void (WINAPI *PBufferOp) (unsigned char*, int, int);
+
+// MAX_FRAME_NUMBER: capacity of GOPList[] and FrameList[].
+// MAX_GOP_SIZE: capacity of GOPBuffer[].
+#define MAX_FRAME_NUMBER	1000000
+#define MAX_GOP_SIZE		1024
+
+
+// MPEG-2 video decoder driven by a DVD2AVI project file.
+//
+// Open() parses the project file, opens the source files it lists and builds the GOP and
+// frame index; Decode() delivers one output frame; Close() releases everything and is also
+// called by the destructor.
+//
+// The object owns raw buffers, file handles and a library handle that Close() releases, so
+// it must not be copied: the copy operations are declared private and left undefined.
+// Note that the index tables are plain member arrays of MAX_FRAME_NUMBER pointers each,
+// which makes the object several megabytes large.
+class CMPEG2Dec  
+{
+protected:
+
+  // getbit.cpp
+  void Initialize_Buffer();
+  void Fill_Buffer();
+  void Next_Packet();
+  void Flush_Buffer_All(unsigned int N);
+  unsigned int Get_Bits_All(unsigned int N);
+  void Next_File();
+
+  unsigned int Show_Bits(unsigned int N);
+  unsigned int Get_Bits(unsigned int N);
+  void Flush_Buffer(unsigned int N);
+  void Fill_Next();
+  unsigned int Get_Byte();
+  unsigned int Get_Short();
+  void next_start_code();
+
+  // read buffer and bit reader state
+  unsigned char Rdbfr[BUFFER_SIZE], *Rdptr, *Rdmax;
+  unsigned int CurrentBfr, NextBfr, BitsLeft, Val, Read;
+
+  // gethdr.cpp
+  int Get_Hdr();
+  void sequence_header();
+  int slice_header();
+private:
+  void group_of_pictures_header();
+  void picture_header();
+  void sequence_extension();
+  void sequence_display_extension();
+  void quant_matrix_extension();
+  void picture_display_extension();
+  void picture_coding_extension();
+  void copyright_extension();
+  int  extra_bit_information();
+  void extension_and_user_data();
+
+protected:
+  // getpic.cpp
+  void Decode_Picture(int ref, unsigned char *dst, int pitch);
+private:
+  void Update_Picture_Buffers();
+  void picture_data();
+  int slice(int MBAmax);
+  void macroblock_modes(int *pmacroblock_type, int *pmotion_type, 
+  	int *pmotion_vector_count, int *pmv_format, int *pdmv, int *pmvscale, int *pdct_type);
+  void Clear_Block(int count);
+  void Add_Block(int count, int bx, int by, int dct_type, int addflag);
+  void motion_compensation(int MBA, int macroblock_type, int motion_type,
+	  int PMV[2][2][2], int motion_vertical_field_select[2][2], int dmvector[2], int dct_type);
+  void skipped_macroblock(int dc_dct_pred[3], int PMV[2][2][2], 
+  	int *motion_type, int motion_vertical_field_select[2][2], int *macroblock_type);
+  int start_of_slice(int *MBA, int *MBAinc, int dc_dct_pred[3], int PMV[2][2][2]);
+  int decode_macroblock(int *macroblock_type, int *motion_type, int *dct_type,
+	  int PMV[2][2][2], int dc_dct_pred[3], int motion_vertical_field_select[2][2], int dmvector[2]);
+  void Decode_MPEG2_Intra_Block(int comp, int dc_dct_pred[]);
+  void Decode_MPEG2_Non_Intra_Block(int comp);
+
+  int Get_macroblock_type();
+  int Get_I_macroblock_type();
+  int Get_P_macroblock_type();
+  int Get_B_macroblock_type();
+  int Get_D_macroblock_type();
+  int Get_coded_block_pattern();
+  int Get_macroblock_address_increment();
+  int Get_Luma_DC_dct_diff();
+  int Get_Chroma_DC_dct_diff();
+
+  void form_predictions(int bx, int by, int macroblock_type, int motion_type, 
+	  int PMV[2][2][2], int motion_vertical_field_select[2][2], int dmvector[2]);
+  void form_prediction(unsigned char *src[], int sfield, unsigned char *dst[], int dfield, 
+	  int lx, int lx2, int w, int h, int x, int y, int dx, int dy, int average_flag);
+  void form_component_prediction(unsigned char *src, unsigned char *dst,
+	  int lx, int lx2, int w, int h, int x, int y, int dx, int dy, int average_flag);
+
+  // motion.cpp
+  void motion_vectors(int PMV[2][2][2], int dmvector[2], int motion_vertical_field_select[2][2], 
+	  int s, int motion_vector_count, int mv_format, 
+	  int h_r_size, int v_r_size, int dmv, int mvscale);
+  void Dual_Prime_Arithmetic(int DMV[][2], int *dmvector, int mvx, int mvy);
+private:
+  void motion_vector(int *PMV, int *dmvector, int h_r_size, int v_r_size, 
+  	int dmv, int mvscale, int full_pel_vector);
+  void decode_motion_vector(int *pred, int r_size, int motion_code,
+  	int motion_residual, int full_pel_vector);
+  int Get_motion_code();
+  int Get_dmvector();
+
+protected:
+  // store.cpp
+  void assembleFrame(unsigned char *src[], int pf, unsigned char *dst, int pitch);
+private:
+  void Luminance_Filter(unsigned char *src, unsigned char *dst);
+  void conv420to422(unsigned char *src, unsigned char *dst, int frame_type);
+  void conv422to444(unsigned char *src, unsigned char *dst);
+  void conv444toRGB24(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int pitch);
+  void conv422toYUY2(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int pitch);
+
+protected:
+  // decoder operation control flags
+  int Fault_Flag;
+  int File_Flag;
+  int File_Limit;			// number of source files (entries of Infile[] in use)
+  int FO_Flag;				// one of FO_NONE / FO_FILM / FO_SWAP
+  int IDCT_Flag;			// one of the IDCT_* constants
+  int SystemStream_Flag;
+
+  int Luminance_Flag;
+  int Resize_Flag;
+
+  int KeyOp_Flag;			// set by Open() when lfsr0 or lfsr1 is non-zero
+  int lfsr0, lfsr1;
+  PBufferOp BufferOp;		// "BufferOp" export of OpenDVD.dll, or 0
+
+  int Infile[MAX_FILE_NUMBER];			// descriptors of the source files
+  char *Infilename[MAX_FILE_NUMBER];	// their names, _MAX_PATH bytes each
+
+  int intra_quantizer_matrix[64];
+  int non_intra_quantizer_matrix[64];
+  int chroma_intra_quantizer_matrix[64];
+  int chroma_non_intra_quantizer_matrix[64];
+  
+  int load_intra_quantizer_matrix;
+  int load_non_intra_quantizer_matrix;
+  int load_chroma_intra_quantizer_matrix;
+  int load_chroma_non_intra_quantizer_matrix;
+
+  int q_scale_type;
+  int alternate_scan;
+  int quantizer_scale;
+
+  // p_* are the pointers returned by the allocator; the unprefixed ones are the same
+  // memory moved up to a 64-byte boundary
+  void *fTempArray, *p_fTempArray;
+  short *block[8], *p_block[8];
+  int pf_backward, pf_forward, pf_current;
+
+  // global values
+  unsigned char *backward_reference_frame[3], *forward_reference_frame[3];
+  unsigned char *auxframe[3], *current_frame[3];
+  unsigned char *u422, *v422, *u444, *v444, /* *rgb24,*/ *lum;
+  unsigned char *dstFrame;	// replaces rgb24
+  // packed 16-bit coefficients used by the MMX conversion routines
+  __int64 RGB_Scale, RGB_Offset, RGB_CRV, RGB_CBU, RGB_CGX, LumOffsetMask, LumGainMask;
+
+  // sizes derived from the picture geometry in Open()
+  int HALF_WIDTH, PROGRESSIVE_HEIGHT, INTERLACED_HEIGHT, DOUBLE_WIDTH;
+  int /*TWIDTH, SWIDTH,*/ HALF_WIDTH_D8, LUM_AREA, CLIP_AREA, HALF_CLIP_AREA, CLIP_STEP;
+  int DSTBYTES, DSTBYTES2;	// these replace TWIDTH and SWIDTH
+public:
+  int Clip_Width, Clip_Height, Resize_Width, Resize_Height;
+protected:
+
+  int Coded_Picture_Width, Coded_Picture_Height, Chroma_Width, Chroma_Height;
+  int block_count, Second_Field;
+  int horizontal_size, vertical_size, mb_width, mb_height;
+
+  /* ISO/IEC 13818-2 section 6.2.2.3:  sequence_extension() */
+  int progressive_sequence;
+  int chroma_format;
+
+  /* ISO/IEC 13818-2 section 6.2.3: picture_header() */
+  int picture_coding_type;
+  int temporal_reference;
+
+  /* ISO/IEC 13818-2 section 6.2.3.1: picture_coding_extension() header */
+  int f_code[2][2];
+  int picture_structure;
+  int frame_pred_frame_dct;
+  int progressive_frame;
+  int concealment_motion_vectors;
+  int intra_dc_precision;
+  int top_field_first;
+  int repeat_first_field;
+  int intra_vlc_format;
+
+  // interface
+  // One entry per GOP listed in the project file.
+  typedef struct {
+	DWORD		number;		// number of the first coded frame of the GOP
+	int			file;		// index into Infile[]
+	__int64		position;	// byte offset of the GOP inside that file
+  }	GOPLIST;
+  GOPLIST *GOPList[MAX_FRAME_NUMBER];
+
+  // One entry per output frame.
+  typedef struct {
+	DWORD			top;		// coded frame supplying the top field
+	DWORD			bottom;		// coded frame supplying the bottom field
+	char			forward;	// shares a coded frame with the next output frame
+	char			backward;	// shares a coded frame with the previous output frame
+  }	FRAMELIST;
+  FRAMELIST *FrameList[MAX_FRAME_NUMBER];
+
+  // decoded pictures of the GOP currently cached by Decode()
+  unsigned char *GOPBuffer[MAX_GOP_SIZE];
+public:
+  BOOL Field_Order, Full_Frame;
+protected:
+  HINSTANCE hLibrary;		// OpenDVD.dll, or NULL
+
+  void Copyodd(unsigned char *src, unsigned char *dst, int pitch, int forward);
+  void Copyeven(unsigned char *src, unsigned char *dst, int pitch, int forward);
+public:
+  FILE		*VF_File;			// the project file
+  int		VF_FrameRate;		// value of the "Frame_Rate=" entry
+  DWORD		VF_FrameLimit;		// number of output frames (valid FrameList entries)
+  DWORD		VF_FrameBound;		// number of coded frames listed in the project file
+  DWORD		VF_GOPLimit;		// number of valid GOPList entries
+  DWORD		VF_GOPNow;			// GOP currently cached; VF_GOPLimit when none
+  DWORD		VF_GOPSize;			// number of allocated GOPBuffer entries
+  int		VF_FrameSize;		// size in bytes of one GOPBuffer entry
+  DWORD		VF_OldFrame;		// frame handled by the previous Decode() call
+  DWORD		VF_OldRef;			// last position decoded inside the cached GOP
+
+  enum DstFormat {
+	RGB24, YUY2
+  };
+  DstFormat m_dstFormat;
+
+  CMPEG2Dec();
+  ~CMPEG2Dec() {Close();}
+  int Open(LPCTSTR path, DstFormat);
+  void Close();
+  void Decode(unsigned char *dst, DWORD frame, int pitch);
+  bool dstRGB24() const { return m_dstFormat == RGB24; }
+  bool dstYUY2() const { return m_dstFormat == YUY2; }
+
+private:
+  // not copyable: a copy would release the same resources a second time
+  CMPEG2Dec(const CMPEG2Dec&);
+  CMPEG2Dec& operator=(const CMPEG2Dec&);
+};
diff --git a/src/filters/source/D2VSource/d2vsource.rc b/src/filters/source/D2VSource/d2vsource.rc
new file mode 100644
index 000000000..6df83bdd8
--- /dev/null
+++ b/src/filters/source/D2VSource/d2vsource.rc
@@ -0,0 +1,117 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+#include "..\..\..\..\include\Version.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// Hungarian resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_HUN)
+#ifdef _WIN32
+LANGUAGE LANG_HUNGARIAN, SUBLANG_DEFAULT
+#pragma code_page(1250)
+#endif //_WIN32
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO // version resource of the filter; it is emitted inside the Hungarian language section of this script
+ FILEVERSION VERSION_MAJOR,VERSION_MINOR,VERSION_REV,VERSION_PATCH // numeric version; the VERSION_* macros are expected to come from the Version.h include at the top
+ PRODUCTVERSION VERSION_MAJOR,VERSION_MINOR,VERSION_REV,VERSION_PATCH
+ FILEFLAGSMASK 0x17L // selects which FILEFLAGS bits are meaningful
+#ifdef _DEBUG
+ FILEFLAGS 0x1L // VS_FF_DEBUG: flag debug builds
+#else
+ FILEFLAGS 0x0L // release build: no flags set
+#endif
+ FILEOS 0x4L // VOS__WINDOWS32
+ FILETYPE 0x2L // VFT_DLL
+ FILESUBTYPE 0x0L // no subtype
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040e04b0" // language 0x040e with code page 0x04b0 (= 1200); must stay in sync with the Translation value below
+        BEGIN
+            VALUE "Comments", "http://sourceforge.net/projects/mpc-hc/"
+            VALUE "CompanyName", "MPC-HC Team"
+            VALUE "FileDescription", "D2V Source Filter"
+            VALUE "FileVersion", "1, 1, 0, 0" // NOTE(review): hard-coded text, so it can disagree with the macro-driven FILEVERSION above
+            VALUE "InternalName", "D2V Source Filter"
+            VALUE "LegalCopyright", "Copyright (C) 2002-2010 see AUTHORS file"
+            VALUE "OriginalFilename", "D2VSource.ax"
+            VALUE "ProductName", "D2V Source Filter"
+            VALUE "ProductVersion", "1, 1, 0, 0" // NOTE(review): hard-coded text, same caveat as FileVersion
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x40e, 1200 // language id and code page matching the "040e04b0" block name
+    END
+END
+
+#endif    // Hungarian resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#include ""afxres.h""\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+#endif    // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
+
diff --git a/src/filters/source/D2VSource/d2vsource.vcproj b/src/filters/source/D2VSource/d2vsource.vcproj
new file mode 100644
index 000000000..0fcded0c4
--- /dev/null
+++ b/src/filters/source/D2VSource/d2vsource.vcproj
@@ -0,0 +1,962 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9,00"
+	Name="d2vsource"
+	ProjectGUID="{83CC6B88-A112-4192-BD5A-F2A249AF2277}"
+	RootNamespace="d2vsource"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug Unicode|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\debug.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+				PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				RegisterOutput="true"
+				AdditionalDependencies="strmbaseDU.lib basesourceDU.lib Winmm.lib"
+				OutputFile="$(OutDir)\$(ProjectName).ax"
+				AdditionalLibraryDirectories="..\..\..\..\lib"
+				ModuleDefinitionFile="D2VSource.def"
+				SubSystem="2"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="0"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug Unicode|x64"
+			OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\debug.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+				PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="strmbaseDU.lib basesourceDU.lib Winmm.lib"
+				OutputFile="$(OutDir)\$(ProjectName).ax"
+				AdditionalLibraryDirectories="..\..\..\..\lib64"
+				ModuleDefinitionFile="D2VSource.def"
+				SubSystem="2"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="0"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release Unicode|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\release.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+				PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL"
+				BufferSecurityCheck="true"
+				EnableEnhancedInstructionSet="1"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				RegisterOutput="true"
+				AdditionalDependencies="strmbaseRU.lib basesourceRU.lib Winmm.lib"
+				OutputFile="..\..\..\..\bin\x86\$(ProjectName).ax"
+				AdditionalLibraryDirectories="..\..\..\..\lib"
+				ModuleDefinitionFile="D2VSource.def"
+				GenerateDebugInformation="true"
+				SubSystem="2"
+				LargeAddressAware="2"
+				RandomizedBaseAddress="2"
+				DataExecutionPrevention="2"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release Unicode|x64"
+			OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\release.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+				PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL"
+				BufferSecurityCheck="true"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="strmbaseRU.lib basesourceRU.lib Winmm.lib"
+				OutputFile="..\..\..\..\bin\x64\$(ProjectName).ax"
+				AdditionalLibraryDirectories="..\..\..\..\lib64"
+				ModuleDefinitionFile="D2VSource.def"
+				GenerateDebugInformation="true"
+				SubSystem="2"
+				LargeAddressAware="2"
+				RandomizedBaseAddress="2"
+				DataExecutionPrevention="2"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug Unicode lib|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\debug.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalOptions="/MP"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+				PreprocessorDefinitions="WIN32;_DEBUG"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="..\..\..\..\lib\$(ProjectName)DU.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug Unicode lib|x64"
+			OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\debug.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalOptions="/MP"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+				PreprocessorDefinitions="_WIN64;_DEBUG"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="..\..\..\..\lib64\$(ProjectName)DU.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release Unicode lib|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\release.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalOptions="/MP"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+				PreprocessorDefinitions="WIN32;NDEBUG"
+				BufferSecurityCheck="true"
+				EnableEnhancedInstructionSet="1"
+				DisableSpecificWarnings="4244;4799;4731;"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="..\..\..\..\lib\$(ProjectName)RU.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release Unicode lib|x64"
+			OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\release.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalOptions="/MP"
+				AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+				PreprocessorDefinitions="_WIN64;NDEBUG"
+				BufferSecurityCheck="true"
+				EnableEnhancedInstructionSet="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="..\..\..\..\lib64\$(ProjectName)RU.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm"
+			>
+			<File
+				RelativePath="D2VSource.cpp"
+				>
+				<FileConfiguration
+					Name="Debug Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="D2VSource.def"
+				>
+			</File>
+			<File
+				RelativePath="idctfpu.cpp"
+				>
+				<FileConfiguration
+					Name="Debug Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="idctmmx.asm"
+				>
+				<FileConfiguration
+					Name="Debug Unicode|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(OutDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(OutDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(OutDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(OutDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(OutDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(OutDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(OutDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(OutDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="idctref.cpp"
+				>
+				<FileConfiguration
+					Name="Debug Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="MPEG2Dec.cpp"
+				>
+				<FileConfiguration
+					Name="Debug Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="stdafx.cpp"
+				>
+				<FileConfiguration
+					Name="Debug Unicode|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc"
+			>
+			<File
+				RelativePath="D2VSource.h"
+				>
+			</File>
+			<File
+				RelativePath="MPEG2Dec.h"
+				>
+			</File>
+			<File
+				RelativePath=".\resource.h"
+				>
+				<FileConfiguration
+					Name="Debug Unicode lib|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="stdafx.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+			>
+			<File
+				RelativePath=".\d2vsource.rc"
+				>
+				<FileConfiguration
+					Name="Debug Unicode lib|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCResourceCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCResourceCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCResourceCompilerTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release Unicode lib|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCResourceCompilerTool"
+					/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+		<Global
+			Name="DevPartner_IsInstrumented"
+			Value="0"
+		/>
+	</Globals>
+</VisualStudioProject>
diff --git a/src/filters/source/D2VSource/idctfpu.cpp b/src/filters/source/D2VSource/idctfpu.cpp
new file mode 100644
index 000000000..92fcd3954
--- /dev/null
+++ b/src/filters/source/D2VSource/idctfpu.cpp
@@ -0,0 +1,456 @@
+#include "stdafx.h"
+
+/* idct.c, inverse fast discrete cosine transform                           */
+
+
+/*************************************************************/
+/* inverse two dimensional DCT, Chen-Wang algorithm          */
+/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984)                */
+/*                                                           */
+/* floating point conversion by Miha Peternel                */
+/* x87 hand-optimized assembly by Miha Peternel              */
+/*                                    27.11. - 11.12.2000    */
+/*                                                           */
+/* You are free to use this code in your project if:         */
+/* - no changes are made to this message                     */
+/* - any changes to this code are publicly available         */
+/* - your project documentation contains the following text: */
+/*   "This software contains fast high-quality IDCT decoder  */
+/*    by Miha Peternel."                                     */
+/*                                                           */
+/*************************************************************/
+
+
+/////////////////////////////////////////////////////
+//
+// TODO:
+// - loops can be easily vectorized for SIMD
+//
+/////////////////////////////////////////////////////
+
+#include <math.h>
+#  define PI 3.1415926535897932384626433832795
+
+#define FLOAT double // every constant below is a double; FPU_IDCT loads them with "qword ptr"
+
+const static double RC = 1.0*1024*1024*1024*1024*256*16 + 1024; // magic + clip center (2^52 + 1024); not referenced anywhere else in this file
+
+static FLOAT W1; // sqrt(2)*cos(1*pi/16)
+static FLOAT W2; // sqrt(2)*cos(2*pi/16)
+static FLOAT W5; // sqrt(2)*cos(5*pi/16)
+
+static FLOAT W1_8; // W1/8
+static FLOAT W2_8; // W2/8
+static FLOAT W5_8; // W5/8
+
+static FLOAT W7; // sqrt(2)*cos(7*pi/16)
+static FLOAT W1mW7; // W1-W7
+static FLOAT W1pW7; // W1+W7
+
+static FLOAT W3; // sqrt(2)*cos(3*pi/16)
+static FLOAT W3mW5; // W3-W5
+static FLOAT W3pW5; // W3+W5
+
+static FLOAT W6; // sqrt(2)*cos(6*pi/16)
+static FLOAT W2mW6; // W2-W6
+static FLOAT W2pW6; // W2+W6
+
+static FLOAT S2; // 1/sqrt(2)
+static FLOAT D8 = 1.0/8; // plain 1/8 factor; the column pass of FPU_IDCT multiplies two of its inputs by it
+
+static FLOAT W7_8; // W7/8
+static FLOAT W1mW7_8; // (W1-W7)/8
+static FLOAT W1pW7_8; // (W1+W7)/8
+
+static FLOAT W3_8; // W3/8
+static FLOAT W3mW5_8; // (W3-W5)/8
+static FLOAT W3pW5_8; // (W3+W5)/8
+
+static FLOAT W6_8; // W6/8
+static FLOAT W2mW6_8; // (W2-W6)/8
+static FLOAT W2pW6_8; // (W2+W6)/8
+
+/* global declarations */
+
+/* private data: filled in by Initialize_FPU_IDCT() */
+static short iclip[1024+1024]; /* clipping table: 2048 entries, one per value in [-1024, 1023], each holding that value limited to [-256, 255] */
+static short *iclp; /* iclip+1024 once initialised, so iclp[v] is valid for v in [-1024, 1023] */
+
+// Computes the cosine constants and fills the clipping table that FPU_IDCT() reads.
+void Initialize_FPU_IDCT()
+{
+  // base factors Wk = sqrt(2)*cos(k*pi/16), plus S2 = sqrt(1/2)
+  W1 = sqrt(2.0)*cos(PI*(1.0/16));
+  W2 = sqrt(2.0)*cos(PI*(2.0/16));
+  W3 = sqrt(2.0)*cos(PI*(3.0/16));
+  W5 = sqrt(2.0)*cos(PI*(5.0/16));
+  W6 = sqrt(2.0)*cos(PI*(6.0/16));
+  W7 = sqrt(2.0)*cos(PI*(7.0/16));
+  S2 = sqrt(0.5);
+  // differences and sums of the factor pairs (1,7), (3,5) and (2,6)
+  W1mW7 = W1-W7;  W1pW7 = W1+W7;
+  W3mW5 = W3-W5;  W3pW5 = W3+W5;
+  W2mW6 = W2-W6;  W2pW6 = W2+W6;
+
+  // the *_8 variants: each of the values above divided by 8
+  W1_8 = W1/8;  W2_8 = W2/8;  W3_8 = W3/8;
+  W5_8 = W5/8;  W6_8 = W6/8;  W7_8 = W7/8;
+  W1mW7_8 = W1mW7/8;  W1pW7_8 = W1pW7/8;
+  W3mW5_8 = W3mW5/8;  W3pW5_8 = W3pW5/8;
+  W2mW6_8 = W2mW6/8;  W2pW6_8 = W2pW6/8;
+
+  // clip table: afterwards iclp[v] is v limited to [-256, 255] for every v in [-1024, 1023]
+  iclp = iclip+1024;
+  for (int slot = 0; slot < 1024+1024; slot++) {
+    const int v = slot-1024;
+    iclip[slot] = static_cast<short>(v < -256 ? -256 : (v > 255 ? 255 : v));
+  }
+}
+
+void FPU_IDCT(short *block) // in-place 8x8 inverse DCT of the 64 shorts at block (x87 inline asm; constants and clip table come from Initialize_FPU_IDCT)
+{
+	int *b = (int *) block; // reads the 64 shorts as b[0..31], two coefficients per int
+  if( b[0]==0 && (b[31]==0x10000 || b[31]==0) ) // shortcut candidate: first pair zero; last pair zero or 0x10000 (on little-endian x86: block[62]==0, block[63]==1)
+	{
+	  if( b[ 1]|b[ 2]|b[ 3]|b[ 4]|b[ 5] ) // any non-zero pair in b[1..30] means the real transform is needed
+		  goto normal;
+	  if( b[ 6]|b[ 7]|b[ 8]|b[ 9]|b[10] )
+		  goto normal;
+	  if( b[11]|b[12]|b[13]|b[14]|b[15] )
+		  goto normal;
+	  if( b[16]|b[17]|b[18]|b[19]|b[20] )
+		  goto normal;
+	  if( b[21]|b[22]|b[23]|b[24]|b[25] )
+		  goto normal;
+	  if( b[26]|b[27]|b[28]|b[29]|b[30] )
+		  goto normal;
+		b[31]=0; // nothing set except that optional trailing 1: leave an all-zero block and skip the transform (NOTE(review): the lone 1 is presumably MPEG-2 mismatch control - confirm)
+		////empty++;
+		return;
+	}
+normal:
+// Scratch area used by the asm below, expressed as offsets from ebx:
+#define tmp  ebx // 8x8 doubles written by the row pass and read by the column pass
+#define tmp1 ebx-1*8 // tmp1..tmp3: 8-byte temporaries directly below tmp
+#define tmp2 ebx-2*8
+#define tmp3 ebx-3*8
+#define int0 ebx-3*8-1*4 // int0..int7: 4-byte slots receiving the integer results of the column pass
+#define int1 ebx-3*8-2*4
+#define int2 ebx-3*8-3*4
+#define int3 ebx-3*8-4*4
+#define int4 ebx-3*8-5*4
+#define int5 ebx-3*8-6*4
+#define int6 ebx-3*8-7*4
+#define int7 ebx-3*8-8*4
+#define SIZE 8*8*8+3*8+8*4+16 // locals + 16-byte alignment area
+	__asm
+	{
+	  lea ebx,[esp-8*8*8] // ebx = 512 bytes (64 doubles) below the incoming stack pointer
+		sub esp,SIZE // reserve the scratch area; given back by "add esp,SIZE" at the end
+		and ebx,-16 // force 16-byte alignment of locals
+
+// rows: each pass of Lrows turns the 8 input words at esi into 8 doubles at edi
+		mov esi,[block]
+		lea edi,[tmp]
+		mov ecx,8
+
+		align 16
+Lrows:
+    movsx eax,word ptr [esi+2] // eax = row[1] ...
+		or    eax,         [esi+4]
+		or    eax,         [esi+8]
+		or    eax,         [esi+12] // ... ORed with the three dwords that hold row[2..7]
+		jnz L1 // some coefficient other than row[0] is set: do the full row transform
+		// row[1..7] are all zero: the row result is row[0] copied into all 8 outputs
+		fild word ptr [esi+0*2]
+		fst  qword ptr [edi+7*8]
+		fst  qword ptr [edi+6*8]
+		fst  qword ptr [edi+5*8]
+		fst  qword ptr [edi+4*8]
+		fst  qword ptr [edi+3*8]
+		fst  qword ptr [edi+2*8]
+		fst  qword ptr [edi+1*8]
+		fstp qword ptr [edi+0*8]
+		jmp L2
+
+		align 16
+	L1:
+
+		fild word ptr [esi+7*2]
+		fld st(0)
+		fild word ptr [esi+1*2]
+		fadd st(1),st(0)
+		fld qword ptr [W7]
+		fxch st(1)
+		fmul qword ptr [W1mW7]
+		fxch st(1)
+		fmulp st(2),st(0)
+		fadd st(0),st(1)
+		fstp qword ptr [tmp1]
+		fild word ptr [esi+3*2]
+		fld st(0)
+		fxch st(3)
+		fmul qword ptr [W1pW7]
+		fild word ptr [esi+5*2]
+		fadd st(4),st(0)
+		fmul qword ptr [W3mW5]
+		fxch st(1)
+		fsubp st(3),st(0)//fsubrp
+		fld qword ptr [W3]
+		fmulp st(4),st(0)
+		fsubr st(0),st(3)
+		fstp qword ptr [tmp2]
+		fmul qword ptr [W3pW5]
+		fsubp st(2),st(0)//fsubrp
+		fxch st(1)
+		fstp qword ptr [tmp3]
+		fild word ptr [esi+0*2]
+		fild word ptr [esi+4*2]
+		fild word ptr [esi+2*2]
+		fld st(0)
+		fmul qword ptr [W2mW6]
+		fld st(3)
+		fild word ptr [esi+6*2]
+		fxch st(5)
+		fsub st(0),st(4)
+		fxch st(3)
+		fadd st(0),st(5)
+		fxch st(1)
+		faddp st(4),st(0)
+		fld qword ptr [W6]
+		fmulp st(1),st(0)
+		fxch st(4)
+		fmul qword ptr [W2pW6]
+		fld qword ptr [tmp1]
+		fsub qword ptr [tmp2]
+		fld st(5)
+		fxch st(3)
+		faddp st(6),st(0)
+		fld qword ptr [tmp1]
+		fxch st(1)
+		fstp qword ptr [tmp1]
+		fld st(6)
+		fadd qword ptr [tmp3]
+		fxch st(1)
+		fadd qword ptr [tmp2]
+		fxch st(7)
+		fsub qword ptr [tmp3]
+		fxch st(1)
+		fstp qword ptr [tmp2]
+		fld st(4)
+		fxch st(3)
+		fsubrp st(2),st(0)//fsubp
+		fxch st(4)
+		fsub st(0),st(5)
+		fxch st(2)
+		faddp st(5),st(0)
+		fld st(2)
+		fsub st(0),st(1)
+		fxch st(5)
+		fstp qword ptr [tmp3]
+		fld qword ptr [tmp1]
+		fld qword ptr [S2]
+		fxch st(4)
+		faddp st(2),st(0)
+		fld st(3)
+		fxch st(1)
+		fadd st(0),st(5)
+		fmulp st(1),st(0)
+
+		fld qword ptr [tmp3]
+		fadd st(0),st(7)
+		fxch st(5)
+		fsubr qword ptr [tmp1]
+		fxch st(5)
+		fstp qword ptr [edi+0*8]
+		fxch st(6)
+		fsubr qword ptr [tmp3]
+		fld st(2)
+		fxch st(1)
+		fstp qword ptr [edi+7*8]
+		fadd qword ptr [tmp2]
+		fxch st(3)
+		fmulp st(4),st(0)
+		fxch st(2)
+		fstp qword ptr [edi+3*8]
+		fld st(1)
+		fadd st(0),st(5)
+		fxch st(1)
+		fsub qword ptr [tmp2]
+		fxch st(2)
+		fsubrp st(5),st(0)//fsubp
+		fstp qword ptr [edi+1*8]
+		fld st(2)
+		fxch st(1)
+		fstp qword ptr [edi+4*8]
+		fxch st(2)
+		fsub st(0),st(1)
+		fxch st(2)
+		faddp st(1),st(0)
+		fxch st(2)
+		fstp qword ptr [edi+6*8]
+		fstp qword ptr [edi+5*8]
+		fstp qword ptr [edi+2*8]
+	L2:
+	  add esi,8*2 // advance one input row (8 words)
+		add edi,8*8 // advance one row of doubles in tmp
+		dec ecx
+		jnz Lrows
+
+// columns: each pass of Lcols reads one tmp column (stride 8*8 bytes) using the 1/8-scaled constants and stores 8 clipped words into block (stride 8*2 bytes)
+    lea esi,[tmp]
+		mov edi,[block]
+		lea edx,[iclip+1024*2] // edx = &iclip[1024], i.e. the table entry for value 0
+		mov ecx,8
+
+    align 16
+Lcols:
+		fld qword ptr [esi+7*8*8]
+		fld st(0)
+		fld qword ptr [esi+1*8*8]
+		fadd st(1),st(0)
+		fld qword ptr [W7_8]
+		fxch st(1)
+		fmul qword ptr [W1mW7_8]
+		fxch st(1)
+		fmulp st(2),st(0)
+		fadd st(0),st(1)
+		fstp qword ptr [tmp2]
+		fld qword ptr [esi+3*8*8]
+		fld st(0)
+		fxch st(3)
+		fmul qword ptr [W1pW7_8]
+		fld qword ptr [esi+5*8*8]
+		fadd st(4),st(0)
+		fmul qword ptr [W3mW5_8]
+		fxch st(1)
+		fsubp st(3),st(0)//fsubrp
+		fld qword ptr [W3_8]
+		fmulp st(4),st(0)
+		fsubr st(0),st(3)
+		fstp qword ptr [tmp3]
+		fld qword ptr [D8]
+		fld qword ptr [esi+0*8*8]
+		fmul st(0),st(1)
+		fxch st(2)
+		fmul qword ptr [W3pW5_8]
+		fld qword ptr [esi+4*8*8]
+		fmulp st(2),st(0)
+		fld qword ptr [esi+6*8*8]
+		fld st(3)
+		fxch st(6)
+		fsubrp st(2),st(0)//fsubp
+		fld qword ptr [esi+2*8*8]
+		fld st(0)
+		fxch st(5)
+		fsub st(0),st(4)
+		fxch st(7)
+		faddp st(4),st(0)
+		fxch st(4)
+		fadd st(0),st(1)
+		fld qword ptr [W6_8]
+		fxch st(2)
+		fmul qword ptr [W2pW6_8]
+		fxch st(2)
+		fmulp st(1),st(0)
+		fxch st(4)
+		fmul qword ptr [W2mW6_8]
+		fld qword ptr [tmp2]
+		fsub qword ptr [tmp3]
+		fxch st(2)
+		fsubr st(0),st(5)
+		fxch st(1)
+		faddp st(5),st(0)
+		fld qword ptr [tmp2]
+		fxch st(2)
+		fstp qword ptr [tmp2]
+		fld st(5)
+		fxch st(2)
+		fadd qword ptr [tmp3]
+		fxch st(6)
+		fsub st(0),st(3)
+		fxch st(2)
+		faddp st(3),st(0)
+		fld st(3)
+		fsub st(0),st(5)
+		fxch st(3)
+		fstp qword ptr [tmp3]
+		fxch st(3)
+		faddp st(4),st(0)
+		fld st(5)
+		fld qword ptr [tmp2]
+		fxch st(7)
+		fsub st(0),st(4)
+		fxch st(7)
+		fadd st(0),st(2)
+		fxch st(1)
+		faddp st(4),st(0)
+		fld qword ptr [S2]
+		fmul st(1),st(0)
+		fxch st(1)
+		fstp qword ptr [tmp1]
+		fld st(4)
+		fadd st(0),st(6)
+		fxch st(2)
+		fsubr qword ptr [tmp2]
+		fxch st(5)
+		fsubrp st(6),st(0)//fsubp
+		fxch st(1)
+		fistp dword ptr [int0] // convert to a 32-bit integer using the current FPU rounding mode
+		fxch st(4)
+		mov eax,[int0]
+		movsx eax,word ptr [edx+2*eax] // clip via the table; NOTE(review): eax is not range-checked, a value outside [-1024,1023] would index outside iclip
+		mov [edi+0*8*2],ax // store as output row 0 of this column; the same fistp/lookup/store pattern repeats for rows 7,3,4,1,6,2,5
+		fistp dword ptr [int7]
+		mov eax,[int7]
+		fld st(0)
+		movsx eax,word ptr [edx+2*eax]
+		mov [edi+7*8*2],ax
+		fadd qword ptr [tmp3]
+		fistp dword ptr [int3]
+		mov eax,[int3]
+		movsx eax,word ptr [edx+2*eax]
+		mov [edi+3*8*2],ax
+		fsub qword ptr [tmp3]
+		fld st(1)
+		fxch st(1)
+		fistp dword ptr [int4]
+		mov eax,[int4]
+		movsx eax,word ptr [edx+2*eax]
+		mov [edi+4*8*2],ax
+		fadd qword ptr [tmp1]
+		fxch st(3)
+		fmulp st(2),st(0)
+		fxch st(2)
+		fistp dword ptr [int1]
+		fxch st(1)
+		mov eax,[int1]
+		movsx eax,word ptr [edx+2*eax]
+		mov [edi+1*8*2],ax
+		fsub qword ptr [tmp1]
+		fld st(2)
+		fsub st(0),st(2)
+		fxch st(1)
+		fistp dword ptr [int6]
+		fxch st(2)
+		mov eax,[int6]
+		faddp st(1),st(0)
+		movsx eax,word ptr [edx+2*eax]
+		mov [edi+6*8*2],ax
+		fistp dword ptr [int2]
+		mov eax,[int2]
+		movsx eax,word ptr [edx+2*eax]
+		mov [edi+2*8*2],ax
+		fistp dword ptr [int5]
+		mov eax,[int5]
+		movsx eax,word ptr [edx+2*eax]
+		mov [edi+5*8*2],ax
+
+	  add esi,8 // next column of tmp (one double further)
+		add edi,2 // next column of block (one word further)
+		dec ecx
+		jnz Lcols
+
+		add esp,SIZE // release the scratch area reserved at the top
+  }
+}
diff --git a/src/filters/source/D2VSource/idctmmx.asm b/src/filters/source/D2VSource/idctmmx.asm
new file mode 100644
index 000000000..7ebe4d7e3
--- /dev/null
+++ b/src/filters/source/D2VSource/idctmmx.asm
@@ -0,0 +1,738 @@
+;
+; idct8x8_xmm.asm
+;
+; Originally provided by Intel at AP-922
+; http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
+; (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm)
+; but in a limited edition.
+; New macro implements a column part for precise iDCT
+; The routine precision now satisfies IEEE standard 1180-1990. 
+;
+; Copyright (c) 2000-2001 Peter Gubanov <peter@elecard.net.ru>
+; Rounding trick Copyright (c) 2000 Michel Lespinasse <walken@zoy.org>
+;
+; http://www.elecard.com/peter/idct.html
+; http://www.linuxvideo.org/mpeg2dec/
+;
+;=============================================================================
+;
+; These examples contain code fragments for first stage iDCT 8x8
+; (for rows) and first stage DCT 8x8 (for columns)
+;
+;=============================================================================
+; 64-bit memory operand helpers: `mptr [addr]` expands to `qword ptr [addr]`.
+; The row macros and the forward column macro use it for their MMX loads/stores.
+mword 	typedef qword
+mptr 	equ mword ptr
+
+; Fixed-point scaling of the inverse transform.
+; SHIFT_INV_ROW is the arithmetic right shift (psrad) applied at the end of the
+; row pass, SHIFT_INV_COL the right shift (psraw) applied at the end of the
+; column pass. With BITS_INV_ACC = 5 they evaluate to 11 and 6.
+BITS_INV_ACC = 5 			; 4 or 5 for IEEE
+SHIFT_INV_ROW = 16 - BITS_INV_ACC
+SHIFT_INV_COL = 1 + BITS_INV_ACC
+RND_INV_ROW = 1024 * (6 - BITS_INV_ACC) ; 1 << (SHIFT_INV_ROW-1)
+RND_INV_COL = 16 * (BITS_INV_ACC - 3) 	; 1 << (SHIFT_INV_COL-1)
+RND_INV_CORR = RND_INV_COL - 1 		; correction -1.0 and round
+
+; Fixed-point scaling of the forward transform.
+; SHIFT_FRW_COL is the left shift (psllw) applied to the inputs of
+; DCT_8_FRW_COL_4. SHIFT_FRW_ROW and RND_FRW_ROW only feed the round_frw_row
+; data word; no code in this file reads them.
+BITS_FRW_ACC = 3 			; 2 or 3 for accuracy
+SHIFT_FRW_COL = BITS_FRW_ACC
+SHIFT_FRW_ROW = BITS_FRW_ACC + 17
+RND_FRW_ROW = 262144 * (BITS_FRW_ACC - 1)	; 1 << (SHIFT_FRW_ROW-1)
+
+; Not referenced again in this file.
+; NOTE(review): presumably consumed by the IAMMX/IAXMM include files - confirm.
+_MMX = 1
+
+.nolist
+
+.586
+
+if @version GE 612
+.mmx
+;mmword	TEXTEQU	<QWORD>
+else
+include IAMMX.INC
+endif
+
+if @version GE 614
+.xmm
+;mm2word	TEXTEQU	<QWORD>			; needed for Streaming SIMD Extensions macros
+else
+include iaxmm.inc				; Streaming SIMD Extensions Emulator Macros
+endif
+
+	.list
+	.model flat
+
+_DATA SEGMENT PARA PUBLIC USE32 'DATA'
+
+; Packed constants, four identical words (or two identical dwords) per qword.
+; one_corr, tg_1_16, tg_2_16, tg_3_16 and ocos_4_16 are the ones read by the
+; macros in this file; round_inv_row, round_inv_col, round_inv_corr,
+; round_frw_row, cos_4_16 and otg_3_16 are defined but not referenced here.
+one_corr 	sword            1,            1,            1,            1
+round_inv_row	dword  RND_INV_ROW,  RND_INV_ROW
+round_inv_col	sword  RND_INV_COL,  RND_INV_COL,  RND_INV_COL, RND_INV_COL
+round_inv_corr	sword RND_INV_CORR, RND_INV_CORR, RND_INV_CORR, RND_INV_CORR
+round_frw_row	dword  RND_FRW_ROW,  RND_FRW_ROW
+; tg_3_16 and cos_4_16 do not fit a signed word at 1<<16 scale, so they are
+; stored minus 1.0; the code adds the multiplicand back after pmulhw.
+  tg_1_16	sword  13036,  13036,  13036,  13036	; tg(1*pi/16) * (1<<16) + 0.5
+  tg_2_16	sword  27146,  27146,  27146,  27146 	; tg(2*pi/16) * (1<<16) + 0.5
+  tg_3_16	sword -21746, -21746, -21746, -21746 	; (tg(3*pi/16) - 1) * (1<<16) + 0.5
+ cos_4_16	sword -19195, -19195, -19195, -19195 	; (cos(4*pi/16) - 1) * (1<<16) + 0.5
+ocos_4_16	sword  23170,  23170,  23170,  23170 	; cos(4*pi/16) * (1<<15) + 0.5
+
+ otg_3_16	sword  21895, 21895, 21895, 21895 	; tg(3*pi/16) * (1<<15) + 0.5
+
+; Per-row rounding constants (two identical dwords each). They are passed as
+; the ROUNDER argument of the row macros, which add them to the even-part sums
+; before the final shift; row k of the block uses rounder_k (see the two
+; procedures at the end of this file).
+;
+; Values for SHIFT_INV_ROW == 12 (BITS_INV_ACC = 4), kept for reference:
+;rounder_0	dword  65536, 65536
+;rounder_4       dword      0,     0
+;rounder_1       dword   7195,  7195
+;rounder_7	dword   1024,  1024
+;rounder_2	dword   4520,  4520
+;rounder_6	dword   1024,  1024
+;rounder_3	dword   2407,  2407
+;rounder_5	dword    240,   240
+
+; Active values, for SHIFT_INV_ROW == 11 (BITS_INV_ACC = 5). Apart from
+; rounder_0 and rounder_4 these are half of the values listed above.
+; NOTE(review): the non-power-of-two values are presumably the "rounding trick"
+; credited in the file header - confirm against the original source.
+rounder_0	dword  65536, 65536
+rounder_4       dword      0,     0
+rounder_1       dword   3597,  3597
+rounder_7	dword    512,   512
+rounder_2	dword   2260,  2260
+rounder_6	dword    512,   512
+rounder_3	dword   1203,  1203
+rounder_5	dword    120,   120
+
+;=============================================================================
+;
+; The first stage iDCT 8x8 - inverse DCTs of rows
+;
+;-----------------------------------------------------------------------------
+; The 8-point inverse DCT direct algorithm
+;-----------------------------------------------------------------------------
+;
+; static const short w[32] = {
+;	FIX(cos_4_16),  FIX(cos_2_16),  FIX(cos_4_16),  FIX(cos_6_16),
+;	FIX(cos_4_16),  FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16),
+;	FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16),  FIX(cos_2_16),
+;	FIX(cos_4_16), -FIX(cos_2_16),  FIX(cos_4_16), -FIX(cos_6_16),
+;	FIX(cos_1_16),  FIX(cos_3_16),  FIX(cos_5_16),  FIX(cos_7_16),
+;	FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16),
+;	FIX(cos_5_16), -FIX(cos_1_16),  FIX(cos_7_16),  FIX(cos_3_16),
+;	FIX(cos_7_16), -FIX(cos_5_16),  FIX(cos_3_16), -FIX(cos_1_16) };
+;
+; #define DCT_8_INV_ROW(x, y)
+; {
+; 	int a0, a1, a2, a3, b0, b1, b2, b3;
+;
+; 	a0 =x[0]*w[0]+x[2]*w[1]+x[4]*w[2]+x[6]*w[3];
+; 	a1 =x[0]*w[4]+x[2]*w[5]+x[4]*w[6]+x[6]*w[7];
+; 	a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11];
+; 	a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15];
+; 	b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19];
+; 	b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23];
+; 	b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27];
+; 	b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31];
+;
+; 	y[0] = SHIFT_ROUND ( a0 + b0 );
+; 	y[1] = SHIFT_ROUND ( a1 + b1 );
+; 	y[2] = SHIFT_ROUND ( a2 + b2 );
+; 	y[3] = SHIFT_ROUND ( a3 + b3 );
+; 	y[4] = SHIFT_ROUND ( a3 - b3 );
+; 	y[5] = SHIFT_ROUND ( a2 - b2 );
+; 	y[6] = SHIFT_ROUND ( a1 - b1 );
+; 	y[7] = SHIFT_ROUND ( a0 - b0 );
+; }
+;
+;-----------------------------------------------------------------------------
+;
+; In this implementation the outputs of the iDCT-1D are multiplied
+; 	for rows 0,4 - by cos_4_16,
+; 	for rows 1,7 - by cos_1_16,
+; 	for rows 2,6 - by cos_2_16,
+; 	for rows 3,5 - by cos_3_16
+; and are shifted to the left for better accuracy
+;
+; For the constants used,
+; 	FIX(float_const) = (short) (float_const * (1<<15) + 0.5)
+;
+;=============================================================================
+
+;=============================================================================
+; MMX code
+;=============================================================================
+
+; Coefficient tables for DCT_8_INV_ROW_1 (plain MMX row pass).
+; Each table is 8 qwords (64 bytes); the macro reads them at TABLE+0, +8, ...,
+; +56. The wNN tags on each line name entries of the reference w[] array shown
+; above and are listed from the most significant word of the qword down to the
+; least significant one, i.e. in the reverse of the order in which the values
+; are written on the line.
+
+; Table for rows 0,4 - constants are multiplied by cos_4_16
+
+tab_i_04 	sword  16384,  16384,  16384, -16384	; movq-> w06 w04 w02 w00
+		sword  21407,   8867,   8867, -21407	; w07 w05 w03 w01
+		sword  16384, -16384,  16384,  16384	; w14 w12 w10 w08
+		sword  -8867,  21407, -21407,  -8867	; w15 w13 w11 w09
+		sword  22725,  12873,  19266, -22725	; w22 w20 w18 w16
+		sword  19266,   4520,  -4520, -12873	; w23 w21 w19 w17
+		sword  12873,   4520,   4520,  19266	; w30 w28 w26 w24
+		sword -22725,  19266, -12873, -22725	; w31 w29 w27 w25
+
+; Table for rows 1,7 - constants are multiplied by cos_1_16
+
+tab_i_17	sword  22725,  22725,  22725, -22725	; movq-> w06 w04 w02 w00
+		sword  29692,  12299,  12299, -29692	; w07 w05 w03 w01
+		sword  22725, -22725,  22725,  22725	; w14 w12 w10 w08
+		sword -12299,  29692, -29692, -12299	; w15 w13 w11 w09
+		sword  31521,  17855,  26722, -31521	; w22 w20 w18 w16
+		sword  26722,   6270,  -6270, -17855	; w23 w21 w19 w17
+		sword  17855,   6270,   6270,  26722	; w30 w28 w26 w24
+		sword -31521,  26722, -17855, -31521	; w31 w29 w27 w25
+
+; Table for rows 2,6 - constants are multiplied by cos_2_16
+
+tab_i_26	sword  21407,  21407,  21407, -21407	; movq-> w06 w04 w02 w00
+		sword  27969,  11585,  11585, -27969	; w07 w05 w03 w01
+		sword  21407, -21407,  21407,  21407	; w14 w12 w10 w08
+		sword -11585,  27969, -27969, -11585	; w15 w13 w11 w09
+		sword  29692,  16819,  25172, -29692	; w22 w20 w18 w16
+		sword  25172,   5906,  -5906, -16819	; w23 w21 w19 w17
+		sword  16819,   5906,   5906,  25172	; w30 w28 w26 w24
+		sword -29692,  25172, -16819, -29692	; w31 w29 w27 w25
+
+; Table for rows 3,5 - constants are multiplied by cos_3_16
+
+tab_i_35	sword  19266,  19266,  19266, -19266	; movq-> w06 w04 w02 w00
+		sword  25172,  10426,  10426, -25172	; w07 w05 w03 w01
+		sword  19266, -19266,  19266,  19266	; w14 w12 w10 w08
+		sword -10426,  25172, -25172, -10426	; w15 w13 w11 w09
+		sword  26722,  15137,  22654, -26722	; w22 w20 w18 w16
+		sword  22654,   5315,  -5315, -15137	; w23 w21 w19 w17
+		sword  15137,   5315,   5315,  22654	; w30 w28 w26 w24
+		sword -26722,  22654, -15137, -26722	; w31 w29 w27 w25
+
+;-----------------------------------------------------------------------------
+
+; DCT_8_INV_ROW_1 - 1-D inverse DCT of one row of 8 words, plain MMX.
+;   INP     - address of the 8 input words (read as qwords at INP and INP+8)
+;   OUT     - address receiving the 8 result words (written at OUT and OUT+8)
+;   TABLE   - 64-byte coefficient table (tab_i_04 / _17 / _26 / _35)
+;   ROUNDER - qword holding two dwords that are added to the even-part sums
+;             (a0..a3), so they end up in both a+b and a-b before the shift
+; The 32-bit sums are shifted right arithmetically by SHIFT_INV_ROW and packed
+; to words with signed saturation (packssdw). All of mm0-mm7 are overwritten.
+; The numbers in the middle comment column are the original author's register
+; bookkeeping; the right-hand column shows register contents, high word first.
+DCT_8_INV_ROW_1 MACRO INP:REQ, OUT:REQ, TABLE:REQ, ROUNDER:REQ
+
+	movq mm0, mptr [INP] 		; 0	; x3 x2 x1 x0
+
+	movq mm1, mptr [INP+8]		; 1	; x7 x6 x5 x4
+	movq mm2, mm0 			; 2	; x3 x2 x1 x0
+
+	movq mm3, mptr [TABLE]		; 3	; w06 w04 w02 w00
+	punpcklwd mm0, mm1 			; x5 x1 x4 x0
+
+	movq mm5, mm0 			; 5	; x5 x1 x4 x0
+	punpckldq mm0, mm0 			; x4 x0 x4 x0
+
+	movq mm4, mptr [TABLE+8] 	; 4	; w07 w05 w03 w01
+	punpckhwd mm2, mm1		; 1	; x7 x3 x6 x2
+
+	pmaddwd mm3, mm0 			; x4*w06+x0*w04 x4*w02+x0*w00
+	movq mm6, mm2 			; 6 	; x7 x3 x6 x2
+
+	movq mm1, mptr [TABLE+32] 	; 1 	; w22 w20 w18 w16
+	punpckldq mm2, mm2 			; x6 x2 x6 x2
+
+	pmaddwd mm4, mm2 			; x6*w07+x2*w05 x6*w03+x2*w01
+	punpckhdq mm5, mm5 			; x5 x1 x5 x1
+
+	pmaddwd mm0, mptr [TABLE+16] 		; x4*w14+x0*w12 x4*w10+x0*w08
+	punpckhdq mm6, mm6 			; x7 x3 x7 x3
+
+	movq mm7, mptr [TABLE+40] 	; 7 	; w23 w21 w19 w17
+	pmaddwd mm1, mm5 			; x5*w22+x1*w20 x5*w18+x1*w16
+
+	paddd mm3, mptr [ROUNDER] 		; +rounder
+	pmaddwd mm7, mm6 			; x7*w23+x3*w21 x7*w19+x3*w17
+
+	pmaddwd mm2, mptr [TABLE+24] 		; x6*w15+x2*w13 x6*w11+x2*w09
+	paddd mm3, mm4 			; 4 	; a1=sum(even1) a0=sum(even0)
+
+	pmaddwd mm5, mptr [TABLE+48] 		; x5*w30+x1*w28 x5*w26+x1*w24
+	movq mm4, mm3 			; 4 	; a1 a0
+
+	pmaddwd mm6, mptr [TABLE+56] 		; x7*w31+x3*w29 x7*w27+x3*w25
+	paddd mm1, mm7 			; 7 	; b1=sum(odd1) b0=sum(odd0)
+
+	paddd mm0, mptr [ROUNDER]		; +rounder
+	psubd mm3, mm1 				; a1-b1 a0-b0
+
+	psrad mm3, SHIFT_INV_ROW 		; y6=a1-b1 y7=a0-b0
+	paddd mm1, mm4 			; 4 	; a1+b1 a0+b0
+
+	paddd mm0, mm2 			; 2 	; a3=sum(even3) a2=sum(even2)
+	psrad mm1, SHIFT_INV_ROW 		; y1=a1+b1 y0=a0+b0
+
+	paddd mm5, mm6 			; 6 	; b3=sum(odd3) b2=sum(odd2)
+	movq mm4, mm0 			; 4 	; a3 a2
+
+	paddd mm0, mm5 				; a3+b3 a2+b2
+	psubd mm4, mm5 			; 5 	; a3-b3 a2-b2
+
+	psrad mm0, SHIFT_INV_ROW 		; y3=a3+b3 y2=a2+b2
+	psrad mm4, SHIFT_INV_ROW 		; y4=a3-b3 y5=a2-b2
+
+	packssdw mm1, mm0 		; 0 	; y3 y2 y1 y0
+	packssdw mm4, mm3 		; 3 	; y6 y7 y4 y5
+
+	; swap the two words inside each dword: y6 y7 y4 y5 -> y7 y6 y5 y4
+	movq mm7, mm4 			; 7 	; y6 y7 y4 y5
+	psrld mm4, 16 				; 0 y6 0 y4
+
+	pslld mm7, 16 				; y7 0 y5 0
+	movq mptr [OUT], mm1 		; 1 	; save y3 y2 y1 y0
+                             	
+	por mm7, mm4 			; 4 	; y7 y6 y5 y4
+	movq mptr [OUT+8], mm7 		; 7 	; save y7 y6 y5 y4
+ENDM
+
+;=============================================================================
+; code for Pentium III
+;=============================================================================
+
+; Coefficient tables for DCT_8_INV_ROW_1_s (pshufw row pass).
+; Same size as the plain-MMX tables (8 qwords, read at TABLE+0 ... TABLE+56)
+; but with the words arranged for the x2 x0 / x6 x4 / x3 x1 / x7 x5 operand
+; pairs that the macro builds with pshufw. The wNN tags again run from the most
+; significant word of each qword down to the least significant one.
+
+; Table for rows 0,4 - constants are multiplied by cos_4_16
+
+tab_i_04_s 	sword 16384, 21407, 16384, 8867 ; movq-> w05 w04 w01 w00
+		sword 16384, 8867, -16384, -21407 ; w07 w06 w03 w02
+		sword 16384, -8867, 16384, -21407 ; w13 w12 w09 w08
+		sword -16384, 21407, 16384, -8867 ; w15 w14 w11 w10
+		sword 22725, 19266, 19266, -4520 ; w21 w20 w17 w16
+		sword 12873, 4520, -22725, -12873 ; w23 w22 w19 w18
+		sword 12873, -22725, 4520, -12873 ; w29 w28 w25 w24
+		sword 4520, 19266, 19266, -22725 ; w31 w30 w27 w26
+
+; Table for rows 1,7 - constants are multiplied by cos_1_16
+
+tab_i_17_s	sword 22725, 29692, 22725, 12299 ; movq-> w05 w04 w01 w00
+		sword 22725, 12299, -22725, -29692 ; w07 w06 w03 w02
+		sword 22725, -12299, 22725, -29692 ; w13 w12 w09 w08
+		sword -22725, 29692, 22725, -12299 ; w15 w14 w11 w10
+		sword 31521, 26722, 26722, -6270 ; w21 w20 w17 w16
+		sword 17855, 6270, -31521, -17855 ; w23 w22 w19 w18
+		sword 17855, -31521, 6270, -17855 ; w29 w28 w25 w24
+		sword 6270, 26722, 26722, -31521 ; w31 w30 w27 w26
+
+; Table for rows 2,6 - constants are multiplied by cos_2_16
+
+tab_i_26_s	sword 21407, 27969, 21407, 11585 ; movq-> w05 w04 w01 w00
+		sword 21407, 11585, -21407, -27969 ; w07 w06 w03 w02
+		sword 21407, -11585, 21407, -27969 ; w13 w12 w09 w08
+		sword -21407, 27969, 21407, -11585 ; w15 w14 w11 w10
+		sword 29692, 25172, 25172, -5906 ; w21 w20 w17 w16
+		sword 16819, 5906, -29692, -16819 ; w23 w22 w19 w18
+		sword 16819, -29692, 5906, -16819 ; w29 w28 w25 w24
+		sword 5906, 25172, 25172, -29692 ; w31 w30 w27 w26
+
+; Table for rows 3,5 - constants are multiplied by cos_3_16
+
+tab_i_35_s	sword 19266, 25172, 19266, 10426 ; movq-> w05 w04 w01 w00
+		sword 19266, 10426, -19266, -25172 ; w07 w06 w03 w02
+		sword 19266, -10426, 19266, -25172 ; w13 w12 w09 w08
+		sword -19266, 25172, 19266, -10426 ; w15 w14 w11 w10
+		sword 26722, 22654, 22654, -5315 ; w21 w20 w17 w16
+		sword 15137, 5315, -26722, -15137 ; w23 w22 w19 w18
+		sword 15137, -26722, 5315, -15137 ; w29 w28 w25 w24
+		sword 5315, 22654, 22654, -26722 ; w31 w30 w27 w26
+
+;-----------------------------------------------------------------------------
+
+; DCT_8_INV_ROW_1_s - 1-D inverse DCT of one row of 8 words, pshufw variant.
+; Same contract as DCT_8_INV_ROW_1 (INP, OUT, TABLE, ROUNDER; results shifted
+; right by SHIFT_INV_ROW and packed with signed saturation; mm0-mm7 are all
+; overwritten), but the input words are gathered with pshufw instead of
+; punpck* and TABLE must be one of the tab_i_*_s tables. pshufw is not part of
+; the base MMX instruction set, which is why this path is labelled
+; "code for Pentium III" above.
+DCT_8_INV_ROW_1_s MACRO INP:REQ, OUT:REQ, TABLE:REQ, ROUNDER:REQ
+
+	movq 	mm0, mptr [INP] 	; 0 	; x3 x2 x1 x0
+
+	movq 	mm1, mptr [INP+8]	; 1 	; x7 x6 x5 x4
+	movq 	mm2, mm0 		; 2 	; x3 x2 x1 x0
+
+	movq 	mm3, mptr [TABLE] 	; 3 	; w05 w04 w01 w00
+	pshufw	mm0, mm0, 10001000b 	; x2 x0 x2 x0
+
+	movq 	mm4, mptr [TABLE+8] 	; 4 	; w07 w06 w03 w02
+	movq 	mm5, mm1		; 5 	; x7 x6 x5 x4
+	pmaddwd mm3, mm0 		; x2*w05+x0*w04 x2*w01+x0*w00
+
+	movq 	mm6, mptr [TABLE+32] 	; 6 	; w21 w20 w17 w16
+	pshufw 	mm1, mm1, 10001000b 		; x6 x4 x6 x4
+	pmaddwd mm4, mm1 			; x6*w07+x4*w06 x6*w03+x4*w02
+
+	movq 	mm7, mptr [TABLE+40] 	; 7 	; w23 w22 w19 w18
+	pshufw 	mm2, mm2, 11011101b 		; x3 x1 x3 x1
+	pmaddwd mm6, mm2 			; x3*w21+x1*w20 x3*w17+x1*w16
+
+	pshufw 	mm5, mm5, 11011101b 		; x7 x5 x7 x5
+	pmaddwd mm7, mm5 			; x7*w23+x5*w22 x7*w19+x5*w18
+
+	paddd 	mm3, mptr [ROUNDER] 		; +rounder
+
+	pmaddwd mm0, mptr [TABLE+16] 		; x2*w13+x0*w12 x2*w09+x0*w08
+	paddd 	mm3, mm4 		; 4 	; a1=sum(even1) a0=sum(even0)
+
+	pmaddwd mm1, mptr [TABLE+24] 		; x6*w15+x4*w14 x6*w11+x4*w10
+	movq 	mm4, mm3 		; 4 	; a1 a0
+
+	pmaddwd mm2, mptr [TABLE+48] 		; x3*w29+x1*w28 x3*w25+x1*w24
+	paddd 	mm6, mm7 		; 7 	; b1=sum(odd1) b0=sum(odd0)
+
+	pmaddwd mm5, mptr [TABLE+56] 		; x7*w31+x5*w30 x7*w27+x5*w26
+	paddd mm3, mm6 				; a1+b1 a0+b0
+
+	paddd mm0, mptr [ROUNDER] 		; +rounder
+	psrad mm3, SHIFT_INV_ROW 		; y1=a1+b1 y0=a0+b0
+
+	paddd mm0, mm1 			; 1 	; a3=sum(even3) a2=sum(even2)
+	psubd mm4, mm6 			; 6 	; a1-b1 a0-b0
+
+	movq mm7, mm0 			; 7 	; a3 a2
+	paddd mm2, mm5 			; 5 	; b3=sum(odd3) b2=sum(odd2)
+
+	paddd mm0, mm2 				; a3+b3 a2+b2
+	psrad mm4, SHIFT_INV_ROW 		; y6=a1-b1 y7=a0-b0
+
+	psubd mm7, mm2 			; 2 	; a3-b3 a2-b2
+	psrad mm0, SHIFT_INV_ROW 		; y3=a3+b3 y2=a2+b2
+
+	psrad mm7, SHIFT_INV_ROW 		; y4=a3-b3 y5=a2-b2
+
+	packssdw mm3, mm0 		; 0 	; y3 y2 y1 y0
+
+	packssdw mm7, mm4 		; 4 	; y6 y7 y4 y5
+
+	movq mptr [OUT], mm3 		; 3 	; save y3 y2 y1 y0
+	; one pshufw does the word swap that takes four instructions in the MMX macro
+	pshufw mm7, mm7, 10110001b 		; y7 y6 y5 y4
+
+	movq mptr [OUT+8], mm7 		; 7 	; save y7 y6 y5 y4
+ENDM
+
+;=============================================================================
+;
+;=============================================================================
+
+;=============================================================================
+;
+; The first stage DCT 8x8 - forward DCTs of columns
+;
+; The outputs are multiplied
+; for rows 0,4 - by cos_4_16,
+; for rows 1,7 - by cos_1_16,
+; for rows 2,6 - by cos_2_16,
+; for rows 3,5 - by cos_3_16
+; and are shifted to the left for better accuracy
+;
+;-----------------------------------------------------------------------------
+;
+; The 8-point scaled forward DCT algorithm (26a8m)
+;
+;-----------------------------------------------------------------------------
+;
+; #define DCT_8_FRW_COL(x, y)
+;{
+; short t0, t1, t2, t3, t4, t5, t6, t7;
+; short tp03, tm03, tp12, tm12, tp65, tm65;
+; short tp465, tm465, tp765, tm765;
+;
+; t0 = LEFT_SHIFT ( x[0] + x[7] );
+; t1 = LEFT_SHIFT ( x[1] + x[6] );
+; t2 = LEFT_SHIFT ( x[2] + x[5] );
+; t3 = LEFT_SHIFT ( x[3] + x[4] );
+; t4 = LEFT_SHIFT ( x[3] - x[4] );
+; t5 = LEFT_SHIFT ( x[2] - x[5] );
+; t6 = LEFT_SHIFT ( x[1] - x[6] );
+; t7 = LEFT_SHIFT ( x[0] - x[7] );
+;
+; tp03 = t0 + t3;
+; tm03 = t0 - t3;
+; tp12 = t1 + t2;
+; tm12 = t1 - t2;
+;
+; y[0] = tp03 + tp12;
+; y[4] = tp03 - tp12;
+;
+; y[2] = tm03 + tm12 * tg_2_16;
+; y[6] = tm03 * tg_2_16 - tm12;
+;
+; tp65 =(t6 +t5 )*cos_4_16;
+; tm65 =(t6 -t5 )*cos_4_16;
+;
+; tp765 = t7 + tp65;
+; tm765 = t7 - tp65;
+; tp465 = t4 + tm65;
+; tm465 = t4 - tm65;
+;
+; y[1] = tp765 + tp465 * tg_1_16;
+; y[7] = tp765 * tg_1_16 - tp465;
+; y[5] = tm765 * tg_3_16 + tm465;
+; y[3] = tm765 - tm465 * tg_3_16;
+;}
+;
+;=============================================================================
+; DCT_8_FRW_COL_4 - forward 8-point DCT down the columns, four columns per
+; invocation (every movq carries four adjacent words of one row).
+;   INP - address of row 0 of the input; row k is read at INP + k*16
+;   OUT - address of row 0 of the output; row k is written at OUT + k*16
+; Follows the DCT_8_FRW_COL reference code in the comment block above. Inputs
+; are pre-shifted left by SHIFT_FRW_COL (t5/t6 by one bit more, because they
+; are next multiplied by ocos_4_16, which is scaled by 1<<15 rather than 1<<16).
+; tg_3_16 is stored minus 1.0, so each product with it is followed by a paddsw
+; of the multiplicand. Uses saturating word arithmetic throughout and
+; overwrites mm0-mm7.
+; Neither procedure at the end of this file invokes this macro.
+DCT_8_FRW_COL_4 MACRO INP:REQ, OUT:REQ
+LOCAL x0, x1, x2, x3, x4, x5, x6, x7
+LOCAL y0, y1, y2, y3, y4, y5, y6, y7
+x0 equ [INP + 0*16]
+x1 equ [INP + 1*16]
+x2 equ [INP + 2*16]
+x3 equ [INP + 3*16]
+x4 equ [INP + 4*16]
+x5 equ [INP + 5*16]
+x6 equ [INP + 6*16]
+x7 equ [INP + 7*16]
+y0 equ [OUT + 0*16]
+y1 equ [OUT + 1*16]
+y2 equ [OUT + 2*16]
+y3 equ [OUT + 3*16]
+y4 equ [OUT + 4*16]
+y5 equ [OUT + 5*16]
+y6 equ [OUT + 6*16]
+y7 equ [OUT + 7*16]
+movq mm0, x1 ; 0 ; x1
+movq mm1, x6 ; 1 ; x6
+movq mm2, mm0 ; 2 ; x1
+movq mm3, x2 ; 3 ; x2
+paddsw mm0, mm1 ; t1 = x[1] + x[6]
+movq mm4, x5 ; 4 ; x5
+psllw mm0, SHIFT_FRW_COL ; t1
+movq mm5, x0 ; 5 ; x0
+paddsw mm4, mm3 ; t2 = x[2] + x[5]
+paddsw mm5, x7 ; t0 = x[0] + x[7]
+psllw mm4, SHIFT_FRW_COL ; t2
+movq mm6, mm0 ; 6 ; t1
+psubsw mm2, mm1 ; 1 ; t6 = x[1] - x[6]
+movq mm1, mptr tg_2_16 ; 1 ; tg_2_16
+psubsw mm0, mm4 ; tm12 = t1 - t2
+movq mm7, x3 ; 7 ; x3
+pmulhw mm1, mm0 ; tm12*tg_2_16
+paddsw mm7, x4 ; t3 = x[3] + x[4]
+psllw mm5, SHIFT_FRW_COL ; t0
+paddsw mm6, mm4 ; 4 ; tp12 = t1 + t2
+psllw mm7, SHIFT_FRW_COL ; t3
+movq mm4, mm5 ; 4 ; t0
+psubsw mm5, mm7 ; tm03 = t0 - t3
+paddsw mm1, mm5 ; y2 = tm03 + tm12*tg_2_16
+paddsw mm4, mm7 ; 7 ; tp03 = t0 + t3
+por mm1, mptr one_corr ; correction y2 +0.5
+psllw mm2, SHIFT_FRW_COL+1 ; t6
+pmulhw mm5, mptr tg_2_16 ; tm03*tg_2_16
+movq mm7, mm4 ; 7 ; tp03
+psubsw mm3, x5 ; t5 = x[2] - x[5]
+psubsw mm4, mm6 ; y4 = tp03 - tp12
+movq y2, mm1 ; 1 ; save y2
+paddsw mm7, mm6 ; 6 ; y0 = tp03 + tp12
+movq mm1, x3 ; 1 ; x3
+psllw mm3, SHIFT_FRW_COL+1 ; t5
+psubsw mm1, x4 ; t4 = x[3] - x[4]
+movq mm6, mm2 ; 6 ; t6
+movq y4, mm4 ; 4 ; save y4
+paddsw mm2, mm3 ; t6 + t5
+pmulhw mm2, mptr ocos_4_16 ; tp65 = (t6 + t5)*cos_4_16
+psubsw mm6, mm3 ; 3 ; t6 - t5
+pmulhw mm6, mptr ocos_4_16 ; tm65 = (t6 - t5)*cos_4_16
+psubsw mm5, mm0 ; 0 ; y6 = tm03*tg_2_16 - tm12
+por mm5, mptr one_corr ; correction y6 +0.5
+psllw mm1, SHIFT_FRW_COL ; t4
+por mm2, mptr one_corr ; correction tp65 +0.5
+movq mm4, mm1 ; 4 ; t4
+movq mm3, x0 ; 3 ; x0
+paddsw mm1, mm6 ; tp465 = t4 + tm65
+psubsw mm3, x7 ; t7 = x[0] - x[7]
+psubsw mm4, mm6 ; 6 ; tm465 = t4 - tm65
+movq mm0, mptr tg_1_16 ; 0 ; tg_1_16
+psllw mm3, SHIFT_FRW_COL ; t7
+movq mm6, mptr tg_3_16 ; 6 ; tg_3_16
+pmulhw mm0, mm1 ; tp465*tg_1_16
+movq y0, mm7 ; 7 ; save y0
+pmulhw mm6, mm4 ; tm465*tg_3_16
+movq y6, mm5 ; 5 ; save y6
+movq mm7, mm3 ; 7 ; t7
+movq mm5, mptr tg_3_16 ; 5 ; tg_3_16
+psubsw mm7, mm2 ; tm765 = t7 - tp65
+paddsw mm3, mm2 ; 2 ; tp765 = t7 + tp65
+pmulhw mm5, mm7 ; tm765*tg_3_16
+paddsw mm0, mm3 ; y1 = tp765 + tp465*tg_1_16
+paddsw mm6, mm4 ; tm465*tg_3_16
+pmulhw mm3, mptr tg_1_16 ; tp765*tg_1_16
+por mm0, mptr one_corr ; correction y1 +0.5
+paddsw mm5, mm7 ; tm765*tg_3_16
+psubsw mm7, mm6 ; 6 ; y3 = tm765 - tm465*tg_3_16
+movq y1, mm0 ; 0 ; save y1
+paddsw mm5, mm4 ; 4 ; y5 = tm765*tg_3_16 + tm465
+movq y3, mm7 ; 7 ; save y3
+psubsw mm3, mm1 ; 1 ; y7 = tp765*tg_1_16 - tp465
+movq y5, mm5 ; 5 ; save y5
+movq y7, mm3 ; 3 ; save y7
+ENDM
+
+; DCT_8_INV_COL_4 - inverse 8-point DCT down the columns, four columns per
+; invocation (every movq carries four adjacent words of one row).
+;   INP - address of row 0 of the input; row k is read at INP + 16*k
+;   OUT - address of row 0 of the output; row k is written at OUT + 16*k
+; Results are shifted right arithmetically by SHIFT_INV_COL. All arithmetic is
+; saturating word arithmetic and mm0-mm7 are all overwritten.
+; b0 and b3 are parked in OUT rows 3 and 5 until they are needed (the original
+; SCRATCH stores are left commented out). Every input row has already been
+; loaded into a register before the OUT row with the same index is written,
+; which is what lets both procedures below call this macro with INP == OUT.
+DCT_8_INV_COL_4 MACRO INP:REQ, OUT:REQ
+	movq	mm0, qword ptr tg_3_16
+
+	movq	mm3, qword ptr [INP+16*3]
+	movq	mm1, mm0			; tg_3_16
+
+	movq	mm5, qword ptr [INP+16*5]
+	pmulhw	mm0, mm3			; x3*(tg_3_16-1)
+
+	movq	mm4, qword ptr tg_1_16
+	pmulhw	mm1, mm5			; x5*(tg_3_16-1)
+
+	movq	mm7, qword ptr [INP+16*7]
+	movq	mm2, mm4			; tg_1_16
+
+	movq	mm6, qword ptr [INP+16*1]
+	pmulhw	mm4, mm7			; x7*tg_1_16
+
+	paddsw	mm0, mm3			; x3*tg_3_16
+	pmulhw	mm2, mm6			; x1*tg_1_16
+
+	paddsw	mm1, mm3			; x3+x5*(tg_3_16-1)
+	psubsw	mm0, mm5			; x3*tg_3_16-x5 = tm35
+
+	movq	mm3, qword ptr ocos_4_16
+	paddsw	mm1, mm5			; x3+x5*tg_3_16 = tp35
+
+	paddsw	mm4, mm6			; x1+tg_1_16*x7 = tp17
+	psubsw	mm2, mm7			; x1*tg_1_16-x7 = tm17
+
+	movq	mm5, mm4			; tp17
+	movq	mm6, mm2			; tm17
+
+	paddsw	mm5, mm1			; tp17+tp35 = b0
+	psubsw	mm6, mm0			; tm17-tm35 = b3
+
+	psubsw	mm4, mm1			; tp17-tp35 = t1
+	paddsw	mm2, mm0			; tm17+tm35 = t2
+
+	movq	mm7, qword ptr tg_2_16
+	movq	mm1, mm4			; t1
+
+;	movq	qword ptr [SCRATCH+0], mm5	; save b0
+	movq	qword ptr [OUT+3*16], mm5	; save b0
+	paddsw	mm1, mm2			; t1+t2
+
+;	movq	qword ptr [SCRATCH+8], mm6	; save b3
+	movq	qword ptr [OUT+5*16], mm6	; save b3
+	psubsw	mm4, mm2			; t1-t2
+
+	movq	mm5, qword ptr [INP+2*16]
+	movq	mm0, mm7			; tg_2_16
+
+	movq	mm6, qword ptr [INP+6*16]
+	pmulhw	mm0, mm5			; x2*tg_2_16
+
+	pmulhw	mm7, mm6			; x6*tg_2_16
+; slot
+	pmulhw	mm1, mm3			; ocos_4_16*(t1+t2) = b1/2
+; slot
+	movq	mm2, qword ptr [INP+0*16]
+	pmulhw	mm4, mm3			; ocos_4_16*(t1-t2) = b2/2
+
+	psubsw	mm0, mm6			; x2*tg_2_16-x6 = tm26
+	movq	mm3, mm2			; x0
+
+	movq	mm6, qword ptr [INP+4*16]
+	paddsw	mm7, mm5			; x2+x6*tg_2_16 = tp26
+
+	paddsw	mm2, mm6			; x0+x4 = tp04
+	psubsw	mm3, mm6			; x0-x4 = tm04
+
+	movq	mm5, mm2			; tp04
+	movq	mm6, mm3			; tm04
+
+	psubsw	mm2, mm7			; tp04-tp26 = a3
+	paddsw	mm3, mm0			; tm04+tm26 = a1
+
+	; ocos_4_16 is scaled by 1<<15, so the pmulhw results above are halved
+	paddsw mm1, mm1				; b1
+	paddsw mm4, mm4				; b2
+
+	paddsw	mm5, mm7			; tp04+tp26 = a0
+	psubsw	mm6, mm0			; tm04-tm26 = a2
+
+	movq	mm7, mm3			; a1
+	movq	mm0, mm6			; a2
+
+	paddsw	mm3, mm1			; a1+b1
+	paddsw	mm6, mm4			; a2+b2
+
+	psraw	mm3, SHIFT_INV_COL		; dst1
+	psubsw	mm7, mm1			; a1-b1
+
+	psraw	mm6, SHIFT_INV_COL		; dst2
+	psubsw	mm0, mm4			; a2-b2
+
+;	movq	mm1, qword ptr [SCRATCH+0]	; load b0
+	movq	mm1, qword ptr [OUT+3*16]	; load b0
+	psraw	mm7, SHIFT_INV_COL		; dst6
+
+	movq	mm4, mm5			; a0
+	psraw	mm0, SHIFT_INV_COL		; dst5
+
+	movq	qword ptr [OUT+1*16], mm3
+	paddsw	mm5, mm1			; a0+b0
+
+	movq	qword ptr [OUT+2*16], mm6
+	psubsw	mm4, mm1			; a0-b0
+
+;	movq	mm3, qword ptr [SCRATCH+8]	; load b3
+	movq	mm3, qword ptr [OUT+5*16]	; load b3
+	psraw	mm5, SHIFT_INV_COL		; dst0
+
+	movq	mm6, mm2			; a3
+	psraw	mm4, SHIFT_INV_COL		; dst7
+
+	movq	qword ptr [OUT+5*16], mm0
+	paddsw	mm2, mm3			; a3+b3
+
+	movq	qword ptr [OUT+6*16], mm7
+	psubsw	mm6, mm3			; a3-b3
+
+	movq	qword ptr [OUT+0*16], mm5
+	psraw	mm2, SHIFT_INV_COL		; dst3
+
+	movq	qword ptr [OUT+7*16], mm4
+	psraw	mm6, SHIFT_INV_COL		; dst4
+
+	movq	qword ptr [OUT+3*16], mm2
+
+	movq	qword ptr [OUT+4*16], mm6
+ENDM
+
+_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
+
+;
+; extern "C" void __fastcall MMX_IDCT (short *src_result);
+;
+; In-place 8x8 inverse DCT using plain MMX instructions.
+; @MMX_IDCT@4 is the __fastcall-decorated form of MMX_IDCT (4 bytes of
+; arguments); the block pointer arrives in ECX and is copied to EAX.
+; The eight rows (16 bytes apart) are transformed first, each with the table
+; that matches its index (0,4 / 1,7 / 2,6 / 3,5) and with rounder_<row>; then
+; the columns are transformed four at a time. Overwrites EAX and mm0-mm7.
+; NOTE(review): no EMMS is issued before returning - presumably the caller
+; does that before it touches the x87 FPU again; confirm.
+;
+public  @MMX_IDCT@4
+
+@MMX_IDCT@4 proc near
+	mov     eax, ecx          ; source
+
+	DCT_8_INV_ROW_1	[eax+0], [eax+0], tab_i_04, rounder_0
+	DCT_8_INV_ROW_1	[eax+16], [eax+16], tab_i_17, rounder_1
+	DCT_8_INV_ROW_1	[eax+32], [eax+32], tab_i_26, rounder_2
+	DCT_8_INV_ROW_1	[eax+48], [eax+48], tab_i_35, rounder_3
+	DCT_8_INV_ROW_1	[eax+64], [eax+64], tab_i_04, rounder_4
+	DCT_8_INV_ROW_1	[eax+80], [eax+80], tab_i_35, rounder_5
+	DCT_8_INV_ROW_1	[eax+96], [eax+96], tab_i_26, rounder_6
+	DCT_8_INV_ROW_1	[eax+112], [eax+112], tab_i_17, rounder_7
+
+	; columns 0-3, then columns 4-7 (8 bytes further into every row)
+	DCT_8_INV_COL_4 [eax+0],[eax+0]
+	DCT_8_INV_COL_4 [eax+8],[eax+8]
+
+	ret    
+
+@MMX_IDCT@4 ENDP
+
+_TEXT ENDS
+
+_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
+
+;
+; extern "C" void __fastcall SSEMMX_IDCT (short *src_result);
+;
+; In-place 8x8 inverse DCT; same structure as the plain-MMX procedure, but the
+; row pass uses DCT_8_INV_ROW_1_s (pshufw) with the tab_i_*_s tables. The
+; column pass is the shared DCT_8_INV_COL_4 macro.
+; @SSEMMX_IDCT@4 is the __fastcall-decorated form of SSEMMX_IDCT (4 bytes of
+; arguments); the block pointer arrives in ECX and is copied to EAX.
+; Overwrites EAX and mm0-mm7.
+; NOTE(review): no EMMS is issued before returning - presumably the caller
+; does that before it touches the x87 FPU again; confirm.
+;
+public  @SSEMMX_IDCT@4
+
+@SSEMMX_IDCT@4 proc near
+	mov     eax, ecx          ; source
+
+	DCT_8_INV_ROW_1_s [eax+0], [eax+0], tab_i_04_s, rounder_0
+	DCT_8_INV_ROW_1_s [eax+16], [eax+16], tab_i_17_s, rounder_1
+	DCT_8_INV_ROW_1_s [eax+32], [eax+32], tab_i_26_s, rounder_2
+	DCT_8_INV_ROW_1_s [eax+48], [eax+48], tab_i_35_s, rounder_3
+	DCT_8_INV_ROW_1_s [eax+64], [eax+64], tab_i_04_s, rounder_4
+	DCT_8_INV_ROW_1_s [eax+80], [eax+80], tab_i_35_s, rounder_5
+	DCT_8_INV_ROW_1_s [eax+96], [eax+96], tab_i_26_s, rounder_6
+	DCT_8_INV_ROW_1_s [eax+112], [eax+112], tab_i_17_s, rounder_7
+
+	; columns 0-3, then columns 4-7 (8 bytes further into every row)
+	DCT_8_INV_COL_4 [eax+0],[eax+0]
+	DCT_8_INV_COL_4 [eax+8],[eax+8]
+
+	ret
+
+@SSEMMX_IDCT@4 ENDP
+
+_TEXT ENDS
+
+END
+\ No newline at end of file
diff --git a/src/filters/source/D2VSource/idctref.cpp b/src/filters/source/D2VSource/idctref.cpp
new file mode 100644
index 000000000..74b695ce1
--- /dev/null
+++ b/src/filters/source/D2VSource/idctref.cpp
@@ -0,0 +1,362 @@
+#include "stdafx.h"
+
+/* idctref_miha.c, Inverse Discrete Cosine Transform, double precision */
+
+/*************************************************************/
+/*                                                           */
+/* x87 hand-optimized assembly by Miha Peternel              */
+/*                                     27.11. - 20.1.2001    */
+/*                                                           */
+/* You are free to use this code in your project if:         */
+/* - no changes are made to this message                     */
+/* - any changes to this code are publicly available         */
+/* - your project documentation contains the following text: */
+/*   "This software contains fast high-quality IDCT decoder  */
+/*    by Miha Peternel."                                     */
+/*                                                           */
+/*************************************************************/
+
+/*  Perform IEEE 1180 reference (64-bit floating point, separable 8x1
+ *  direct matrix multiply) Inverse Discrete Cosine Transform
+*/
+
+#define ModelX 123 // enable C-level optimizations by Miha Peternel
+
+/* Here we use math.h to generate constants.  Compiler results may
+   vary a little */
+
+#include <math.h>
+
+#define M_PI	3.1415926535897932384626433832795
+/* 0.5 as a double in memory: REF_IDCT loads it with fld and adds it to every
+   output value before flooring */
+const static double HALF = 0.5;
+
+/* private data */
+/* iclip[] is filled by Initialize_REF_IDCT() (only when ModelX is defined)
+   with the values -1024..1023 clamped to [-256,255]; iclp points at its
+   middle so that it can be indexed with those signed values directly.
+   REF_IDCT() in this file does not read the table; it clamps with cmov. */
+static short iclip[1024+1024]; /* clipping table */
+static short *iclp;
+
+/* cosine transform matrix for 8x1 IDCT */
+/* indexed c[freq][time]; filled by Initialize_REF_IDCT() */
+static double c[8][8];
+
+/* Build the lookup tables used by the reference IDCT.
+ *
+ * c[u][x] receives the 8-point IDCT basis: cos((pi/8) * u * (x + 0.5)),
+ * weighted by sqrt(1/8) for the DC row (u == 0) and by 1/2 for all others.
+ * When ModelX is defined, the clipping table is set up as well: iclp is
+ * pointed at the middle of iclip[] and iclp[v] holds v clamped to
+ * [-256, 255] for every v in [-1024, 1023].
+ */
+void Initialize_REF_IDCT()
+{
+  for (int u = 0; u < 8; ++u)
+  {
+    double weight;
+    if (u == 0)
+      weight = sqrt(0.125);
+    else
+      weight = 0.5;
+
+    for (int x = 0; x < 8; ++x)
+    {
+      c[u][x] = weight*cos((M_PI/8.0)*u*(x + 0.5));
+    }
+  }
+
+#ifdef ModelX
+  iclp = iclip+1024;
+  for (int v = -1024; v < 1024; ++v)
+  {
+    if (v < -256)
+      iclp[v] = -256;
+    else if (v > 255)
+      iclp[v] = 255;
+    else
+      iclp[v] = static_cast<short>(v);
+  }
+#endif
+}
+
+/*
+ * REF_IDCT - in-place 8x8 inverse DCT of `block` (64 shorts) in double
+ * precision, written in x87 inline assembly.
+ *
+ * Requires Initialize_REF_IDCT() to have filled c[][] beforehand.
+ *
+ *   1. Fast exit: if every coefficient is zero (bit 0 of block[63] is
+ *      ignored), block[62..63] is cleared and the function returns.
+ *   2. Row pass:    tmp[row][t] = sum over k of block[row][k] * c[k][t]
+ *   3. Column pass: rnd[t][col] = 0.5 + sum over k of tmp[k][col] * c[k][t]
+ *   4. Every rnd[] value is floored (fistp with the x87 rounding control set
+ *      to "round down"), clamped to [-256, 255] and written back to block.
+ *
+ * The caller's x87 control word is saved on entry to step 4 and restored
+ * before returning.
+ *
+ * The loops use "sub reg,0x80000001 / js / test / jnz": the subtraction
+ * decrements the counter by one and flips its sign bit, and both branches
+ * target the same label, so each loop simply runs its initial count (8) times.
+ */
+void REF_IDCT(short *block)
+{
+  double tmp[64];
+	double rnd[64];
+	int int0, int1, int2, int3, int4, int5, int6, int7;
+	unsigned short fpold;
+	unsigned short fpnew;
+
+	// view the 64 shorts as 32 ints so that two coefficients are tested at once
+	int *b = (int *) block;
+
+  // b[31] holds block[62] (low half) and block[63] (high half); ~0x10000
+  // masks out bit 0 of block[63] so that it does not count as "non-zero"
+  if( !(b[0]|(b[31]&~0x10000)) )
+	{
+	  if( b[ 1]|b[ 2]|b[ 3]|b[ 4]|b[ 5]|b[ 6] )
+		  goto normal;
+	  if( b[ 7]|b[ 8]|b[ 9]|b[10]|b[11]|b[12] )
+		  goto normal;
+	  if( b[13]|b[14]|b[15]|b[16]|b[17]|b[18] )
+		  goto normal;
+	  if( b[19]|b[20]|b[21]|b[22]|b[23]|b[24] )
+		  goto normal;
+	  if( b[25]|b[26]|b[27]|b[28]|b[29]|b[30] )
+		  goto normal;
+		// all-zero block: the result is all zeros, only the masked bit needs clearing
+		b[31]=0;
+		return;
+	}
+normal:
+
+	__asm
+	{
+		// do the IDCT
+		// ---- row pass: esi = current input row, eax = c, edi = tmp ----
+		mov esi,[block]
+		lea eax,[c]
+		lea edi,[tmp]
+		//mov ebx,8
+		mov ebx,8 // 0x77000000 // 8
+		align 16
+	__col1:
+			// OR together coefficients 1..7 of this row: word 1 zero-extended,
+			// then dwords covering words 2-3, 4-5 and 6-7
+	    movzx edx,word ptr [esi+1*2]
+			mov   ecx,[esi+2*2]
+			or    edx,[esi+4*2]
+			or    ecx,[esi+6*2]
+			or edx,ecx
+			//mov ecx,8
+			mov ecx,8/2 // 0x77000000 // 8 (mov leaves the flags of the OR intact)
+
+			jnz __row1
+				// only the first coefficient is non-zero: every output of the row
+				// is block[row][0]*c[0][0] (c[0][t] is the same for every t)
+				fild  word ptr [esi+0*2]
+				fmul qword ptr [eax+0*8*8]
+				fst  qword ptr [edi+0*8]
+				fst  qword ptr [edi+1*8]
+				fst  qword ptr [edi+2*8]
+				fst  qword ptr [edi+3*8]
+				fst  qword ptr [edi+4*8]
+				fst  qword ptr [edi+5*8]
+				fst  qword ptr [edi+6*8]
+				fstp qword ptr [edi+7*8]
+				add edi,8*8
+				jmp __next1
+			align 16
+		__row1:
+				// general case, two outputs per iteration:
+				// sum over k of block[row][k]*c[k][t] ...
+				fild  word ptr [esi+0*2]
+				fmul qword ptr [eax+0*8*8]
+				fild  word ptr [esi+1*2]
+				fmul qword ptr [eax+1*8*8]
+				fadd
+				fild  word ptr [esi+2*2]
+				fmul qword ptr [eax+2*8*8]
+				fadd
+				fild  word ptr [esi+3*2]
+				fmul qword ptr [eax+3*8*8]
+				fadd
+				fild  word ptr [esi+4*2]
+				fmul qword ptr [eax+4*8*8]
+				fadd
+				fild  word ptr [esi+5*2]
+				fmul qword ptr [eax+5*8*8]
+				fadd
+				fild  word ptr [esi+6*2]
+				fmul qword ptr [eax+6*8*8]
+				fadd
+				fild  word ptr [esi+7*2]
+				fmul qword ptr [eax+7*8*8]
+				fadd
+
+				// ... and the same sum with c[k][t+1]
+				fild  word ptr [esi+0*2]
+				fmul qword ptr [eax+0*8*8+8]
+				fild  word ptr [esi+1*2]
+				fmul qword ptr [eax+1*8*8+8]
+				fadd
+				fild  word ptr [esi+2*2]
+				fmul qword ptr [eax+2*8*8+8]
+				fadd
+				fild  word ptr [esi+3*2]
+				fmul qword ptr [eax+3*8*8+8]
+				fadd
+				fild  word ptr [esi+4*2]
+				fmul qword ptr [eax+4*8*8+8]
+				fadd
+				fild  word ptr [esi+5*2]
+				fmul qword ptr [eax+5*8*8+8]
+				fadd
+				fild  word ptr [esi+6*2]
+				fmul qword ptr [eax+6*8*8+8]
+				fadd
+				fild  word ptr [esi+7*2]
+				fmul qword ptr [eax+7*8*8+8]
+				fadd
+				add eax,8*2 // advance to the next pair of columns of c
+				fxch st(1)
+				fstp qword ptr [edi]//
+				fstp qword ptr [edi+8]
+				add edi,8*2
+			dec ecx
+
+			jnz __row1
+			add eax,-8*8 // rewind eax to the start of c for the next row
+			  //align 16
+		__next1:
+			add esi,+8*2
+
+		sub ebx,0x80000001 // add ebx,ebx 
+		js  __col1
+			//align 16
+			test ebx,ebx // align jump &| redo flags
+		jnz __col1
+
+		// ---- column pass: esi = current column of tmp, eax = c, edi = rnd ----
+		lea esi,[tmp]
+		lea eax,[c]
+		lea edi,[rnd]
+		//mov edi,[block]
+    fld qword ptr [HALF] // 0.5 stays on the x87 stack for the whole pass
+		mov ebx,8
+	__row2:
+			mov ecx,8/2
+			align 16
+			__col2:
+				// sum over k of tmp[k][col]*c[k][t], plus the 0.5 held in st(1)
+				fld  qword ptr [esi+0*8*8]
+				fmul qword ptr [eax+0*8*8]
+				fld  qword ptr [esi+1*8*8]
+				fmul qword ptr [eax+1*8*8]
+				fadd
+				fld  qword ptr [esi+2*8*8]
+				fmul qword ptr [eax+2*8*8]
+				fadd
+				fld  qword ptr [esi+3*8*8]
+				fmul qword ptr [eax+3*8*8]
+				fadd
+				fld  qword ptr [esi+4*8*8]
+				fmul qword ptr [eax+4*8*8]
+				fadd
+				fld  qword ptr [esi+5*8*8]
+				fmul qword ptr [eax+5*8*8]
+				fadd
+				fld  qword ptr [esi+6*8*8]
+				fmul qword ptr [eax+6*8*8]
+				fadd
+				fld  qword ptr [esi+7*8*8]
+				fmul qword ptr [eax+7*8*8]
+				fadd
+				fadd st(0),st(1)
+
+				fxch st(1) // bring 0.5 back to st(0) for the second sum
+
+				// the same with c[k][t+1]
+				fld  qword ptr [esi+0*8*8]
+				fmul qword ptr [eax+0*8*8+8]
+				fld  qword ptr [esi+1*8*8]
+				fmul qword ptr [eax+1*8*8+8]
+				fadd
+				fld  qword ptr [esi+2*8*8]
+				fmul qword ptr [eax+2*8*8+8]
+				fadd
+				fld  qword ptr [esi+3*8*8]
+				fmul qword ptr [eax+3*8*8+8]
+				fadd
+				fld  qword ptr [esi+4*8*8]
+				fmul qword ptr [eax+4*8*8+8]
+				fadd
+				fld  qword ptr [esi+5*8*8]
+				fmul qword ptr [eax+5*8*8+8]
+				fadd
+				fld  qword ptr [esi+6*8*8]
+				fmul qword ptr [eax+6*8*8+8]
+				fadd
+				fld  qword ptr [esi+7*8*8]
+				fmul qword ptr [eax+7*8*8+8]
+				fadd
+				fadd st(0),st(1)
+				add eax,8*2
+
+				// store rnd[t][col] and rnd[t+1][col]; 0.5 ends up in st(0) again
+				fxch st(2)
+				fstp qword ptr [edi]
+				fxch st(1)
+				fstp qword ptr [edi+8*8]
+				add edi,8*8*2
+
+			dec ecx
+
+			jnz __col2
+			add eax,-8*8 // rewind c
+			add esi,+8 // next column of tmp
+			add edi,8-8*8*8 // back to the top of rnd, one column to the right
+
+		sub ebx,0x80000001
+		js  __row2
+			  //align 16
+				test ebx,ebx // align jump &| redo flags
+		jnz __row2
+		ffree st(0) // bye bye 0.5
+
+	  // set x87 to floor mode
+		fstcw [fpold]
+		movzx eax, [fpold]
+
+		// The rounding-control field is bits 10-11 of the control word. Clear
+		// it before selecting 01b (round down): a plain OR would turn an
+		// inherited "round up" (10b) or "truncate" (11b) into truncation.
+		and eax, 0xF3FF
+		or eax, 0x0400 // round down - floor
+		mov [fpnew], ax
+		fldcw [fpnew]
+
+		// now floor the damn array
+		// Each step converts one value with fistp, then clamps it while the
+		// next value is being loaded; the store of element i therefore comes
+		// after the fistp of element i+1.
+		lea esi, [rnd]
+		mov edi, [block]
+		mov ebx, -256 // clip min
+		mov edx, +255 // clip max
+		mov ecx, 8
+		align 16
+	__floor:
+		  fld   qword ptr [esi+0*8]
+			fistp dword ptr [int0]
+			  mov eax,[int0]
+				cmp   eax,ebx
+				cmovl eax,ebx
+				cmp   eax,edx
+				cmovg eax,edx
+		  fld   qword ptr [esi+1*8]
+			fistp dword ptr [int1]
+				mov word ptr [edi+0*2],ax
+			  mov eax,[int1]
+				cmp   eax,ebx
+				cmovl eax,ebx
+				cmp   eax,edx
+				cmovg eax,edx
+		  fld   qword ptr [esi+2*8]
+			fistp dword ptr [int2]
+				mov word ptr [edi+1*2],ax
+			  mov eax,[int2]
+				cmp   eax,ebx
+				cmovl eax,ebx
+				cmp   eax,edx
+				cmovg eax,edx
+		  fld   qword ptr [esi+3*8]
+			fistp dword ptr [int3]
+				mov word ptr [edi+2*2],ax
+			  mov eax,[int3]
+				cmp   eax,ebx
+				cmovl eax,ebx
+				cmp   eax,edx
+				cmovg eax,edx
+		  fld   qword ptr [esi+4*8]
+			fistp dword ptr [int4]
+				mov word ptr [edi+3*2],ax
+			  mov eax,[int4]
+				cmp   eax,ebx
+				cmovl eax,ebx
+				cmp   eax,edx
+				cmovg eax,edx
+		  fld   qword ptr [esi+5*8]
+			fistp dword ptr [int5]
+				mov word ptr [edi+4*2],ax
+			  mov eax,[int5]
+				cmp   eax,ebx
+				cmovl eax,ebx
+				cmp   eax,edx
+				cmovg eax,edx
+		  fld   qword ptr [esi+6*8]
+			fistp dword ptr [int6]
+				mov word ptr [edi+5*2],ax
+			  mov eax,[int6]
+				cmp   eax,ebx
+				cmovl eax,ebx
+				cmp   eax,edx
+				cmovg eax,edx
+		  fld   qword ptr [esi+7*8]
+			fistp dword ptr [int7]
+				mov word ptr [edi+6*2],ax
+			  mov eax,[int7]
+				cmp   eax,ebx
+				cmovl eax,ebx
+				cmp   eax,edx
+				cmovg eax,edx
+				mov word ptr [edi+7*2],ax
+
+			add esi, 8*8
+			add edi, 8*2
+
+		sub ecx,0x80000001
+		js  __floor
+			  //align 16
+				test ecx,ecx // align jump &| redo flags
+		jnz __floor
+
+		// set x87 to default mode
+		// (restores the control word that was saved in fpold above)
+		fldcw [fpold]
+	};
+}
diff --git a/src/filters/source/D2VSource/resource.h b/src/filters/source/D2VSource/resource.h
new file mode 100644
index 000000000..1d1658441
--- /dev/null
+++ b/src/filters/source/D2VSource/resource.h
@@ -0,0 +1,14 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by d2vsource.rc
+
+// Next default values for new objects
+// (defined only when APSTUDIO_INVOKED is set and APSTUDIO_READONLY_SYMBOLS is not)
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1001
+#define _APS_NEXT_SYMED_VALUE           101
+#endif // APSTUDIO_READONLY_SYMBOLS
+#endif // APSTUDIO_INVOKED
diff --git a/src/filters/source/D2VSource/stdafx.cpp b/src/filters/source/D2VSource/stdafx.cpp
new file mode 100644
index 000000000..0d549c051
--- /dev/null
+++ b/src/filters/source/D2VSource/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// d2vsource.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/src/filters/source/D2VSource/stdafx.h b/src/filters/source/D2VSource/stdafx.h
new file mode 100644
index 000000000..e596301ef
--- /dev/null
+++ b/src/filters/source/D2VSource/stdafx.h
@@ -0,0 +1,22 @@
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+#include "../../../DSUtil/SharedInclude.h"
+// NOTE(review): the macros below are defined after SharedInclude.h, so they only affect the headers included after this point.
+#define WIN32_LEAN_AND_MEAN		// Exclude rarely-used stuff from Windows headers
+#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS	// some CString constructors will be explicit
+
+#ifndef VC_EXTRALEAN
+#define VC_EXTRALEAN		// Exclude rarely-used stuff from Windows headers
+#endif // VC_EXTRALEAN
+
+#include <afx.h>
+#include <afxwin.h>         // MFC core and standard components
+
+// TODO: reference additional headers your program requires here
+
+#include <streams.h>
+#include <dvdmedia.h>
author	povaddict <povaddict@users.sourceforge.net>	2010-02-10 02:16:44 +0300
committer	povaddict <povaddict@users.sourceforge.net>	2010-02-10 02:16:44 +0300
commit	726a91b12a7524e45e7a901c9e4883af5b1bffe6 (patch)
tree	f5d25e3b2e84c92f4901280c73d5d3d7e6c3cd19 /src/filters/source/D2VSource
parent	02183f6e47ad4ea1057de9950482f291f2ae4290 (diff)