Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpovaddict <povaddict@users.sourceforge.net>2010-02-10 02:16:44 +0300
committerpovaddict <povaddict@users.sourceforge.net>2010-02-10 02:16:44 +0300
commit726a91b12a7524e45e7a901c9e4883af5b1bffe6 (patch)
treef5d25e3b2e84c92f4901280c73d5d3d7e6c3cd19 /src/filters/source/D2VSource
parent02183f6e47ad4ea1057de9950482f291f2ae4290 (diff)
Rename several directories to use MixedCase instead of lowercase.
They now mostly match the case used in #includes, and they're consistent with the names of the .h files they contain. git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1648 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/filters/source/D2VSource')
-rw-r--r--src/filters/source/D2VSource/D2VSource.cpp283
-rw-r--r--src/filters/source/D2VSource/D2VSource.def7
-rw-r--r--src/filters/source/D2VSource/D2VSource.h56
-rw-r--r--src/filters/source/D2VSource/MPEG2Dec.cpp4490
-rw-r--r--src/filters/source/D2VSource/MPEG2Dec.h304
-rw-r--r--src/filters/source/D2VSource/d2vsource.rc117
-rw-r--r--src/filters/source/D2VSource/d2vsource.vcproj962
-rw-r--r--src/filters/source/D2VSource/idctfpu.cpp456
-rw-r--r--src/filters/source/D2VSource/idctmmx.asm738
-rw-r--r--src/filters/source/D2VSource/idctref.cpp362
-rw-r--r--src/filters/source/D2VSource/resource.h14
-rw-r--r--src/filters/source/D2VSource/stdafx.cpp8
-rw-r--r--src/filters/source/D2VSource/stdafx.h22
13 files changed, 7819 insertions, 0 deletions
diff --git a/src/filters/source/D2VSource/D2VSource.cpp b/src/filters/source/D2VSource/D2VSource.cpp
new file mode 100644
index 000000000..8984bf360
--- /dev/null
+++ b/src/filters/source/D2VSource/D2VSource.cpp
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2003-2006 Gabest
+ *
+ * This Program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This Program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "stdafx.h"
+#include "D2VSource.h"
+#include "mpeg2dec.h"
+#include "../../../DSUtil/DSUtil.h"
+
+#ifdef REGISTER_FILTER
+
+// Media types offered by the output pin: YUY2 video only.
+const AMOVIESETUP_MEDIATYPE sudPinTypesOut[] =
+{
+ {&MEDIATYPE_Video, &MEDIASUBTYPE_YUY2}
+};
+
+// The filter's single pin, named "Output"; it advertises the types listed in sudPinTypesOut.
+const AMOVIESETUP_PIN sudOpPin[] =
+{
+ {L"Output", FALSE, TRUE, FALSE, FALSE, &CLSID_NULL, NULL, countof(sudPinTypesOut), sudPinTypesOut}
+};
+
+// Filter registration record: CLSID of CD2VSource, display name, merit, pin table, category.
+const AMOVIESETUP_FILTER sudFilter[] =
+{
+ {&__uuidof(CD2VSource), L"MPC - D2VSource", MERIT_NORMAL, countof(sudOpPin), sudOpPin, CLSID_LegacyAmFilterCategory}
+};
+
+// Class-factory table built from sudFilter[0]; CreateInstance<CD2VSource> is the creation callback.
+// NOTE(review): presumably consumed by the DirectShow base-class DLL entry points -- confirm.
+CFactoryTemplate g_Templates[] =
+{
+ {sudFilter[0].strName, sudFilter[0].clsID, CreateInstance<CD2VSource>, NULL, &sudFilter[0]}
+};
+
+// Number of entries in g_Templates.
+int g_cTemplates = countof(g_Templates);
+
+// Registers the filter together with the registry values that tie .d2v
+// project files to it:
+//  - a "0" value holding "0,18,,<hex>", where the hex string spells
+//    "DVD2AVIProjectFile" (18 bytes),
+//  - a "Source Filter" value for that same key,
+//  - a "Source Filter" value for the ".d2v" extension.
+// Returns whatever AMovieDllRegisterServer2(TRUE) returns; the results of the
+// SetRegKeyValue calls are not inspected.
+STDAPI DllRegisterServer()
+{
+    static const TCHAR kFilterClsid[] = _T("{47CE0591-C4D5-4b41-BED7-28F59AD76228}");
+    static const TCHAR kMediaTypeKey[] = _T("Media Type\\{e436eb83-524f-11ce-9f53-0020af0ba770}");
+    static const TCHAR kExtensionsKey[] = _T("Media Type\\Extensions");
+
+    // "DVD2AVIProjectFile"
+    SetRegKeyValue(kMediaTypeKey, kFilterClsid, _T("0"), _T("0,18,,4456443241564950726F6A65637446696C65"));
+    SetRegKeyValue(kMediaTypeKey, kFilterClsid, _T("Source Filter"), kFilterClsid);
+    SetRegKeyValue(kExtensionsKey, _T(".d2v"), _T("Source Filter"), kFilterClsid);
+
+    return AMovieDllRegisterServer2(TRUE);
+}
+
+// Removes the two registry keys written by DllRegisterServer and then
+// unregisters the filter itself. Returns the result of
+// AMovieDllRegisterServer2(FALSE).
+STDAPI DllUnregisterServer()
+{
+    static const TCHAR kFilterClsid[] = _T("{47CE0591-C4D5-4b41-BED7-28F59AD76228}");
+
+    DeleteRegKey(_T("Media Type\\{e436eb83-524f-11ce-9f53-0020af0ba770}"), kFilterClsid);
+    DeleteRegKey(_T("Media Type\\Extensions"), _T(".d2v"));
+
+    return AMovieDllRegisterServer2(FALSE);
+}
+
+#include "../../FilterApp.h"
+
+CFilterApp theApp;
+
+#endif
+
+//
+// CD2VSource
+//
+
+// Constructs the source filter by forwarding everything to
+// CBaseSource<CD2VStream>, then stores S_OK in *phr when a result slot was
+// supplied.
+CD2VSource::CD2VSource(LPUNKNOWN lpunk, HRESULT* phr)
+    : CBaseSource<CD2VStream>(NAME("CD2VSource"), lpunk, phr, __uuidof(this))
+{
+    if(phr != NULL)
+    {
+        *phr = S_OK;
+    }
+}
+
+// Nothing to release here.
+CD2VSource::~CD2VSource()
+{
+}
+
+//
+// CD2VStream
+//
+
+// Opens the .d2v project `fn` with a freshly created CMPEG2Dec (YUY2 output),
+// allocates the intermediate frame buffer and derives the stream timing.
+//
+// fn      - path of the project file to open.
+// pParent - owning source filter, forwarded to CBaseStream.
+// phr     - optional result slot: E_OUTOFMEMORY when the decoder or the frame
+//           buffer cannot be allocated, E_FAIL when the project cannot be
+//           opened, reports a zero frame rate, or yields a non-positive
+//           duration; S_OK otherwise.
+CD2VStream::CD2VStream(const WCHAR* fn, CSource* pParent, HRESULT* phr)
+    : CBaseStream(NAME("D2VSourceStream"), pParent, phr)
+    , m_pFrameBuffer(NULL)
+{
+    CAutoLock cAutoLock(&m_cSharedState);
+
+    m_pDecoder.Attach(DNew CMPEG2Dec());
+    if(!m_pDecoder)
+    {
+        if(phr) *phr = E_OUTOFMEMORY;
+        return;
+    }
+
+    if(!m_pDecoder->Open(CString(fn), CMPEG2Dec::YUY2))
+    {
+        if(phr) *phr = E_FAIL;
+        return;
+    }
+
+    // The frame rate comes from the project file; a zero value would make the
+    // division below fault, so treat it as an unusable file.
+    if(m_pDecoder->VF_FrameRate == 0)
+    {
+        if(phr) *phr = E_FAIL;
+        return;
+    }
+
+    // Four bytes per pixel of the decoded clip.
+    if(!m_pFrameBuffer.Allocate(m_pDecoder->Clip_Width*m_pDecoder->Clip_Height*4))
+    {
+        if(phr) *phr = E_OUTOFMEMORY;
+        return;
+    }
+
+    // NOTE(review): 10^10 / VF_FrameRate only yields 100 ns units if
+    // VF_FrameRate is expressed in 1/1000 frames per second -- confirm.
+    m_AvgTimePerFrame = 10000000000i64/m_pDecoder->VF_FrameRate;
+    m_rtDuration = m_rtStop = m_AvgTimePerFrame*m_pDecoder->VF_FrameLimit;
+
+    if(phr) *phr = m_rtDuration > 0 ? S_OK : E_FAIL;
+}
+
+// Destructor. Holds m_cSharedState for the duration of the otherwise empty body.
+CD2VStream::~CD2VStream()
+{
+ CAutoLock cAutoLock(&m_cSharedState);
+}
+
+// Requests a single buffer large enough for one frame of the current media
+// type (width * height * bits-per-pixel / 8 bytes).
+//
+// Returns E_FAIL when the current media type has no usable dimensions or the
+// allocator grants a smaller buffer than requested, the allocator's own error
+// when SetProperties fails, and NOERROR otherwise.
+HRESULT CD2VStream::DecideBufferSize(IMemAllocator* pAlloc, ALLOCATOR_PROPERTIES* pProperties)
+{
+    ASSERT(pAlloc);
+    ASSERT(pProperties);
+
+    int width, height, bitsPerPixel;
+    if(!GetDim(width, height, bitsPerPixel))
+    {
+        return E_FAIL;
+    }
+
+    pProperties->cBuffers = 1;
+    pProperties->cbBuffer = width*height*bitsPerPixel>>3;
+
+    ALLOCATOR_PROPERTIES granted;
+    const HRESULT hr = pAlloc->SetProperties(pProperties, &granted);
+    if(FAILED(hr))
+    {
+        return hr;
+    }
+
+    if(granted.cbBuffer < pProperties->cbBuffer)
+    {
+        return E_FAIL;
+    }
+    ASSERT(granted.cBuffers == pProperties->cBuffers);
+
+    return NOERROR;
+}
+
+// Decodes frame `nFrame` into the intermediate frame buffer and copies it row
+// by row into the sample memory at `pOut`.
+//
+// pSample - sample being filled; a media type attached to it is applied
+//           through SetMediaType before the dimensions are read.
+// nFrame  - frame index handed to the decoder.
+// pOut    - destination for the image data.
+// len     - receives the number of bytes in the output image (pitchOut * h).
+//
+// Returns S_FALSE when there is no decoder or frame buffer, or when the
+// current media type has no usable dimensions; S_OK otherwise.
+HRESULT CD2VStream::FillBuffer(IMediaSample* pSample, int nFrame, BYTE* pOut, long& len)
+{
+    if(!m_pDecoder)
+        return S_FALSE;
+
+    AM_MEDIA_TYPE* pmt;
+    if(SUCCEEDED(pSample->GetMediaType(&pmt)) && pmt)
+    {
+        CMediaType mt(*pmt);
+        SetMediaType(&mt);
+
+        DeleteMediaType(pmt);
+    }
+
+    int w, h, bpp;
+    if(!GetDim(w, h, bpp))
+        return S_FALSE;
+
+    BYTE* pIn = m_pFrameBuffer;
+    if(!pIn)
+        return S_FALSE; // the constructor failed before the buffer was allocated
+
+    int pitchIn, pitchOut = 0;
+
+    pitchIn = m_pDecoder->Clip_Width*bpp>>3;
+    pitchOut = w*bpp>>3;
+
+    m_pDecoder->Decode(pIn, (unsigned long)(nFrame), pitchIn);
+
+    // The frame buffer was sized from the decoder's clip dimensions, so never
+    // read more rows from it than the clip has, even if the negotiated media
+    // type is taller.
+    int rows = h;
+    if(rows > m_pDecoder->Clip_Height)
+        rows = m_pDecoder->Clip_Height;
+
+    // Source and destination may use different pitches; copy the narrower one.
+    for(int y = 0, p = min(pitchIn, pitchOut);
+        y < rows;
+        y++, pIn += pitchIn, pOut += pitchOut)
+    {
+        memcpy(pOut, pIn, p);
+    }
+
+    len = pitchOut*h;
+
+    return S_OK;
+}
+
+// Describes the one output format this stream offers: YUY2 video, 16 bits per
+// pixel, at the decoder's clip size, in a VIDEOINFOHEADER.
+//
+// iPosition - index of the requested type; only 0 is valid.
+// pmt       - media type object to fill in.
+//
+// Returns E_INVALIDARG for a negative index, VFW_S_NO_MORE_ITEMS for an index
+// above 0, E_FAIL when there is no decoder, E_OUTOFMEMORY when the format
+// block cannot be allocated, and NOERROR otherwise.
+HRESULT CD2VStream::GetMediaType(int iPosition, CMediaType* pmt)
+{
+    CAutoLock cAutoLock(m_pFilter->pStateLock());
+
+    if(iPosition < 0) return E_INVALIDARG;
+    if(iPosition > 0) return VFW_S_NO_MORE_ITEMS;
+
+    // Without a decoder there are no clip dimensions to report.
+    if(!m_pDecoder) return E_FAIL;
+
+    pmt->SetType(&MEDIATYPE_Video);
+    pmt->SetSubtype(&MEDIASUBTYPE_YUY2);
+    pmt->SetFormatType(&FORMAT_VideoInfo);
+    pmt->SetTemporalCompression(FALSE);
+
+    // AllocFormatBuffer returns NULL on allocation failure; do not write through it.
+    VIDEOINFOHEADER* vih = (VIDEOINFOHEADER*)pmt->AllocFormatBuffer(sizeof(VIDEOINFOHEADER));
+    if(!vih) return E_OUTOFMEMORY;
+
+    memset(vih, 0, sizeof(VIDEOINFOHEADER));
+    vih->AvgTimePerFrame = m_AvgTimePerFrame;
+    vih->bmiHeader.biSize = sizeof(vih->bmiHeader);
+    vih->bmiHeader.biWidth = m_pDecoder->Clip_Width;
+    vih->bmiHeader.biHeight = m_pDecoder->Clip_Height;
+    vih->bmiHeader.biPlanes = 1;
+    vih->bmiHeader.biBitCount = 16;
+    vih->bmiHeader.biCompression = '2YUY';
+    vih->bmiHeader.biSizeImage = vih->bmiHeader.biWidth*abs(vih->bmiHeader.biHeight)*vih->bmiHeader.biBitCount>>3;
+
+    pmt->SetSampleSize(vih->bmiHeader.biSizeImage);
+
+    return NOERROR;
+}
+
+// Applies a new media type. When a decoder exists, only the YUY2 subtype is
+// accepted (anything else returns E_FAIL) and the decoder's destination
+// format is set to match; the type is then handed to CSourceStream.
+HRESULT CD2VStream::SetMediaType(const CMediaType* pmt)
+{
+    if(m_pDecoder)
+    {
+        if(!(pmt->subtype == MEDIASUBTYPE_YUY2))
+        {
+            return E_FAIL;
+        }
+
+        m_pDecoder->m_dstFormat = CMPEG2Dec::YUY2;
+    }
+
+    return CSourceStream::SetMediaType(pmt);
+}
+
+// Accepts exactly one combination: video major type, YUY2 subtype and a
+// VIDEOINFOHEADER format block. Everything else yields E_INVALIDARG.
+HRESULT CD2VStream::CheckMediaType(const CMediaType* pmt)
+{
+    if(!(pmt->majortype == MEDIATYPE_Video))
+        return E_INVALIDARG;
+
+    if(!(pmt->subtype == MEDIASUBTYPE_YUY2))
+        return E_INVALIDARG;
+
+    if(!(pmt->formattype == FORMAT_VideoInfo))
+        return E_INVALIDARG;
+
+    return S_OK;
+}
+
+// Quality-control callback. When the reported lateness is positive and below
+// 100000000, the sample time and the position are both advanced by the
+// lateness rounded down to a whole number of frame durations. pSender is not
+// used. Always returns S_OK.
+STDMETHODIMP CD2VStream::Notify(IBaseFilter* pSender, Quality q)
+{
+    const bool lateWithinLimit = q.Late > 0 && q.Late < 100000000;
+    if(!lateWithinLimit)
+    {
+        return S_OK;
+    }
+
+    CAutoLock cAutoLockShared(&m_cSharedState);
+
+    // Whole frames only: the integer division drops the partial frame.
+    const REFERENCE_TIME skip = (q.Late/m_AvgTimePerFrame)*m_AvgTimePerFrame;
+    m_rtSampleTime += skip;
+    m_rtPosition += skip;
+
+    return S_OK;
+}
+
+//
+
+// Reads width, absolute height and bit depth from the bitmap header of the
+// current media type (m_mt). Handles VIDEOINFOHEADER and VIDEOINFOHEADER2
+// format blocks; for any other format type it returns false and leaves the
+// outputs untouched.
+bool CD2VStream::GetDim(int& w, int& h, int& bpp)
+{
+    const BITMAPINFOHEADER* header = NULL;
+
+    if(m_mt.formattype == FORMAT_VideoInfo)
+    {
+        header = &((VIDEOINFOHEADER*)m_mt.pbFormat)->bmiHeader;
+    }
+    else if(m_mt.formattype == FORMAT_VideoInfo2)
+    {
+        header = &((VIDEOINFOHEADER2*)m_mt.pbFormat)->bmiHeader;
+    }
+    else
+    {
+        return false;
+    }
+
+    w = header->biWidth;
+    h = abs(header->biHeight);
+    bpp = header->biBitCount;
+
+    return true;
+}
diff --git a/src/filters/source/D2VSource/D2VSource.def b/src/filters/source/D2VSource/D2VSource.def
new file mode 100644
index 000000000..465cb0f61
--- /dev/null
+++ b/src/filters/source/D2VSource/D2VSource.def
@@ -0,0 +1,7 @@
+LIBRARY "D2VSource.ax"
+
+EXPORTS
+ DllCanUnloadNow PRIVATE
+ DllGetClassObject PRIVATE
+ DllRegisterServer PRIVATE
+ DllUnregisterServer PRIVATE
diff --git a/src/filters/source/D2VSource/D2VSource.h b/src/filters/source/D2VSource/D2VSource.h
new file mode 100644
index 000000000..bf2e18f9a
--- /dev/null
+++ b/src/filters/source/D2VSource/D2VSource.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2003-2006 Gabest
+ *
+ * This Program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This Program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#pragma once
+#include <atlbase.h>
+#include "../BaseSource/BaseSource.h"
+
+class CD2VStream;
+
+// Source filter for .d2v project files: a CBaseSource specialisation whose
+// stream class is CD2VStream. The uuid attribute below supplies the CLSID
+// that the implementation file obtains through __uuidof.
+[uuid("47CE0591-C4D5-4b41-BED7-28F59AD76228")]
+class CD2VSource : public CBaseSource<CD2VStream>
+{
+public:
+ // lpunk and phr are forwarded to CBaseSource; *phr (when non-NULL) is set to S_OK.
+ CD2VSource(LPUNKNOWN lpunk, HRESULT* phr);
+ virtual ~CD2VSource();
+};
+
+class CMPEG2Dec;
+
+// Output stream of CD2VSource. Owns the MPEG-2 decoder and an intermediate
+// frame buffer, and delivers decoded frames as YUY2 video.
+class CD2VStream : public CBaseStream
+{
+private:
+ // Decoder created by the constructor; the stream-side methods check it before use.
+ CAutoPtr<CMPEG2Dec> m_pDecoder;
+ // Scratch buffer the decoder writes each frame into before it is copied to the sample.
+ CAutoVectorPtr<BYTE> m_pFrameBuffer;
+
+ // Extracts width, |height| and bit depth from the current media type; false if the format type is unsupported.
+ bool GetDim(int& w, int& h, int& bpp);
+
+public:
+ // Opens the project file `fn`; the outcome is reported through *phr when phr is non-NULL.
+ CD2VStream(const WCHAR* fn, CSource* pParent, HRESULT* phr);
+ virtual ~CD2VStream();
+
+ // Decodes frame nFrame into pOut and stores the image size in len.
+ // NOTE(review): len is annotated in+out, but the implementation in D2VSource.cpp only writes it.
+ HRESULT FillBuffer(IMediaSample* pSample, int nFrame, BYTE* pOut, long& len /*in+out*/);
+
+ // Media type negotiation: a single YUY2 / VIDEOINFOHEADER type is offered and accepted.
+ HRESULT DecideBufferSize(IMemAllocator* pIMemAlloc, ALLOCATOR_PROPERTIES* pProperties);
+ HRESULT CheckMediaType(const CMediaType* pMediaType);
+ HRESULT GetMediaType(int iPosition, CMediaType* pmt);
+ HRESULT SetMediaType(const CMediaType* pmt);
+
+ // Quality-control notification; skips whole frames when the reported lateness is within range.
+ STDMETHODIMP Notify(IBaseFilter* pSender, Quality q);
+};
diff --git a/src/filters/source/D2VSource/MPEG2Dec.cpp b/src/filters/source/D2VSource/MPEG2Dec.cpp
new file mode 100644
index 000000000..4be664c00
--- /dev/null
+++ b/src/filters/source/D2VSource/MPEG2Dec.cpp
@@ -0,0 +1,4490 @@
+#include "stdafx.h"
+#include "MPEG2Dec.h"
+
+int testint;
+
+// CPU feature flags filled in by CheckCPU(). CheckCPU only ever stores 1 into
+// a member; it never clears one.
+struct CPU {
+ BOOL mmx; // CPUID(1) EDX & 0x00800000
+ BOOL _3dnow; // CPUID(0x80000001) EDX & 0x80000000
+ BOOL ssemmx; // CPUID(1) EDX & 0x02000000, or CPUID(0x80000001) EDX & 0x00400000
+ BOOL ssefpu; // CPUID(1) EDX & 0x02000000
+} cpu;
+
+// Probes CPUID and sets the matching members of the global `cpu` to 1.
+// NOTE(review): leaf 0x80000001 is queried without first checking, via leaf
+// 0x80000000, that extended leaves are supported -- confirm this is safe on
+// all target CPUs.
+void CheckCPU()
+{
+ __asm
+ {
+ // Standard feature flags: leaf 1, result in EDX.
+ mov eax, 1
+ cpuid
+ test edx, 0x00800000 // STD MMX
+ jz TEST_SSE
+ mov [cpu.mmx], 1
+TEST_SSE:
+ test edx, 0x02000000 // STD SSE
+ jz TEST_3DNOW
+ mov [cpu.ssemmx], 1
+ mov [cpu.ssefpu], 1
+TEST_3DNOW:
+ // Extended feature flags: leaf 0x80000001, result in EDX.
+ mov eax, 0x80000001
+ cpuid
+ test edx, 0x80000000 // 3D NOW
+ jz TEST_SSEMMX
+ mov [cpu._3dnow], 1
+TEST_SSEMMX:
+ test edx, 0x00400000 // SSE MMX
+ jz TEST_END
+ mov [cpu.ssemmx], 1
+TEST_END:
+ }
+}
+
+#pragma warning(disable:4799) // no EMMS
+#pragma warning(disable:4731) // ebp modified
+// idct
+extern "C" void __fastcall MMX_IDCT(short *block);
+extern "C" void __fastcall SSEMMX_IDCT(short *block);
+extern void Initialize_FPU_IDCT(void);
+extern void FPU_IDCT(short *block);
+extern void Initialize_REF_IDCT(void);
+extern void REF_IDCT(short *block);
+
+/* default intra quantization matrix
+ *
+ * sequence_header() copies these 64 values, index for index, into
+ * intra_quantizer_matrix when the stream does not carry its own matrix
+ * (load_intra_quantizer_matrix is 0).
+ */
+static unsigned char default_intra_quantizer_matrix[64] =
+{
+ 8, 16, 19, 22, 26, 27, 29, 34,
+ 16, 16, 22, 24, 27, 29, 34, 37,
+ 19, 22, 26, 27, 29, 34, 34, 38,
+ 22, 22, 26, 27, 29, 34, 37, 40,
+ 22, 26, 27, 29, 32, 35, 40, 48,
+ 26, 27, 29, 32, 35, 40, 48, 58,
+ 26, 27, 29, 34, 38, 46, 56, 69,
+ 27, 29, 35, 38, 46, 56, 69, 83
+};
+
+/* zig-zag and alternate scan patterns
+ *
+ * scan[pattern][i] is the index, within a 64-entry matrix, at which the i-th
+ * value in transmission order is stored; sequence_header() uses
+ * scan[ZIG_ZAG][i] this way when loading quantizer matrices from the stream.
+ */
+static unsigned char scan[2][64] =
+{
+ { /* Zig-Zag scan pattern */
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+ }
+ ,
+ { /* Alternate scan pattern */
+ 0, 8, 16, 24, 1, 9, 2, 10,
+ 17, 25, 32, 40, 48, 56, 57, 49,
+ 41, 33, 26, 18, 3, 11, 4, 12,
+ 19, 27, 34, 42, 50, 58, 35, 43,
+ 51, 59, 20, 28, 5, 13, 6, 14,
+ 21, 29, 36, 44, 52, 60, 37, 45,
+ 53, 61, 22, 30, 7, 15, 23, 31,
+ 38, 46, 54, 62, 39, 47, 55, 63
+ }
+};
+
+/* non-linear quantization coefficient table */
+static unsigned char Non_Linear_quantizer_scale[32] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 10, 12, 14, 16, 18, 20, 22,
+ 24, 28, 32, 36, 40, 44, 48, 52,
+ 56, 64, 72, 80, 88, 96, 104, 112
+};
+
+#define ERROR_VALUE (-1)
+
+/* One DCT coefficient table entry: {run, level, len}. In the tables of this
+ * file, entries with run 64 are marked EOB and entries with run 65 are marked
+ * Escape. NOTE(review): len is presumably the code length in bits -- confirm
+ * against the decoding code.
+ */
+typedef struct {
+ char run, level, len;
+} DCTtab;
+
+/* One variable-length-code table entry: {val, len}. Unused slots in the
+ * tables of this file are stored as {ERROR_VALUE, 0}.
+ */
+typedef struct {
+ char val, len;
+} VLCtab;
+
+/* Table B-10, motion_code, codes 0001 ... 01xx */
+static VLCtab MVtab0[8] =
+{
+ {ERROR_VALUE,0}, {3,3}, {2,2}, {2,2}, {1,1}, {1,1}, {1,1}, {1,1}
+};
+
+/* Table B-10, motion_code, codes 0000011 ... 000011x */
+static VLCtab MVtab1[8] =
+{
+ {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {7,6}, {6,6}, {5,6}, {4,5}, {4,5}
+};
+
+/* Table B-10, motion_code, codes 0000001100 ... 000001011x */
+static VLCtab MVtab2[12] =
+{
+ {16,9}, {15,9}, {14,9}, {13,9},
+ {12,9}, {11,9}, {10,8}, {10,8},
+ {9,8}, {9,8}, {8,8}, {8,8}
+};
+
+/* Table B-9, coded_block_pattern, codes 01000 ... 111xx */
+static VLCtab CBPtab0[32] =
+{
+ {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
+ {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
+ {62,5}, {2,5}, {61,5}, {1,5}, {56,5}, {52,5}, {44,5}, {28,5},
+ {40,5}, {20,5}, {48,5}, {12,5}, {32,4}, {32,4}, {16,4}, {16,4},
+ {8,4}, {8,4}, {4,4}, {4,4}, {60,3}, {60,3}, {60,3}, {60,3}
+};
+
+/* Table B-9, coded_block_pattern, codes 00000100 ... 001111xx */
+static VLCtab CBPtab1[64] =
+{
+ {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0}, {ERROR_VALUE,0},
+ {58,8}, {54,8}, {46,8}, {30,8},
+ {57,8}, {53,8}, {45,8}, {29,8}, {38,8}, {26,8}, {37,8}, {25,8},
+ {43,8}, {23,8}, {51,8}, {15,8}, {42,8}, {22,8}, {50,8}, {14,8},
+ {41,8}, {21,8}, {49,8}, {13,8}, {35,8}, {19,8}, {11,8}, {7,8},
+ {34,7}, {34,7}, {18,7}, {18,7}, {10,7}, {10,7}, {6,7}, {6,7},
+ {33,7}, {33,7}, {17,7}, {17,7}, {9,7}, {9,7}, {5,7}, {5,7},
+ {63,6}, {63,6}, {63,6}, {63,6}, {3,6}, {3,6}, {3,6}, {3,6},
+ {36,6}, {36,6}, {36,6}, {36,6}, {24,6}, {24,6}, {24,6}, {24,6}
+};
+
+/* Table B-9, coded_block_pattern, codes 000000001 ... 000000111 */
+static VLCtab CBPtab2[8] =
+{
+ {ERROR_VALUE,0}, {0,9}, {39,9}, {27,9}, {59,9}, {55,9}, {47,9}, {31,9}
+};
+
+/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */
+static VLCtab MBAtab1[16] =
+{
+ {ERROR_VALUE,0}, {ERROR_VALUE,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4},
+ {4,4}, {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3}
+};
+
+/* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx */
+static VLCtab MBAtab2[104] =
+{
+ {33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11},
+ {25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10},
+ {19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10},
+ {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8},
+ {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8},
+ {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8},
+ {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8},
+ {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8},
+ {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8},
+ {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7},
+ {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7},
+ {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7},
+ {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}
+};
+
+/* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
+static VLCtab DClumtab0[32] =
+{
+ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+ {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {ERROR_VALUE, 0}
+};
+
+/* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
+static VLCtab DClumtab1[16] =
+{
+ {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
+ {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9}
+};
+
+/* Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */
+static VLCtab DCchromtab0[32] =
+{
+ {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {ERROR_VALUE, 0}
+};
+
+/* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
+static VLCtab DCchromtab1[32] =
+{
+ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
+ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
+ {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
+ {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0100 ... 1xxx (used for first (DC) coefficient)
+ */
+static DCTtab DCTtabfirst[12] =
+{
+ {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
+ {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
+ {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0100 ... 1xxx (used for all other coefficients)
+ */
+static DCTtab DCTtabnext[12] =
+{
+ {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
+ {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 000001xx ... 00111xxx
+ */
+static DCTtab DCTtab0[60] =
+{
+ {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
+ {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
+ {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
+ {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
+ {6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
+ {1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
+ {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
+ {13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
+ {3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
+ {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
+ {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
+ {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
+ {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
+ {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+ {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}
+};
+
+/* Table B-15, DCT coefficients table one,
+ * codes 000001xx ... 11111111
+*/
+static DCTtab DCTtab0a[252] =
+{
+ {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
+ {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
+ {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
+ {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
+ {0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
+ {4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
+ {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
+ {1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
+ {13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
+ {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
+ {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
+ {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
+ {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
+ {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+ {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
+ {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+ {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+ {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+ {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+ {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+ {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+ {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
+ {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
+ {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
+ {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
+ {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
+ {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
+ {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
+ {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0000001000 ... 0000001111
+ */
+static DCTtab DCTtab1[8] =
+{
+ {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
+ {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10}
+};
+
+/* Table B-15, DCT coefficients table one,
+ * codes 000000100x ... 000000111x
+ */
+static DCTtab DCTtab1a[8] =
+{
+ {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
+ {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 000000010000 ... 000000011111
+ */
+static DCTtab DCTtab2[16] =
+{
+ {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
+ {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
+ {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
+ {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 0000000010000 ... 0000000011111
+ */
+static DCTtab DCTtab3[16] =
+{
+ {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
+ {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
+ {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
+ {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 00000000010000 ... 00000000011111
+ */
+static DCTtab DCTtab4[16] =
+{
+ {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
+ {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
+ {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
+ {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 000000000010000 ... 000000000011111
+ */
+static DCTtab DCTtab5[16] =
+{
+ {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
+ {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15},
+ {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15},
+ {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 0000000000010000 ... 0000000000011111
+ */
+static DCTtab DCTtab6[16] =
+{
+ {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
+ {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16},
+ {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16},
+ {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16}
+};
+
+/* Table B-3, macroblock_type in P-pictures, codes 001..1xx */
+static VLCtab PMBtab0[8] =
+{
+ {ERROR_VALUE,0},
+ {MACROBLOCK_MOTION_FORWARD,3},
+ {MACROBLOCK_PATTERN,2}, {MACROBLOCK_PATTERN,2},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,1},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,1},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,1},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,1}
+};
+
+/* Table B-3, macroblock_type in P-pictures, codes 000001..00011x */
+static VLCtab PMBtab1[8] =
+{
+ {ERROR_VALUE,0},
+ {MACROBLOCK_QUANT|MACROBLOCK_INTRA,6},
+ {MACROBLOCK_QUANT|MACROBLOCK_PATTERN,5}, {MACROBLOCK_QUANT|MACROBLOCK_PATTERN,5},
+ {MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,5}, {MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,5},
+ {MACROBLOCK_INTRA,5}, {MACROBLOCK_INTRA,5}
+};
+
+/* Table B-4, macroblock_type in B-pictures, codes 0010..11xx */
+static VLCtab BMBtab0[16] =
+{
+ {ERROR_VALUE,0},
+ {ERROR_VALUE,0},
+ {MACROBLOCK_MOTION_FORWARD,4},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,4},
+ {MACROBLOCK_MOTION_BACKWARD,3},
+ {MACROBLOCK_MOTION_BACKWARD,3},
+ {MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,3},
+ {MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,3},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD,2},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD,2},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD,2},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD,2},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,2},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,2},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,2},
+ {MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,2}
+};
+
+/* Table B-4, macroblock_type in B-pictures, codes 000001..00011x */
+static VLCtab BMBtab1[8] =
+{
+ {ERROR_VALUE,0},
+ {MACROBLOCK_QUANT|MACROBLOCK_INTRA,6},
+ {MACROBLOCK_QUANT|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,6},
+ {MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_PATTERN,6},
+ {MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,5},
+ {MACROBLOCK_QUANT|MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD|MACROBLOCK_PATTERN,5},
+ {MACROBLOCK_INTRA,5},
+ {MACROBLOCK_INTRA,5}
+};
+
+//
+// getbit
+//
+
+/* Primes the bit reader: loads the first 32 bits of the stream into
+ * CurrentBfr, prefetches the following word through Fill_Next(), and sets
+ * BitsLeft to 32.
+ */
+void CMPEG2Dec::Initialize_Buffer()
+{
+ /* Start with the read pointer at the end of the buffer so that the first
+ * access has to fetch data. */
+ Rdptr = Rdbfr + BUFFER_SIZE;
+ Rdmax = Rdptr;
+
+ if (SystemStream_Flag)
+ {
+ /* System stream: payload bytes are only valid up to Rdmax, so move on to
+ * the next packet before each byte when the current one is exhausted. */
+ if (Rdptr >= Rdmax)
+ Next_Packet();
+ CurrentBfr = *Rdptr++ << 24;
+
+ if (Rdptr >= Rdmax)
+ Next_Packet();
+ CurrentBfr += *Rdptr++ << 16;
+
+ if (Rdptr >= Rdmax)
+ Next_Packet();
+ CurrentBfr += *Rdptr++ << 8;
+
+ if (Rdptr >= Rdmax)
+ Next_Packet();
+ CurrentBfr += *Rdptr++;
+
+ Fill_Next();
+ }
+ else
+ {
+ /* Not a system stream: refill the buffer and read the first four bytes
+ * big-endian. */
+ Fill_Buffer();
+
+ CurrentBfr = (*Rdptr << 24) + (*(Rdptr+1) << 16) + (*(Rdptr+2) << 8) + *(Rdptr+3);
+ Rdptr += 4;
+
+ Fill_Next();
+ }
+
+ BitsLeft = 32;
+}
+
+/* Slow path of Get_Bits(), used when the request needs every bit still left
+ * in CurrentBfr (N >= BitsLeft). Combines the low BitsLeft bits of CurrentBfr
+ * with the top bits of NextBfr, promotes NextBfr to CurrentBfr and prefetches
+ * a new NextBfr. Returns the N-bit value (also stored in Val).
+ */
+unsigned int CMPEG2Dec::Get_Bits_All(unsigned int N)
+{
+    /* Number of bits that have to come from NextBfr. */
+    const unsigned int fromNext = N - BitsLeft;
+
+    /* Low BitsLeft bits of the current word. */
+    Val = (CurrentBfr << (32 - BitsLeft)) >> (32 - BitsLeft);
+
+    if (fromNext != 0)
+    {
+        Val = (Val << fromNext) + (NextBfr >> (32 - fromNext));
+    }
+
+    CurrentBfr = NextBfr;
+    BitsLeft = 32 - fromNext;
+    Fill_Next();
+
+    return Val;
+}
+
+/* Slow path of Flush_Buffer(), used when N >= BitsLeft: drops what is left of
+ * CurrentBfr, promotes NextBfr to CurrentBfr, charges the remaining bits to
+ * the new word and prefetches a new NextBfr.
+ */
+void CMPEG2Dec::Flush_Buffer_All(unsigned int N)
+{
+    BitsLeft += 32 - N;
+    CurrentBfr = NextBfr;
+
+    Fill_Next();
+}
+
+/* Scans the system stream for the next video packet and leaves Rdptr at the
+ * start of its payload and Rdmax at its end. Returns only once such a packet
+ * has been found; other packets are skipped.
+ */
+void CMPEG2Dec::Next_Packet()
+{
+ unsigned int code, Packet_Length, Packet_Header_Length;
+
+ for (;;)
+ {
+ /* Read a 32-bit code, most significant byte first. */
+ code = Get_Short();
+ code = (code<<16) + Get_Short();
+
+ // remove system layer byte stuffing
+ /* Shift in one byte at a time until the upper 24 bits are 0x000001. */
+ while ((code & 0xffffff00) != 0x00000100)
+ code = (code<<8) + Get_Byte();
+
+ switch (code)
+ {
+ case PACK_START_CODE:
+ /* Skip 8 bytes following the pack start code. */
+ Rdptr += 8;
+ break;
+
+ case VIDEO_ELEMENTARY_STREAM:
+ /* 16-bit packet length; the packet ends Packet_Length bytes from here. */
+ Packet_Length = Get_Short();
+ Rdmax = Rdptr + Packet_Length;
+
+ code = Get_Byte();
+
+ if ((code & 0xc0)==0x80)
+ {
+ /* Top two bits are 10: read one more byte (its value is not used), then
+ * the header length, and skip that many header bytes to reach the
+ * payload. */
+ code = Get_Byte();
+ Packet_Header_Length = Get_Byte();
+
+ Rdptr += Packet_Header_Length;
+ return;
+ }
+ else
+ /* Otherwise skip the rest of this packet; one of its bytes was already
+ * consumed above. */
+ Rdptr += Packet_Length-1;
+ break;
+
+ default:
+ /* Any other code at or above SYSTEM_START_CODE: read its 16-bit length
+ * and skip that many bytes. */
+ if (code>=SYSTEM_START_CODE)
+ {
+ code = Get_Short();
+ Rdptr += code;
+ }
+ break;
+ }
+ }
+}
+
+/* Refills Rdbfr with the next BUFFER_SIZE bytes of input and resets Rdptr to
+ * the start of the buffer.
+ */
+void CMPEG2Dec::Fill_Buffer()
+{
+ Read = _read(Infile[File_Flag], Rdbfr, BUFFER_SIZE);
+
+ /* Short read: let Next_File() top the buffer up from the next input file. */
+ if (Read < BUFFER_SIZE)
+ Next_File();
+
+ /* When KeyOp_Flag is set and bit 0x10 of byte 20 is set, run the buffer
+ * through BufferOp() and clear that bit. */
+ if (KeyOp_Flag && (Rdbfr[20] & 0x10))
+ {
+ BufferOp(Rdbfr, lfsr0, lfsr1);
+ Rdbfr[20] &= ~0x10;
+ }
+
+ Rdptr = Rdbfr;
+
+ /* Rdmax refers to a position in the previous buffer contents; move it back
+ * by one buffer so it stays relative to the new data. */
+ if (SystemStream_Flag)
+ Rdmax -= BUFFER_SIZE;
+}
+
+/* Called after a short read into Rdbfr: switches to the next input file when
+ * there is one, rewinds the selected file, and fills the unread tail of Rdbfr
+ * from it. When File_Flag already refers to the last file it is left
+ * unchanged, so that same file is rewound and read again.
+ *
+ * The result of the top-up read is not checked; if it is short, the end of
+ * the buffer keeps whatever it held before.
+ */
+void CMPEG2Dec::Next_File()
+{
+    if (File_Flag < File_Limit-1)
+        File_Flag ++;
+
+    /* _read() reports errors as -1. Treat that as "nothing was read" so the
+     * top-up below never writes before the start of Rdbfr or asks for more
+     * than BUFFER_SIZE bytes. */
+    int valid = (int)Read;
+    if (valid < 0)
+        valid = 0;
+
+    _lseeki64(Infile[File_Flag], 0, SEEK_SET);
+    _read(Infile[File_Flag], Rdbfr + valid, BUFFER_SIZE - valid);
+}
+
+
+/* Returns the next N bits of the stream without consuming them. When
+ * CurrentBfr does not hold enough bits, the missing ones are taken from the
+ * top of NextBfr.
+ */
+unsigned int CMPEG2Dec::Show_Bits(unsigned int N)
+{
+    if (N > BitsLeft)
+    {
+        /* Bits that have to be borrowed from the prefetched word. */
+        const unsigned int fromNext = N - BitsLeft;
+        return (((CurrentBfr << (32 - BitsLeft)) >> (32 - BitsLeft)) << fromNext) + (NextBfr >> (32 - fromNext));
+    }
+
+    return (CurrentBfr << (32 - BitsLeft)) >> (32 - N);
+}
+
+/* Consumes and returns the next N bits of the stream. Requests that would
+ * use up CurrentBfr completely are delegated to Get_Bits_All().
+ */
+unsigned int CMPEG2Dec::Get_Bits(unsigned int N)
+{
+    if (N >= BitsLeft)
+    {
+        return Get_Bits_All(N);
+    }
+
+    /* Fast path: everything is available in the current word. */
+    Val = (CurrentBfr << (32 - BitsLeft)) >> (32 - N);
+    BitsLeft -= N;
+    return Val;
+}
+
+/* Discards the next N bits of the stream. Requests that would use up
+ * CurrentBfr completely are delegated to Flush_Buffer_All().
+ */
+void CMPEG2Dec::Flush_Buffer(unsigned int N)
+{
+    if (N >= BitsLeft)
+    {
+        Flush_Buffer_All(N);
+        return;
+    }
+
+    BitsLeft -= N;
+}
+
+/* Loads the next four stream bytes, big-endian, into NextBfr. Three cases:
+ * a system stream close to the end of the current packet, the common case
+ * where four bytes are available in the buffer, and the case where the read
+ * crosses the end of the buffer.
+ */
+void CMPEG2Dec::Fill_Next()
+{
+ if (SystemStream_Flag && Rdptr>=Rdmax-4)
+ {
+ /* Fewer than five bytes before the packet end: fetch byte by byte and move
+ * to the next packet whenever the current one is exhausted. */
+ if (Rdptr >= Rdmax)
+ Next_Packet();
+ NextBfr = Get_Byte() << 24;
+
+ if (Rdptr >= Rdmax)
+ Next_Packet();
+ NextBfr += Get_Byte() << 16;
+
+ if (Rdptr >= Rdmax)
+ Next_Packet();
+ NextBfr += Get_Byte() << 8;
+
+ if (Rdptr >= Rdmax)
+ Next_Packet();
+ NextBfr += Get_Byte();
+ }
+ else if (Rdptr < Rdbfr+BUFFER_SIZE-4)
+ {
+ /* Common case: all four bytes are in the buffer. */
+ NextBfr = (*Rdptr << 24) + (*(Rdptr+1) << 16) + (*(Rdptr+2) << 8) + *(Rdptr+3);
+ Rdptr += 4;
+ }
+ else
+ {
+ /* Near the end of the buffer: refill before any byte that lies past it. */
+ if (Rdptr >= Rdbfr+BUFFER_SIZE)
+ Fill_Buffer();
+ NextBfr = *Rdptr++ << 24;
+
+ if (Rdptr >= Rdbfr+BUFFER_SIZE)
+ Fill_Buffer();
+ NextBfr += *Rdptr++ << 16;
+
+ if (Rdptr >= Rdbfr+BUFFER_SIZE)
+ Fill_Buffer();
+ NextBfr += *Rdptr++ << 8;
+
+ if (Rdptr >= Rdbfr+BUFFER_SIZE)
+ Fill_Buffer();
+ NextBfr += *Rdptr++;
+ }
+}
+
+/* Returns the byte at Rdptr and advances Rdptr, refilling Rdbfr first for as
+ * long as Rdptr lies at or beyond the end of the buffer.
+ */
+unsigned int CMPEG2Dec::Get_Byte()
+{
+ /* Rdptr may be more than one buffer ahead (callers skip bytes by adding to
+ * it), so keep reading buffers until it falls inside the current one. */
+ while (Rdptr >= (Rdbfr + BUFFER_SIZE))
+ {
+ Read = _read(Infile[File_Flag], Rdbfr, BUFFER_SIZE);
+
+ /* Short read: let Next_File() top the buffer up from the next input file. */
+ if (Read < BUFFER_SIZE)
+ Next_File();
+
+ /* Same KeyOp handling as in Fill_Buffer(). */
+ if (KeyOp_Flag && (Rdbfr[20] & 0x10))
+ {
+ BufferOp(Rdbfr, lfsr0, lfsr1);
+ Rdbfr[20] &= ~0x10;
+ }
+
+ /* Keep both pointers relative to the newly loaded data. */
+ Rdptr -= BUFFER_SIZE;
+ Rdmax -= BUFFER_SIZE;
+ }
+
+ return *Rdptr++;
+}
+
+/* Reads two bytes through Get_Byte() and returns them as a big-endian 16-bit
+ * value (first byte is the high byte).
+ */
+unsigned int CMPEG2Dec::Get_Short()
+{
+    const unsigned int high = Get_Byte();
+    const unsigned int low = Get_Byte();
+
+    return (high<<8) + low;
+}
+
+/* Drops the BitsLeft & 7 bits that precede the next byte boundary, then
+ * discards whole bytes until the next 24 bits are the start code prefix
+ * 0x000001. The prefix itself is not consumed.
+ */
+void CMPEG2Dec::next_start_code()
+{
+    Flush_Buffer(BitsLeft & 7);
+
+    for (;;)
+    {
+        if (Show_Bits(24) == 1)
+        {
+            break;
+        }
+
+        Flush_Buffer(8);
+    }
+}
+
+//
+// gethdr
+//
+
+/* Parses headers until a picture header has been read. Sequence and group
+ * headers found on the way are decoded; any other start code is ignored.
+ * Always returns 1.
+ */
+int CMPEG2Dec::Get_Hdr()
+{
+    for (;;)
+    {
+        /* look for next_start_code */
+        next_start_code();
+
+        const unsigned int start_code = Get_Bits(32);
+
+        if (start_code == PICTURE_START_CODE)
+        {
+            picture_header();
+            return 1;
+        }
+
+        if (start_code == SEQUENCE_HEADER_CODE)
+        {
+            sequence_header();
+        }
+        else if (start_code == GROUP_START_CODE)
+        {
+            group_of_pictures_header();
+        }
+    }
+}
+
+/* decode group of pictures header */
+/* ISO/IEC 13818-2 section 6.2.2.6 */
+/* Reads the fields of a group of pictures header in stream order and then
+ * any extension and user data. None of the field values is kept.
+ */
+void CMPEG2Dec::group_of_pictures_header()
+{
+    int drop_flag = Get_Bits(1);
+    int gop_hour = Get_Bits(5);
+    int gop_minute = Get_Bits(6);
+
+    Flush_Buffer(1); // marker bit
+
+    int gop_sec = Get_Bits(6);
+    int gop_frame = Get_Bits(6);
+    int closed_gop = Get_Bits(1);
+    int broken_link = Get_Bits(1);
+
+    extension_and_user_data();
+}
+
+/* decode picture header */
+/* ISO/IEC 13818-2 section 6.2.3 */
+/* Reads a picture header. temporal_reference and picture_coding_type are
+ * stored in the decoder; the remaining fields are read only to advance the
+ * bit stream. Extra information bits and any extension and user data are
+ * consumed afterwards.
+ */
+void CMPEG2Dec::picture_header()
+{
+    temporal_reference = Get_Bits(10);
+    picture_coding_type = Get_Bits(3);
+    int vbv_delay = Get_Bits(16);
+
+    /* Forward vector fields are present in P and B pictures. */
+    if (picture_coding_type==P_TYPE || picture_coding_type==B_TYPE)
+    {
+        int full_pel_forward_vector = Get_Bits(1);
+        int forward_f_code = Get_Bits(3);
+    }
+
+    /* Backward vector fields are present in B pictures only. */
+    if (picture_coding_type==B_TYPE)
+    {
+        int full_pel_backward_vector = Get_Bits(1);
+        int backward_f_code = Get_Bits(3);
+    }
+
+    int Extra_Information_Byte_Count = extra_bit_information();
+    extension_and_user_data();
+}
+
+/* decode sequence header */
+void CMPEG2Dec::sequence_header()
+{
+    /*
+     * Stores horizontal_size / vertical_size and the two luminance
+     * quantiser matrices (from the stream in zig-zag order, or defaults),
+     * then mirrors them into the chrominance matrices.  The remaining
+     * header fields are read and discarded.
+     */
+    int k;
+
+    horizontal_size = Get_Bits(12);
+    vertical_size = Get_Bits(12);
+    Get_Bits(4);        // aspect_ratio_information
+    Get_Bits(4);        // frame_rate_code
+    Get_Bits(18);       // bit_rate_value
+    Flush_Buffer(1);    // marker bit
+    Get_Bits(10);       // vbv_buffer_size
+    Get_Bits(1);        // constrained_parameters_flag
+
+    /* intra matrix: transmitted, or the built-in default */
+    load_intra_quantizer_matrix = Get_Bits(1);
+    if (load_intra_quantizer_matrix)
+    {
+        for (k = 0; k < 64; k++)
+            intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
+    }
+    else
+    {
+        for (k = 0; k < 64; k++)
+            intra_quantizer_matrix[k] = default_intra_quantizer_matrix[k];
+    }
+
+    /* non-intra matrix: transmitted, or flat 16 */
+    load_non_intra_quantizer_matrix = Get_Bits(1);
+    if (load_non_intra_quantizer_matrix)
+    {
+        for (k = 0; k < 64; k++)
+            non_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
+    }
+    else
+    {
+        for (k = 0; k < 64; k++)
+            non_intra_quantizer_matrix[k] = 16;
+    }
+
+    /* chrominance matrices start out as copies of the luminance ones */
+    for (k = 0; k < 64; k++)
+    {
+        chroma_intra_quantizer_matrix[k] = intra_quantizer_matrix[k];
+        chroma_non_intra_quantizer_matrix[k] = non_intra_quantizer_matrix[k];
+    }
+
+    extension_and_user_data();
+}
+
+/* decode slice header */
+/* ISO/IEC 13818-2 section 6.2.4 */
+int CMPEG2Dec::slice_header()
+{
+    /*
+     * Parses a slice header, updates quantizer_scale and returns
+     * slice_vertical_position_extension (0 unless vertical_size > 2800).
+     */
+    int vertical_position_extension = 0;
+    if (vertical_size > 2800)
+        vertical_position_extension = Get_Bits(3);
+
+    const int scale_code = Get_Bits(5);
+    if (q_scale_type)
+        quantizer_scale = Non_Linear_quantizer_scale[scale_code];
+    else
+        quantizer_scale = scale_code << 1;
+
+    /* slice_id introduced in March 1995 as part of the video corrigendum
+       (after the IS was drafted in November 1994) */
+    if (Get_Bits(1))
+    {
+        Get_Bits(1);                // intra slice
+        Get_Bits(1);                // slice_picture_id_enable
+        Get_Bits(6);                // slice_picture_id
+        extra_bit_information();    // extra_information_slice (count unused)
+    }
+
+    return vertical_position_extension;
+}
+
+/* decode extension and user data */
+/* ISO/IEC 13818-2 section 6.2.2.2 */
+void CMPEG2Dec::extension_and_user_data()
+{
+    /*
+     * Consume every extension / user-data section that follows the current
+     * header.  Extensions are dispatched on their 4-bit identifier (unknown
+     * identifiers are skipped); user data is skipped entirely.
+     */
+    next_start_code();
+
+    for (;;)
+    {
+        const int code = Show_Bits(32);
+
+        if (code != EXTENSION_START_CODE && code != USER_DATA_START_CODE)
+            break;
+
+        Flush_Buffer(32);   /* drop the start code itself */
+
+        if (code == EXTENSION_START_CODE)
+        {
+            const int ext_id = Get_Bits(4);
+
+            switch (ext_id)
+            {
+                case SEQUENCE_EXTENSION_ID:
+                    sequence_extension();
+                    break;
+
+                case SEQUENCE_DISPLAY_EXTENSION_ID:
+                    sequence_display_extension();
+                    break;
+
+                case QUANT_MATRIX_EXTENSION_ID:
+                    quant_matrix_extension();
+                    break;
+
+                case PICTURE_DISPLAY_EXTENSION_ID:
+                    picture_display_extension();
+                    break;
+
+                case PICTURE_CODING_EXTENSION_ID:
+                    picture_coding_extension();
+                    break;
+
+                case COPYRIGHT_EXTENSION_ID:
+                    copyright_extension();
+                    break;
+            }
+        }
+
+        next_start_code();
+    }
+}
+
+/* decode sequence extension */
+/* ISO/IEC 13818-2 section 6.2.2.3 */
+void CMPEG2Dec::sequence_extension()
+{
+    /*
+     * Stores progressive_sequence and chroma_format, and widens
+     * horizontal_size / vertical_size with their 2-bit extensions.
+     * All other fields are read and discarded.
+     */
+    Get_Bits(8);                                        // profile_and_level_indication
+    progressive_sequence = Get_Bits(1);
+    chroma_format = Get_Bits(2);
+    const int horizontal_size_extension = Get_Bits(2);
+    const int vertical_size_extension = Get_Bits(2);
+    Get_Bits(12);                                       // bit_rate_extension
+    Flush_Buffer(1);                                    // marker bit
+    Get_Bits(8);                                        // vbv_buffer_size_extension
+    Get_Bits(1);                                        // low_delay
+    Get_Bits(2);                                        // frame_rate_extension_n
+    Get_Bits(5);                                        // frame_rate_extension_d
+
+    /* the extension supplies bits 12..13; the low 12 bits come from the sequence header */
+    horizontal_size = (horizontal_size_extension << 12) | (horizontal_size & 0x0fff);
+    vertical_size = (vertical_size_extension << 12) | (vertical_size & 0x0fff);
+}
+
+/* decode sequence display extension */
+void CMPEG2Dec::sequence_display_extension()
+{
+    /* nothing from this extension is stored; it is parsed only to
+       consume the right number of bits */
+    Get_Bits(3);            // video_format
+
+    if (Get_Bits(1))        // color_description
+    {
+        Get_Bits(8);        // color_primaries
+        Get_Bits(8);        // transfer_characteristics
+        Get_Bits(8);        // matrix_coefficients
+    }
+
+    Get_Bits(14);           // display_horizontal_size
+    Flush_Buffer(1);        // marker bit
+    Get_Bits(14);           // display_vertical_size
+}
+
+/* decode quant matrix extension */
+/* ISO/IEC 13818-2 section 6.2.3.2 */
+void CMPEG2Dec::quant_matrix_extension()
+{
+    /*
+     * Up to four matrices may follow, each preceded by a 1-bit load flag and
+     * transmitted in zig-zag order.  A luminance matrix is also copied into
+     * the matching chrominance matrix; an explicit chrominance matrix later
+     * in the extension then overrides that copy.
+     */
+    int k;
+
+    load_intra_quantizer_matrix = Get_Bits(1);
+    if (load_intra_quantizer_matrix)
+    {
+        for (k = 0; k < 64; k++)
+        {
+            intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
+            chroma_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = intra_quantizer_matrix[scan[ZIG_ZAG][k]];
+        }
+    }
+
+    load_non_intra_quantizer_matrix = Get_Bits(1);
+    if (load_non_intra_quantizer_matrix)
+    {
+        for (k = 0; k < 64; k++)
+        {
+            non_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
+            chroma_non_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = non_intra_quantizer_matrix[scan[ZIG_ZAG][k]];
+        }
+    }
+
+    load_chroma_intra_quantizer_matrix = Get_Bits(1);
+    if (load_chroma_intra_quantizer_matrix)
+    {
+        for (k = 0; k < 64; k++)
+            chroma_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
+    }
+
+    load_chroma_non_intra_quantizer_matrix = Get_Bits(1);
+    if (load_chroma_non_intra_quantizer_matrix)
+    {
+        for (k = 0; k < 64; k++)
+            chroma_non_intra_quantizer_matrix[scan[ZIG_ZAG][k]] = Get_Bits(8);
+    }
+}
+
+/* decode picture display extension */
+/* ISO/IEC 13818-2 section 6.2.3.3. */
+void CMPEG2Dec::picture_display_extension()
+{
+    /* based on ISO/IEC 13818-2 section 6.3.12
+       (November 1994) Picture display extensions.
+       The offsets themselves are not kept; only their count matters so
+       that the right number of bits is consumed. */
+    int offset_count;
+
+    if (progressive_sequence)
+    {
+        if (!repeat_first_field)
+            offset_count = 1;
+        else
+            offset_count = top_field_first ? 3 : 2;
+    }
+    else if (picture_structure != FRAME_PICTURE)
+    {
+        offset_count = 1;
+    }
+    else
+    {
+        offset_count = repeat_first_field ? 3 : 2;
+    }
+
+    /* each offset pair: 16 bits + marker, 16 bits + marker */
+    for (int n = 0; n < offset_count; n++)
+    {
+        Get_Bits(16);       // frame_center_horizontal_offset
+        Flush_Buffer(1);    // marker bit
+
+        Get_Bits(16);       // frame_center_vertical_offset
+        Flush_Buffer(1);    // marker bit
+    }
+}
+
+/* decode picture coding extension */
+void CMPEG2Dec::picture_coding_extension()
+{
+    /* f_code[s][t]: four 4-bit values, read in the order
+       [0][0], [0][1], [1][0], [1][1] */
+    for (int s = 0; s < 2; s++)
+        for (int t = 0; t < 2; t++)
+            f_code[s][t] = Get_Bits(4);
+
+    intra_dc_precision = Get_Bits(2);
+    picture_structure = Get_Bits(2);
+    top_field_first = Get_Bits(1);
+    frame_pred_frame_dct = Get_Bits(1);
+    concealment_motion_vectors = Get_Bits(1);
+    q_scale_type = Get_Bits(1);
+    intra_vlc_format = Get_Bits(1);
+    alternate_scan = Get_Bits(1);
+    repeat_first_field = Get_Bits(1);
+    Get_Bits(1);                                    // chroma_420_type (not kept)
+    progressive_frame = Get_Bits(1);
+    const int composite_display_flag = Get_Bits(1);
+
+    /* remember the progressive flag of the picture being decoded */
+    pf_current = progressive_frame;
+
+    if (!composite_display_flag)
+        return;
+
+    /* composite display information is skipped */
+    Get_Bits(1);                                    // v_axis
+    Get_Bits(3);                                    // field_sequence
+    Get_Bits(1);                                    // sub_carrier
+    Get_Bits(7);                                    // burst_amplitude
+    Get_Bits(8);                                    // sub_carrier_phase
+}
+
+/* decode extra bit information */
+/* ISO/IEC 13818-2 section 6.2.3.4. */
+int CMPEG2Dec::extra_bit_information()
+{
+    /* each set flag bit is followed by one byte of extra information,
+       which is skipped; returns how many such bytes were skipped */
+    int skipped_bytes;
+
+    for (skipped_bytes = 0; Get_Bits(1); ++skipped_bytes)
+        Flush_Buffer(8);
+
+    return skipped_bytes;
+}
+
+/* Copyright extension */
+/* ISO/IEC 13818-2 section 6.2.3.6. */
+/* (header added in November, 1994 to the IS document) */
+void CMPEG2Dec::copyright_extension()
+{
+    /* the copyright data is not used; every field is read and dropped */
+    Get_Bits(1);        // copyright_flag
+    Get_Bits(8);        // copyright_identifier
+    Get_Bits(1);        // original_or_copy
+    Get_Bits(7);        // reserved
+
+    Flush_Buffer(1);    // marker bit
+    Get_Bits(20);       // copyright_number_1
+    Flush_Buffer(1);    // marker bit
+    Get_Bits(22);       // copyright_number_2
+    Flush_Buffer(1);    // marker bit
+    Get_Bits(22);       // copyright_number_3
+}
+
+//
+// getpic
+//
+
+/* maps a block index within a macroblock (0..11) to its colour component
+   index: 0 for blocks 0-3, then 1 and 2 alternating for blocks 4-11;
+   used to index current_frame[] and dc_dct_pred[] */
+static const unsigned char cc_table[12] = {
+ 0, 0, 0, 0, 1, 2, 1, 2, 1, 2, 1, 2
+};
+
+void CMPEG2Dec::Decode_Picture(int ref, unsigned char *dst, int pitch)
+{
+    /*
+     * Decode the picture whose headers have just been parsed.
+     *   ref   - non-zero to assemble an output frame into dst
+     *   dst   - destination handed to assembleFrame()
+     *   pitch - destination pitch handed to assembleFrame()
+     * Output is produced only once a whole frame is available (a frame
+     * picture, or the second field of a field pair).
+     */
+
+    /* a frame picture can never be a second field */
+    if (Second_Field && picture_structure == FRAME_PICTURE)
+        Second_Field = 0;
+
+    /* reference pictures shift the progressive-flag history */
+    if (picture_coding_type != B_TYPE)
+    {
+        pf_forward = pf_backward;
+        pf_backward = pf_current;
+    }
+
+    Update_Picture_Buffers();
+
+    picture_data();
+
+    if (ref)
+    {
+        if (picture_structure == FRAME_PICTURE || Second_Field)
+        {
+            if (picture_coding_type == B_TYPE)
+                assembleFrame(auxframe, pf_current, dst, pitch);
+            else
+                assembleFrame(forward_reference_frame, pf_forward, dst, pitch);
+        }
+    }
+
+    /* field pictures alternate between first and second field */
+    if (picture_structure != FRAME_PICTURE)
+        Second_Field = !Second_Field;
+}
+
+/* reuse old picture buffers as soon as they are no longer needed */
+void CMPEG2Dec::Update_Picture_Buffers()
+{
+    /* selects, per colour component, the buffer the current picture is
+       decoded into, rotating the two reference buffers when needed */
+    for (int plane = 0; plane < 3; plane++)
+    {
+        if (picture_coding_type == B_TYPE)
+        {
+            /* B pictures do not need to be saved for future reference */
+            current_frame[plane] = auxframe[plane];
+        }
+        else
+        {
+            /* only rotate at the beginning of the coded frame */
+            if (!Second_Field)
+            {
+                /* the previously decoded reference frame (stored in the
+                   backward slot) becomes the forward reference; the old
+                   forward buffer is recycled as the new backward slot */
+                unsigned char *recycled = forward_reference_frame[plane];
+                forward_reference_frame[plane] = backward_reference_frame[plane];
+                backward_reference_frame[plane] = recycled;
+            }
+
+            /* decode into the backward slot: its old contents are no longer
+               needed, and later B pictures will use this picture as their
+               backward reference */
+            current_frame[plane] = backward_reference_frame[plane];
+        }
+
+        /* a bottom field starts one line further down */
+        if (picture_structure == BOTTOM_FIELD)
+        {
+            if (plane == 0)
+                current_frame[plane] += Coded_Picture_Width;
+            else
+                current_frame[plane] += Chroma_Width;
+        }
+    }
+}
+
+/* decode all macroblocks of the current picture */
+/* stages described in ISO/IEC 13818-2 section 7 */
+void CMPEG2Dec::picture_data()
+{
+    /* number of macroblocks per picture; a field picture holds half a frame */
+    const int frame_macroblocks = mb_width * mb_height;
+    const int macroblock_limit = (picture_structure != FRAME_PICTURE)
+        ? (frame_macroblocks >> 1) : frame_macroblocks;
+
+    /* keep decoding slices until slice() reports the end of the picture
+       with a negative return value */
+    while (slice(macroblock_limit) >= 0)
+    {
+    }
+}
+
+/* decode all macroblocks of the current slice */
+/* ISO/IEC 13818-2 section 6.3.16 */
+/* return 0 : go to next slice */
+/* return -1: go to next picture */
+int CMPEG2Dec::slice(int MBAmax)
+{
+    /*
+     * Decodes one slice.  MBAmax is the number of macroblocks in the picture.
+     * Returns 0 to continue with the next slice (also after a fault, which is
+     * cleared here) and -1 when the picture is finished or no slice start
+     * code was found.
+     */
+    int MBA = 0, MBAinc = 0, macroblock_type, motion_type, dct_type;
+    int dc_dct_pred[3], PMV[2][2][2], motion_vertical_field_select[2][2], dmvector[2];
+
+    const int status = start_of_slice(&MBA, &MBAinc, dc_dct_pred, PMV);
+    if (status != 1)
+        return status;
+
+    while (MBA < MBAmax)
+    {
+        if (MBAinc == 0)
+        {
+            /* next_start_code or fault: resynchronise at the next slice */
+            if (!Show_Bits(23) || Fault_Flag)
+            {
+                Fault_Flag = 0;
+                return 0;
+            }
+
+            /* decode macroblock address increment */
+            MBAinc = Get_macroblock_address_increment();
+            if (Fault_Flag)
+            {
+                Fault_Flag = 0;
+                return 0;
+            }
+        }
+
+        if (MBAinc == 1)
+        {
+            /* coded (not skipped) macroblock */
+            if (!decode_macroblock(&macroblock_type, &motion_type, &dct_type, PMV,
+                dc_dct_pred, motion_vertical_field_select, dmvector))
+            {
+                Fault_Flag = 0;
+                return 0;
+            }
+        }
+        else
+        {
+            /* skipped macroblock, ISO/IEC 13818-2 section 7.6.6 */
+            skipped_macroblock(dc_dct_pred, PMV, &motion_type, motion_vertical_field_select, &macroblock_type);
+        }
+
+        /* ISO/IEC 13818-2 section 7.6 */
+        motion_compensation(MBA, macroblock_type, motion_type, PMV,
+            motion_vertical_field_select, dmvector, dct_type);
+
+        /* advance to next macroblock */
+        MBA++;
+        MBAinc--;
+    }
+
+    /* all macroblocks decoded: this is how we properly exit the picture */
+    return -1;
+}
+
+/* ISO/IEC 13818-2 section 6.3.17.1: Macroblock modes */
+void CMPEG2Dec::macroblock_modes(int *pmacroblock_type, int *pmotion_type,
+    int *pmotion_vector_count, int *pmv_format,
+    int *pdmv, int *pmvscale, int *pdct_type)
+{
+    /*
+     * Reads macroblock_type and the mode fields that depend on it, and
+     * derives the motion-vector bookkeeping values.  All results are
+     * returned through the pointer arguments; if Fault_Flag is set after
+     * reading macroblock_type, nothing is written.
+     */
+    const int mb_type = Get_macroblock_type();
+    if (Fault_Flag)
+        return;
+
+    const bool frame_picture = (picture_structure == FRAME_PICTURE);
+
+    /* frame/field motion type */
+    int mc_type = 0;
+    if (mb_type & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD))
+    {
+        /* only frame pictures with frame_pred_frame_dct imply the type */
+        if (frame_picture && frame_pred_frame_dct)
+            mc_type = MC_FRAME;
+        else
+            mc_type = Get_Bits(2);
+    }
+    else if ((mb_type & MACROBLOCK_INTRA) && concealment_motion_vectors)
+    {
+        mc_type = frame_picture ? MC_FRAME : MC_FIELD;
+    }
+
+    /* derive motion_vector_count and mv_format (table 6-17, 6-18) */
+    int vector_count;
+    int vector_format;
+    if (frame_picture)
+    {
+        vector_count = (mc_type == MC_FIELD) ? 2 : 1;
+        vector_format = (mc_type == MC_FRAME) ? MV_FRAME : MV_FIELD;
+    }
+    else
+    {
+        vector_count = (mc_type == MC_16X8) ? 2 : 1;
+        vector_format = MV_FIELD;
+    }
+
+    /* dual prime */
+    const int dual_prime = (mc_type == MC_DMV);
+
+    /* field mv predictions in frame pictures have to be scaled
+       ISO/IEC 13818-2 section 7.6.3.1 Decoding the motion vectors */
+    const int scale_flag = (vector_format == MV_FIELD && frame_picture);
+
+    /* dct_type (frame DCT / field DCT) is only transmitted for coded
+       macroblocks of frame pictures without frame_pred_frame_dct */
+    int dct_kind = 0;
+    if (frame_picture && !frame_pred_frame_dct
+        && (mb_type & (MACROBLOCK_PATTERN|MACROBLOCK_INTRA)))
+    {
+        dct_kind = Get_Bits(1);
+    }
+
+    /* return values */
+    *pmacroblock_type = mb_type;
+    *pmotion_type = mc_type;
+    *pmotion_vector_count = vector_count;
+    *pmv_format = vector_format;
+    *pdmv = dual_prime;
+    *pmvscale = scale_flag;
+    *pdct_type = dct_kind;
+}
+
+/* move/add 8x8-Block from block[comp] to backward_reference_frame */
+/* copy reconstructed 8x8 block from block[comp] to current_frame[]
+ ISO/IEC 13818-2 section 7.6.8: Adding prediction and coefficient data
+ This stage also embodies some of the operations implied by:
+ - ISO/IEC 13818-2 section 7.6.7: Combining predictions
+ - ISO/IEC 13818-2 section 6.1.3: Macroblock
+*/
+void CMPEG2Dec::Add_Block(int count, int bx, int by, int dct_type, int addflag)
+{
+ /* Writes the first `count` 8x8 blocks of block[] into current_frame[].
+    bx, by   - pixel position of the macroblock (luminance coordinates)
+    dct_type - non-zero selects the field-DCT row layout in frame pictures
+    addflag  - non-zero: add the block to the pixels already in the frame;
+               zero: store the block with 128 added to every sample.
+    Both paths pack with unsigned saturation.  No emms is executed here. */
+
+ /* four 16-bit words of 128, added to each sample when addflag is zero */
+ static const __int64 mmmask_128 = 0x0080008000800080;
+
+ int comp, cc, iincr, bxh, byh;
+ unsigned char *rfp;
+ short *Block_Ptr;
+
+ for (comp=0; comp<count; comp++)
+ {
+ Block_Ptr = block[comp];
+ cc = cc_table[comp];
+
+ bxh = bx; byh = by;
+
+ /* rfp = address of the block's first row, iincr = distance between
+    consecutive rows of the block in the destination */
+ if (cc==0)
+ {
+ if (picture_structure==FRAME_PICTURE)
+ {
+ if (dct_type)
+ {
+ /* field DCT: rows are interleaved, so step two lines at a time */
+ rfp = current_frame[0] + Coded_Picture_Width*(by+((comp&2)>>1)) + bx + ((comp&1)<<3);
+ iincr = Coded_Picture_Width<<1;
+ }
+ else
+ {
+ rfp = current_frame[0] + Coded_Picture_Width*(by+((comp&2)<<2)) + bx + ((comp&1)<<3);
+ iincr = Coded_Picture_Width;
+ }
+ }
+ else
+ {
+ rfp = current_frame[0] + (Coded_Picture_Width<<1)*(by+((comp&2)<<2)) + bx + ((comp&1)<<3);
+ iincr = Coded_Picture_Width<<1;
+ }
+ }
+ else
+ {
+ /* scale the macroblock position down to chroma coordinates */
+ if (chroma_format!=CHROMA444)
+ bxh >>= 1;
+ if (chroma_format==CHROMA420)
+ byh >>= 1;
+
+ if (picture_structure==FRAME_PICTURE)
+ {
+ if (dct_type && chroma_format!=CHROMA420)
+ {
+ /* field DCT coding */
+ rfp = current_frame[cc] + Chroma_Width*(byh+((comp&2)>>1)) + bxh + (comp&8);
+ iincr = Chroma_Width<<1;
+ }
+ else
+ {
+ /* frame DCT coding */
+ rfp = current_frame[cc] + Chroma_Width*(byh+((comp&2)<<2)) + bxh + (comp&8);
+ iincr = Chroma_Width;
+ }
+ }
+ else
+ {
+ /* field picture */
+ rfp = current_frame[cc] + (Chroma_Width<<1)*(byh+((comp&2)<<2)) + bxh + (comp&8);
+ iincr = Chroma_Width<<1;
+ }
+ }
+
+ if (addflag)
+ {
+ /* 8 rows: widen the 8 destination bytes to words, add the 8 block
+    words with signed saturation, pack back to bytes with unsigned
+    saturation and store; block pointer advances 16 bytes per row */
+ __asm
+ {
+ pxor mm0, mm0
+ mov eax, [rfp]
+ mov ebx, [Block_Ptr]
+ mov edi, 8
+addon:
+ movq mm2, [ebx+8]
+
+ movq mm3, [eax]
+ movq mm4, mm3
+
+ movq mm1, [ebx]
+ punpckhbw mm3, mm0
+
+ paddsw mm3, mm2
+ packuswb mm3, mm0
+
+ punpcklbw mm4, mm0
+ psllq mm3, 32
+
+ paddsw mm4, mm1
+ packuswb mm4, mm0
+
+ por mm3, mm4
+ add ebx, 16
+
+ dec edi
+ movq [eax], mm3
+
+ add eax, [iincr]
+ cmp edi, 0x00
+ jg addon
+ }
+ }
+ else
+ {
+ /* 8 rows: add 128 to each of the 8 block words, pack to bytes with
+    unsigned saturation and overwrite the destination row */
+ __asm
+ {
+ mov eax, [rfp]
+ mov ebx, [Block_Ptr]
+ mov edi, 8
+
+ pxor mm0, mm0
+ movq mm7, [mmmask_128]
+addoff:
+ movq mm3, [ebx+8]
+ movq mm4, [ebx]
+
+ paddsw mm3, mm7
+ paddsw mm4, mm7
+
+ packuswb mm3, mm0
+ packuswb mm4, mm0
+
+ psllq mm3, 32
+ por mm3, mm4
+
+ add ebx, 16
+ dec edi
+
+ movq [eax], mm3
+
+ add eax, [iincr]
+ cmp edi, 0x00
+ jg addoff
+ }
+ }
+ }
+}
+
+/* set scratch pad macroblock to zero */
+void CMPEG2Dec::Clear_Block(int count)
+{
+ /* Zeroes the first `count` entries of block[]: sixteen 8-byte MMX stores
+    clear 128 bytes (64 shorts) per block.  No emms is executed here. */
+ int comp;
+ short *Block_Ptr;
+
+ for (comp=0; comp<count; comp++)
+ {
+ Block_Ptr = block[comp];
+
+ __asm
+ {
+ mov eax, [Block_Ptr];
+ pxor mm0, mm0;
+ movq [eax+0 ], mm0;
+ movq [eax+8 ], mm0;
+ movq [eax+16], mm0;
+ movq [eax+24], mm0;
+ movq [eax+32], mm0;
+ movq [eax+40], mm0;
+ movq [eax+48], mm0;
+ movq [eax+56], mm0;
+ movq [eax+64], mm0;
+ movq [eax+72], mm0;
+ movq [eax+80], mm0;
+ movq [eax+88], mm0;
+ movq [eax+96], mm0;
+ movq [eax+104],mm0;
+ movq [eax+112],mm0;
+ movq [eax+120],mm0;
+ }
+ }
+}
+
+/* ISO/IEC 13818-2 section 7.6 */
+void CMPEG2Dec::motion_compensation(int MBA, int macroblock_type, int motion_type,
+    int PMV[2][2][2], int motion_vertical_field_select[2][2],
+    int dmvector[2], int dct_type)
+{
+    /*
+     * Reconstructs macroblock number MBA: forms the prediction for
+     * non-intra macroblocks, runs the selected IDCT over every block and
+     * finally writes/adds the blocks into the current frame.
+     */
+    int n;
+
+    /* derive current macroblock position within picture */
+    /* ISO/IEC 13818-2 section 6.3.1.6 and 6.3.1.7 */
+    const int mb_column = MBA % mb_width;
+    const int mb_row = MBA / mb_width;
+    const int bx = 16 * mb_column;
+    const int by = 16 * mb_row;
+
+    const bool is_intra = (macroblock_type & MACROBLOCK_INTRA) != 0;
+
+    /* motion compensation */
+    if (!is_intra)
+    {
+        form_predictions(bx, by, macroblock_type, motion_type, PMV,
+            motion_vertical_field_select, dmvector);
+    }
+
+    /* inverse DCT; the non-MMX variants are preceded by emms */
+    switch (IDCT_Flag)
+    {
+        case IDCT_MMX:
+            for (n = 0; n < block_count; n++)
+                MMX_IDCT(block[n]);
+            break;
+
+        case IDCT_SSEMMX:
+            for (n = 0; n < block_count; n++)
+                SSEMMX_IDCT(block[n]);
+            break;
+
+        case IDCT_FPU:
+            __asm emms;
+            for (n = 0; n < block_count; n++)
+                FPU_IDCT(block[n]);
+            break;
+
+        case IDCT_REF:
+            __asm emms;
+            for (n = 0; n < block_count; n++)
+                REF_IDCT(block[n]);
+            break;
+    }
+
+    /* intra blocks replace the frame contents, others are added to the prediction */
+    Add_Block(block_count, bx, by, dct_type, !is_intra);
+}
+
+/* ISO/IEC 13818-2 section 7.6.6 */
+void CMPEG2Dec::skipped_macroblock(int dc_dct_pred[3], int PMV[2][2][2], int *motion_type,
+    int motion_vertical_field_select[2][2], int *macroblock_type)
+{
+    /* sets up the state for a skipped macroblock: no coefficients,
+       predictors reset, motion type derived from the picture structure */
+    int n, r, t;
+
+    Clear_Block(block_count);
+
+    /* reset intra_dc predictors */
+    /* ISO/IEC 13818-2 section 7.2.1: DC coefficients in intra blocks */
+    for (n = 0; n < 3; n++)
+        dc_dct_pred[n] = 0;
+
+    /* reset motion vector predictors (forward vectors only) */
+    /* ISO/IEC 13818-2 section 7.6.3.4: Resetting motion vector predictors */
+    if (picture_coding_type == P_TYPE)
+    {
+        for (r = 0; r < 2; r++)
+            for (t = 0; t < 2; t++)
+                PMV[r][0][t] = 0;
+    }
+
+    /* derive motion_type */
+    if (picture_structure == FRAME_PICTURE)
+    {
+        *motion_type = MC_FRAME;
+    }
+    else
+    {
+        const int bottom_field = (picture_structure == BOTTOM_FIELD);
+
+        *motion_type = MC_FIELD;
+        motion_vertical_field_select[0][1] = bottom_field;
+        motion_vertical_field_select[0][0] = bottom_field;
+    }
+
+    /* clear MACROBLOCK_INTRA */
+    *macroblock_type = *macroblock_type & ~MACROBLOCK_INTRA;
+}
+
+/* return==-1 means go to next picture */
+/* the expression "start of slice" is used throughout the normative
+ body of the MPEG specification */
+int CMPEG2Dec::start_of_slice(int *MBA, int *MBAinc,
+    int dc_dct_pred[3], int PMV[2][2][2])
+{
+    /*
+     * Locates the next start code and, if it is a slice start code, parses
+     * the slice header and the first macroblock address increment.
+     * Returns 1 when macroblock decoding can begin, -1 otherwise
+     * (Fault_Flag is set to 10 when the start code is not a slice).
+     */
+    int r, s, t;
+
+    next_start_code();
+    const unsigned int code = Get_Bits(32);
+
+    /* only slice headers are allowed in picture_data */
+    if (!(code >= SLICE_START_CODE_MIN && code <= SLICE_START_CODE_MAX))
+    {
+        Fault_Flag = 10;
+        return -1;
+    }
+
+    /* decode slice header (may change quantizer_scale) */
+    const int slice_vert_pos_ext = slice_header();
+
+    /* decode macroblock address increment */
+    *MBAinc = Get_macroblock_address_increment();
+    if (Fault_Flag)
+        return -1;
+
+    /* set current location */
+    /* NOTE: the arithmetic used to derive macroblock_address below is
+       equivalent to ISO/IEC 13818-2 section 6.3.17: Macroblock */
+    *MBA = ((slice_vert_pos_ext<<7) + (code&255) - 1)*mb_width + *MBAinc - 1;
+    *MBAinc = 1;    // first macroblock in slice: not skipped
+
+    /* reset all DC coefficient predictors */
+    /* ISO/IEC 13818-2 section 7.2.1: DC coefficients in intra blocks */
+    for (r = 0; r < 3; r++)
+        dc_dct_pred[r] = 0;
+
+    /* reset all eight motion vector predictors */
+    /* ISO/IEC 13818-2 section 7.6.3.4: Resetting motion vector predictors */
+    for (r = 0; r < 2; r++)
+        for (s = 0; s < 2; s++)
+            for (t = 0; t < 2; t++)
+                PMV[r][s][t] = 0;
+
+    /* successful: trigger decode macroblocks in slice */
+    return 1;
+}
+
+/* ISO/IEC 13818-2 sections 7.2 through 7.5 */
+int CMPEG2Dec::decode_macroblock(int *macroblock_type, int *motion_type, int *dct_type,
+    int PMV[2][2][2], int dc_dct_pred[3],
+    int motion_vertical_field_select[2][2], int dmvector[2])
+{
+    /*
+     * Parses one coded macroblock: modes, optional quantiser update, motion
+     * vectors, coded block pattern and the coefficient blocks.
+     * Returns 1 on success and 0 as soon as Fault_Flag is found set, which
+     * makes the caller resynchronise at the next slice.
+     */
+    int motion_vector_count, mv_format, dmv, mvscale;
+    int pattern;
+    int n;
+
+    /* ISO/IEC 13818-2 section 6.3.17.1: Macroblock modes */
+    macroblock_modes(macroblock_type, motion_type, &motion_vector_count, &mv_format,
+        &dmv, &mvscale, dct_type);
+    if (Fault_Flag)
+        return 0;
+
+    /* nothing below writes *macroblock_type, so a local copy is enough */
+    const int mb_type = *macroblock_type;
+    const bool is_intra = (mb_type & MACROBLOCK_INTRA) != 0;
+
+    if (mb_type & MACROBLOCK_QUANT)
+    {
+        const int scale_code = Get_Bits(5);
+
+        /* ISO/IEC 13818-2 section 7.4.2.2: Quantizer scale factor */
+        if (q_scale_type)
+            quantizer_scale = Non_Linear_quantizer_scale[scale_code];
+        else
+            quantizer_scale = scale_code << 1;
+    }
+
+    /* ISO/IEC 13818-2 section 6.3.17.2: Motion vectors */
+    /* forward vectors (also used for concealment vectors of intra macroblocks) */
+    if ((mb_type & MACROBLOCK_MOTION_FORWARD) || (is_intra && concealment_motion_vectors))
+    {
+        motion_vectors(PMV, dmvector, motion_vertical_field_select, 0,
+            motion_vector_count, mv_format, f_code[0][0]-1, f_code[0][1]-1, dmv, mvscale);
+    }
+    if (Fault_Flag)
+        return 0;
+
+    /* backward vectors */
+    if (mb_type & MACROBLOCK_MOTION_BACKWARD)
+    {
+        motion_vectors(PMV, dmvector, motion_vertical_field_select, 1,
+            motion_vector_count, mv_format, f_code[1][0]-1, f_code[1][1]-1, 0, mvscale);
+    }
+    if (Fault_Flag)
+        return 0;
+
+    if (is_intra && concealment_motion_vectors)
+        Flush_Buffer(1);    // marker bit
+
+    /* ISO/IEC 13818-2 section 6.3.17.4: Coded block pattern */
+    if (mb_type & MACROBLOCK_PATTERN)
+    {
+        pattern = Get_coded_block_pattern();
+
+        /* 4:2:2 and 4:4:4 carry extra pattern bits for the additional chroma blocks */
+        if (chroma_format == CHROMA422)
+            pattern = (pattern << 2) | Get_Bits(2);
+        else if (chroma_format == CHROMA444)
+            pattern = (pattern << 6) | Get_Bits(6);
+    }
+    else if (is_intra)
+    {
+        /* intra macroblocks without a pattern code every block */
+        pattern = (1 << block_count) - 1;
+    }
+    else
+    {
+        pattern = 0;
+    }
+
+    if (Fault_Flag)
+        return 0;
+
+    Clear_Block(block_count);
+
+    /* decode blocks; the pattern's most significant bit belongs to block 0 */
+    for (n = 0; n < block_count; n++)
+    {
+        if (!(pattern & (1 << (block_count - 1 - n))))
+            continue;
+
+        if (is_intra)
+            Decode_MPEG2_Intra_Block(n, dc_dct_pred);
+        else
+            Decode_MPEG2_Non_Intra_Block(n);
+
+        if (Fault_Flag)
+            return 0;
+    }
+
+    /* reset intra_dc predictors */
+    /* ISO/IEC 13818-2 section 7.2.1: DC coefficients in intra blocks */
+    if (!is_intra)
+    {
+        for (n = 0; n < 3; n++)
+            dc_dct_pred[n] = 0;
+    }
+
+    /* intra mb without concealment motion vectors: reset all predictors */
+    /* ISO/IEC 13818-2 section 7.6.3.4: Resetting motion vector predictors */
+    if (is_intra && !concealment_motion_vectors)
+    {
+        PMV[0][0][0] = PMV[0][0][1] = PMV[1][0][0] = PMV[1][0][1] = 0;
+        PMV[0][1][0] = PMV[0][1][1] = PMV[1][1][0] = PMV[1][1][1] = 0;
+    }
+
+    /* special "No_MC" macroblock_type case */
+    /* ISO/IEC 13818-2 section 7.6.3.5: Prediction in P pictures */
+    if (picture_coding_type == P_TYPE
+        && !(mb_type & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_INTRA)))
+    {
+        /* non-intra mb without forward mv in a P picture */
+        PMV[0][0][0] = PMV[0][0][1] = PMV[1][0][0] = PMV[1][0][1] = 0;
+
+        /* ISO/IEC 13818-2 section 6.3.17.1: Macroblock modes, frame_motion_type */
+        if (picture_structure == FRAME_PICTURE)
+        {
+            *motion_type = MC_FRAME;
+        }
+        else
+        {
+            *motion_type = MC_FIELD;
+            motion_vertical_field_select[0][0] = (picture_structure == BOTTOM_FIELD);
+        }
+    }
+
+    /* successfully decoded macroblock */
+    return 1;
+}
+
+/* decode one intra coded MPEG-2 block */
+void CMPEG2Dec::Decode_MPEG2_Intra_Block(int comp, int dc_dct_pred[])
+{
+    /*
+     * Decodes the coefficients of intra block `comp` into block[comp].
+     *   comp        - block index within the macroblock
+     *   dc_dct_pred - DC predictors, one per colour component; the entry
+     *                 for this block's component is updated
+     * On a bitstream error Fault_Flag is set to 1 and decoding of the
+     * block stops.
+     */
+    int val, i, j, sign, *qmat;
+    unsigned int code;
+    DCTtab *tab;
+    short *bp;
+
+    bp = block[comp];
+    qmat = (comp<4 || chroma_format==CHROMA420)
+        ? intra_quantizer_matrix : chroma_intra_quantizer_matrix;
+
+    /* ISO/IEC 13818-2 section 7.2.1: decode DC coefficients */
+    /* component 0 uses the luminance DC table, 1 and 2 the chrominance one */
+    const int cc = cc_table[comp];
+    if (cc == 0)
+        val = (dc_dct_pred[0] += Get_Luma_DC_dct_diff());
+    else
+        val = (dc_dct_pred[cc] += Get_Chroma_DC_dct_diff());
+
+    bp[0] = val << (3-intra_dc_precision);
+
+    /* decode AC coefficients */
+    for (i=1; ; i++)
+    {
+        code = Show_Bits(16);
+
+        /* pick the VLC table from the magnitude of the 16-bit lookahead */
+        if (code>=16384 && !intra_vlc_format)
+            tab = &DCTtabnext[(code>>12)-4];
+        else if (code>=1024)
+        {
+            if (intra_vlc_format)
+                tab = &DCTtab0a[(code>>8)-4];
+            else
+                tab = &DCTtab0[(code>>8)-4];
+        }
+        else if (code>=512)
+        {
+            if (intra_vlc_format)
+                tab = &DCTtab1a[(code>>6)-8];
+            else
+                tab = &DCTtab1[(code>>6)-8];
+        }
+        else if (code>=256)
+            tab = &DCTtab2[(code>>4)-16];
+        else if (code>=128)
+            tab = &DCTtab3[(code>>3)-16];
+        else if (code>=64)
+            tab = &DCTtab4[(code>>2)-16];
+        else if (code>=32)
+            tab = &DCTtab5[(code>>1)-16];
+        else if (code>=16)
+            tab = &DCTtab6[code-16];
+        else
+        {
+            /* no valid code starts with twelve zero bits */
+            Fault_Flag = 1;
+            return;
+        }
+
+        Flush_Buffer(tab->len);
+
+        if (tab->run<64)
+        {
+            i+= tab->run;
+            val = tab->level;
+            sign = Get_Bits(1);
+        }
+        else if (tab->run==64) /* end_of_block */
+            return;
+        else /* escape */
+        {
+            i+= Get_Bits(6);
+            val = Get_Bits(12);
+
+            /* 12-bit two's complement level: split into sign and magnitude */
+            if (sign = (val>=2048))
+                val = 4096 - val;
+        }
+
+        /* a corrupt stream can run the coefficient index past the end of
+           the 8x8 block; without this check scan[][] is read and bp[] is
+           written out of bounds */
+        if (i>=64)
+        {
+            Fault_Flag = 1;
+            return;
+        }
+
+        j = scan[alternate_scan][i];
+
+        /* inverse quantisation */
+        val = (val * quantizer_scale * qmat[j]) >> 4;
+        bp[j] = sign ? -val : val;
+    }
+}
+
+/* decode one non-intra coded MPEG-2 block */
+void CMPEG2Dec::Decode_MPEG2_Non_Intra_Block(int comp)
+{
+    /*
+     * Decodes the coefficients of non-intra block `comp` into block[comp].
+     * On a bitstream error Fault_Flag is set to 1 and decoding of the
+     * block stops.
+     */
+    int val, i, j, sign, *qmat;
+    unsigned int code;
+    DCTtab *tab;
+    short *bp;
+
+    bp = block[comp];
+    qmat = (comp<4 || chroma_format==CHROMA420)
+        ? non_intra_quantizer_matrix : chroma_non_intra_quantizer_matrix;
+
+    /* decode AC coefficients */
+    for (i=0; ; i++)
+    {
+        code = Show_Bits(16);
+
+        /* pick the VLC table from the magnitude of the 16-bit lookahead;
+           the very first coefficient has its own table */
+        if (code>=16384)
+        {
+            if (i==0)
+                tab = &DCTtabfirst[(code>>12)-4];
+            else
+                tab = &DCTtabnext[(code>>12)-4];
+        }
+        else if (code>=1024)
+            tab = &DCTtab0[(code>>8)-4];
+        else if (code>=512)
+            tab = &DCTtab1[(code>>6)-8];
+        else if (code>=256)
+            tab = &DCTtab2[(code>>4)-16];
+        else if (code>=128)
+            tab = &DCTtab3[(code>>3)-16];
+        else if (code>=64)
+            tab = &DCTtab4[(code>>2)-16];
+        else if (code>=32)
+            tab = &DCTtab5[(code>>1)-16];
+        else if (code>=16)
+            tab = &DCTtab6[code-16];
+        else
+        {
+            /* no valid code starts with twelve zero bits */
+            Fault_Flag = 1;
+            return;
+        }
+
+        Flush_Buffer(tab->len);
+
+        if (tab->run<64)
+        {
+            i+= tab->run;
+            val = tab->level;
+            sign = Get_Bits(1);
+        }
+        else if (tab->run==64) /* end_of_block */
+            return;
+        else /* escape */
+        {
+            i+= Get_Bits(6);
+            val = Get_Bits(12);
+
+            /* 12-bit two's complement level: split into sign and magnitude */
+            if (sign = (val>=2048))
+                val = 4096 - val;
+        }
+
+        /* a corrupt stream can run the coefficient index past the end of
+           the 8x8 block; without this check scan[][] is read and bp[] is
+           written out of bounds */
+        if (i>=64)
+        {
+            Fault_Flag = 1;
+            return;
+        }
+
+        j = scan[alternate_scan][i];
+
+        /* inverse quantisation */
+        val = (((val<<1)+1) * quantizer_scale * qmat[j]) >> 5;
+        bp[j] = sign ? -val : val;
+    }
+}
+
+int CMPEG2Dec::Get_macroblock_type()
+{
+    /*
+     * Reads macroblock_type with the VLC table that matches the current
+     * picture_coding_type.
+     * picture_coding_type is a 3-bit field taken from the stream, so values
+     * other than I/P/B can occur; those used to return an uninitialised
+     * value and are now reported as a macroblock-type fault (Fault_Flag = 2,
+     * the same code the per-type readers use) with a result of 0.
+     */
+    switch (picture_coding_type)
+    {
+        case I_TYPE:
+            return Get_I_macroblock_type();
+
+        case P_TYPE:
+            return Get_P_macroblock_type();
+
+        case B_TYPE:
+            return Get_B_macroblock_type();
+
+        default:
+            Fault_Flag = 2;
+            return 0;
+    }
+}
+
+int CMPEG2Dec::Get_I_macroblock_type()
+{
+    /* code '1' -> 1, code '01' -> 17; '00' is invalid and sets Fault_Flag
+       to 2 while still returning 17.
+       NOTE(review): 1 and 17 presumably correspond to MACROBLOCK_INTRA and
+       MACROBLOCK_INTRA|MACROBLOCK_QUANT -- confirm against the flag values */
+    const bool first_bit_set = (Get_Bits(1) != 0);
+
+    if (!first_bit_set)
+    {
+        if (Get_Bits(1) == 0)
+            Fault_Flag = 2;
+
+        return 17;
+    }
+
+    return 1;
+}
+
+int CMPEG2Dec::Get_P_macroblock_type()
+{
+    /* 6-bit lookahead: values >= 8 are resolved by PMBtab0 (top 3 bits),
+       values 1..7 by PMBtab1; 0 is invalid and sets Fault_Flag to 2 */
+    const int lookahead = Show_Bits(6);
+
+    if (lookahead >= 8)
+    {
+        const int index = lookahead >> 3;
+
+        Flush_Buffer(PMBtab0[index].len);
+        return PMBtab0[index].val;
+    }
+
+    if (lookahead == 0)
+    {
+        Fault_Flag = 2;
+        return 0;
+    }
+
+    Flush_Buffer(PMBtab1[lookahead].len);
+    return PMBtab1[lookahead].val;
+}
+
+int CMPEG2Dec::Get_B_macroblock_type()
+{
+    /* 6-bit lookahead: values >= 8 are resolved by BMBtab0 (top 4 bits),
+       values 1..7 by BMBtab1; 0 is invalid and sets Fault_Flag to 2 */
+    const int lookahead = Show_Bits(6);
+
+    if (lookahead >= 8)
+    {
+        const int index = lookahead >> 2;
+
+        Flush_Buffer(BMBtab0[index].len);
+        return BMBtab0[index].val;
+    }
+
+    if (lookahead == 0)
+    {
+        Fault_Flag = 2;
+        return 0;
+    }
+
+    Flush_Buffer(BMBtab1[lookahead].len);
+    return BMBtab1[lookahead].val;
+}
+
+int CMPEG2Dec::Get_coded_block_pattern()
+{
+    /* 9-bit lookahead split over three tables by magnitude:
+       >= 128 -> CBPtab0, 8..127 -> CBPtab1, 1..7 -> CBPtab2;
+       0 is invalid and sets Fault_Flag to 3 */
+    const int lookahead = Show_Bits(9);
+
+    if (lookahead >= 128)
+    {
+        const int index = lookahead >> 4;
+
+        Flush_Buffer(CBPtab0[index].len);
+        return CBPtab0[index].val;
+    }
+
+    if (lookahead >= 8)
+    {
+        const int index = lookahead >> 1;
+
+        Flush_Buffer(CBPtab1[index].len);
+        return CBPtab1[index].val;
+    }
+
+    if (lookahead < 1)
+    {
+        Fault_Flag = 3;
+        return 0;
+    }
+
+    Flush_Buffer(CBPtab2[lookahead].len);
+    return CBPtab2[lookahead].val;
+}
+
+int CMPEG2Dec::Get_macroblock_address_increment()
+{
+    /*
+     * Reads macroblock_address_increment.  Leading 11-bit escape codes add
+     * 33 each and stuffing codes are skipped; any other 11-bit value below
+     * 24 is invalid and sets Fault_Flag to 4 (the return value is then 1).
+     */
+    int escape_total = 0;
+    int code = Show_Bits(11);
+
+    while (code < 24)
+    {
+        if (code == 8)          /* macroblock_escape */
+        {
+            escape_total += 33;
+        }
+        else if (code != 15)    /* anything but macroblock_stuffing */
+        {
+            Fault_Flag = 4;
+            return 1;
+        }
+
+        Flush_Buffer(11);
+        code = Show_Bits(11);
+    }
+
+    /* macroblock_address_increment == 1 */
+    /* ('1' is in the MSB position of the lookahead) */
+    if (code >= 1024)
+    {
+        Flush_Buffer(1);
+        return escape_total + 1;
+    }
+
+    /* codes 00010 ... 011xx */
+    if (code >= 128)
+    {
+        /* keep only the top 5 bits of the lookahead */
+        const int index = code >> 6;
+
+        Flush_Buffer(MBAtab1[index].len);
+        return escape_total + MBAtab1[index].val;
+    }
+
+    /* codes 00000011000 ... 0000111xxxx */
+    const int index = code - 24;    /* remove common base */
+
+    Flush_Buffer(MBAtab2[index].len);
+    return escape_total + MBAtab2[index].val;
+}
+
+/*
+ parse VLC and perform dct_diff arithmetic.
+ MPEG-2: ISO/IEC 13818-2 section 7.2.1
+
+ Note: the arithmetic here is presented more elegantly than
+ the spec, yet the results, dct_diff, are the same.
+*/
+int CMPEG2Dec::Get_Luma_DC_dct_diff()
+{
+    /* reads the luminance dct_dc_size VLC followed by that many bits of
+       differential and returns the signed DC difference */
+    int size;
+
+    /* decode length: short codes from the 5-bit table, long ones from the 9-bit table */
+    const int prefix = Show_Bits(5);
+
+    if (prefix < 31)
+    {
+        size = DClumtab0[prefix].val;
+        Flush_Buffer(DClumtab0[prefix].len);
+    }
+    else
+    {
+        const int extended = Show_Bits(9) - 0x1f0;
+
+        size = DClumtab1[extended].val;
+        Flush_Buffer(DClumtab1[extended].len);
+    }
+
+    if (size == 0)
+        return 0;
+
+    int diff = Get_Bits(size);
+
+    /* a clear top bit marks a negative difference */
+    if ((diff & (1 << (size - 1))) == 0)
+        diff -= (1 << size) - 1;
+
+    return diff;
+}
+
+int CMPEG2Dec::Get_Chroma_DC_dct_diff()
+{
+    /* reads the chrominance dct_dc_size VLC followed by that many bits of
+       differential and returns the signed DC difference */
+    int size;
+
+    /* decode length: short codes from the 5-bit table, long ones from the 10-bit table */
+    const int prefix = Show_Bits(5);
+
+    if (prefix < 31)
+    {
+        size = DCchromtab0[prefix].val;
+        Flush_Buffer(DCchromtab0[prefix].len);
+    }
+    else
+    {
+        const int extended = Show_Bits(10) - 0x3e0;
+
+        size = DCchromtab1[extended].val;
+        Flush_Buffer(DCchromtab1[extended].len);
+    }
+
+    if (size == 0)
+        return 0;
+
+    int diff = Get_Bits(size);
+
+    /* a clear top bit marks a negative difference */
+    if ((diff & (1 << (size - 1))) == 0)
+        diff -= (1 << size) - 1;
+
+    return diff;
+}
+
+/*
+static int currentfield;
+static unsigned char **predframe;
+static int DMV[2][2];
+static int stw;
+*/
+
+void CMPEG2Dec::form_predictions(int bx, int by, int macroblock_type, int motion_type,
+                                 int PMV[2][2][2], int motion_vertical_field_select[2][2],
+                                 int dmvector[2])
+{
+    /* Builds the motion-compensated prediction for the macroblock at
+       (bx, by) in current_frame.  A forward pass runs when the macroblock
+       has a forward vector or the picture is P_TYPE; a backward pass runs
+       when it has a backward vector.  The backward pass averages with the
+       forward result if the forward pass ran.  An unsupported motion_type
+       sets Fault_Flag to 5. */
+    const int frame_stride = Coded_Picture_Width;      /* one frame line */
+    const int field_stride = Coded_Picture_Width<<1;   /* one field line */
+    const bool frame_picture = (picture_structure==FRAME_PICTURE);
+    const bool forward_coded = (macroblock_type & MACROBLOCK_MOTION_FORWARD)!=0;
+
+    unsigned char **ref;
+    int derived[2][2];      /* dual prime derived vectors */
+    int average = 0;        /* becomes 1 once a forward prediction is in place */
+    int parity;
+    int half;
+
+    if (forward_coded || (picture_coding_type==P_TYPE))
+    {
+        if (frame_picture)
+        {
+            if ((motion_type==MC_FRAME) || !forward_coded)
+            {
+                /* frame-based prediction, done as top then bottom lines */
+                for (parity=0; parity<2; parity++)
+                    form_prediction(forward_reference_frame, parity, current_frame, parity,
+                        frame_stride, field_stride, 16, 8, bx, by,
+                        PMV[0][0][0], PMV[0][0][1], average);
+            }
+            else if (motion_type==MC_FIELD)
+            {
+                /* field-based prediction: top field, then bottom field */
+                for (parity=0; parity<2; parity++)
+                    form_prediction(forward_reference_frame, motion_vertical_field_select[parity][0],
+                        current_frame, parity, field_stride, field_stride, 16, 8,
+                        bx, by>>1, PMV[parity][0][0], PMV[parity][0][1]>>1, average);
+            }
+            else if (motion_type==MC_DMV)
+            {
+                /* dual prime: derive the opposite-parity vectors first */
+                Dual_Prime_Arithmetic(derived, dmvector, PMV[0][0][0], PMV[0][0][1]>>1);
+
+                for (parity=0; parity<2; parity++)
+                {
+                    /* same-parity field overwrites the destination */
+                    form_prediction(forward_reference_frame, parity, current_frame, parity,
+                        field_stride, field_stride, 16, 8, bx, by>>1,
+                        PMV[0][0][0], PMV[0][0][1]>>1, 0);
+
+                    /* opposite-parity field is averaged in */
+                    form_prediction(forward_reference_frame, 1-parity, current_frame, parity,
+                        field_stride, field_stride, 16, 8, bx, by>>1,
+                        derived[parity][0], derived[parity][1], 1);
+                }
+            }
+            else
+            {
+                Fault_Flag = 5;
+            }
+        }
+        else
+        {
+            /* field picture */
+            const int current_parity = (picture_structure==BOTTOM_FIELD);
+
+            /* pick the frame to predict from */
+            ref = forward_reference_frame;
+            if (picture_coding_type==P_TYPE && Second_Field && current_parity!=motion_vertical_field_select[0][0])
+                ref = backward_reference_frame;
+
+            if ((motion_type==MC_FIELD) || !forward_coded)
+            {
+                form_prediction(ref, motion_vertical_field_select[0][0], current_frame, 0,
+                    field_stride, field_stride, 16, 16, bx, by,
+                    PMV[0][0][0], PMV[0][0][1], average);
+            }
+            else if (motion_type==MC_16X8)
+            {
+                /* upper 16x8 half */
+                form_prediction(ref, motion_vertical_field_select[0][0], current_frame, 0,
+                    field_stride, field_stride, 16, 8, bx, by,
+                    PMV[0][0][0], PMV[0][0][1], average);
+
+                /* the lower half selects its reference independently */
+                ref = forward_reference_frame;
+                if (picture_coding_type==P_TYPE && Second_Field && current_parity!=motion_vertical_field_select[1][0])
+                    ref = backward_reference_frame;
+
+                form_prediction(ref, motion_vertical_field_select[1][0], current_frame, 0,
+                    field_stride, field_stride, 16, 8, bx, by+8,
+                    PMV[1][0][0], PMV[1][0][1], average);
+            }
+            else if (motion_type==MC_DMV)
+            {
+                ref = forward_reference_frame;
+                if (Second_Field)
+                    ref = backward_reference_frame;
+
+                /* derive the opposite-parity vector */
+                Dual_Prime_Arithmetic(derived, dmvector, PMV[0][0][0], PMV[0][0][1]);
+
+                /* same parity overwrites the destination */
+                form_prediction(forward_reference_frame, current_parity, current_frame, 0,
+                    field_stride, field_stride, 16, 16, bx, by,
+                    PMV[0][0][0], PMV[0][0][1], 0);
+
+                /* opposite parity is averaged in */
+                form_prediction(ref, !current_parity, current_frame, 0,
+                    field_stride, field_stride, 16, 16, bx, by,
+                    derived[0][0], derived[0][1], 1);
+            }
+            else
+            {
+                Fault_Flag = 5;
+            }
+        }
+
+        /* any backward prediction must now be averaged with this one */
+        average = 1;
+    }
+
+    if (!(macroblock_type & MACROBLOCK_MOTION_BACKWARD))
+        return;
+
+    if (frame_picture)
+    {
+        if (motion_type==MC_FRAME)
+        {
+            /* frame-based prediction, done as top then bottom lines */
+            for (parity=0; parity<2; parity++)
+                form_prediction(backward_reference_frame, parity, current_frame, parity,
+                    frame_stride, field_stride, 16, 8, bx, by,
+                    PMV[0][1][0], PMV[0][1][1], average);
+        }
+        else
+        {
+            /* field-based prediction: top field, then bottom field */
+            for (parity=0; parity<2; parity++)
+                form_prediction(backward_reference_frame, motion_vertical_field_select[parity][1],
+                    current_frame, parity, field_stride, field_stride, 16, 8,
+                    bx, by>>1, PMV[parity][1][0], PMV[parity][1][1]>>1, average);
+        }
+    }
+    else if (motion_type==MC_FIELD)
+    {
+        /* field picture, field-based prediction */
+        form_prediction(backward_reference_frame, motion_vertical_field_select[0][1],
+            current_frame, 0, field_stride, field_stride, 16, 16,
+            bx, by, PMV[0][1][0], PMV[0][1][1], average);
+    }
+    else if (motion_type==MC_16X8)
+    {
+        /* field picture, upper then lower 16x8 half */
+        for (half=0; half<2; half++)
+            form_prediction(backward_reference_frame, motion_vertical_field_select[half][1],
+                current_frame, 0, field_stride, field_stride, 16, 8,
+                bx, by+8*half, PMV[half][1][0], PMV[half][1][1], average);
+    }
+    else
+    {
+        Fault_Flag = 5;
+    }
+}
+
+void CMPEG2Dec::form_prediction(unsigned char *src[], int sfield, unsigned char *dst[],
+                                int dfield, int lx, int lx2, int w, int h, int x, int y,
+                                int dx, int dy, int average_flag)
+{
+    /* Predicts one block in all three planes (index 0, then 1 = Cb and
+       2 = Cr).  A non-zero sfield/dfield starts the source/destination
+       half of lx2 further into the plane.  Geometry and vector are scaled
+       for the chroma planes according to chroma_format. */
+    int plane;
+
+    /* plane 0 uses the arguments as given */
+    form_component_prediction(src[0] + (sfield ? lx2>>1 : 0),
+                              dst[0] + (dfield ? lx2>>1 : 0),
+                              lx, lx2, w, h, x, y, dx, dy, average_flag);
+
+    /* horizontal scaling for everything except 4:4:4 */
+    if (chroma_format!=CHROMA444)
+    {
+        lx  >>= 1;
+        lx2 >>= 1;
+        w   >>= 1;
+        x   >>= 1;
+        dx   /= 2;   /* division, not a shift: truncates toward zero */
+    }
+
+    /* vertical scaling for 4:2:0 only */
+    if (chroma_format==CHROMA420)
+    {
+        h >>= 1;
+        y >>= 1;
+        dy /= 2;
+    }
+
+    /* field offsets use the (possibly halved) chroma stride */
+    const int src_offset = sfield ? lx2>>1 : 0;
+    const int dst_offset = dfield ? lx2>>1 : 0;
+
+    /* Cb, then Cr */
+    for (plane=1; plane<=2; plane++)
+        form_component_prediction(src[plane] + src_offset, dst[plane] + dst_offset,
+                                  lx, lx2, w, h, x, y, dx, dy, average_flag);
+}
+
+/* ISO/IEC 13818-2 section 7.6.4: Forming predictions
+
+   Predicts one w x h block of a single plane using MMX inline assembly.
+
+   src, dst      plane base pointers (any field offset already applied)
+   lx            stride used to locate the block: row offsets are lx * y
+   lx2           stride added to the pointers after each processed row
+   w, h          block width and height; each inner iteration handles
+                 8 bytes and both loops test their counter after the body,
+                 so at least 8 bytes of at least one row are processed
+   x, y          block position
+   dx, dy        motion vector in half-sample units: bit 0 selects the
+                 half-sample case, the remaining bits are the whole offset
+   average_flag  non-zero: combine the prediction with what dst already
+                 holds instead of overwriting it
+
+   None of the eight code paths executes emms; the MMX state is left
+   as it is when the function returns.
+ */
+void CMPEG2Dec::form_component_prediction(unsigned char *src, unsigned char *dst,
+ int lx, int lx2, int w, int h, int x, int y,
+ int dx, int dy, int average_flag)
+{
+ static const __int64 mmmask_0001 = 0x0001000100010001; /* rounding term for >>1 */
+ static const __int64 mmmask_0002 = 0x0002000200020002; /* rounding term for >>2 */
+ static const __int64 mmmask_0003 = 0x0003000300030003; /* rounding term for >>2 with averaging */
+ static const __int64 mmmask_0006 = 0x0006000600060006; /* rounding term for >>3 with averaging */
+
+ unsigned char *s = src + lx * (y + (dy>>1)) + x + (dx>>1); /* source block after the whole-sample part of the vector */
+ unsigned char *d = dst + lx * y + x; /* destination block */
+ int flag = (average_flag<<2) + ((dx & 1)<<1) + (dy & 1); /* bit 2: average with dst, bit 1: horizontal half-sample, bit 0: vertical half-sample */
+
+ switch (flag)
+ {
+ case 0:
+ // whole-sample copy: d[i] = s[i];
+ __asm
+ {
+ mov eax, [s]
+ mov ebx, [d]
+ mov esi, 0x00
+ mov edi, [h]
+mc0:
+ movq mm1, [eax+esi]
+ add esi, 0x08
+ cmp esi, [w]
+ movq [ebx+esi-8], mm1
+ jl mc0
+
+ add eax, [lx2]
+ add ebx, [lx2]
+ dec edi
+ mov esi, 0x00
+ cmp edi, 0x00
+ jg mc0
+ }
+ break;
+
+ case 1:
+ // vertical half-sample: d[i] = (s[i]+s[i+lx]+1)>>1;
+ __asm
+ {
+ pxor mm0, mm0
+ movq mm7, [mmmask_0001]
+ mov eax, [s]
+ mov ebx, [d]
+ mov ecx, eax
+ add ecx, [lx]
+ mov esi, 0x00
+ mov edi, [h]
+mc1:
+ movq mm1, [eax+esi]
+ movq mm2, [ecx+esi]
+
+ movq mm3, mm1
+ movq mm4, mm2
+
+ punpcklbw mm1, mm0
+ punpckhbw mm3, mm0
+ punpcklbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ paddsw mm1, mm2
+ paddsw mm3, mm4
+
+ paddsw mm1, mm7
+ paddsw mm3, mm7
+
+ psrlw mm1, 1
+ psrlw mm3, 1
+
+ packuswb mm1, mm0
+ packuswb mm3, mm0
+
+ psllq mm3, 32
+ por mm1, mm3
+
+ add esi, 0x08
+ cmp esi, [w]
+ movq [ebx+esi-8], mm1
+ jl mc1
+
+ add eax, [lx2]
+ add ebx, [lx2]
+ add ecx, [lx2]
+ dec edi
+ mov esi, 0x00
+ cmp edi, 0x00
+ jg mc1
+ }
+ break;
+
+ case 2:
+ // horizontal half-sample: d[i] = (s[i]+s[i+1]+1)>>1;
+ __asm
+ {
+ pxor mm0, mm0
+ movq mm7, [mmmask_0001]
+ mov eax, [s]
+ mov ebx, [d]
+ mov esi, 0x00
+ mov edi, [h]
+mc2:
+ movq mm1, [eax+esi]
+ movq mm2, [eax+esi+1]
+
+ movq mm3, mm1
+ movq mm4, mm2
+
+ punpcklbw mm1, mm0
+ punpckhbw mm3, mm0
+
+ punpcklbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ paddsw mm1, mm2
+ paddsw mm3, mm4
+
+ paddsw mm1, mm7
+ paddsw mm3, mm7
+
+ psrlw mm1, 1
+ psrlw mm3, 1
+
+ packuswb mm1, mm0
+ packuswb mm3, mm0
+
+ psllq mm3, 32
+ por mm1, mm3
+
+ add esi, 0x08
+ cmp esi, [w]
+ movq [ebx+esi-8], mm1
+ jl mc2
+
+ add eax, [lx2]
+ add ebx, [lx2]
+ dec edi
+ mov esi, 0x00
+ cmp edi, 0x00
+ jg mc2
+ }
+ break;
+
+ case 3:
+ // half-sample in both directions: d[i] = (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2;
+ __asm
+ {
+ pxor mm0, mm0
+ movq mm7, [mmmask_0002]
+ mov eax, [s]
+ mov ebx, [d]
+ mov ecx, eax
+ add ecx, [lx]
+ mov esi, 0x00
+ mov edi, [h]
+mc3:
+ movq mm1, [eax+esi]
+ movq mm2, [eax+esi+1]
+ movq mm3, mm1
+ movq mm4, mm2
+
+ punpcklbw mm1, mm0
+ punpckhbw mm3, mm0
+
+ punpcklbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ paddsw mm1, mm2
+ paddsw mm3, mm4
+
+ movq mm5, [ecx+esi]
+ paddsw mm1, mm7
+
+ movq mm6, [ecx+esi+1]
+ paddsw mm3, mm7
+
+ movq mm2, mm5
+ movq mm4, mm6
+
+ punpcklbw mm2, mm0
+ punpckhbw mm5, mm0
+
+ punpcklbw mm4, mm0
+ punpckhbw mm6, mm0
+
+ paddsw mm2, mm4
+ paddsw mm5, mm6
+
+ paddsw mm1, mm2
+ paddsw mm3, mm5
+
+ psrlw mm1, 2
+ psrlw mm3, 2
+
+ packuswb mm1, mm0
+ packuswb mm3, mm0
+
+ psllq mm3, 32
+ por mm1, mm3
+
+ add esi, 0x08
+ cmp esi, [w]
+ movq [ebx+esi-8], mm1
+ jl mc3
+
+ add eax, [lx2]
+ add ebx, [lx2]
+ add ecx, [lx2]
+ dec edi
+ mov esi, 0x00
+ cmp edi, 0x00
+ jg mc3
+ }
+ break;
+
+ case 4:
+ // whole-sample, averaged with dst: d[i] = (s[i]+d[i]+1)>>1;
+ __asm
+ {
+ pxor mm0, mm0
+ movq mm7, [mmmask_0001]
+ mov eax, [s]
+ mov ebx, [d]
+ mov esi, 0x00
+ mov edi, [h]
+mc4:
+ movq mm1, [eax+esi]
+ movq mm2, [ebx+esi]
+ movq mm3, mm1
+ movq mm4, mm2
+
+ punpcklbw mm1, mm0
+ punpckhbw mm3, mm0
+
+ punpcklbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ paddsw mm1, mm2
+ paddsw mm3, mm4
+
+ paddsw mm1, mm7
+ paddsw mm3, mm7
+
+ psrlw mm1, 1
+ psrlw mm3, 1
+
+ packuswb mm1, mm0
+ packuswb mm3, mm0
+
+ psllq mm3, 32
+ por mm1, mm3
+
+ add esi, 0x08
+ cmp esi, [w]
+ movq [ebx+esi-8], mm1
+ jl mc4
+
+ add eax, [lx2]
+ add ebx, [lx2]
+ dec edi
+ mov esi, 0x00
+ cmp edi, 0x00
+ jg mc4
+ }
+ break;
+
+ case 5:
+ // vertical half-sample, averaged with dst: d[i] = ((d[i]<<1) + s[i]+s[i+lx] + 3)>>2;
+ __asm
+ {
+ pxor mm0, mm0
+ movq mm7, [mmmask_0003]
+ mov eax, [s]
+ mov ebx, [d]
+ mov ecx, eax
+ add ecx, [lx]
+ mov esi, 0x00
+ mov edi, [h]
+mc5:
+ movq mm1, [eax+esi]
+ movq mm2, [ecx+esi]
+ movq mm3, mm1
+ movq mm4, mm2
+
+ punpcklbw mm1, mm0
+ punpckhbw mm3, mm0
+
+ punpcklbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ paddsw mm1, mm2
+ paddsw mm3, mm4
+
+ movq mm5, [ebx+esi]
+
+ paddsw mm1, mm7
+ paddsw mm3, mm7
+
+ movq mm6, mm5
+ punpcklbw mm5, mm0
+ punpckhbw mm6, mm0
+
+ psllw mm5, 1
+ psllw mm6, 1
+
+ paddsw mm1, mm5
+ paddsw mm3, mm6
+
+ psrlw mm1, 2
+ psrlw mm3, 2
+
+ packuswb mm1, mm0
+ packuswb mm3, mm0
+
+ psllq mm3, 32
+ por mm1, mm3
+
+ add esi, 0x08
+ cmp esi, [w]
+ movq [ebx+esi-8], mm1
+ jl mc5
+
+ add eax, [lx2]
+ add ebx, [lx2]
+ add ecx, [lx2]
+ dec edi
+ mov esi, 0x00
+ cmp edi, 0x00
+ jg mc5
+ }
+ break;
+
+ case 6:
+ // horizontal half-sample, averaged with dst: d[i] = ((d[i]<<1) + s[i]+s[i+1] + 3) >> 2;
+ __asm
+ {
+ pxor mm0, mm0
+ movq mm7, [mmmask_0003]
+ mov eax, [s]
+ mov ebx, [d]
+ mov esi, 0x00
+ mov edi, [h]
+mc6:
+ movq mm1, [eax+esi]
+ movq mm2, [eax+esi+1]
+ movq mm3, mm1
+ movq mm4, mm2
+
+ punpcklbw mm1, mm0
+ punpckhbw mm3, mm0
+
+ punpcklbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ paddsw mm1, mm2
+ paddsw mm3, mm4
+
+ movq mm5, [ebx+esi]
+
+ paddsw mm1, mm7
+ paddsw mm3, mm7
+
+ movq mm6, mm5
+ punpcklbw mm5, mm0
+ punpckhbw mm6, mm0
+
+ psllw mm5, 1
+ psllw mm6, 1
+
+ paddsw mm1, mm5
+ paddsw mm3, mm6
+
+ psrlw mm1, 2
+ psrlw mm3, 2
+
+ packuswb mm1, mm0
+ packuswb mm3, mm0
+
+ psllq mm3, 32
+ por mm1, mm3
+
+ add esi, 0x08
+ cmp esi, [w]
+ movq [ebx+esi-8], mm1
+ jl mc6
+
+ add eax, [lx2]
+ add ebx, [lx2]
+ dec edi
+ mov esi, 0x00
+ cmp edi, 0x00
+ jg mc6
+ }
+ break;
+
+ case 7:
+ // half-sample in both directions, averaged with dst:
+ // d[i] = ((d[i]<<2) + s[i]+s[i+1]+s[i+lx]+s[i+lx+1] + 6)>>3;
+ __asm
+ {
+ pxor mm0, mm0
+ movq mm7, [mmmask_0006]
+ mov eax, [s]
+ mov ebx, [d]
+ mov ecx, eax
+ add ecx, [lx]
+ mov esi, 0x00
+ mov edi, [h]
+mc7:
+ movq mm1, [eax+esi]
+ movq mm2, [eax+esi+1]
+ movq mm3, mm1
+ movq mm4, mm2
+
+ punpcklbw mm1, mm0
+ punpckhbw mm3, mm0
+
+ punpcklbw mm2, mm0
+ punpckhbw mm4, mm0
+
+ paddsw mm1, mm2
+ paddsw mm3, mm4
+
+ movq mm5, [ecx+esi]
+ paddsw mm1, mm7
+
+ movq mm6, [ecx+esi+1]
+ paddsw mm3, mm7
+
+ movq mm2, mm5
+ movq mm4, mm6
+
+ punpcklbw mm2, mm0
+ punpckhbw mm5, mm0
+
+ punpcklbw mm4, mm0
+ punpckhbw mm6, mm0
+
+ paddsw mm2, mm4
+ paddsw mm5, mm6
+
+ paddsw mm1, mm2
+ paddsw mm3, mm5
+
+ movq mm6, [ebx+esi]
+
+ movq mm4, mm6
+ punpcklbw mm4, mm0
+ punpckhbw mm6, mm0
+
+ psllw mm4, 2
+ psllw mm6, 2
+
+ paddsw mm1, mm4
+ paddsw mm3, mm6
+
+ psrlw mm1, 3
+ psrlw mm3, 3
+
+ packuswb mm1, mm0
+ packuswb mm3, mm0
+
+ psllq mm3, 32
+ por mm1, mm3
+
+ add esi, 0x08
+ cmp esi, [w]
+ movq [ebx+esi-8], mm1
+ jl mc7
+
+ add eax, [lx2]
+ add ebx, [lx2]
+ add ecx, [lx2]
+ dec edi
+ mov esi, 0x00
+ cmp edi, 0x00
+ jg mc7
+ }
+ break;
+ }
+}
+
+//
+// motion
+//
+
+/* ISO/IEC 13818-2 sections 6.2.5.2, 6.3.17.2, and 7.6.3: Motion vectors */
+void CMPEG2Dec::motion_vectors(int PMV[2][2][2],int dmvector[2],
+                               int motion_vertical_field_select[2][2], int s,
+                               int motion_vector_count, int mv_format, int h_r_size,
+                               int v_r_size, int dmv, int mvscale)
+{
+    /* Reads the motion vector(s) for prediction direction s from the
+       bitstream and updates the PMV[][s] predictors and the field
+       selects for that direction. */
+    if (motion_vector_count!=1)
+    {
+        /* two vectors: each is preceded by its own field select bit */
+        int idx;
+
+        for (idx=0; idx<2; idx++)
+        {
+            motion_vertical_field_select[idx][s] = Get_Bits(1);
+            motion_vector(PMV[idx][s],dmvector,h_r_size,v_r_size,dmv,mvscale,0);
+        }
+        return;
+    }
+
+    /* single vector: a field select bit is present only for field
+       vectors without dual prime, and it applies to both entries */
+    if (mv_format==MV_FIELD && !dmv)
+    {
+        const int select = Get_Bits(1);
+
+        motion_vertical_field_select[0][s] = select;
+        motion_vertical_field_select[1][s] = select;
+    }
+
+    motion_vector(PMV[0][s],dmvector,h_r_size,v_r_size,dmv,mvscale,0);
+
+    /* the second predictor mirrors the first */
+    PMV[1][s][0] = PMV[0][s][0];
+    PMV[1][s][1] = PMV[0][s][1];
+}
+
+
+/* ISO/IEC 13818-2 section 7.6.3.6: Dual prime additional arithmetic */
+void CMPEG2Dec::Dual_Prime_Arithmetic(int DMV[][2],int *dmvector, int mvx,int mvy)
+{
+    /* Derives the opposite-parity vector(s) from (mvx, mvy) and the
+       differential dmvector.  Frame pictures fill DMV[0] and DMV[1];
+       field pictures fill DMV[0] only. */
+    const int half_x = (mvx + (mvx>0))>>1;
+    const int half_y = (mvy + (mvy>0))>>1;
+
+    if (picture_structure!=FRAME_PICTURE)
+    {
+        /* field picture: one vector, corrected for the vertical field shift */
+        DMV[0][0] = half_x + dmvector[0];
+        DMV[0][1] = half_y + dmvector[1] + ((picture_structure==TOP_FIELD) ? -1 : 1);
+        return;
+    }
+
+    /* frame picture: one vector scaled by 1/2, the other by 3/2;
+       top_field_first decides which of the two lands in DMV[0] */
+    int x0 = half_x;
+    int y0 = half_y;
+    int x1 = (3*mvx + (mvx>0))>>1;
+    int y1 = (3*mvy + (mvy>0))>>1;
+
+    if (!top_field_first)
+    {
+        const int swap_x = x0;
+        const int swap_y = y0;
+
+        x0 = x1; y0 = y1;
+        x1 = swap_x; y1 = swap_y;
+    }
+
+    /* DMV[0] always takes the -1 vertical correction, DMV[1] the +1 */
+    DMV[0][0] = x0 + dmvector[0];
+    DMV[0][1] = y0 + dmvector[1] - 1;
+
+    DMV[1][0] = x1 + dmvector[0];
+    DMV[1][1] = y1 + dmvector[1] + 1;
+}
+
+/* get and decode motion vector and differential motion vector for one prediction */
+void CMPEG2Dec::motion_vector(int *PMV, int *dmvector, int h_r_size, int v_r_size,
+                              int dmv, int mvscale, int full_pel_vector)
+{
+    /* Reads one motion vector from the bitstream, horizontal component
+       first, and updates the predictor PMV[0..1] in place.  When dmv is
+       set, each component is followed by its differential, stored in
+       dmvector[].  The order of the bitstream reads below is significant. */
+    int code;
+    int residual;
+
+    /* ---- horizontal component (ISO/IEC 13818-2 Table B-10) ---- */
+    code = Get_motion_code();
+
+    residual = 0;
+    if (h_r_size!=0 && code!=0)
+        residual = Get_Bits(h_r_size);
+
+    decode_motion_vector(PMV, h_r_size, code, residual, full_pel_vector);
+
+    if (dmv)
+        dmvector[0] = Get_dmvector();
+
+    /* ---- vertical component ---- */
+    code = Get_motion_code();
+
+    residual = 0;
+    if (v_r_size!=0 && code!=0)
+        residual = Get_Bits(v_r_size);
+
+    if (mvscale)
+    {
+        /* decode against the halved predictor, then scale back up */
+        PMV[1] >>= 1;
+        decode_motion_vector(PMV+1, v_r_size, code, residual, full_pel_vector);
+        PMV[1] <<= 1;
+    }
+    else
+    {
+        decode_motion_vector(PMV+1, v_r_size, code, residual, full_pel_vector);
+    }
+
+    if (dmv)
+        dmvector[1] = Get_dmvector();
+}
+
+/* calculate motion vector component */
+/* ISO/IEC 13818-2 section 7.6.3.1: Decoding the motion vectors */
+/* Note: the arithmetic here is more elegant than that which is shown
+ in 7.6.3.1. The end results (PMV[][][]) should, however, be the same. */
+void CMPEG2Dec::decode_motion_vector(int *pred, int r_size, int motion_code,
+                                     int motion_residual, int full_pel_vector)
+{
+    /* Applies one decoded (motion_code, motion_residual) delta to the
+       predictor *pred and wraps the result into [-lim, lim), where
+       lim = 16<<r_size.  With full_pel_vector set, the predictor is
+       halved before the update and doubled again afterwards. */
+    const int lim = 16<<r_size;
+    int vec = *pred;
+
+    if (full_pel_vector)
+        vec >>= 1;
+
+    if (motion_code>0)
+    {
+        const int delta = ((motion_code-1)<<r_size) + motion_residual + 1;
+
+        vec += delta;
+        if (vec>=lim)
+            vec -= 2*lim;
+    }
+    else if (motion_code<0)
+    {
+        const int delta = ((-motion_code-1)<<r_size) + motion_residual + 1;
+
+        vec -= delta;
+        if (vec<-lim)
+            vec += 2*lim;
+    }
+    /* motion_code==0 leaves the predictor unchanged */
+
+    if (full_pel_vector)
+        vec <<= 1;
+
+    *pred = vec;
+}
+
+int CMPEG2Dec::Get_motion_code()
+{
+    /* Reads one motion_code VLC.  A leading 1 bit means 0; otherwise a
+       9-bit lookahead selects one of three tables, followed by a sign
+       bit (1 = negative).  An invalid code sets Fault_Flag to 10 and
+       returns 0. */
+    int bits;
+    int length;
+    int magnitude;
+
+    if (Get_Bits(1))
+        return 0;
+
+    bits = Show_Bits(9);
+
+    if (bits>=64)
+    {
+        bits >>= 6;
+        length = MVtab0[bits].len;
+        magnitude = MVtab0[bits].val;
+    }
+    else if (bits>=24)
+    {
+        bits >>= 3;
+        length = MVtab1[bits].len;
+        magnitude = MVtab1[bits].val;
+    }
+    else
+    {
+        bits -= 12;
+        if (bits<0)
+        {
+            Fault_Flag = 10;
+            return 0;
+        }
+        length = MVtab2[bits].len;
+        magnitude = MVtab2[bits].val;
+    }
+
+    /* consume the code, then read the sign bit that follows it */
+    Flush_Buffer(length);
+
+    return Get_Bits(1) ? -magnitude : magnitude;
+}
+
+/* get differential motion vector (for dual prime prediction) */
+int CMPEG2Dec::Get_dmvector()
+{
+    /* One bit: 0 means no differential.  Otherwise a second bit gives
+       the sign: 1 -> -1, 0 -> +1. */
+    if (!Get_Bits(1))
+        return 0;
+
+    return Get_Bits(1) ? -1 : 1;
+}
+
+//
+// store
+//
+
+static const __int64 mmmask_0001 = 0x0001000100010001; // 1 in each of the four 16-bit lanes
+static const __int64 mmmask_0002 = 0x0002000200020002; // 2 per lane
+static const __int64 mmmask_0003 = 0x0003000300030003; // 3 per lane
+static const __int64 mmmask_0004 = 0x0004000400040004; // 4 per lane
+static const __int64 mmmask_0005 = 0x0005000500050005; // 5 per lane
+static const __int64 mmmask_0007 = 0x0007000700070007; // 7 per lane
+static const __int64 mmmask_0016 = 0x0010001000100010; // 16 per lane (name is decimal, literal is hex 0x10)
+static const __int64 mmmask_0040 = 0x0040004000400040; // 0x40 = 64 per lane (here the name is the hex value, unlike _0016/_0128)
+static const __int64 mmmask_0128 = 0x0080008000800080; // 128 per lane (name is decimal, literal is hex 0x80)
+
+void CMPEG2Dec::assembleFrame(unsigned char *src[], int pf, unsigned char *dst, int pitch)
+{
+    /* Converts one decoded picture into the output format.
+
+       src    the three decoded planes (src[0], src[1], src[2])
+       pf     forwarded to conv420to422 as its frame_type argument
+              (non-zero selects its progressive path)
+       dst    output buffer
+       pitch  forwarded to the final conversion as the output row pitch
+
+       Steps: optional luminance filter into lum; 4:2:0 chroma is
+       upsampled to 4:2:2 into u422/v422; the result is written as YUY2
+       when dstYUY2() is true, otherwise it is upsampled to 4:4:4 and
+       written as RGB24.  Fault_Flag is cleared on entry.
+
+       NOTE(review): any source that is not 4:2:0 is handled as 4:2:2
+       here, as in the original code - confirm whether 4:4:4 input can
+       reach this function. */
+    unsigned char *luma;
+    unsigned char *cb422 = src[1];
+    unsigned char *cr422 = src[2];
+
+    Fault_Flag = 0;
+
+    if (Luminance_Flag)
+    {
+        Luminance_Filter(src[0], lum);
+        luma = lum;
+    }
+    else
+        luma = src[0];
+
+    if (chroma_format==CHROMA420)
+    {
+        conv420to422(src[1], u422, pf);
+        conv420to422(src[2], v422, pf);
+        cb422 = u422;
+        cr422 = v422;
+    }
+
+    if (dstYUY2())
+    {
+        /* Use the planes that actually hold 4:2:2 chroma.  The previous
+           code always passed u422/v422, which are only filled on the
+           4:2:0 path above, so other sources were packed from buffers
+           this function had not written. */
+        conv422toYUY2(luma, cb422, cr422, dst, pitch);
+    }
+    else
+    {
+        conv422to444(cb422, u444);
+        conv422to444(cr422, v444);
+        conv444toRGB24(luma, u444, v444, dst, pitch);
+    }
+}
+
+void CMPEG2Dec::Luminance_Filter(unsigned char *src, unsigned char *dst)
+{ /* Applies a gain and an offset to luminance samples using MMX.
+
+     For every sample, starting CLIP_AREA bytes into both buffers:
+         out = saturate_u8( ((in * gain + 0x40) >> 7) + offset )
+     where gain and offset are the 16-bit lanes of the LumGainMask and
+     LumOffsetMask members.  Eight samples are handled per iteration
+     until the byte counter reaches the LUM_AREA member; the loop tests
+     after the body, so it always runs at least once.
+
+     NOTE(review): each packuswb leaves four result bytes in the low half
+     of the register and zeros in the high half, and both stores are
+     8-byte movq stores.  The second store (at offset -4) therefore also
+     writes four zero bytes beyond the eight samples just converted; the
+     next iteration overwrites them, but after the last iteration they
+     land just past the converted area of dst.  Confirm dst has room.
+
+     No emms is executed here. */
+ src += CLIP_AREA;
+ dst += CLIP_AREA;
+
+ __asm
+ {
+ mov edx, this
+ mov eax, [src]
+ mov ebx, [dst]
+ mov esi, 0x00
+ mov edi, [edx].LUM_AREA
+ pxor mm0, mm0
+ movq mm5, [edx].LumOffsetMask
+ movq mm6, [edx].LumGainMask
+ movq mm7, mmmask_0040
+
+lumconv:
+ movq mm1, [eax+esi]
+ movq mm2, mm1
+
+ punpcklbw mm1, mm0
+ punpckhbw mm2, mm0
+
+ pmullw mm1, mm6
+ pmullw mm2, mm6
+
+ paddw mm1, mm7
+ paddw mm2, mm7
+
+ psrlw mm1, 7
+ psrlw mm2, 7
+
+ paddw mm1, mm5
+ paddw mm2, mm5
+
+ packuswb mm1, mm0
+ packuswb mm2, mm0
+
+ add esi, 0x08
+ cmp esi, edi
+ movq [ebx+esi-8], mm1
+ movq [ebx+esi-4], mm2
+ jl lumconv
+ }
+}
+
+void CMPEG2Dec::conv422to444(unsigned char *src, unsigned char *dst)
+{ /* Doubles the horizontal resolution of one chroma plane using MMX.
+
+     Reading starts HALF_CLIP_AREA bytes into src and writing starts
+     CLIP_AREA bytes into dst.  Clip_Height rows are processed; after
+     each row src advances by HALF_WIDTH and dst by Coded_Picture_Width.
+
+     Within a row, every input sample a with right-hand neighbour b
+     produces two output bytes: a itself, then (a + b + 1) >> 1.  Each
+     pass of the inner loop reads 8 input bytes plus the following 8
+     (for the neighbour of the last sample) and writes 16 output bytes.
+     The inner loop ends when the byte counter reaches the
+     HALF_WIDTH_D8 member; the final 8 input bytes of the row, which
+     have no right-hand neighbour inside the row, are then written with
+     every sample simply repeated twice.
+
+     No emms is executed here. */
+ src += HALF_CLIP_AREA;
+ dst += CLIP_AREA;
+
+ __asm
+ {
+ mov edx, this
+ mov eax, [src]
+ mov ebx, [dst]
+ mov edi, [edx].Clip_Height
+
+ movq mm1, [mmmask_0001]
+ pxor mm0, mm0
+
+convyuv444init:
+ movq mm7, [eax]
+ mov esi, 0x00
+
+convyuv444:
+ movq mm2, mm7
+ movq mm7, [eax+esi+8]
+ movq mm3, mm2
+ movq mm4, mm7
+
+ psrlq mm3, 8
+ psllq mm4, 56
+ por mm3, mm4
+
+ movq mm4, mm2
+ movq mm5, mm3
+
+ punpcklbw mm4, mm0
+ punpcklbw mm5, mm0
+
+ movq mm6, mm4
+ paddusw mm4, mm1
+ paddusw mm4, mm5
+ psrlw mm4, 1
+ psllq mm4, 8
+ por mm4, mm6
+
+ punpckhbw mm2, mm0
+ punpckhbw mm3, mm0
+
+ movq mm6, mm2
+ paddusw mm2, mm1
+ paddusw mm2, mm3
+
+ movq [ebx+esi*2], mm4
+
+ psrlw mm2, 1
+ psllq mm2, 8
+ por mm2, mm6
+
+ add esi, 0x08
+ cmp esi, [edx].HALF_WIDTH_D8
+ movq [ebx+esi*2-8], mm2
+ jl convyuv444
+
+ movq mm2, mm7
+ punpcklbw mm2, mm0
+ movq mm3, mm2
+
+ psllq mm2, 8
+ por mm2, mm3
+
+ movq [ebx+esi*2], mm2
+
+ punpckhbw mm7, mm0
+ movq mm6, mm7
+
+ psllq mm6, 8
+ por mm6, mm7
+
+ movq [ebx+esi*2+8], mm6
+
+ add eax, [edx].HALF_WIDTH
+ add ebx, [edx].Coded_Picture_Width
+ dec edi
+ cmp edi, 0x00
+ jg convyuv444init
+ }
+}
+
+void CMPEG2Dec::conv420to422(unsigned char *src, unsigned char *dst, int frame_type)
+{ /* Doubles the vertical resolution of one chroma plane (4:2:0 to
+     4:2:2) using MMX, four samples per inner iteration.  Source rows
+     are HALF_WIDTH bytes apart.
+
+     frame_type != 0 ("p" labels):
+       Each source row yields two output rows, HALF_WIDTH apart, built
+       with weights 3:1, i.e. (3*cur + other + 2) >> 2.  The first
+       output row is a copy of the first source row and the last output
+       row a copy of the last source row.  The middle loop runs
+       PROGRESSIVE_HEIGHT times.
+       NOTE(review): in the middle loop (convyuv422p) both mm5 and mm2
+       are loaded from [edx+esi] while edx points at the previous source
+       row, so both output rows receive the same blend with the previous
+       row.  edx is moved to the next row only after the loads and is
+       reset before it is read again.  This looks unintended; confirm
+       against the upstream decoder before changing it.
+
+     frame_type == 0 ("i" labels):
+       The source is stepped two rows (Coded_Picture_Width bytes) at a
+       time and four output rows are written per step (DOUBLE_WIDTH
+       bytes).  Rows are blended with weights 5:3 and 7:1, rounded with
+       +4 and shifted right by 3; the first output row is a plain copy,
+       and so is the third row of the last group.  The middle loop runs
+       INTERLACED_HEIGHT times.  Presumably this interpolates each field
+       from its own lines only - TODO confirm.
+
+     Both paths save ebp, load it with 'this' so members can be
+     addressed as [ebp].member, and restore it at the end; src and dst
+     are fetched before ebp is replaced.  No emms is executed here. */
+ if (frame_type)
+ {
+ __asm
+ {
+ push ebp
+ mov eax, [src]
+ mov ebx, [dst]
+ mov ebp, this
+ mov ecx, ebx
+ add ecx, ds:[ebp].HALF_WIDTH
+ mov esi, 0x00
+ movq mm3, [mmmask_0003]
+ pxor mm0, mm0
+ movq mm4, [mmmask_0002]
+
+ mov edx, eax
+ add edx, ds:[ebp].HALF_WIDTH
+convyuv422topp:
+ movd mm1, [eax+esi]
+ movd mm2, [edx+esi]
+ movd [ebx+esi], mm1
+ punpcklbw mm1, mm0
+ pmullw mm1, mm3
+ paddusw mm1, mm4
+ punpcklbw mm2, mm0
+ paddusw mm2, mm1
+ psrlw mm2, 0x02
+ packuswb mm2, mm0
+
+ add esi, 0x04
+ cmp esi, ds:[ebp].HALF_WIDTH
+ movd [ecx+esi-4], mm2
+ jl convyuv422topp
+
+ add eax, ds:[ebp].HALF_WIDTH
+ add ebx, ds:[ebp].Coded_Picture_Width
+ add ecx, ds:[ebp].Coded_Picture_Width
+ mov esi, 0x00
+
+ mov edi, ds:[ebp].PROGRESSIVE_HEIGHT
+convyuv422p:
+ movd mm1, [eax+esi]
+
+ punpcklbw mm1, mm0
+ mov edx, eax
+
+ pmullw mm1, mm3
+ sub edx, ds:[ebp].HALF_WIDTH
+
+ movd mm5, [edx+esi]
+ movd mm2, [edx+esi]
+
+ punpcklbw mm5, mm0
+ punpcklbw mm2, mm0
+ paddusw mm5, mm1
+ paddusw mm2, mm1
+ paddusw mm5, mm4
+ paddusw mm2, mm4
+ psrlw mm5, 0x02
+ psrlw mm2, 0x02
+ packuswb mm5, mm0
+ packuswb mm2, mm0
+
+ mov edx, eax
+ add edx, ds:[ebp].HALF_WIDTH
+ add esi, 0x04
+ cmp esi, ds:[ebp].HALF_WIDTH
+ movd [ebx+esi-4], mm5
+ movd [ecx+esi-4], mm2
+
+ jl convyuv422p
+
+ add eax, ds:[ebp].HALF_WIDTH
+ add ebx, ds:[ebp].Coded_Picture_Width
+ add ecx, ds:[ebp].Coded_Picture_Width
+ mov esi, 0x00
+ dec edi
+ cmp edi, 0x00
+ jg convyuv422p
+
+ mov edx, eax
+ sub edx, ds:[ebp].HALF_WIDTH
+convyuv422bottomp:
+ movd mm1, [eax+esi]
+ movd mm5, [edx+esi]
+ punpcklbw mm5, mm0
+ movd [ecx+esi], mm1
+
+ punpcklbw mm1, mm0
+ pmullw mm1, mm3
+ paddusw mm5, mm1
+ paddusw mm5, mm4
+ psrlw mm5, 0x02
+ packuswb mm5, mm0
+
+ add esi, 0x04
+ cmp esi, ds:[ebp].HALF_WIDTH
+ movd [ebx+esi-4], mm5
+ jl convyuv422bottomp
+ pop ebp
+ }
+ }
+ else
+ {
+ __asm
+ {
+ push ebp
+ mov eax, [src]
+ mov ecx, [dst]
+ mov ebp, this
+ mov esi, 0x00
+ pxor mm0, mm0
+ movq mm3, [mmmask_0003]
+ movq mm4, [mmmask_0004]
+ movq mm5, [mmmask_0005]
+
+convyuv422topi:
+ movd mm1, [eax+esi]
+ mov ebx, eax
+ add ebx, ds:[ebp].HALF_WIDTH
+ movd mm2, [ebx+esi]
+ movd [ecx+esi], mm1
+ punpcklbw mm1, mm0
+ movq mm6, mm1
+ pmullw mm1, mm3
+
+ punpcklbw mm2, mm0
+ movq mm7, mm2
+ pmullw mm2, mm5
+ paddusw mm2, mm1
+ paddusw mm2, mm4
+ psrlw mm2, 0x03
+ packuswb mm2, mm0
+
+ mov edx, ecx
+ add edx, ds:[ebp].HALF_WIDTH
+ pmullw mm6, mm5
+ movd [edx+esi], mm2
+
+ add ebx, ds:[ebp].HALF_WIDTH
+ movd mm2, [ebx+esi]
+ punpcklbw mm2, mm0
+ pmullw mm2, mm3
+ paddusw mm2, mm6
+ paddusw mm2, mm4
+ psrlw mm2, 0x03
+ packuswb mm2, mm0
+
+ add edx, ds:[ebp].HALF_WIDTH
+ add ebx, ds:[ebp].HALF_WIDTH
+ pmullw mm7, [mmmask_0007]
+ movd [edx+esi], mm2
+
+ movd mm2, [ebx+esi]
+ punpcklbw mm2, mm0
+ paddusw mm2, mm7
+ paddusw mm2, mm4
+ psrlw mm2, 0x03
+ packuswb mm2, mm0
+
+ add edx, ds:[ebp].HALF_WIDTH
+ add esi, 0x04
+ cmp esi, ds:[ebp].HALF_WIDTH
+ movd [edx+esi-4], mm2
+
+ jl convyuv422topi
+
+ add eax, ds:[ebp].Coded_Picture_Width
+ add ecx, ds:[ebp].DOUBLE_WIDTH
+ mov esi, 0x00
+
+ mov edi, ds:[ebp].INTERLACED_HEIGHT
+convyuv422i:
+ movd mm1, [eax+esi]
+ punpcklbw mm1, mm0
+ movq mm6, mm1
+ mov ebx, eax
+ sub ebx, ds:[ebp].Coded_Picture_Width
+ movd mm3, [ebx+esi]
+ pmullw mm1, [mmmask_0007]
+ punpcklbw mm3, mm0
+ paddusw mm3, mm1
+ paddusw mm3, mm4
+ psrlw mm3, 0x03
+ packuswb mm3, mm0
+
+ add ebx, ds:[ebp].HALF_WIDTH
+ movq mm1, [ebx+esi]
+ add ebx, ds:[ebp].Coded_Picture_Width
+ movd [ecx+esi], mm3
+
+ movq mm3, [mmmask_0003]
+ movd mm2, [ebx+esi]
+
+ punpcklbw mm1, mm0
+ pmullw mm1, mm3
+ punpcklbw mm2, mm0
+ movq mm7, mm2
+ pmullw mm2, mm5
+ paddusw mm2, mm1
+ paddusw mm2, mm4
+ psrlw mm2, 0x03
+ packuswb mm2, mm0
+
+ pmullw mm6, mm5
+ mov edx, ecx
+ add edx, ds:[ebp].HALF_WIDTH
+ movd [edx+esi], mm2
+
+ add ebx, ds:[ebp].HALF_WIDTH
+ movd mm2, [ebx+esi]
+ punpcklbw mm2, mm0
+ pmullw mm2, mm3
+ paddusw mm2, mm6
+ paddusw mm2, mm4
+ psrlw mm2, 0x03
+ packuswb mm2, mm0
+
+ pmullw mm7, [mmmask_0007]
+ add edx, ds:[ebp].HALF_WIDTH
+ add ebx, ds:[ebp].HALF_WIDTH
+ movd [edx+esi], mm2
+
+ movd mm2, [ebx+esi]
+ punpcklbw mm2, mm0
+ paddusw mm2, mm7
+ paddusw mm2, mm4
+ psrlw mm2, 0x03
+ packuswb mm2, mm0
+
+ add edx, ds:[ebp].HALF_WIDTH
+ add esi, 0x04
+ cmp esi, ds:[ebp].HALF_WIDTH
+ movd [edx+esi-4], mm2
+
+ jl convyuv422i
+ add eax, ds:[ebp].Coded_Picture_Width
+ add ecx, ds:[ebp].DOUBLE_WIDTH
+ mov esi, 0x00
+ dec edi
+ cmp edi, 0x00
+ jg convyuv422i
+
+convyuv422bottomi:
+ movd mm1, [eax+esi]
+ movq mm6, mm1
+ punpcklbw mm1, mm0
+ mov ebx, eax
+ sub ebx, ds:[ebp].Coded_Picture_Width
+ movd mm3, [ebx+esi]
+ punpcklbw mm3, mm0
+ pmullw mm1, [mmmask_0007]
+ paddusw mm3, mm1
+ paddusw mm3, mm4
+ psrlw mm3, 0x03
+ packuswb mm3, mm0
+
+ add ebx, ds:[ebp].HALF_WIDTH
+ movq mm1, [ebx+esi]
+ punpcklbw mm1, mm0
+ movd [ecx+esi], mm3
+
+ pmullw mm1, [mmmask_0003]
+ add ebx, ds:[ebp].Coded_Picture_Width
+ movd mm2, [ebx+esi]
+ punpcklbw mm2, mm0
+ movq mm7, mm2
+ pmullw mm2, mm5
+ paddusw mm2, mm1
+ paddusw mm2, mm4
+ psrlw mm2, 0x03
+ packuswb mm2, mm0
+
+ mov edx, ecx
+ add edx, ds:[ebp].HALF_WIDTH
+ pmullw mm7, [mmmask_0007]
+ movd [edx+esi], mm2
+
+ add edx, ds:[ebp].HALF_WIDTH
+ movd [edx+esi], mm6
+
+ punpcklbw mm6, mm0
+ paddusw mm6, mm7
+ paddusw mm6, mm4
+ psrlw mm6, 0x03
+ packuswb mm6, mm0
+
+ add edx, ds:[ebp].HALF_WIDTH
+ add esi, 0x04
+ cmp esi, ds:[ebp].HALF_WIDTH
+ movd [edx+esi-4], mm6
+
+ jl convyuv422bottomi
+ pop ebp
+ }
+ }
+}
+
+void CMPEG2Dec::conv444toRGB24(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int pitch)
+{ /* Converts three full-resolution planes to packed 24-bit pixels
+     using MMX.
+
+     py, pu, pv  source planes; reading starts CLIP_STEP bytes in and
+                 each plane advances by Coded_Picture_Width per row
+     dst         output buffer; 3 bytes are written per pixel and
+                 PWIDTH (pitch - DSTBYTES) is added after each row
+     pitch       output row pitch in bytes
+
+     Four pixels are converted per inner iteration (12 output bytes)
+     until the column counter reaches Clip_Width; Clip_Height rows are
+     processed.  Per pixel: RGB_Offset is subtracted from the py sample
+     and the result scaled with RGB_Scale; 128 is subtracted from the
+     pu/pv samples, which are then weighted with RGB_CBU, RGB_CRV and
+     RGB_CGX.  Sums are shifted right by 13 and saturated to 8 bits.
+     Presumably the byte order in memory is B, G, R - TODO confirm.
+
+     The members used inside the asm block are copied into locals
+     first.  emms is executed once at the very end. */
+ int PWIDTH = pitch - DSTBYTES; /* bytes to skip after the pixels of each output row */
+
+ py += CLIP_STEP;
+ pu += CLIP_STEP;
+ pv += CLIP_STEP;
+
+ int Clip_Height = this->Clip_Height;
+ __int64 RGB_Offset = this->RGB_Offset;
+ __int64 RGB_Scale = this->RGB_Scale;
+ __int64 RGB_CBU = this->RGB_CBU;
+ __int64 RGB_CRV = this->RGB_CRV;
+ __int64 RGB_CGX = this->RGB_CGX;
+ int Clip_Width = this->Clip_Width;
+ int Coded_Picture_Width = this->Coded_Picture_Width;
+
+ __asm
+ {
+ mov eax, [py]
+ mov ebx, [pu]
+ mov ecx, [pv]
+ mov edx, [dst]
+ mov edi, Clip_Height
+ mov esi, 0x00
+ pxor mm0, mm0
+
+convRGB24:
+ movd mm1, [eax+esi]
+ movd mm3, [ebx+esi]
+ punpcklbw mm1, mm0
+ punpcklbw mm3, mm0
+ movd mm5, [ecx+esi]
+ punpcklbw mm5, mm0
+ movq mm7, [mmmask_0128]
+ psubw mm3, mm7
+ psubw mm5, mm7
+
+ psubw mm1, RGB_Offset
+ movq mm2, mm1
+ movq mm7, [mmmask_0001]
+ punpcklwd mm1, mm7
+ punpckhwd mm2, mm7
+ movq mm7, RGB_Scale
+ pmaddwd mm1, mm7
+ pmaddwd mm2, mm7
+
+ movq mm4, mm3
+ punpcklwd mm3, mm0
+ punpckhwd mm4, mm0
+ movq mm7, RGB_CBU
+ pmaddwd mm3, mm7
+ pmaddwd mm4, mm7
+ paddd mm3, mm1
+ paddd mm4, mm2
+ psrld mm3, 13
+ psrld mm4, 13
+ packuswb mm3, mm0
+ packuswb mm4, mm0
+
+ movq mm6, mm5
+ punpcklwd mm5, mm0
+ punpckhwd mm6, mm0
+ movq mm7, RGB_CRV
+ pmaddwd mm5, mm7
+ pmaddwd mm6, mm7
+ paddd mm5, mm1
+ paddd mm6, mm2
+ psrld mm5, 13
+ psrld mm6, 13
+ packuswb mm5, mm0
+ packuswb mm6, mm0
+
+ punpcklbw mm3, mm5
+ punpcklbw mm4, mm6
+ movq mm5, mm3
+ movq mm6, mm4
+ psrlq mm5, 16
+ psrlq mm6, 16
+ por mm3, mm5
+ por mm4, mm6
+
+ movd mm5, [ebx+esi]
+ movd mm6, [ecx+esi]
+ punpcklbw mm5, mm0
+ punpcklbw mm6, mm0
+ movq mm7, [mmmask_0128]
+ psubw mm5, mm7
+ psubw mm6, mm7
+
+ movq mm7, mm6
+ punpcklwd mm6, mm5
+ punpckhwd mm7, mm5
+ movq mm5, RGB_CGX
+ pmaddwd mm6, mm5
+ pmaddwd mm7, mm5
+ paddd mm6, mm1
+ paddd mm7, mm2
+
+ psrld mm6, 13
+ psrld mm7, 13
+ packuswb mm6, mm0
+ packuswb mm7, mm0
+
+ punpcklbw mm3, mm6
+ punpcklbw mm4, mm7
+
+ movq mm1, mm3
+ movq mm5, mm4
+ movq mm6, mm4
+
+ psrlq mm1, 32
+ psllq mm1, 24
+ por mm1, mm3
+
+ psrlq mm3, 40
+ psllq mm6, 16
+ por mm3, mm6
+ movd [edx], mm1
+
+ psrld mm4, 16
+ psrlq mm5, 24
+ por mm5, mm4
+ movd [edx+4], mm3
+
+ add edx, 0x0c
+ add esi, 0x04
+ cmp esi, Clip_Width
+ movd [edx-4], mm5
+
+ jl convRGB24
+
+ add eax, Coded_Picture_Width
+ add ebx, Coded_Picture_Width
+ add ecx, Coded_Picture_Width
+ add edx, PWIDTH
+ mov esi, 0x00
+ dec edi
+ cmp edi, 0x00
+ jg convRGB24
+
+ emms
+ }
+}
+
+// YUV 4:2:2 Format:
+// YUYV YUYV ...
+void CMPEG2Dec::conv422toYUY2(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int pitch)
+{ /* Interleaves a luma plane and two half-width chroma planes into
+     packed Y U Y V bytes using MMX.
+
+     py, pu, pv  source planes; reading starts CLIP_STEP bytes into
+                 each of them.  py advances by Coded_Picture_Width per
+                 row, pu and pv by half of that.
+     dst         output buffer, advanced by pitch bytes per row
+     pitch       output row pitch in bytes
+
+     Each inner iteration consumes 8 luma, 4 U and 4 V bytes and writes
+     16 output bytes.  The inner counter indexes chroma samples and runs
+     up to Clip_Width / 2 (unsigned compare, tested after the body).
+     The local y holds the number of rows still to do and is decremented
+     in place by the asm block until it reaches zero.
+
+     emms is executed on entry and again on exit. */
+ py += CLIP_STEP;
+ pu += CLIP_STEP;
+ pv += CLIP_STEP;
+
+ int y = this->Clip_Height; /* row countdown, modified by "dec y" below */
+ int Clip_Width_2 = this->Clip_Width / 2; /* chroma samples per row */
+ int Coded_Picture_Width = this->Coded_Picture_Width; /* luma row stride */
+ int Coded_Picture_Width_2 = this->Coded_Picture_Width / 2; /* chroma row stride */
+
+ __asm
+ {
+ emms
+ mov eax, [py]
+ mov ebx, [pu]
+ mov ecx, [pv]
+ mov edx, [dst]
+ mov edi, Clip_Width_2
+ yloop:
+ xor esi, esi
+ xloop:
+ movd mm1, [eax+esi*2] ;0000YYYY
+ movd mm2, [ebx+esi] ;0000UUUU
+ movd mm3, [ecx+esi] ;0000VVVV
+ ;interleave this to VYUYVYUY
+ punpcklbw mm2, mm3 ;VUVUVUVU
+ punpcklbw mm1, mm2 ;VYUYVYUY
+ movq [edx+esi*4], mm1
+ movd mm1, [eax+esi*2+4] ;0000YYYY
+ punpckhdq mm2, mm2 ;xxxxVUVU
+ punpcklbw mm1, mm2 ;VYUYVYUY
+ movq [edx+esi*4+8], mm1
+ add esi, 4
+ cmp esi, edi
+ jb xloop
+ add edx, pitch
+ add eax, Coded_Picture_Width
+ add ebx, Coded_Picture_Width_2
+ add ecx, Coded_Picture_Width_2
+ dec y
+ jnz yloop
+ emms
+ }
+}
+
+//
+// codec
+//
+
+static const int ChromaFormat[4] = { /* NOTE(review): presumably the number of 8x8 blocks per macroblock, indexed by chroma_format (6 / 8 / 12 for 4:2:0 / 4:2:2 / 4:4:4, entry 0 unused) - confirm at the point of use */
+ 0, 6, 8, 12
+};
+
+CMPEG2Dec::CMPEG2Dec()
+{
+    /* Puts every member into a known empty state (zero / NULL) and then
+       runs CheckCPU().  Nothing is allocated here. */
+    int idx;
+
+    /* project file state */
+    VF_File = 0;
+    VF_OldRef = 0;
+    VF_OldFrame = 0;
+    VF_GOPSize = 0;
+    VF_GOPNow = 0;
+    VF_GOPLimit = 0;
+    VF_FrameBound = 0;
+    VF_FrameLimit = 0;
+    VF_FrameRate = 0;
+    VF_FrameSize = 0;
+
+    /* bitstream reader state */
+    memset(Rdbfr, 0, sizeof(Rdbfr));
+    Rdmax = 0;
+    Rdptr = 0;
+    Read = 0;
+    Val = 0;
+    BitsLeft = 0;
+    NextBfr = 0;
+    CurrentBfr = 0;
+
+    /* flags and options */
+    SystemStream_Flag = 0;
+    IDCT_Flag = 0;
+    FO_Flag = 0;
+    File_Limit = 0;
+    File_Flag = 0;
+    Fault_Flag = 0;
+    lfsr1 = 0;
+    lfsr0 = 0;
+    KeyOp_Flag = 0;
+    Resize_Flag = 0;
+    Luminance_Flag = 0;
+    BufferOp = 0;
+
+    /* quantiser matrices and their load flags */
+    memset(intra_quantizer_matrix, 0, sizeof(intra_quantizer_matrix));
+    memset(non_intra_quantizer_matrix, 0, sizeof(non_intra_quantizer_matrix));
+    memset(chroma_intra_quantizer_matrix, 0, sizeof(chroma_intra_quantizer_matrix));
+    memset(chroma_non_intra_quantizer_matrix, 0, sizeof(chroma_non_intra_quantizer_matrix));
+    load_chroma_non_intra_quantizer_matrix = 0;
+    load_chroma_intra_quantizer_matrix = 0;
+    load_non_intra_quantizer_matrix = 0;
+    load_intra_quantizer_matrix = 0;
+    quantizer_scale = 0;
+    alternate_scan = 0;
+    q_scale_type = 0;
+
+    /* buffers and handles: nothing is owned yet */
+    for (idx=0; idx<MAX_FILE_NUMBER; idx++)
+    {
+        Infilename[idx] = NULL;
+    }
+
+    for (idx=0; idx<8; idx++)
+    {
+        block[idx] = NULL;
+        p_block[idx] = NULL;
+    }
+
+    fTempArray = NULL;
+    p_fTempArray = NULL;
+
+    for (idx=0; idx<3; idx++)
+    {
+        auxframe[idx] = NULL;
+        forward_reference_frame[idx] = NULL;
+        backward_reference_frame[idx] = NULL;
+    }
+
+    lum = NULL;
+    dstFrame = NULL;
+    v444 = NULL;
+    u444 = NULL;
+    v422 = NULL;
+    u422 = NULL;
+    hLibrary = NULL;
+
+    CheckCPU();
+}
+
+// Reads lines from `file` into `buff` (fgets semantics, at most len-1
+// characters per read) until one contains a non-whitespace character, and
+// returns a pointer to that first non-whitespace character inside `buff`.
+//
+// When fgets() fails (end of file or read error) an empty string is returned
+// in `buff` instead of NULL: the callers pass the result straight to
+// sscanf(), which must never be handed a null pointer. `buff` must have room
+// for at least one character.
+static char* myfgets(char* buff, int len, FILE* file)
+{
+    char* ret;
+
+    while((ret = fgets(buff, len, file)) != NULL)
+    {
+        // isspace() requires a value representable as unsigned char; a plain
+        // char can be negative for bytes >= 0x80.
+        while(isspace((unsigned char)*ret)) ret++;
+        if(*ret) return(ret);
+    }
+
+    // After a failed fgets() the buffer contents cannot be relied on.
+    buff[0] = 0;
+    return(buff);
+}
+
+// Opens the DVD2AVI project file `path` and prepares the decoder for Decode().
+//
+// The project file is read in the order it is written: signature line, source
+// file list, Stream_Type, iDCT_Algorithm, YUVRGB_Scale, Luminance (or
+// Luminance_Filter), Picture_Size, Field_Operation, Frame_Rate, Location and
+// finally the per-frame flag list from which GOPList and FrameList are built.
+// The first source file is scanned for a sequence header to size the buffers.
+//
+// dstFormat selects the output pixel layout (RGB24 or YUY2).
+// Returns 1 on success and 0 on any failure. Whatever was acquired before a
+// failure stays owned by the object and is released by Close().
+int CMPEG2Dec::Open(LPCTSTR path, DstFormat dstFormat)
+{
+    m_dstFormat = dstFormat;
+    char ID[19], PASS[19] = "DVD2AVIProjectFile";
+    DWORD i, j, size, code, type, tff, rff, film, ntsc, gop, top, bottom, mapping;
+    int repeat_on, repeat_off, repeat_init;
+    int Clip_Top, Clip_Bottom, Clip_Left, Clip_Right, Squeeze_Width, Squeeze_Height;
+
+    HKEY key; DWORD value = REG_SZ; DWORD length;
+    char *ext, *line, buffer[256];
+
+    CMPEG2Dec* out = this;
+
+    out->VF_File = _tfopen(path, _T("r"));
+    if (out->VF_File==NULL)
+        return 0;
+    if (fgets(ID, 19, out->VF_File)==NULL)
+        return 0;
+    if (strcmp(ID, PASS))
+        return 0;
+
+    // load DLL
+    // OpenDVD.dll is optional. Its location is derived from the path stored
+    // in HKCU\Software\VFPlugin\DVD2AVI by replacing the file name part.
+    // `buffer` stays empty unless a complete path could be built, so
+    // LoadLibraryA() is never called on uninitialised data.
+    buffer[0] = 0;
+    if (RegOpenKeyExA(HKEY_CURRENT_USER, "Software\\VFPlugin", 0, KEY_READ, &key)==ERROR_SUCCESS)
+    {
+        length = sizeof(buffer) - 1; // keep one byte for the terminator added below
+        if (RegQueryValueExA(key, "DVD2AVI", NULL, &value, (unsigned char*)buffer, &length)==ERROR_SUCCESS)
+        {
+            buffer[length] = 0; // registry strings may be stored without a terminating NUL
+
+            ext = strrchr(buffer, '\\');
+            if (ext!=NULL && (size_t)(ext-buffer) + 1 + sizeof("OpenDVD.dll") <= sizeof(buffer))
+                sprintf(ext + 1, "OpenDVD.dll");
+            else
+                buffer[0] = 0;
+        }
+        else
+            buffer[0] = 0;
+
+        RegCloseKey(key);
+    }
+
+    if (buffer[0] && (hLibrary = LoadLibraryA(buffer)) != NULL)
+        BufferOp = (PBufferOp) GetProcAddress(hLibrary, "BufferOp");
+
+    for (i=0; i<MAX_FILE_NUMBER; i++)
+        Infilename[i] = DNew char[_MAX_PATH];
+
+    if(1 != fscanf(out->VF_File, "%d", &File_Limit))
+        return 0;
+
+    // The count indexes Infile[]/Infilename[] and the stream is read from
+    // Infile[0] below, so it must lie in 1..MAX_FILE_NUMBER.
+    if (File_Limit < 1 || File_Limit > MAX_FILE_NUMBER)
+    {
+        File_Limit = 0; // nothing opened: Close() must not touch Infile[]
+        return 0;
+    }
+
+    // Each entry is "<length> <name>"; the name is read with fgets(length+1).
+    const int fileCount = File_Limit;
+    for (int n = 0; n < fileCount; n++)
+    {
+        if(1 != fscanf(out->VF_File, "%d ", &j)
+            || j >= _MAX_PATH // name plus terminator must fit in Infilename[n]
+            || fgets(Infilename[n], j+1, out->VF_File)==NULL
+            || (Infile[n] = _open(Infilename[n], _O_RDONLY | _O_BINARY))==-1)
+        {
+            File_Limit = n; // only Infile[0..n-1] hold open descriptors
+            return 0;
+        }
+    }
+
+    line = myfgets(buffer, sizeof(buffer), out->VF_File);
+    if(line==NULL || 3 != sscanf(line, "\nStream_Type=%d,%X,%X\n", &SystemStream_Flag, &lfsr0, &lfsr1))
+        return 0;
+    if (lfsr0 || lfsr1)
+        KeyOp_Flag = 1;
+    else
+        KeyOp_Flag = 0;
+
+    line = myfgets(buffer, sizeof(buffer), out->VF_File);
+    if(line==NULL || 1 != sscanf(line, "iDCT_Algorithm=%d\n", &IDCT_Flag))
+        return 0;
+
+    switch (IDCT_Flag)
+    {
+        case IDCT_SSEMMX:
+            if (!cpu.ssemmx)
+                IDCT_Flag = IDCT_MMX;
+            break;
+
+        case IDCT_FPU:
+            Initialize_FPU_IDCT();
+            break;
+
+        case IDCT_REF:
+            Initialize_REF_IDCT();
+            break;
+    }
+
+    // Scan the first source file for a sequence header.
+    File_Flag = 0;
+    _lseeki64(Infile[0], 0, SEEK_SET);
+    Initialize_Buffer();
+
+    do
+    {
+        next_start_code();
+        code = Get_Bits(32);
+    }
+    while (code!=SEQUENCE_HEADER_CODE);
+
+    sequence_header();
+
+    mb_width = (horizontal_size+15)/16;
+    mb_height = progressive_sequence ? (vertical_size+15)/16 : 2*((vertical_size+31)/32);
+
+    Coded_Picture_Width = 16 * mb_width;
+    Coded_Picture_Height = 16 * mb_height;
+
+    Chroma_Width = (chroma_format==CHROMA444) ? Coded_Picture_Width : Coded_Picture_Width>>1;
+    Chroma_Height = (chroma_format!=CHROMA420) ? Coded_Picture_Height : Coded_Picture_Height>>1;
+
+    block_count = ChromaFormat[chroma_format];
+
+    // p_block/p_fTempArray are the raw allocations; block/fTempArray point to
+    // the next 64-byte boundary inside them. UINT_PTR keeps the full pointer
+    // value (a cast to long would truncate it on 64-bit Windows).
+    for (i=0; i<8; i++)
+    {
+        p_block[i] = (short *)DNew BYTE[sizeof(short)*64 + 64];
+        block[i] = (short *)((UINT_PTR)p_block[i] + 64 - (UINT_PTR)p_block[i]%64);
+    }
+
+    p_fTempArray = (void *)DNew BYTE[sizeof(float)*128 + 64];
+    fTempArray = (void *)((UINT_PTR)p_fTempArray + 64 - (UINT_PTR)p_fTempArray%64);
+
+    // Plane 0 is luma sized, planes 1 and 2 are chroma sized.
+    for (i=0; i<3; i++)
+    {
+        if (i==0)
+            size = Coded_Picture_Width * Coded_Picture_Height;
+        else
+            size = Chroma_Width * Chroma_Height;
+
+        backward_reference_frame[i] = DNew unsigned char[size];
+        forward_reference_frame[i] = DNew unsigned char[size];
+        auxframe[i] = DNew unsigned char[size];
+    }
+
+    line = myfgets(buffer, sizeof(buffer), out->VF_File);
+    if(line==NULL || 1 != sscanf(line, "YUVRGB_Scale=%d\n", &i))
+        return 0;
+
+    // Packed 4x16-bit conversion constants for the two YUVRGB_Scale settings.
+    if (i)
+    {
+        RGB_Scale = 0x1000254310002543;
+        RGB_Offset = 0x0010001000100010;
+        RGB_CBU = 0x0000408D0000408D;
+        RGB_CGX = 0xF377E5FCF377E5FC;
+        RGB_CRV = 0x0000331300003313;
+    }
+    else
+    {
+        RGB_Scale = 0x1000200010002000;
+        RGB_Offset = 0x0000000000000000;
+        RGB_CBU = 0x000038B4000038B4;
+        RGB_CGX = 0xF4FDE926F4FDE926;
+        RGB_CRV = 0x00002CDD00002CDD;
+    }
+
+    // A "Luminance_Filter=" line is accepted in place of "Luminance=", but
+    // its values are replaced by 128,0, which leaves the filter switched off.
+    line = myfgets(buffer, sizeof(buffer), out->VF_File);
+    if(line==NULL)
+        return 0;
+    if(2 != sscanf(line, "Luminance=%d,%d\n", &i, &j))
+    {
+        if(2 != sscanf(line, "Luminance_Filter=%d,%d\n", &i, &j))
+            return 0;
+        i=128; j=0;
+    }
+
+    if (i==128 && j==0)
+        Luminance_Flag = 0;
+    else
+    {
+        Luminance_Flag = 1;
+        LumGainMask = ((__int64)i<<48) + ((__int64)i<<32) + ((__int64)i<<16) + (__int64)i;
+        LumOffsetMask = ((__int64)j<<48) + ((__int64)j<<32) + ((__int64)j<<16) + (__int64)j;
+
+        lum = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height];
+    }
+
+    line = myfgets(buffer, sizeof(buffer), out->VF_File);
+    if(line==NULL || 6 != sscanf(line, "Picture_Size=%d,%d,%d,%d,%d,%d\n",
+        &Clip_Top, &Clip_Bottom, &Clip_Left, &Clip_Right, &Squeeze_Width, &Squeeze_Height))
+        return 0;
+
+    // The clip values come from the file and are used for buffer sizes and
+    // offsets below, so reject negative borders.
+    if (Clip_Top < 0 || Clip_Bottom < 0 || Clip_Left < 0 || Clip_Right < 0)
+        return 0;
+
+    Resize_Flag = 0;
+    Resize_Width = Clip_Width = Coded_Picture_Width;
+    Resize_Height = Clip_Height = Coded_Picture_Height;
+    CLIP_AREA = HALF_CLIP_AREA = CLIP_STEP = 0;
+
+    if (Clip_Top || Clip_Bottom || Clip_Left || Clip_Right)
+    {
+        Clip_Width -= Clip_Left+Clip_Right;
+        Clip_Height -= Clip_Top+Clip_Bottom;
+        Resize_Width = Clip_Width;
+        Resize_Height = Clip_Height;
+
+        CLIP_AREA = Coded_Picture_Width * Clip_Top;
+        HALF_CLIP_AREA = (Coded_Picture_Width>>1) * Clip_Top;
+        CLIP_STEP = Coded_Picture_Width * Clip_Top + Clip_Left;
+    }
+
+    // Borders that consume the whole picture would give a non-positive
+    // dstFrame size.
+    if (Clip_Width <= 0 || Clip_Height <= 0)
+        return 0;
+
+    if (Squeeze_Width || Squeeze_Height)
+    {
+        Resize_Flag = 1;
+        Resize_Width -= Squeeze_Width;
+        Resize_Height -= Squeeze_Height;
+    }
+
+    DSTBYTES = Clip_Width * (dstRGB24() ? 3 : 2);
+    DSTBYTES2 = DSTBYTES * 2;
+    LUM_AREA = Coded_Picture_Width * Clip_Height;
+    PROGRESSIVE_HEIGHT = (Coded_Picture_Height>>1) - 2;
+    INTERLACED_HEIGHT = (Coded_Picture_Height>>2) - 2;
+    HALF_WIDTH = Coded_Picture_Width>>1;
+    HALF_WIDTH_D8 = (Coded_Picture_Width>>1) - 8;
+    DOUBLE_WIDTH = Coded_Picture_Width<<1;
+
+    u422 = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height / 2];
+    v422 = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height / 2];
+    u444 = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height];
+    v444 = DNew unsigned char[Coded_Picture_Width * Coded_Picture_Height];
+    dstFrame = DNew unsigned char[Clip_Width * Clip_Height * 4]; // max value (super set)
+
+    line = myfgets(buffer, sizeof(buffer), out->VF_File);
+    if(line==NULL || 1 != sscanf(line, "Field_Operation=%d\n", &FO_Flag))
+        return 0;
+    line = myfgets(buffer, sizeof(buffer), out->VF_File);
+    if(line==NULL || 1 != sscanf(line, "Frame_Rate=%d\n", &(out->VF_FrameRate)))
+        return 0;
+    // The Location values are parsed only to validate the line.
+    line = myfgets(buffer, sizeof(buffer), out->VF_File);
+    if(line==NULL || 4 != sscanf(line, "Location=%d,%X,%d,%X\n", &i, &j, &i, &j))
+        return 0;
+
+    ntsc = film = top = bottom = gop = mapping = repeat_on = repeat_off = repeat_init = 0;
+
+    // One number per coded frame; 7 introduces an I frame record (file index,
+    // position, flags), any other value below 9 carries the flags directly:
+    // bit 1 = tff, bit 0 = rff.
+    // A single iteration adds at most one GOPList entry and touches FrameList
+    // at most one slot beyond the current count, advancing the count by at
+    // most two, so checking the headroom here keeps every index inside the
+    // MAX_FRAME_NUMBER sized arrays.
+    while (gop < MAX_FRAME_NUMBER && mapping + 2 < MAX_FRAME_NUMBER && ntsc + 2 < MAX_FRAME_NUMBER
+        && 1 == fscanf(out->VF_File, "%d", &type) && type<9)
+    {
+        if (type==7) // I frame
+        {
+            GOPList[gop] = reinterpret_cast<GOPLIST*>(calloc(1, sizeof(GOPLIST)));
+            if (GOPList[gop]==NULL)
+                break;
+            GOPList[gop]->number = film;
+            if(2 != fscanf(out->VF_File, "%d %X", &(GOPList[gop]->file), &j))
+            {
+                free(GOPList[gop]); // not counted in `gop`, so Close() would never free it
+                break;
+            }
+
+            GOPList[gop]->position = (__int64)j*BUFFER_SIZE;
+            gop ++;
+
+            if(1 != fscanf(out->VF_File, "%d", &j))
+                break;
+
+            tff = j>>1;
+            rff = j & 1;
+        }
+        else // P, B frame
+        {
+            tff = type>>1;
+            rff = type & 1;
+        }
+
+        // The field order is taken from the very first frame.
+        if (!film)
+        {
+            if (tff)
+                Field_Order = 1;
+            else
+                Field_Order = 0;
+        }
+
+        if (FO_Flag==FO_FILM)
+        {
+            if (rff)
+                repeat_on++;
+            else
+                repeat_off++;
+
+            if (repeat_init)
+            {
+                if (repeat_off-repeat_on == 5)
+                {
+                    repeat_on = repeat_off = 0;
+                }
+                else
+                {
+                    FrameList[mapping] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+                    FrameList[mapping]->top = FrameList[mapping]->bottom = film;
+                    mapping ++;
+                }
+
+                if (repeat_on-repeat_off == 5)
+                {
+                    repeat_on = repeat_off = 0;
+                    FrameList[mapping] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+                    FrameList[mapping]->top = FrameList[mapping]->bottom = film;
+                    mapping ++;
+                }
+            }
+            else
+            {
+                if (repeat_off-repeat_on == 3)
+                {
+                    repeat_on = repeat_off = 0;
+                    repeat_init = 1;
+                }
+                else
+                {
+                    FrameList[mapping] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+                    FrameList[mapping]->top = FrameList[mapping]->bottom = film;
+                    mapping ++;
+                }
+
+                if (repeat_on-repeat_off == 3)
+                {
+                    repeat_on = repeat_off = 0;
+                    repeat_init = 1;
+
+                    FrameList[mapping] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+                    FrameList[mapping]->top = FrameList[mapping]->bottom = film;
+                    mapping ++;
+                }
+            }
+        }
+        else
+        {
+            // `top` / `bottom` are set while FrameList[ntsc] already holds
+            // that field from an earlier repeated field and still waits for
+            // the other one.
+            if (top)
+            {
+                FrameList[ntsc]->bottom = film;
+                ntsc ++;
+                FrameList[ntsc] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+                FrameList[ntsc]->top = film;
+            }
+            else if (bottom)
+            {
+                FrameList[ntsc]->top = film;
+                ntsc ++;
+                FrameList[ntsc] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+                FrameList[ntsc]->bottom = film;
+            }
+            else
+            {
+                FrameList[ntsc] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+                FrameList[ntsc]->top = film;
+                FrameList[ntsc]->bottom = film;
+                ntsc ++;
+            }
+
+            if (rff)
+            {
+                if (!top && !bottom)
+                    FrameList[ntsc] = reinterpret_cast<FRAMELIST*>(calloc(1, sizeof(FRAMELIST)));
+
+                if (tff)
+                {
+                    FrameList[ntsc]->top = film;
+                    top = 1;
+                }
+                else
+                {
+                    FrameList[ntsc]->bottom = film;
+                    bottom = 1;
+                }
+
+                if (top && bottom)
+                {
+                    top = bottom = 0;
+                    ntsc ++;
+                }
+            }
+        }
+
+        film ++;
+    }
+
+    out->VF_FrameBound = film;
+    film -= 2;
+
+    // Drop trailing output frames that refer to the last two coded frames.
+    // The count is checked first so an empty list is never indexed at -1.
+    if (FO_Flag==FO_FILM)
+    {
+        while (mapping > 0 && FrameList[mapping-1]->top >= film)
+            mapping --;
+
+        out->VF_FrameLimit = mapping;
+    }
+    else
+    {
+        if (FO_Flag==FO_SWAP)
+        {
+            Field_Order = !Field_Order;
+
+            // Pull one field of every frame from its successor.
+            if (Field_Order)
+                for (i=0; i+1<ntsc; i++)
+                    FrameList[i]->bottom = FrameList[i+1]->bottom;
+            else
+                for (i=0; i+1<ntsc; i++)
+                    FrameList[i]->top = FrameList[i+1]->top;
+        }
+
+        while (ntsc > 0 && ((FrameList[ntsc-1]->top >= film) || (FrameList[ntsc-1]->bottom >= film)))
+            ntsc --;
+
+        out->VF_FrameLimit = ntsc;
+
+        // Mark neighbouring output frames that share a coded frame.
+        for (i=0; i+1<out->VF_FrameLimit; i++)
+            if (FrameList[i]->top==FrameList[i+1]->top || FrameList[i]->top==FrameList[i+1]->bottom ||
+                FrameList[i]->bottom==FrameList[i+1]->top || FrameList[i]->bottom==FrameList[i+1]->bottom)
+            {
+                FrameList[i]->forward = 1;
+                FrameList[i+1]->backward = 1;
+            }
+    }
+
+    // Full_Frame: every output frame takes both fields from one coded frame.
+    Full_Frame = 1;
+    for (i=0; i<out->VF_FrameLimit; i++)
+        if (FrameList[i]->top!=FrameList[i]->bottom)
+        {
+            Full_Frame = 0;
+            break;
+        }
+
+    out->VF_GOPNow = out->VF_GOPLimit = gop;
+    out->VF_OldFrame = out->VF_FrameLimit;
+    out->VF_FrameSize = Clip_Width * Clip_Height * 3;
+
+    return 1;
+}
+
+void CMPEG2Dec::Decode(unsigned char *dst, DWORD frame, int pitch)
+{ /* Writes output frame number `frame` into `dst`, whose rows are `pitch` bytes apart.
+   * The frame number is translated through FrameList into a coded picture
+   * number, the GOPList entry covering that picture is located, and pictures
+   * are decoded into GOPBuffer[] until the wanted one is available; the
+   * result is then copied to `dst` with Copyodd()/Copyeven().
+   * `fo` is 0 when both fields come from one coded picture and 1 or 2 when
+   * they come from two neighbouring ones (see the final switch).
+   * NOTE(review): `frame` indexes FrameList without any range check here.
+   */
+ DWORD i, now, size, origin, ref, fo;
+ int remain;
+
+ CMPEG2Dec* in = this;
+
+ if (FO_Flag==FO_FILM)
+ {
+ fo = 0;
+ frame = FrameList[frame]->top; // in film mode Open() stores the same picture in top and bottom
+ }
+
+ origin = frame; // film mode: the coded picture number; otherwise: the caller's frame index
+
+ if (FO_Flag!=FO_FILM)
+ {
+ if (FrameList[frame]->top == FrameList[frame]->bottom)
+ {
+ fo = 0;
+ frame = FrameList[frame]->top;
+ }
+ else if (FrameList[frame]->top < FrameList[frame]->bottom)
+ {
+ fo = 1;
+ frame = FrameList[frame]->top;
+ }
+ else
+ {
+ fo = 2;
+ frame = FrameList[frame]->bottom;
+ }
+ }
+
+ ref = frame; // becomes the picture's offset inside its GOP below
+
+ if (frame >= GOPList[in->VF_GOPLimit-1]->number) // NOTE(review): indexes GOPList[-1] if VF_GOPLimit is 0
+ {
+ now = in->VF_GOPLimit-1;
+ ref -= GOPList[in->VF_GOPLimit-1]->number;
+ size = in->VF_FrameBound - GOPList[in->VF_GOPLimit-1]->number + 1;
+ }
+ else
+ for (now = 0; now < (in->VF_GOPLimit-1); now++) // NOTE(review): if no entry matches, `size` is never assigned
+ {
+ if (frame>=GOPList[now]->number && frame<GOPList[now+1]->number)
+ {
+ ref -= GOPList[now]->number;
+ size = GOPList[now+1]->number - GOPList[now]->number + 1;
+ break;
+ }
+ }
+
+ if (fo)
+ ref ++; // the second field comes from the following picture, so decode one further
+
+ if (now != in->VF_GOPNow) // the wanted GOP is not the one currently held in GOPBuffer
+ {
+ if ((in->VF_OldFrame + 1)==origin) // direct successor of the previous request: continue decoding without a seek
+ {
+ if (Full_Frame)
+ {
+ Get_Hdr();
+ Decode_Picture(1, dst, pitch);
+
+ if (picture_structure!=FRAME_PICTURE) // not a frame picture: a second header/picture pair is decoded
+ {
+ Get_Hdr();
+ Decode_Picture(1, dst, pitch);
+ }
+ }
+ else
+ switch (fo)
+ {
+ case 0:
+ if (!FrameList[origin]->backward)
+ {
+ Get_Hdr();
+ Decode_Picture(1, dst, pitch);
+
+ if (picture_structure!=FRAME_PICTURE)
+ {
+ Get_Hdr();
+ Decode_Picture(1, dst, pitch);
+ }
+
+ if (FrameList[origin]->forward) // the next output frame reuses this picture: keep one field set in dstFrame
+ {
+ if (Field_Order)
+ Copyodd(dst, dstFrame, pitch, 1);
+ else
+ Copyeven(dst, dstFrame, pitch, 1);
+ }
+ }
+ else
+ {
+ Copyodd(dstFrame, dst, pitch, 0); // picture already decoded for the previous frame: reuse dstFrame
+ Copyeven(dstFrame, dst, pitch, 0);
+ }
+ break;
+
+ case 1:
+ Copyodd(dstFrame, dst, pitch, 0); // first line set from the picture kept in dstFrame
+
+ Get_Hdr();
+ Decode_Picture(1, dstFrame, DSTBYTES);
+
+ if (picture_structure!=FRAME_PICTURE)
+ {
+ Get_Hdr();
+ Decode_Picture(1, dstFrame, DSTBYTES);
+ }
+
+ Copyeven(dstFrame, dst, pitch, 0); // second line set from the newly decoded picture
+ break;
+
+ case 2:
+ Copyeven(dstFrame, dst, pitch, 0); // mirror image of case 1
+
+ Get_Hdr();
+ Decode_Picture(1, dstFrame, DSTBYTES);
+
+ if (picture_structure!=FRAME_PICTURE)
+ {
+ Get_Hdr();
+ Decode_Picture(1, dstFrame, DSTBYTES);
+ }
+
+ Copyodd(dstFrame, dst, pitch, 0);
+ break;
+ }
+
+ if (in->VF_GOPSize) // the cached GOP is not used on this path: release it
+ {
+ for (i=0; i < in->VF_GOPSize; i++)
+ free(GOPBuffer[i]);
+
+ in->VF_GOPSize = 0;
+ }
+
+ in->VF_GOPNow = in->VF_GOPLimit; // VF_GOPLimit is never a valid `now`, so the next call takes this branch again
+ in->VF_OldFrame = origin;
+ return;
+ }
+
+ remain = ref; // number of pictures to decode after the first one of the GOP
+ in->VF_OldRef = ref;
+ in->VF_GOPNow = now;
+ Second_Field = 0;
+
+ if (size < in->VF_GOPSize) // shrink or grow GOPBuffer to exactly `size` entries
+ for (i=0; i < (in->VF_GOPSize - size); i++)
+ free(GOPBuffer[size+i]);
+ else if (size > in->VF_GOPSize)
+ for (i=0; i < (size - in->VF_GOPSize); i++)
+ GOPBuffer[in->VF_GOPSize+i] = reinterpret_cast<unsigned char*>(malloc(in->VF_FrameSize)); // NOTE(review): result unchecked; `size` is not compared with MAX_GOP_SIZE
+
+ in->VF_GOPSize = size;
+
+ File_Flag = GOPList[now]->file; // seek to the start of the GOP recorded by Open()
+ _lseeki64(Infile[GOPList[now]->file], GOPList[now]->position, SEEK_SET);
+ Initialize_Buffer();
+
+ while (Get_Hdr() && picture_coding_type!=I_TYPE); // skip headers until the I picture
+
+ Decode_Picture(0, dst, pitch);
+
+ while (Get_Hdr() && picture_coding_type==B_TYPE); // skip the B pictures that follow it
+
+ if (picture_structure!=FRAME_PICTURE)
+ {
+ Decode_Picture(0, dst, pitch);
+ Get_Hdr();
+ }
+
+ Decode_Picture(1, dst, pitch);
+
+ if (picture_structure!=FRAME_PICTURE)
+ {
+ Get_Hdr();
+ Decode_Picture(1, dst, pitch);
+ }
+
+ Copyodd(dst, GOPBuffer[0], pitch, 1); // store the picture as entry 0 of the GOP cache
+ Copyeven(dst, GOPBuffer[0], pitch, 1);
+
+ while (remain && Get_Hdr()) // decode and cache entries 1..ref
+ {
+ Decode_Picture(1, dst, pitch);
+
+ if (picture_structure!=FRAME_PICTURE)
+ {
+ Get_Hdr();
+ Decode_Picture(1, dst, pitch);
+ }
+
+ Copyodd(dst, GOPBuffer[ref - remain + 1], pitch, 1);
+ Copyeven(dst, GOPBuffer[ref - remain + 1], pitch, 1);
+
+ remain--;
+ }
+
+ if (!Full_Frame && ref>=(size-2)) // near the end of the GOP: also keep the picture in dstFrame
+ {
+ Copyodd(dst, dstFrame, pitch, 1);
+ Copyeven(dst, dstFrame, pitch, 1);
+ }
+ }
+ else
+ {
+ remain = ref - in->VF_OldRef; // same GOP as last time: only pictures beyond VF_OldRef are missing
+
+ if (remain > 0)
+ {
+ in->VF_OldRef = ref;
+
+ while (remain && Get_Hdr())
+ {
+ Decode_Picture(1, dst, pitch);
+
+ if (picture_structure!=FRAME_PICTURE)
+ {
+ Get_Hdr();
+ Decode_Picture(1, dst, pitch);
+ }
+
+ Copyodd(dst, GOPBuffer[ref - remain + 1], pitch, 1);
+ Copyeven(dst, GOPBuffer[ref - remain + 1], pitch, 1);
+
+ remain--;
+ }
+
+ if (!Full_Frame && ref>=(size-2))
+ {
+ Copyodd(dst, dstFrame, pitch, 1);
+ Copyeven(dst, dstFrame, pitch, 1);
+ }
+ }
+ }
+
+ switch (fo) // assemble the output from the cached pictures
+ {
+ case 0:
+ Copyodd(GOPBuffer[ref], dst, pitch, 0); // both line sets from the same picture
+ Copyeven(GOPBuffer[ref], dst, pitch, 0);
+ break;
+
+ case 1:
+ Copyodd(GOPBuffer[ref-1], dst, pitch, 0); // first line set from the earlier picture
+ Copyeven(GOPBuffer[ref], dst, pitch, 0);
+ break;
+
+ case 2:
+ Copyodd(GOPBuffer[ref], dst, pitch, 0);
+ Copyeven(GOPBuffer[ref-1], dst, pitch, 0); // second line set from the earlier picture
+ break;
+ }
+
+ in->VF_OldFrame = origin;
+}
+
+// Releases everything Open() and Decode() acquired: the GOP and frame lists,
+// the project file, the GOP picture cache, the source file descriptors, all
+// frame and work buffers and the optional OpenDVD library.
+//
+// Every counter is reset and every pointer cleared once its resource is gone,
+// so the function is safe to call repeatedly. That matters because the
+// destructor calls Close() again after an explicit Close().
+void CMPEG2Dec::Close()
+{
+    DWORD n;
+    int i;
+
+    for(n = 0; n < VF_GOPLimit; n++) free(GOPList[n]);
+    VF_GOPLimit = 0;
+
+    for(n = 0; n < VF_FrameLimit; n++) free(FrameList[n]);
+    VF_FrameLimit = 0;
+
+    if (VF_File != NULL)
+    {
+        fclose(VF_File);
+        VF_File = NULL;
+    }
+
+    while (VF_GOPSize)
+    {
+        VF_GOPSize--;
+        free(GOPBuffer[VF_GOPSize]);
+    }
+
+    while (File_Limit)
+    {
+        File_Limit--;
+        _close(Infile[File_Limit]);
+    }
+
+    for (i=0; i<MAX_FILE_NUMBER; i++)
+    {
+        delete [] Infilename[i];
+        Infilename[i] = NULL;
+    }
+
+    for (i=0; i<3; i++)
+    {
+        delete [] backward_reference_frame[i];
+        delete [] forward_reference_frame[i];
+        delete [] auxframe[i];
+        backward_reference_frame[i] = forward_reference_frame[i] = auxframe[i] = NULL;
+    }
+
+    delete [] u422;
+    delete [] v422;
+    delete [] u444;
+    delete [] v444;
+    delete [] dstFrame;
+    u422 = v422 = u444 = v444 = dstFrame = NULL;
+
+    if(Luminance_Flag)
+        delete [] lum;
+    lum = NULL;
+
+    // p_block[] and p_fTempArray were allocated as BYTE arrays in Open(), so
+    // they are deleted through a BYTE pointer rather than short* / void*.
+    // block[] and fTempArray only point into those allocations.
+    for (i=0; i<8; i++)
+    {
+        delete [] reinterpret_cast<BYTE*>(p_block[i]);
+        p_block[i] = block[i] = NULL;
+    }
+
+    delete [] reinterpret_cast<BYTE*>(p_fTempArray);
+    p_fTempArray = fTempArray = NULL;
+
+    if (hLibrary)
+    {
+        FreeLibrary(hLibrary);
+        hLibrary = NULL;
+        BufferOp = 0; // pointed into the library that was just unloaded
+    }
+}
+
+// Copies every second line, beginning with the first line, from `src` to
+// `dst`; Clip_Height/2 lines of DSTBYTES bytes each are transferred.
+// With `forward` non-zero, `src` rows are `pitch` bytes apart and `dst` rows
+// DSTBYTES bytes apart; with `forward` zero the two layouts are exchanged.
+void CMPEG2Dec::Copyodd(unsigned char *src, unsigned char *dst, int pitch, int forward)
+{
+    int srcStep;
+    int dstStep;
+
+    // Each step skips the line in between, hence twice the row size.
+    if (forward)
+    {
+        srcStep = pitch << 1;
+        dstStep = DSTBYTES2;
+    }
+    else
+    {
+        srcStep = DSTBYTES2;
+        dstStep = pitch << 1;
+    }
+
+    int rowsLeft = Clip_Height >> 1;
+    while (rowsLeft-- > 0)
+    {
+        memcpy(dst, src, DSTBYTES);
+        src += srcStep;
+        dst += dstStep;
+    }
+}
+
+// Copies every second line, beginning with the second line, from `src` to
+// `dst`; Clip_Height/2 lines of DSTBYTES bytes each are transferred.
+// With `forward` non-zero, `src` rows are `pitch` bytes apart and `dst` rows
+// DSTBYTES bytes apart; with `forward` zero the two layouts are exchanged.
+void CMPEG2Dec::Copyeven(unsigned char *src, unsigned char *dst, int pitch, int forward)
+{
+    int srcStep;
+    int dstStep;
+
+    // Advance both pointers by one row to reach the second line, then step
+    // over two rows at a time.
+    if (forward)
+    {
+        src += pitch;
+        dst += DSTBYTES;
+        srcStep = pitch << 1;
+        dstStep = DSTBYTES2;
+    }
+    else
+    {
+        src += DSTBYTES;
+        dst += pitch;
+        srcStep = DSTBYTES2;
+        dstStep = pitch << 1;
+    }
+
+    int rowsLeft = Clip_Height >> 1;
+    while (rowsLeft-- > 0)
+    {
+        memcpy(dst, src, DSTBYTES);
+        src += srcStep;
+        dst += dstStep;
+    }
+}
diff --git a/src/filters/source/D2VSource/MPEG2Dec.h b/src/filters/source/D2VSource/MPEG2Dec.h
new file mode 100644
index 000000000..34368208b
--- /dev/null
+++ b/src/filters/source/D2VSource/MPEG2Dec.h
@@ -0,0 +1,304 @@
+#pragma once
+
+#include <windows.h>
+#include <winreg.h>
+#include <stdio.h>
+#include <io.h>
+#include <fcntl.h>
+
+/* code definition */
+#define PICTURE_START_CODE 0x100
+#define SLICE_START_CODE_MIN 0x101
+#define SLICE_START_CODE_MAX 0x1AF
+#define USER_DATA_START_CODE 0x1B2
+#define SEQUENCE_HEADER_CODE 0x1B3 // Open() reads start codes until it finds this one
+#define EXTENSION_START_CODE 0x1B5
+#define SEQUENCE_END_CODE 0x1B7
+#define GROUP_START_CODE 0x1B8
+
+#define SYSTEM_END_CODE 0x1B9
+#define PACK_START_CODE 0x1BA
+#define SYSTEM_START_CODE 0x1BB
+#define PRIVATE_STREAM_1 0x1BD
+#define VIDEO_ELEMENTARY_STREAM 0x1E0
+
+/* extension start code IDs */
+#define SEQUENCE_EXTENSION_ID 1
+#define SEQUENCE_DISPLAY_EXTENSION_ID 2
+#define QUANT_MATRIX_EXTENSION_ID 3
+#define COPYRIGHT_EXTENSION_ID 4
+#define PICTURE_DISPLAY_EXTENSION_ID 7
+#define PICTURE_CODING_EXTENSION_ID 8
+
+#define ZIG_ZAG 0
+#define MB_WEIGHT 32
+#define MB_CLASS4 64
+ // values compared with picture_coding_type
+#define I_TYPE 1
+#define P_TYPE 2
+#define B_TYPE 3
+ // values compared with picture_structure
+#define TOP_FIELD 1
+#define BOTTOM_FIELD 2
+#define FRAME_PICTURE 3
+ // macroblock type flags; the values are distinct bits
+#define MACROBLOCK_INTRA 1
+#define MACROBLOCK_PATTERN 2
+#define MACROBLOCK_MOTION_BACKWARD 4
+#define MACROBLOCK_MOTION_FORWARD 8
+#define MACROBLOCK_QUANT 16
+
+#define MC_FIELD 1
+#define MC_FRAME 2 // same value as MC_16X8
+#define MC_16X8 2 // same value as MC_FRAME
+#define MC_DMV 3
+
+#define MV_FIELD 0
+#define MV_FRAME 1
+ // values of chroma_format; also the index into the ChromaFormat table in MPEG2Dec.cpp
+#define CHROMA420 1
+#define CHROMA422 2
+#define CHROMA444 3
+
+#define BUFFER_SIZE 2048 // size of the Rdbfr read buffer; Open() multiplies GOP positions by it
+#define MAX_FILE_NUMBER 256 // capacity of Infile[] and Infilename[]
+ // values of IDCT_Flag (iDCT_Algorithm= line of the project file)
+#define IDCT_MMX 1
+#define IDCT_SSEMMX 2
+#define IDCT_FPU 3
+#define IDCT_REF 4
+ // values of FO_Flag (Field_Operation= line of the project file)
+#define FO_NONE 0
+#define FO_FILM 1
+#define FO_SWAP 2
+
+
+typedef void (WINAPI *PBufferOp) (unsigned char*, int, int); // type of the "BufferOp" export looked up in OpenDVD.dll
+
+#define MAX_FRAME_NUMBER 1000000 // capacity of GOPList[] and FrameList[]
+#define MAX_GOP_SIZE 1024 // capacity of GOPBuffer[]
+
+
+// MPEG-2 video decoder driven by a DVD2AVI project file.
+//
+// Open() parses the project file and allocates all buffers, Decode() renders
+// one output frame, Close() releases everything and is also run by the
+// destructor.
+//
+// The object owns raw heap blocks, file descriptors, a FILE* and a library
+// handle that Close() releases, so a copy would release them a second time.
+// Copying is therefore disabled (declared private, never defined).
+// GOPList and FrameList are MAX_FRAME_NUMBER-element pointer arrays stored
+// inside the object.
+class CMPEG2Dec
+{
+protected:
+
+    // getbit.cpp
+    void Initialize_Buffer();
+    void Fill_Buffer();
+    void Next_Packet();
+    void Flush_Buffer_All(unsigned int N);
+    unsigned int Get_Bits_All(unsigned int N);
+    void Next_File();
+
+    unsigned int Show_Bits(unsigned int N);
+    unsigned int Get_Bits(unsigned int N);
+    void Flush_Buffer(unsigned int N);
+    void Fill_Next();
+    unsigned int Get_Byte();
+    unsigned int Get_Short();
+    void next_start_code();
+
+    // Read buffer and bit reader state.
+    unsigned char Rdbfr[BUFFER_SIZE], *Rdptr, *Rdmax;
+    unsigned int CurrentBfr, NextBfr, BitsLeft, Val, Read;
+
+    // gethdr.cpp
+    int Get_Hdr();
+    void sequence_header();
+    int slice_header();
+private:
+    void group_of_pictures_header();
+    void picture_header();
+    void sequence_extension();
+    void sequence_display_extension();
+    void quant_matrix_extension();
+    void picture_display_extension();
+    void picture_coding_extension();
+    void copyright_extension();
+    int extra_bit_information();
+    void extension_and_user_data();
+
+protected:
+    // getpic.cpp
+    void Decode_Picture(int ref, unsigned char *dst, int pitch);
+private:
+    void Update_Picture_Buffers();
+    void picture_data();
+    int slice(int MBAmax);
+    void macroblock_modes(int *pmacroblock_type, int *pmotion_type,
+        int *pmotion_vector_count, int *pmv_format, int *pdmv, int *pmvscale, int *pdct_type);
+    void Clear_Block(int count);
+    void Add_Block(int count, int bx, int by, int dct_type, int addflag);
+    void motion_compensation(int MBA, int macroblock_type, int motion_type,
+        int PMV[2][2][2], int motion_vertical_field_select[2][2], int dmvector[2], int dct_type);
+    void skipped_macroblock(int dc_dct_pred[3], int PMV[2][2][2],
+        int *motion_type, int motion_vertical_field_select[2][2], int *macroblock_type);
+    int start_of_slice(int *MBA, int *MBAinc, int dc_dct_pred[3], int PMV[2][2][2]);
+    int decode_macroblock(int *macroblock_type, int *motion_type, int *dct_type,
+        int PMV[2][2][2], int dc_dct_pred[3], int motion_vertical_field_select[2][2], int dmvector[2]);
+    void Decode_MPEG2_Intra_Block(int comp, int dc_dct_pred[]);
+    void Decode_MPEG2_Non_Intra_Block(int comp);
+
+    int Get_macroblock_type();
+    int Get_I_macroblock_type();
+    int Get_P_macroblock_type();
+    int Get_B_macroblock_type();
+    int Get_D_macroblock_type();
+    int Get_coded_block_pattern();
+    int Get_macroblock_address_increment();
+    int Get_Luma_DC_dct_diff();
+    int Get_Chroma_DC_dct_diff();
+
+    void form_predictions(int bx, int by, int macroblock_type, int motion_type,
+        int PMV[2][2][2], int motion_vertical_field_select[2][2], int dmvector[2]);
+    void form_prediction(unsigned char *src[], int sfield, unsigned char *dst[], int dfield,
+        int lx, int lx2, int w, int h, int x, int y, int dx, int dy, int average_flag);
+    void form_component_prediction(unsigned char *src, unsigned char *dst,
+        int lx, int lx2, int w, int h, int x, int y, int dx, int dy, int average_flag);
+
+    // motion.cpp
+    void motion_vectors(int PMV[2][2][2], int dmvector[2], int motion_vertical_field_select[2][2],
+        int s, int motion_vector_count, int mv_format,
+        int h_r_size, int v_r_size, int dmv, int mvscale);
+    void Dual_Prime_Arithmetic(int DMV[][2], int *dmvector, int mvx, int mvy);
+private:
+    void motion_vector(int *PMV, int *dmvector, int h_r_size, int v_r_size,
+        int dmv, int mvscale, int full_pel_vector);
+    void decode_motion_vector(int *pred, int r_size, int motion_code,
+        int motion_residualesidual, int full_pel_vector);
+    int Get_motion_code();
+    int Get_dmvector();
+
+protected:
+    // store.cpp
+    void assembleFrame(unsigned char *src[], int pf, unsigned char *dst, int pitch);
+private:
+    void Luminance_Filter(unsigned char *src, unsigned char *dst);
+    void conv420to422(unsigned char *src, unsigned char *dst, int frame_type);
+    void conv422to444(unsigned char *src, unsigned char *dst);
+    void conv444toRGB24(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int pitch);
+    void conv422toYUY2(unsigned char *py, unsigned char *pu, unsigned char *pv, unsigned char *dst, int pitch);
+
+protected:
+    // decoder operation control flags
+    int Fault_Flag;
+    int File_Flag;          // index of the source file being read
+    int File_Limit;         // number of source files opened by Open()
+    int FO_Flag;            // FO_NONE, FO_FILM or FO_SWAP
+    int IDCT_Flag;          // one of the IDCT_* values
+    int SystemStream_Flag;
+
+    int Luminance_Flag;     // non-zero when `lum` has been allocated
+    int Resize_Flag;
+
+    int KeyOp_Flag;         // set by Open() when lfsr0 or lfsr1 is non-zero
+    int lfsr0, lfsr1;
+    PBufferOp BufferOp;     // "BufferOp" export of OpenDVD.dll, or 0
+
+    int Infile[MAX_FILE_NUMBER];        // descriptors of the source files
+    char *Infilename[MAX_FILE_NUMBER];  // their paths, _MAX_PATH bytes each
+
+    int intra_quantizer_matrix[64];
+    int non_intra_quantizer_matrix[64];
+    int chroma_intra_quantizer_matrix[64];
+    int chroma_non_intra_quantizer_matrix[64];
+
+    int load_intra_quantizer_matrix;
+    int load_non_intra_quantizer_matrix;
+    int load_chroma_intra_quantizer_matrix;
+    int load_chroma_non_intra_quantizer_matrix;
+
+    int q_scale_type;
+    int alternate_scan;
+    int quantizer_scale;
+
+    // The p_ pointers are the raw allocations made in Open(); fTempArray and
+    // block[] point to a 64-byte aligned address inside them.
+    void *fTempArray, *p_fTempArray;
+    short *block[8], *p_block[8];
+    int pf_backward, pf_forward, pf_current;
+
+    // global values
+    unsigned char *backward_reference_frame[3], *forward_reference_frame[3];
+    unsigned char *auxframe[3], *current_frame[3];
+    unsigned char *u422, *v422, *u444, *v444, /* *rgb24,*/ *lum;
+    unsigned char *dstFrame; // replaces rgb24
+    __int64 RGB_Scale, RGB_Offset, RGB_CRV, RGB_CBU, RGB_CGX, LumOffsetMask, LumGainMask;
+
+    int HALF_WIDTH, PROGRESSIVE_HEIGHT, INTERLACED_HEIGHT, DOUBLE_WIDTH;
+    int /*TWIDTH, SWIDTH,*/ HALF_WIDTH_D8, LUM_AREA, CLIP_AREA, HALF_CLIP_AREA, CLIP_STEP;
+    int DSTBYTES, DSTBYTES2; // these replace TWIDTH and SWIDTH
+public:
+    int Clip_Width, Clip_Height, Resize_Width, Resize_Height;
+protected:
+
+    int Coded_Picture_Width, Coded_Picture_Height, Chroma_Width, Chroma_Height;
+    int block_count, Second_Field;
+    int horizontal_size, vertical_size, mb_width, mb_height;
+
+    /* ISO/IEC 13818-2 section 6.2.2.3: sequence_extension() */
+    int progressive_sequence;
+    int chroma_format;
+
+    /* ISO/IEC 13818-2 section 6.2.3: picture_header() */
+    int picture_coding_type;
+    int temporal_reference;
+
+    /* ISO/IEC 13818-2 section 6.2.3.1: picture_coding_extension() header */
+    int f_code[2][2];
+    int picture_structure;
+    int frame_pred_frame_dct;
+    int progressive_frame;
+    int concealment_motion_vectors;
+    int intra_dc_precision;
+    int top_field_first;
+    int repeat_first_field;
+    int intra_vlc_format;
+
+    // interface
+    // One entry per I frame found in the project file.
+    typedef struct {
+        DWORD number;       // coded picture number of the I frame
+        int file;           // index into Infile[]
+        __int64 position;   // file offset to seek to
+    } GOPLIST;
+    GOPLIST *GOPList[MAX_FRAME_NUMBER];
+
+    // One entry per output frame.
+    typedef struct {
+        DWORD top;          // coded picture supplying one field
+        DWORD bottom;       // coded picture supplying the other field
+        char forward;       // set when the next entry shares a coded picture
+        char backward;      // set when the previous entry shares a coded picture
+    } FRAMELIST;
+    FRAMELIST *FrameList[MAX_FRAME_NUMBER];
+
+    unsigned char *GOPBuffer[MAX_GOP_SIZE]; // decoded pictures of the current GOP
+public:
+    BOOL Field_Order, Full_Frame;
+protected:
+    HINSTANCE hLibrary; // OpenDVD.dll, or NULL
+
+    void Copyodd(unsigned char *src, unsigned char *dst, int pitch, int forward);
+    void Copyeven(unsigned char *src, unsigned char *dst, int pitch, int forward);
+public:
+    FILE *VF_File;          // the project file
+    int VF_FrameRate;       // value of the Frame_Rate= line
+    DWORD VF_FrameLimit;    // number of usable FrameList entries
+    DWORD VF_FrameBound;    // number of coded pictures listed in the project file
+    DWORD VF_GOPLimit;      // number of GOPList entries
+    DWORD VF_GOPNow;        // GOP held in GOPBuffer; VF_GOPLimit when none
+    DWORD VF_GOPSize;       // number of allocated GOPBuffer entries
+    int VF_FrameSize;       // bytes per GOPBuffer entry
+    DWORD VF_OldFrame;      // frame handled by the previous Decode() call
+    DWORD VF_OldRef;        // last position decoded inside the current GOP
+
+    enum DstFormat {
+        RGB24, YUY2
+    };
+    DstFormat m_dstFormat;
+
+    CMPEG2Dec();
+    ~CMPEG2Dec() {Close();}
+    int Open(LPCTSTR path, DstFormat);
+    void Close();
+    void Decode(unsigned char *dst, DWORD frame, int pitch);
+    bool dstRGB24() const { return m_dstFormat == RGB24; }
+    bool dstYUY2() const { return m_dstFormat == YUY2; }
+
+private:
+    // Not copyable: a copy would share the raw resources that Close()
+    // releases. Declared without a definition on purpose.
+    CMPEG2Dec(const CMPEG2Dec&);
+    CMPEG2Dec& operator=(const CMPEG2Dec&);
+};
diff --git a/src/filters/source/D2VSource/d2vsource.rc b/src/filters/source/D2VSource/d2vsource.rc
new file mode 100644
index 000000000..6df83bdd8
--- /dev/null
+++ b/src/filters/source/D2VSource/d2vsource.rc
@@ -0,0 +1,117 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+#include "..\..\..\..\include\Version.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// Hungarian resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_HUN)
+#ifdef _WIN32
+LANGUAGE LANG_HUNGARIAN, SUBLANG_DEFAULT
+#pragma code_page(1250)
+#endif //_WIN32
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION VERSION_MAJOR,VERSION_MINOR,VERSION_REV,VERSION_PATCH
+ PRODUCTVERSION VERSION_MAJOR,VERSION_MINOR,VERSION_REV,VERSION_PATCH
+ FILEFLAGSMASK 0x17L
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x4L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040e04b0"
+ BEGIN
+ VALUE "Comments", "http://sourceforge.net/projects/mpc-hc/"
+ VALUE "CompanyName", "MPC-HC Team"
+ VALUE "FileDescription", "D2V Source Filter"
+ VALUE "FileVersion", "1, 1, 0, 0"
+ VALUE "InternalName", "D2V Source Filter"
+ VALUE "LegalCopyright", "Copyright (C) 2002-2010 see AUTHORS file"
+ VALUE "OriginalFilename", "D2VSource.ax"
+ VALUE "ProductName", "D2V Source Filter"
+ VALUE "ProductVersion", "1, 1, 0, 0"
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x40e, 1200
+ END
+END
+
+#endif // Hungarian resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE
+BEGIN
+ "resource.h\0"
+END
+
+2 TEXTINCLUDE
+BEGIN
+ "#include ""afxres.h""\r\n"
+ "\0"
+END
+
+3 TEXTINCLUDE
+BEGIN
+ "\r\n"
+ "\0"
+END
+
+#endif // APSTUDIO_INVOKED
+
+#endif // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+
+
+/////////////////////////////////////////////////////////////////////////////
+#endif // not APSTUDIO_INVOKED
+
+
diff --git a/src/filters/source/D2VSource/d2vsource.vcproj b/src/filters/source/D2VSource/d2vsource.vcproj
new file mode 100644
index 000000000..0fcded0c4
--- /dev/null
+++ b/src/filters/source/D2VSource/d2vsource.vcproj
@@ -0,0 +1,962 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="9,00"
+ Name="d2vsource"
+ ProjectGUID="{83CC6B88-A112-4192-BD5A-F2A249AF2277}"
+ RootNamespace="d2vsource"
+ Keyword="Win32Proj"
+ TargetFrameworkVersion="131072"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ <Platform
+ Name="x64"
+ />
+ </Platforms>
+ <ToolFiles>
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Debug Unicode|Win32"
+ ConfigurationType="2"
+ InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\debug.vsprops"
+ UseOfMFC="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+ PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ RegisterOutput="true"
+ AdditionalDependencies="strmbaseDU.lib basesourceDU.lib Winmm.lib"
+ OutputFile="$(OutDir)\$(ProjectName).ax"
+ AdditionalLibraryDirectories="..\..\..\..\lib"
+ ModuleDefinitionFile="D2VSource.def"
+ SubSystem="2"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug Unicode|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="2"
+ InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\debug.vsprops"
+ UseOfMFC="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+ PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="strmbaseDU.lib basesourceDU.lib Winmm.lib"
+ OutputFile="$(OutDir)\$(ProjectName).ax"
+ AdditionalLibraryDirectories="..\..\..\..\lib64"
+ ModuleDefinitionFile="D2VSource.def"
+ SubSystem="2"
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ TargetMachine="17"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release Unicode|Win32"
+ ConfigurationType="2"
+ InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\release.vsprops"
+ UseOfMFC="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+ PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL"
+ BufferSecurityCheck="true"
+ EnableEnhancedInstructionSet="1"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ RegisterOutput="true"
+ AdditionalDependencies="strmbaseRU.lib basesourceRU.lib Winmm.lib"
+ OutputFile="..\..\..\..\bin\x86\$(ProjectName).ax"
+ AdditionalLibraryDirectories="..\..\..\..\lib"
+ ModuleDefinitionFile="D2VSource.def"
+ GenerateDebugInformation="true"
+ SubSystem="2"
+ LargeAddressAware="2"
+ RandomizedBaseAddress="2"
+ DataExecutionPrevention="2"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release Unicode|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="2"
+ InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\release.vsprops"
+ UseOfMFC="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+ PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL"
+ BufferSecurityCheck="true"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ AdditionalDependencies="strmbaseRU.lib basesourceRU.lib Winmm.lib"
+ OutputFile="..\..\..\..\bin\x64\$(ProjectName).ax"
+ AdditionalLibraryDirectories="..\..\..\..\lib64"
+ ModuleDefinitionFile="D2VSource.def"
+ GenerateDebugInformation="true"
+ SubSystem="2"
+ LargeAddressAware="2"
+ RandomizedBaseAddress="2"
+ DataExecutionPrevention="2"
+ TargetMachine="17"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug Unicode lib|Win32"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\debug.vsprops"
+ UseOfMFC="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/MP"
+ AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+ PreprocessorDefinitions="WIN32;_DEBUG"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="..\..\..\..\lib\$(ProjectName)DU.lib"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Debug Unicode lib|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\debug.vsprops"
+ UseOfMFC="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/MP"
+ AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+ PreprocessorDefinitions="_WIN64;_DEBUG"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="..\..\..\..\lib64\$(ProjectName)DU.lib"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release Unicode lib|Win32"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\release.vsprops"
+ UseOfMFC="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/MP"
+ AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+ PreprocessorDefinitions="WIN32;NDEBUG"
+ BufferSecurityCheck="true"
+ EnableEnhancedInstructionSet="1"
+ DisableSpecificWarnings="4244;4799;4731;"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="..\..\..\..\lib\$(ProjectName)RU.lib"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release Unicode lib|x64"
+ OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+ IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+ ConfigurationType="4"
+ InheritedPropertySheets="..\..\..\common.vsprops;..\..\..\release.vsprops"
+ UseOfMFC="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ TargetEnvironment="3"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalOptions="/MP"
+ AdditionalIncludeDirectories="..\..\..\..\include;..\..\BaseClasses"
+ PreprocessorDefinitions="_WIN64;NDEBUG"
+ BufferSecurityCheck="true"
+ EnableEnhancedInstructionSet="0"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLibrarianTool"
+ OutputFile="..\..\..\..\lib64\$(ProjectName)RU.lib"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm"
+ >
+ <File
+ RelativePath="D2VSource.cpp"
+ >
+ <FileConfiguration
+ Name="Debug Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="D2VSource.def"
+ >
+ </File>
+ <File
+ RelativePath="idctfpu.cpp"
+ >
+ <FileConfiguration
+ Name="Debug Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="idctmmx.asm"
+ >
+ <!-- idctmmx.asm is assembled by a per-configuration custom build step (ml) into $(OutDir)\$(InputName).obj. Both the /Fo output path and the input path are wrapped in a balanced pair of quotes. The x64 configurations keep the step but exclude it from the build. -->
+ <FileConfiguration
+ Name="Debug Unicode|Win32"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+ Outputs="$(OutDir)\$(InputName).obj"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+ Outputs="$(OutDir)\$(InputName).obj"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode|Win32"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+ Outputs="$(OutDir)\$(InputName).obj"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+ Outputs="$(OutDir)\$(InputName).obj"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|Win32"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+ Outputs="$(OutDir)\$(InputName).obj"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+ Outputs="$(OutDir)\$(InputName).obj"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|Win32"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+ Outputs="$(OutDir)\$(InputName).obj"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ CommandLine="ml /c /coff /Cx /nologo /Fo&quot;$(OutDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+ Outputs="$(OutDir)\$(InputName).obj"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="idctref.cpp"
+ >
+ <FileConfiguration
+ Name="Debug Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="MPEG2Dec.cpp"
+ >
+ <FileConfiguration
+ Name="Debug Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="stdafx.cpp"
+ >
+ <FileConfiguration
+ Name="Debug Unicode|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|x64"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ UsePrecompiledHeader="1"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl;inc"
+ >
+ <File
+ RelativePath="D2VSource.h"
+ >
+ </File>
+ <File
+ RelativePath="MPEG2Dec.h"
+ >
+ </File>
+ <File
+ RelativePath=".\resource.h"
+ >
+ <FileConfiguration
+ Name="Debug Unicode lib|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ </FileConfiguration>
+ </File>
+ <File
+ RelativePath="stdafx.h"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Resource Files"
+ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+ >
+ <File
+ RelativePath=".\d2vsource.rc"
+ >
+ <FileConfiguration
+ Name="Debug Unicode lib|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Debug Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|Win32"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release Unicode lib|x64"
+ ExcludedFromBuild="true"
+ >
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ </FileConfiguration>
+ </File>
+ </Filter>
+ </Files>
+ <Globals>
+ <Global
+ Name="DevPartner_IsInstrumented"
+ Value="0"
+ />
+ </Globals>
+</VisualStudioProject>
diff --git a/src/filters/source/D2VSource/idctfpu.cpp b/src/filters/source/D2VSource/idctfpu.cpp
new file mode 100644
index 000000000..92fcd3954
--- /dev/null
+++ b/src/filters/source/D2VSource/idctfpu.cpp
@@ -0,0 +1,456 @@
+#include "stdafx.h"
+
+/* idct.c, inverse fast discrete cosine transform */
+
+
+/*************************************************************/
+/* inverse two dimensional DCT, Chen-Wang algorithm */
+/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) */
+/* */
+/* floating point conversion by Miha Peternel */
+/* x87 hand-optimized assembly by Miha Peternel */
+/* 27.11. - 11.12.2000 */
+/* */
+/* You are free to use this code in your project if: */
+/* - no changes are made to this message */
+/* - any changes to this code are publicly available */
+/* - your project documentation contains the following text: */
+/* "This software contains fast high-quality IDCT decoder */
+/* by Miha Peternel." */
+/* */
+/*************************************************************/
+
+
+/////////////////////////////////////////////////////
+//
+// TODO:
+// - loops can be easily vectorized for SIMD
+//
+/////////////////////////////////////////////////////
+
+#include <math.h>
+// pi, used by Initialize_FPU_IDCT() to evaluate the cosine constants below.
+# define PI 3.1415926535897932384626433832795
+
+// Type of every constant below. FPU_IDCT() loads them with "qword ptr"
+// operands, so this has to remain an 8-byte floating point type.
+#define FLOAT double
+
+// 2^52 + 1024. Not referenced by any code in this file.
+const static double RC = 1.0*1024*1024*1024*1024*256*16 + 1024; // magic + clip center
+
+// The W* and S2 values are zero until Initialize_FPU_IDCT() has run.
+// Wk = sqrt(2)*cos(k*pi/16); "m"/"p" names hold differences/sums of two Wk.
+static FLOAT W1; // sqrt(2)*cos(1*pi/16)
+static FLOAT W2; // sqrt(2)*cos(2*pi/16)
+static FLOAT W5; // sqrt(2)*cos(5*pi/16)
+
+// The *_8 variants hold the same constant divided by 8; FPU_IDCT() uses them
+// in its column pass.
+static FLOAT W1_8;
+static FLOAT W2_8;
+static FLOAT W5_8;
+
+static FLOAT W7; // sqrt(2)*cos(7*pi/16)
+static FLOAT W1mW7; // W1-W7
+static FLOAT W1pW7; // W1+W7
+
+static FLOAT W3; // sqrt(2)*cos(3*pi/16)
+static FLOAT W3mW5; // W3-W5
+static FLOAT W3pW5; // W3+W5
+
+static FLOAT W6; // sqrt(2)*cos(6*pi/16)
+static FLOAT W2mW6; // W2-W6
+static FLOAT W2pW6; // W2+W6
+
+static FLOAT S2; // 1/sqrt(2)
+static FLOAT D8 = 1.0/8; // the only constant with a static initializer
+
+static FLOAT W7_8; // W7/8
+static FLOAT W1mW7_8; // (W1-W7)/8
+static FLOAT W1pW7_8; // (W1+W7)/8
+
+static FLOAT W3_8; // W3/8
+static FLOAT W3mW5_8; // (W3-W5)/8
+static FLOAT W3pW5_8; // (W3+W5)/8
+
+static FLOAT W6_8; // W6/8
+static FLOAT W2mW6_8; // (W2-W6)/8
+static FLOAT W2pW6_8; // (W2+W6)/8
+
+/* global declarations */
+
+/* private data */
+// 2048-entry lookup table that clamps a value to -256..255. It is filled by
+// Initialize_FPU_IDCT(), which also points iclp at entry 1024 so the table can
+// be indexed with -1024..1023.
+static short iclip[1024+1024]; /* clipping table */
+static short *iclp;
+
+// Computes the cosine constants and builds the clipping table read by
+// FPU_IDCT(). Takes no arguments and returns nothing; it only writes the
+// file-scope statics (S2, the W* family, iclip and iclp).
+void Initialize_FPU_IDCT()
+{
+	// 1/sqrt(2)
+	S2 = sqrt(0.5);
+
+	// Base constants: Wk = sqrt(2)*cos(k*pi/16)
+	W1 = sqrt(2.0)*cos(PI*(1.0/16));
+	W2 = sqrt(2.0)*cos(PI*(2.0/16));
+	W3 = sqrt(2.0)*cos(PI*(3.0/16));
+	W5 = sqrt(2.0)*cos(PI*(5.0/16));
+	W6 = sqrt(2.0)*cos(PI*(6.0/16));
+	W7 = sqrt(2.0)*cos(PI*(7.0/16));
+
+	// Sums and differences of pairs of base constants
+	W1mW7 = W1-W7;
+	W1pW7 = W1+W7;
+	W3mW5 = W3-W5;
+	W3pW5 = W3+W5;
+	W2mW6 = W2-W6;
+	W2pW6 = W2+W6;
+
+	// Every constant once more, divided by 8
+	W1_8 = W1/8;
+	W2_8 = W2/8;
+	W3_8 = W3/8;
+	W5_8 = W5/8;
+	W6_8 = W6/8;
+	W7_8 = W7/8;
+	W1mW7_8 = W1mW7/8;
+	W1pW7_8 = W1pW7/8;
+	W3mW5_8 = W3mW5/8;
+	W3pW5_8 = W3pW5/8;
+	W2mW6_8 = W2mW6/8;
+	W2pW6_8 = W2pW6/8;
+
+	// iclp addresses the middle of iclip, so indices -1024..1023 are valid.
+	// Each entry holds its own index clamped to the range -256..255.
+	iclp = iclip+1024;
+	for (int value = -1024; value < 1024; ++value)
+	{
+		short clamped;
+		if (value < -256)
+			clamped = -256;
+		else if (value > 255)
+			clamped = 255;
+		else
+			clamped = (short) value;
+		iclp[value] = clamped;
+	}
+}
+
+// In-place 8x8 inverse DCT written as x87 inline assembly (32-bit MSVC syntax).
+//
+// block: 64 coefficients stored as 16-bit values, 8 per row. The same memory
+//        receives the result.
+//
+// The routine reads the W*/S2/D8 constants and the iclip table, so
+// Initialize_FPU_IDCT() has to have filled them in beforehand.
+//
+// Processing is done in two passes over a temporary 8x8 table of doubles that
+// lives on the stack: a row pass (block -> table) followed by a column pass
+// (table -> block) that also converts to integer and clips.
+void FPU_IDCT(short *block)
+{
+ // View the 64 shorts as 32 dwords so that two coefficients are tested at once.
+ int *b = (int *) block;
+ // Fast path: dwords 0..30 are all zero and dword 31 is either 0 or 0x10000
+ // (on x86 that is block[62]==0 with block[63]==1). In that case dword 31 is
+ // cleared and the block is left as all zeros without running the transform.
+ // NOTE(review): the lone 1 in block[63] is presumably produced by the
+ // decoder's mismatch control - confirm against the caller.
+ if( b[0]==0 && (b[31]==0x10000 || b[31]==0) )
+ {
+ if( b[ 1]|b[ 2]|b[ 3]|b[ 4]|b[ 5] )
+ goto normal;
+ if( b[ 6]|b[ 7]|b[ 8]|b[ 9]|b[10] )
+ goto normal;
+ if( b[11]|b[12]|b[13]|b[14]|b[15] )
+ goto normal;
+ if( b[16]|b[17]|b[18]|b[19]|b[20] )
+ goto normal;
+ if( b[21]|b[22]|b[23]|b[24]|b[25] )
+ goto normal;
+ if( b[26]|b[27]|b[28]|b[29]|b[30] )
+ goto normal;
+ b[31]=0;
+ ////empty++;
+ return;
+ }
+normal:
+
+// Stack locals, all addressed relative to ebx:
+//   tmp        - base of the 8x8 table of doubles (512 bytes, grows upwards)
+//   tmp1..tmp3 - three qword scratch slots directly below the table
+//   int0..int7 - eight dword slots below those; they receive fistp results
+// SIZE is the total number of bytes taken from the stack for all of the above
+// plus slack for the alignment of ebx.
+#define tmp ebx
+#define tmp1 ebx-1*8
+#define tmp2 ebx-2*8
+#define tmp3 ebx-3*8
+#define int0 ebx-3*8-1*4
+#define int1 ebx-3*8-2*4
+#define int2 ebx-3*8-3*4
+#define int3 ebx-3*8-4*4
+#define int4 ebx-3*8-5*4
+#define int5 ebx-3*8-6*4
+#define int6 ebx-3*8-7*4
+#define int7 ebx-3*8-8*4
+#define SIZE 8*8*8+3*8+8*4+16 // locals + 16-byte alignment area
+ __asm
+ {
+ // ebx -> 512 bytes below the current stack top (room for the table)
+ lea ebx,[esp-8*8*8]
+ // reserve table + scratch slots + alignment slack
+ sub esp,SIZE
+ and ebx,-16 // force 16-byte alignment of locals
+
+// rows
+ // Row pass: esi walks the 8 input rows (8 shorts each), edi walks the
+ // matching rows of the double table, ecx counts the rows.
+ mov esi,[block]
+ lea edi,[tmp]
+ mov ecx,8
+
+ align 16
+Lrows:
+ // OR together coefficients 1..7 of this row (one word, then three dwords).
+ // A non-zero result means the general transform at L1 is required.
+ movsx eax,word ptr [esi+2]
+ or eax, [esi+4]
+ or eax, [esi+8]
+ or eax, [esi+12]
+ jnz L1
+
+ // Only coefficient 0 can be non-zero: store it, converted to double, into
+ // all eight output slots of the row.
+ fild word ptr [esi+0*2]
+ fst qword ptr [edi+7*8]
+ fst qword ptr [edi+6*8]
+ fst qword ptr [edi+5*8]
+ fst qword ptr [edi+4*8]
+ fst qword ptr [edi+3*8]
+ fst qword ptr [edi+2*8]
+ fst qword ptr [edi+1*8]
+ fstp qword ptr [edi+0*8]
+ jmp L2
+
+ align 16
+ L1:
+
+ // General row: 1-D transform of the eight shorts at [esi] using the
+ // unscaled W* constants and S2; the eight results are written as doubles
+ // to [edi+0*8]..[edi+7*8]. tmp1..tmp3 hold intermediate values.
+ fild word ptr [esi+7*2]
+ fld st(0)
+ fild word ptr [esi+1*2]
+ fadd st(1),st(0)
+ fld qword ptr [W7]
+ fxch st(1)
+ fmul qword ptr [W1mW7]
+ fxch st(1)
+ fmulp st(2),st(0)
+ fadd st(0),st(1)
+ fstp qword ptr [tmp1]
+ fild word ptr [esi+3*2]
+ fld st(0)
+ fxch st(3)
+ fmul qword ptr [W1pW7]
+ fild word ptr [esi+5*2]
+ fadd st(4),st(0)
+ fmul qword ptr [W3mW5]
+ fxch st(1)
+ fsubp st(3),st(0)//fsubrp
+ fld qword ptr [W3]
+ fmulp st(4),st(0)
+ fsubr st(0),st(3)
+ fstp qword ptr [tmp2]
+ fmul qword ptr [W3pW5]
+ fsubp st(2),st(0)//fsubrp
+ fxch st(1)
+ fstp qword ptr [tmp3]
+ fild word ptr [esi+0*2]
+ fild word ptr [esi+4*2]
+ fild word ptr [esi+2*2]
+ fld st(0)
+ fmul qword ptr [W2mW6]
+ fld st(3)
+ fild word ptr [esi+6*2]
+ fxch st(5)
+ fsub st(0),st(4)
+ fxch st(3)
+ fadd st(0),st(5)
+ fxch st(1)
+ faddp st(4),st(0)
+ fld qword ptr [W6]
+ fmulp st(1),st(0)
+ fxch st(4)
+ fmul qword ptr [W2pW6]
+ fld qword ptr [tmp1]
+ fsub qword ptr [tmp2]
+ fld st(5)
+ fxch st(3)
+ faddp st(6),st(0)
+ fld qword ptr [tmp1]
+ fxch st(1)
+ fstp qword ptr [tmp1]
+ fld st(6)
+ fadd qword ptr [tmp3]
+ fxch st(1)
+ fadd qword ptr [tmp2]
+ fxch st(7)
+ fsub qword ptr [tmp3]
+ fxch st(1)
+ fstp qword ptr [tmp2]
+ fld st(4)
+ fxch st(3)
+ fsubrp st(2),st(0)//fsubp
+ fxch st(4)
+ fsub st(0),st(5)
+ fxch st(2)
+ faddp st(5),st(0)
+ fld st(2)
+ fsub st(0),st(1)
+ fxch st(5)
+ fstp qword ptr [tmp3]
+ fld qword ptr [tmp1]
+ fld qword ptr [S2]
+ fxch st(4)
+ faddp st(2),st(0)
+ fld st(3)
+ fxch st(1)
+ fadd st(0),st(5)
+ fmulp st(1),st(0)
+
+ fld qword ptr [tmp3]
+ fadd st(0),st(7)
+ fxch st(5)
+ fsubr qword ptr [tmp1]
+ fxch st(5)
+ fstp qword ptr [edi+0*8]
+ fxch st(6)
+ fsubr qword ptr [tmp3]
+ fld st(2)
+ fxch st(1)
+ fstp qword ptr [edi+7*8]
+ fadd qword ptr [tmp2]
+ fxch st(3)
+ fmulp st(4),st(0)
+ fxch st(2)
+ fstp qword ptr [edi+3*8]
+ fld st(1)
+ fadd st(0),st(5)
+ fxch st(1)
+ fsub qword ptr [tmp2]
+ fxch st(2)
+ fsubrp st(5),st(0)//fsubp
+ fstp qword ptr [edi+1*8]
+ fld st(2)
+ fxch st(1)
+ fstp qword ptr [edi+4*8]
+ fxch st(2)
+ fsub st(0),st(1)
+ fxch st(2)
+ faddp st(1),st(0)
+ fxch st(2)
+ fstp qword ptr [edi+6*8]
+ fstp qword ptr [edi+5*8]
+ fstp qword ptr [edi+2*8]
+ L2:
+ // advance to the next input row (8 shorts) and output row (8 doubles)
+ add esi,8*2
+ add edi,8*8
+ dec ecx
+ jnz Lrows
+
+// columns
+ // Column pass: esi walks the 8 columns of the double table (row stride
+ // 8*8 bytes), edi walks the matching columns of block (row stride 8*2
+ // bytes), ecx counts the columns.
+ lea esi,[tmp]
+ mov edi,[block]
+ // edx -> iclip[1024], the middle of the clip table (same address as iclp)
+ lea edx,[iclip+1024*2]
+ mov ecx,8
+
+ align 16
+Lcols:
+ // 1-D transform down one column using the constants pre-divided by 8
+ // (W*_8, D8) together with S2. Each result is converted to an integer
+ // with fistp into one of the int0..int7 slots, mapped through the clip
+ // table and stored back into block as a 16-bit value.
+ // The table lookup is not range-checked here: it relies on the converted
+ // value lying inside the 2048-entry table, i.e. -1024..1023.
+ fld qword ptr [esi+7*8*8]
+ fld st(0)
+ fld qword ptr [esi+1*8*8]
+ fadd st(1),st(0)
+ fld qword ptr [W7_8]
+ fxch st(1)
+ fmul qword ptr [W1mW7_8]
+ fxch st(1)
+ fmulp st(2),st(0)
+ fadd st(0),st(1)
+ fstp qword ptr [tmp2]
+ fld qword ptr [esi+3*8*8]
+ fld st(0)
+ fxch st(3)
+ fmul qword ptr [W1pW7_8]
+ fld qword ptr [esi+5*8*8]
+ fadd st(4),st(0)
+ fmul qword ptr [W3mW5_8]
+ fxch st(1)
+ fsubp st(3),st(0)//fsubrp
+ fld qword ptr [W3_8]
+ fmulp st(4),st(0)
+ fsubr st(0),st(3)
+ fstp qword ptr [tmp3]
+ fld qword ptr [D8]
+ fld qword ptr [esi+0*8*8]
+ fmul st(0),st(1)
+ fxch st(2)
+ fmul qword ptr [W3pW5_8]
+ fld qword ptr [esi+4*8*8]
+ fmulp st(2),st(0)
+ fld qword ptr [esi+6*8*8]
+ fld st(3)
+ fxch st(6)
+ fsubrp st(2),st(0)//fsubp
+ fld qword ptr [esi+2*8*8]
+ fld st(0)
+ fxch st(5)
+ fsub st(0),st(4)
+ fxch st(7)
+ faddp st(4),st(0)
+ fxch st(4)
+ fadd st(0),st(1)
+ fld qword ptr [W6_8]
+ fxch st(2)
+ fmul qword ptr [W2pW6_8]
+ fxch st(2)
+ fmulp st(1),st(0)
+ fxch st(4)
+ fmul qword ptr [W2mW6_8]
+ fld qword ptr [tmp2]
+ fsub qword ptr [tmp3]
+ fxch st(2)
+ fsubr st(0),st(5)
+ fxch st(1)
+ faddp st(5),st(0)
+ fld qword ptr [tmp2]
+ fxch st(2)
+ fstp qword ptr [tmp2]
+ fld st(5)
+ fxch st(2)
+ fadd qword ptr [tmp3]
+ fxch st(6)
+ fsub st(0),st(3)
+ fxch st(2)
+ faddp st(3),st(0)
+ fld st(3)
+ fsub st(0),st(5)
+ fxch st(3)
+ fstp qword ptr [tmp3]
+ fxch st(3)
+ faddp st(4),st(0)
+ fld st(5)
+ fld qword ptr [tmp2]
+ fxch st(7)
+ fsub st(0),st(4)
+ fxch st(7)
+ fadd st(0),st(2)
+ fxch st(1)
+ faddp st(4),st(0)
+ fld qword ptr [S2]
+ fmul st(1),st(0)
+ fxch st(1)
+ fstp qword ptr [tmp1]
+ fld st(4)
+ fadd st(0),st(6)
+ fxch st(2)
+ fsubr qword ptr [tmp2]
+ fxch st(5)
+ fsubrp st(6),st(0)//fsubp
+ fxch st(1)
+ // From here on the integer stores are interleaved with the remaining FPU
+ // work: fistp -> load the integer -> clip table lookup -> 16-bit store.
+ fistp dword ptr [int0]
+ fxch st(4)
+ mov eax,[int0]
+ movsx eax,word ptr [edx+2*eax]
+ mov [edi+0*8*2],ax
+ fistp dword ptr [int7]
+ mov eax,[int7]
+ fld st(0)
+ movsx eax,word ptr [edx+2*eax]
+ mov [edi+7*8*2],ax
+ fadd qword ptr [tmp3]
+ fistp dword ptr [int3]
+ mov eax,[int3]
+ movsx eax,word ptr [edx+2*eax]
+ mov [edi+3*8*2],ax
+ fsub qword ptr [tmp3]
+ fld st(1)
+ fxch st(1)
+ fistp dword ptr [int4]
+ mov eax,[int4]
+ movsx eax,word ptr [edx+2*eax]
+ mov [edi+4*8*2],ax
+ fadd qword ptr [tmp1]
+ fxch st(3)
+ fmulp st(2),st(0)
+ fxch st(2)
+ fistp dword ptr [int1]
+ fxch st(1)
+ mov eax,[int1]
+ movsx eax,word ptr [edx+2*eax]
+ mov [edi+1*8*2],ax
+ fsub qword ptr [tmp1]
+ fld st(2)
+ fsub st(0),st(2)
+ fxch st(1)
+ fistp dword ptr [int6]
+ fxch st(2)
+ mov eax,[int6]
+ faddp st(1),st(0)
+ movsx eax,word ptr [edx+2*eax]
+ mov [edi+6*8*2],ax
+ fistp dword ptr [int2]
+ mov eax,[int2]
+ movsx eax,word ptr [edx+2*eax]
+ mov [edi+2*8*2],ax
+ fistp dword ptr [int5]
+ mov eax,[int5]
+ movsx eax,word ptr [edx+2*eax]
+ mov [edi+5*8*2],ax
+
+ // next column: one double to the right in the table, one short in block
+ add esi,8
+ add edi,2
+ dec ecx
+ jnz Lcols
+
+ // give back the stack space reserved at the top of the asm block
+ add esp,SIZE
+ }
+}
diff --git a/src/filters/source/D2VSource/idctmmx.asm b/src/filters/source/D2VSource/idctmmx.asm
new file mode 100644
index 000000000..7ebe4d7e3
--- /dev/null
+++ b/src/filters/source/D2VSource/idctmmx.asm
@@ -0,0 +1,738 @@
+;
+; idct8x8_xmm.asm
+;
+; Originally provided by Intel at AP-922
+; http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
+; (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm)
+; but in a limited edition.
+; New macro implements a column part for precise iDCT
+; The routine precision now satisfies IEEE standard 1180-1990.
+;
+; Copyright (c) 2000-2001 Peter Gubanov <peter@elecard.net.ru>
+; Rounding trick Copyright (c) 2000 Michel Lespinasse <walken@zoy.org>
+;
+; http://www.elecard.com/peter/idct.html
+; http://www.linuxvideo.org/mpeg2dec/
+;
+;=============================================================================
+;
+; These examples contain code fragments for first stage iDCT 8x8
+; (for rows) and first stage DCT 8x8 (for columns)
+;
+;=============================================================================
+mword typedef qword
+mptr equ mword ptr
+
+BITS_INV_ACC = 5 ; 4 or 5 for IEEE
+SHIFT_INV_ROW = 16 - BITS_INV_ACC
+SHIFT_INV_COL = 1 + BITS_INV_ACC
+RND_INV_ROW = 1024 * (6 - BITS_INV_ACC) ; 1 << (SHIFT_INV_ROW-1)
+RND_INV_COL = 16 * (BITS_INV_ACC - 3) ; 1 << (SHIFT_INV_COL-1)
+RND_INV_CORR = RND_INV_COL - 1 ; correction -1.0 and round
+
+BITS_FRW_ACC = 3 ; 2 or 3 for accuracy
+SHIFT_FRW_COL = BITS_FRW_ACC
+SHIFT_FRW_ROW = BITS_FRW_ACC + 17
+RND_FRW_ROW = 262144 * (BITS_FRW_ACC - 1) ; 1 << (SHIFT_FRW_ROW-1)
+
+_MMX = 1
+
+.nolist
+
+.586
+
+if @version GE 612
+.mmx
+;mmword TEXTEQU <QWORD>
+else
+include IAMMX.INC
+endif
+
+if @version GE 614
+.xmm
+;mm2word TEXTEQU <QWORD> ; needed for Streaming SIMD Extensions macros
+else
+include iaxmm.inc ; Streaming SIMD Extensions Emulator Macros
+endif
+
+ .list
+ .model flat
+
+_DATA SEGMENT PARA PUBLIC USE32 'DATA'
+
+; Packed constants: each sword table repeats one value in all four word lanes,
+; each dword table repeats one value in both dword lanes.
+one_corr sword 1, 1, 1, 1
+round_inv_row dword RND_INV_ROW, RND_INV_ROW
+round_inv_col sword RND_INV_COL, RND_INV_COL, RND_INV_COL, RND_INV_COL
+round_inv_corr sword RND_INV_CORR, RND_INV_CORR, RND_INV_CORR, RND_INV_CORR
+round_frw_row dword RND_FRW_ROW, RND_FRW_ROW
+ tg_1_16 sword 13036, 13036, 13036, 13036 ; tg(1*pi/16) * (1<<16) + 0.5
+ tg_2_16 sword 27146, 27146, 27146, 27146 ; tg(2*pi/16) * (1<<16) + 0.5
+ tg_3_16 sword -21746, -21746, -21746, -21746 ; (tg(3*pi/16) - 1) * (1<<16) + 0.5, the full value does not fit a signed word
+ cos_4_16 sword -19195, -19195, -19195, -19195 ; (cos(4*pi/16) - 1) * (1<<16) + 0.5, the full value does not fit a signed word
+ocos_4_16 sword 23170, 23170, 23170, 23170 ; cos(4*pi/16) * (1<<15) + 0.5
+
+ otg_3_16 sword 21895, 21895, 21895, 21895 ; tg(3*pi/16) * (1<<15) + 0.5
+
+; assume SHIFT_INV_ROW == 12
+;rounder_0 dword 65536, 65536
+;rounder_4 dword 0, 0
+;rounder_1 dword 7195, 7195
+;rounder_7 dword 1024, 1024
+;rounder_2 dword 4520, 4520
+;rounder_6 dword 1024, 1024
+;rounder_3 dword 2407, 2407
+;rounder_5 dword 240, 240
+
+; assume SHIFT_INV_ROW == 11
+rounder_0 dword 65536, 65536
+rounder_4 dword 0, 0
+rounder_1 dword 3597, 3597
+rounder_7 dword 512, 512
+rounder_2 dword 2260, 2260
+rounder_6 dword 512, 512
+rounder_3 dword 1203, 1203
+rounder_5 dword 120, 120
+
+;=============================================================================
+;
+; The first stage iDCT 8x8 - inverse DCTs of rows
+;
+;-----------------------------------------------------------------------------
+; The 8-point inverse DCT direct algorithm
+;-----------------------------------------------------------------------------
+;
+; static const short w[32] = {
+; FIX(cos_4_16), FIX(cos_2_16), FIX(cos_4_16), FIX(cos_6_16),
+; FIX(cos_4_16), FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16),
+; FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16), FIX(cos_2_16),
+; FIX(cos_4_16), -FIX(cos_2_16), FIX(cos_4_16), -FIX(cos_6_16),
+; FIX(cos_1_16), FIX(cos_3_16), FIX(cos_5_16), FIX(cos_7_16),
+; FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16),
+; FIX(cos_5_16), -FIX(cos_1_16), FIX(cos_7_16), FIX(cos_3_16),
+; FIX(cos_7_16), -FIX(cos_5_16), FIX(cos_3_16), -FIX(cos_1_16) };
+;
+; #define DCT_8_INV_ROW(x, y)
+; {
+; int a0, a1, a2, a3, b0, b1, b2, b3;
+;
+; a0 =x[0]*w[0]+x[2]*w[1]+x[4]*w[2]+x[6]*w[3];
+; a1 =x[0]*w[4]+x[2]*w[5]+x[4]*w[6]+x[6]*w[7];
+; a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11];
+; a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15];
+; b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19];
+; b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23];
+; b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27];
+; b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31];
+;
+; y[0] = SHIFT_ROUND ( a0 + b0 );
+; y[1] = SHIFT_ROUND ( a1 + b1 );
+; y[2] = SHIFT_ROUND ( a2 + b2 );
+; y[3] = SHIFT_ROUND ( a3 + b3 );
+; y[4] = SHIFT_ROUND ( a3 - b3 );
+; y[5] = SHIFT_ROUND ( a2 - b2 );
+; y[6] = SHIFT_ROUND ( a1 - b1 );
+; y[7] = SHIFT_ROUND ( a0 - b0 );
+; }
+;
+;-----------------------------------------------------------------------------
+;
+; In this implementation the outputs of the iDCT-1D are multiplied
+; for rows 0,4 - by cos_4_16,
+; for rows 1,7 - by cos_1_16,
+; for rows 2,6 - by cos_2_16,
+; for rows 3,5 - by cos_3_16
+; and are shifted to the left for better accuracy
+;
+; For the constants used,
+; FIX(float_const) = (short) (float_const * (1<<15) + 0.5)
+;
+;=============================================================================
+
+;=============================================================================
+; MMX code
+;=============================================================================
+
+; Table for rows 0,4 - constants are multiplied by cos_4_16
+
+tab_i_04 sword 16384, 16384, 16384, -16384 ; movq-> w06 w04 w02 w00
+ sword 21407, 8867, 8867, -21407 ; w07 w05 w03 w01
+ sword 16384, -16384, 16384, 16384 ; w14 w12 w10 w08
+ sword -8867, 21407, -21407, -8867 ; w15 w13 w11 w09
+ sword 22725, 12873, 19266, -22725 ; w22 w20 w18 w16
+ sword 19266, 4520, -4520, -12873 ; w23 w21 w19 w17
+ sword 12873, 4520, 4520, 19266 ; w30 w28 w26 w24
+ sword -22725, 19266, -12873, -22725 ; w31 w29 w27 w25
+
+; Table for rows 1,7 - constants are multiplied by cos_1_16
+
+tab_i_17 sword 22725, 22725, 22725, -22725 ; movq-> w06 w04 w02 w00
+ sword 29692, 12299, 12299, -29692 ; w07 w05 w03 w01
+ sword 22725, -22725, 22725, 22725 ; w14 w12 w10 w08
+ sword -12299, 29692, -29692, -12299 ; w15 w13 w11 w09
+ sword 31521, 17855, 26722, -31521 ; w22 w20 w18 w16
+ sword 26722, 6270, -6270, -17855 ; w23 w21 w19 w17
+ sword 17855, 6270, 6270, 26722 ; w30 w28 w26 w24
+ sword -31521, 26722, -17855, -31521 ; w31 w29 w27 w25
+
+; Table for rows 2,6 - constants are multiplied by cos_2_16
+
+tab_i_26 sword 21407, 21407, 21407, -21407 ; movq-> w06 w04 w02 w00
+ sword 27969, 11585, 11585, -27969 ; w07 w05 w03 w01
+ sword 21407, -21407, 21407, 21407 ; w14 w12 w10 w08
+ sword -11585, 27969, -27969, -11585 ; w15 w13 w11 w09
+ sword 29692, 16819, 25172, -29692 ; w22 w20 w18 w16
+ sword 25172, 5906, -5906, -16819 ; w23 w21 w19 w17
+ sword 16819, 5906, 5906, 25172 ; w30 w28 w26 w24
+ sword -29692, 25172, -16819, -29692 ; w31 w29 w27 w25
+
+; Table for rows 3,5 - constants are multiplied by cos_3_16
+
+tab_i_35 sword 19266, 19266, 19266, -19266 ; movq-> w06 w04 w02 w00
+ sword 25172, 10426, 10426, -25172 ; w07 w05 w03 w01
+ sword 19266, -19266, 19266, 19266 ; w14 w12 w10 w08
+ sword -10426, 25172, -25172, -10426 ; w15 w13 w11 w09
+ sword 26722, 15137, 22654, -26722 ; w22 w20 w18 w16
+ sword 22654, 5315, -5315, -15137 ; w23 w21 w19 w17
+ sword 15137, 5315, 5315, 22654 ; w30 w28 w26 w24
+ sword -26722, 22654, -15137, -26722 ; w31 w29 w27 w25
+
+;-----------------------------------------------------------------------------
+
+; DCT_8_INV_ROW_1 - 1-D inverse DCT of one row of eight words (plain MMX).
+;   INP     - address of the eight input words x0..x7, read as two qwords
+;   OUT     - address receiving the eight output words y0..y7; both input
+;             qwords are loaded before anything is stored, so OUT may be INP
+;   TABLE   - 64-byte coefficient table in the tab_i_04 layout
+;   ROUNDER - qword (two dwords) added to the even-part sums before shifting
+; Sums are shifted right arithmetically by SHIFT_INV_ROW and packed to words
+; with signed saturation. Uses mm0-mm7.
+DCT_8_INV_ROW_1 MACRO INP:REQ, OUT:REQ, TABLE:REQ, ROUNDER:REQ
+
+ movq mm0, mptr [INP] ; 0 ; x3 x2 x1 x0
+
+ movq mm1, mptr [INP+8] ; 1 ; x7 x6 x5 x4
+ movq mm2, mm0 ; 2 ; x3 x2 x1 x0
+
+ movq mm3, mptr [TABLE] ; 3 ; w06 w04 w02 w00
+ punpcklwd mm0, mm1 ; x5 x1 x4 x0
+
+ movq mm5, mm0 ; 5 ; x5 x1 x4 x0
+ punpckldq mm0, mm0 ; x4 x0 x4 x0
+
+ movq mm4, mptr [TABLE+8] ; 4 ; w07 w05 w03 w01
+ punpckhwd mm2, mm1 ; 1 ; x7 x3 x6 x2
+
+ pmaddwd mm3, mm0 ; x4*w06+x0*w04 x4*w02+x0*w00
+ movq mm6, mm2 ; 6 ; x7 x3 x6 x2
+
+ movq mm1, mptr [TABLE+32] ; 1 ; w22 w20 w18 w16
+ punpckldq mm2, mm2 ; x6 x2 x6 x2
+
+ pmaddwd mm4, mm2 ; x6*w07+x2*w05 x6*w03+x2*w01
+ punpckhdq mm5, mm5 ; x5 x1 x5 x1
+
+ pmaddwd mm0, mptr [TABLE+16] ; x4*w14+x0*w12 x4*w10+x0*w08
+ punpckhdq mm6, mm6 ; x7 x3 x7 x3
+
+ movq mm7, mptr [TABLE+40] ; 7 ; w23 w21 w19 w17
+ pmaddwd mm1, mm5 ; x5*w22+x1*w20 x5*w18+x1*w16
+
+ paddd mm3, mptr [ROUNDER] ; +rounder
+ pmaddwd mm7, mm6 ; x7*w23+x3*w21 x7*w19+x3*w17
+
+ pmaddwd mm2, mptr [TABLE+24] ; x6*w15+x2*w13 x6*w11+x2*w09
+ paddd mm3, mm4 ; 4 ; a1=sum(even1) a0=sum(even0)
+
+ pmaddwd mm5, mptr [TABLE+48] ; x5*w30+x1*w28 x5*w26+x1*w24
+ movq mm4, mm3 ; 4 ; a1 a0
+
+ pmaddwd mm6, mptr [TABLE+56] ; x7*w31+x3*w29 x7*w27+x3*w25
+ paddd mm1, mm7 ; 7 ; b1=sum(odd1) b0=sum(odd0)
+
+ paddd mm0, mptr [ROUNDER] ; +rounder
+ psubd mm3, mm1 ; a1-b1 a0-b0
+
+ psrad mm3, SHIFT_INV_ROW ; y6=a1-b1 y7=a0-b0
+ paddd mm1, mm4 ; 4 ; a1+b1 a0+b0
+
+ paddd mm0, mm2 ; 2 ; a3=sum(even3) a2=sum(even2)
+ psrad mm1, SHIFT_INV_ROW ; y1=a1+b1 y0=a0+b0
+
+ paddd mm5, mm6 ; 6 ; b3=sum(odd3) b2=sum(odd2)
+ movq mm4, mm0 ; 4 ; a3 a2
+
+ paddd mm0, mm5 ; a3+b3 a2+b2
+ psubd mm4, mm5 ; 5 ; a3-b3 a2-b2
+
+ psrad mm0, SHIFT_INV_ROW ; y3=a3+b3 y2=a2+b2
+ psrad mm4, SHIFT_INV_ROW ; y4=a3-b3 y5=a2-b2
+
+ packssdw mm1, mm0 ; 0 ; y3 y2 y1 y0
+ packssdw mm4, mm3 ; 3 ; y6 y7 y4 y5
+
+; the next five instructions swap the two words inside each dword of mm4
+ movq mm7, mm4 ; 7 ; y6 y7 y4 y5
+ psrld mm4, 16 ; 0 y6 0 y4
+
+ pslld mm7, 16 ; y7 0 y5 0
+ movq mptr [OUT], mm1 ; 1 ; save y3 y2 y1 y0
+
+ por mm7, mm4 ; 4 ; y7 y6 y5 y4
+ movq mptr [OUT+8], mm7 ; 7 ; save y7 y6 y5 y4
+ENDM
+
+;=============================================================================
+; code for Pentium III
+;=============================================================================
+
+; Table for rows 0,4 - constants are multiplied by cos_4_16
+
+tab_i_04_s sword 16384, 21407, 16384, 8867 ; movq-> w05 w04 w01 w00
+ sword 16384, 8867, -16384, -21407 ; w07 w06 w03 w02
+ sword 16384, -8867, 16384, -21407 ; w13 w12 w09 w08
+ sword -16384, 21407, 16384, -8867 ; w15 w14 w11 w10
+ sword 22725, 19266, 19266, -4520 ; w21 w20 w17 w16
+ sword 12873, 4520, -22725, -12873 ; w23 w22 w19 w18
+ sword 12873, -22725, 4520, -12873 ; w29 w28 w25 w24
+ sword 4520, 19266, 19266, -22725 ; w31 w30 w27 w26
+
+; Table for rows 1,7 - constants are multiplied by cos_1_16
+
+tab_i_17_s sword 22725, 29692, 22725, 12299 ; movq-> w05 w04 w01 w00
+ sword 22725, 12299, -22725, -29692 ; w07 w06 w03 w02
+ sword 22725, -12299, 22725, -29692 ; w13 w12 w09 w08
+ sword -22725, 29692, 22725, -12299 ; w15 w14 w11 w10
+ sword 31521, 26722, 26722, -6270 ; w21 w20 w17 w16
+ sword 17855, 6270, -31521, -17855 ; w23 w22 w19 w18
+ sword 17855, -31521, 6270, -17855 ; w29 w28 w25 w24
+ sword 6270, 26722, 26722, -31521 ; w31 w30 w27 w26
+
+; Table for rows 2,6 - constants are multiplied by cos_2_16
+
+tab_i_26_s sword 21407, 27969, 21407, 11585 ; movq-> w05 w04 w01 w00
+ sword 21407, 11585, -21407, -27969 ; w07 w06 w03 w02
+ sword 21407, -11585, 21407, -27969 ; w13 w12 w09 w08
+ sword -21407, 27969, 21407, -11585 ; w15 w14 w11 w10
+ sword 29692, 25172, 25172, -5906 ; w21 w20 w17 w16
+ sword 16819, 5906, -29692, -16819 ; w23 w22 w19 w18
+ sword 16819, -29692, 5906, -16819 ; w29 w28 w25 w24
+ sword 5906, 25172, 25172, -29692 ; w31 w30 w27 w26
+
+; Table for rows 3,5 - constants are multiplied by cos_3_16
+
+tab_i_35_s sword 19266, 25172, 19266, 10426 ; movq-> w05 w04 w01 w00
+ sword 19266, 10426, -19266, -25172 ; w07 w06 w03 w02
+ sword 19266, -10426, 19266, -25172 ; w13 w12 w09 w08
+ sword -19266, 25172, 19266, -10426 ; w15 w14 w11 w10
+ sword 26722, 22654, 22654, -5315 ; w21 w20 w17 w16
+ sword 15137, 5315, -26722, -15137 ; w23 w22 w19 w18
+ sword 15137, -26722, 5315, -15137 ; w29 w28 w25 w24
+ sword 5315, 22654, 22654, -26722 ; w31 w30 w27 w26
+
+;-----------------------------------------------------------------------------
+
+; DCT_8_INV_ROW_1_s - 1-D inverse DCT of one row of eight words, variant that
+; uses pshufw (the "code for Pentium III" path) instead of unpack/shift pairs.
+;   INP     - address of the eight input words x0..x7, read as two qwords
+;   OUT     - address receiving the eight output words y0..y7; both input
+;             qwords are loaded before anything is stored, so OUT may be INP
+;   TABLE   - 64-byte coefficient table in the tab_i_04_s layout
+;   ROUNDER - qword (two dwords) added to the even-part sums before shifting
+; Sums are shifted right arithmetically by SHIFT_INV_ROW and packed to words
+; with signed saturation. Uses mm0-mm7.
+DCT_8_INV_ROW_1_s MACRO INP:REQ, OUT:REQ, TABLE:REQ, ROUNDER:REQ
+
+ movq mm0, mptr [INP] ; 0 ; x3 x2 x1 x0
+
+ movq mm1, mptr [INP+8] ; 1 ; x7 x6 x5 x4
+ movq mm2, mm0 ; 2 ; x3 x2 x1 x0
+
+ movq mm3, mptr [TABLE] ; 3 ; w05 w04 w01 w00
+ pshufw mm0, mm0, 10001000b ; x2 x0 x2 x0
+
+ movq mm4, mptr [TABLE+8] ; 4 ; w07 w06 w03 w02
+ movq mm5, mm1 ; 5 ; x7 x6 x5 x4
+ pmaddwd mm3, mm0 ; x2*w05+x0*w04 x2*w01+x0*w00
+
+ movq mm6, mptr [TABLE+32] ; 6 ; w21 w20 w17 w16
+ pshufw mm1, mm1, 10001000b ; x6 x4 x6 x4
+ pmaddwd mm4, mm1 ; x6*w07+x4*w06 x6*w03+x4*w02
+
+ movq mm7, mptr [TABLE+40] ; 7 ; w23 w22 w19 w18
+ pshufw mm2, mm2, 11011101b ; x3 x1 x3 x1
+ pmaddwd mm6, mm2 ; x3*w21+x1*w20 x3*w17+x1*w16
+
+ pshufw mm5, mm5, 11011101b ; x7 x5 x7 x5
+ pmaddwd mm7, mm5 ; x7*w23+x5*w22 x7*w19+x5*w18
+
+ paddd mm3, mptr [ROUNDER] ; +rounder
+
+ pmaddwd mm0, mptr [TABLE+16] ; x2*w13+x0*w12 x2*w09+x0*w08
+ paddd mm3, mm4 ; 4 ; a1=sum(even1) a0=sum(even0)
+
+ pmaddwd mm1, mptr [TABLE+24] ; x6*w15+x4*w14 x6*w11+x4*w10
+ movq mm4, mm3 ; 4 ; a1 a0
+
+ pmaddwd mm2, mptr [TABLE+48] ; x3*w29+x1*w28 x3*w25+x1*w24
+ paddd mm6, mm7 ; 7 ; b1=sum(odd1) b0=sum(odd0)
+
+ pmaddwd mm5, mptr [TABLE+56] ; x7*w31+x5*w30 x7*w27+x5*w26
+ paddd mm3, mm6 ; a1+b1 a0+b0
+
+ paddd mm0, mptr [ROUNDER] ; +rounder
+ psrad mm3, SHIFT_INV_ROW ; y1=a1+b1 y0=a0+b0
+
+ paddd mm0, mm1 ; 1 ; a3=sum(even3) a2=sum(even2)
+ psubd mm4, mm6 ; 6 ; a1-b1 a0-b0
+
+ movq mm7, mm0 ; 7 ; a3 a2
+ paddd mm2, mm5 ; 5 ; b3=sum(odd3) b2=sum(odd2)
+
+ paddd mm0, mm2 ; a3+b3 a2+b2
+ psrad mm4, SHIFT_INV_ROW ; y6=a1-b1 y7=a0-b0
+
+ psubd mm7, mm2 ; 2 ; a3-b3 a2-b2
+ psrad mm0, SHIFT_INV_ROW ; y3=a3+b3 y2=a2+b2
+
+ psrad mm7, SHIFT_INV_ROW ; y4=a3-b3 y5=a2-b2
+
+ packssdw mm3, mm0 ; 0 ; y3 y2 y1 y0
+
+ packssdw mm7, mm4 ; 4 ; y6 y7 y4 y5
+
+ movq mptr [OUT], mm3 ; 3 ; save y3 y2 y1 y0
+; a single pshufw swaps the two words inside each dword
+ pshufw mm7, mm7, 10110001b ; y7 y6 y5 y4
+
+ movq mptr [OUT+8], mm7 ; 7 ; save y7 y6 y5 y4
+ENDM
+
+;=============================================================================
+;
+;=============================================================================
+
+;=============================================================================
+;
+; The first stage DCT 8x8 - forward DCTs of columns
+;
+; The outputs are multiplied
+; for rows 0,4 - by cos_4_16,
+; for rows 1,7 - by cos_1_16,
+; for rows 2,6 - by cos_2_16,
+; for rows 3,5 - by cos_3_16
+; and are shifted to the left for better accuracy
+;
+;-----------------------------------------------------------------------------
+;
+; The 8-point scaled forward DCT algorithm (26a8m)
+;
+;-----------------------------------------------------------------------------
+;
+; #define DCT_8_FRW_COL(x, y)
+;{
+; short t0, t1, t2, t3, t4, t5, t6, t7;
+; short tp03, tm03, tp12, tm12, tp65, tm65;
+; short tp465, tm465, tp765, tm765;
+;
+; t0 = LEFT_SHIFT ( x[0] + x[7] );
+; t1 = LEFT_SHIFT ( x[1] + x[6] );
+; t2 = LEFT_SHIFT ( x[2] + x[5] );
+; t3 = LEFT_SHIFT ( x[3] + x[4] );
+; t4 = LEFT_SHIFT ( x[3] - x[4] );
+; t5 = LEFT_SHIFT ( x[2] - x[5] );
+; t6 = LEFT_SHIFT ( x[1] - x[6] );
+; t7 = LEFT_SHIFT ( x[0] - x[7] );
+;
+; tp03 = t0 + t3;
+; tm03 = t0 - t3;
+; tp12 = t1 + t2;
+; tm12 = t1 - t2;
+;
+; y[0] = tp03 + tp12;
+; y[4] = tp03 - tp12;
+;
+; y[2] = tm03 + tm12 * tg_2_16;
+; y[6] = tm03 * tg_2_16 - tm12;
+;
+; tp65 =(t6 +t5 )*cos_4_16;
+; tm65 =(t6 -t5 )*cos_4_16;
+;
+; tp765 = t7 + tp65;
+; tm765 = t7 - tp65;
+; tp465 = t4 + tm65;
+; tm465 = t4 - tm65;
+;
+; y[1] = tp765 + tp465 * tg_1_16;
+; y[7] = tp765 * tg_1_16 - tp465;
+; y[5] = tm765 * tg_3_16 + tm465;
+; y[3] = tm765 - tm465 * tg_3_16;
+;}
+;
+;=============================================================================
+; DCT_8_FRW_COL_4 - forward 8-point DCT down four adjacent columns of words.
+;   INP - base of the input; row k is read as the qword at INP + k*16
+;   OUT - base of the output; row k is written as the qword at OUT + k*16
+; Follows the DCT_8_FRW_COL pseudo-code above, four words per MMX register,
+; with saturating adds/subtracts. Neither procedure in this file expands
+; this macro. Uses mm0-mm7.
+DCT_8_FRW_COL_4 MACRO INP:REQ, OUT:REQ
+LOCAL x0, x1, x2, x3, x4, x5, x6, x7
+LOCAL y0, y1, y2, y3, y4, y5, y6, y7
+x0 equ [INP + 0*16]
+x1 equ [INP + 1*16]
+x2 equ [INP + 2*16]
+x3 equ [INP + 3*16]
+x4 equ [INP + 4*16]
+x5 equ [INP + 5*16]
+x6 equ [INP + 6*16]
+x7 equ [INP + 7*16]
+y0 equ [OUT + 0*16]
+y1 equ [OUT + 1*16]
+y2 equ [OUT + 2*16]
+y3 equ [OUT + 3*16]
+y4 equ [OUT + 4*16]
+y5 equ [OUT + 5*16]
+y6 equ [OUT + 6*16]
+y7 equ [OUT + 7*16]
+movq mm0, x1 ; 0 ; x1
+movq mm1, x6 ; 1 ; x6
+movq mm2, mm0 ; 2 ; x1
+movq mm3, x2 ; 3 ; x2
+paddsw mm0, mm1 ; t1 = x[1] + x[6]
+movq mm4, x5 ; 4 ; x5
+psllw mm0, SHIFT_FRW_COL ; t1
+movq mm5, x0 ; 5 ; x0
+paddsw mm4, mm3 ; t2 = x[2] + x[5]
+paddsw mm5, x7 ; t0 = x[0] + x[7]
+psllw mm4, SHIFT_FRW_COL ; t2
+movq mm6, mm0 ; 6 ; t1
+psubsw mm2, mm1 ; 1 ; t6 = x[1] - x[6]
+movq mm1, mptr tg_2_16 ; 1 ; tg_2_16
+psubsw mm0, mm4 ; tm12 = t1 - t2
+movq mm7, x3 ; 7 ; x3
+pmulhw mm1, mm0 ; tm12*tg_2_16
+paddsw mm7, x4 ; t3 = x[3] + x[4]
+psllw mm5, SHIFT_FRW_COL ; t0
+paddsw mm6, mm4 ; 4 ; tp12 = t1 + t2
+psllw mm7, SHIFT_FRW_COL ; t3
+movq mm4, mm5 ; 4 ; t0
+psubsw mm5, mm7 ; tm03 = t0 - t3
+paddsw mm1, mm5 ; y2 = tm03 + tm12*tg_2_16
+paddsw mm4, mm7 ; 7 ; tp03 = t0 + t3
+por mm1, mptr one_corr ; correction y2 +0.5
+psllw mm2, SHIFT_FRW_COL+1 ; t6
+pmulhw mm5, mptr tg_2_16 ; tm03*tg_2_16
+movq mm7, mm4 ; 7 ; tp03
+psubsw mm3, x5 ; t5 = x[2] - x[5]
+psubsw mm4, mm6 ; y4 = tp03 - tp12
+movq y2, mm1 ; 1 ; save y2
+paddsw mm7, mm6 ; 6 ; y0 = tp03 + tp12
+movq mm1, x3 ; 1 ; x3
+psllw mm3, SHIFT_FRW_COL+1 ; t5
+psubsw mm1, x4 ; t4 = x[3] - x[4]
+movq mm6, mm2 ; 6 ; t6
+movq y4, mm4 ; 4 ; save y4
+paddsw mm2, mm3 ; t6 + t5
+pmulhw mm2, mptr ocos_4_16 ; tp65 = (t6 + t5)*cos_4_16
+psubsw mm6, mm3 ; 3 ; t6 - t5
+pmulhw mm6, mptr ocos_4_16 ; tm65 = (t6 - t5)*cos_4_16
+psubsw mm5, mm0 ; 0 ; y6 = tm03*tg_2_16 - tm12
+por mm5, mptr one_corr ; correction y6 +0.5
+psllw mm1, SHIFT_FRW_COL ; t4
+por mm2, mptr one_corr ; correction tp65 +0.5
+movq mm4, mm1 ; 4 ; t4
+movq mm3, x0 ; 3 ; x0
+paddsw mm1, mm6 ; tp465 = t4 + tm65
+psubsw mm3, x7 ; t7 = x[0] - x[7]
+psubsw mm4, mm6 ; 6 ; tm465 = t4 - tm65
+movq mm0, mptr tg_1_16 ; 0 ; tg_1_16
+psllw mm3, SHIFT_FRW_COL ; t7
+movq mm6, mptr tg_3_16 ; 6 ; tg_3_16
+pmulhw mm0, mm1 ; tp465*tg_1_16
+movq y0, mm7 ; 7 ; save y0
+pmulhw mm6, mm4 ; tm465*tg_3_16
+movq y6, mm5 ; 5 ; save y6
+movq mm7, mm3 ; 7 ; t7
+movq mm5, mptr tg_3_16 ; 5 ; tg_3_16
+psubsw mm7, mm2 ; tm765 = t7 - tp65
+paddsw mm3, mm2 ; 2 ; tp765 = t7 + tp65
+pmulhw mm5, mm7 ; tm765*tg_3_16
+paddsw mm0, mm3 ; y1 = tp765 + tp465*tg_1_16
+paddsw mm6, mm4 ; tm465*tg_3_16
+pmulhw mm3, mptr tg_1_16 ; tp765*tg_1_16
+por mm0, mptr one_corr ; correction y1 +0.5
+paddsw mm5, mm7 ; tm765*tg_3_16
+psubsw mm7, mm6 ; 6 ; y3 = tm765 - tm465*tg_3_16
+movq y1, mm0 ; 0 ; save y1
+paddsw mm5, mm4 ; 4 ; y5 = tm765*tg_3_16 + tm465
+movq y3, mm7 ; 7 ; save y3
+psubsw mm3, mm1 ; 1 ; y7 = tp765*tg_1_16 - tp465
+movq y5, mm5 ; 5 ; save y5
+movq y7, mm3 ; 3 ; save y7
+ENDM
+
+; DCT_8_INV_COL_4 - inverse 8-point DCT down four adjacent columns of words.
+;   INP - base of the input; row k is read as the qword at INP + k*16
+;   OUT - base of the output; row k is written as the qword at OUT + k*16
+; OUT+3*16 and OUT+5*16 double as scratch for b0 and b3 (they replace the
+; commented-out SCRATCH slots). Those stores happen only after input rows 3
+; and 5 have been loaded, and every final store follows the last input load,
+; so OUT may be the same address as INP. Results are shifted right
+; arithmetically by SHIFT_INV_COL; no rounding constant is added inside this
+; macro. Uses mm0-mm7.
+DCT_8_INV_COL_4 MACRO INP:REQ, OUT:REQ
+ movq mm0, qword ptr tg_3_16
+
+ movq mm3, qword ptr [INP+16*3]
+ movq mm1, mm0 ; tg_3_16
+
+ movq mm5, qword ptr [INP+16*5]
+ pmulhw mm0, mm3 ; x3*(tg_3_16-1)
+
+ movq mm4, qword ptr tg_1_16
+ pmulhw mm1, mm5 ; x5*(tg_3_16-1)
+
+ movq mm7, qword ptr [INP+16*7]
+ movq mm2, mm4 ; tg_1_16
+
+ movq mm6, qword ptr [INP+16*1]
+ pmulhw mm4, mm7 ; x7*tg_1_16
+
+ paddsw mm0, mm3 ; x3*tg_3_16
+ pmulhw mm2, mm6 ; x1*tg_1_16
+
+ paddsw mm1, mm3 ; x3+x5*(tg_3_16-1)
+ psubsw mm0, mm5 ; x3*tg_3_16-x5 = tm35
+
+ movq mm3, qword ptr ocos_4_16
+ paddsw mm1, mm5 ; x3+x5*tg_3_16 = tp35
+
+ paddsw mm4, mm6 ; x1+tg_1_16*x7 = tp17
+ psubsw mm2, mm7 ; x1*tg_1_16-x7 = tm17
+
+ movq mm5, mm4 ; tp17
+ movq mm6, mm2 ; tm17
+
+ paddsw mm5, mm1 ; tp17+tp35 = b0
+ psubsw mm6, mm0 ; tm17-tm35 = b3
+
+ psubsw mm4, mm1 ; tp17-tp35 = t1
+ paddsw mm2, mm0 ; tm17+tm35 = t2
+
+ movq mm7, qword ptr tg_2_16
+ movq mm1, mm4 ; t1
+
+; movq qword ptr [SCRATCH+0], mm5 ; save b0
+ movq qword ptr [OUT+3*16], mm5 ; save b0
+ paddsw mm1, mm2 ; t1+t2
+
+; movq qword ptr [SCRATCH+8], mm6 ; save b3
+ movq qword ptr [OUT+5*16], mm6 ; save b3
+ psubsw mm4, mm2 ; t1-t2
+
+ movq mm5, qword ptr [INP+2*16]
+ movq mm0, mm7 ; tg_2_16
+
+ movq mm6, qword ptr [INP+6*16]
+ pmulhw mm0, mm5 ; x2*tg_2_16
+
+ pmulhw mm7, mm6 ; x6*tg_2_16
+; slot
+ pmulhw mm1, mm3 ; ocos_4_16*(t1+t2) = b1/2
+; slot
+ movq mm2, qword ptr [INP+0*16]
+ pmulhw mm4, mm3 ; ocos_4_16*(t1-t2) = b2/2
+
+ psubsw mm0, mm6 ; x2*tg_2_16-x6 = tm26
+ movq mm3, mm2 ; x0
+
+ movq mm6, qword ptr [INP+4*16]
+ paddsw mm7, mm5 ; x2+x6*tg_2_16 = tp26
+
+ paddsw mm2, mm6 ; x0+x4 = tp04
+ psubsw mm3, mm6 ; x0-x4 = tm04
+
+ movq mm5, mm2 ; tp04
+ movq mm6, mm3 ; tm04
+
+ psubsw mm2, mm7 ; tp04-tp26 = a3
+ paddsw mm3, mm0 ; tm04+tm26 = a1
+
+ paddsw mm1, mm1 ; b1
+ paddsw mm4, mm4 ; b2
+
+ paddsw mm5, mm7 ; tp04+tp26 = a0
+ psubsw mm6, mm0 ; tm04-tm26 = a2
+
+ movq mm7, mm3 ; a1
+ movq mm0, mm6 ; a2
+
+ paddsw mm3, mm1 ; a1+b1
+ paddsw mm6, mm4 ; a2+b2
+
+ psraw mm3, SHIFT_INV_COL ; dst1
+ psubsw mm7, mm1 ; a1-b1
+
+ psraw mm6, SHIFT_INV_COL ; dst2
+ psubsw mm0, mm4 ; a2-b2
+
+; movq mm1, qword ptr [SCRATCH+0] ; load b0
+ movq mm1, qword ptr [OUT+3*16] ; load b0
+ psraw mm7, SHIFT_INV_COL ; dst6
+
+ movq mm4, mm5 ; a0
+ psraw mm0, SHIFT_INV_COL ; dst5
+
+ movq qword ptr [OUT+1*16], mm3
+ paddsw mm5, mm1 ; a0+b0
+
+ movq qword ptr [OUT+2*16], mm6
+ psubsw mm4, mm1 ; a0-b0
+
+; movq mm3, qword ptr [SCRATCH+8] ; load b3
+ movq mm3, qword ptr [OUT+5*16] ; load b3
+ psraw mm5, SHIFT_INV_COL ; dst0
+
+ movq mm6, mm2 ; a3
+ psraw mm4, SHIFT_INV_COL ; dst7
+
+ movq qword ptr [OUT+5*16], mm0
+ paddsw mm2, mm3 ; a3+b3
+
+ movq qword ptr [OUT+6*16], mm7
+ psubsw mm6, mm3 ; a3-b3
+
+ movq qword ptr [OUT+0*16], mm5
+ psraw mm2, SHIFT_INV_COL ; dst3
+
+ movq qword ptr [OUT+7*16], mm4
+ psraw mm6, SHIFT_INV_COL ; dst4
+
+ movq qword ptr [OUT+3*16], mm2
+
+ movq qword ptr [OUT+4*16], mm6
+ENDM
+
+_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
+
+;
+; In-place 8x8 inverse DCT, plain-MMX row pass.
+;
+; The exported symbol is @MMX_IDCT@4, which is the __fastcall decoration of
+;   extern "C" void __fastcall MMX_IDCT (short *src_result);
+; (the original Intel sample called this routine idct8x8_mmx).
+;
+; ECX = pointer to 64 words laid out as 8 rows of 16 bytes; the block is
+; transformed in place. EAX and mm0-mm7 are overwritten. No EMMS is
+; executed here - presumably the caller is expected to issue it.
+;
+public @MMX_IDCT@4
+
+@MMX_IDCT@4 proc near
+ mov eax, ecx ; source
+
+; row pass: one 16-byte row per expansion, table and rounder chosen per row
+ DCT_8_INV_ROW_1 [eax+0], [eax+0], tab_i_04, rounder_0
+ DCT_8_INV_ROW_1 [eax+16], [eax+16], tab_i_17, rounder_1
+ DCT_8_INV_ROW_1 [eax+32], [eax+32], tab_i_26, rounder_2
+ DCT_8_INV_ROW_1 [eax+48], [eax+48], tab_i_35, rounder_3
+ DCT_8_INV_ROW_1 [eax+64], [eax+64], tab_i_04, rounder_4
+ DCT_8_INV_ROW_1 [eax+80], [eax+80], tab_i_35, rounder_5
+ DCT_8_INV_ROW_1 [eax+96], [eax+96], tab_i_26, rounder_6
+ DCT_8_INV_ROW_1 [eax+112], [eax+112], tab_i_17, rounder_7
+
+; column pass: columns 0-3, then columns 4-7
+ DCT_8_INV_COL_4 [eax+0],[eax+0]
+ DCT_8_INV_COL_4 [eax+8],[eax+8]
+
+ ret
+
+@MMX_IDCT@4 ENDP
+
+_TEXT ENDS
+
+_TEXT SEGMENT PARA PUBLIC USE32 'CODE'
+
+;
+; In-place 8x8 inverse DCT, row pass using pshufw (DCT_8_INV_ROW_1_s).
+;
+; The exported symbol is @SSEMMX_IDCT@4, which is the __fastcall decoration of
+;   extern "C" void __fastcall SSEMMX_IDCT (short *src_result);
+; (the original Intel sample called this routine idct8x8_sse).
+;
+; ECX = pointer to 64 words laid out as 8 rows of 16 bytes; the block is
+; transformed in place. EAX and mm0-mm7 are overwritten. No EMMS is
+; executed here - presumably the caller is expected to issue it.
+;
+public @SSEMMX_IDCT@4
+
+@SSEMMX_IDCT@4 proc near
+ mov eax, ecx ; source
+
+; row pass: one 16-byte row per expansion, table and rounder chosen per row
+ DCT_8_INV_ROW_1_s [eax+0], [eax+0], tab_i_04_s, rounder_0
+ DCT_8_INV_ROW_1_s [eax+16], [eax+16], tab_i_17_s, rounder_1
+ DCT_8_INV_ROW_1_s [eax+32], [eax+32], tab_i_26_s, rounder_2
+ DCT_8_INV_ROW_1_s [eax+48], [eax+48], tab_i_35_s, rounder_3
+ DCT_8_INV_ROW_1_s [eax+64], [eax+64], tab_i_04_s, rounder_4
+ DCT_8_INV_ROW_1_s [eax+80], [eax+80], tab_i_35_s, rounder_5
+ DCT_8_INV_ROW_1_s [eax+96], [eax+96], tab_i_26_s, rounder_6
+ DCT_8_INV_ROW_1_s [eax+112], [eax+112], tab_i_17_s, rounder_7
+
+; column pass: columns 0-3, then columns 4-7 (same macro as the MMX version)
+ DCT_8_INV_COL_4 [eax+0],[eax+0]
+ DCT_8_INV_COL_4 [eax+8],[eax+8]
+
+ ret
+
+@SSEMMX_IDCT@4 ENDP
+
+_TEXT ENDS
+
+END \ No newline at end of file
diff --git a/src/filters/source/D2VSource/idctref.cpp b/src/filters/source/D2VSource/idctref.cpp
new file mode 100644
index 000000000..74b695ce1
--- /dev/null
+++ b/src/filters/source/D2VSource/idctref.cpp
@@ -0,0 +1,362 @@
+#include "stdafx.h"
+
+/* idctref_miha.c, Inverse Discrete Cosine Transform, double precision */
+
+/*************************************************************/
+/* */
+/* x87 hand-optimized assembly by Miha Peternel */
+/* 27.11. - 20.1.2001 */
+/* */
+/* You are free to use this code in your project if: */
+/* - no changes are made to this message */
+/* - any changes to this code are publicly available */
+/* - your project documentation contains the following text: */
+/* "This software contains fast high-quality IDCT decoder */
+/* by Miha Peternel." */
+/* */
+/*************************************************************/
+
+/* Perform IEEE 1180 reference (64-bit floating point, separable 8x1
+ * direct matrix multiply) Inverse Discrete Cosine Transform
+*/
+
+#define ModelX 123 // enable C-level optimizations by Miha Peternel
+
+/* Here we use math.h to generate constants. Compiler results may
+ vary a little */
+
+#include <math.h>
+
+#define M_PI 3.1415926535897932384626433832795
+const static double HALF = 0.5;
+
+/* private data */
+static short iclip[1024+1024]; /* clipping table */
+static short *iclp;
+
+/* cosine transform matrix for 8x1 IDCT */
+static double c[8][8];
+
+/* Fill the 8x8 cosine matrix c[][] used by REF_IDCT and, when ModelX is
+ * defined, the clipping table reached through iclp (indices -1024..1023,
+ * values limited to -256..255). */
+void Initialize_REF_IDCT()
+{
+    for (int u = 0; u < 8; u++)
+    {
+        /* the zero-frequency row uses a different scale factor */
+        double scale;
+        if (u == 0)
+            scale = sqrt(0.125);
+        else
+            scale = 0.5;
+
+        for (int x = 0; x < 8; x++)
+            c[u][x] = scale*cos((M_PI/8.0)*u*(x + 0.5));
+    }
+
+#ifdef ModelX
+    /* iclp points at the middle of iclip so it can take negative indices */
+    iclp = iclip+1024;
+    for (int v = -1024; v < 1024; v++)
+    {
+        if (v < -256)
+            iclp[v] = -256;
+        else if (v > 255)
+            iclp[v] = 255;
+        else
+            iclp[v] = v;
+    }
+#endif
+}
+
+/* In-place 8x8 inverse DCT of the 64 shorts at 'block', computed in double
+ * precision with x87 inline assembly as two passes of 8x1 matrix products
+ * against c[][] (which Initialize_REF_IDCT() must have filled beforehand).
+ *
+ * Pass 1: tmp[r][k] = sum_j block[r][j] * c[j][k]
+ * Pass 2: rnd[k][col] = 0.5 + sum_i tmp[i][col] * c[i][k]
+ * Final:  block[] = clamp(floor(rnd[]), -256, 255)
+ *
+ * A block whose coefficients are all zero, apart from an optional value of 1
+ * in the last coefficient, is cleared and returned without running the
+ * transform. The x87 control word is restored before returning.
+ * The final stage uses cmovl/cmovg.
+ */
+void REF_IDCT(short *block)
+{
+ double tmp[64];
+ double rnd[64];
+ int int0, int1, int2, int3, int4, int5, int6, int7;
+ unsigned short fpold;
+ unsigned short fpnew;
+
+ int *b = (int *) block; // view the 64 shorts as 32 ints (two coefficients each)
+
+ // b[0] holds coefficients 0 and 1; bit 16 of b[31] is bit 0 of coefficient 63
+ if( !(b[0]|(b[31]&~0x10000)) )
+ {
+ if( b[ 1]|b[ 2]|b[ 3]|b[ 4]|b[ 5]|b[ 6] )
+ goto normal;
+ if( b[ 7]|b[ 8]|b[ 9]|b[10]|b[11]|b[12] )
+ goto normal;
+ if( b[13]|b[14]|b[15]|b[16]|b[17]|b[18] )
+ goto normal;
+ if( b[19]|b[20]|b[21]|b[22]|b[23]|b[24] )
+ goto normal;
+ if( b[25]|b[26]|b[27]|b[28]|b[29]|b[30] )
+ goto normal;
+ b[31]=0;
+ return;
+ }
+normal:
+
+ __asm
+ {
+ // do the IDCT
+ // pass 1: esi = current input row, eax = c, edi = tmp
+ mov esi,[block]
+ lea eax,[c]
+ lea edi,[tmp]
+ //mov ebx,8
+ mov ebx,8 // 0x77000000 // 8
+ align 16
+ __col1:
+ // OR together coefficients 1..7 of this row; coefficient 1 is a single
+ // 16-bit value, so its size is spelled out instead of being left implicit
+ movzx edx,word ptr [esi+1*2]
+ mov ecx,[esi+2*2]
+ or edx,[esi+4*2]
+ or ecx,[esi+6*2]
+ or edx,ecx
+ //mov ecx,8
+ mov ecx,8/2 // 0x77000000 // 8
+
+ // mov leaves the flags alone, so this still tests the OR result above
+ jnz __row1
+ // only coefficient 0 is non-zero: every output of the row is x0*c[0][0]
+ fild word ptr [esi+0*2]
+ fmul qword ptr [eax+0*8*8]
+ fst qword ptr [edi+0*8]
+ fst qword ptr [edi+1*8]
+ fst qword ptr [edi+2*8]
+ fst qword ptr [edi+3*8]
+ fst qword ptr [edi+4*8]
+ fst qword ptr [edi+5*8]
+ fst qword ptr [edi+6*8]
+ fstp qword ptr [edi+7*8]
+ add edi,8*8
+ jmp __next1
+ align 16
+ __row1:
+ // two outputs per iteration (columns k and k+1 of c), four iterations
+ fild word ptr [esi+0*2]
+ fmul qword ptr [eax+0*8*8]
+ fild word ptr [esi+1*2]
+ fmul qword ptr [eax+1*8*8]
+ fadd
+ fild word ptr [esi+2*2]
+ fmul qword ptr [eax+2*8*8]
+ fadd
+ fild word ptr [esi+3*2]
+ fmul qword ptr [eax+3*8*8]
+ fadd
+ fild word ptr [esi+4*2]
+ fmul qword ptr [eax+4*8*8]
+ fadd
+ fild word ptr [esi+5*2]
+ fmul qword ptr [eax+5*8*8]
+ fadd
+ fild word ptr [esi+6*2]
+ fmul qword ptr [eax+6*8*8]
+ fadd
+ fild word ptr [esi+7*2]
+ fmul qword ptr [eax+7*8*8]
+ fadd
+
+ fild word ptr [esi+0*2]
+ fmul qword ptr [eax+0*8*8+8]
+ fild word ptr [esi+1*2]
+ fmul qword ptr [eax+1*8*8+8]
+ fadd
+ fild word ptr [esi+2*2]
+ fmul qword ptr [eax+2*8*8+8]
+ fadd
+ fild word ptr [esi+3*2]
+ fmul qword ptr [eax+3*8*8+8]
+ fadd
+ fild word ptr [esi+4*2]
+ fmul qword ptr [eax+4*8*8+8]
+ fadd
+ fild word ptr [esi+5*2]
+ fmul qword ptr [eax+5*8*8+8]
+ fadd
+ fild word ptr [esi+6*2]
+ fmul qword ptr [eax+6*8*8+8]
+ fadd
+ fild word ptr [esi+7*2]
+ fmul qword ptr [eax+7*8*8+8]
+ fadd
+ add eax,8*2
+ fxch st(1)
+ fstp qword ptr [edi]//
+ fstp qword ptr [edi+8]
+ add edi,8*2
+ dec ecx
+
+ jnz __row1
+ add eax,-8*8 // rewind eax to the start of c
+ //align 16
+ __next1:
+ add esi,+8*2
+
+ // loop counter: subtracting 0x80000001 lowers the low bits by one and
+ // toggles bit 31, so js and jnz between them repeat until ebx reaches 0
+ // (8 -> 0x80000007 -> 6 -> ... -> 0x80000001 -> 0, i.e. eight passes)
+ sub ebx,0x80000001
+ js __col1
+ //align 16
+ test ebx,ebx // align jump &| redo flags
+ jnz __col1
+
+ // pass 2: esi = tmp column, eax = c, edi = rnd; st(0) keeps 0.5 throughout
+ lea esi,[tmp]
+ lea eax,[c]
+ lea edi,[rnd]
+ //mov edi,[block]
+ fld qword ptr [HALF]
+ mov ebx,8
+ __row2:
+ mov ecx,8/2
+ align 16
+ __col2:
+ fld qword ptr [esi+0*8*8]
+ fmul qword ptr [eax+0*8*8]
+ fld qword ptr [esi+1*8*8]
+ fmul qword ptr [eax+1*8*8]
+ fadd
+ fld qword ptr [esi+2*8*8]
+ fmul qword ptr [eax+2*8*8]
+ fadd
+ fld qword ptr [esi+3*8*8]
+ fmul qword ptr [eax+3*8*8]
+ fadd
+ fld qword ptr [esi+4*8*8]
+ fmul qword ptr [eax+4*8*8]
+ fadd
+ fld qword ptr [esi+5*8*8]
+ fmul qword ptr [eax+5*8*8]
+ fadd
+ fld qword ptr [esi+6*8*8]
+ fmul qword ptr [eax+6*8*8]
+ fadd
+ fld qword ptr [esi+7*8*8]
+ fmul qword ptr [eax+7*8*8]
+ fadd
+ fadd st(0),st(1) // + 0.5
+
+ fxch st(1)
+
+ fld qword ptr [esi+0*8*8]
+ fmul qword ptr [eax+0*8*8+8]
+ fld qword ptr [esi+1*8*8]
+ fmul qword ptr [eax+1*8*8+8]
+ fadd
+ fld qword ptr [esi+2*8*8]
+ fmul qword ptr [eax+2*8*8+8]
+ fadd
+ fld qword ptr [esi+3*8*8]
+ fmul qword ptr [eax+3*8*8+8]
+ fadd
+ fld qword ptr [esi+4*8*8]
+ fmul qword ptr [eax+4*8*8+8]
+ fadd
+ fld qword ptr [esi+5*8*8]
+ fmul qword ptr [eax+5*8*8+8]
+ fadd
+ fld qword ptr [esi+6*8*8]
+ fmul qword ptr [eax+6*8*8+8]
+ fadd
+ fld qword ptr [esi+7*8*8]
+ fmul qword ptr [eax+7*8*8+8]
+ fadd
+ fadd st(0),st(1) // + 0.5
+ add eax,8*2
+
+ fxch st(2)
+ fstp qword ptr [edi]
+ fxch st(1)
+ fstp qword ptr [edi+8*8]
+ add edi,8*8*2
+
+ dec ecx
+
+ jnz __col2
+ add eax,-8*8 // rewind eax to the start of c
+ add esi,+8 // next column of tmp
+ add edi,8-8*8*8 // back to the top of rnd, one column to the right
+
+ sub ebx,0x80000001 // same counter scheme as in pass 1
+ js __row2
+ //align 16
+ test ebx,ebx // align jump &| redo flags
+ jnz __row2
+ ffree st(0) // bye bye 0.5
+
+ // set x87 to floor mode
+ fstcw [fpold]
+ movzx eax, [fpold]
+
+ // clear the rounding-control field (bits 10-11) before selecting round-down,
+ // so the result is floor mode whatever rounding mode the caller had set
+ and eax, 0xF3FF
+ or eax, 0x0400 // round down - floor
+ mov [fpnew], ax
+ fldcw [fpnew]
+
+ // now floor the damn array
+ lea esi, [rnd]
+ mov edi, [block]
+ mov ebx, -256 // clip min
+ mov edx, +255 // clip max
+ mov ecx, 8
+ align 16
+ __floor:
+ // one row of eight values per iteration: convert, clamp, store as short
+ fld qword ptr [esi+0*8]
+ fistp dword ptr [int0]
+ mov eax,[int0]
+ cmp eax,ebx
+ cmovl eax,ebx
+ cmp eax,edx
+ cmovg eax,edx
+ fld qword ptr [esi+1*8]
+ fistp dword ptr [int1]
+ mov word ptr [edi+0*2],ax
+ mov eax,[int1]
+ cmp eax,ebx
+ cmovl eax,ebx
+ cmp eax,edx
+ cmovg eax,edx
+ fld qword ptr [esi+2*8]
+ fistp dword ptr [int2]
+ mov word ptr [edi+1*2],ax
+ mov eax,[int2]
+ cmp eax,ebx
+ cmovl eax,ebx
+ cmp eax,edx
+ cmovg eax,edx
+ fld qword ptr [esi+3*8]
+ fistp dword ptr [int3]
+ mov word ptr [edi+2*2],ax
+ mov eax,[int3]
+ cmp eax,ebx
+ cmovl eax,ebx
+ cmp eax,edx
+ cmovg eax,edx
+ fld qword ptr [esi+4*8]
+ fistp dword ptr [int4]
+ mov word ptr [edi+3*2],ax
+ mov eax,[int4]
+ cmp eax,ebx
+ cmovl eax,ebx
+ cmp eax,edx
+ cmovg eax,edx
+ fld qword ptr [esi+5*8]
+ fistp dword ptr [int5]
+ mov word ptr [edi+4*2],ax
+ mov eax,[int5]
+ cmp eax,ebx
+ cmovl eax,ebx
+ cmp eax,edx
+ cmovg eax,edx
+ fld qword ptr [esi+6*8]
+ fistp dword ptr [int6]
+ mov word ptr [edi+5*2],ax
+ mov eax,[int6]
+ cmp eax,ebx
+ cmovl eax,ebx
+ cmp eax,edx
+ cmovg eax,edx
+ fld qword ptr [esi+7*8]
+ fistp dword ptr [int7]
+ mov word ptr [edi+6*2],ax
+ mov eax,[int7]
+ cmp eax,ebx
+ cmovl eax,ebx
+ cmp eax,edx
+ cmovg eax,edx
+ mov word ptr [edi+7*2],ax
+
+ add esi, 8*8
+ add edi, 8*2
+
+ sub ecx,0x80000001 // same counter scheme as in pass 1
+ js __floor
+ //align 16
+ test ecx,ecx // align jump &| redo flags
+ jnz __floor
+
+ // set x87 to default mode
+ fldcw [fpold]
+ };
+}
diff --git a/src/filters/source/D2VSource/resource.h b/src/filters/source/D2VSource/resource.h
new file mode 100644
index 000000000..1d1658441
--- /dev/null
+++ b/src/filters/source/D2VSource/resource.h
@@ -0,0 +1,14 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by d2vsource.rc
+
+// Next default values for new objects
+//
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE 101
+#define _APS_NEXT_COMMAND_VALUE 40001
+#define _APS_NEXT_CONTROL_VALUE 1001
+#define _APS_NEXT_SYMED_VALUE 101
+#endif
+#endif
diff --git a/src/filters/source/D2VSource/stdafx.cpp b/src/filters/source/D2VSource/stdafx.cpp
new file mode 100644
index 000000000..0d549c051
--- /dev/null
+++ b/src/filters/source/D2VSource/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// d2vsource.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/src/filters/source/D2VSource/stdafx.h b/src/filters/source/D2VSource/stdafx.h
new file mode 100644
index 000000000..e596301ef
--- /dev/null
+++ b/src/filters/source/D2VSource/stdafx.h
@@ -0,0 +1,22 @@
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+#include "../../../DSUtil/SharedInclude.h"
+
+#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
+#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be explicit
+
+#ifndef VC_EXTRALEAN
+#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
+#endif
+
+#include <afx.h>
+#include <afxwin.h> // MFC core and standard components
+
+// TODO: reference additional headers your program requires here
+
+#include <streams.h>
+#include <dvdmedia.h>