From b1389b5bf79340e1f348eb387c3ff6d2dd390644 Mon Sep 17 00:00:00 2001
From: kinddragon
Date: Fri, 28 May 2010 01:23:17 +0000
Subject: Minor code cleanup

Added SSE2 VD memcpy implementation

git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1965 10f7b99b-c216-0410-bff0-8a66a9350fd8
---
 .../VirtualDub/system/source/a_memory.asm | 54 ++++++++++++++++++++++
 src/thirdparty/VirtualDub/system/source/memory.cpp | 20 +++++++-
 2 files changed, 73 insertions(+), 1 deletion(-)

(limited to 'src/thirdparty')

diff --git a/src/thirdparty/VirtualDub/system/source/a_memory.asm b/src/thirdparty/VirtualDub/system/source/a_memory.asm
index e4b6cac8b..42a8ca998 100644
--- a/src/thirdparty/VirtualDub/system/source/a_memory.asm
+++ b/src/thirdparty/VirtualDub/system/source/a_memory.asm
@@ -130,6 +130,60 @@ _VDFastMemcpyPartialMMX2:
 	pop	ebp
 	ret
+	global	_VDFastMemcpyPartialSSE2	; exported; declared in memory.cpp as void __cdecl VDFastMemcpyPartialSSE2(void *dst, const void *src, size_t bytes)
+_VDFastMemcpyPartialSSE2:			; copies `bytes` bytes from src to dst: 128-byte blocks through xmm0-xmm7, then the remainder one byte at a time
+	push	ebp				; four registers saved here and restored before ret (4*4 = 16 bytes, hence the +16 in the argument offsets below)
+	push	edi
+	push	esi
+	push	ebx				; NOTE(review): ebp and ebx are saved/restored but never used in this routine
+
+	mov	edi, [esp+4+16]			; edi = 1st stack argument (dst per the C prototype); used as the store address
+	mov	esi, [esp+8+16]			; esi = 2nd stack argument (src); used as the load address
+	mov	ecx, [esp+12+16]		; ecx = 3rd stack argument (bytes)
+	shr	ecx, 7				; ecx = bytes / 128 = number of full 128-byte blocks
+	cmp	ecx, 0				; NOTE(review): redundant, shr already sets ZF from its result
+	jbe	.skipblastloop			; no full block -> go straight to the byte tail (unsigned <= 0 means ecx == 0)
+.blastloop:					; one iteration copies one 128-byte block
+	prefetchnta	[esi+16*8]		; prefetch hint for the source 128 bytes ahead, i.e. the next block
+	movaps	xmm0, [esi]			; load 8 x 16 bytes from src into xmm0..xmm7
+	movaps	xmm1, [esi+16*1]		; NOTE(review): movaps faults unless the address is 16-byte aligned (Intel SDM);
+	movaps	xmm2, [esi+16*2]		;   nothing in this routine checks the alignment of src or dst
+	movaps	xmm3, [esi+16*3]
+	movaps	xmm4, [esi+16*4]
+	movaps	xmm5, [esi+16*5]
+	movaps	xmm6, [esi+16*6]
+	movaps	xmm7, [esi+16*7]
+	movntps	[edi], xmm0			; store the 8 registers to dst with non-temporal stores
+	movntps	[edi+16*1], xmm1		; NOTE(review): movntps also requires a 16-byte-aligned address (Intel SDM)
+	movntps	[edi+16*2], xmm2		; NOTE(review): no sfence is issued in this routine; presumably the caller's
+	movntps	[edi+16*3], xmm3		;   finish hook (VDFastMemcpyFinish) provides it - confirm
+	movntps	[edi+16*4], xmm4
+	movntps	[edi+16*5], xmm5
+	movntps	[edi+16*6], xmm6
+	movntps	[edi+16*7], xmm7
+	add	esi, 128			; advance src past the block just copied
+	add	edi, 128			; advance dst past the block just copied
+	dec	ecx				; one block fewer to go
+	jne	.blastloop			; loop until the block count reaches zero
+.skipblastloop:
+	mov	ecx, [esp+12+16]		; reload the original byte count
+	and	ecx, 127			; ecx = bytes % 128 = leftover bytes after the full blocks
+	cmp	ecx, 0
+	je	.nooddballs			; nothing left over -> done
+.loop:						; tail: copies the remaining (at most 127) bytes one at a time
+	mov	dl, [esi]			; dl = next source byte (edx is not preserved by this routine)
+	mov	[edi], dl			; write it to dst
+	inc	esi
+	inc	edi
+	dec	ecx
+	jne	.loop				; loop until the leftover count reaches zero
+.nooddballs:
+	pop	ebx				; restore the saved registers in reverse push order
+	pop	esi
+	pop	edi
+	pop	ebp
+	ret					; plain ret with no operand: the routine does not remove its arguments from the stack
+
 	end
diff --git a/src/thirdparty/VirtualDub/system/source/memory.cpp b/src/thirdparty/VirtualDub/system/source/memory.cpp
index 3e03b5d34..6edce7873 100644
--- 
a/src/thirdparty/VirtualDub/system/source/memory.cpp +++ b/src/thirdparty/VirtualDub/system/source/memory.cpp @@ -365,6 +365,7 @@ void VDMemset32Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h extern "C" void __cdecl VDFastMemcpyPartialScalarAligned8(void *dst, const void *src, size_t bytes); extern "C" void __cdecl VDFastMemcpyPartialMMX(void *dst, const void *src, size_t bytes); extern "C" void __cdecl VDFastMemcpyPartialMMX2(void *dst, const void *src, size_t bytes); + extern "C" void __cdecl VDFastMemcpyPartialSSE2(void *dst, const void *src, size_t bytes); void VDFastMemcpyPartialScalar(void *dst, const void *src, size_t bytes) { if (!(((int)dst | (int)src | bytes) & 7)) @@ -391,7 +392,13 @@ void VDMemset32Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h void VDFastMemcpyAutodetect() { long exts = CPUGetEnabledExtensions(); - if (exts & CPUF_SUPPORTS_SSE) { + // MPC custom code (begin) + if (exts & CPUF_SUPPORTS_SSE2) { + VDFastMemcpyPartial = VDFastMemcpyPartialSSE2; + VDFastMemcpyFinish = VDFastMemcpyFinishMMX2; + VDSwapMemory = VDSwapMemorySSE; + // MPC custom code (end) + } else if (exts & CPUF_SUPPORTS_SSE) { VDFastMemcpyPartial = VDFastMemcpyPartialMMX2; VDFastMemcpyFinish = VDFastMemcpyFinishMMX2; VDSwapMemory = VDSwapMemorySSE; @@ -426,6 +433,17 @@ void VDMemcpyRect(void *dst, ptrdiff_t dststride, const void *src, ptrdiff_t src if (w <= 0 || h <= 0) return; + // MPC custom code (begin) +#ifdef _DEBUG + if (CPUGetEnabledExtensions() & CPUF_SUPPORTS_SSE2) { + _ASSERT(!(((UINT_PTR)dst | (UINT_PTR)src) & 0xF)); + if (h > 1) { + _ASSERT(!((dststride | srcstride) & 0xF)); + } + } +#endif + // MPC custom code (end) + if (w == srcstride && w == dststride) VDFastMemcpyPartial(dst, src, w*h); // MPC custom code (begin) -- cgit v1.2.3