From bcfb2e624297f659a5bcf28ae745c89b5d955f31 Mon Sep 17 00:00:00 2001 From: kinddragon Date: Fri, 28 May 2010 21:52:48 +0000 Subject: AP4_Track::GetTrackName fixed after update MPEG4 Splitter (ticket #499) We now call "Navigate forward" when a graph building error appears Quicktime bitmap lines now aligned More general VDFastMemcpyPartialSSE2 implementation git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1976 10f7b99b-c216-0410-bff0-8a66a9350fd8 --- .../VirtualDub/system/source/a_memory.asm | 114 +++++++++++---------- src/thirdparty/VirtualDub/system/source/memory.cpp | 24 ++--- 2 files changed, 71 insertions(+), 67 deletions(-) (limited to 'src/thirdparty') diff --git a/src/thirdparty/VirtualDub/system/source/a_memory.asm b/src/thirdparty/VirtualDub/system/source/a_memory.asm index 42a8ca998..d2316e55a 100644 --- a/src/thirdparty/VirtualDub/system/source/a_memory.asm +++ b/src/thirdparty/VirtualDub/system/source/a_memory.asm @@ -71,6 +71,65 @@ _VDFastMemcpyPartialMMX: pop edi ret + global _VDFastMemcpyPartialSSE2 +_VDFastMemcpyPartialSSE2: + push ebp + push edi + push esi + push ebx + + mov ecx, [esp+12+16] + cmp ecx, 128 + jb _VDFastMemcpyPartialMMX2.MMX2 + mov edi, [esp+4+16] + mov esi, [esp+8+16] + mov eax, edi + or eax, esi + test al, 15 + jne SHORT _VDFastMemcpyPartialMMX2.MMX2 + + shr ecx, 7 +.loop128: + prefetchnta [esi+16*8] + movaps xmm0, [esi] + movaps xmm1, [esi+16*1] + movaps xmm2, [esi+16*2] + movaps xmm3, [esi+16*3] + movaps xmm4, [esi+16*4] + movaps xmm5, [esi+16*5] + movaps xmm6, [esi+16*6] + movaps xmm7, [esi+16*7] + movntps [edi], xmm0 + movntps [edi+16*1], xmm1 + movntps [edi+16*2], xmm2 + movntps [edi+16*3], xmm3 + movntps [edi+16*4], xmm4 + movntps [edi+16*5], xmm5 + movntps [edi+16*6], xmm6 + movntps [edi+16*7], xmm7 + add esi, 128 + add edi, 128 + dec ecx + jne .loop128 +.skiploop128: + mov ecx, [esp+12+16] + and ecx, 127 + cmp ecx, 0 + je .nooddballs +.loop: + mov dl, [esi] + mov [edi], dl + inc esi + inc edi + dec ecx + jne 
.loop +.nooddballs: + pop ebx + pop esi + pop edi + pop ebp + ret + global _VDFastMemcpyPartialMMX2 _VDFastMemcpyPartialMMX2: push ebp @@ -78,6 +137,7 @@ _VDFastMemcpyPartialMMX2: push esi push ebx +.MMX2 mov ebx, [esp+4+16] mov edx, [esp+8+16] mov eax, [esp+12+16] @@ -130,60 +190,6 @@ _VDFastMemcpyPartialMMX2: pop ebp ret - global _VDFastMemcpyPartialSSE2 -_VDFastMemcpyPartialSSE2: - push ebp - push edi - push esi - push ebx - - mov edi, [esp+4+16] - mov esi, [esp+8+16] - mov ecx, [esp+12+16] - shr ecx, 7 - cmp ecx, 0 - jbe .skipblastloop -.blastloop: - prefetchnta [esi+16*8] - movaps xmm0, [esi] - movaps xmm1, [esi+16*1] - movaps xmm2, [esi+16*2] - movaps xmm3, [esi+16*3] - movaps xmm4, [esi+16*4] - movaps xmm5, [esi+16*5] - movaps xmm6, [esi+16*6] - movaps xmm7, [esi+16*7] - movntps [edi], xmm0 - movntps [edi+16*1], xmm1 - movntps [edi+16*2], xmm2 - movntps [edi+16*3], xmm3 - movntps [edi+16*4], xmm4 - movntps [edi+16*5], xmm5 - movntps [edi+16*6], xmm6 - movntps [edi+16*7], xmm7 - add esi, 128 - add edi, 128 - dec ecx - jne .blastloop -.skipblastloop: - mov ecx, [esp+12+16] - and ecx, 127 - cmp ecx, 0 - je .nooddballs -.loop: - mov dl, [esi] - mov [edi], dl - inc esi - inc edi - dec ecx - jne .loop -.nooddballs: - pop ebx - pop esi - pop edi - pop ebp - ret - end diff --git a/src/thirdparty/VirtualDub/system/source/memory.cpp b/src/thirdparty/VirtualDub/system/source/memory.cpp index 5ba295427..abc394e9a 100644 --- a/src/thirdparty/VirtualDub/system/source/memory.cpp +++ b/src/thirdparty/VirtualDub/system/source/memory.cpp @@ -392,7 +392,14 @@ void VDMemset32Rect(void *dst, ptrdiff_t pitch, uint32 value, size_t w, size_t h void VDFastMemcpyAutodetect() { long exts = CPUGetEnabledExtensions(); - if (exts & CPUF_SUPPORTS_SSE) { + // MPC custom code (begin) + if (exts & CPUF_SUPPORTS_SSE2) { + VDFastMemcpyPartial = VDFastMemcpyPartialSSE2; + VDFastMemcpyFinish = VDFastMemcpyFinishMMX2; + VDSwapMemory = VDSwapMemorySSE; + } + // MPC custom code (end) + else if 
(exts & CPUF_SUPPORTS_SSE) { VDFastMemcpyPartial = VDFastMemcpyPartialMMX2; VDFastMemcpyFinish = VDFastMemcpyFinishMMX2; VDSwapMemory = VDSwapMemorySSE; @@ -427,27 +434,18 @@ void VDMemcpyRect(void *dst, ptrdiff_t dststride, const void *src, ptrdiff_t src if (w <= 0 || h <= 0) return; - void (__cdecl *VDFastMemcpyPartial_)(void *dst, const void *src, size_t bytes) = VDFastMemcpyPartial; - // MPC custom code (begin) -#if defined(_WIN32) && defined(_M_IX86) - if ((CPUGetEnabledExtensions() & CPUF_SUPPORTS_SSE2) && - !(((UINT_PTR)dst | dststride | (UINT_PTR)src | srcstride) & 0xF)) - VDFastMemcpyPartial_ = VDFastMemcpyPartialSSE2; -#endif - // MPC custom code (end) - if (w == srcstride && w == dststride) - VDFastMemcpyPartial_(dst, src, w*h); + VDFastMemcpyPartial(dst, src, w*h); // MPC custom code (begin) else if (w == -srcstride && w == -dststride) - VDFastMemcpyPartial_((char *)dst + dststride * (h - 1), (char *)src + srcstride * (h - 1), w*h); + VDFastMemcpyPartial((char *)dst + dststride * (h - 1), (char *)src + srcstride * (h - 1), w*h); // MPC custom code (end) else { char *dst2 = (char *)dst; const char *src2 = (const char *)src; do { - VDFastMemcpyPartial_(dst2, src2, w); + VDFastMemcpyPartial(dst2, src2, w); dst2 += dststride; src2 += srcstride; } while(--h); -- cgit v1.2.3