Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKacper Michajłow <kasper93@gmail.com>2017-08-13 17:39:25 +0300
committerKacper Michajłow <kasper93@gmail.com>2017-08-13 23:24:43 +0300
commitfe9d00c292419dc67849bd063812d8bb44feeb6d (patch)
tree1ee920e2a7158cbcd3937126d884afae9ef3e362
parent620d8e5bef3e071aa0a2e0d3d5b49f71f45ed712 (diff)
Do not over-optimize memsetd.
In our usage, the low-overhead rep stosd is faster than SSE2 stores. On modern CPUs, rep stosd is quite fast. We don't need high throughput; low overhead is more important for our use case.
-rw-r--r--src/DSUtil/DSUtil.cpp30
1 files changed, 1 insertions, 29 deletions
diff --git a/src/DSUtil/DSUtil.cpp b/src/DSUtil/DSUtil.cpp
index be4ab09e4..b7501b94b 100644
--- a/src/DSUtil/DSUtil.cpp
+++ b/src/DSUtil/DSUtil.cpp
@@ -919,35 +919,7 @@ REFERENCE_TIME HMSF2RT(DVD_HMSF_TIMECODE hmsf, double fps /*= -1.0*/)
void memsetd(void* dst, unsigned int c, size_t nbytes)
{
size_t n = nbytes / 4;
-
-#if defined(_M_IX86_FP) && _M_IX86_FP < 2
- if (!(g_cpuid.m_flags & g_cpuid.sse2)) { // No SSE2
- __stosd((unsigned long*)dst, c, n);
- return;
- }
-#endif
-
- size_t o = n - (n % 4);
-
- __m128i val = _mm_set1_epi32((int)c);
- if (((uintptr_t)dst & 0x0F) == 0) { // 16-byte aligned
- for (size_t i = 0; i < o; i += 4) {
- _mm_store_si128((__m128i*) & (((DWORD*)dst)[i]), val);
- }
- } else {
- for (size_t i = 0; i < o; i += 4) {
- _mm_storeu_si128((__m128i*) & (((DWORD*)dst)[i]), val);
- }
- }
-
- switch (n - o) {
- case 3:
- ((DWORD*)dst)[o + 2] = c;
- case 2:
- ((DWORD*)dst)[o + 1] = c;
- case 1:
- ((DWORD*)dst)[o + 0] = c;
- }
+ __stosd((unsigned long*)dst, c, n);
}
void memsetw(void* dst, unsigned short c, size_t nbytes)