Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/LAVFilters.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/common
diff options
context:
space:
mode:
author: Hendrik Leppkes <h.leppkes@gmail.com> 2017-03-20 12:53:07 +0300
committer: Hendrik Leppkes <h.leppkes@gmail.com> 2017-03-20 12:53:07 +0300
commit: e623b064ebeb46fb9159ef21877367a20eec91c2 (patch)
tree: 1d4933d8c25ab6fe12818604631dfae4beed5100 /common
parent: a11dd6ed22b33e42ba639a72ed5e6a8a0205b941 (diff)
gpu_memcpy: add a memory barrier to avoid compiler re-ordering
Diffstat (limited to 'common')
-rw-r--r-- common/DSUtilLite/gpu_memcpy_sse4.h | 5
1 files changed, 4 insertions, 1 deletions
diff --git a/common/DSUtilLite/gpu_memcpy_sse4.h b/common/DSUtilLite/gpu_memcpy_sse4.h
index 76eec8e1..48aec0f1 100644
--- a/common/DSUtilLite/gpu_memcpy_sse4.h
+++ b/common/DSUtilLite/gpu_memcpy_sse4.h
@@ -76,7 +76,9 @@ inline void* gpu_memcpy(void* d, const void* s, size_t size)
xmm14 = _mm_stream_load_si128(pSrc + 14);
xmm15 = _mm_stream_load_si128(pSrc + 15);
#endif
- pSrc += regsInLoop;
+
+ _ReadWriteBarrier();
+
// _mm_store_si128 emit the SSE2 intruction MOVDQA (aligned store)
_mm_store_si128(pTrg , xmm0);
_mm_store_si128(pTrg + 1, xmm1);
@@ -96,6 +98,7 @@ inline void* gpu_memcpy(void* d, const void* s, size_t size)
_mm_store_si128(pTrg + 14, xmm14);
_mm_store_si128(pTrg + 15, xmm15);
#endif
+ pSrc += regsInLoop;
pTrg += regsInLoop;
}