Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXhmikosR <xhmikosr@users.sourceforge.net>2011-11-26 00:27:49 +0400
committerXhmikosR <xhmikosr@users.sourceforge.net>2011-11-26 00:27:49 +0400
commit0a9ecedeb7afbb66807b52dcb2770b091a65fe84 (patch)
tree5473347eb080d8677154a568d0a187431068ef9e /src/DSUtil/simd.h
parent7f88a1b1e557b74b8392a29d16026f39fd3771ab (diff)
update manifest and simd headers from ffdshow
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@3855 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/DSUtil/simd.h')
-rw-r--r--src/DSUtil/simd.h85
1 files changed, 77 insertions, 8 deletions
diff --git a/src/DSUtil/simd.h b/src/DSUtil/simd.h
index 6e21c5ea1..8e5ceebb8 100644
--- a/src/DSUtil/simd.h
+++ b/src/DSUtil/simd.h
@@ -102,6 +102,37 @@ static __forceinline void psrad(__m64 &dst,int i)
dst=_mm_srai_pi32(dst,i);
}
+// load the same width as the register aligned
+static __forceinline void movVqa(__m64 &dst,const void *ptr)
+{
+ dst = *(__m64*)ptr;
+}
+
+// load the same width as the register un-aligned
+static __forceinline void movVqu(__m64 &dst,const void *ptr)
+{
+ dst = *(__m64*)ptr;
+}
+
+// store the same width as the register un-aligned
+static __forceinline void movVqu(void *ptr,const __m64 &m)
+{
+ *(__m64*)ptr=m;
+}
+
+// load half width of the register (4 bytes for MMX/MMXEXT)
+static __forceinline void movHalf(__m64 &dst, const void *ptr)
+{
+ dst = _mm_cvtsi32_si64(*(int*)ptr);
+}
+
+// load quarter width of the register (2 bytes for MMX/MMXEXT)
+static __forceinline void movQuarter(__m64 &m, const void *ptr)
+{
+ uint16_t d = *(uint16_t*)ptr;
+ m = _mm_cvtsi32_si64(int(d));
+}
+
static __forceinline void prefetcht0(const void *a)
{
_mm_prefetch((char*)a,_MM_HINT_T0);
@@ -307,6 +338,36 @@ static __forceinline void movhpd(void *dst,const __m128d &src)
_mm_storeh_pd((double*)dst,src);
}
+// load the same width as the register aligned
+static __forceinline void movVqa(__m128i &dst, const void *ptr)
+{
+ dst = _mm_load_si128((const __m128i*)ptr);
+}
+
+// load the same width as the register un-aligned
+static __forceinline void movVqu(__m128i &dst, const void *ptr)
+{
+ dst = _mm_loadu_si128((const __m128i*)ptr);
+}
+
+// store the same width as the register un-aligned
+static __forceinline void movVqu(void *ptr,const __m128i &m)
+{
+ _mm_storeu_si128((__m128i*)ptr,m);
+}
+
+// load half width of the register (8 bytes for SSE2)
+static __forceinline void movHalf(__m128i &dst, const void *ptr)
+{
+ dst = _mm_loadl_epi64((const __m128i*)ptr);
+}
+
+// load quarter width of the register (4 bytes for SSE2)
+static __forceinline void movQuarter(__m128i &dst, const void *ptr)
+{
+ dst = _mm_cvtsi32_si128(*(int*)ptr);
+}
+
#if defined(__INTEL_COMPILER) || (defined(__GNUC__) && __GNUC__>=4)
static __forceinline void movlpd(__m128i &dst,const void *src)
{
@@ -429,8 +490,6 @@ static __forceinline void movlhps(__m128i &dst,const __m128i &src)
static __forceinline __m shuffle_pi16_0(__m64 mm3) {return _mm_shuffle_pi16_0(mm3);} \
static __forceinline void store2(void *ptr,const __m &m) {*(int2*)ptr=_mm_cvtsi64_si32(m);} \
static __forceinline __m load2(const void *ptr) {return _mm_cvtsi32_si64(*(int2*)ptr);} \
- static __forceinline void storeU(void *ptr,const __m &m) {*(__m*)ptr=m;} \
- static __forceinline __m loadU(const void *ptr) {return *(__m*)ptr;} \
static __forceinline void empty(void) {_mm_empty();}
struct Tmmx {
@@ -593,6 +652,12 @@ struct Tmmx {
pmaxsw(mm0,mm2);
return mm0;
}
+ // store the same width as the register without polluting the cache (if supported)
+ static __forceinline void movntVq(void *ptr,const __m64 &m)
+ {
+ *(__m64*)ptr=m;
+ }
+
MMX_INSTRUCTIONS
};
@@ -682,6 +747,11 @@ struct Tmmxext {
static __forceinline void pmaxsw(__m64 &dst,const __m64 &src) {
dst=_mm_max_pi16(dst,src);
}
+ // store the same width as the register without polluting the cache (if supported)
+ static __forceinline void movntVq(void *ptr,const __m64 &m)
+ {
+ _mm_stream_pi((__m64 *)ptr, m);
+ }
MMX_INSTRUCTIONS
};
@@ -818,12 +888,6 @@ struct Tsse2 {
static __forceinline void store2(void *ptr,const __m &m) {
_mm_storel_epi64((__m128i*)ptr,m);
}
- static __forceinline void storeU(void *ptr,const __m &m) {
- _mm_storeu_si128((__m*)ptr,m);
- }
- static __forceinline __m loadU(const void *ptr) {
- return _mm_loadu_si128((const __m*)ptr);
- }
static __forceinline void empty(void) {
/*_mm_empty();*/
}
@@ -855,6 +919,11 @@ struct Tsse2 {
static __forceinline void sfence(void) {
_mm_sfence();
}
+ // store the same width as the register without polluting the cache
+ static __forceinline void movntVq(void *ptr,const __m128i &m)
+ {
+ _mm_stream_si128((__m128i*)ptr,m);
+ }
};
#endif //__SSE2__