Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: XhmikosR <xhmikosr@users.sourceforge.net> 2010-05-17 16:51:03 +0400
committer: XhmikosR <xhmikosr@users.sourceforge.net> 2010-05-17 16:51:03 +0400
commit: 651a19b2c72d75e3e0e486e5fe62308081370b9b (patch)
tree: ffc33bdc7bfc049374649cb32b78d727fe995b5d /src/DSUtil
parent: 9a03a08dc6198257815103631b313ffd2ea9e2fa (diff)
cleanup
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1891 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/DSUtil')
-rw-r--r-- src/DSUtil/mmintrin64.c 1254
-rw-r--r-- src/DSUtil/simd.h 5
-rw-r--r-- src/DSUtil/simd_common.h 14
-rw-r--r-- src/DSUtil/simd_instructions.h 4
4 files changed, 646 insertions, 631 deletions
diff --git a/src/DSUtil/mmintrin64.c b/src/DSUtil/mmintrin64.c
index 07b70b16a..d64bb4d2c 100644
--- a/src/DSUtil/mmintrin64.c
+++ b/src/DSUtil/mmintrin64.c
@@ -27,979 +27,979 @@
// MMX
__m64 _mm_setzero_si64(void)
{
- __m64 rv = {0};
- return rv;
+ __m64 rv = {0};
+ return rv;
}
__m64 _mm_set_pi32(int _I1, int _I0)
{
- __m64 rv = {0};
- rv.m64_i32[0] = _I0;
- rv.m64_i32[1] = _I1;
- return rv;
+ __m64 rv = {0};
+ rv.m64_i32[0] = _I0;
+ rv.m64_i32[1] = _I1;
+ return rv;
}
__m64 _mm_set_pi16(short _S3, short _S2, short _S1, short _S0)
{
- __m64 rv = {0};
- rv.m64_i16[0] = _S0;
- rv.m64_i16[1] = _S1;
- rv.m64_i16[2] = _S2;
- rv.m64_i16[3] = _S3;
- return rv;
+ __m64 rv = {0};
+ rv.m64_i16[0] = _S0;
+ rv.m64_i16[1] = _S1;
+ rv.m64_i16[2] = _S2;
+ rv.m64_i16[3] = _S3;
+ return rv;
}
__m64 _mm_set_pi8(char _B7, char _B6, char _B5, char _B4, char _B3, char _B2, char _B1, char _B0)
{
- __m64 rv = {0};
- rv.m64_i8[0] = _B0;
- rv.m64_i8[1] = _B1;
- rv.m64_i8[2] = _B2;
- rv.m64_i8[3] = _B3;
- rv.m64_i8[4] = _B4;
- rv.m64_i8[5] = _B5;
- rv.m64_i8[6] = _B6;
- rv.m64_i8[7] = _B7;
- return rv;
+ __m64 rv = {0};
+ rv.m64_i8[0] = _B0;
+ rv.m64_i8[1] = _B1;
+ rv.m64_i8[2] = _B2;
+ rv.m64_i8[3] = _B3;
+ rv.m64_i8[4] = _B4;
+ rv.m64_i8[5] = _B5;
+ rv.m64_i8[6] = _B6;
+ rv.m64_i8[7] = _B7;
+ return rv;
}
__m64 _mm_set1_pi32(int _I)
{
- __m64 rv = {0};
- rv.m64_i32[0] = rv.m64_i32[1] = _I;
- return rv;
+ __m64 rv = {0};
+ rv.m64_i32[0] = rv.m64_i32[1] = _I;
+ return rv;
}
__m64 _mm_set1_pi16(short _S)
{
- __m64 rv = {0};
- rv.m64_i16[0] = rv.m64_i16[1] = rv.m64_i16[2] = rv.m64_i16[3] = _S;
- return rv;
+ __m64 rv = {0};
+ rv.m64_i16[0] = rv.m64_i16[1] = rv.m64_i16[2] = rv.m64_i16[3] = _S;
+ return rv;
}
__m64 _mm_set1_pi8(char _B)
{
- __m64 rv = {0};
- rv.m64_i8[0] = rv.m64_i8[1] = rv.m64_i8[2] = rv.m64_i8[3] = rv.m64_i8[4] = rv.m64_i8[5] = rv.m64_i8[6] = rv.m64_i8[7] = _B;
- return rv;
+ __m64 rv = {0};
+ rv.m64_i8[0] = rv.m64_i8[1] = rv.m64_i8[2] = rv.m64_i8[3] = rv.m64_i8[4] = rv.m64_i8[5] = rv.m64_i8[6] = rv.m64_i8[7] = _B;
+ return rv;
}
__m64 _m_psubb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_sub_epi8(lhs, rhs);
+ lhs = _mm_sub_epi8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_paddusb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_adds_epu8(lhs, rhs);
+ lhs = _mm_adds_epu8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psubsw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_subs_epi16(lhs, rhs);
+ lhs = _mm_subs_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psubsb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_subs_epi8(lhs, rhs);
+ lhs = _mm_subs_epi8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_paddw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_add_epi16(lhs, rhs);
+ lhs = _mm_add_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
void _m_empty(void)
{
}
__m64 _m_packuswb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
- lhs.m128i_i64[1] = _MM2.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
+ lhs.m128i_i64[1] = _MM2.m64_i64;
- lhs = _mm_packus_epi16(lhs, rhs);
+ lhs = _mm_packus_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psrlwi(__m64 _M, int _Count)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _M.m64_i64;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _M.m64_i64;
- lhs = _mm_srli_epi16(lhs, _Count);
+ lhs = _mm_srli_epi16(lhs, _Count);
- _M.m64_i64 = lhs.m128i_i64[0];
- return _M;
+ _M.m64_i64 = lhs.m128i_i64[0];
+ return _M;
}
__m64 _m_pmullw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_mullo_epi16(lhs, rhs);
+ lhs = _mm_mullo_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_from_int(int _I)
{
- __m64 rv = {0};
- rv.m64_i32[0] = _I;
- return rv;
+ __m64 rv = {0};
+ rv.m64_i32[0] = _I;
+ return rv;
}
int _m_to_int(__m64 _M)
{
- return (int)(_M.m64_u64 & 0xffffffff);
+ return (int)(_M.m64_u64 & 0xffffffff);
}
__m64 _m_psrlqi(__m64 _M, int _Count)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _M.m64_i64;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _M.m64_i64;
- lhs = _mm_srli_epi64(lhs, _Count);
+ lhs = _mm_srli_epi64(lhs, _Count);
- _M.m64_i64 = lhs.m128i_i64[0];
- return _M;
+ _M.m64_i64 = lhs.m128i_i64[0];
+ return _M;
}
__m64 _m_paddd(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_add_epi32(lhs, rhs);
+ lhs = _mm_add_epi32(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pmaddwd(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_madd_epi16(lhs, rhs);
+ lhs = _mm_madd_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_punpcklbw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_unpacklo_epi8(lhs, rhs);
+ lhs = _mm_unpacklo_epi8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_paddb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_add_epi8(lhs, rhs);
+ lhs = _mm_add_epi8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_por(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_or_si128(lhs, rhs);
+ lhs = _mm_or_si128(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pand(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs, rhs;
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs, rhs;
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_and_si128(lhs, rhs);
+ lhs = _mm_and_si128(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pandn(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_andnot_si128(lhs, rhs);
+ lhs = _mm_andnot_si128(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pcmpgtb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_cmpgt_epi8(lhs, rhs);
+ lhs = _mm_cmpgt_epi8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psubusb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_subs_epu8(lhs, rhs);
+ lhs = _mm_subs_epu8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psrawi(__m64 _M, int _Count)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _M.m64_i64;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _M.m64_i64;
- lhs = _mm_srai_epi16(lhs, _Count);
+ lhs = _mm_srai_epi16(lhs, _Count);
- _M.m64_i64 = lhs.m128i_i64[0];
- return _M;
+ _M.m64_i64 = lhs.m128i_i64[0];
+ return _M;
}
__m64 _m_psubw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_sub_epi16(lhs, rhs);
+ lhs = _mm_sub_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psllwi(__m64 _M, int _Count)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _M.m64_i64;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _M.m64_i64;
- lhs = _mm_slli_epi16(lhs, _Count);
+ lhs = _mm_slli_epi16(lhs, _Count);
- _M.m64_i64 = lhs.m128i_i64[0];
- return _M;
+ _M.m64_i64 = lhs.m128i_i64[0];
+ return _M;
}
__m64 _m_paddusw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_adds_epu16(lhs, rhs);
+ lhs = _mm_adds_epu16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pxor(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_xor_si128(lhs, rhs);
+ lhs = _mm_xor_si128(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pslldi(__m64 _M, int _Count)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _M.m64_i64;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _M.m64_i64;
- lhs = _mm_slli_epi32(lhs, _Count);
+ lhs = _mm_slli_epi32(lhs, _Count);
- _M.m64_i64 = lhs.m128i_i64[0];
- return _M;
+ _M.m64_i64 = lhs.m128i_i64[0];
+ return _M;
}
__m64 _m_punpckhbw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i8[ 8] = _MM1.m64_i8[4];
- lhs.m128i_i8[ 9] = _MM1.m64_i8[5];
- lhs.m128i_i8[10] = _MM1.m64_i8[6];
- lhs.m128i_i8[11] = _MM1.m64_i8[7];
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i8[ 8] = _MM1.m64_i8[4];
+ lhs.m128i_i8[ 9] = _MM1.m64_i8[5];
+ lhs.m128i_i8[10] = _MM1.m64_i8[6];
+ lhs.m128i_i8[11] = _MM1.m64_i8[7];
- rhs.m128i_i8[ 8] = _MM2.m64_i8[4];
- rhs.m128i_i8[ 9] = _MM2.m64_i8[5];
- rhs.m128i_i8[10] = _MM2.m64_i8[6];
- rhs.m128i_i8[11] = _MM2.m64_i8[7];
+ rhs.m128i_i8[ 8] = _MM2.m64_i8[4];
+ rhs.m128i_i8[ 9] = _MM2.m64_i8[5];
+ rhs.m128i_i8[10] = _MM2.m64_i8[6];
+ rhs.m128i_i8[11] = _MM2.m64_i8[7];
- lhs = _mm_unpackhi_epi8(lhs, rhs);
+ lhs = _mm_unpackhi_epi8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_punpcklwd(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_unpacklo_epi16(lhs, rhs);
+ lhs = _mm_unpacklo_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_punpckldq(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_unpacklo_epi32(lhs, rhs);
+ lhs = _mm_unpacklo_epi32(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pcmpgtw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_cmpgt_epi16(lhs, rhs);
+ lhs = _mm_cmpgt_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pcmpgtd(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_cmpgt_epi32(lhs, rhs);
+ lhs = _mm_cmpgt_epi32(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pcmpeqb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_cmpeq_epi8(lhs, rhs);
+ lhs = _mm_cmpeq_epi8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pcmpeqd(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_cmpeq_epi32(lhs, rhs);
+ lhs = _mm_cmpeq_epi32(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_punpckhwd(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i16[4] = _MM1.m64_i16[2];
- lhs.m128i_i16[5] = _MM1.m64_i16[3];
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i16[4] = _MM1.m64_i16[2];
+ lhs.m128i_i16[5] = _MM1.m64_i16[3];
- rhs.m128i_i16[4] = _MM2.m64_i16[2];
- rhs.m128i_i16[5] = _MM2.m64_i16[3];
+ rhs.m128i_i16[4] = _MM2.m64_i16[2];
+ rhs.m128i_i16[5] = _MM2.m64_i16[3];
- lhs = _mm_unpackhi_epi16(lhs, rhs);
+ lhs = _mm_unpackhi_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_punpckhdq(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i32[2] = _MM1.m64_i32[1];
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i32[2] = _MM1.m64_i32[1];
- rhs.m128i_i32[2] = _MM2.m64_i32[1];
+ rhs.m128i_i32[2] = _MM2.m64_i32[1];
- lhs = _mm_unpackhi_epi32(lhs, rhs);
+ lhs = _mm_unpackhi_epi32(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psrldi(__m64 _M, int _Count)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _M.m64_i64;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _M.m64_i64;
- lhs = _mm_srli_epi32(lhs, _Count);
+ lhs = _mm_srli_epi32(lhs, _Count);
- _M.m64_i64 = lhs.m128i_i64[0];
- return _M;
+ _M.m64_i64 = lhs.m128i_i64[0];
+ return _M;
}
__m64 _m_psubd(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_sub_epi32(lhs, rhs);
+ lhs = _mm_sub_epi32(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pmulhw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_mulhi_epi16(lhs, rhs);
+ lhs = _mm_mulhi_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psllqi(__m64 _M, int _Count)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _M.m64_i64;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _M.m64_i64;
- lhs = _mm_slli_epi64(lhs, _Count);
+ lhs = _mm_slli_epi64(lhs, _Count);
- _M.m64_i64 = lhs.m128i_i64[0];
- return _M;
+ _M.m64_i64 = lhs.m128i_i64[0];
+ return _M;
}
__m64 _m_pcmpeqw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_cmpeq_epi16(lhs, rhs);
+ lhs = _mm_cmpeq_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_paddsb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_adds_epi8(lhs, rhs);
+ lhs = _mm_adds_epi8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_packsswb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_packs_epi16(lhs, rhs);
+ lhs = _mm_packs_epi16(lhs, rhs);
- _MM1.m64_i32[0] = lhs.m128i_i32[0];
- _MM1.m64_i32[1] = lhs.m128i_i32[2];
- return _MM1;
+ _MM1.m64_i32[0] = lhs.m128i_i32[0];
+ _MM1.m64_i32[1] = lhs.m128i_i32[2];
+ return _MM1;
}
__m64 _m_psradi(__m64 _M, int _Count)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _M.m64_i64;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _M.m64_i64;
- lhs = _mm_srai_epi32(lhs, _Count);
+ lhs = _mm_srai_epi32(lhs, _Count);
- _M.m64_i64 = lhs.m128i_i64[0];
- return _M;
+ _M.m64_i64 = lhs.m128i_i64[0];
+ return _M;
}
__m64 _m_paddsw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_adds_epi16(lhs, rhs);
+ lhs = _mm_adds_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psubusw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_subs_epu16(lhs, rhs);
+ lhs = _mm_subs_epu16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_packssdw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i32[0] = _MM1.m64_i32[0];
- lhs.m128i_i32[1] = _MM1.m64_i32[1];
- lhs.m128i_i32[2] = _MM2.m64_i32[0];
- lhs.m128i_i32[3] = _MM2.m64_i32[1];
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i32[0] = _MM1.m64_i32[0];
+ lhs.m128i_i32[1] = _MM1.m64_i32[1];
+ lhs.m128i_i32[2] = _MM2.m64_i32[0];
+ lhs.m128i_i32[3] = _MM2.m64_i32[1];
- lhs = _mm_packs_epi32(lhs, rhs);
+ lhs = _mm_packs_epi32(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psraw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_sra_epi16(lhs, rhs);
+ lhs = _mm_sra_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
// SSE
__m64 _m_pmaxub(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_max_epu8(lhs, rhs);
+ lhs = _mm_max_epu8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pminub(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_min_epu8(lhs, rhs);
+ lhs = _mm_min_epu8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pavgb(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_avg_epu8(lhs, rhs);
+ lhs = _mm_avg_epu8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pshufw(__m64 _MM1, int _Imm)
{
- __m128i lhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
-
- switch(_Imm)
- {
- case 0: lhs = _mm_shufflelo_epi16(lhs, 0); break;
- case 1: lhs = _mm_shufflelo_epi16(lhs, 1); break;
- case 2: lhs = _mm_shufflelo_epi16(lhs, 2); break;
- case 3: lhs = _mm_shufflelo_epi16(lhs, 3); break;
- case 4: lhs = _mm_shufflelo_epi16(lhs, 4); break;
- case 5: lhs = _mm_shufflelo_epi16(lhs, 5); break;
- case 6: lhs = _mm_shufflelo_epi16(lhs, 6); break;
- case 7: lhs = _mm_shufflelo_epi16(lhs, 7); break;
- case 8: lhs = _mm_shufflelo_epi16(lhs, 8); break;
- case 9: lhs = _mm_shufflelo_epi16(lhs, 9); break;
- case 10: lhs = _mm_shufflelo_epi16(lhs, 10); break;
- case 11: lhs = _mm_shufflelo_epi16(lhs, 11); break;
- case 12: lhs = _mm_shufflelo_epi16(lhs, 12); break;
- case 13: lhs = _mm_shufflelo_epi16(lhs, 13); break;
- case 14: lhs = _mm_shufflelo_epi16(lhs, 14); break;
- case 15: lhs = _mm_shufflelo_epi16(lhs, 15); break;
- case 16: lhs = _mm_shufflelo_epi16(lhs, 16); break;
- case 17: lhs = _mm_shufflelo_epi16(lhs, 17); break;
- case 18: lhs = _mm_shufflelo_epi16(lhs, 18); break;
- case 19: lhs = _mm_shufflelo_epi16(lhs, 19); break;
- case 20: lhs = _mm_shufflelo_epi16(lhs, 20); break;
- case 21: lhs = _mm_shufflelo_epi16(lhs, 21); break;
- case 22: lhs = _mm_shufflelo_epi16(lhs, 22); break;
- case 23: lhs = _mm_shufflelo_epi16(lhs, 23); break;
- case 24: lhs = _mm_shufflelo_epi16(lhs, 24); break;
- case 25: lhs = _mm_shufflelo_epi16(lhs, 25); break;
- case 26: lhs = _mm_shufflelo_epi16(lhs, 26); break;
- case 27: lhs = _mm_shufflelo_epi16(lhs, 27); break;
- case 28: lhs = _mm_shufflelo_epi16(lhs, 28); break;
- case 29: lhs = _mm_shufflelo_epi16(lhs, 29); break;
- case 30: lhs = _mm_shufflelo_epi16(lhs, 30); break;
- case 31: lhs = _mm_shufflelo_epi16(lhs, 31); break;
- case 32: lhs = _mm_shufflelo_epi16(lhs, 32); break;
- case 33: lhs = _mm_shufflelo_epi16(lhs, 33); break;
- case 34: lhs = _mm_shufflelo_epi16(lhs, 34); break;
- case 35: lhs = _mm_shufflelo_epi16(lhs, 35); break;
- case 36: lhs = _mm_shufflelo_epi16(lhs, 36); break;
- case 37: lhs = _mm_shufflelo_epi16(lhs, 37); break;
- case 38: lhs = _mm_shufflelo_epi16(lhs, 38); break;
- case 39: lhs = _mm_shufflelo_epi16(lhs, 39); break;
- case 40: lhs = _mm_shufflelo_epi16(lhs, 40); break;
- case 41: lhs = _mm_shufflelo_epi16(lhs, 41); break;
- case 42: lhs = _mm_shufflelo_epi16(lhs, 42); break;
- case 43: lhs = _mm_shufflelo_epi16(lhs, 43); break;
- case 44: lhs = _mm_shufflelo_epi16(lhs, 44); break;
- case 45: lhs = _mm_shufflelo_epi16(lhs, 45); break;
- case 46: lhs = _mm_shufflelo_epi16(lhs, 46); break;
- case 47: lhs = _mm_shufflelo_epi16(lhs, 47); break;
- case 48: lhs = _mm_shufflelo_epi16(lhs, 48); break;
- case 49: lhs = _mm_shufflelo_epi16(lhs, 49); break;
- case 50: lhs = _mm_shufflelo_epi16(lhs, 50); break;
- case 51: lhs = _mm_shufflelo_epi16(lhs, 51); break;
- case 52: lhs = _mm_shufflelo_epi16(lhs, 52); break;
- case 53: lhs = _mm_shufflelo_epi16(lhs, 53); break;
- case 54: lhs = _mm_shufflelo_epi16(lhs, 54); break;
- case 55: lhs = _mm_shufflelo_epi16(lhs, 55); break;
- case 56: lhs = _mm_shufflelo_epi16(lhs, 56); break;
- case 57: lhs = _mm_shufflelo_epi16(lhs, 57); break;
- case 58: lhs = _mm_shufflelo_epi16(lhs, 58); break;
- case 59: lhs = _mm_shufflelo_epi16(lhs, 59); break;
- case 60: lhs = _mm_shufflelo_epi16(lhs, 60); break;
- case 61: lhs = _mm_shufflelo_epi16(lhs, 61); break;
- case 62: lhs = _mm_shufflelo_epi16(lhs, 62); break;
- case 63: lhs = _mm_shufflelo_epi16(lhs, 63); break;
- case 64: lhs = _mm_shufflelo_epi16(lhs, 64); break;
- case 65: lhs = _mm_shufflelo_epi16(lhs, 65); break;
- case 66: lhs = _mm_shufflelo_epi16(lhs, 66); break;
- case 67: lhs = _mm_shufflelo_epi16(lhs, 67); break;
- case 68: lhs = _mm_shufflelo_epi16(lhs, 68); break;
- case 69: lhs = _mm_shufflelo_epi16(lhs, 69); break;
- case 70: lhs = _mm_shufflelo_epi16(lhs, 70); break;
- case 71: lhs = _mm_shufflelo_epi16(lhs, 71); break;
- case 72: lhs = _mm_shufflelo_epi16(lhs, 72); break;
- case 73: lhs = _mm_shufflelo_epi16(lhs, 73); break;
- case 74: lhs = _mm_shufflelo_epi16(lhs, 74); break;
- case 75: lhs = _mm_shufflelo_epi16(lhs, 75); break;
- case 76: lhs = _mm_shufflelo_epi16(lhs, 76); break;
- case 77: lhs = _mm_shufflelo_epi16(lhs, 77); break;
- case 78: lhs = _mm_shufflelo_epi16(lhs, 78); break;
- case 79: lhs = _mm_shufflelo_epi16(lhs, 79); break;
- case 80: lhs = _mm_shufflelo_epi16(lhs, 80); break;
- case 81: lhs = _mm_shufflelo_epi16(lhs, 81); break;
- case 82: lhs = _mm_shufflelo_epi16(lhs, 82); break;
- case 83: lhs = _mm_shufflelo_epi16(lhs, 83); break;
- case 84: lhs = _mm_shufflelo_epi16(lhs, 84); break;
- case 85: lhs = _mm_shufflelo_epi16(lhs, 85); break;
- case 86: lhs = _mm_shufflelo_epi16(lhs, 86); break;
- case 87: lhs = _mm_shufflelo_epi16(lhs, 87); break;
- case 88: lhs = _mm_shufflelo_epi16(lhs, 88); break;
- case 89: lhs = _mm_shufflelo_epi16(lhs, 89); break;
- case 90: lhs = _mm_shufflelo_epi16(lhs, 90); break;
- case 91: lhs = _mm_shufflelo_epi16(lhs, 91); break;
- case 92: lhs = _mm_shufflelo_epi16(lhs, 92); break;
- case 93: lhs = _mm_shufflelo_epi16(lhs, 93); break;
- case 94: lhs = _mm_shufflelo_epi16(lhs, 94); break;
- case 95: lhs = _mm_shufflelo_epi16(lhs, 95); break;
- case 96: lhs = _mm_shufflelo_epi16(lhs, 96); break;
- case 97: lhs = _mm_shufflelo_epi16(lhs, 97); break;
- case 98: lhs = _mm_shufflelo_epi16(lhs, 98); break;
- case 99: lhs = _mm_shufflelo_epi16(lhs, 99); break;
- case 100: lhs = _mm_shufflelo_epi16(lhs, 100); break;
- case 101: lhs = _mm_shufflelo_epi16(lhs, 101); break;
- case 102: lhs = _mm_shufflelo_epi16(lhs, 102); break;
- case 103: lhs = _mm_shufflelo_epi16(lhs, 103); break;
- case 104: lhs = _mm_shufflelo_epi16(lhs, 104); break;
- case 105: lhs = _mm_shufflelo_epi16(lhs, 105); break;
- case 106: lhs = _mm_shufflelo_epi16(lhs, 106); break;
- case 107: lhs = _mm_shufflelo_epi16(lhs, 107); break;
- case 108: lhs = _mm_shufflelo_epi16(lhs, 108); break;
- case 109: lhs = _mm_shufflelo_epi16(lhs, 109); break;
- case 110: lhs = _mm_shufflelo_epi16(lhs, 110); break;
- case 111: lhs = _mm_shufflelo_epi16(lhs, 111); break;
- case 112: lhs = _mm_shufflelo_epi16(lhs, 112); break;
- case 113: lhs = _mm_shufflelo_epi16(lhs, 113); break;
- case 114: lhs = _mm_shufflelo_epi16(lhs, 114); break;
- case 115: lhs = _mm_shufflelo_epi16(lhs, 115); break;
- case 116: lhs = _mm_shufflelo_epi16(lhs, 116); break;
- case 117: lhs = _mm_shufflelo_epi16(lhs, 117); break;
- case 118: lhs = _mm_shufflelo_epi16(lhs, 118); break;
- case 119: lhs = _mm_shufflelo_epi16(lhs, 119); break;
- case 120: lhs = _mm_shufflelo_epi16(lhs, 120); break;
- case 121: lhs = _mm_shufflelo_epi16(lhs, 121); break;
- case 122: lhs = _mm_shufflelo_epi16(lhs, 122); break;
- case 123: lhs = _mm_shufflelo_epi16(lhs, 123); break;
- case 124: lhs = _mm_shufflelo_epi16(lhs, 124); break;
- case 125: lhs = _mm_shufflelo_epi16(lhs, 125); break;
- case 126: lhs = _mm_shufflelo_epi16(lhs, 126); break;
- case 127: lhs = _mm_shufflelo_epi16(lhs, 127); break;
- case 128: lhs = _mm_shufflelo_epi16(lhs, 128); break;
- case 129: lhs = _mm_shufflelo_epi16(lhs, 129); break;
- case 130: lhs = _mm_shufflelo_epi16(lhs, 130); break;
- case 131: lhs = _mm_shufflelo_epi16(lhs, 131); break;
- case 132: lhs = _mm_shufflelo_epi16(lhs, 132); break;
- case 133: lhs = _mm_shufflelo_epi16(lhs, 133); break;
- case 134: lhs = _mm_shufflelo_epi16(lhs, 134); break;
- case 135: lhs = _mm_shufflelo_epi16(lhs, 135); break;
- case 136: lhs = _mm_shufflelo_epi16(lhs, 136); break;
- case 137: lhs = _mm_shufflelo_epi16(lhs, 137); break;
- case 138: lhs = _mm_shufflelo_epi16(lhs, 138); break;
- case 139: lhs = _mm_shufflelo_epi16(lhs, 139); break;
- case 140: lhs = _mm_shufflelo_epi16(lhs, 140); break;
- case 141: lhs = _mm_shufflelo_epi16(lhs, 141); break;
- case 142: lhs = _mm_shufflelo_epi16(lhs, 142); break;
- case 143: lhs = _mm_shufflelo_epi16(lhs, 143); break;
- case 144: lhs = _mm_shufflelo_epi16(lhs, 144); break;
- case 145: lhs = _mm_shufflelo_epi16(lhs, 145); break;
- case 146: lhs = _mm_shufflelo_epi16(lhs, 146); break;
- case 147: lhs = _mm_shufflelo_epi16(lhs, 147); break;
- case 148: lhs = _mm_shufflelo_epi16(lhs, 148); break;
- case 149: lhs = _mm_shufflelo_epi16(lhs, 149); break;
- case 150: lhs = _mm_shufflelo_epi16(lhs, 150); break;
- case 151: lhs = _mm_shufflelo_epi16(lhs, 151); break;
- case 152: lhs = _mm_shufflelo_epi16(lhs, 152); break;
- case 153: lhs = _mm_shufflelo_epi16(lhs, 153); break;
- case 154: lhs = _mm_shufflelo_epi16(lhs, 154); break;
- case 155: lhs = _mm_shufflelo_epi16(lhs, 155); break;
- case 156: lhs = _mm_shufflelo_epi16(lhs, 156); break;
- case 157: lhs = _mm_shufflelo_epi16(lhs, 157); break;
- case 158: lhs = _mm_shufflelo_epi16(lhs, 158); break;
- case 159: lhs = _mm_shufflelo_epi16(lhs, 159); break;
- case 160: lhs = _mm_shufflelo_epi16(lhs, 160); break;
- case 161: lhs = _mm_shufflelo_epi16(lhs, 161); break;
- case 162: lhs = _mm_shufflelo_epi16(lhs, 162); break;
- case 163: lhs = _mm_shufflelo_epi16(lhs, 163); break;
- case 164: lhs = _mm_shufflelo_epi16(lhs, 164); break;
- case 165: lhs = _mm_shufflelo_epi16(lhs, 165); break;
- case 166: lhs = _mm_shufflelo_epi16(lhs, 166); break;
- case 167: lhs = _mm_shufflelo_epi16(lhs, 167); break;
- case 168: lhs = _mm_shufflelo_epi16(lhs, 168); break;
- case 169: lhs = _mm_shufflelo_epi16(lhs, 169); break;
- case 170: lhs = _mm_shufflelo_epi16(lhs, 170); break;
- case 171: lhs = _mm_shufflelo_epi16(lhs, 171); break;
- case 172: lhs = _mm_shufflelo_epi16(lhs, 172); break;
- case 173: lhs = _mm_shufflelo_epi16(lhs, 173); break;
- case 174: lhs = _mm_shufflelo_epi16(lhs, 174); break;
- case 175: lhs = _mm_shufflelo_epi16(lhs, 175); break;
- case 176: lhs = _mm_shufflelo_epi16(lhs, 176); break;
- case 177: lhs = _mm_shufflelo_epi16(lhs, 177); break;
- case 178: lhs = _mm_shufflelo_epi16(lhs, 178); break;
- case 179: lhs = _mm_shufflelo_epi16(lhs, 179); break;
- case 180: lhs = _mm_shufflelo_epi16(lhs, 180); break;
- case 181: lhs = _mm_shufflelo_epi16(lhs, 181); break;
- case 182: lhs = _mm_shufflelo_epi16(lhs, 182); break;
- case 183: lhs = _mm_shufflelo_epi16(lhs, 183); break;
- case 184: lhs = _mm_shufflelo_epi16(lhs, 184); break;
- case 185: lhs = _mm_shufflelo_epi16(lhs, 185); break;
- case 186: lhs = _mm_shufflelo_epi16(lhs, 186); break;
- case 187: lhs = _mm_shufflelo_epi16(lhs, 187); break;
- case 188: lhs = _mm_shufflelo_epi16(lhs, 188); break;
- case 189: lhs = _mm_shufflelo_epi16(lhs, 189); break;
- case 190: lhs = _mm_shufflelo_epi16(lhs, 190); break;
- case 191: lhs = _mm_shufflelo_epi16(lhs, 191); break;
- case 192: lhs = _mm_shufflelo_epi16(lhs, 192); break;
- case 193: lhs = _mm_shufflelo_epi16(lhs, 193); break;
- case 194: lhs = _mm_shufflelo_epi16(lhs, 194); break;
- case 195: lhs = _mm_shufflelo_epi16(lhs, 195); break;
- case 196: lhs = _mm_shufflelo_epi16(lhs, 196); break;
- case 197: lhs = _mm_shufflelo_epi16(lhs, 197); break;
- case 198: lhs = _mm_shufflelo_epi16(lhs, 198); break;
- case 199: lhs = _mm_shufflelo_epi16(lhs, 199); break;
- case 200: lhs = _mm_shufflelo_epi16(lhs, 200); break;
- case 201: lhs = _mm_shufflelo_epi16(lhs, 201); break;
- case 202: lhs = _mm_shufflelo_epi16(lhs, 202); break;
- case 203: lhs = _mm_shufflelo_epi16(lhs, 203); break;
- case 204: lhs = _mm_shufflelo_epi16(lhs, 204); break;
- case 205: lhs = _mm_shufflelo_epi16(lhs, 205); break;
- case 206: lhs = _mm_shufflelo_epi16(lhs, 206); break;
- case 207: lhs = _mm_shufflelo_epi16(lhs, 207); break;
- case 208: lhs = _mm_shufflelo_epi16(lhs, 208); break;
- case 209: lhs = _mm_shufflelo_epi16(lhs, 209); break;
- case 210: lhs = _mm_shufflelo_epi16(lhs, 210); break;
- case 211: lhs = _mm_shufflelo_epi16(lhs, 211); break;
- case 212: lhs = _mm_shufflelo_epi16(lhs, 212); break;
- case 213: lhs = _mm_shufflelo_epi16(lhs, 213); break;
- case 214: lhs = _mm_shufflelo_epi16(lhs, 214); break;
- case 215: lhs = _mm_shufflelo_epi16(lhs, 215); break;
- case 216: lhs = _mm_shufflelo_epi16(lhs, 216); break;
- case 217: lhs = _mm_shufflelo_epi16(lhs, 217); break;
- case 218: lhs = _mm_shufflelo_epi16(lhs, 218); break;
- case 219: lhs = _mm_shufflelo_epi16(lhs, 219); break;
- case 220: lhs = _mm_shufflelo_epi16(lhs, 220); break;
- case 221: lhs = _mm_shufflelo_epi16(lhs, 221); break;
- case 222: lhs = _mm_shufflelo_epi16(lhs, 222); break;
- case 223: lhs = _mm_shufflelo_epi16(lhs, 223); break;
- case 224: lhs = _mm_shufflelo_epi16(lhs, 224); break;
- case 225: lhs = _mm_shufflelo_epi16(lhs, 225); break;
- case 226: lhs = _mm_shufflelo_epi16(lhs, 226); break;
- case 227: lhs = _mm_shufflelo_epi16(lhs, 227); break;
- case 228: lhs = _mm_shufflelo_epi16(lhs, 228); break;
- case 229: lhs = _mm_shufflelo_epi16(lhs, 229); break;
- case 230: lhs = _mm_shufflelo_epi16(lhs, 230); break;
- case 231: lhs = _mm_shufflelo_epi16(lhs, 231); break;
- case 232: lhs = _mm_shufflelo_epi16(lhs, 232); break;
- case 233: lhs = _mm_shufflelo_epi16(lhs, 233); break;
- case 234: lhs = _mm_shufflelo_epi16(lhs, 234); break;
- case 235: lhs = _mm_shufflelo_epi16(lhs, 235); break;
- case 236: lhs = _mm_shufflelo_epi16(lhs, 236); break;
- case 237: lhs = _mm_shufflelo_epi16(lhs, 237); break;
- case 238: lhs = _mm_shufflelo_epi16(lhs, 238); break;
- case 239: lhs = _mm_shufflelo_epi16(lhs, 239); break;
- case 240: lhs = _mm_shufflelo_epi16(lhs, 240); break;
- case 241: lhs = _mm_shufflelo_epi16(lhs, 241); break;
- case 242: lhs = _mm_shufflelo_epi16(lhs, 242); break;
- case 243: lhs = _mm_shufflelo_epi16(lhs, 243); break;
- case 244: lhs = _mm_shufflelo_epi16(lhs, 244); break;
- case 245: lhs = _mm_shufflelo_epi16(lhs, 245); break;
- case 246: lhs = _mm_shufflelo_epi16(lhs, 246); break;
- case 247: lhs = _mm_shufflelo_epi16(lhs, 247); break;
- case 248: lhs = _mm_shufflelo_epi16(lhs, 248); break;
- case 249: lhs = _mm_shufflelo_epi16(lhs, 249); break;
- case 250: lhs = _mm_shufflelo_epi16(lhs, 250); break;
- case 251: lhs = _mm_shufflelo_epi16(lhs, 251); break;
- case 252: lhs = _mm_shufflelo_epi16(lhs, 252); break;
- case 253: lhs = _mm_shufflelo_epi16(lhs, 253); break;
- case 254: lhs = _mm_shufflelo_epi16(lhs, 254); break;
- case 255: lhs = _mm_shufflelo_epi16(lhs, 255); break;
- }
-
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ __m128i lhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
+
+ switch(_Imm)
+ {
+ case 0: lhs = _mm_shufflelo_epi16(lhs, 0); break;
+ case 1: lhs = _mm_shufflelo_epi16(lhs, 1); break;
+ case 2: lhs = _mm_shufflelo_epi16(lhs, 2); break;
+ case 3: lhs = _mm_shufflelo_epi16(lhs, 3); break;
+ case 4: lhs = _mm_shufflelo_epi16(lhs, 4); break;
+ case 5: lhs = _mm_shufflelo_epi16(lhs, 5); break;
+ case 6: lhs = _mm_shufflelo_epi16(lhs, 6); break;
+ case 7: lhs = _mm_shufflelo_epi16(lhs, 7); break;
+ case 8: lhs = _mm_shufflelo_epi16(lhs, 8); break;
+ case 9: lhs = _mm_shufflelo_epi16(lhs, 9); break;
+ case 10: lhs = _mm_shufflelo_epi16(lhs, 10); break;
+ case 11: lhs = _mm_shufflelo_epi16(lhs, 11); break;
+ case 12: lhs = _mm_shufflelo_epi16(lhs, 12); break;
+ case 13: lhs = _mm_shufflelo_epi16(lhs, 13); break;
+ case 14: lhs = _mm_shufflelo_epi16(lhs, 14); break;
+ case 15: lhs = _mm_shufflelo_epi16(lhs, 15); break;
+ case 16: lhs = _mm_shufflelo_epi16(lhs, 16); break;
+ case 17: lhs = _mm_shufflelo_epi16(lhs, 17); break;
+ case 18: lhs = _mm_shufflelo_epi16(lhs, 18); break;
+ case 19: lhs = _mm_shufflelo_epi16(lhs, 19); break;
+ case 20: lhs = _mm_shufflelo_epi16(lhs, 20); break;
+ case 21: lhs = _mm_shufflelo_epi16(lhs, 21); break;
+ case 22: lhs = _mm_shufflelo_epi16(lhs, 22); break;
+ case 23: lhs = _mm_shufflelo_epi16(lhs, 23); break;
+ case 24: lhs = _mm_shufflelo_epi16(lhs, 24); break;
+ case 25: lhs = _mm_shufflelo_epi16(lhs, 25); break;
+ case 26: lhs = _mm_shufflelo_epi16(lhs, 26); break;
+ case 27: lhs = _mm_shufflelo_epi16(lhs, 27); break;
+ case 28: lhs = _mm_shufflelo_epi16(lhs, 28); break;
+ case 29: lhs = _mm_shufflelo_epi16(lhs, 29); break;
+ case 30: lhs = _mm_shufflelo_epi16(lhs, 30); break;
+ case 31: lhs = _mm_shufflelo_epi16(lhs, 31); break;
+ case 32: lhs = _mm_shufflelo_epi16(lhs, 32); break;
+ case 33: lhs = _mm_shufflelo_epi16(lhs, 33); break;
+ case 34: lhs = _mm_shufflelo_epi16(lhs, 34); break;
+ case 35: lhs = _mm_shufflelo_epi16(lhs, 35); break;
+ case 36: lhs = _mm_shufflelo_epi16(lhs, 36); break;
+ case 37: lhs = _mm_shufflelo_epi16(lhs, 37); break;
+ case 38: lhs = _mm_shufflelo_epi16(lhs, 38); break;
+ case 39: lhs = _mm_shufflelo_epi16(lhs, 39); break;
+ case 40: lhs = _mm_shufflelo_epi16(lhs, 40); break;
+ case 41: lhs = _mm_shufflelo_epi16(lhs, 41); break;
+ case 42: lhs = _mm_shufflelo_epi16(lhs, 42); break;
+ case 43: lhs = _mm_shufflelo_epi16(lhs, 43); break;
+ case 44: lhs = _mm_shufflelo_epi16(lhs, 44); break;
+ case 45: lhs = _mm_shufflelo_epi16(lhs, 45); break;
+ case 46: lhs = _mm_shufflelo_epi16(lhs, 46); break;
+ case 47: lhs = _mm_shufflelo_epi16(lhs, 47); break;
+ case 48: lhs = _mm_shufflelo_epi16(lhs, 48); break;
+ case 49: lhs = _mm_shufflelo_epi16(lhs, 49); break;
+ case 50: lhs = _mm_shufflelo_epi16(lhs, 50); break;
+ case 51: lhs = _mm_shufflelo_epi16(lhs, 51); break;
+ case 52: lhs = _mm_shufflelo_epi16(lhs, 52); break;
+ case 53: lhs = _mm_shufflelo_epi16(lhs, 53); break;
+ case 54: lhs = _mm_shufflelo_epi16(lhs, 54); break;
+ case 55: lhs = _mm_shufflelo_epi16(lhs, 55); break;
+ case 56: lhs = _mm_shufflelo_epi16(lhs, 56); break;
+ case 57: lhs = _mm_shufflelo_epi16(lhs, 57); break;
+ case 58: lhs = _mm_shufflelo_epi16(lhs, 58); break;
+ case 59: lhs = _mm_shufflelo_epi16(lhs, 59); break;
+ case 60: lhs = _mm_shufflelo_epi16(lhs, 60); break;
+ case 61: lhs = _mm_shufflelo_epi16(lhs, 61); break;
+ case 62: lhs = _mm_shufflelo_epi16(lhs, 62); break;
+ case 63: lhs = _mm_shufflelo_epi16(lhs, 63); break;
+ case 64: lhs = _mm_shufflelo_epi16(lhs, 64); break;
+ case 65: lhs = _mm_shufflelo_epi16(lhs, 65); break;
+ case 66: lhs = _mm_shufflelo_epi16(lhs, 66); break;
+ case 67: lhs = _mm_shufflelo_epi16(lhs, 67); break;
+ case 68: lhs = _mm_shufflelo_epi16(lhs, 68); break;
+ case 69: lhs = _mm_shufflelo_epi16(lhs, 69); break;
+ case 70: lhs = _mm_shufflelo_epi16(lhs, 70); break;
+ case 71: lhs = _mm_shufflelo_epi16(lhs, 71); break;
+ case 72: lhs = _mm_shufflelo_epi16(lhs, 72); break;
+ case 73: lhs = _mm_shufflelo_epi16(lhs, 73); break;
+ case 74: lhs = _mm_shufflelo_epi16(lhs, 74); break;
+ case 75: lhs = _mm_shufflelo_epi16(lhs, 75); break;
+ case 76: lhs = _mm_shufflelo_epi16(lhs, 76); break;
+ case 77: lhs = _mm_shufflelo_epi16(lhs, 77); break;
+ case 78: lhs = _mm_shufflelo_epi16(lhs, 78); break;
+ case 79: lhs = _mm_shufflelo_epi16(lhs, 79); break;
+ case 80: lhs = _mm_shufflelo_epi16(lhs, 80); break;
+ case 81: lhs = _mm_shufflelo_epi16(lhs, 81); break;
+ case 82: lhs = _mm_shufflelo_epi16(lhs, 82); break;
+ case 83: lhs = _mm_shufflelo_epi16(lhs, 83); break;
+ case 84: lhs = _mm_shufflelo_epi16(lhs, 84); break;
+ case 85: lhs = _mm_shufflelo_epi16(lhs, 85); break;
+ case 86: lhs = _mm_shufflelo_epi16(lhs, 86); break;
+ case 87: lhs = _mm_shufflelo_epi16(lhs, 87); break;
+ case 88: lhs = _mm_shufflelo_epi16(lhs, 88); break;
+ case 89: lhs = _mm_shufflelo_epi16(lhs, 89); break;
+ case 90: lhs = _mm_shufflelo_epi16(lhs, 90); break;
+ case 91: lhs = _mm_shufflelo_epi16(lhs, 91); break;
+ case 92: lhs = _mm_shufflelo_epi16(lhs, 92); break;
+ case 93: lhs = _mm_shufflelo_epi16(lhs, 93); break;
+ case 94: lhs = _mm_shufflelo_epi16(lhs, 94); break;
+ case 95: lhs = _mm_shufflelo_epi16(lhs, 95); break;
+ case 96: lhs = _mm_shufflelo_epi16(lhs, 96); break;
+ case 97: lhs = _mm_shufflelo_epi16(lhs, 97); break;
+ case 98: lhs = _mm_shufflelo_epi16(lhs, 98); break;
+ case 99: lhs = _mm_shufflelo_epi16(lhs, 99); break;
+ case 100: lhs = _mm_shufflelo_epi16(lhs, 100); break;
+ case 101: lhs = _mm_shufflelo_epi16(lhs, 101); break;
+ case 102: lhs = _mm_shufflelo_epi16(lhs, 102); break;
+ case 103: lhs = _mm_shufflelo_epi16(lhs, 103); break;
+ case 104: lhs = _mm_shufflelo_epi16(lhs, 104); break;
+ case 105: lhs = _mm_shufflelo_epi16(lhs, 105); break;
+ case 106: lhs = _mm_shufflelo_epi16(lhs, 106); break;
+ case 107: lhs = _mm_shufflelo_epi16(lhs, 107); break;
+ case 108: lhs = _mm_shufflelo_epi16(lhs, 108); break;
+ case 109: lhs = _mm_shufflelo_epi16(lhs, 109); break;
+ case 110: lhs = _mm_shufflelo_epi16(lhs, 110); break;
+ case 111: lhs = _mm_shufflelo_epi16(lhs, 111); break;
+ case 112: lhs = _mm_shufflelo_epi16(lhs, 112); break;
+ case 113: lhs = _mm_shufflelo_epi16(lhs, 113); break;
+ case 114: lhs = _mm_shufflelo_epi16(lhs, 114); break;
+ case 115: lhs = _mm_shufflelo_epi16(lhs, 115); break;
+ case 116: lhs = _mm_shufflelo_epi16(lhs, 116); break;
+ case 117: lhs = _mm_shufflelo_epi16(lhs, 117); break;
+ case 118: lhs = _mm_shufflelo_epi16(lhs, 118); break;
+ case 119: lhs = _mm_shufflelo_epi16(lhs, 119); break;
+ case 120: lhs = _mm_shufflelo_epi16(lhs, 120); break;
+ case 121: lhs = _mm_shufflelo_epi16(lhs, 121); break;
+ case 122: lhs = _mm_shufflelo_epi16(lhs, 122); break;
+ case 123: lhs = _mm_shufflelo_epi16(lhs, 123); break;
+ case 124: lhs = _mm_shufflelo_epi16(lhs, 124); break;
+ case 125: lhs = _mm_shufflelo_epi16(lhs, 125); break;
+ case 126: lhs = _mm_shufflelo_epi16(lhs, 126); break;
+ case 127: lhs = _mm_shufflelo_epi16(lhs, 127); break;
+ case 128: lhs = _mm_shufflelo_epi16(lhs, 128); break;
+ case 129: lhs = _mm_shufflelo_epi16(lhs, 129); break;
+ case 130: lhs = _mm_shufflelo_epi16(lhs, 130); break;
+ case 131: lhs = _mm_shufflelo_epi16(lhs, 131); break;
+ case 132: lhs = _mm_shufflelo_epi16(lhs, 132); break;
+ case 133: lhs = _mm_shufflelo_epi16(lhs, 133); break;
+ case 134: lhs = _mm_shufflelo_epi16(lhs, 134); break;
+ case 135: lhs = _mm_shufflelo_epi16(lhs, 135); break;
+ case 136: lhs = _mm_shufflelo_epi16(lhs, 136); break;
+ case 137: lhs = _mm_shufflelo_epi16(lhs, 137); break;
+ case 138: lhs = _mm_shufflelo_epi16(lhs, 138); break;
+ case 139: lhs = _mm_shufflelo_epi16(lhs, 139); break;
+ case 140: lhs = _mm_shufflelo_epi16(lhs, 140); break;
+ case 141: lhs = _mm_shufflelo_epi16(lhs, 141); break;
+ case 142: lhs = _mm_shufflelo_epi16(lhs, 142); break;
+ case 143: lhs = _mm_shufflelo_epi16(lhs, 143); break;
+ case 144: lhs = _mm_shufflelo_epi16(lhs, 144); break;
+ case 145: lhs = _mm_shufflelo_epi16(lhs, 145); break;
+ case 146: lhs = _mm_shufflelo_epi16(lhs, 146); break;
+ case 147: lhs = _mm_shufflelo_epi16(lhs, 147); break;
+ case 148: lhs = _mm_shufflelo_epi16(lhs, 148); break;
+ case 149: lhs = _mm_shufflelo_epi16(lhs, 149); break;
+ case 150: lhs = _mm_shufflelo_epi16(lhs, 150); break;
+ case 151: lhs = _mm_shufflelo_epi16(lhs, 151); break;
+ case 152: lhs = _mm_shufflelo_epi16(lhs, 152); break;
+ case 153: lhs = _mm_shufflelo_epi16(lhs, 153); break;
+ case 154: lhs = _mm_shufflelo_epi16(lhs, 154); break;
+ case 155: lhs = _mm_shufflelo_epi16(lhs, 155); break;
+ case 156: lhs = _mm_shufflelo_epi16(lhs, 156); break;
+ case 157: lhs = _mm_shufflelo_epi16(lhs, 157); break;
+ case 158: lhs = _mm_shufflelo_epi16(lhs, 158); break;
+ case 159: lhs = _mm_shufflelo_epi16(lhs, 159); break;
+ case 160: lhs = _mm_shufflelo_epi16(lhs, 160); break;
+ case 161: lhs = _mm_shufflelo_epi16(lhs, 161); break;
+ case 162: lhs = _mm_shufflelo_epi16(lhs, 162); break;
+ case 163: lhs = _mm_shufflelo_epi16(lhs, 163); break;
+ case 164: lhs = _mm_shufflelo_epi16(lhs, 164); break;
+ case 165: lhs = _mm_shufflelo_epi16(lhs, 165); break;
+ case 166: lhs = _mm_shufflelo_epi16(lhs, 166); break;
+ case 167: lhs = _mm_shufflelo_epi16(lhs, 167); break;
+ case 168: lhs = _mm_shufflelo_epi16(lhs, 168); break;
+ case 169: lhs = _mm_shufflelo_epi16(lhs, 169); break;
+ case 170: lhs = _mm_shufflelo_epi16(lhs, 170); break;
+ case 171: lhs = _mm_shufflelo_epi16(lhs, 171); break;
+ case 172: lhs = _mm_shufflelo_epi16(lhs, 172); break;
+ case 173: lhs = _mm_shufflelo_epi16(lhs, 173); break;
+ case 174: lhs = _mm_shufflelo_epi16(lhs, 174); break;
+ case 175: lhs = _mm_shufflelo_epi16(lhs, 175); break;
+ case 176: lhs = _mm_shufflelo_epi16(lhs, 176); break;
+ case 177: lhs = _mm_shufflelo_epi16(lhs, 177); break;
+ case 178: lhs = _mm_shufflelo_epi16(lhs, 178); break;
+ case 179: lhs = _mm_shufflelo_epi16(lhs, 179); break;
+ case 180: lhs = _mm_shufflelo_epi16(lhs, 180); break;
+ case 181: lhs = _mm_shufflelo_epi16(lhs, 181); break;
+ case 182: lhs = _mm_shufflelo_epi16(lhs, 182); break;
+ case 183: lhs = _mm_shufflelo_epi16(lhs, 183); break;
+ case 184: lhs = _mm_shufflelo_epi16(lhs, 184); break;
+ case 185: lhs = _mm_shufflelo_epi16(lhs, 185); break;
+ case 186: lhs = _mm_shufflelo_epi16(lhs, 186); break;
+ case 187: lhs = _mm_shufflelo_epi16(lhs, 187); break;
+ case 188: lhs = _mm_shufflelo_epi16(lhs, 188); break;
+ case 189: lhs = _mm_shufflelo_epi16(lhs, 189); break;
+ case 190: lhs = _mm_shufflelo_epi16(lhs, 190); break;
+ case 191: lhs = _mm_shufflelo_epi16(lhs, 191); break;
+ case 192: lhs = _mm_shufflelo_epi16(lhs, 192); break;
+ case 193: lhs = _mm_shufflelo_epi16(lhs, 193); break;
+ case 194: lhs = _mm_shufflelo_epi16(lhs, 194); break;
+ case 195: lhs = _mm_shufflelo_epi16(lhs, 195); break;
+ case 196: lhs = _mm_shufflelo_epi16(lhs, 196); break;
+ case 197: lhs = _mm_shufflelo_epi16(lhs, 197); break;
+ case 198: lhs = _mm_shufflelo_epi16(lhs, 198); break;
+ case 199: lhs = _mm_shufflelo_epi16(lhs, 199); break;
+ case 200: lhs = _mm_shufflelo_epi16(lhs, 200); break;
+ case 201: lhs = _mm_shufflelo_epi16(lhs, 201); break;
+ case 202: lhs = _mm_shufflelo_epi16(lhs, 202); break;
+ case 203: lhs = _mm_shufflelo_epi16(lhs, 203); break;
+ case 204: lhs = _mm_shufflelo_epi16(lhs, 204); break;
+ case 205: lhs = _mm_shufflelo_epi16(lhs, 205); break;
+ case 206: lhs = _mm_shufflelo_epi16(lhs, 206); break;
+ case 207: lhs = _mm_shufflelo_epi16(lhs, 207); break;
+ case 208: lhs = _mm_shufflelo_epi16(lhs, 208); break;
+ case 209: lhs = _mm_shufflelo_epi16(lhs, 209); break;
+ case 210: lhs = _mm_shufflelo_epi16(lhs, 210); break;
+ case 211: lhs = _mm_shufflelo_epi16(lhs, 211); break;
+ case 212: lhs = _mm_shufflelo_epi16(lhs, 212); break;
+ case 213: lhs = _mm_shufflelo_epi16(lhs, 213); break;
+ case 214: lhs = _mm_shufflelo_epi16(lhs, 214); break;
+ case 215: lhs = _mm_shufflelo_epi16(lhs, 215); break;
+ case 216: lhs = _mm_shufflelo_epi16(lhs, 216); break;
+ case 217: lhs = _mm_shufflelo_epi16(lhs, 217); break;
+ case 218: lhs = _mm_shufflelo_epi16(lhs, 218); break;
+ case 219: lhs = _mm_shufflelo_epi16(lhs, 219); break;
+ case 220: lhs = _mm_shufflelo_epi16(lhs, 220); break;
+ case 221: lhs = _mm_shufflelo_epi16(lhs, 221); break;
+ case 222: lhs = _mm_shufflelo_epi16(lhs, 222); break;
+ case 223: lhs = _mm_shufflelo_epi16(lhs, 223); break;
+ case 224: lhs = _mm_shufflelo_epi16(lhs, 224); break;
+ case 225: lhs = _mm_shufflelo_epi16(lhs, 225); break;
+ case 226: lhs = _mm_shufflelo_epi16(lhs, 226); break;
+ case 227: lhs = _mm_shufflelo_epi16(lhs, 227); break;
+ case 228: lhs = _mm_shufflelo_epi16(lhs, 228); break;
+ case 229: lhs = _mm_shufflelo_epi16(lhs, 229); break;
+ case 230: lhs = _mm_shufflelo_epi16(lhs, 230); break;
+ case 231: lhs = _mm_shufflelo_epi16(lhs, 231); break;
+ case 232: lhs = _mm_shufflelo_epi16(lhs, 232); break;
+ case 233: lhs = _mm_shufflelo_epi16(lhs, 233); break;
+ case 234: lhs = _mm_shufflelo_epi16(lhs, 234); break;
+ case 235: lhs = _mm_shufflelo_epi16(lhs, 235); break;
+ case 236: lhs = _mm_shufflelo_epi16(lhs, 236); break;
+ case 237: lhs = _mm_shufflelo_epi16(lhs, 237); break;
+ case 238: lhs = _mm_shufflelo_epi16(lhs, 238); break;
+ case 239: lhs = _mm_shufflelo_epi16(lhs, 239); break;
+ case 240: lhs = _mm_shufflelo_epi16(lhs, 240); break;
+ case 241: lhs = _mm_shufflelo_epi16(lhs, 241); break;
+ case 242: lhs = _mm_shufflelo_epi16(lhs, 242); break;
+ case 243: lhs = _mm_shufflelo_epi16(lhs, 243); break;
+ case 244: lhs = _mm_shufflelo_epi16(lhs, 244); break;
+ case 245: lhs = _mm_shufflelo_epi16(lhs, 245); break;
+ case 246: lhs = _mm_shufflelo_epi16(lhs, 246); break;
+ case 247: lhs = _mm_shufflelo_epi16(lhs, 247); break;
+ case 248: lhs = _mm_shufflelo_epi16(lhs, 248); break;
+ case 249: lhs = _mm_shufflelo_epi16(lhs, 249); break;
+ case 250: lhs = _mm_shufflelo_epi16(lhs, 250); break;
+ case 251: lhs = _mm_shufflelo_epi16(lhs, 251); break;
+ case 252: lhs = _mm_shufflelo_epi16(lhs, 252); break;
+ case 253: lhs = _mm_shufflelo_epi16(lhs, 253); break;
+ case 254: lhs = _mm_shufflelo_epi16(lhs, 254); break;
+ case 255: lhs = _mm_shufflelo_epi16(lhs, 255); break;
+ }
+
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pmulhuw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_mulhi_epu16(lhs, rhs);
+ lhs = _mm_mulhi_epu16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_psadbw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_sad_epu8(lhs, rhs);
+ lhs = _mm_sad_epu8(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pminsw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_min_epi16(lhs, rhs);
+ lhs = _mm_min_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
__m64 _m_pmaxsw(__m64 _MM1, __m64 _MM2)
{
- __m128i lhs = {0}, rhs = {0};
- lhs.m128i_i64[0] = _MM1.m64_i64;
+ __m128i lhs = {0}, rhs = {0};
+ lhs.m128i_i64[0] = _MM1.m64_i64;
- rhs.m128i_i64[0] = _MM2.m64_i64;
+ rhs.m128i_i64[0] = _MM2.m64_i64;
- lhs = _mm_max_epi16(lhs, rhs);
+ lhs = _mm_max_epi16(lhs, rhs);
- _MM1.m64_i64 = lhs.m128i_i64[0];
- return _MM1;
+ _MM1.m64_i64 = lhs.m128i_i64[0];
+ return _MM1;
}
void _mm_stream_pi(__m64* _MM1, __m64 _MM2)
{
- _mm_stream_si32(&(_MM1->m64_i32[0]), _MM2.m64_i32[0]);
- _mm_stream_si32(&(_MM1->m64_i32[1]), _MM2.m64_i32[1]);
+ _mm_stream_si32(&(_MM1->m64_i32[0]), _MM2.m64_i32[0]);
+ _mm_stream_si32(&(_MM1->m64_i32[1]), _MM2.m64_i32[1]);
}
__m64 _mm_cvt_ps2pi(__m128 _A)
{
- __m64 rv = {0};
- rv.m64_i32[0] = (int)_A.m128_f32[0];
- rv.m64_i32[1] = (int)_A.m128_f32[1];
- return rv;
+ __m64 rv = {0};
+ rv.m64_i32[0] = (int)_A.m128_f32[0];
+ rv.m64_i32[1] = (int)_A.m128_f32[1];
+ return rv;
}
__m128 _mm_cvt_pi2ps(__m128 _MM1, __m64 _MM2)
{
- _MM1.m128_f32[0] = (float)_MM2.m64_i32[0];
- _MM1.m128_f32[1] = (float)_MM2.m64_i32[1];
- return _MM1;
+ _MM1.m128_f32[0] = (float)_MM2.m64_i32[0];
+ _MM1.m128_f32[1] = (float)_MM2.m64_i32[1];
+ return _MM1;
}
// SSE2
__m64 _mm_movepi64_pi64(__m128i _Q)
{
- __m64 rv = {0};
- rv.m64_i64 = _Q.m128i_i64[0];
- return rv;
+ __m64 rv = {0};
+ rv.m64_i64 = _Q.m128i_i64[0];
+ return rv;
}
diff --git a/src/DSUtil/simd.h b/src/DSUtil/simd.h
index 0d6d68a29..a7c8a5bd4 100644
--- a/src/DSUtil/simd.h
+++ b/src/DSUtil/simd.h
@@ -70,6 +70,7 @@ static __forceinline void cvtps2pi(__m64 &dst,const __m128 &src) {dst=_mm_cvtps_
static __forceinline void cmpnltps(__m128 &dst,const __m128 &src) {dst=_mm_cmpnlt_ps(dst,src);}
static __forceinline void cvtpi2ps(__m128 &dst,const __m64 &src) {dst=_mm_cvtpi32_ps(dst,src);}
+#ifdef __SSE2__
static __forceinline void movq(__m128i &dst,const __m128i &src) {dst=src;}
static __forceinline void movq(__m128i &dst,const void *src) {dst=*(__m128i*)src;}
static __forceinline void movq(const void *dst,__m128i &src) {*(__m128i*)dst=src;}
@@ -132,6 +133,8 @@ static __forceinline void movhpd(void *dst,const __m128d &src) {_mm_storeh_pd((d
static __forceinline void movlhps(__m128i &dst,const __m128i &src) {(__m128&)dst=_mm_movelh_ps((__m128&)dst,(const __m128&)src);}
#endif
+#endif //__SSE2__
+
//======================================= MMX ======================================
#define MMX_INSTRUCTIONS \
static __forceinline __m setzero_si64(void) {return _mm_setzero_si64();} \
@@ -495,6 +498,7 @@ static __forceinline void memadd(unsigned char *dst,const unsigned char *src,uns
}
//====================================== SSE2 ======================================
+#ifdef __SSE2__
struct Tsse2
{
typedef __m128i __m;
@@ -552,6 +556,7 @@ struct Tsse2
static __forceinline void pavgb(__m &mmr1,const void *mmr2) {mmr1=_mm_avg_epu8(mmr1,*(__m*)mmr2);}
static __forceinline void sfence(void) {_mm_sfence();}
};
+#endif //__SSE2__
template<class _mm> static __forceinline typename _mm::__m abs_16(const typename _mm::__m &mm0)
{
diff --git a/src/DSUtil/simd_common.h b/src/DSUtil/simd_common.h
index 0b5e307e6..50be07c44 100644
--- a/src/DSUtil/simd_common.h
+++ b/src/DSUtil/simd_common.h
@@ -1,10 +1,14 @@
#ifndef _SIMD_COMMON_H_
#define _SIMD_COMMON_H_
+#if !defined(__GNUC__)
+ #define __SSE2__
+#endif
+
#ifdef __GNUC__
- #define __forceinline __attribute__((__always_inline__)) inline
-#else
- #define inline __forceinline
+ #ifndef __forceinline
+ #define __forceinline __attribute__((__always_inline__)) inline
+ #endif
#endif
#ifdef __GNUC__
@@ -13,7 +17,9 @@
#include <mmintrin.h>
#include <xmmintrin.h>
-#include <emmintrin.h>
+#ifdef __SSE2__
+ #include <emmintrin.h>
+#endif
#ifdef __GNUC__
#undef __inline
diff --git a/src/DSUtil/simd_instructions.h b/src/DSUtil/simd_instructions.h
index 6ce9a2b29..e5098c731 100644
--- a/src/DSUtil/simd_instructions.h
+++ b/src/DSUtil/simd_instructions.h
@@ -52,6 +52,8 @@ MMX_INSTRUCTION(pavgb,_mm_avg_pu8)
MMX_INSTRUCTION(psadbw,_mm_sad_pu8)
+#ifdef __SSE2__
+
SSE2I_INSTRUCTION(pand,_mm_and_si128)
SSE2I_INSTRUCTION(por,_mm_or_si128)
SSE2I_INSTRUCTION(pxor,_mm_xor_si128)
@@ -81,3 +83,5 @@ SSE2I_INSTRUCTION(pcmpeqb,_mm_cmpeq_epi8)
SSE2I_INSTRUCTION(pcmpeqw,_mm_cmpeq_epi16)
SSE2I_INSTRUCTION(pcmpgtb,_mm_cmpgt_epi8)
SSE2I_INSTRUCTION(pcmpgtw,_mm_cmpgt_epi16)
+
+#endif
\ No newline at end of file