Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mateusz Chudyk <mateuszchudyk@gmail.com> 2019-08-24 14:14:00 +0300
committer Mateusz Chudyk <mateuszchudyk@gmail.com> 2019-08-30 19:36:19 +0300
commit 41dbd6bf88522fd30f0d92b4fe36f89f67898edf (patch)
tree 225e3efe76f8e5f03cd17b700236321e2d45c5b9
parent 030653bd7cbb9f748abed76801c4fe91fae5c0e1 (diff)
Remove shuffle_epi32 intrinsics
-rw-r--r-- intrinsics.h 9
-rw-r--r-- kernels/implementations.inl 2
2 files changed, 1 insertion, 10 deletions
diff --git a/intrinsics.h b/intrinsics.h
index f5d6a61..4204d05 100644
--- a/intrinsics.h
+++ b/intrinsics.h
@@ -149,9 +149,6 @@ template <> INTGEMM_SSE2 inline __m128 setzero_ps<__m128>() {
template <> INTGEMM_SSE2 inline __m128i setzero_si<__m128i>() {
return _mm_setzero_si128();
}
-INTGEMM_SSE2 static inline __m128i shuffle_epi32(__m128i a, int imm8) {
- return _mm_shuffle_epi32(a, imm8);
-}
INTGEMM_SSSE3 static inline __m128i sign_epi8(__m128i first, __m128i second) {
return _mm_sign_epi8(first, second);
}
@@ -324,9 +321,6 @@ template <> INTGEMM_AVX2 inline __m256 setzero_ps<__m256>() {
template <> INTGEMM_AVX2 inline __m256i setzero_si<__m256i>() {
return _mm256_setzero_si256();
}
-INTGEMM_AVX2 static inline __m256i shuffle_epi32(__m256i a, int imm8) {
- return _mm256_shuffle_epi32(a, imm8);
-}
INTGEMM_AVX2 static inline __m256i sign_epi8(__m256i first, __m256i second) {
return _mm256_sign_epi8(first, second);
}
@@ -504,9 +498,6 @@ template <> INTGEMM_AVX512BW inline __m512 load_ps<__m512>(const float* from) {
/*
* Missing sign_epi8
*/
-INTGEMM_AVX512BW static inline __m512i shuffle_epi32(__m512i a, _MM_PERM_ENUM imm8) {
- return _mm512_shuffle_epi32(a, imm8);
-}
INTGEMM_AVX512BW static inline __m512i slli_epi16(__m512i a, int8_t b) {
return _mm512_slli_epi16(a, b);
}
diff --git a/kernels/implementations.inl b/kernels/implementations.inl
index fecc83d..bfbdf0c 100644
--- a/kernels/implementations.inl
+++ b/kernels/implementations.inl
@@ -164,7 +164,7 @@ CPU_ATTR inline vi multiply<int>(vi a, vi b) {
#if defined(THIS_IS_SSE2)
auto even = mul_epu32(a, b);
auto odd = mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
- return unpacklo_epi32(shuffle_epi32(even, 0x8 /* = 0 0 2 0 */), shuffle_epi32(odd, 0x8 /* = 0 0 2 0 */));
+ return unpacklo_epi32(_mm_shuffle_epi32(even, 0x8 /* = 0 0 2 0 */), _mm_shuffle_epi32(odd, 0x8 /* = 0 0 2 0 */));
#elif defined(THIS_IS_AVX2)
return _mm256_mullo_epi32(a, b);
#else