diff options
author | Mateusz Chudyk <mateuszchudyk@gmail.com> | 2019-08-24 14:14:00 +0300 |
---|---|---|
committer | Mateusz Chudyk <mateuszchudyk@gmail.com> | 2019-08-30 19:36:19 +0300 |
commit | 41dbd6bf88522fd30f0d92b4fe36f89f67898edf (patch) | |
tree | 225e3efe76f8e5f03cd17b700236321e2d45c5b9 | |
parent | 030653bd7cbb9f748abed76801c4fe91fae5c0e1 (diff) |
Remove shuffle_epi32 intrinsics
-rw-r--r-- | intrinsics.h | 9 | ||||
-rw-r--r-- | kernels/implementations.inl | 2 |
2 files changed, 1 insertion, 10 deletions
```diff
diff --git a/intrinsics.h b/intrinsics.h
index f5d6a61..4204d05 100644
--- a/intrinsics.h
+++ b/intrinsics.h
@@ -149,9 +149,6 @@ template <> INTGEMM_SSE2 inline __m128 setzero_ps<__m128>() {
 template <> INTGEMM_SSE2 inline __m128i setzero_si<__m128i>() {
   return _mm_setzero_si128();
 }
-INTGEMM_SSE2 static inline __m128i shuffle_epi32(__m128i a, int imm8) {
-  return _mm_shuffle_epi32(a, imm8);
-}
 INTGEMM_SSSE3 static inline __m128i sign_epi8(__m128i first, __m128i second) {
   return _mm_sign_epi8(first, second);
 }
@@ -324,9 +321,6 @@ template <> INTGEMM_AVX2 inline __m256 setzero_ps<__m256>() {
 template <> INTGEMM_AVX2 inline __m256i setzero_si<__m256i>() {
   return _mm256_setzero_si256();
 }
-INTGEMM_AVX2 static inline __m256i shuffle_epi32(__m256i a, int imm8) {
-  return _mm256_shuffle_epi32(a, imm8);
-}
 INTGEMM_AVX2 static inline __m256i sign_epi8(__m256i first, __m256i second) {
   return _mm256_sign_epi8(first, second);
 }
@@ -504,9 +498,6 @@ template <> INTGEMM_AVX512BW inline __m512 load_ps<__m512>(const float* from) {
 /*
  * Missing sign_epi8
  */
-INTGEMM_AVX512BW static inline __m512i shuffle_epi32(__m512i a, _MM_PERM_ENUM imm8) {
-  return _mm512_shuffle_epi32(a, imm8);
-}
 INTGEMM_AVX512BW static inline __m512i slli_epi16(__m512i a, int8_t b) {
   return _mm512_slli_epi16(a, b);
 }
diff --git a/kernels/implementations.inl b/kernels/implementations.inl
index fecc83d..bfbdf0c 100644
--- a/kernels/implementations.inl
+++ b/kernels/implementations.inl
@@ -164,7 +164,7 @@ CPU_ATTR inline vi multiply<int>(vi a, vi b) {
 #if defined(THIS_IS_SSE2)
   auto even = mul_epu32(a, b);
   auto odd = mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
-  return unpacklo_epi32(shuffle_epi32(even, 0x8 /* = 0 0 2 0 */), shuffle_epi32(odd, 0x8 /* = 0 0 2 0 */));
+  return unpacklo_epi32(_mm_shuffle_epi32(even, 0x8 /* = 0 0 2 0 */), _mm_shuffle_epi32(odd, 0x8 /* = 0 0 2 0 */));
 #elif defined(THIS_IS_AVX2)
   return _mm256_mullo_epi32(a, b);
 #else
```