diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-04-19 15:46:24 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-04-19 15:46:24 +0300 |
commit | 40dd33b765cf2b6b711850eac46c2df4b225da12 (patch) | |
tree | 93d0508d0b7e63873710bbfbd2890872cd387cc8 | |
parent | f8fc28756b7ff219043b9f2fb513e63f48fdf70a (diff) |
template argument for shuffle immediate
makes clang happy
-rw-r--r-- | intrinsics.h | 6 | ||||
-rw-r--r-- | tile/reduce.inl | 4 |
2 files changed, 5 insertions, 5 deletions
diff --git a/intrinsics.h b/intrinsics.h index f5e241e..98b0961 100644 --- a/intrinsics.h +++ b/intrinsics.h @@ -150,7 +150,7 @@ template <> INTGEMM_SSE2 inline __m128 setzero_ps<__m128>() { template <> INTGEMM_SSE2 inline __m128i setzero_si<__m128i>() { return _mm_setzero_si128(); } -INTGEMM_SSE2 static inline __m128i shuffle_epi32(__m128i a, int imm8) { +template <int imm8> INTGEMM_SSE2 static inline __m128i shuffle_epi32(__m128i a) { return _mm_shuffle_epi32(a, imm8); } INTGEMM_SSSE3 static inline __m128i sign_epi8(__m128i first, __m128i second) { @@ -328,7 +328,7 @@ template <> INTGEMM_AVX2 inline __m256 setzero_ps<__m256>() { template <> INTGEMM_AVX2 inline __m256i setzero_si<__m256i>() { return _mm256_setzero_si256(); } -INTGEMM_AVX2 static inline __m256i shuffle_epi32(__m256i a, const int imm8) { +template <int imm8> INTGEMM_AVX2 static inline __m256i shuffle_epi32(__m256i a) { return _mm256_shuffle_epi32(a, imm8); } INTGEMM_AVX2 static inline __m256i sign_epi8(__m256i first, __m256i second) { @@ -508,7 +508,7 @@ template <> INTGEMM_AVX512BW inline __m512i setzero_si<__m512i>() { template <> INTGEMM_AVX512BW inline __m512 load_ps<__m512>(const float* from) { return _mm512_load_ps(from); } -INTGEMM_AVX512BW static inline __m512i shuffle_epi32(__m512i a, const int imm8) { +template <int imm8> INTGEMM_AVX512BW static inline __m512i shuffle_epi32(__m512i a) { return _mm512_shuffle_epi32(a, static_cast<_MM_PERM_ENUM>(imm8)); } /* diff --git a/tile/reduce.inl b/tile/reduce.inl index b17cfca..0bb7b2c 100644 --- a/tile/reduce.inl +++ b/tile/reduce.inl @@ -44,7 +44,7 @@ struct Reduce32Folder { } INTGEMM_TARGET static inline RegisterPair Odd(Register reg) { // For the odd case, shuffle to form 0 g 0 g where g is garbage and 0 is accumlated. - return RegisterPair { reg, shuffle_epi32(reg, 0x31) }; + return RegisterPair { reg, shuffle_epi32<0x31>(reg) }; } INTGEMM_TARGET static inline Register OddUpcast(Register reg) { return reg; } }; @@ -55,7 +55,7 @@ struct Reduce64Folder { } INTGEMM_TARGET static inline RegisterPair Odd(Register reg) { // For the odd case, shuffle to form 0 g where g is garbage and 0 is accumlated. - return RegisterPair { reg, shuffle_epi32(reg, 3 * 4 + 2) }; + return RegisterPair { reg, shuffle_epi32<3 * 4 + 2>(reg) }; } INTGEMM_TARGET static inline Register OddUpcast(Register reg) { return reg; } }; |