Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kenneth Heafield <github@kheafield.com> 2020-04-19 15:46:24 +0300
committer: Kenneth Heafield <github@kheafield.com> 2020-04-19 15:46:24 +0300
commit: 40dd33b765cf2b6b711850eac46c2df4b225da12 (patch)
tree: 93d0508d0b7e63873710bbfbd2890872cd387cc8
parent: f8fc28756b7ff219043b9f2fb513e63f48fdf70a (diff)
template argument for shuffle immediate
makes clang happy
-rw-r--r-- intrinsics.h 6
-rw-r--r-- tile/reduce.inl 4
2 files changed, 5 insertions, 5 deletions
diff --git a/intrinsics.h b/intrinsics.h
index f5e241e..98b0961 100644
--- a/intrinsics.h
+++ b/intrinsics.h
@@ -150,7 +150,7 @@ template <> INTGEMM_SSE2 inline __m128 setzero_ps<__m128>() {
template <> INTGEMM_SSE2 inline __m128i setzero_si<__m128i>() {
return _mm_setzero_si128();
}
-INTGEMM_SSE2 static inline __m128i shuffle_epi32(__m128i a, int imm8) {
+template <int imm8> INTGEMM_SSE2 static inline __m128i shuffle_epi32(__m128i a) {
return _mm_shuffle_epi32(a, imm8);
}
INTGEMM_SSSE3 static inline __m128i sign_epi8(__m128i first, __m128i second) {
@@ -328,7 +328,7 @@ template <> INTGEMM_AVX2 inline __m256 setzero_ps<__m256>() {
template <> INTGEMM_AVX2 inline __m256i setzero_si<__m256i>() {
return _mm256_setzero_si256();
}
-INTGEMM_AVX2 static inline __m256i shuffle_epi32(__m256i a, const int imm8) {
+template <int imm8> INTGEMM_AVX2 static inline __m256i shuffle_epi32(__m256i a) {
return _mm256_shuffle_epi32(a, imm8);
}
INTGEMM_AVX2 static inline __m256i sign_epi8(__m256i first, __m256i second) {
@@ -508,7 +508,7 @@ template <> INTGEMM_AVX512BW inline __m512i setzero_si<__m512i>() {
template <> INTGEMM_AVX512BW inline __m512 load_ps<__m512>(const float* from) {
return _mm512_load_ps(from);
}
-INTGEMM_AVX512BW static inline __m512i shuffle_epi32(__m512i a, const int imm8) {
+template <int imm8> INTGEMM_AVX512BW static inline __m512i shuffle_epi32(__m512i a) {
return _mm512_shuffle_epi32(a, static_cast<_MM_PERM_ENUM>(imm8));
}
/*
diff --git a/tile/reduce.inl b/tile/reduce.inl
index b17cfca..0bb7b2c 100644
--- a/tile/reduce.inl
+++ b/tile/reduce.inl
@@ -44,7 +44,7 @@ struct Reduce32Folder {
}
INTGEMM_TARGET static inline RegisterPair Odd(Register reg) {
// For the odd case, shuffle to form 0 g 0 g where g is garbage and 0 is accumlated.
- return RegisterPair { reg, shuffle_epi32(reg, 0x31) };
+ return RegisterPair { reg, shuffle_epi32<0x31>(reg) };
}
INTGEMM_TARGET static inline Register OddUpcast(Register reg) { return reg; }
};
@@ -55,7 +55,7 @@ struct Reduce64Folder {
}
INTGEMM_TARGET static inline RegisterPair Odd(Register reg) {
// For the odd case, shuffle to form 0 g where g is garbage and 0 is accumlated.
- return RegisterPair { reg, shuffle_epi32(reg, 3 * 4 + 2) };
+ return RegisterPair { reg, shuffle_epi32<3 * 4 + 2>(reg) };
}
INTGEMM_TARGET static inline Register OddUpcast(Register reg) { return reg; }
};