Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commitdiff
diff options
context:
space:
mode:
author    Kenneth Heafield <github@kheafield.com>  2020-09-14 21:08:20 +0300
committer Kenneth Heafield <github@kheafield.com>  2020-09-14 21:08:20 +0300
commit dc2d9ce4f606a878b3d2e3845b9eb4ac7002198d (patch)
tree   fd54672c32e84e52671982243a5f58ae34e34620
parent c3b26e144c7bb619b56ba86e1f7046992332b452 (diff)
Remove redundant typedefs for Register in namespace
-rw-r--r--  intgemm/avx2_gemm.h       | 16
-rw-r--r--  intgemm/avx512_gemm.h     |  6
-rw-r--r--  intgemm/avx512vnni_gemm.h |  3
-rw-r--r--  intgemm/interleave.h      |  2
-rw-r--r--  intgemm/sse2_gemm.h       |  2
-rw-r--r--  intgemm/ssse3_gemm.h      |  4
6 files changed, 7 insertions, 26 deletions
diff --git a/intgemm/avx2_gemm.h b/intgemm/avx2_gemm.h
index 9b49d8d..0ddca4c 100644
--- a/intgemm/avx2_gemm.h
+++ b/intgemm/avx2_gemm.h
@@ -11,16 +11,14 @@
namespace intgemm {
namespace avx2 {
-INTGEMM_AVX2 inline __m256i QuantizerGrab(const float *input, const __m256 quant_mult_reg) {
- return kernels::quantize(loadu_ps<__m256>(input), quant_mult_reg);
+INTGEMM_AVX2 inline Register QuantizerGrab(const float *input, const __m256 quant_mult_reg) {
+ return kernels::quantize(loadu_ps<FRegister>(input), quant_mult_reg);
}
INTGEMM_SELECT_COL_B(INTGEMM_AVX2, __m256i)
class QuantizeTile16 {
public:
- typedef __m256i Register;
-
INTGEMM_AVX2 explicit QuantizeTile16(float mult) : mult_(_mm256_set1_ps(mult)) {}
INTGEMM_AVX2 Register Consecutive(const float *input) const {
@@ -40,15 +38,15 @@ class QuantizeTile16 {
private:
INTGEMM_AVX2 __m256i Tile(const float *input0, const float *input1) const {
- __m256i g0 = QuantizerGrab(input0, mult_);
- __m256i g1 = QuantizerGrab(input1, mult_);
- __m256i packed = _mm256_packs_epi32(g0, g1);
+ Register g0 = QuantizerGrab(input0, mult_);
+ Register g1 = QuantizerGrab(input1, mult_);
+ Register packed = _mm256_packs_epi32(g0, g1);
// Reorder the packed values because Intel does 0 1 2 3 8 9 10 11 4 5 6 7 12 13 14 15.
// Technically this could be removed if the PrepareB did the same reordering internally.
return _mm256_permute4x64_epi64(packed, 0xd8 /* 0, 2, 1, 3 */);
}
- const __m256 mult_;
+ const FRegister mult_;
};
struct Kernels16 {
@@ -98,8 +96,6 @@ struct Kernels16 {
*/
class QuantizeTile8 {
public:
- typedef __m256i Register;
-
INTGEMM_AVX2 explicit QuantizeTile8(float quant_mult) : mult_(_mm256_set1_ps(quant_mult)) {}
INTGEMM_AVX2 inline __m256i Consecutive(const float *input) const {
diff --git a/intgemm/avx512_gemm.h b/intgemm/avx512_gemm.h
index 63f3ef9..0580398 100644
--- a/intgemm/avx512_gemm.h
+++ b/intgemm/avx512_gemm.h
@@ -66,8 +66,6 @@ INTGEMM_AVX512BW inline __m512i QuantizerGrabHalves(const float *input0, const f
// being used for the quantizer.
class QuantizeTile16 {
public:
- typedef __m512i Register;
-
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_AVX512BW explicit QuantizeTile16(float mult) : mult_reg_(_mm512_set1_ps(mult)) {}
@@ -94,8 +92,6 @@ class QuantizeTile16 {
class QuantizeTile8 {
public:
- typedef __m512i Register;
-
/* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */
INTGEMM_AVX512BW explicit QuantizeTile8(float mult) : mult_reg_(_mm512_set1_ps(mult)) {}
@@ -303,8 +299,6 @@ struct Kernels8 {
// allocate registers manually) and no sign instruction.
template <typename Callback>
INTGEMM_AVX512BW static void Multiply(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) {
- typedef __m512i Register;
- //typedef __m256 Float; // For quantization we only do 8 at a time.
// This is copy-paste from Multiply8_SSE2OrAVX2.
assert(width % sizeof(Register) == 0);
assert(B_cols % 8 == 0);
diff --git a/intgemm/avx512vnni_gemm.h b/intgemm/avx512vnni_gemm.h
index 86365ad..c660168 100644
--- a/intgemm/avx512vnni_gemm.h
+++ b/intgemm/avx512vnni_gemm.h
@@ -21,7 +21,6 @@ INTGEMM_AVX512VNNI static inline void VNNI8(__m512i &c, __m512i a, __m512i b) {
struct Kernels8 : public avx512bw::Kernels8 {
template <typename Callback>
INTGEMM_AVX512VNNI static void Multiply(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) {
- typedef __m512i Register;
assert(width % sizeof(Register) == 0);
assert(B_cols % 8 == 0);
assert(reinterpret_cast<uintptr_t>(A) % sizeof(Register) == 0);
@@ -83,7 +82,6 @@ struct Kernels8 : public avx512bw::Kernels8 {
template <typename Callback>
INTGEMM_AVX512VNNI static void Multiply8Shift(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) {
- typedef __m512i Register;
assert(width % sizeof(Register) == 0);
assert(B_cols % 8 == 0);
assert(reinterpret_cast<uintptr_t>(A) % sizeof(Register) == 0);
@@ -125,7 +123,6 @@ struct Kernels8 : public avx512bw::Kernels8 {
template <typename Callback>
INTGEMM_AVX512VNNI static void PrepareBias(const int8_t *B, Index width, Index B_cols, Callback callback) {
- typedef __m512i Register;
assert(width % sizeof(Register) == 0);
assert(B_cols % 8 == 0);
assert(reinterpret_cast<uintptr_t>(B) % sizeof(Register) == 0);
diff --git a/intgemm/interleave.h b/intgemm/interleave.h
index 8d48df0..b6eb8ab 100644
--- a/intgemm/interleave.h
+++ b/intgemm/interleave.h
@@ -238,7 +238,6 @@ target static inline void PrepareB(const float *input, int16_t *output_shadow, f
*/
#define INTGEMM_PREPARE_B_QUANTIZED_TRANSPOSED(target, cpu_type, Integer) \
target static inline void PrepareBQuantizedTransposed(const Integer* input, Integer* output, Index cols, Index rows) { \
- using Register = vector_t<cpu_type, Integer>; \
const Index RegisterElems = sizeof(Register) / sizeof(Integer); \
const Index kColStride = 8; \
\
@@ -263,7 +262,6 @@ target static inline void PrepareBQuantizedTransposed(const Integer* input, Inte
*/
#define INTGEMM_PREPARE_B_TRANSPOSED(target, Quantizer, Integer) \
target static inline void PrepareBTransposed(const float* input, Integer* output, float quant_mult, Index cols, Index rows) { \
- using Register = typename Quantizer::Register; \
const Index RegisterElemsInt = sizeof(Register) / sizeof(Integer); \
const Index kColStride = 8; \
\
diff --git a/intgemm/sse2_gemm.h b/intgemm/sse2_gemm.h
index 30d197f..a562e20 100644
--- a/intgemm/sse2_gemm.h
+++ b/intgemm/sse2_gemm.h
@@ -19,8 +19,6 @@ INTGEMM_SELECT_COL_B(INTGEMM_SSE2, __m128i)
class QuantizeTile16 {
public:
- typedef __m128i Register;
-
INTGEMM_SSE2 explicit QuantizeTile16(float mult) : mult_reg_(_mm_set1_ps(mult)) {}
INTGEMM_SSE2 inline __m128i Consecutive(const float *input) const {
diff --git a/intgemm/ssse3_gemm.h b/intgemm/ssse3_gemm.h
index 60ac92b..2c00a53 100644
--- a/intgemm/ssse3_gemm.h
+++ b/intgemm/ssse3_gemm.h
@@ -21,8 +21,6 @@ INTGEMM_SELECT_COL_B(INTGEMM_SSSE3, __m128i)
class QuantizeTile8 {
public:
- typedef __m128i Register;
-
INTGEMM_SSSE3 explicit QuantizeTile8(float mult) : mult_reg_(_mm_set1_ps(mult)) {}
INTGEMM_SSSE3 inline __m128i ForReshape(const float *input, Index cols) const {
@@ -100,7 +98,7 @@ class QuantizeTile8 {
}
private:
- const __m128 mult_reg_;
+ const FRegister mult_reg_;
};
// pmaddubsw (the 8-bit multiply) is SSSE3, so pedantically that's the version we need.