diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-09-14 21:08:20 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-09-14 21:08:20 +0300 |
commit | dc2d9ce4f606a878b3d2e3845b9eb4ac7002198d (patch) | |
tree | fd54672c32e84e52671982243a5f58ae34e34620 | |
parent | c3b26e144c7bb619b56ba86e1f7046992332b452 (diff) |
Remove redundant typedefs for Register in namespace
-rw-r--r-- | intgemm/avx2_gemm.h | 16 |
-rw-r--r-- | intgemm/avx512_gemm.h | 6 |
-rw-r--r-- | intgemm/avx512vnni_gemm.h | 3 |
-rw-r--r-- | intgemm/interleave.h | 2 |
-rw-r--r-- | intgemm/sse2_gemm.h | 2 |
-rw-r--r-- | intgemm/ssse3_gemm.h | 4 |
6 files changed, 7 insertions, 26 deletions
diff --git a/intgemm/avx2_gemm.h b/intgemm/avx2_gemm.h index 9b49d8d..0ddca4c 100644 --- a/intgemm/avx2_gemm.h +++ b/intgemm/avx2_gemm.h @@ -11,16 +11,14 @@ namespace intgemm { namespace avx2 { -INTGEMM_AVX2 inline __m256i QuantizerGrab(const float *input, const __m256 quant_mult_reg) { - return kernels::quantize(loadu_ps<__m256>(input), quant_mult_reg); +INTGEMM_AVX2 inline Register QuantizerGrab(const float *input, const __m256 quant_mult_reg) { + return kernels::quantize(loadu_ps<FRegister>(input), quant_mult_reg); } INTGEMM_SELECT_COL_B(INTGEMM_AVX2, __m256i) class QuantizeTile16 { public: - typedef __m256i Register; - INTGEMM_AVX2 explicit QuantizeTile16(float mult) : mult_(_mm256_set1_ps(mult)) {} INTGEMM_AVX2 Register Consecutive(const float *input) const { @@ -40,15 +38,15 @@ class QuantizeTile16 { private: INTGEMM_AVX2 __m256i Tile(const float *input0, const float *input1) const { - __m256i g0 = QuantizerGrab(input0, mult_); - __m256i g1 = QuantizerGrab(input1, mult_); - __m256i packed = _mm256_packs_epi32(g0, g1); + Register g0 = QuantizerGrab(input0, mult_); + Register g1 = QuantizerGrab(input1, mult_); + Register packed = _mm256_packs_epi32(g0, g1); // Reorder the packed values because Intel does 0 1 2 3 8 9 10 11 4 5 6 7 12 13 14 15. // Technically this could be removed if the PrepareB did the same reordering internally. 
return _mm256_permute4x64_epi64(packed, 0xd8 /* 0, 2, 1, 3 */); } - const __m256 mult_; + const FRegister mult_; }; struct Kernels16 { @@ -98,8 +96,6 @@ struct Kernels16 { */ class QuantizeTile8 { public: - typedef __m256i Register; - INTGEMM_AVX2 explicit QuantizeTile8(float quant_mult) : mult_(_mm256_set1_ps(quant_mult)) {} INTGEMM_AVX2 inline __m256i Consecutive(const float *input) const { diff --git a/intgemm/avx512_gemm.h b/intgemm/avx512_gemm.h index 63f3ef9..0580398 100644 --- a/intgemm/avx512_gemm.h +++ b/intgemm/avx512_gemm.h @@ -66,8 +66,6 @@ INTGEMM_AVX512BW inline __m512i QuantizerGrabHalves(const float *input0, const f // being used for the quantizer. class QuantizeTile16 { public: - typedef __m512i Register; - /* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */ INTGEMM_AVX512BW explicit QuantizeTile16(float mult) : mult_reg_(_mm512_set1_ps(mult)) {} @@ -94,8 +92,6 @@ class QuantizeTile16 { class QuantizeTile8 { public: - typedef __m512i Register; - /* Only INTGEMM_AVX512F is necessary but due to GCC 5.4 bug we have to set INTGEMM_AVX512BW */ INTGEMM_AVX512BW explicit QuantizeTile8(float mult) : mult_reg_(_mm512_set1_ps(mult)) {} @@ -303,8 +299,6 @@ struct Kernels8 { // allocate registers manually) and no sign instruction. template <typename Callback> INTGEMM_AVX512BW static void Multiply(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) { - typedef __m512i Register; - //typedef __m256 Float; // For quantization we only do 8 at a time. // This is copy-paste from Multiply8_SSE2OrAVX2. 
assert(width % sizeof(Register) == 0); assert(B_cols % 8 == 0); diff --git a/intgemm/avx512vnni_gemm.h b/intgemm/avx512vnni_gemm.h index 86365ad..c660168 100644 --- a/intgemm/avx512vnni_gemm.h +++ b/intgemm/avx512vnni_gemm.h @@ -21,7 +21,6 @@ INTGEMM_AVX512VNNI static inline void VNNI8(__m512i &c, __m512i a, __m512i b) { struct Kernels8 : public avx512bw::Kernels8 { template <typename Callback> INTGEMM_AVX512VNNI static void Multiply(const int8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) { - typedef __m512i Register; assert(width % sizeof(Register) == 0); assert(B_cols % 8 == 0); assert(reinterpret_cast<uintptr_t>(A) % sizeof(Register) == 0); @@ -83,7 +82,6 @@ struct Kernels8 : public avx512bw::Kernels8 { template <typename Callback> INTGEMM_AVX512VNNI static void Multiply8Shift(const uint8_t *A, const int8_t *B, Index A_rows, Index width, Index B_cols, Callback callback) { - typedef __m512i Register; assert(width % sizeof(Register) == 0); assert(B_cols % 8 == 0); assert(reinterpret_cast<uintptr_t>(A) % sizeof(Register) == 0); @@ -125,7 +123,6 @@ struct Kernels8 : public avx512bw::Kernels8 { template <typename Callback> INTGEMM_AVX512VNNI static void PrepareBias(const int8_t *B, Index width, Index B_cols, Callback callback) { - typedef __m512i Register; assert(width % sizeof(Register) == 0); assert(B_cols % 8 == 0); assert(reinterpret_cast<uintptr_t>(B) % sizeof(Register) == 0); diff --git a/intgemm/interleave.h b/intgemm/interleave.h index 8d48df0..b6eb8ab 100644 --- a/intgemm/interleave.h +++ b/intgemm/interleave.h @@ -238,7 +238,6 @@ target static inline void PrepareB(const float *input, int16_t *output_shadow, f */ #define INTGEMM_PREPARE_B_QUANTIZED_TRANSPOSED(target, cpu_type, Integer) \ target static inline void PrepareBQuantizedTransposed(const Integer* input, Integer* output, Index cols, Index rows) { \ - using Register = vector_t<cpu_type, Integer>; \ const Index RegisterElems = sizeof(Register) / sizeof(Integer); \ 
const Index kColStride = 8; \ \ @@ -263,7 +262,6 @@ target static inline void PrepareBQuantizedTransposed(const Integer* input, Inte */ #define INTGEMM_PREPARE_B_TRANSPOSED(target, Quantizer, Integer) \ target static inline void PrepareBTransposed(const float* input, Integer* output, float quant_mult, Index cols, Index rows) { \ - using Register = typename Quantizer::Register; \ const Index RegisterElemsInt = sizeof(Register) / sizeof(Integer); \ const Index kColStride = 8; \ \ diff --git a/intgemm/sse2_gemm.h b/intgemm/sse2_gemm.h index 30d197f..a562e20 100644 --- a/intgemm/sse2_gemm.h +++ b/intgemm/sse2_gemm.h @@ -19,8 +19,6 @@ INTGEMM_SELECT_COL_B(INTGEMM_SSE2, __m128i) class QuantizeTile16 { public: - typedef __m128i Register; - INTGEMM_SSE2 explicit QuantizeTile16(float mult) : mult_reg_(_mm_set1_ps(mult)) {} INTGEMM_SSE2 inline __m128i Consecutive(const float *input) const { diff --git a/intgemm/ssse3_gemm.h b/intgemm/ssse3_gemm.h index 60ac92b..2c00a53 100644 --- a/intgemm/ssse3_gemm.h +++ b/intgemm/ssse3_gemm.h @@ -21,8 +21,6 @@ INTGEMM_SELECT_COL_B(INTGEMM_SSSE3, __m128i) class QuantizeTile8 { public: - typedef __m128i Register; - INTGEMM_SSSE3 explicit QuantizeTile8(float mult) : mult_reg_(_mm_set1_ps(mult)) {} INTGEMM_SSSE3 inline __m128i ForReshape(const float *input, Index cols) const { @@ -100,7 +98,7 @@ class QuantizeTile8 { } private: - const __m128 mult_reg_; + const FRegister mult_reg_; }; // pmaddubsw (the 8-bit multiply) is SSSE3, so pedantically that's the version we need. |