diff options
author | Nikolay Bogoychev <nheart@gmail.com> | 2019-05-20 20:19:14 +0300 |
---|---|---|
committer | Nikolay Bogoychev <nheart@gmail.com> | 2019-05-20 20:19:14 +0300 |
commit | 79f2f670e281e84bedc57261452aad1837a58bfd (patch) | |
tree | 058437d4c6547176230fd6504e472fb4d3d0e4b7 | |
parent | 05735e87226e52466a89de07a4bd3b6baba19216 (diff) |
More consistent define naming scheme
-rw-r--r-- | avx2_gemm.h | 12 | ||||
-rw-r--r-- | avx512_gemm.h | 10 | ||||
-rw-r--r-- | interleave.h | 6 | ||||
-rw-r--r-- | multiply.h | 14 | ||||
-rw-r--r-- | sse2_gemm.h | 8 | ||||
-rw-r--r-- | ssse3_gemm.h | 6 |
6 files changed, 28 insertions, 28 deletions
diff --git a/avx2_gemm.h b/avx2_gemm.h index 393eebb..9ed70c1 100644 --- a/avx2_gemm.h +++ b/avx2_gemm.h @@ -17,7 +17,7 @@ AVX2 inline __m256i QuantizerGrab(const float *input, const __m256 quant_mult_re return _mm256_cvtps_epi32(_mm256_mul_ps(*reinterpret_cast<const __m256*>(input), quant_mult_reg)); } -SELECT_COL_B_DEF(AVX2, __m256i) +SELECT_COL_B_DEFINE(AVX2, __m256i) class QuantizeTile16 { public: @@ -76,13 +76,13 @@ struct AVX2_16bit { AVX2 static void PrepareB(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) { PrepareBFor16(input, output, avx2::QuantizeTile16(quant_mult), rows, cols); }*/ - PREPARE_B_16_DEF(AVX2, avx2::QuantizeTile16) + PREPARE_B_16_DEFINE(AVX2, avx2::QuantizeTile16) AVX2 static void SelectColumnsB(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { avx2::SelectColumnsOfB((const __m256i*)input, (__m256i*)output, rows * 2, cols_begin, cols_end); } - MULTIPLY16_define(__m256i, AVX2, OnAVX2) + MULTIPLY16_DEFINE(__m256i, AVX2, OnAVX2) constexpr static const char *const kName = "16-bit AVX2"; @@ -138,7 +138,7 @@ class QuantizeTile8 { }; // Technically only requires AVX -MAXABS_DEFINE(__m256, AVX2) +MAXABSOLUTE_DEFINE(__m256, AVX2) } // namespace @@ -170,7 +170,7 @@ struct AVX2_8bit { PrepareBFor8(input, output, avx2::QuantizeTile8(quant_mult), rows, cols); }*/ - PREPARE_B_8_DEF(AVX2, avx2::QuantizeTile8) + PREPARE_B_8_DEFINE(AVX2, avx2::QuantizeTile8) AVX2 static void SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { avx2::SelectColumnsOfB((const __m256i*)input, (__m256i*)output, rows, cols_begin, cols_end); @@ -180,7 +180,7 @@ struct AVX2_8bit { //Multiply8_SSE2OrAVX2<Multiply8_AVXAVX2, __m256i, __m256>(A, B, C, unquant_mult, A_rows, width, B_cols); Multiply8_SSE2OrAVX2__m256i<JustUnquantizeC>(A, B, JustUnquantizeC(C, unquant_mult), A_rows, width, B_cols); }*/ - MULTIPLY8_define(__m256i, AVX2, OnAVX2) + MULTIPLY8_DEFINE(__m256i, AVX2, OnAVX2) constexpr static const char *const kName = "8-bit AVX2"; diff --git a/avx512_gemm.h b/avx512_gemm.h index 478c29f..b92ba0d 100644 --- a/avx512_gemm.h +++ b/avx512_gemm.h @@ -42,7 +42,7 @@ AVX512BW inline __m512i QuantizerGrab(const float *input, const __m512 quant_mul } /* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */ -SELECT_COL_B_DEF(AVX512BW, __m512i) +SELECT_COL_B_DEFINE(AVX512BW, __m512i) // For PrepareB we want to read 8 columns at a time. When converting 32-bit // floats to 8-bit values, that's 32 bytes of floats. But AVX512 is 64 bytes @@ -119,7 +119,7 @@ class QuantizeTile8 { }; /* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */ -MAXABS_DEFINE(__m512, AVX512BW) +MAXABSOLUTE_DEFINE(__m512, AVX512BW) } // namespace @@ -160,7 +160,7 @@ struct AVX512_16bit { } */ /* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */ - PREPARE_B_16_DEF(AVX512BW, avx512f::QuantizeTile16) + PREPARE_B_16_DEFINE(AVX512BW, avx512f::QuantizeTile16) /* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */ AVX512BW static void SelectColumnsB(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { @@ -168,7 +168,7 @@ struct AVX512_16bit { } /* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */ - MULTIPLY16_define(__m512i, AVX512BW, OnAVX2) + MULTIPLY16_DEFINE(__m512i, AVX512BW, OnAVX2) constexpr static const char *const kName = "16-bit AVX512"; @@ -210,7 +210,7 @@ struct AVX512_8bit { PrepareBFor8(input, output, avx512f::QuantizeTile8(quant_mult), rows, cols); }*/ /* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */ - PREPARE_B_8_DEF(AVX512BW, avx512f::QuantizeTile8) + PREPARE_B_8_DEFINE(AVX512BW, avx512f::QuantizeTile8) /* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */ AVX512BW static void SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { diff --git a/interleave.h b/interleave.h index 32c29b6..e7a62f5 100644 --- a/interleave.h +++ b/interleave.h @@ -209,7 +209,7 @@ template <class Register> static inline void Transpose8InLane( // 256 272 // 257 273 // ... ... -#define PREPARE_B_8_DEF(target, QuantClass) \ +#define PREPARE_B_8_DEFINE(target, QuantClass) \ target static inline void PrepareB(const float *input, int8_t *output_shadow, float quant_mult, Index rows, Index cols) { \ typedef typename QuantClass Quantizer; \ typedef typename Quantizer::Integer Register; \ @@ -244,7 +244,7 @@ target static inline void PrepareB(const float *input, int8_t *output_shadow, fl } \ } \ -#define PREPARE_B_16_DEF(target, QuantClass) \ +#define PREPARE_B_16_DEFINE(target, QuantClass) \ target static inline void PrepareB(const float *input, int16_t *output_shadow, float quant_mult, Index rows, Index cols) { \ typedef typename QuantClass Quantizer; \ typedef typename Quantizer::Integer Register; \ @@ -267,7 +267,7 @@ target static inline void PrepareB(const float *input, int16_t *output_shadow, f /* Select columns of B from PrepareB format to PrepareB format. */ -#define SELECT_COL_B_DEF(target, Register) \ +#define SELECT_COL_B_DEFINE(target, Register) \ target static inline void SelectColumnsOfB(const Register *input, Register *output, Index rows_bytes /* number of bytes in a row */, const Index *cols_begin, const Index *cols_end) { \ assert(rows_bytes % sizeof(Register) == 0); \ assert((cols_end - cols_begin) % 8 == 0); \ @@ -70,7 +70,7 @@ template <class Register> inline Register Pack0123(Register sum0, Register sum1, return add_epi32(pack01, pack23); } */ -#define PACK_DEFINE(target, Register) \ +#define PACK0123_DEFINE(target, Register) \ target inline Register Pack0123(Register sum0, Register sum1, Register sum2, Register sum3) { \ Interleave32(sum0, sum1); \ Register pack01 = add_epi32(sum0, sum1); \ @@ -80,11 +80,11 @@ target inline Register Pack0123(Register sum0, Register sum1, Register sum2, Reg return add_epi32(pack01, pack23); \ } \ -PACK_DEFINE(SSE2, __m128i) -PACK_DEFINE(AVX2, __m256i) +PACK0123_DEFINE(SSE2, __m128i) +PACK0123_DEFINE(AVX2, __m256i) #ifndef INTGEMM_NO_AVX512 /* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */ -PACK_DEFINE(AVX512BW, __m512i) +PACK0123_DEFINE(AVX512BW, __m512i) #endif // 16-bit multiplier for SSE2, AVX2, and AVX512. @@ -124,7 +124,7 @@ PACK_DEFINE(AVX512BW, __m512i) // width must be a multiple of the register size. // B_cols must be a multiple of 8. // Multiply16 -#define MULTIPLY16_define(Integer, target, WriteCSubType) \ +#define MULTIPLY16_DEFINE(Integer, target, WriteCSubType) \ template <class WriteC> target static void Multiply(const int16_t *A, const int16_t *B, WriteC C, Index A_rows, Index width, Index B_cols) { \ assert(width % (sizeof(Integer) / sizeof(int16_t)) == 0); \ assert(B_cols % 8 == 0); \ @@ -328,7 +328,7 @@ SSSE3 inline static void InnerSSSE3( sum7 = adds_epi16(sum7, maddubs_epi16(a_positive, sign_epi8(b[7], a))); } //AVX2 or SSSE3 multiply -#define MULTIPLY8_define(Integer, target, WriteCSubType) \ +#define MULTIPLY8_DEFINE(Integer, target, WriteCSubType) \ template <class WriteC> target static void Multiply(const int8_t *A, const int8_t *B, WriteC C, Index A_rows, Index width, Index B_cols) { \ assert(width % sizeof(Integer) == 0); \ assert(B_cols % 8 == 0); \ @@ -417,7 +417,7 @@ template <class Register> inline static float MaxAbsoluteBackend(const float *be return MaxFloat32(highest); }*/ -#define MAXABS_DEFINE(Register, target) \ +#define MAXABSOLUTE_DEFINE(Register, target) \ target static float MaxAbsolute(const float *begin_float, const float *end_float) { \ assert(end_float > begin_float); \ assert((end_float - begin_float) % (sizeof(Register) / sizeof(float)) == 0); \ diff --git a/sse2_gemm.h b/sse2_gemm.h index 8717150..882e006 100644 --- a/sse2_gemm.h +++ b/sse2_gemm.h @@ -14,7 +14,7 @@ SSE2 inline __m128i QuantizerGrab(const float *input, const __m128 quant_mult_re return _mm_cvtps_epi32(_mm_mul_ps(*reinterpret_cast<const __m128*>(input), quant_mult_reg)); } -SELECT_COL_B_DEF(SSE2, __m128i) +SELECT_COL_B_DEFINE(SSE2, __m128i) class QuantizeTile16 { public: @@ -39,7 +39,7 @@ class QuantizeTile16 { // Technically only requires SSE -MAXABS_DEFINE(__m128, SSE2) +MAXABSOLUTE_DEFINE(__m128, SSE2) } //namespace // This should be pure SSE2 (and below). @@ -66,13 +66,13 @@ struct SSE2_16bit { static const Index kBTileRow = 8; static const Index kBTileCol = 8; - PREPARE_B_16_DEF(SSE2, sse2::QuantizeTile16) + PREPARE_B_16_DEFINE(SSE2, sse2::QuantizeTile16) SSE2 static void SelectColumnsB(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { //TODO #DEFINE sse2::SelectColumnsOfB((const __m128i*)input, (__m128i*)output, rows * 2, cols_begin, cols_end); } - MULTIPLY16_define(__m128i, SSE2, OnSSE2) + MULTIPLY16_DEFINE(__m128i, SSE2, OnSSE2) constexpr static const char *const kName = "16-bit SSE2"; diff --git a/ssse3_gemm.h b/ssse3_gemm.h index b19e552..dccc730 100644 --- a/ssse3_gemm.h +++ b/ssse3_gemm.h @@ -17,7 +17,7 @@ SSSE3 inline __m128i QuantizerGrab(const float *input, const __m128 quant_mult_r return _mm_cvtps_epi32(_mm_mul_ps(*reinterpret_cast<const __m128*>(input), quant_mult_reg)); } -SELECT_COL_B_DEF(SSSE3, __m128i) +SELECT_COL_B_DEFINE(SSSE3, __m128i) class QuantizeTile8 { public: @@ -92,7 +92,7 @@ struct SSSE3_8bit { SSSE3 static void PrepareB(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) { PrepareBFor8(input, output, ssse3::QuantizeTile8(quant_mult), rows, cols); }*/ - PREPARE_B_8_DEF(SSSE3, ssse3::QuantizeTile8) + PREPARE_B_8_DEFINE(SSSE3, ssse3::QuantizeTile8) SSSE3 static void SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { ssse3::SelectColumnsOfB((const __m128i*)input, (__m128i*)output, rows, cols_begin, cols_end); @@ -102,7 +102,7 @@ struct SSSE3_8bit { //Multiply8_SSE2OrAVX2<Multiply8_C, __m128i, __m128>(A, B, C, unquant_mult, A_rows, width, B_cols); Multiply8_SSE2OrAVX2__m128i<JustUnquantizeC>(A, B, JustUnquantizeC(C, unquant_mult), A_rows, width, B_cols); }*/ - MULTIPLY8_define(__m128i, SSSE3, OnSSE2) + MULTIPLY8_DEFINE(__m128i, SSSE3, OnSSE2) constexpr static const char *const kName = "8-bit SSSE3"; |