Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nikolay Bogoychev <nheart@gmail.com> 2019-05-20 20:19:14 +0300
committer: Nikolay Bogoychev <nheart@gmail.com> 2019-05-20 20:19:14 +0300
commit: 79f2f670e281e84bedc57261452aad1837a58bfd (patch)
tree: 058437d4c6547176230fd6504e472fb4d3d0e4b7
parent: 05735e87226e52466a89de07a4bd3b6baba19216 (diff)
More consistent define naming scheme
-rw-r--r-- avx2_gemm.h 12
-rw-r--r-- avx512_gemm.h 10
-rw-r--r-- interleave.h 6
-rw-r--r-- multiply.h 14
-rw-r--r-- sse2_gemm.h 8
-rw-r--r-- ssse3_gemm.h 6
6 files changed, 28 insertions, 28 deletions
diff --git a/avx2_gemm.h b/avx2_gemm.h
index 393eebb..9ed70c1 100644
--- a/avx2_gemm.h
+++ b/avx2_gemm.h
@@ -17,7 +17,7 @@ AVX2 inline __m256i QuantizerGrab(const float *input, const __m256 quant_mult_re
return _mm256_cvtps_epi32(_mm256_mul_ps(*reinterpret_cast<const __m256*>(input), quant_mult_reg));
}
-SELECT_COL_B_DEF(AVX2, __m256i)
+SELECT_COL_B_DEFINE(AVX2, __m256i)
class QuantizeTile16 {
public:
@@ -76,13 +76,13 @@ struct AVX2_16bit {
AVX2 static void PrepareB(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) {
PrepareBFor16(input, output, avx2::QuantizeTile16(quant_mult), rows, cols);
}*/
- PREPARE_B_16_DEF(AVX2, avx2::QuantizeTile16)
+ PREPARE_B_16_DEFINE(AVX2, avx2::QuantizeTile16)
AVX2 static void SelectColumnsB(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) {
avx2::SelectColumnsOfB((const __m256i*)input, (__m256i*)output, rows * 2, cols_begin, cols_end);
}
- MULTIPLY16_define(__m256i, AVX2, OnAVX2)
+ MULTIPLY16_DEFINE(__m256i, AVX2, OnAVX2)
constexpr static const char *const kName = "16-bit AVX2";
@@ -138,7 +138,7 @@ class QuantizeTile8 {
};
// Technically only requires AVX
-MAXABS_DEFINE(__m256, AVX2)
+MAXABSOLUTE_DEFINE(__m256, AVX2)
} // namespace
@@ -170,7 +170,7 @@ struct AVX2_8bit {
PrepareBFor8(input, output, avx2::QuantizeTile8(quant_mult), rows, cols);
}*/
- PREPARE_B_8_DEF(AVX2, avx2::QuantizeTile8)
+ PREPARE_B_8_DEFINE(AVX2, avx2::QuantizeTile8)
AVX2 static void SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) {
avx2::SelectColumnsOfB((const __m256i*)input, (__m256i*)output, rows, cols_begin, cols_end);
@@ -180,7 +180,7 @@ struct AVX2_8bit {
//Multiply8_SSE2OrAVX2<Multiply8_AVXAVX2, __m256i, __m256>(A, B, C, unquant_mult, A_rows, width, B_cols);
Multiply8_SSE2OrAVX2__m256i<JustUnquantizeC>(A, B, JustUnquantizeC(C, unquant_mult), A_rows, width, B_cols);
}*/
- MULTIPLY8_define(__m256i, AVX2, OnAVX2)
+ MULTIPLY8_DEFINE(__m256i, AVX2, OnAVX2)
constexpr static const char *const kName = "8-bit AVX2";
diff --git a/avx512_gemm.h b/avx512_gemm.h
index 478c29f..b92ba0d 100644
--- a/avx512_gemm.h
+++ b/avx512_gemm.h
@@ -42,7 +42,7 @@ AVX512BW inline __m512i QuantizerGrab(const float *input, const __m512 quant_mul
}
/* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */
-SELECT_COL_B_DEF(AVX512BW, __m512i)
+SELECT_COL_B_DEFINE(AVX512BW, __m512i)
// For PrepareB we want to read 8 columns at a time. When converting 32-bit
// floats to 8-bit values, that's 32 bytes of floats. But AVX512 is 64 bytes
@@ -119,7 +119,7 @@ class QuantizeTile8 {
};
/* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */
-MAXABS_DEFINE(__m512, AVX512BW)
+MAXABSOLUTE_DEFINE(__m512, AVX512BW)
} // namespace
@@ -160,7 +160,7 @@ struct AVX512_16bit {
}
*/
/* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */
- PREPARE_B_16_DEF(AVX512BW, avx512f::QuantizeTile16)
+ PREPARE_B_16_DEFINE(AVX512BW, avx512f::QuantizeTile16)
/* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */
AVX512BW static void SelectColumnsB(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) {
@@ -168,7 +168,7 @@ struct AVX512_16bit {
}
/* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */
- MULTIPLY16_define(__m512i, AVX512BW, OnAVX2)
+ MULTIPLY16_DEFINE(__m512i, AVX512BW, OnAVX2)
constexpr static const char *const kName = "16-bit AVX512";
@@ -210,7 +210,7 @@ struct AVX512_8bit {
PrepareBFor8(input, output, avx512f::QuantizeTile8(quant_mult), rows, cols);
}*/
/* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */
- PREPARE_B_8_DEF(AVX512BW, avx512f::QuantizeTile8)
+ PREPARE_B_8_DEFINE(AVX512BW, avx512f::QuantizeTile8)
/* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */
AVX512BW static void SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) {
diff --git a/interleave.h b/interleave.h
index 32c29b6..e7a62f5 100644
--- a/interleave.h
+++ b/interleave.h
@@ -209,7 +209,7 @@ template <class Register> static inline void Transpose8InLane(
// 256 272
// 257 273
// ... ...
-#define PREPARE_B_8_DEF(target, QuantClass) \
+#define PREPARE_B_8_DEFINE(target, QuantClass) \
target static inline void PrepareB(const float *input, int8_t *output_shadow, float quant_mult, Index rows, Index cols) { \
typedef typename QuantClass Quantizer; \
typedef typename Quantizer::Integer Register; \
@@ -244,7 +244,7 @@ target static inline void PrepareB(const float *input, int8_t *output_shadow, fl
} \
} \
-#define PREPARE_B_16_DEF(target, QuantClass) \
+#define PREPARE_B_16_DEFINE(target, QuantClass) \
target static inline void PrepareB(const float *input, int16_t *output_shadow, float quant_mult, Index rows, Index cols) { \
typedef typename QuantClass Quantizer; \
typedef typename Quantizer::Integer Register; \
@@ -267,7 +267,7 @@ target static inline void PrepareB(const float *input, int16_t *output_shadow, f
/* Select columns of B from PrepareB format to PrepareB format.
*/
-#define SELECT_COL_B_DEF(target, Register) \
+#define SELECT_COL_B_DEFINE(target, Register) \
target static inline void SelectColumnsOfB(const Register *input, Register *output, Index rows_bytes /* number of bytes in a row */, const Index *cols_begin, const Index *cols_end) { \
assert(rows_bytes % sizeof(Register) == 0); \
assert((cols_end - cols_begin) % 8 == 0); \
diff --git a/multiply.h b/multiply.h
index 4d3d4fb..6c81468 100644
--- a/multiply.h
+++ b/multiply.h
@@ -70,7 +70,7 @@ template <class Register> inline Register Pack0123(Register sum0, Register sum1,
return add_epi32(pack01, pack23);
}
*/
-#define PACK_DEFINE(target, Register) \
+#define PACK0123_DEFINE(target, Register) \
target inline Register Pack0123(Register sum0, Register sum1, Register sum2, Register sum3) { \
Interleave32(sum0, sum1); \
Register pack01 = add_epi32(sum0, sum1); \
@@ -80,11 +80,11 @@ target inline Register Pack0123(Register sum0, Register sum1, Register sum2, Reg
return add_epi32(pack01, pack23); \
} \
-PACK_DEFINE(SSE2, __m128i)
-PACK_DEFINE(AVX2, __m256i)
+PACK0123_DEFINE(SSE2, __m128i)
+PACK0123_DEFINE(AVX2, __m256i)
#ifndef INTGEMM_NO_AVX512
/* Only AVX512F is necessary but due to GCC 5.4 bug we have to set AVX512BW */
-PACK_DEFINE(AVX512BW, __m512i)
+PACK0123_DEFINE(AVX512BW, __m512i)
#endif
// 16-bit multiplier for SSE2, AVX2, and AVX512.
@@ -124,7 +124,7 @@ PACK_DEFINE(AVX512BW, __m512i)
// width must be a multiple of the register size.
// B_cols must be a multiple of 8.
// Multiply16
-#define MULTIPLY16_define(Integer, target, WriteCSubType) \
+#define MULTIPLY16_DEFINE(Integer, target, WriteCSubType) \
template <class WriteC> target static void Multiply(const int16_t *A, const int16_t *B, WriteC C, Index A_rows, Index width, Index B_cols) { \
assert(width % (sizeof(Integer) / sizeof(int16_t)) == 0); \
assert(B_cols % 8 == 0); \
@@ -328,7 +328,7 @@ SSSE3 inline static void InnerSSSE3(
sum7 = adds_epi16(sum7, maddubs_epi16(a_positive, sign_epi8(b[7], a)));
}
//AVX2 or SSSE3 multiply
-#define MULTIPLY8_define(Integer, target, WriteCSubType) \
+#define MULTIPLY8_DEFINE(Integer, target, WriteCSubType) \
template <class WriteC> target static void Multiply(const int8_t *A, const int8_t *B, WriteC C, Index A_rows, Index width, Index B_cols) { \
assert(width % sizeof(Integer) == 0); \
assert(B_cols % 8 == 0); \
@@ -417,7 +417,7 @@ template <class Register> inline static float MaxAbsoluteBackend(const float *be
return MaxFloat32(highest);
}*/
-#define MAXABS_DEFINE(Register, target) \
+#define MAXABSOLUTE_DEFINE(Register, target) \
target static float MaxAbsolute(const float *begin_float, const float *end_float) { \
assert(end_float > begin_float); \
assert((end_float - begin_float) % (sizeof(Register) / sizeof(float)) == 0); \
diff --git a/sse2_gemm.h b/sse2_gemm.h
index 8717150..882e006 100644
--- a/sse2_gemm.h
+++ b/sse2_gemm.h
@@ -14,7 +14,7 @@ SSE2 inline __m128i QuantizerGrab(const float *input, const __m128 quant_mult_re
return _mm_cvtps_epi32(_mm_mul_ps(*reinterpret_cast<const __m128*>(input), quant_mult_reg));
}
-SELECT_COL_B_DEF(SSE2, __m128i)
+SELECT_COL_B_DEFINE(SSE2, __m128i)
class QuantizeTile16 {
public:
@@ -39,7 +39,7 @@ class QuantizeTile16 {
// Technically only requires SSE
-MAXABS_DEFINE(__m128, SSE2)
+MAXABSOLUTE_DEFINE(__m128, SSE2)
} //namespace
// This should be pure SSE2 (and below).
@@ -66,13 +66,13 @@ struct SSE2_16bit {
static const Index kBTileRow = 8;
static const Index kBTileCol = 8;
- PREPARE_B_16_DEF(SSE2, sse2::QuantizeTile16)
+ PREPARE_B_16_DEFINE(SSE2, sse2::QuantizeTile16)
SSE2 static void SelectColumnsB(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) {
//TODO #DEFINE
sse2::SelectColumnsOfB((const __m128i*)input, (__m128i*)output, rows * 2, cols_begin, cols_end);
}
- MULTIPLY16_define(__m128i, SSE2, OnSSE2)
+ MULTIPLY16_DEFINE(__m128i, SSE2, OnSSE2)
constexpr static const char *const kName = "16-bit SSE2";
diff --git a/ssse3_gemm.h b/ssse3_gemm.h
index b19e552..dccc730 100644
--- a/ssse3_gemm.h
+++ b/ssse3_gemm.h
@@ -17,7 +17,7 @@ SSSE3 inline __m128i QuantizerGrab(const float *input, const __m128 quant_mult_r
return _mm_cvtps_epi32(_mm_mul_ps(*reinterpret_cast<const __m128*>(input), quant_mult_reg));
}
-SELECT_COL_B_DEF(SSSE3, __m128i)
+SELECT_COL_B_DEFINE(SSSE3, __m128i)
class QuantizeTile8 {
public:
@@ -92,7 +92,7 @@ struct SSSE3_8bit {
SSSE3 static void PrepareB(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) {
PrepareBFor8(input, output, ssse3::QuantizeTile8(quant_mult), rows, cols);
}*/
- PREPARE_B_8_DEF(SSSE3, ssse3::QuantizeTile8)
+ PREPARE_B_8_DEFINE(SSSE3, ssse3::QuantizeTile8)
SSSE3 static void SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) {
ssse3::SelectColumnsOfB((const __m128i*)input, (__m128i*)output, rows, cols_begin, cols_end);
@@ -102,7 +102,7 @@ struct SSSE3_8bit {
//Multiply8_SSE2OrAVX2<Multiply8_C, __m128i, __m128>(A, B, C, unquant_mult, A_rows, width, B_cols);
Multiply8_SSE2OrAVX2__m128i<JustUnquantizeC>(A, B, JustUnquantizeC(C, unquant_mult), A_rows, width, B_cols);
}*/
- MULTIPLY8_define(__m128i, SSSE3, OnSSE2)
+ MULTIPLY8_DEFINE(__m128i, SSSE3, OnSSE2)
constexpr static const char *const kName = "8-bit SSSE3";