From e96023fb94016a740b788a3574872dc13df78014 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Tue, 21 May 2019 14:15:20 +0100 Subject: INTGEMM_ namespace for all defines --- ssse3_gemm.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'ssse3_gemm.h') diff --git a/ssse3_gemm.h b/ssse3_gemm.h index dccc730..cf5d2c1 100644 --- a/ssse3_gemm.h +++ b/ssse3_gemm.h @@ -13,30 +13,30 @@ namespace intgemm { namespace ssse3 { // Same implementation as AVX512, just width. Grabs 4 32-bit values. //TODO duplicated function requires the removal of the annonymous namespace -SSSE3 inline __m128i QuantizerGrab(const float *input, const __m128 quant_mult_reg) { +INTGEMM_SSSE3 inline __m128i QuantizerGrab(const float *input, const __m128 quant_mult_reg) { return _mm_cvtps_epi32(_mm_mul_ps(*reinterpret_cast(input), quant_mult_reg)); } -SELECT_COL_B_DEFINE(SSSE3, __m128i) +INTGEMM_SELECT_COL_B(INTGEMM_SSSE3, __m128i) class QuantizeTile8 { public: typedef __m128i Integer; - SSSE3 explicit QuantizeTile8(float mult) : mult_reg_(_mm_set1_ps(mult)) {} + INTGEMM_SSSE3 explicit QuantizeTile8(float mult) : mult_reg_(_mm_set1_ps(mult)) {} - SSSE3 inline __m128i ForReshape(const float *input, Index cols) { + INTGEMM_SSSE3 inline __m128i ForReshape(const float *input, Index cols) { // Skip a row. return Tile(input, input + 2 * cols); } - SSSE3 inline __m128i Consecutive(const float *input) { + INTGEMM_SSSE3 inline __m128i Consecutive(const float *input) { return Tile(input, input + 8); } private: // Quantize 16xfloat into 16xint8_t - SSSE3 inline __m128i Tile(const float *input0, const float *input1) { + INTGEMM_SSSE3 inline __m128i Tile(const float *input0, const float *input1) { const __m128i neg128 = _mm_set1_epi8(-128); __m128i g0 = QuantizerGrab(input0, mult_reg_); __m128i g1 = QuantizerGrab(input0 + 4, mult_reg_); @@ -47,7 +47,7 @@ class QuantizeTile8 { __m128i packed = _mm_packs_epi16(packed0, packed1); /* Ban -128. 
* Don't use the SSE4.1 instruction _mm_max_epi8(packed, neg127). Instead, - * use SSE2 instructions _mm_cmpeq_epi8 and _mm_sub_epi8. + * use SSE2 instructions _mm_cmpeq_epi8 and _mm_sub_epi8. * The first generates 0xff for fields -128. * The second subtracts 0xff from -128 which has the effect of converting * to -127. @@ -65,16 +65,16 @@ class QuantizeTile8 { } // namespace -// pmaddubsw (the 8-bit multiply) is SSSE3, so pedantically that's the version we need. +// pmaddubsw (the 8-bit multiply) is SSSE3, so pedantically that's the version we need. struct SSSE3_8bit { typedef int8_t Integer; // Currently A is prepared by quantization but this could theoretically change. - SSSE3 static inline void PrepareA(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) { + INTGEMM_SSSE3 static inline void PrepareA(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) { Quantize(input, output, quant_mult, rows * cols); } - SSSE3 static void Quantize(const float *input, int8_t *output, float quant_mult, Index size) { + INTGEMM_SSSE3 static void Quantize(const float *input, int8_t *output, float quant_mult, Index size) { assert(size % 16 == 0); assert(reinterpret_cast(input) % 16 == 0); assert(reinterpret_cast(output) % 16 == 0); @@ -89,22 +89,22 @@ struct SSSE3_8bit { static const Index kBTileRow = 16; static const Index kBTileCol = 8; /* - SSSE3 static void PrepareB(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) { + INTGEMM_SSSE3 static void PrepareB(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) { PrepareBFor8(input, output, ssse3::QuantizeTile8(quant_mult), rows, cols); }*/ - PREPARE_B_8_DEFINE(SSSE3, ssse3::QuantizeTile8) + INTGEMM_PREPARE_B_8(INTGEMM_SSSE3, ssse3::QuantizeTile8) - SSSE3 static void SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { + INTGEMM_SSSE3 static void 
SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { ssse3::SelectColumnsOfB((const __m128i*)input, (__m128i*)output, rows, cols_begin, cols_end); } /* - SSSE3 static void Multiply(const int8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols) { + INTGEMM_SSSE3 static void Multiply(const int8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols) { //Multiply8_SSE2OrAVX2(A, B, C, unquant_mult, A_rows, width, B_cols); Multiply8_SSE2OrAVX2__m128i(A, B, JustUnquantizeC(C, unquant_mult), A_rows, width, B_cols); }*/ - MULTIPLY8_DEFINE(__m128i, SSSE3, OnSSE2) + INTGEMM_MULTIPLY8(__m128i, INTGEMM_SSSE3, OnSSE2) - constexpr static const char *const kName = "8-bit SSSE3"; + constexpr static const char *const kName = "8-bit INTGEMM_SSSE3"; static const CPUType kUses = CPU_SSSE3; }; -- cgit v1.2.3