diff options
author | Nikolay Bogoychev <nheart@gmail.com> | 2019-05-21 16:15:20 +0300 |
---|---|---|
committer | Nikolay Bogoychev <nheart@gmail.com> | 2019-05-21 16:15:20 +0300 |
commit | e96023fb94016a740b788a3574872dc13df78014 (patch) | |
tree | a493ba70d36d92a98b89a83adb20e134f4c96f5c /sse2_gemm.h | |
parent | 76eb81538f447ef92dbab3bda0daa566f3422fac (diff) |
INTGEMM_ namespace for all defines
Diffstat (limited to 'sse2_gemm.h')
-rw-r--r-- | sse2_gemm.h | 26 |
1 files changed, 13 insertions, 13 deletions
diff --git a/sse2_gemm.h b/sse2_gemm.h index 882e006..9605a68 100644 --- a/sse2_gemm.h +++ b/sse2_gemm.h @@ -10,26 +10,26 @@ namespace intgemm { namespace sse2 { // Same implementation as AVX512, just width. Grabs 4 32-bit values. // TODO duplicated function requires the removal of the annonymous namespace -SSE2 inline __m128i QuantizerGrab(const float *input, const __m128 quant_mult_reg) { +INTGEMM_SSE2 inline __m128i QuantizerGrab(const float *input, const __m128 quant_mult_reg) { return _mm_cvtps_epi32(_mm_mul_ps(*reinterpret_cast<const __m128*>(input), quant_mult_reg)); } -SELECT_COL_B_DEFINE(SSE2, __m128i) +INTGEMM_SELECT_COL_B(INTGEMM_SSE2, __m128i) class QuantizeTile16 { public: typedef __m128i Integer; - SSE2 explicit QuantizeTile16(float mult) : mult_reg_(_mm_set1_ps(mult)) {} + INTGEMM_SSE2 explicit QuantizeTile16(float mult) : mult_reg_(_mm_set1_ps(mult)) {} // Quantize 8xfloat into 8xint16_t - SSE2 inline __m128i Consecutive(const float *input) { + INTGEMM_SSE2 inline __m128i Consecutive(const float *input) { __m128i g0 = QuantizerGrab(input, mult_reg_); __m128i g1 = QuantizerGrab(input + 4, mult_reg_); return _mm_packs_epi32(g0, g1); } - SSE2 inline __m128i ForReshape(const float *input, int) { + INTGEMM_SSE2 inline __m128i ForReshape(const float *input, int) { return Consecutive(input); } @@ -39,19 +39,19 @@ class QuantizeTile16 { // Technically only requires SSE -MAXABSOLUTE_DEFINE(__m128, SSE2) +INTGEMM_MAXABSOLUTE(__m128, INTGEMM_SSE2) } //namespace -// This should be pure SSE2 (and below). +// This should be pure INTGEMM_SSE2 (and below). struct SSE2_16bit { typedef int16_t Integer; // Currently A is prepared by quantization but this could theoretically change. - SSE2 static inline void PrepareA(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) { + INTGEMM_SSE2 static inline void PrepareA(const float *input, int16_t *output, float quant_mult, Index rows, Index cols) { Quantize(input, output, quant_mult, rows * cols); } - SSE2 static void Quantize(const float *input, int16_t *output, float quant_mult, Index size) { + INTGEMM_SSE2 static void Quantize(const float *input, int16_t *output, float quant_mult, Index size) { assert(size % 8 == 0); assert(reinterpret_cast<uintptr_t>(input) % 16 == 0); assert(reinterpret_cast<uintptr_t>(output) % 16 == 0); @@ -66,15 +66,15 @@ struct SSE2_16bit { static const Index kBTileRow = 8; static const Index kBTileCol = 8; - PREPARE_B_16_DEFINE(SSE2, sse2::QuantizeTile16) + INTGEMM_PREPARE_B_16(INTGEMM_SSE2, sse2::QuantizeTile16) - SSE2 static void SelectColumnsB(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { + INTGEMM_SSE2 static void SelectColumnsB(const int16_t *input, int16_t *output, Index rows, const Index *cols_begin, const Index *cols_end) { //TODO #DEFINE sse2::SelectColumnsOfB((const __m128i*)input, (__m128i*)output, rows * 2, cols_begin, cols_end); } - MULTIPLY16_DEFINE(__m128i, SSE2, OnSSE2) + INTGEMM_MULTIPLY16(__m128i, INTGEMM_SSE2, OnSSE2) - constexpr static const char *const kName = "16-bit SSE2"; + constexpr static const char *const kName = "16-bit INTGEMM_SSE2"; static const CPUType kUses = CPU_SSE2; }; |