From 627ab821c71606aa6ff4947640def78b4f2b192c Mon Sep 17 00:00:00 2001
From: Nikolay Bogoychev
Date: Fri, 12 Apr 2019 21:11:03 +0100
Subject: Mostly working

---
 ssse3_gemm.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'ssse3_gemm.h')

diff --git a/ssse3_gemm.h b/ssse3_gemm.h
index 69ac298..2b830a9 100644
--- a/ssse3_gemm.h
+++ b/ssse3_gemm.h
@@ -86,17 +86,19 @@ struct SSSE3_8bit {
   // Tile size for B; B must be a multiple of this block size.
   static const Index kBTileRow = 16;
   static const Index kBTileCol = 8;
-
+/*
   SSSE3 static void PrepareB(const float *input, int8_t *output, float quant_mult, Index rows, Index cols) {
     PrepareBFor8(input, output, ssse3::QuantizeTile8(quant_mult), rows, cols);
-  }
+  }*/
+  PREPARE_B_8_DEF(SSSE3, ssse3::QuantizeTile8)
 
   SSSE3 static void SelectColumnsB(const int8_t *input, int8_t *output, Index rows, const Index *cols_begin, const Index *cols_end) {
     SelectColumnsOfB((const __m128i*)input, (__m128i*)output, rows, cols_begin, cols_end);
   }
 
   SSSE3 static void Multiply(const int8_t *A, const int8_t *B, float *C, float unquant_mult, Index A_rows, Index width, Index B_cols) {
-    Multiply8_SSE2OrAVX2(A, B, C, unquant_mult, A_rows, width, B_cols);
+    //Multiply8_SSE2OrAVX2(A, B, C, unquant_mult, A_rows, width, B_cols);
+    Multiply8_SSE2OrAVX2__m128i(A, B, C, unquant_mult, A_rows, width, B_cols);
   }
 
   constexpr static const char *const kName = "8-bit SSSE3";
-- 
cgit v1.2.3