diff options
Diffstat (limited to 'ssse3_gemm.h')
-rw-r--r-- | ssse3_gemm.h | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/ssse3_gemm.h b/ssse3_gemm.h new file mode 100644 index 0000000..afbe4f0 --- /dev/null +++ b/ssse3_gemm.h @@ -0,0 +1,30 @@ +#pragma once +#include <stdint.h> + +// 16-bit is in sse2_gemm.h + +namespace intgemm { + +// pmaddubsw (the 8-bit multiply) is SSSE3, so pedantically that's the version we need. +struct SSSE3_8bit { + typedef int8_t Integer; + + // Currently A is prepared by quantization but this could theoretically change. + static inline void PrepareA(const float *input, int8_t *output, float quant_mult, int rows, int cols) { + Quantize(input, output, quant_mult, rows * cols); + } + + static void Quantize(const float *input, int8_t *output, float quant_mult, int size); + + // Tile size for B; B must be a multiple of this block size. + static const int kBTileRow = 16; + static const int kBTileCol = 8; + + static void PrepareB(const float *input, int8_t *output, float quant_mult, int rows, int cols); + + static void Multiply(const int8_t *A, const int8_t *B, float *C, float unquant_mult, int A_rows, int width, int B_cols); + + static const char *const kName; +}; + +} // namespace intgemm |