AVX2 gemm

author: Kenneth Heafield <github@kheafield.com> 2018-06-14 12:07:55 +0300
committer: Kenneth Heafield <github@kheafield.com> 2018-06-14 12:07:55 +0300
commit: ca9d64976e0fb197f3a97beb4aac6d8bf48b4d68 (patch)
tree: cd94a415a065afa5f9795961aaa9767504974494 /avx2_gemm.h
parent: d1d57bc9d22e27a459a74b735885b34f99f91934 (diff)
1 files changed, 21 insertions, 0 deletions
diff --git a/avx2_gemm.h b/avx2_gemm.h
new file mode 100644
index 0000000..26a0cee
--- /dev/null
+++ b/avx2_gemm.h
@@ -0,0 +1,21 @@
+#pragma once
+#include <immintrin.h>
+#include <cstddef>
+
+namespace intgemm {
+#ifdef __AVX2__
+namespace AVX2 {
+// Quantizers from float to int16_t / int8_t.  NOTE(review): presumably output[i] = round(input[i] * quant_mult) for i < size; the definitions are not in this header -- confirm.  NOTE(review): int16_t/int8_t are used but only <immintrin.h> and <cstddef> are included here, not <cstdint>.
+void Quantize16(const float *input, int16_t *output, float quant_mult, std::size_t size);
+void Quantize8(const float *input, int8_t *output, float quant_mult, std::size_t size);
+
+// Multiply C = unquant_mult * A * B^T.  A is normally activations and B is normally a parameter matrix.
+// Values of A and B should come from the corresponding quantizer (presumably Quantize16 for MatrixMult16 and Quantize8 for MatrixMult8 -- confirm).
+// A and B must be 64-byte aligned.  NOTE(review): __m256i itself only requires 32-byte alignment; confirm that the stricter 64-byte requirement is intentional.
+// C should be the usual 4-byte alignment.  NOTE(review): from C = A * B^T, presumably A is num_A_rows x width, B is num_B_rows x width and C is num_A_rows x num_B_rows -- confirm layout against the definition.
+void MatrixMult16(const __m256i *A, const __m256i *B, float *C, float unquant_mult, int num_A_rows, int num_B_rows, int width);
+void MatrixMult8(const __m256i *A, const __m256i *B, float *C, float unquant_mult, int num_A_rows, int num_B_rows, int width);
+
+} // namespace AVX2
+#endif // __AVX2__
+} // namespace intgemm
author	Kenneth Heafield <github@kheafield.com>	2018-06-14 12:07:55 +0300
committer	Kenneth Heafield <github@kheafield.com>	2018-06-14 12:07:55 +0300
commit	ca9d64976e0fb197f3a97beb4aac6d8bf48b4d68 (patch)
tree	cd94a415a065afa5f9795961aaa9767504974494 /avx2_gemm.h
parent	d1d57bc9d22e27a459a74b735885b34f99f91934 (diff)