Welcome to mirror list, hosted at ThFree Co, Russian Federation.

avx512_gemm.h - github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 5713be43688753fc5385e765e47eb1425fbb7187 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#pragma once
#include <immintrin.h>

#include <cstddef>
#include <cstdint>

namespace intgemm {
// Everything in the AVX512 namespace is declared only when the compiler defines
// __AVX512F__; otherwise this header contributes no declarations.
#ifdef __AVX512F__
namespace AVX512 {

// Quantizers (declarations only; the definitions are not in this header).
// Fixed-width integer types are spelled std::intN_t so that they come from
// <cstdint>, in the same way std::size_t comes from <cstddef>, rather than
// relying on another header to leak the unqualified names.
//   input      - source floats (read-only).
//   output     - destination buffer for the quantized values.
//   quant_mult - presumably the scale applied to each input before conversion;
//                TODO confirm against the implementation.
//   size       - presumably the number of elements to process; TODO confirm.
// NOTE(review): any alignment or size-multiple requirements on input/output are
// not stated here; check the implementation before relying on either.
void Quantize16(const float *input, std::int16_t *output, float quant_mult, std::size_t size);
void Quantize8(const float *input, std::int8_t *output, float quant_mult, std::size_t size);

// Multiply C = unquant_mult * A * B^T.  A is normally activations and B is
// normally a parameter matrix.
// Values of A and B should come from the corresponding quantizer
// (Quantize16 for MatrixMult16, Quantize8 for MatrixMult8).
// A and B must be 64-byte aligned.
// C should be the usual 4-byte alignment.
//   unquant_mult - scalar factor in the product above.
//   num_A_rows   - number of rows of A.
//   num_B_rows   - number of rows of B.
//   width        - presumably the shared inner dimension (columns of both A and
//                  B, since B is used transposed); TODO confirm, including
//                  whether it is counted in elements or in __m512i registers.
void MatrixMult16(const __m512i *A, const __m512i *B, float *C, float unquant_mult, int num_A_rows, int num_B_rows, int width);
void MatrixMult8(const __m512i *A, const __m512i *B, float *C, float unquant_mult, int num_A_rows, int num_B_rows, int width);

} // namespace AVX512
#endif // __AVX512F__
} // namespace intgemm