diff options
Diffstat (limited to 'src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc')
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc new file mode 100644 index 0000000..f559aba --- /dev/null +++ b/src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +#include <iostream> +#include "GenerateKernel.h" + +namespace fbgemm { + +namespace x86 = asmjit::x86; + +/** + * Generate AVX512 instructions for initializing the C registers to 0 in 16-bit + * Accumulation kernel. + */ +template <> +template <> +void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::initCRegs< + inst_set_t::avx512_vnni>( + x86::Emitter* a, + int rowRegs, + int colRegs, + int leadingDimCReg) { + assert(0 && "Accumulation to int16_t is not available for VNNI!"); + + // For AVX512VNNI, redirect to int32_t accumulation. + CodeGenBase<uint8_t, int8_t, int32_t, int32_t> codeObj; + codeObj.initCRegs<inst_set_t::avx512_vnni>( + a, rowRegs, colRegs, leadingDimCReg); +} + +/** + * Generate AVX512 instructions for computing block in the rank-k update of + * 16-bit Accmulation kernel. + */ +template <> +template <> +void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::genComputeBlock< + inst_set_t::avx512_vnni>( + x86::Emitter* a, + x86::Gp buffer_A, + x86::Gp buffer_B, + x86::Gp /* unused (reserved for prefetching)*/, + int rowRegs, + int colRegs, + int lda, + int leadingDimCReg) { + assert(0 && "Accumulation to int16_t is not available for VNNI!"); + + // For AVX512VNNI, redirect to int32_t accumulation. + CodeGenBase<uint8_t, int8_t, int32_t, int32_t> codeObj; + codeObj.genComputeBlock<inst_set_t::avx512_vnni>( + a, buffer_A, buffer_B, buffer_B, rowRegs, colRegs, lda, leadingDimCReg); +} + +/** + * Generate AVX512 instructions for storing the C registers back to the memory + * in 16-bit Accumulation kernel. + */ +template <> +template <> +void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs< + inst_set_t::avx512_vnni>( + x86::Emitter* a, + int rowRegs, + int colRegs, + x86::Gp C_Offset, + x86::Gp ldcReg, + bool accum, + int leadingDimCReg) { + assert(0 && "Accumulation to int16_t is not available for VNNI!"); + + // For AVX512VNNI, redirect to int32_t accumulation. + CodeGenBase<uint8_t, int8_t, int32_t, int32_t> codeObj; + codeObj.storeCRegs<inst_set_t::avx512_vnni>( + a, rowRegs, colRegs, C_Offset, ldcReg, accum, leadingDimCReg); +} + +/** + * Get or Create the AVX512 instructions for 16-bit Accumulation macro-kernel. + * + */ +template <> +template <> +CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::jit_micro_kernel_fp +CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate< + inst_set_t::avx512_vnni>( + bool accum, + int32_t mc, + int32_t nc, + int32_t kc, + int32_t /* unused */) { + assert(0 && "Accumulation to int16_t is not available for VNNI!"); + + // For AVX512VNNI, redirect to int32_t accumulation. + CodeGenBase<uint8_t, int8_t, int32_t, int32_t> codeObj; + return codeObj.getOrCreate<inst_set_t::avx512_vnni>(accum, mc, nc, kc, kc); +} + +} // namespace fbgemm |