src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 * All rights reserved.
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#include <iostream>
#include "GenerateKernel.h"

namespace fbgemm {

namespace x86 = asmjit::x86;

/**
 * Generate AVX512 instructions for initializing the C registers to 0 in 16-bit
 * Accumulation kernel.
 */
template <>
template <>
void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::initCRegs<
    inst_set_t::avx512_vnni>(
    x86::Emitter* a,
    int rowRegs,
    int colRegs,
    int leadingDimCReg) {
  assert(0 && "Accumulation to int16_t is not available for VNNI!");

  // For AVX512VNNI, redirect to int32_t accumulation.
  CodeGenBase<uint8_t, int8_t, int32_t, int32_t> codeObj;
  codeObj.initCRegs<inst_set_t::avx512_vnni>(
      a, rowRegs, colRegs, leadingDimCReg);
}

/**
 * Generate AVX512 instructions for computing block in the rank-k update of
 * 16-bit Accmulation kernel.
 */
template <>
template <>
void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::genComputeBlock<
    inst_set_t::avx512_vnni>(
    x86::Emitter* a,
    x86::Gp buffer_A,
    x86::Gp buffer_B,
    x86::Gp /* unused (reserved for prefetching)*/,
    int rowRegs,
    int colRegs,
    int lda,
    int leadingDimCReg) {
  assert(0 && "Accumulation to int16_t is not available for VNNI!");

  // For AVX512VNNI, redirect to int32_t accumulation.
  CodeGenBase<uint8_t, int8_t, int32_t, int32_t> codeObj;
  codeObj.genComputeBlock<inst_set_t::avx512_vnni>(
      a, buffer_A, buffer_B, buffer_B, rowRegs, colRegs, lda, leadingDimCReg);
}

/**
 * Generate AVX512 instructions for storing the C registers back to the memory
 * in 16-bit Accumulation kernel.
 */
template <>
template <>
void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs<
    inst_set_t::avx512_vnni>(
    x86::Emitter* a,
    int rowRegs,
    int colRegs,
    x86::Gp C_Offset,
    x86::Gp ldcReg,
    bool accum,
    int leadingDimCReg) {
  assert(0 && "Accumulation to int16_t is not available for VNNI!");

  // For AVX512VNNI, redirect to int32_t accumulation.
  CodeGenBase<uint8_t, int8_t, int32_t, int32_t> codeObj;
  codeObj.storeCRegs<inst_set_t::avx512_vnni>(
      a, rowRegs, colRegs, C_Offset, ldcReg, accum, leadingDimCReg);
}

/**
 * Get or Create the AVX512 instructions for 16-bit Accumulation macro-kernel.
 *
 */
template <>
template <>
CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::jit_micro_kernel_fp
CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<
    inst_set_t::avx512_vnni>(
    bool accum,
    int32_t mc,
    int32_t nc,
    int32_t kc,
    int32_t /* unused */) {
  assert(0 && "Accumulation to int16_t is not available for VNNI!");

  // For AVX512VNNI, redirect to int32_t accumulation.
  CodeGenBase<uint8_t, int8_t, int32_t, int32_t> codeObj;
  return codeObj.getOrCreate<inst_set_t::avx512_vnni>(accum, mc, nc, kc, kc);
}

} // namespace fbgemm