Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ExecuteKernelU8S8.h « src - github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: bb20134784f4e30dac57c461602bf5a9fddd215a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 * All rights reserved.
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once
#include "ExecuteKernel.h"

namespace fbgemm {

/**
 * @brief Execute Engine of uint 8 and int8 matrix
 * multiplication for the macro-kernel and output processing.
 *
 * This is the partial specialization of ExecuteKernel selected when matrix B
 * is packed as PackBMatrix<int8_t, accType>. It derives from
 * CodeGenBase<uint8_t, int8_t, int32_t, accType>, where accType is
 * packingAMatrix::accType.
 *
 * The destructor releases C_tile_ with delete[], so instances are
 * non-copyable: a member-wise copy would leave two objects releasing the same
 * buffer.
 *
 * @tparam packingAMatrix    packing type of matrix A; must expose accType.
 * @tparam cT                element type of the output matrix C.
 * @tparam processOutputType type of the output processing functor.
 */
template <typename packingAMatrix, typename cT, typename processOutputType>
class ExecuteKernel<
    packingAMatrix,
    PackBMatrix<int8_t, typename packingAMatrix::accType>,
    cT,
    processOutputType>
    : public CodeGenBase<
          uint8_t,
          int8_t,
          int32_t,
          typename packingAMatrix::accType> {
 public:
  using BaseType =
      CodeGenBase<uint8_t, int8_t, int32_t, typename packingAMatrix::accType>;

  /**
   * @brief Constructor for initializing the parameters for macro-kernel and
   * output processing type.
   *
   * @param packA         packed uint8 block of matrix A (held by reference).
   * @param packB         packed int8 matrix B (held by reference).
   * @param matC          output for matrix C.
   * @param C_buffer      the accumulation buffer for matrix C.
   * @param ldc           the leading dimension of matrix C.
   * @param outputProcess output processing function for matrix C (held by
   *                      reference).
   * @param thread_id     the thread id.
   * @param num_threads   the total number of threads.
   * @param params        optional blocking factors; defaults to nullptr.
   *                      NOTE(review): presumably nullptr selects default
   *                      blocking parameters - confirm against the definition.
   *
   * The referenced arguments are stored as reference members, so they must
   * outlive this object.
   */
  ExecuteKernel(
      PackMatrix<packingAMatrix, uint8_t, typename packingAMatrix::accType>&
          packA,
      PackMatrix<
          PackBMatrix<int8_t, typename packingAMatrix::accType>,
          int8_t,
          typename packingAMatrix::accType>& packB,
      cT* matC,
      int32_t* C_buffer,
      int32_t ldc,
      const processOutputType& outputProcess,
      int thread_id,
      int num_threads,
      const BlockingFactors* params = nullptr);

  // C_tile_ is released with delete[] in the destructor; forbid copying so
  // that two objects can never release the same buffer. Declaring these also
  // suppresses the implicit move operations, which would have the same
  // problem.
  ExecuteKernel(const ExecuteKernel&) = delete;
  ExecuteKernel& operator=(const ExecuteKernel&) = delete;

  /**
   * @brief Run the macro-kernel and output processing.
   *
   * @param kBlock NOTE(review): presumably the index of the current block
   *               along the k dimension - confirm against the definition.
   */
  void execute(int kBlock);

  /// Releases the C_tile_ buffer.
  ~ExecuteKernel() {
    delete[] C_tile_;
  }

 private:
  PackMatrix<packingAMatrix, uint8_t, typename packingAMatrix::accType>&
      packedA_; ///< Packed uint8 block of matrix A.
  PackMatrix<
      PackBMatrix<int8_t, typename packingAMatrix::accType>,
      int8_t,
      typename packingAMatrix::accType>& packedB_; ///< Packed int8 matrix B.
  cT* matC_; ///< Output for matrix C.
  int32_t* C_buffer_; ///< the accumulation buffer for matrix C.
  int32_t ldc_; ///< the leading dimension of matrix C.
  const processOutputType& outputProcess_; ///< output processing function for
                                           ///< matrix C in the macro-kernel.
  int thread_id_; ///< the thread id.
  int num_threads_; ///< the total number of threads
  int32_t* C_tile_; ///< buffer for the last N block when NCB is not an exact
                    ///< multiple of N; freed with delete[] in the destructor.
                    ///< NOTE(review): wording looks inverted (likely "N is
                    ///< not an exact multiple of NCB") - confirm.
  int mbSize_; ///< block size in the m dimension.
  int nbSize_; ///< block size in the n dimension.
  int nrMinSize_; ///< minimum register size in the n dimension.
  int nrSize_; ///< register size in the n dimension.
};

} // namespace fbgemm