Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ExecuteKernelGeneric.h « src - github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: ce9a7bb92ae4231a476cda70ddd9ced1e71434d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 * All rights reserved.
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
#pragma once
#include <cstdint>
#include "GenerateKernel.h"
#include "fbgemm/Fbgemm.h"

namespace fbgemm {

/**
 * @brief Execute Engine for the macro-kernel and output processing.
 * ExecuteKernel is a derived class of CodeGenBase.
 *
 * This header only declares the generic template; the constructor and
 * execute() are defined elsewhere.
 *
 * @tparam packingAMatrix    Packing type for matrix A; must provide the
 *                           nested types inpType and accType.
 * @tparam packingBMatrix    Packing type for matrix B; must provide the
 *                           nested types inpType and accType. Its accType is
 *                           the accumulation type handed to CodeGenBase.
 * @tparam cT                Element type of the output matrix C.
 * @tparam processOutputType Type of the output processing function applied
 *                           to the C tile in the macro-kernel.
 */
template <
    typename packingAMatrix,
    typename packingBMatrix,
    typename cT,
    typename processOutputType>
class ExecuteKernel : public CodeGenBase<
                          typename packingAMatrix::inpType,
                          typename packingBMatrix::inpType,
                          cT,
                          typename packingBMatrix::accType> {
 public:
  /**
   * @brief Set up the execute engine for one packed A block and packed B.
   *
   * @param packA         Packed block of matrix A.
   * @param packB         Packed matrix B.
   * @param matC          Output for matrix C.
   * @param C_buffer      Accumulation buffer for matrix C.
   * @param ldc           Leading dimension of matrix C.
   * @param outputProcess Output processing function for the C tile in the
   *                      macro-kernel.
   * @param thread_id     Presumably the index of the calling thread --
   *                      confirm against the constructor definition.
   * @param num_threads   Presumably the total number of cooperating threads
   *                      -- confirm against the constructor definition.
   * @param params        Optional blocking factors; defaults to nullptr.
   *
   * NOTE(review): packedA_, packedB_ and outputProcess_ are reference
   * members, so the corresponding arguments presumably have to outlive this
   * object -- confirm against the constructor definition.
   */
  ExecuteKernel(
      PackMatrix<
          packingAMatrix,
          typename packingAMatrix::inpType,
          typename packingAMatrix::accType>& packA,
      PackMatrix<
          packingBMatrix,
          typename packingBMatrix::inpType,
          typename packingBMatrix::accType>& packB,
      cT* matC,
      typename packingBMatrix::accType* C_buffer,
      int32_t ldc,
      const processOutputType& outputProcess,
      int thread_id,
      int num_threads,
      const BlockingFactors* params = nullptr);

  /**
   * @brief Run the macro-kernel and output processing.
   *
   * @param kBlock Presumably the index of the block along the K dimension
   *               being processed -- confirm against the definition.
   */
  void execute(int kBlock);

 private:
  PackMatrix<
      packingAMatrix,
      typename packingAMatrix::inpType,
      typename packingAMatrix::accType>&
      packedA_; ///< Packed block of matrix A.
  PackMatrix<
      packingBMatrix,
      typename packingBMatrix::inpType,
      typename packingBMatrix::accType>& packedB_; ///< Packed matrix B.
  cT* matC_; ///< Output for matrix C.
  // Typed with packingBMatrix::accType so that it agrees with both the
  // C_buffer constructor parameter and the accumulation type passed to
  // CodeGenBase.
  typename packingBMatrix::accType*
      C_buffer_; ///< the accumulation buffer for matrix C.
  int32_t ldc_; ///< the leading dimension of matrix C.
  const processOutputType& outputProcess_; ///< output processing function for
                                           ///< the C tile in the macro-kernel.
};

} // namespace fbgemm