src/tensors/cpu/int16.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113

#pragma once

#include "graph/node.h"
#include "tensors/cpu/sharp/int_gemm.h"

namespace marian {
namespace cpu {
namespace int16 {

/**
 * Graph node that produces an int16-typed value from its single input.
 *
 * The forward step hands the input tensor and the stored clip value to
 * Quantize16, which writes into this node's value tensor. The exact
 * quantization/clipping rule lives in Quantize16 (declared elsewhere) --
 * presumably values are clipped to [-clipValue, clipValue] before scaling;
 * TODO confirm against int_gemm.h.
 *
 * The node has no gradient: requesting backward ops aborts.
 */
struct QuantizeNodeOp : UnaryNodeOp {
  // Clip threshold forwarded unchanged to Quantize16.
  float clipValue_;

  /// @param a          input expression to be quantized
  /// @param clipValue  value passed to Quantize16 on every forward step
  QuantizeNodeOp(Expr a, float clipValue)
      : UnaryNodeOp(a, Type::int16),
        clipValue_(clipValue) {}

  /// Single forward operation: quantize child 0 into this node's value.
  NodeOps forwardOps() override {
    return {
      NodeOp(Quantize16(val_, child(0)->val(), clipValue_))
    };
  }

  /// Not differentiable; aborts when called.
  NodeOps backwardOps() override {
    ABORT("Only used for inference");
  }

  /// Node type identifier.
  const std::string type() override {
    return "quantizeInt16";
  }
};

/**
 * Graph node computing a matrix product of two operands via ProdInt16 and
 * yielding a float32 result.
 *
 * The second operand is treated as transposed (the product is A * B^T), so
 * the last dimension of A must equal the last dimension of B, and the last
 * dimension of the output equals the second-to-last dimension of B.
 *
 * The node has no gradient: requesting backward ops aborts.
 */
class DotNodeOp : public NaryNodeOp {
private:
  // Scalar forwarded unchanged to ProdInt16 (presumably a multiplier applied
  // to the product -- TODO confirm against ProdInt16).
  float scalar_;

public:
  /// @param a       left operand
  /// @param b       right operand, used as B^T
  /// @param scalar  value passed to ProdInt16 on every forward step
  DotNodeOp(Expr a, Expr b, float scalar)
      : NaryNodeOp({a, b}, newShape(a, b), Type::float32), scalar_(scalar) {}

  /**
   * Computes the output shape of A * B^T and aborts if the inner dimensions
   * do not match.
   *
   * This function is static because it is evaluated inside the constructor's
   * mem-initializer for the NaryNodeOp base, i.e. before that base has been
   * constructed. Calling a non-static member function at that point is
   * undefined behavior; a static function has no such restriction. It does
   * not read any instance state.
   *
   * @return shape of A with its last dimension replaced by B's
   *         second-to-last dimension
   */
  static Shape newShape(Expr a, Expr b) {
    auto shapeA = a->shape();
    auto shapeBT = b->shape();

    // Computing A * B^T: swap the two trailing dimensions of B.
    shapeBT.set(-2, b->shape()[-1]);
    shapeBT.set(-1, b->shape()[-2]);

    // Validate before building the result.
    ABORT_IF(shapeA[-1] != shapeBT[-2],
             "matrix product requires dimensions to match");

    Shape outShape = shapeA;
    outShape.set(-1, shapeBT[-1]);
    return outShape;
  }

  /// Single forward operation: val_ = ProdInt16(child 0, child 1, scalar_).
  NodeOps forwardOps() override {
    return {NodeOp(ProdInt16(val_, child(0)->val(), child(1)->val(), scalar_))};
  }

  /// Not differentiable; aborts when called.
  NodeOps backwardOps() override {
    ABORT("Only used for inference");
  }

  /// Node type identifier.
  const std::string type() override { return "dotInt16"; }
};

class AffineNodeOp : public NaryNodeOp {
private:
  float scalar_;

public:
  AffineNodeOp(const std::vector<Expr>& nodes, float scalar)
      : NaryNodeOp(nodes, newShape(nodes[0], nodes[1]), Type::float32), scalar_(scalar) {}

  Shape newShape(Expr a, Expr b) {
    auto shapeA = a->shape();
    auto shapeB = b->shape();

    // Computing A * B^T
    shapeB.set(-2, b->shape()[-1]);
    shapeB.set(-1, b->shape()[-2]);

    Shape outShape = shapeA;
    outShape.set(-1, shapeB[-1]);
    ABORT_IF(shapeA[-1] != shapeB[-2],
             "matrix product requires dimensions to match");
    return outShape;
  }

  NodeOps forwardOps() override {
    return {
      NodeOp(ProdInt16(val_, child(0)->val(), child(1)->val(), scalar_);
             AddBias(val_, child(2)->val()))
    };
  }

  NodeOps backwardOps() override {
    ABORT("Only used for inference");
  }

  const std::string type() override { return "affineInt16"; }
};

/// Creates a DotNodeOp graph node for operands `a` and `b`, forwarding
/// `scalar` to the node's constructor.
static inline Expr dot(Expr a, Expr b, float scalar) {
  Expr product = Expression<DotNodeOp>(a, b, scalar);
  return product;
}

/// Creates an AffineNodeOp graph node from the operands {a, b, c} (in that
/// order), forwarding `scalar` to the node's constructor.
static inline Expr affine(Expr a, Expr b, Expr c, float scalar) {
  const std::vector<Expr> operands{a, b, c};
  Expr result = Expression<AffineNodeOp>(operands, scalar);
  return result;
}

/// Creates a QuantizeNodeOp graph node for `a`, forwarding `clipValue` to the
/// node's constructor.
static inline Expr quantize(Expr a, float clipValue) {
  Expr quantized = Expression<QuantizeNodeOp>(a, clipValue);
  return quantized;
}

}  // namespace int16
}  // namespace cpu
}  // namespace marian