diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-04-24 00:39:44 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-04-24 00:39:44 +0300 |
commit | b95575c0c039ee05910098590f6a46685988b248 (patch) | |
tree | 6ea394f0bb8f23be55f34af3339b7ba229c1079f | |
parent | f7c02f88cad0766537a3307eff1d6f15af6ed015 (diff) |
Comment
-rw-r--r-- | tile/multiply.inl | 8 |
1 files changed, 8 insertions, 0 deletions
diff --git a/tile/multiply.inl b/tile/multiply.inl index 78dca55..a1a92cf 100644 --- a/tile/multiply.inl +++ b/tile/multiply.inl @@ -56,6 +56,14 @@ template <class AccessT, class Kernel> INTGEMM_TARGET __attribute__((flatten)) s } } +/* Multiply matrices without being a multiple of an unrolled kernel size. The + * inner dimension still needs to be a multiple of sizeof(Register) for int8_t + * or sizeof(Register) / 2 for int16_t. + * Kernel should be a small kernel like Shifted8 or Signed8; this function will + * unroll. + * A_rows and B_cols specify the unrolled kernel size to use for most of the + * multiply; these impact speed but not output. + */ template <class Access, class Kernel, Index A_rows, Index B_cols> INTGEMM_TARGET static inline void Multiply(Access access, const Tile shape) { // Still has to be a multiple of the underlying Kernel, but usually that's just 1 x sizeof(Register) x 1. assert(shape.A_rows % Kernel::kTile.A_rows == 0); |