Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2020-04-24 00:39:44 +0300
committerKenneth Heafield <github@kheafield.com>2020-04-24 00:39:44 +0300
commitb95575c0c039ee05910098590f6a46685988b248 (patch)
tree6ea394f0bb8f23be55f34af3339b7ba229c1079f
parentf7c02f88cad0766537a3307eff1d6f15af6ed015 (diff)
Comment
-rw-r--r--tile/multiply.inl8
1 files changed, 8 insertions, 0 deletions
diff --git a/tile/multiply.inl b/tile/multiply.inl
index 78dca55..a1a92cf 100644
--- a/tile/multiply.inl
+++ b/tile/multiply.inl
@@ -56,6 +56,14 @@ template <class AccessT, class Kernel> INTGEMM_TARGET __attribute__((flatten)) s
}
}
+/* Multiply matrices whose dimensions need not be a multiple of the unrolled
+ * kernel size. The inner dimension still needs to be a multiple of
+ * sizeof(Register) for int8_t or sizeof(Register) / 2 for int16_t.
+ * Kernel should be a small kernel like Shifted8 or Signed8; this function
+ * will unroll it.
+ * A_rows and B_cols specify the unrolled kernel size to use for most of the
+ * multiply; they affect speed but not the output.
+ */
template <class Access, class Kernel, Index A_rows, Index B_cols> INTGEMM_TARGET static inline void Multiply(Access access, const Tile shape) {
// Still has to be a multiple of the underlying Kernel, but usually that's just 1 x sizeof(Register) x 1.
assert(shape.A_rows % Kernel::kTile.A_rows == 0);