Comment

author: Kenneth Heafield <github@kheafield.com> 2020-04-24 00:39:44 +0300
committer: Kenneth Heafield <github@kheafield.com> 2020-04-24 00:39:44 +0300
commit: b95575c0c039ee05910098590f6a46685988b248 (patch)
tree: 6ea394f0bb8f23be55f34af3339b7ba229c1079f
parent: f7c02f88cad0766537a3307eff1d6f15af6ed015 (diff)
1 files changed, 8 insertions, 0 deletions
diff --git a/tile/multiply.inl b/tile/multiply.inl
index 78dca55..a1a92cf 100644
--- a/tile/multiply.inl
+++ b/tile/multiply.inl
@@ -56,6 +56,14 @@ template <class AccessT, class Kernel> INTGEMM_TARGET __attribute__((flatten)) s
   }
 }
 
+/* Multiply matrices whose dimensions need not be a multiple of the unrolled
+ * kernel size.  The inner dimension still needs to be a multiple of
+ * sizeof(Register) for int8_t or sizeof(Register) / 2 for int16_t.
+ * Kernel should be a small kernel like Shifted8 or Signed8; this function
+ * takes care of unrolling it.
+ * A_rows and B_cols specify the unrolled kernel size to use for most of the
+ * multiply; these impact speed but not output.
+ */
 template <class Access, class Kernel, Index A_rows, Index B_cols> INTGEMM_TARGET static inline void Multiply(Access access, const Tile shape) {
   // Still has to be a multiple of the underlying Kernel, but usually that's just 1 x sizeof(Register) x 1.
   assert(shape.A_rows % Kernel::kTile.A_rows == 0);
author	Kenneth Heafield <github@kheafield.com>	2020-04-24 00:39:44 +0300
committer	Kenneth Heafield <github@kheafield.com>	2020-04-24 00:39:44 +0300
commit	b95575c0c039ee05910098590f6a46685988b248 (patch)
tree	6ea394f0bb8f23be55f34af3339b7ba229c1079f
parent	f7c02f88cad0766537a3307eff1d6f15af6ed015 (diff)