Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMateusz Chudyk <mateuszchudyk@gmail.com>2020-04-25 14:41:18 +0300
committerMateusz Chudyk <mateuszchudyk@gmail.com>2020-04-25 14:41:18 +0300
commitb5fc76d2695a605ca894831435eb13f0406e1108 (patch)
treec98050473e56489812a6443c27649a91bda44269
parentc2b81b285363c6c9713f105fb31bf0dc1ec658ef (diff)
parente78b9af9ba7ba419567a074deeb104943db6cd14 (diff)
Merge remote-tracking branch 'remotes/origin/static-multiply1x16' into HEADcompile_with_marian
-rw-r--r--tile/multiply.inl46
1 files changed, 46 insertions, 0 deletions
diff --git a/tile/multiply.inl b/tile/multiply.inl
index a1a92cf..42d2faf 100644
--- a/tile/multiply.inl
+++ b/tile/multiply.inl
@@ -56,6 +56,52 @@ template <class AccessT, class Kernel> INTGEMM_TARGET __attribute__((flatten)) s
}
}
+template <class Access, class Kernel, Index A_rows, Index B_cols> INTGEMM_TARGET static inline void Multiply_Force1x16(Access access, const Tile shape) {
+ // Still has to be a multiple of the underlying Kernel, but usually that's just 1 x sizeof(Register) x 1.
+ assert(shape.B_cols % 64 == 0);
+
+ // Left part
+ typedef UnrollKernel<1, 1, 16, Kernel> Left;
+ Tile overhang = {
+ shape.A_rows % Left::kTile.A_rows, // = 0
+ shape.inner % Left::kTile.inner,
+ shape.B_cols % Left::kTile.B_cols
+ };
+ Tile left_shape = {
+ shape.A_rows - overhang.A_rows, // = shape.A_rows
+ shape.inner - overhang.inner,
+ shape.B_cols - overhang.B_cols
+ };
+ MultiplyNoOverhang<Access, Left>(access, left_shape);
+
+ // Right part
+#define INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(overhang_value) \
+ if ((overhang_value) == overhang.B_cols) { \
+ typedef UnrollKernel<1, 1, overhang_value, Kernel> Right; \
+ MultiplyNoOverhang<Access, Right>( \
+ access.BAdd(0, left_shape.B_cols).CAdd(0, left_shape.B_cols), \
+ Tile{left_shape.A_rows, shape.inner, overhang.B_cols}); \
+ }
+
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(1)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(2)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(3)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(4)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(5)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(6)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(7)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(8)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(9)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(10)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(11)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(12)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(13)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(14)
+ INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART(15)
+
+#undef INTGEMM_UGLY_WAY_TO_IMPL_RIGHT_PART
+}
+
/* Multiply matrices without being a multiple of an unrolled kernel size. The
* inner dimension still needs to be a multiple of sizeof(Register) for int8_t
* or sizeof(Register) / 2 for int16_t.