From f65f0ebe54f0512d8f42ee10025b596e3f42e0b8 Mon Sep 17 00:00:00 2001
From: Daya S Khudia
Date: Thu, 21 Mar 2019 10:03:36 -0700
Subject: Improves small N cases back to what they were

Summary: In D14507536 and D14516232 small N cases suffered if we increased the NR. This fixes those cases.

Reviewed By: jianyuh

Differential Revision: D14529494

fbshipit-source-id: 6f53797948de760d6ed24b767cbbe8d27768660f
---
 include/fbgemm/PackingTraits-inl.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/fbgemm/PackingTraits-inl.h b/include/fbgemm/PackingTraits-inl.h
index 6bf34d5..5b50bc9 100644
--- a/include/fbgemm/PackingTraits-inl.h
+++ b/include/fbgemm/PackingTraits-inl.h
@@ -154,6 +154,10 @@ struct PackingTraits<
     inst_set_t::avx512,
     typename std::enable_if<is_8bit<T>::value>::type> {
   static constexpr int MR{14}; ///< Register block for M dimension.
+  static constexpr int NR_MIN{
+      16}; ///< Minimum register block for N dimension.
+           ///< 16 because 16*ROW_INTERLEAVE int8 elements
+           ///< completely fill a 512-bit wide vector.
   static constexpr int NR{
       32}; ///< Register block for N dimension.
            ///< Must be a multiple of 16 because 16*ROW_INTERLEAVE int8 elements
@@ -187,6 +191,10 @@ struct PackingTraits<
     inst_set_t::avx512,
     typename std::enable_if<is_8bit<T>::value>::type> {
   static constexpr int MR{6}; ///< Register block for M dimension
+  static constexpr int NR_MIN{
+      32}; ///< Minimum register block for N dimension;
+           ///< 32 because 32*ROW_INTERLEAVE int8 elements
+           ///< completely fill a 512-bit wide vector.
   static constexpr int NR{
       128}; ///< Register block for N dimension;
             ///< Must be a multiple of 32 because 32*ROW_INTERLEAVE int8
-- 
cgit v1.2.3