From 604575ff5de717b2ee712190634840981a9c8fba Mon Sep 17 00:00:00 2001
From: Mike Tsai
Date: Fri, 14 Jun 2019 17:04:25 -0700
Subject: Update the logic of checking valid parameters.

Summary: Add the check on NR_MIN and fix ymm/zmm register checks.

Reviewed By: dskhudia

Differential Revision: D15772144

fbshipit-source-id: 11e2c67fb3d47c5570b38ceaf9828ced0e60e65b
---
 src/GenerateKernelU8S8S32ACC16Avx512.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc
index 505fec1..e5687eb 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc
@@ -201,9 +201,10 @@ CodeGenBase::getOrCreate(
     int maxMRegs = mRegBlockSize;
     int maxNRegs = nRegBlockSize * row_interleave / VLEN_;
     assert(
-        maxMRegs * maxNRegs <= 24 &&
-        "MR*(NR*ROW_INTERLEAVE*8/512) \
-        must be <= 24(available registers constraint)");
+        (maxMRegs+1) * maxNRegs <= 28 &&
+        "number of zmm registers for C + one row for loading B: \
+        MR*(NR*ROW_INTERLEAVE*8/512) + (NR*ROW_INTERLEAVE*8/512) \
+        must be <= 28(available registers constraint)");
 
     int mRegBlocks = mc / mRegBlockSize;
     int mRegBlocksRem = mc % mRegBlockSize;
-- 
cgit v1.2.3