Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/GenerateKernelU8S8S32ACC32Avx512.cc')
-rw-r--r-- src/GenerateKernelU8S8S32ACC32Avx512.cc 17
1 files changed, 7 insertions, 10 deletions
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512.cc b/src/GenerateKernelU8S8S32ACC32Avx512.cc
index d1729e4..12243ee 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512.cc
@@ -48,7 +48,7 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::genComputeBlock<
int rowRegs,
int colRegs,
int lda,
- int leadingDimCRegAssign) {
+ int leadingDimCReg) {
// used for matrix A
asmjit::X86Zmm AReg = x86::zmm31;
@@ -71,9 +71,9 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::genComputeBlock<
a->vpmaddubsw(res1, AReg, BReg);
a->vpmaddwd(res1, oneReg, res1);
a->vpaddd(
- CRegs_avx512_[i * leadingDimCRegAssign + j],
+ CRegs_avx512_[i * leadingDimCReg + j],
res1,
- CRegs_avx512_[i * leadingDimCRegAssign + j]);
+ CRegs_avx512_[i * leadingDimCReg + j]);
}
a->prefetcht0(x86::dword_ptr(B_pf, j * VLEN_ * sizeof(int8_t)));
}
@@ -93,10 +93,7 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
asmjit::X86Gp C_Offset,
asmjit::X86Gp ldcReg,
bool accum,
- int leadingDimCRegAssign) {
- // temp register
- asmjit::X86Zmm tmpReg = x86::zmm28;
-
+ int leadingDimCReg) {
for (int i = 0; i < rowRegs; ++i) {
if (i != 0) {
a->add(C_Offset, ldcReg);
@@ -107,13 +104,13 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
for (int j = 0; j < colRegs; ++j) {
if (accum) {
a->vpaddd(
- CRegs_avx512_[i * leadingDimCRegAssign + j],
- CRegs_avx512_[i * leadingDimCRegAssign + j],
+ CRegs_avx512_[i * leadingDimCReg + j],
+ CRegs_avx512_[i * leadingDimCReg + j],
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)));
}
a->vmovups(
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)),
- CRegs_avx512_[i * leadingDimCRegAssign + j]);
+ CRegs_avx512_[i * leadingDimCReg + j]);
}
}
}