Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author dskhudia <dskhudia@fb.com> 2018-11-04 19:22:37 +0300
committer dskhudia <dskhudia@fb.com> 2018-11-04 19:22:37 +0300
commit 690dbc29d9b0cb373fa0303b7c30c20b527e9605 (patch)
tree 56d9b3ebc1a7b5ff394e5dc9e08db9e44285e6f4 /src/GenerateKernelU8S8S32ACC32_avx512.cc
parent 505eb847185c9255526813dd39edadcd4e61d8e0 (diff)
Syncing with internal version. Fixes for Mac/clang build. Other minor fixes
Diffstat (limited to 'src/GenerateKernelU8S8S32ACC32_avx512.cc')
-rw-r--r-- src/GenerateKernelU8S8S32ACC32_avx512.cc 29
1 file changed, 17 insertions, 12 deletions
diff --git a/src/GenerateKernelU8S8S32ACC32_avx512.cc b/src/GenerateKernelU8S8S32ACC32_avx512.cc
index 5cd5684..251a8b8 100644
--- a/src/GenerateKernelU8S8S32ACC32_avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC32_avx512.cc
@@ -203,7 +203,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
// a->vpcmpeqw(oneReg, oneReg, oneReg);
a->vpternlogd(oneReg, oneReg, oneReg, 0xff);
a->vpsrlw(oneReg, oneReg, 15);
- a->imul(ldcReg, ldcReg, sizeof(int32_t));
+ a->imul(ldcReg, ldcReg, static_cast<asmjit::Imm>(sizeof(int32_t)));
a->mov(C_Offset, 0);
int colRegs = nc * row_interleave * sizeof(int8_t) / VLEN_;
@@ -228,19 +228,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
a->bind(Loopk);
// k is incremented by row_interleave
- a->add(kIdx, row_interleave);
+ a->add(kIdx, static_cast<asmjit::Imm>(row_interleave));
genComputeBlock<inst_set_t::avx512>(
a, buffer_A, buffer_B, B_pf, rowRegs, colRegs, kBlock, colRegs);
// update buffer_A address for next k iteration
- a->add(buffer_A, row_interleave * sizeof(uint8_t));
+ a->add(
+ buffer_A, static_cast<asmjit::Imm>(row_interleave * sizeof(uint8_t)));
// update buffer_B address for next k iteration
- a->add(buffer_B, VLEN_ * colRegs * sizeof(int8_t));
- a->add(B_pf, VLEN_ * colRegs * sizeof(int8_t));
+ a->add(
+ buffer_B, static_cast<asmjit::Imm>(VLEN_ * colRegs * sizeof(int8_t)));
+ a->add(B_pf, static_cast<asmjit::Imm>(VLEN_ * colRegs * sizeof(int8_t)));
- // a->add(B_pf, 32*sizeof(float));
+ // a->add(B_pf, static_cast<asmjit::Imm>(32*sizeof(float)));
a->cmp(kIdx, kSize);
a->jl(Loopk);
@@ -251,10 +253,11 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
// increment A for next block
a->sub(buffer_A, kSize);
- a->add(buffer_A, (rowRegs)*kBlock * sizeof(uint8_t));
+ a->add(
+ buffer_A, static_cast<asmjit::Imm>((rowRegs)*kBlock * sizeof(uint8_t)));
// increment C for next block
- a->imul(C_Offset, ldcReg, rowRegs);
+ a->imul(C_Offset, ldcReg, static_cast<asmjit::Imm>(rowRegs));
a->add(CBase, C_Offset);
a->mov(C_Offset, 0);
@@ -277,17 +280,19 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
a->bind(LoopkRem);
// k is incremented by row_interleave
- a->add(kIdx, row_interleave);
+ a->add(kIdx, static_cast<asmjit::Imm>(row_interleave));
genComputeBlock<inst_set_t::avx512>(
a, buffer_A, buffer_B, B_pf, rowRegs, colRegs, kBlock, colRegs);
// update buffer_A address for next k iteration
- a->add(buffer_A, row_interleave * sizeof(uint8_t));
+ a->add(
+ buffer_A, static_cast<asmjit::Imm>(row_interleave * sizeof(uint8_t)));
// update buffer_B address for next k iteration
- a->add(buffer_B, VLEN_ * colRegs * sizeof(int8_t));
- a->add(B_pf, VLEN_ * colRegs * sizeof(int8_t));
+ a->add(
+ buffer_B, static_cast<asmjit::Imm>(VLEN_ * colRegs * sizeof(int8_t)));
+ a->add(B_pf, static_cast<asmjit::Imm>(VLEN_ * colRegs * sizeof(int8_t)));
a->cmp(kIdx, kSize);
a->jl(LoopkRem);