Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Young Jin Kim <youki@microsoft.com> 2020-01-15 22:29:47 +0300
committer: Young Jin Kim <youki@microsoft.com> 2020-01-15 22:29:47 +0300
commit: 1faf882d74b267c5cf5b07829f4355e819bb437e (patch)
tree: 53c0d74f785637a3861fc9ed2268014c5190d870
parent: 84e66a976046180187724aff60a236c5378fde7c (diff)
Some experiments with asmjit [youki/jit-experiments]
-rw-r--r-- src/GenerateKernelU8S8S32ACC16.cc 26
-rw-r--r-- src/GenerateKernelU8S8S32ACC16Avx512.cc 26
-rw-r--r-- src/GenerateKernelU8S8S32ACC32.cc 34
-rw-r--r-- src/GenerateKernelU8S8S32ACC32Avx512.cc 34
-rw-r--r-- src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc 34
-rw-r--r-- src/GroupwiseConvAcc32Avx2.cc 34
m--------- third_party/asmjit 0
7 files changed, 94 insertions, 94 deletions
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc
index 205af14..6fbe1fb 100644
--- a/src/GenerateKernelU8S8S32ACC16.cc
+++ b/src/GenerateKernelU8S8S32ACC16.cc
@@ -105,11 +105,11 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs<
extractDest128, CRegs(i * leadingDimCReg + j), idx);
a->vpmovsxwd(extractDest256, extractDest128);
x86::Mem destAddr = x86::dword_ptr(
-#ifdef _MSC_VER
- a->gpz(9), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t));
-#else
+//#ifdef _MSC_VER
+ //a->gpz(9), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t));
+//#else
a->zcx(), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t));
-#endif
+//#endif
if (accum) {
a->vpaddd(extractDest256, extractDest256, destAddr);
}
@@ -195,21 +195,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>(
//"nc must be equal to the number of register blocks");
// arguments to the function created
-#ifdef _MSC_VER
- x86::Gp buffer_A = a->zcx();
- x86::Gp buffer_B = a->zdx();
- x86::Gp B_pf = a->gpz(8);
- x86::Gp CBase = a->gpz(9);
- x86::Gp kSize = a->zdi();
- x86::Gp ldcReg = a->zsi();
-#else
+//#ifdef _MSC_VER
+// x86::Gp buffer_A = a->zcx();
+// x86::Gp buffer_B = a->zdx();
+// x86::Gp B_pf = a->gpz(8);
+// x86::Gp CBase = a->gpz(9);
+// x86::Gp kSize = a->zdi();
+// x86::Gp ldcReg = a->zsi();
+//#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
-#endif
+//#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc
index 819f33b..c3a7b7b 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc
@@ -112,11 +112,11 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs<
extractDest256, CRegs(i * leadingDimCReg + j), idx);
a->vpmovsxwd(extractDest512, extractDest256);
x86::Mem destAddr = x86::dword_ptr(
-#ifdef _MSC_VER
- a->gpz(9), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t));
-#else
+//#ifdef _MSC_VER
+// a->gpz(9), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t));
+//#else
a->zcx(), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t));
-#endif
+//#endif
if (accum) {
a->vpaddd(extractDest512, extractDest512, destAddr);
}
@@ -207,21 +207,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>(
int mRegBlocksRem = mc % mRegBlockSize;
// arguments to the function created
-#ifdef _MSC_VER
- x86::Gp buffer_A = a->zcx();
- x86::Gp buffer_B = a->zdx();
- x86::Gp B_pf = a->gpz(8);
- x86::Gp CBase = a->gpz(9);
- x86::Gp kSize = a->zdi();
- x86::Gp ldcReg = a->zsi();
-#else
+//#ifdef _MSC_VER
+// x86::Gp buffer_A = a->zcx();
+// x86::Gp buffer_B = a->zdx();
+// x86::Gp B_pf = a->gpz(8);
+// x86::Gp CBase = a->gpz(9);
+// x86::Gp kSize = a->zdi();
+// x86::Gp ldcReg = a->zsi();
+//#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
-#endif
+//#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc
index dc9c534..977f635 100644
--- a/src/GenerateKernelU8S8S32ACC32.cc
+++ b/src/GenerateKernelU8S8S32ACC32.cc
@@ -107,18 +107,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
a->vpaddd(
CRegs(i * leadingDimCReg + j),
CRegs(i * leadingDimCReg + j),
-#ifdef _MSC_VER
- x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)));
-#else
+//#ifdef _MSC_VER
+// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)));
+//#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 8 * sizeof(int32_t)));
-#endif
+//#endif
}
a->vmovups(
-#ifdef _MSC_VER
- x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)),
-#else
+//#ifdef _MSC_VER
+// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)),
+//#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 8 * sizeof(int32_t)),
-#endif
+//#endif
CRegs(i * leadingDimCReg + j));
}
}
@@ -196,21 +196,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>(
int mRegBlocksRem = mc % mRegBlockSize;
// arguments to the function created
-#ifdef _MSC_VER
- x86::Gp buffer_A = a->zcx();
- x86::Gp buffer_B = a->zdx();
- x86::Gp B_pf = a->gpz(8);
- x86::Gp CBase = a->gpz(9);
- x86::Gp kSize = a->zdi();
- x86::Gp ldcReg = a->zsi();
-#else
+//#ifdef _MSC_VER
+// x86::Gp buffer_A = a->zcx();
+// x86::Gp buffer_B = a->zdx();
+// x86::Gp B_pf = a->gpz(8);
+// x86::Gp CBase = a->gpz(9);
+// x86::Gp kSize = a->zdi();
+// x86::Gp ldcReg = a->zsi();
+//#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
-#endif
+//#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512.cc b/src/GenerateKernelU8S8S32ACC32Avx512.cc
index 5037292..15b6fd4 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512.cc
@@ -108,18 +108,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
a->vpaddd(
CRegs(i * leadingDimCReg + j),
CRegs(i * leadingDimCReg + j),
-#ifdef _MSC_VER
- x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)));
-#else
+//#ifdef _MSC_VER
+// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)));
+//#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)));
-#endif
+//#endif
}
a->vmovups(
-#ifdef _MSC_VER
- x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)),
-#else
+//#ifdef _MSC_VER
+// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)),
+//#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)),
-#endif
+//#endif
CRegs(i * leadingDimCReg + j));
}
}
@@ -204,21 +204,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
int mRegBlocksRem = mc % mRegBlockSize;
// arguments to the function created
-#ifdef _MSC_VER
- x86::Gp buffer_A = a->zcx();
- x86::Gp buffer_B = a->zdx();
- x86::Gp B_pf = a->gpz(8);
- x86::Gp CBase = a->gpz(9);
- x86::Gp kSize = a->zdi();
- x86::Gp ldcReg = a->zsi();
-#else
+//#ifdef _MSC_VER
+// x86::Gp buffer_A = a->zcx();
+// x86::Gp buffer_B = a->zdx();
+// x86::Gp B_pf = a->gpz(8);
+// x86::Gp CBase = a->gpz(9);
+// x86::Gp kSize = a->zdi();
+// x86::Gp ldcReg = a->zsi();
+//#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
-#endif
+//#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
index bd8be1f..102083f 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
@@ -98,18 +98,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
a->vpaddd(
CRegs(i * leadingDimCReg + j),
CRegs(i * leadingDimCReg + j),
-#ifdef _MSC_VER
- x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)));
-#else
+//#ifdef _MSC_VER
+// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)));
+//#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)));
-#endif
+//#endif
}
a->vmovups(
-#ifdef _MSC_VER
- x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)),
-#else
+//#ifdef _MSC_VER
+// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)),
+//#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)),
-#endif
+//#endif
CRegs(i * leadingDimCReg + j));
}
}
@@ -198,21 +198,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<
int mRegBlocksRem = mc % mRegBlockSize;
// arguments to the function created
-#ifdef _MSC_VER
- x86::Gp buffer_A = a->zcx();
- x86::Gp buffer_B = a->zdx();
- x86::Gp B_pf = a->gpz(8);
- x86::Gp CBase = a->gpz(9);
- x86::Gp kSize = a->zdi();
- x86::Gp ldcReg = a->zsi();
-#else
+//#ifdef _MSC_VER
+// x86::Gp buffer_A = a->zcx();
+// x86::Gp buffer_B = a->zdx();
+// x86::Gp B_pf = a->gpz(8);
+// x86::Gp CBase = a->gpz(9);
+// x86::Gp kSize = a->zdi();
+// x86::Gp ldcReg = a->zsi();
+//#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
-#endif
+//#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc
index 396e792..dd309c3 100644
--- a/src/GroupwiseConvAcc32Avx2.cc
+++ b/src/GroupwiseConvAcc32Avx2.cc
@@ -1010,21 +1010,21 @@ jit_conv_kernel_fp GenConvKernel<2, int32_t>::getOrCreate<inst_set_t::avx2>(
#endif
// arguments to the function created
-#ifdef _MSC_VER
- in_acts_R_ = a->zcx();
- wghts_R_ = a->zdx();
- out_acts_R_ = a->gpz(8);
- a_zero_pt_R_ = a->gpz(9);
- H_R_ = a->zdi();
- W_R_ = a->zsi();
-#else
+//#ifdef _MSC_VER
+// in_acts_R_ = a->zcx();
+// wghts_R_ = a->zdx();
+// out_acts_R_ = a->gpz(8);
+// a_zero_pt_R_ = a->gpz(9);
+// H_R_ = a->zdi();
+// W_R_ = a->zsi();
+//#else
in_acts_R_ = a->zdi();
wghts_R_ = a->zsi();
out_acts_R_ = a->zdx();
a_zero_pt_R_ = a->zcx();
H_R_ = a->gpz(8);
W_R_ = a->gpz(9);
-#endif
+//#endif
row_offset_R_ = a->gpz(10);
// register for temporary use
@@ -1501,19 +1501,19 @@ GenConvKernel<2, int32_t>::getOrCreateRowOffset<inst_set_t::avx2>(
#endif
// arguments to the function created
-#ifdef _MSC_VER
- in_acts_R_ = a->zcx();
- a_zero_pt_R_ = a->zdx();
- H_R_ = a->gpz(8);
- W_R_ = a->gpz(9);
- row_offset_R_ = a->zdi();
-#else
+//#ifdef _MSC_VER
+// in_acts_R_ = a->zcx();
+// a_zero_pt_R_ = a->zdx();
+// H_R_ = a->gpz(8);
+// W_R_ = a->gpz(9);
+// row_offset_R_ = a->zdi();
+//#else
in_acts_R_ = a->zdi();
a_zero_pt_R_ = a->zsi();
H_R_ = a->zdx();
W_R_ = a->zcx();
row_offset_R_ = a->gpz(8);
-#endif
+//#endif
// register for temporary use
scratchReg1_ = a->gpz(12);
diff --git a/third_party/asmjit b/third_party/asmjit
-Subproject 4da474ac9aa2689e88d5e40a2f37628f302d7e3
+Subproject ac77dfcd75f043e2fe317133a971040e5b99991