diff options
author | Young Jin Kim <youki@microsoft.com> | 2020-01-15 22:29:47 +0300 |
---|---|---|
committer | Young Jin Kim <youki@microsoft.com> | 2020-01-15 22:29:47 +0300 |
commit | 1faf882d74b267c5cf5b07829f4355e819bb437e (patch) | |
tree | 53c0d74f785637a3861fc9ed2268014c5190d870 | |
parent | 84e66a976046180187724aff60a236c5378fde7c (diff) |
Some experiments with asmjit (branch: youki/jit-experiments)
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16.cc | 26 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16Avx512.cc | 26 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32.cc | 34 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32Avx512.cc | 34 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc | 34 | ||||
-rw-r--r-- | src/GroupwiseConvAcc32Avx2.cc | 34 | ||||
m--------- | third_party/asmjit | 0 |
7 files changed, 94 insertions, 94 deletions
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc index 205af14..6fbe1fb 100644 --- a/src/GenerateKernelU8S8S32ACC16.cc +++ b/src/GenerateKernelU8S8S32ACC16.cc @@ -105,11 +105,11 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs< extractDest128, CRegs(i * leadingDimCReg + j), idx); a->vpmovsxwd(extractDest256, extractDest128); x86::Mem destAddr = x86::dword_ptr( -#ifdef _MSC_VER - a->gpz(9), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t)); -#else +//#ifdef _MSC_VER + //a->gpz(9), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t)); +//#else a->zcx(), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t)); -#endif +//#endif if (accum) { a->vpaddd(extractDest256, extractDest256, destAddr); } @@ -195,21 +195,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>( //"nc must be equal to the number of register blocks"); // arguments to the function created -#ifdef _MSC_VER - x86::Gp buffer_A = a->zcx(); - x86::Gp buffer_B = a->zdx(); - x86::Gp B_pf = a->gpz(8); - x86::Gp CBase = a->gpz(9); - x86::Gp kSize = a->zdi(); - x86::Gp ldcReg = a->zsi(); -#else +//#ifdef _MSC_VER +// x86::Gp buffer_A = a->zcx(); +// x86::Gp buffer_B = a->zdx(); +// x86::Gp B_pf = a->gpz(8); +// x86::Gp CBase = a->gpz(9); +// x86::Gp kSize = a->zdi(); +// x86::Gp ldcReg = a->zsi(); +//#else x86::Gp buffer_A = a->zdi(); x86::Gp buffer_B = a->zsi(); x86::Gp B_pf = a->zdx(); x86::Gp CBase = a->zcx(); x86::Gp kSize = a->gpz(8); x86::Gp ldcReg = a->gpz(9); -#endif +//#endif asmjit::FuncDetail func; func.init( diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc index 819f33b..c3a7b7b 100644 --- a/src/GenerateKernelU8S8S32ACC16Avx512.cc +++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc @@ -112,11 +112,11 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs< extractDest256, CRegs(i * leadingDimCReg + j), idx); a->vpmovsxwd(extractDest512, extractDest256); x86::Mem destAddr = 
x86::dword_ptr( -#ifdef _MSC_VER - a->gpz(9), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t)); -#else +//#ifdef _MSC_VER +// a->gpz(9), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t)); +//#else a->zcx(), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t)); -#endif +//#endif if (accum) { a->vpaddd(extractDest512, extractDest512, destAddr); } @@ -207,21 +207,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>( int mRegBlocksRem = mc % mRegBlockSize; // arguments to the function created -#ifdef _MSC_VER - x86::Gp buffer_A = a->zcx(); - x86::Gp buffer_B = a->zdx(); - x86::Gp B_pf = a->gpz(8); - x86::Gp CBase = a->gpz(9); - x86::Gp kSize = a->zdi(); - x86::Gp ldcReg = a->zsi(); -#else +//#ifdef _MSC_VER +// x86::Gp buffer_A = a->zcx(); +// x86::Gp buffer_B = a->zdx(); +// x86::Gp B_pf = a->gpz(8); +// x86::Gp CBase = a->gpz(9); +// x86::Gp kSize = a->zdi(); +// x86::Gp ldcReg = a->zsi(); +//#else x86::Gp buffer_A = a->zdi(); x86::Gp buffer_B = a->zsi(); x86::Gp B_pf = a->zdx(); x86::Gp CBase = a->zcx(); x86::Gp kSize = a->gpz(8); x86::Gp ldcReg = a->gpz(9); -#endif +//#endif asmjit::FuncDetail func; func.init( diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc index dc9c534..977f635 100644 --- a/src/GenerateKernelU8S8S32ACC32.cc +++ b/src/GenerateKernelU8S8S32ACC32.cc @@ -107,18 +107,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs< a->vpaddd( CRegs(i * leadingDimCReg + j), CRegs(i * leadingDimCReg + j), -#ifdef _MSC_VER - x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t))); -#else +//#ifdef _MSC_VER +// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t))); +//#else x86::dword_ptr(a->zcx(), C_Offset, 0, j * 8 * sizeof(int32_t))); -#endif +//#endif } a->vmovups( -#ifdef _MSC_VER - x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)), -#else +//#ifdef _MSC_VER +// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)), +//#else 
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 8 * sizeof(int32_t)), -#endif +//#endif CRegs(i * leadingDimCReg + j)); } } @@ -196,21 +196,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>( int mRegBlocksRem = mc % mRegBlockSize; // arguments to the function created -#ifdef _MSC_VER - x86::Gp buffer_A = a->zcx(); - x86::Gp buffer_B = a->zdx(); - x86::Gp B_pf = a->gpz(8); - x86::Gp CBase = a->gpz(9); - x86::Gp kSize = a->zdi(); - x86::Gp ldcReg = a->zsi(); -#else +//#ifdef _MSC_VER +// x86::Gp buffer_A = a->zcx(); +// x86::Gp buffer_B = a->zdx(); +// x86::Gp B_pf = a->gpz(8); +// x86::Gp CBase = a->gpz(9); +// x86::Gp kSize = a->zdi(); +// x86::Gp ldcReg = a->zsi(); +//#else x86::Gp buffer_A = a->zdi(); x86::Gp buffer_B = a->zsi(); x86::Gp B_pf = a->zdx(); x86::Gp CBase = a->zcx(); x86::Gp kSize = a->gpz(8); x86::Gp ldcReg = a->gpz(9); -#endif +//#endif asmjit::FuncDetail func; func.init( diff --git a/src/GenerateKernelU8S8S32ACC32Avx512.cc b/src/GenerateKernelU8S8S32ACC32Avx512.cc index 5037292..15b6fd4 100644 --- a/src/GenerateKernelU8S8S32ACC32Avx512.cc +++ b/src/GenerateKernelU8S8S32ACC32Avx512.cc @@ -108,18 +108,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs< a->vpaddd( CRegs(i * leadingDimCReg + j), CRegs(i * leadingDimCReg + j), -#ifdef _MSC_VER - x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t))); -#else +//#ifdef _MSC_VER +// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t))); +//#else x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t))); -#endif +//#endif } a->vmovups( -#ifdef _MSC_VER - x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)), -#else +//#ifdef _MSC_VER +// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)), +//#else x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)), -#endif +//#endif CRegs(i * leadingDimCReg + j)); } } @@ -204,21 +204,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, 
int32_t>::getOrCreate<inst_set_t::avx512>( int mRegBlocksRem = mc % mRegBlockSize; // arguments to the function created -#ifdef _MSC_VER - x86::Gp buffer_A = a->zcx(); - x86::Gp buffer_B = a->zdx(); - x86::Gp B_pf = a->gpz(8); - x86::Gp CBase = a->gpz(9); - x86::Gp kSize = a->zdi(); - x86::Gp ldcReg = a->zsi(); -#else +//#ifdef _MSC_VER +// x86::Gp buffer_A = a->zcx(); +// x86::Gp buffer_B = a->zdx(); +// x86::Gp B_pf = a->gpz(8); +// x86::Gp CBase = a->gpz(9); +// x86::Gp kSize = a->zdi(); +// x86::Gp ldcReg = a->zsi(); +//#else x86::Gp buffer_A = a->zdi(); x86::Gp buffer_B = a->zsi(); x86::Gp B_pf = a->zdx(); x86::Gp CBase = a->zcx(); x86::Gp kSize = a->gpz(8); x86::Gp ldcReg = a->gpz(9); -#endif +//#endif asmjit::FuncDetail func; func.init( diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc index bd8be1f..102083f 100644 --- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc +++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc @@ -98,18 +98,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs< a->vpaddd( CRegs(i * leadingDimCReg + j), CRegs(i * leadingDimCReg + j), -#ifdef _MSC_VER - x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t))); -#else +//#ifdef _MSC_VER +// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t))); +//#else x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t))); -#endif +//#endif } a->vmovups( -#ifdef _MSC_VER - x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)), -#else +//#ifdef _MSC_VER +// x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)), +//#else x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)), -#endif +//#endif CRegs(i * leadingDimCReg + j)); } } @@ -198,21 +198,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate< int mRegBlocksRem = mc % mRegBlockSize; // arguments to the function created -#ifdef _MSC_VER - x86::Gp buffer_A = a->zcx(); - x86::Gp buffer_B = a->zdx(); - x86::Gp B_pf = 
a->gpz(8); - x86::Gp CBase = a->gpz(9); - x86::Gp kSize = a->zdi(); - x86::Gp ldcReg = a->zsi(); -#else +//#ifdef _MSC_VER +// x86::Gp buffer_A = a->zcx(); +// x86::Gp buffer_B = a->zdx(); +// x86::Gp B_pf = a->gpz(8); +// x86::Gp CBase = a->gpz(9); +// x86::Gp kSize = a->zdi(); +// x86::Gp ldcReg = a->zsi(); +//#else x86::Gp buffer_A = a->zdi(); x86::Gp buffer_B = a->zsi(); x86::Gp B_pf = a->zdx(); x86::Gp CBase = a->zcx(); x86::Gp kSize = a->gpz(8); x86::Gp ldcReg = a->gpz(9); -#endif +//#endif asmjit::FuncDetail func; func.init( diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc index 396e792..dd309c3 100644 --- a/src/GroupwiseConvAcc32Avx2.cc +++ b/src/GroupwiseConvAcc32Avx2.cc @@ -1010,21 +1010,21 @@ jit_conv_kernel_fp GenConvKernel<2, int32_t>::getOrCreate<inst_set_t::avx2>( #endif // arguments to the function created -#ifdef _MSC_VER - in_acts_R_ = a->zcx(); - wghts_R_ = a->zdx(); - out_acts_R_ = a->gpz(8); - a_zero_pt_R_ = a->gpz(9); - H_R_ = a->zdi(); - W_R_ = a->zsi(); -#else +//#ifdef _MSC_VER +// in_acts_R_ = a->zcx(); +// wghts_R_ = a->zdx(); +// out_acts_R_ = a->gpz(8); +// a_zero_pt_R_ = a->gpz(9); +// H_R_ = a->zdi(); +// W_R_ = a->zsi(); +//#else in_acts_R_ = a->zdi(); wghts_R_ = a->zsi(); out_acts_R_ = a->zdx(); a_zero_pt_R_ = a->zcx(); H_R_ = a->gpz(8); W_R_ = a->gpz(9); -#endif +//#endif row_offset_R_ = a->gpz(10); // register for temporary use @@ -1501,19 +1501,19 @@ GenConvKernel<2, int32_t>::getOrCreateRowOffset<inst_set_t::avx2>( #endif // arguments to the function created -#ifdef _MSC_VER - in_acts_R_ = a->zcx(); - a_zero_pt_R_ = a->zdx(); - H_R_ = a->gpz(8); - W_R_ = a->gpz(9); - row_offset_R_ = a->zdi(); -#else +//#ifdef _MSC_VER +// in_acts_R_ = a->zcx(); +// a_zero_pt_R_ = a->zdx(); +// H_R_ = a->gpz(8); +// W_R_ = a->gpz(9); +// row_offset_R_ = a->zdi(); +//#else in_acts_R_ = a->zdi(); a_zero_pt_R_ = a->zsi(); H_R_ = a->zdx(); W_R_ = a->zcx(); row_offset_R_ = a->gpz(8); -#endif +//#endif // register for 
temporary use scratchReg1_ = a->gpz(12); diff --git a/third_party/asmjit b/third_party/asmjit -Subproject 4da474ac9aa2689e88d5e40a2f37628f302d7e3 +Subproject ac77dfcd75f043e2fe317133a971040e5b99991 |