Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Young Jin Kim <youki@microsoft.com> 2019-09-25 21:46:49 +0300
committer Young Jin Kim <youki@microsoft.com> 2019-09-25 21:46:49 +0300
commit 604620b78663d2bed318efba0ceb6d3ebadd14fb (patch)
tree 8c84921ecd5d49f95aa24891d3bc8ecc03a3eb5f
parent d02815ffedbc46a3f8af1a3884efefd83668a401 (diff)
All functions are running well on windows
-rw-r--r-- src/GenerateKernelU8S8S32ACC16.cc 13
-rw-r--r-- src/GenerateKernelU8S8S32ACC16Avx512.cc 13
-rw-r--r-- src/GenerateKernelU8S8S32ACC32.cc 17
-rw-r--r-- src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc 17
-rw-r--r-- src/GroupwiseConvAcc32Avx2.cc 17
-rw-r--r-- src/PackDepthwiseConvMatrixAvx2.cc 4
-rw-r--r-- test/GConvTest.cc 8
-rw-r--r-- test/Im2ColFusedRequantizeTest.cc 5
-rw-r--r-- test/PackedRequantizeAcc16Test.cc 8
-rw-r--r-- test/PackedRequantizeTest.cc 8
-rw-r--r-- test/RequantizeOnlyTest.cc 4
-rw-r--r-- test/TestUtils.h 9
-rw-r--r-- test/UniConvTest.cc 5
13 files changed, 92 insertions, 36 deletions
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc
index cbd5877..205af14 100644
--- a/src/GenerateKernelU8S8S32ACC16.cc
+++ b/src/GenerateKernelU8S8S32ACC16.cc
@@ -105,7 +105,11 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs<
extractDest128, CRegs(i * leadingDimCReg + j), idx);
a->vpmovsxwd(extractDest256, extractDest128);
x86::Mem destAddr = x86::dword_ptr(
+#ifdef _MSC_VER
+ a->gpz(9), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t));
+#else
a->zcx(), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t));
+#endif
if (accum) {
a->vpaddd(extractDest256, extractDest256, destAddr);
}
@@ -191,12 +195,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>(
//"nc must be equal to the number of register blocks");
// arguments to the function created
+#ifdef _MSC_VER
+ x86::Gp buffer_A = a->zcx();
+ x86::Gp buffer_B = a->zdx();
+ x86::Gp B_pf = a->gpz(8);
+ x86::Gp CBase = a->gpz(9);
+ x86::Gp kSize = a->zdi();
+ x86::Gp ldcReg = a->zsi();
+#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
+#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc
index 512c8ba..819f33b 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc
@@ -112,7 +112,11 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs<
extractDest256, CRegs(i * leadingDimCReg + j), idx);
a->vpmovsxwd(extractDest512, extractDest256);
x86::Mem destAddr = x86::dword_ptr(
+#ifdef _MSC_VER
+ a->gpz(9), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t));
+#else
a->zcx(), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t));
+#endif
if (accum) {
a->vpaddd(extractDest512, extractDest512, destAddr);
}
@@ -203,12 +207,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>(
int mRegBlocksRem = mc % mRegBlockSize;
// arguments to the function created
+#ifdef _MSC_VER
+ x86::Gp buffer_A = a->zcx();
+ x86::Gp buffer_B = a->zdx();
+ x86::Gp B_pf = a->gpz(8);
+ x86::Gp CBase = a->gpz(9);
+ x86::Gp kSize = a->zdi();
+ x86::Gp ldcReg = a->zsi();
+#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
+#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc
index 226e974..dc9c534 100644
--- a/src/GenerateKernelU8S8S32ACC32.cc
+++ b/src/GenerateKernelU8S8S32ACC32.cc
@@ -107,10 +107,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
a->vpaddd(
CRegs(i * leadingDimCReg + j),
CRegs(i * leadingDimCReg + j),
+#ifdef _MSC_VER
+ x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)));
+#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 8 * sizeof(int32_t)));
+#endif
}
a->vmovups(
+#ifdef _MSC_VER
+ x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)),
+#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 8 * sizeof(int32_t)),
+#endif
CRegs(i * leadingDimCReg + j));
}
}
@@ -188,12 +196,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>(
int mRegBlocksRem = mc % mRegBlockSize;
// arguments to the function created
+#ifdef _MSC_VER
+ x86::Gp buffer_A = a->zcx();
+ x86::Gp buffer_B = a->zdx();
+ x86::Gp B_pf = a->gpz(8);
+ x86::Gp CBase = a->gpz(9);
+ x86::Gp kSize = a->zdi();
+ x86::Gp ldcReg = a->zsi();
+#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
+#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
index 1d23e90..bd8be1f 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
@@ -98,10 +98,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
a->vpaddd(
CRegs(i * leadingDimCReg + j),
CRegs(i * leadingDimCReg + j),
+#ifdef _MSC_VER
+ x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)));
+#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)));
+#endif
}
a->vmovups(
+#ifdef _MSC_VER
+ x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)),
+#else
x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)),
+#endif
CRegs(i * leadingDimCReg + j));
}
}
@@ -190,12 +198,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<
int mRegBlocksRem = mc % mRegBlockSize;
// arguments to the function created
+#ifdef _MSC_VER
+ x86::Gp buffer_A = a->zcx();
+ x86::Gp buffer_B = a->zdx();
+ x86::Gp B_pf = a->gpz(8);
+ x86::Gp CBase = a->gpz(9);
+ x86::Gp kSize = a->zdi();
+ x86::Gp ldcReg = a->zsi();
+#else
x86::Gp buffer_A = a->zdi();
x86::Gp buffer_B = a->zsi();
x86::Gp B_pf = a->zdx();
x86::Gp CBase = a->zcx();
x86::Gp kSize = a->gpz(8);
x86::Gp ldcReg = a->gpz(9);
+#endif
asmjit::FuncDetail func;
func.init(
diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc
index d1e0fdd..396e792 100644
--- a/src/GroupwiseConvAcc32Avx2.cc
+++ b/src/GroupwiseConvAcc32Avx2.cc
@@ -1010,12 +1010,21 @@ jit_conv_kernel_fp GenConvKernel<2, int32_t>::getOrCreate<inst_set_t::avx2>(
#endif
// arguments to the function created
+#ifdef _MSC_VER
+ in_acts_R_ = a->zcx();
+ wghts_R_ = a->zdx();
+ out_acts_R_ = a->gpz(8);
+ a_zero_pt_R_ = a->gpz(9);
+ H_R_ = a->zdi();
+ W_R_ = a->zsi();
+#else
in_acts_R_ = a->zdi();
wghts_R_ = a->zsi();
out_acts_R_ = a->zdx();
a_zero_pt_R_ = a->zcx();
H_R_ = a->gpz(8);
W_R_ = a->gpz(9);
+#endif
row_offset_R_ = a->gpz(10);
// register for temporary use
@@ -1492,11 +1501,19 @@ GenConvKernel<2, int32_t>::getOrCreateRowOffset<inst_set_t::avx2>(
#endif
// arguments to the function created
+#ifdef _MSC_VER
+ in_acts_R_ = a->zcx();
+ a_zero_pt_R_ = a->zdx();
+ H_R_ = a->gpz(8);
+ W_R_ = a->gpz(9);
+ row_offset_R_ = a->zdi();
+#else
in_acts_R_ = a->zdi();
a_zero_pt_R_ = a->zsi();
H_R_ = a->zdx();
W_R_ = a->zcx();
row_offset_R_ = a->gpz(8);
+#endif
// register for temporary use
scratchReg1_ = a->gpz(12);
diff --git a/src/PackDepthwiseConvMatrixAvx2.cc b/src/PackDepthwiseConvMatrixAvx2.cc
index ab2e1f2..04c08f3 100644
--- a/src/PackDepthwiseConvMatrixAvx2.cc
+++ b/src/PackDepthwiseConvMatrixAvx2.cc
@@ -205,7 +205,11 @@ void PackedDepthWiseConvMatrix::unpack(int8_t* unpacked_data) {
}
PackedDepthWiseConvMatrix::~PackedDepthWiseConvMatrix() {
+#ifdef _MSC_VER
+ _aligned_free(pmat_);
+#else
free(pmat_);
+#endif
}
} // namespace fbgemm
diff --git a/test/GConvTest.cc b/test/GConvTest.cc
index 8c1fb82..982208b 100644
--- a/test/GConvTest.cc
+++ b/test/GConvTest.cc
@@ -25,14 +25,6 @@
using namespace std;
using namespace fbgemm;
-vector<matrix_op_t> transposeVals{matrix_op_t::NoTranspose,
- matrix_op_t::Transpose};
-
-vector<QuantizationGranularity> qGranularityVals{
- QuantizationGranularity::TENSOR,
- QuantizationGranularity::GROUP,
- QuantizationGranularity::OUT_CHANNEL};
-
namespace {
class fbgemmGConvAcc32Test
: public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t>> {};
diff --git a/test/Im2ColFusedRequantizeTest.cc b/test/Im2ColFusedRequantizeTest.cc
index b14303f..56df3c8 100644
--- a/test/Im2ColFusedRequantizeTest.cc
+++ b/test/Im2ColFusedRequantizeTest.cc
@@ -24,11 +24,6 @@
using namespace std;
using namespace fbgemm;
-vector<QuantizationGranularity> qGranularityVals{
- QuantizationGranularity::TENSOR,
- QuantizationGranularity::GROUP,
- QuantizationGranularity::OUT_CHANNEL};
-
namespace {
class fbgemmIm2colTest
: public testing::TestWithParam<tuple<QuantizationGranularity, bool>> {};
diff --git a/test/PackedRequantizeAcc16Test.cc b/test/PackedRequantizeAcc16Test.cc
index 62b1303..8978150 100644
--- a/test/PackedRequantizeAcc16Test.cc
+++ b/test/PackedRequantizeAcc16Test.cc
@@ -26,14 +26,6 @@
using namespace std;
using namespace fbgemm;
-vector<matrix_op_t> transposeVals{matrix_op_t::NoTranspose,
- matrix_op_t::Transpose};
-
-vector<QuantizationGranularity> qGranularityVals{
- QuantizationGranularity::TENSOR,
- QuantizationGranularity::GROUP,
- QuantizationGranularity::OUT_CHANNEL};
-
namespace {
class fbgemmu8s8acc16WithQuantGranularityTest
: public testing::TestWithParam<
diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc
index 5338243..15e7d55 100644
--- a/test/PackedRequantizeTest.cc
+++ b/test/PackedRequantizeTest.cc
@@ -25,14 +25,6 @@
using namespace std;
using namespace fbgemm;
-vector<matrix_op_t> transposeVals{matrix_op_t::NoTranspose,
- matrix_op_t::Transpose};
-
-vector<QuantizationGranularity> qGranularityVals{
- QuantizationGranularity::TENSOR,
- QuantizationGranularity::GROUP,
- QuantizationGranularity::OUT_CHANNEL};
-
namespace {
class fbgemmu8s8acc32WithQuantGranularityTest
: public testing::TestWithParam<
diff --git a/test/RequantizeOnlyTest.cc b/test/RequantizeOnlyTest.cc
index 2f73d49..94e8e7d 100644
--- a/test/RequantizeOnlyTest.cc
+++ b/test/RequantizeOnlyTest.cc
@@ -20,7 +20,7 @@
using namespace std;
using namespace fbgemm;
-vector<QuantizationGranularity> qGranularityVals{
+vector<QuantizationGranularity> qGranularityValsLocal{
QuantizationGranularity::TENSOR,
QuantizationGranularity::OUT_CHANNEL};
@@ -42,7 +42,7 @@ INSTANTIATE_TEST_CASE_P(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20, 32}), // number of
// cols
::testing::Bool(), // fuse relu
- ::testing::ValuesIn(qGranularityVals))); // requantization granularity
+ ::testing::ValuesIn(qGranularityValsLocal))); // requantization granularity
/**
* Test for float bias
diff --git a/test/TestUtils.h b/test/TestUtils.h
index 2cb7b88..d320ae2 100644
--- a/test/TestUtils.h
+++ b/test/TestUtils.h
@@ -7,9 +7,18 @@
#pragma once
#include <cmath>
#include <vector>
+#include "fbgemm/Fbgemm.h"
namespace fbgemm {
+static std::vector<matrix_op_t> transposeVals = { matrix_op_t::NoTranspose,
+ matrix_op_t::Transpose };
+
+static std::vector<QuantizationGranularity> qGranularityVals = {
+ QuantizationGranularity::TENSOR,
+ QuantizationGranularity::GROUP,
+ QuantizationGranularity::OUT_CHANNEL };
+
/*
* @brief Check and validate the buffers for reference and FBGEMM result.
*/
diff --git a/test/UniConvTest.cc b/test/UniConvTest.cc
index cead3a6..e9c7ba5 100644
--- a/test/UniConvTest.cc
+++ b/test/UniConvTest.cc
@@ -20,11 +20,6 @@
using namespace std;
using namespace fbgemm;
-vector<QuantizationGranularity> qGranularityVals{
- QuantizationGranularity::TENSOR,
- QuantizationGranularity::GROUP,
- QuantizationGranularity::OUT_CHANNEL};
-
// clang-format off
static vector<conv_param_t<>> GetShapes_() {
vector<conv_param_t<>> shapes = {