All functions are running well on Windows

author: Young Jin Kim <youki@microsoft.com> 2019-09-25 21:46:49 +0300
committer: Young Jin Kim <youki@microsoft.com> 2019-09-25 21:46:49 +0300
commit: 604620b78663d2bed318efba0ceb6d3ebadd14fb (patch)
tree: 8c84921ecd5d49f95aa24891d3bc8ecc03a3eb5f
parent: d02815ffedbc46a3f8af1a3884efefd83668a401 (diff)
13 files changed, 92 insertions, 36 deletions
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc
index cbd5877..205af14 100644
--- a/src/GenerateKernelU8S8S32ACC16.cc
+++ b/src/GenerateKernelU8S8S32ACC16.cc
@@ -105,7 +105,11 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs<
             extractDest128, CRegs(i * leadingDimCReg + j), idx);
         a->vpmovsxwd(extractDest256, extractDest128);
         x86::Mem destAddr = x86::dword_ptr(
+#ifdef _MSC_VER
+            a->gpz(9), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t));
+#else
             a->zcx(), C_Offset, 0, (j * 2 + idx) * 8 * sizeof(int32_t));
+#endif
         if (accum) {
           a->vpaddd(extractDest256, extractDest256, destAddr);
         }
@@ -191,12 +195,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>(
     //"nc must be equal to the number of register blocks");
 
     // arguments to the function created
+#ifdef _MSC_VER
+    x86::Gp buffer_A = a->zcx();
+    x86::Gp buffer_B = a->zdx();
+    x86::Gp B_pf = a->gpz(8);
+    x86::Gp CBase = a->gpz(9);
+    x86::Gp kSize = a->zdi();
+    x86::Gp ldcReg = a->zsi();
+#else
     x86::Gp buffer_A = a->zdi();
     x86::Gp buffer_B = a->zsi();
     x86::Gp B_pf = a->zdx();
     x86::Gp CBase = a->zcx();
     x86::Gp kSize = a->gpz(8);
     x86::Gp ldcReg = a->gpz(9);
+#endif
 
     asmjit::FuncDetail func;
     func.init(
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc
index 512c8ba..819f33b 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc
@@ -112,7 +112,11 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::storeCRegs<
             extractDest256, CRegs(i * leadingDimCReg + j), idx);
         a->vpmovsxwd(extractDest512, extractDest256);
         x86::Mem destAddr = x86::dword_ptr(
+#ifdef _MSC_VER
+            a->gpz(9), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t));
+#else
             a->zcx(), C_Offset, 0, (j * 2 + idx) * 16 * sizeof(int32_t));
+#endif
         if (accum) {
           a->vpaddd(extractDest512, extractDest512, destAddr);
         }
@@ -203,12 +207,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>(
     int mRegBlocksRem = mc % mRegBlockSize;
 
     // arguments to the function created
+#ifdef _MSC_VER
+    x86::Gp buffer_A = a->zcx();
+    x86::Gp buffer_B = a->zdx();
+    x86::Gp B_pf = a->gpz(8);
+    x86::Gp CBase = a->gpz(9);
+    x86::Gp kSize = a->zdi();
+    x86::Gp ldcReg = a->zsi();
+#else
     x86::Gp buffer_A = a->zdi();
     x86::Gp buffer_B = a->zsi();
     x86::Gp B_pf = a->zdx();
     x86::Gp CBase = a->zcx();
     x86::Gp kSize = a->gpz(8);
     x86::Gp ldcReg = a->gpz(9);
+#endif
 
     asmjit::FuncDetail func;
     func.init(
diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc
index 226e974..dc9c534 100644
--- a/src/GenerateKernelU8S8S32ACC32.cc
+++ b/src/GenerateKernelU8S8S32ACC32.cc
@@ -107,10 +107,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
         a->vpaddd(
             CRegs(i * leadingDimCReg + j),
             CRegs(i * leadingDimCReg + j),
+#ifdef _MSC_VER
+            x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)));
+#else
             x86::dword_ptr(a->zcx(), C_Offset, 0, j * 8 * sizeof(int32_t)));
+#endif
       }
       a->vmovups(
+#ifdef _MSC_VER
+          x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 8 * sizeof(int32_t)),
+#else
           x86::dword_ptr(a->zcx(), C_Offset, 0, j * 8 * sizeof(int32_t)),
+#endif
           CRegs(i * leadingDimCReg + j));
     }
   }
@@ -188,12 +196,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>(
     int mRegBlocksRem = mc % mRegBlockSize;
 
     // arguments to the function created
+#ifdef _MSC_VER
+    x86::Gp buffer_A = a->zcx();
+    x86::Gp buffer_B = a->zdx();
+    x86::Gp B_pf = a->gpz(8);
+    x86::Gp CBase = a->gpz(9);
+    x86::Gp kSize = a->zdi();
+    x86::Gp ldcReg = a->zsi();
+#else
     x86::Gp buffer_A = a->zdi();
     x86::Gp buffer_B = a->zsi();
     x86::Gp B_pf = a->zdx();
     x86::Gp CBase = a->zcx();
     x86::Gp kSize = a->gpz(8);
     x86::Gp ldcReg = a->gpz(9);
+#endif
 
     asmjit::FuncDetail func;
     func.init(
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
index 1d23e90..bd8be1f 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
@@ -98,10 +98,18 @@ void CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::storeCRegs<
         a->vpaddd(
             CRegs(i * leadingDimCReg + j),
             CRegs(i * leadingDimCReg + j),
+#ifdef _MSC_VER
+            x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)));
+#else
             x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)));
+#endif
       }
       a->vmovups(
+#ifdef _MSC_VER
+          x86::dword_ptr(a->gpz(9), C_Offset, 0, j * 16 * sizeof(int32_t)),
+#else
           x86::dword_ptr(a->zcx(), C_Offset, 0, j * 16 * sizeof(int32_t)),
+#endif
           CRegs(i * leadingDimCReg + j));
     }
   }
@@ -190,12 +198,21 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<
     int mRegBlocksRem = mc % mRegBlockSize;
 
     // arguments to the function created
+#ifdef _MSC_VER
+    x86::Gp buffer_A = a->zcx();
+    x86::Gp buffer_B = a->zdx();
+    x86::Gp B_pf = a->gpz(8);
+    x86::Gp CBase = a->gpz(9);
+    x86::Gp kSize = a->zdi();
+    x86::Gp ldcReg = a->zsi();
+#else
     x86::Gp buffer_A = a->zdi();
     x86::Gp buffer_B = a->zsi();
     x86::Gp B_pf = a->zdx();
     x86::Gp CBase = a->zcx();
     x86::Gp kSize = a->gpz(8);
     x86::Gp ldcReg = a->gpz(9);
+#endif
 
     asmjit::FuncDetail func;
     func.init(
diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc
index d1e0fdd..396e792 100644
--- a/src/GroupwiseConvAcc32Avx2.cc
+++ b/src/GroupwiseConvAcc32Avx2.cc
@@ -1010,12 +1010,21 @@ jit_conv_kernel_fp GenConvKernel<2, int32_t>::getOrCreate<inst_set_t::avx2>(
 #endif
 
   // arguments to the function created
+#ifdef _MSC_VER
+  in_acts_R_ = a->zcx();
+  wghts_R_ = a->zdx();
+  out_acts_R_ = a->gpz(8);
+  a_zero_pt_R_ = a->gpz(9);
+  H_R_ = a->zdi();
+  W_R_ = a->zsi();
+#else
   in_acts_R_ = a->zdi();
   wghts_R_ = a->zsi();
   out_acts_R_ = a->zdx();
   a_zero_pt_R_ = a->zcx();
   H_R_ = a->gpz(8);
   W_R_ = a->gpz(9);
+#endif
   row_offset_R_ = a->gpz(10);
 
   // register for temporary use
@@ -1492,11 +1501,19 @@ GenConvKernel<2, int32_t>::getOrCreateRowOffset<inst_set_t::avx2>(
 #endif
 
   // arguments to the function created
+#ifdef _MSC_VER
+  in_acts_R_ = a->zcx();
+  a_zero_pt_R_ = a->zdx();
+  H_R_ = a->gpz(8);
+  W_R_ = a->gpz(9);
+  row_offset_R_ = a->zdi();
+#else
   in_acts_R_ = a->zdi();
   a_zero_pt_R_ = a->zsi();
   H_R_ = a->zdx();
   W_R_ = a->zcx();
   row_offset_R_ = a->gpz(8);
+#endif
 
   // register for temporary use
   scratchReg1_ = a->gpz(12);
diff --git a/src/PackDepthwiseConvMatrixAvx2.cc b/src/PackDepthwiseConvMatrixAvx2.cc
index ab2e1f2..04c08f3 100644
--- a/src/PackDepthwiseConvMatrixAvx2.cc
+++ b/src/PackDepthwiseConvMatrixAvx2.cc
@@ -205,7 +205,11 @@ void PackedDepthWiseConvMatrix::unpack(int8_t* unpacked_data) {
 }
 
 PackedDepthWiseConvMatrix::~PackedDepthWiseConvMatrix() {
+#ifdef _MSC_VER
+  _aligned_free(pmat_);
+#else
   free(pmat_);
+#endif
 }
 
 } // namespace fbgemm
diff --git a/test/GConvTest.cc b/test/GConvTest.cc
index 8c1fb82..982208b 100644
--- a/test/GConvTest.cc
+++ b/test/GConvTest.cc
@@ -25,14 +25,6 @@
 using namespace std;
 using namespace fbgemm;
 
-vector<matrix_op_t> transposeVals{matrix_op_t::NoTranspose,
-                                  matrix_op_t::Transpose};
-
-vector<QuantizationGranularity> qGranularityVals{
-    QuantizationGranularity::TENSOR,
-    QuantizationGranularity::GROUP,
-    QuantizationGranularity::OUT_CHANNEL};
-
 namespace {
 class fbgemmGConvAcc32Test
     : public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t>> {};
diff --git a/test/Im2ColFusedRequantizeTest.cc b/test/Im2ColFusedRequantizeTest.cc
index b14303f..56df3c8 100644
--- a/test/Im2ColFusedRequantizeTest.cc
+++ b/test/Im2ColFusedRequantizeTest.cc
@@ -24,11 +24,6 @@
 using namespace std;
 using namespace fbgemm;
 
-vector<QuantizationGranularity> qGranularityVals{
-    QuantizationGranularity::TENSOR,
-    QuantizationGranularity::GROUP,
-    QuantizationGranularity::OUT_CHANNEL};
-
 namespace {
 class fbgemmIm2colTest
     : public testing::TestWithParam<tuple<QuantizationGranularity, bool>> {};
diff --git a/test/PackedRequantizeAcc16Test.cc b/test/PackedRequantizeAcc16Test.cc
index 62b1303..8978150 100644
--- a/test/PackedRequantizeAcc16Test.cc
+++ b/test/PackedRequantizeAcc16Test.cc
@@ -26,14 +26,6 @@
 using namespace std;
 using namespace fbgemm;
 
-vector<matrix_op_t> transposeVals{matrix_op_t::NoTranspose,
-                                  matrix_op_t::Transpose};
-
-vector<QuantizationGranularity> qGranularityVals{
-    QuantizationGranularity::TENSOR,
-    QuantizationGranularity::GROUP,
-    QuantizationGranularity::OUT_CHANNEL};
-
 namespace {
 class fbgemmu8s8acc16WithQuantGranularityTest
     : public testing::TestWithParam<
diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc
index 5338243..15e7d55 100644
--- a/test/PackedRequantizeTest.cc
+++ b/test/PackedRequantizeTest.cc
@@ -25,14 +25,6 @@
 using namespace std;
 using namespace fbgemm;
 
-vector<matrix_op_t> transposeVals{matrix_op_t::NoTranspose,
-                                  matrix_op_t::Transpose};
-
-vector<QuantizationGranularity> qGranularityVals{
-    QuantizationGranularity::TENSOR,
-    QuantizationGranularity::GROUP,
-    QuantizationGranularity::OUT_CHANNEL};
-
 namespace {
 class fbgemmu8s8acc32WithQuantGranularityTest
     : public testing::TestWithParam<
diff --git a/test/RequantizeOnlyTest.cc b/test/RequantizeOnlyTest.cc
index 2f73d49..94e8e7d 100644
--- a/test/RequantizeOnlyTest.cc
+++ b/test/RequantizeOnlyTest.cc
@@ -20,7 +20,7 @@
 using namespace std;
 using namespace fbgemm;
 
-vector<QuantizationGranularity> qGranularityVals{
+vector<QuantizationGranularity> qGranularityValsLocal{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::OUT_CHANNEL};
 
@@ -42,7 +42,7 @@ INSTANTIATE_TEST_CASE_P(
             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20, 32}), // number of
                                                                   // cols
         ::testing::Bool(), // fuse relu
-        ::testing::ValuesIn(qGranularityVals))); // requantization granularity
+        ::testing::ValuesIn(qGranularityValsLocal))); // requantization granularity
 
 /**
  * Test for float bias
diff --git a/test/TestUtils.h b/test/TestUtils.h
index 2cb7b88..d320ae2 100644
--- a/test/TestUtils.h
+++ b/test/TestUtils.h
@@ -7,9 +7,18 @@
 #pragma once
 #include <cmath>
 #include <vector>
+#include "fbgemm/Fbgemm.h"
 
 namespace fbgemm {
 
+static std::vector<matrix_op_t> transposeVals = { matrix_op_t::NoTranspose,
+                                    matrix_op_t::Transpose };
+
+static std::vector<QuantizationGranularity> qGranularityVals = {
+      QuantizationGranularity::TENSOR,
+      QuantizationGranularity::GROUP,
+      QuantizationGranularity::OUT_CHANNEL };
+
 /*
  * @brief Check and validate the buffers for reference and FBGEMM result.
  */
diff --git a/test/UniConvTest.cc b/test/UniConvTest.cc
index cead3a6..e9c7ba5 100644
--- a/test/UniConvTest.cc
+++ b/test/UniConvTest.cc
@@ -20,11 +20,6 @@
 using namespace std;
 using namespace fbgemm;
 
-vector<QuantizationGranularity> qGranularityVals{
-    QuantizationGranularity::TENSOR,
-    QuantizationGranularity::GROUP,
-    QuantizationGranularity::OUT_CHANNEL};
-
 // clang-format off
 static vector<conv_param_t<>> GetShapes_() {
   vector<conv_param_t<>> shapes = {
author	Young Jin Kim <youki@microsoft.com>	2019-09-25 21:46:49 +0300
committer	Young Jin Kim <youki@microsoft.com>	2019-09-25 21:46:49 +0300
commit	604620b78663d2bed318efba0ceb6d3ebadd14fb (patch)
tree	8c84921ecd5d49f95aa24891d3bc8ecc03a3eb5f
parent	d02815ffedbc46a3f8af1a3884efefd83668a401 (diff)