author    | Amy Yang <amyyang@fb.com>                                          | 2018-12-19 06:43:23 +0300
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-12-19 06:48:54 +0300
commit    | d5810be02d4b7b90a5aec746f98841c9e585f6d4 (patch)
tree      | 720f143d415a1ae216410a5df5e352016207d45d
parent    | 1b3d9701336d3c11da9e88b176fd1191c5b283d9 (diff)
Refactor to use FbgemmFP16 in packed gemm operator (#49)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/49
Refactored the code to use deeplearning/fbgemm2/FbgemmFP16.
Reviewed By: jspark1105
Differential Revision: D13324112
fbshipit-source-id: 8addc602ae61337f68ec361082e7536ad7fbd202
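For context, the FbgemmFP16 path that the packed gemm operator is being refactored onto packs the fp32 weight matrix into an fp16 block layout once and then reuses it across GEMM calls. The snippet below is a minimal sketch of that usage, assuming the public FbgemmFP16.h API (PackedGemmMatrixFP16 and cblas_gemm_compute); the shapes and buffers are illustrative and not taken from the operator code.

```cpp
// Sketch of driving FbgemmFP16 (assumed public API from FbgemmFP16.h);
// shapes and data are illustrative only.
#include <vector>
#include "fbgemm/FbgemmFP16.h"

void Fp16GemmSketch() {
  constexpr int m = 2, k = 16, n = 8;
  std::vector<float> A(m * k, 1.0f); // activations, row-major m x k
  std::vector<float> B(k * n, 0.5f); // weights, row-major k x n
  std::vector<float> C(m * n, 0.0f); // output, row-major m x n

  // Pack (and convert to fp16) the weight matrix once, up front.
  fbgemm::PackedGemmMatrixFP16 Bp(
      fbgemm::matrix_op_t::NoTranspose, k, n, /*alpha=*/1.0f, B.data());

  // C = A * B, reusing the packed weights on every call.
  fbgemm::cblas_gemm_compute(
      fbgemm::matrix_op_t::NoTranspose, m, A.data(), Bp, /*beta=*/0.0f, C.data());
}
```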
-rw-r--r-- | include/fbgemm/FbgemmFP16.h | 48
1 file changed, 43 insertions, 5 deletions
diff --git a/include/fbgemm/FbgemmFP16.h b/include/fbgemm/FbgemmFP16.h
index e45cff8..bebeb70 100644
--- a/include/fbgemm/FbgemmFP16.h
+++ b/include/fbgemm/FbgemmFP16.h
@@ -41,14 +41,41 @@ class PackedGemmMatrixFP16 {
       const float* smat,
       const int brow = 512)
       : nrow_(nrow), ncol_(ncol), brow_(brow) {
+    initializeParam();
+    initializeMemory();
+    // copy source matrix into packed matrix
+    this->packFromSrc(trans, alpha, smat);
+  }
+
+  PackedGemmMatrixFP16(
+      const int nrow,
+      const int ncol,
+      const int brow,
+      const int last_brow,
+      const int bcol,
+      const int nbrow,
+      const int nbcol,
+      const uint64_t size)
+      : nrow_(nrow),
+        ncol_(ncol),
+        brow_(brow),
+        last_brow_(last_brow),
+        bcol_(bcol),
+        nbrow_(nbrow),
+        nbcol_(nbcol),
+        size_(size) {
+    initializeMemory();
+  }
+
+  void initializeParam() {
     bcol_ = 8 * 1; // hardwired

     // set up internal packing parameters
     nbrow_ = ((numRows() % blockRowSize()) == 0)
         ? (numRows() / blockRowSize())
         : ((numRows() + blockRowSize()) / blockRowSize());
-    last_brow_ = ((nrow % blockRowSize()) == 0) ? blockRowSize()
-                                                : (nrow % blockRowSize());
+    last_brow_ = ((nrow_ % blockRowSize()) == 0) ? blockRowSize()
+                                                 : (nrow_ % blockRowSize());
     nbcol_ = ((numCols() % blockColSize()) == 0)
         ? (numCols() / blockColSize())
         : ((numCols() + blockColSize()) / blockColSize());
@@ -62,7 +89,9 @@ class PackedGemmMatrixFP16 {
           << "lefover is currently done via MKL: hence overhead will inccur";
 #endif
     }
+  }

+  void initializeMemory() {
     // allocate and initialize packed memory
     const int padding = 1024; // required by sw pipelined kernels
     size_ = (blockRowSize() * nbrow_) * (blockColSize() * nbcol_);
@@ -72,9 +101,6 @@ class PackedGemmMatrixFP16 {
     for (auto i = 0; i < matSize(); i++) {
       pmat_[i] = tconv(0.f, pmat_[i]);
     }
-
-    // copy source matrix into packed matrix
-    this->packFromSrc(trans, alpha, smat);
   }

   ~PackedGemmMatrixFP16() {
@@ -135,6 +161,18 @@ class PackedGemmMatrixFP16 {
   int numCols() const {
     return ncol_;
   }
+  int lastBrow() const {
+    return last_brow_;
+  }
+  int numBrow() const {
+    return nbrow_;
+  }
+  int numBcol() const {
+    return nbcol_;
+  }
+  float16* pmat() const {
+    return pmat_;
+  }
   inline int blockRowSize() const {
     return brow_;
   }
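The new eight-argument constructor and the lastBrow()/numBrow()/numBcol()/pmat() accessors added above let a caller save a prepacked weight matrix and rebuild it later without repacking from fp32 data, which is what the packed gemm operator needs for serialized weights. The sketch below illustrates such a round trip using only members visible in this header; PackedBlob and the save/load helpers are hypothetical glue, not FBGEMM or Caffe2 API.

```cpp
// Round-trip sketch: PackedBlob, save() and load() are hypothetical helpers;
// only the PackedGemmMatrixFP16 members shown in the diff above are assumed.
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "fbgemm/FbgemmFP16.h"

struct PackedBlob {
  int nrow, ncol, brow, last_brow, bcol, nbrow, nbcol;
  uint64_t size;
  std::vector<fbgemm::float16> data;
};

PackedBlob save(const fbgemm::PackedGemmMatrixFP16& src) {
  PackedBlob b{src.numRows(), src.numCols(), src.blockRowSize(), src.lastBrow(),
               src.blockColSize(), src.numBrow(), src.numBcol(),
               static_cast<uint64_t>(src.matSize())};
  // Copy the packed fp16 payload out of the matrix.
  b.data.assign(src.pmat(), src.pmat() + src.matSize());
  return b;
}

std::unique_ptr<fbgemm::PackedGemmMatrixFP16> load(const PackedBlob& b) {
  // The new constructor only allocates packed storage; no repacking happens.
  auto dst = std::make_unique<fbgemm::PackedGemmMatrixFP16>(
      b.nrow, b.ncol, b.brow, b.last_brow, b.bcol, b.nbrow, b.nbcol, b.size);
  std::memcpy(dst->pmat(), b.data.data(),
              b.data.size() * sizeof(fbgemm::float16));
  return dst;
}
```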