author    | Amy Yang <amyyang@fb.com>                                          | 2018-12-19 06:43:23 +0300
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-12-19 06:48:54 +0300
commit    | d5810be02d4b7b90a5aec746f98841c9e585f6d4 (patch)
tree      | 720f143d415a1ae216410a5df5e352016207d45d
parent    | 1b3d9701336d3c11da9e88b176fd1191c5b283d9 (diff)
Refactor to use FbgemmFP16 in packed gemm operator (#49)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/49
Refactored the code to use deeplearning/fbgemm2/FbgemmFP16.
Reviewed By: jspark1105
Differential Revision: D13324112
fbshipit-source-id: 8addc602ae61337f68ec361082e7536ad7fbd202
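For context, the FbgemmFP16 path that the packed gemm operator is being refactored onto packs the fp32 weight matrix into an fp16 block layout once and then reuses it across GEMM calls. The snippet below is a minimal sketch of that usage, assuming the public FbgemmFP16.h API (PackedGemmMatrixFP16 and cblas_gemm_compute); the shapes and buffers are illustrative and not taken from the operator code.

```cpp
// Sketch of driving FbgemmFP16 (assumed public API from FbgemmFP16.h);
// shapes and data are illustrative only.
#include <vector>
#include "fbgemm/FbgemmFP16.h"

void Fp16GemmSketch() {
  constexpr int m = 2, k = 16, n = 8;
  std::vector<float> A(m * k, 1.0f); // activations, row-major m x k
  std::vector<float> B(k * n, 0.5f); // weights, row-major k x n
  std::vector<float> C(m * n, 0.0f); // output, row-major m x n

  // Pack (and convert to fp16) the weight matrix once, up front.
  fbgemm::PackedGemmMatrixFP16 Bp(
      fbgemm::matrix_op_t::NoTranspose, k, n, /*alpha=*/1.0f, B.data());

  // C = A * B, reusing the packed weights on every call.
  fbgemm::cblas_gemm_compute(
      fbgemm::matrix_op_t::NoTranspose, m, A.data(), Bp, /*beta=*/0.0f, C.data());
}
```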
-rw-r--r-- | include/fbgemm/FbgemmFP16.h | 48
1 file changed, 43 insertions, 5 deletions
diff --git a/include/fbgemm/FbgemmFP16.h b/include/fbgemm/FbgemmFP16.h
index e45cff8..bebeb70 100644
--- a/include/fbgemm/FbgemmFP16.h
+++ b/include/fbgemm/FbgemmFP16.h
@@ -41,14 +41,41 @@ class PackedGemmMatrixFP16 {
       const float* smat,
       const int brow = 512)
       : nrow_(nrow), ncol_(ncol), brow_(brow) {
+    initializeParam();
+    initializeMemory();
+    // copy source matrix into packed matrix
+    this->packFromSrc(trans, alpha, smat);
+  }
+
+  PackedGemmMatrixFP16(
+      const int nrow,
+      const int ncol,
+      const int brow,
+      const int last_brow,
+      const int bcol,
+      const int nbrow,
+      const int nbcol,
+      const uint64_t size)
+      : nrow_(nrow),
+        ncol_(ncol),
+        brow_(brow),
+        last_brow_(last_brow),
+        bcol_(bcol),
+        nbrow_(nbrow),
+        nbcol_(nbcol),
+        size_(size) {
+    initializeMemory();
+  }
+
+  void initializeParam() {
     bcol_ = 8 * 1; // hardwired

     // set up internal packing parameters
     nbrow_ = ((numRows() % blockRowSize()) == 0)
         ? (numRows() / blockRowSize())
         : ((numRows() + blockRowSize()) / blockRowSize());
-    last_brow_ = ((nrow % blockRowSize()) == 0) ? blockRowSize()
-                                                : (nrow % blockRowSize());
+    last_brow_ = ((nrow_ % blockRowSize()) == 0) ? blockRowSize()
+                                                 : (nrow_ % blockRowSize());
     nbcol_ = ((numCols() % blockColSize()) == 0)
         ? (numCols() / blockColSize())
         : ((numCols() + blockColSize()) / blockColSize());
@@ -62,7 +89,9 @@ class PackedGemmMatrixFP16 {
           << "lefover is currently done via MKL: hence overhead will inccur";
 #endif
     }
+  }

+  void initializeMemory() {
     // allocate and initialize packed memory
     const int padding = 1024; // required by sw pipelined kernels
     size_ = (blockRowSize() * nbrow_) * (blockColSize() * nbcol_);
@@ -72,9 +101,6 @@ class PackedGemmMatrixFP16 {
     for (auto i = 0; i < matSize(); i++) {
       pmat_[i] = tconv(0.f, pmat_[i]);
     }
-
-    // copy source matrix into packed matrix
-    this->packFromSrc(trans, alpha, smat);
   }

   ~PackedGemmMatrixFP16() {
@@ -135,6 +161,18 @@ class PackedGemmMatrixFP16 {
   int numCols() const {
     return ncol_;
   }
+  int lastBrow() const {
+    return last_brow_;
+  }
+  int numBrow() const {
+    return nbrow_;
+  }
+  int numBcol() const {
+    return nbcol_;
+  }
+  float16* pmat() const {
+    return pmat_;
+  }
   inline int blockRowSize() const {
     return brow_;
   }
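The new eight-argument constructor and the lastBrow()/numBrow()/numBcol()/pmat() accessors added above let a caller save a prepacked weight matrix and rebuild it later without repacking from fp32 data, which is what the packed gemm operator needs for serialized weights. The sketch below illustrates such a round trip using only members visible in this header; PackedBlob and the save/load helpers are hypothetical glue, not FBGEMM or Caffe2 API.

```cpp
// Round-trip sketch: PackedBlob, save() and load() are hypothetical helpers;
// only the PackedGemmMatrixFP16 members shown in the diff above are assumed.
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "fbgemm/FbgemmFP16.h"

struct PackedBlob {
  int nrow, ncol, brow, last_brow, bcol, nbrow, nbcol;
  uint64_t size;
  std::vector<fbgemm::float16> data;
};

PackedBlob save(const fbgemm::PackedGemmMatrixFP16& src) {
  PackedBlob b{src.numRows(), src.numCols(), src.blockRowSize(), src.lastBrow(),
               src.blockColSize(), src.numBrow(), src.numBcol(),
               static_cast<uint64_t>(src.matSize())};
  // Copy the packed fp16 payload out of the matrix.
  b.data.assign(src.pmat(), src.pmat() + src.matSize());
  return b;
}

std::unique_ptr<fbgemm::PackedGemmMatrixFP16> load(const PackedBlob& b) {
  // The new constructor only allocates packed storage; no repacking happens.
  auto dst = std::make_unique<fbgemm::PackedGemmMatrixFP16>(
      b.nrow, b.ncol, b.brow, b.last_brow, b.bcol, b.nbrow, b.nbcol, b.size);
  std::memcpy(dst->pmat(), b.data.data(),
              b.data.size() * sizeof(fbgemm::float16));
  return dst;
}
```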