github.com/marian-nmt/FBGEMM.git
author     Amy Yang <amyyang@fb.com>  2018-12-19 06:43:23 +0300
committer  Facebook Github Bot <facebook-github-bot@users.noreply.github.com>  2018-12-19 06:48:54 +0300
commit     d5810be02d4b7b90a5aec746f98841c9e585f6d4 (patch)
tree       720f143d415a1ae216410a5df5e352016207d45d
parent     1b3d9701336d3c11da9e88b176fd1191c5b283d9 (diff)
Refactor to use FbgemmFP16 in packed gemm operator (#49)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/49

Refactored code to use deeplearning/fbgemm2/FbgemmFP16.

Reviewed By: jspark1105

Differential Revision: D13324112

fbshipit-source-id: 8addc602ae61337f68ec361082e7536ad7fbd202

-rw-r--r--  include/fbgemm/FbgemmFP16.h  | 48
1 file changed, 43 insertions(+), 5 deletions(-)
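The refactor splits the original constructor's work into initializeParam() (derive the blocking parameters) and initializeMemory() (allocate and zero the packed buffer), and adds a parameter-taking constructor plus read accessors so a caller that already has the packing parameters and the packed data can rebuild the matrix without re-packing from a row-major source. As a rough illustration of what that enables on the operator side, here is a minimal sketch; it assumes the accessors added in the diff below (lastBrow(), numBrow(), numBcol(), pmat()) plus matSize(), and the clonePacked() helper and namespace are assumptions for illustration, not part of this commit:

#include <cstring>
#include <memory>

#include "fbgemm/FbgemmFP16.h"

// Hypothetical helper, not part of FBGEMM: rebuild a packed B matrix from an
// existing one using only the parameter-taking constructor and the accessors
// added in this diff, instead of re-packing from an fp32 source matrix.
// Namespace fbgemm is assumed; this revision may still use a different one.
std::unique_ptr<fbgemm::PackedGemmMatrixFP16> clonePacked(
    const fbgemm::PackedGemmMatrixFP16& src) {
  // The new constructor only calls initializeMemory(); every blocking
  // parameter is supplied by the caller, in the same order as in the diff.
  std::unique_ptr<fbgemm::PackedGemmMatrixFP16> dst(
      new fbgemm::PackedGemmMatrixFP16(
          src.numRows(),
          src.numCols(),
          src.blockRowSize(),
          src.lastBrow(),
          src.blockColSize(),
          src.numBrow(),
          src.numBcol(),
          src.matSize()));
  // Copy the already-packed fp16 payload; no packFromSrc() call is needed.
  std::memcpy(
      dst->pmat(), src.pmat(), src.matSize() * sizeof(fbgemm::float16));
  return dst;
}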
diff --git a/include/fbgemm/FbgemmFP16.h b/include/fbgemm/FbgemmFP16.h
index e45cff8..bebeb70 100644
--- a/include/fbgemm/FbgemmFP16.h
+++ b/include/fbgemm/FbgemmFP16.h
@@ -41,14 +41,41 @@ class PackedGemmMatrixFP16 {
       const float* smat,
       const int brow = 512)
       : nrow_(nrow), ncol_(ncol), brow_(brow) {
+    initializeParam();
+    initializeMemory();
+    // copy source matrix into packed matrix
+    this->packFromSrc(trans, alpha, smat);
+  }
+
+  PackedGemmMatrixFP16(
+      const int nrow,
+      const int ncol,
+      const int brow,
+      const int last_brow,
+      const int bcol,
+      const int nbrow,
+      const int nbcol,
+      const uint64_t size)
+      : nrow_(nrow),
+        ncol_(ncol),
+        brow_(brow),
+        last_brow_(last_brow),
+        bcol_(bcol),
+        nbrow_(nbrow),
+        nbcol_(nbcol),
+        size_(size) {
+    initializeMemory();
+  }
+
+  void initializeParam() {
     bcol_ = 8 * 1; // hardwired
 
     // set up internal packing parameters
     nbrow_ = ((numRows() % blockRowSize()) == 0)
         ? (numRows() / blockRowSize())
         : ((numRows() + blockRowSize()) / blockRowSize());
-    last_brow_ = ((nrow % blockRowSize()) == 0) ? blockRowSize()
-                                                : (nrow % blockRowSize());
+    last_brow_ = ((nrow_ % blockRowSize()) == 0) ? blockRowSize()
+                                                 : (nrow_ % blockRowSize());
     nbcol_ = ((numCols() % blockColSize()) == 0)
         ? (numCols() / blockColSize())
         : ((numCols() + blockColSize()) / blockColSize());
@@ -62,7 +89,9 @@ class PackedGemmMatrixFP16 {
           << "leftover is currently done via MKL: hence overhead will incur";
 #endif
     }
+  }
 
+  void initializeMemory() {
     // allocate and initialize packed memory
     const int padding = 1024; // required by sw pipelined kernels
     size_ = (blockRowSize() * nbrow_) * (blockColSize() * nbcol_);
@@ -72,9 +101,6 @@ class PackedGemmMatrixFP16 {
     for (auto i = 0; i < matSize(); i++) {
       pmat_[i] = tconv(0.f, pmat_[i]);
     }
-
-    // copy source matrix into packed matrix
-    this->packFromSrc(trans, alpha, smat);
   }
 
   ~PackedGemmMatrixFP16() {
@@ -135,6 +161,18 @@ class PackedGemmMatrixFP16 {
   int numCols() const {
     return ncol_;
   }
+  int lastBrow() const {
+    return last_brow_;
+  }
+  int numBrow() const {
+    return nbrow_;
+  }
+  int numBcol() const {
+    return nbcol_;
+  }
+  float16* pmat() const {
+    return pmat_;
+  }
   inline int blockRowSize() const {
     return brow_;
   }
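The public packing path itself is unchanged by this refactor: the source-matrix constructor now simply chains initializeParam(), initializeMemory(), and packFromSrc(). A short usage sketch of that path, assuming the cblas_gemm_compute() entry point and matrix_op_t enum declared in FbgemmFP16.h and, again, an assumed namespace:

#include "fbgemm/FbgemmFP16.h"

// Sketch only: pack a row-major fp32 matrix B once via the original
// constructor (initializeParam + initializeMemory + packFromSrc), then run
// C = alpha * A * B with the fp16 kernels. The namespace and exact signatures
// are assumed from FbgemmFP16.h, not introduced by this commit.
void fp16GemmExample(
    int m, int k, int n, const float* A, const float* B, float* C) {
  // B is k x n, row major; alpha is folded in during packing.
  fbgemm::PackedGemmMatrixFP16 Bp(
      fbgemm::matrix_op_t::NoTranspose, k, n, /*alpha=*/1.0f, B);
  // A is m x k, row major; with beta = 0 the output C is overwritten.
  fbgemm::cblas_gemm_compute(
      fbgemm::matrix_op_t::NoTranspose, m, A, Bp, /*beta=*/0.0f, C);
}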