Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mike Tsai <miketsai@fb.com> 2019-08-02 02:03:29 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-08-02 02:20:54 +0300
commit: 0d5d057ca941ebb511bdc6178fc26c23e6c4a953 (patch)
tree: 1ceaddaf942edb9debcafad7491b750fc3a5f066
parent: f712cb2328a2b29424bdaeecb9c0731da2cd997b (diff)
Pass blocking param pointer into packedBufferSize() in PackBMatrix.cc
Summary: Pass blocking params in to compute the correct buffer size for each group. This fixes the bug for this CONV shape: `conv_param_t<2>(1, 32, 16, {12, 14}, 4, {3, 3}, {1, 1}, {0, 0, 0, 0})`. The corresponding M, N, K = 120, 4, 288, with these params:
    BlockingFactors params;
    params.MCB = 48;
    params.NCB = 16;
    params.KCB = 256;
    params.MR = 1;
    params.NR = 16;
    params.ROW_INTERLEAVE = 4;
    params.NR_MIN = 16;
Reviewed By: jianyuh
Differential Revision: D16571367
fbshipit-source-id: 27c9b003d37c4d3d13767227e8343d44668823d6
-rw-r--r-- include/fbgemm/Fbgemm.h 14
-rw-r--r-- src/PackBMatrix.cc 22
-rw-r--r-- test/PackedRequantizeAcc16Test.cc 83
-rw-r--r-- test/PackedRequantizeTest.cc 83
4 files changed, 120 insertions, 82 deletions
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h
index bdec036..68963fa 100644
--- a/include/fbgemm/Fbgemm.h
+++ b/include/fbgemm/Fbgemm.h
@@ -441,14 +441,17 @@ class FBGEMM_API PackBMatrix final
std::int32_t addr(std::int32_t i, std::int32_t j) const;
/**
- * @brief Packs a block of source matrix into pmat buffer.
+ * @brief Packs a block of source matrix into pmat buffer. The blocking
+ * parameters are needed to compute the buffer size of each group.
+ * It will use default blocking parameters if params is not provided.
*/
- void pack(const block_type_t& block);
+ void pack(const block_type_t& block, const BlockingFactors* params = nullptr);
/**
* @brief Print the packed block.
*/
- void printPackedMatrix(std::string name);
+ void printPackedMatrix(std::string name,
+ const BlockingFactors* params = nullptr);
/**
* @return true if meta information like matrix shape is the same.
@@ -463,7 +466,7 @@ class FBGEMM_API PackBMatrix final
* @brief Unpack pmat buffer to the origin_buf (Used for the serialization to
* recover weight matrix).
*/
- void unpack(T* origin_buf);
+ void unpack(T* origin_buf, const BlockingFactors* params = nullptr);
~PackBMatrix() {}
@@ -480,7 +483,8 @@ class FBGEMM_API PackBMatrix final
const block_type_t& block,
T* unpack_buf,
T* pack_buf,
- bool ispack);
+ bool ispack,
+ const BlockingFactors* params = nullptr);
};
/**
diff --git a/src/PackBMatrix.cc b/src/PackBMatrix.cc
index a2805b1..0990edb 100644
--- a/src/PackBMatrix.cc
+++ b/src/PackBMatrix.cc
@@ -228,7 +228,7 @@ PackBMatrix<T, accT>::PackBMatrix(
BaseType::numGroups() * BaseType::blockRows() * BaseType::brow_ *
BaseType::blockCols() * BaseType::bcol_ * sizeof(T));
}
- pack(block);
+ pack(block, params);
}
template <typename T, typename accT>
@@ -236,7 +236,8 @@ void PackBMatrix<T, accT>::pack_unpack_(
const block_type_t& block,
T* unpack_buf,
T* pack_buf,
- bool ispack) {
+ bool ispack,
+ const BlockingFactors* params) {
assert((BaseType::blockRowSize() % row_interleave_) == 0);
assert((block.row_start % BaseType::blockRowSize()) == 0);
assert((block.col_start % BaseType::blockColSize()) == 0);
@@ -245,7 +246,7 @@ void PackBMatrix<T, accT>::pack_unpack_(
bool tr = (trans_ == matrix_op_t::Transpose);
for (int g = 0; g < BaseType::numGroups(); ++g) {
T* pack_buf_cur = pack_buf +
- g * BaseType::packedBufferSize(block.row_size, block.col_size);
+ g * BaseType::packedBufferSize(block.row_size, block.col_size, params);
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
int r_offset = ((i / BaseType::blockRowSize()) * BaseType::blockCols()) *
(BaseType::blockRowSize() * BaseType::blockColSize()) +
@@ -316,17 +317,19 @@ void PackBMatrix<T, accT>::pack_unpack_(
}
template <typename T, typename accT>
-void PackBMatrix<T, accT>::pack(const block_type_t& block) {
- pack_unpack_(block, const_cast<T*>(smat_), BaseType::getBuf(), true);
+void PackBMatrix<T, accT>::pack(const block_type_t& block,
+ const BlockingFactors* params) {
+ pack_unpack_(block, const_cast<T*>(smat_), BaseType::getBuf(), true, params);
}
template <typename T, typename accT>
-void PackBMatrix<T, accT>::unpack(T* origin_buf) {
+void PackBMatrix<T, accT>::unpack(T* origin_buf,
+ const BlockingFactors* params) {
block_type_t blockB{BaseType::packedRowStart(),
BaseType::numPackedRows(),
BaseType::packedColStart(),
BaseType::numPackedCols()};
- pack_unpack_(blockB, origin_buf, BaseType::getBuf(), false);
+ pack_unpack_(blockB, origin_buf, BaseType::getBuf(), false, params);
}
template <typename T, typename accT>
@@ -349,7 +352,8 @@ int32_t PackBMatrix<T, accT>::addr(int32_t r, int32_t c) const {
}
template <typename T, typename accT>
-void PackBMatrix<T, accT>::printPackedMatrix(std::string name) {
+void PackBMatrix<T, accT>::printPackedMatrix(std::string name,
+ const BlockingFactors* params) {
std::cout << name << ":"
<< "[" << BaseType::numPackedRows() << ", "
<< BaseType::numPackedCols() << "]" << std::endl;
@@ -361,7 +365,7 @@ void PackBMatrix<T, accT>::printPackedMatrix(std::string name) {
T* out = BaseType::getBuf() +
g *
BaseType::packedBufferSize(
- BaseType::numPackedRows(), BaseType::numPackedCols());
+ BaseType::numPackedRows(), BaseType::numPackedCols(), params);
std::cout << "group: " << g << std::endl;
for (auto nr = 0; nr < BaseType::blockRows(); ++nr) {
auto rows = (nr == BaseType::blockRows() - 1) ? BaseType::lastBrow()
diff --git a/test/PackedRequantizeAcc16Test.cc b/test/PackedRequantizeAcc16Test.cc
index 40254cb..93e7566 100644
--- a/test/PackedRequantizeAcc16Test.cc
+++ b/test/PackedRequantizeAcc16Test.cc
@@ -93,6 +93,8 @@ static vector<vector<int>> GetShapes_() {
{102, 512, 258},
{1024, 512, 258},
+
+ {120, 4, 288},
};
return shapes;
}
@@ -826,54 +828,67 @@ TEST_P(fbgemmPackUnpackAcc16Test, TestPackUnpack) {
bool test_ld;
tie(btrans, test_ld) = GetParam();
+ BlockingFactors params;
+ params.MCB = 48;
+ params.NCB = 16;
+ params.KCB = 256;
+ params.MR = 1;
+ params.NR = 16;
+ params.ROW_INTERLEAVE = 4;
+ params.NR_MIN = 16;
+ vector<BlockingFactors*> vec_params_ptr = {&params, nullptr};
+
for (auto shape : shapes) {
for (int groups : {1, 3, 4}) {
- int n = shape[1];
- int k = shape[2];
+ for (auto params_ptr : vec_params_ptr) {
+ int n = shape[1];
+ int k = shape[2];
- if (k % groups != 0) {
- continue;
- }
- int k_per_group = k / groups;
+ if (k % groups != 0) {
+ continue;
+ }
+ int k_per_group = k / groups;
- // kxn matrix
- aligned_vector<int8_t> Bint8(k * n);
- randFill<int8_t>(Bint8, -128, 127);
+ // kxn matrix
+ aligned_vector<int8_t> Bint8(k * n);
+ randFill<int8_t>(Bint8, -128, 127);
- // To test lda != k , we just reduce k by half and use the original k
- // as lda.
- int n_adjusted = n;
- if (test_ld) {
- if (btrans == matrix_op_t::NoTranspose) {
- n_adjusted = std::max(n / 2, 1);
+ // To test lda != k , we just reduce k by half and use the original k
+ // as lda.
+ int n_adjusted = n;
+ if (test_ld) {
+ if (btrans == matrix_op_t::NoTranspose) {
+ n_adjusted = std::max(n / 2, 1);
+ }
}
- }
- // Note that packing for weight is performed during the constructor
- // stage.
- PackBMatrix<int8_t, int16_t> packedWeights(
- btrans,
- k,
- n_adjusted,
- Bint8.data(),
- (btrans == matrix_op_t::Transpose) ? k_per_group : n,
- nullptr,
- groups);
+ // Note that packing for weight is performed during the constructor
+ // stage.
+ PackBMatrix<int8_t, int16_t> packedWeights(
+ btrans,
+ k,
+ n_adjusted,
+ Bint8.data(),
+ (btrans == matrix_op_t::Transpose) ? k_per_group : n,
+ nullptr,
+ groups,
+ params_ptr);
- // Setup a buffer to get pack -> unpacked results
- aligned_vector<int8_t> unpack_buf(k * n, 0);
+ // Setup a buffer to get pack -> unpacked results
+ aligned_vector<int8_t> unpack_buf(k * n, 0);
- // Perform unpacking
- packedWeights.unpack(unpack_buf.data());
+ // Perform unpacking
+ packedWeights.unpack(unpack_buf.data(), params_ptr);
- // Sanity check
- for (int i = 0; i < k; i++) {
- for (int j = 0; j < n_adjusted; j++) {
- EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
+ // Sanity check
+ for (int i = 0; i < k; i++) {
+ for (int j = 0; j < n_adjusted; j++) {
+ EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
<< "Pack/Unpack results differ at index (" << i << ", " << j
<< ", Reference: " << static_cast<int>(Bint8.data()[i * n + j])
<< ", Pack-Unpacked: "
<< static_cast<int>(unpack_buf.data()[i * n + j]);
+ }
}
}
}
diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc
index 11ef6ff..5338243 100644
--- a/test/PackedRequantizeTest.cc
+++ b/test/PackedRequantizeTest.cc
@@ -93,6 +93,8 @@ static vector<vector<int>> GetShapes_() {
{102, 512, 258},
{1024, 512, 258},
+
+ {120, 4, 288},
};
return shapes;
}
@@ -766,54 +768,67 @@ TEST_P(fbgemmPackUnpackAcc32Test, TestPackUnpack) {
bool test_ld;
tie(btrans, test_ld) = GetParam();
+ BlockingFactors params;
+ params.MCB = 48;
+ params.NCB = 16;
+ params.KCB = 256;
+ params.MR = 1;
+ params.NR = 16;
+ params.ROW_INTERLEAVE = 4;
+ params.NR_MIN = 16;
+ vector<BlockingFactors*> vec_params_ptr = {&params, nullptr};
+
for (auto shape : shapes) {
for (int groups : {1, 3, 4}) {
- int n = shape[1];
- int k = shape[2];
+ for (auto params_ptr : vec_params_ptr) {
+ int n = shape[1];
+ int k = shape[2];
- if (k % groups != 0) {
- continue;
- }
- int k_per_group = k / groups;
+ if (k % groups != 0) {
+ continue;
+ }
+ int k_per_group = k / groups;
- // kxn matrix
- aligned_vector<int8_t> Bint8(k * n);
- randFill<int8_t>(Bint8, -128, 127);
+ // kxn matrix
+ aligned_vector<int8_t> Bint8(k * n);
+ randFill<int8_t>(Bint8, -128, 127);
- // To test lda != k , we just reduce k by half and use the original k
- // as lda.
- int n_adjusted = n;
- if (test_ld) {
- if (btrans == matrix_op_t::NoTranspose) {
- n_adjusted = std::max(n / 2, 1);
+ // To test lda != k , we just reduce k by half and use the original k
+ // as lda.
+ int n_adjusted = n;
+ if (test_ld) {
+ if (btrans == matrix_op_t::NoTranspose) {
+ n_adjusted = std::max(n / 2, 1);
+ }
}
- }
- // Note that packing for weight is performed during the constructor
- // stage.
- PackBMatrix<int8_t> packedWeights(
- btrans,
- k,
- n_adjusted,
- Bint8.data(),
- (btrans == matrix_op_t::Transpose) ? k_per_group : n,
- nullptr,
- groups);
+ // Note that packing for weight is performed during the constructor
+ // stage.
+ PackBMatrix<int8_t> packedWeights(
+ btrans,
+ k,
+ n_adjusted,
+ Bint8.data(),
+ (btrans == matrix_op_t::Transpose) ? k_per_group : n,
+ nullptr,
+ groups,
+ params_ptr);
- // Setup a buffer to get pack -> unpacked results
- aligned_vector<int8_t> unpack_buf(k * n, 0);
+ // Setup a buffer to get pack -> unpacked results
+ aligned_vector<int8_t> unpack_buf(k * n, 0);
- // Perform unpacking
- packedWeights.unpack(unpack_buf.data());
+ // Perform unpacking
+ packedWeights.unpack(unpack_buf.data(), params_ptr);
- // Sanity check
- for (int i = 0; i < k; i++) {
- for (int j = 0; j < n_adjusted; j++) {
- EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
+ // Sanity check
+ for (int i = 0; i < k; i++) {
+ for (int j = 0; j < n_adjusted; j++) {
+ EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j])
<< "Pack/Unpack results differ at index (" << i << ", " << j
<< ", Reference: " << static_cast<int>(Bint8.data()[i * n + j])
<< ", Pack-Unpacked: "
<< static_cast<int>(unpack_buf.data()[i * n + j]);
+ }
}
}
}