diff options
author | Mike Tsai <miketsai@fb.com> | 2019-08-02 02:03:29 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2019-08-02 02:20:54 +0300 |
commit | 0d5d057ca941ebb511bdc6178fc26c23e6c4a953 (patch) | |
tree | 1ceaddaf942edb9debcafad7491b750fc3a5f066 | |
parent | f712cb2328a2b29424bdaeecb9c0731da2cd997b (diff) |
Pass blocking param pointer into packedBufferSize() in PackBMatrix.cc
Summary:
Pass blocking params in to compute correct buffer size for each group.
Fix the bug for this CONV shape:
`conv_param_t<2>(1, 32, 16, {12, 14}, 4, {3, 3}, {1, 1}, {0, 0, 0, 0})`
Corresponding M, N, K = 120, 4, 288
with these params:
BlockingFactors params;
params.MCB = 48;
params.NCB = 16;
params.KCB = 256;
params.MR = 1;
params.NR = 16;
params.ROW_INTERLEAVE = 4;
params.NR_MIN = 16;
Reviewed By: jianyuh
Differential Revision: D16571367
fbshipit-source-id: 27c9b003d37c4d3d13767227e8343d44668823d6
-rw-r--r-- | include/fbgemm/Fbgemm.h | 14 | ||||
-rw-r--r-- | src/PackBMatrix.cc | 22 | ||||
-rw-r--r-- | test/PackedRequantizeAcc16Test.cc | 83 | ||||
-rw-r--r-- | test/PackedRequantizeTest.cc | 83 |
4 files changed, 120 insertions, 82 deletions
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h index bdec036..68963fa 100644 --- a/include/fbgemm/Fbgemm.h +++ b/include/fbgemm/Fbgemm.h @@ -441,14 +441,17 @@ class FBGEMM_API PackBMatrix final std::int32_t addr(std::int32_t i, std::int32_t j) const; /** - * @brief Packs a block of source matrix into pmat buffer. + * @brief Packs a block of source matrix into pmat buffer. The blocking + * parameters are needed to compute the buffer size of each group. + * It will use default blocking parameters if params is not provided. */ - void pack(const block_type_t& block); + void pack(const block_type_t& block, const BlockingFactors* params = nullptr); /** * @brief Print the packed block. */ - void printPackedMatrix(std::string name); + void printPackedMatrix(std::string name, + const BlockingFactors* params = nullptr); /** * @return true if meta information like matrix shape is the same. @@ -463,7 +466,7 @@ class FBGEMM_API PackBMatrix final * @brief Unpack pmat buffer to the origin_buf (Used for the serialization to * recover weight matrix). 
*/ - void unpack(T* origin_buf); + void unpack(T* origin_buf, const BlockingFactors* params = nullptr); ~PackBMatrix() {} @@ -480,7 +483,8 @@ class FBGEMM_API PackBMatrix final const block_type_t& block, T* unpack_buf, T* pack_buf, - bool ispack); + bool ispack, + const BlockingFactors* params = nullptr); }; /** diff --git a/src/PackBMatrix.cc b/src/PackBMatrix.cc index a2805b1..0990edb 100644 --- a/src/PackBMatrix.cc +++ b/src/PackBMatrix.cc @@ -228,7 +228,7 @@ PackBMatrix<T, accT>::PackBMatrix( BaseType::numGroups() * BaseType::blockRows() * BaseType::brow_ * BaseType::blockCols() * BaseType::bcol_ * sizeof(T)); } - pack(block); + pack(block, params); } template <typename T, typename accT> @@ -236,7 +236,8 @@ void PackBMatrix<T, accT>::pack_unpack_( const block_type_t& block, T* unpack_buf, T* pack_buf, - bool ispack) { + bool ispack, + const BlockingFactors* params) { assert((BaseType::blockRowSize() % row_interleave_) == 0); assert((block.row_start % BaseType::blockRowSize()) == 0); assert((block.col_start % BaseType::blockColSize()) == 0); @@ -245,7 +246,7 @@ void PackBMatrix<T, accT>::pack_unpack_( bool tr = (trans_ == matrix_op_t::Transpose); for (int g = 0; g < BaseType::numGroups(); ++g) { T* pack_buf_cur = pack_buf + - g * BaseType::packedBufferSize(block.row_size, block.col_size); + g * BaseType::packedBufferSize(block.row_size, block.col_size, params); for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { int r_offset = ((i / BaseType::blockRowSize()) * BaseType::blockCols()) * (BaseType::blockRowSize() * BaseType::blockColSize()) + @@ -316,17 +317,19 @@ void PackBMatrix<T, accT>::pack_unpack_( } template <typename T, typename accT> -void PackBMatrix<T, accT>::pack(const block_type_t& block) { - pack_unpack_(block, const_cast<T*>(smat_), BaseType::getBuf(), true); +void PackBMatrix<T, accT>::pack(const block_type_t& block, + const BlockingFactors* params) { + pack_unpack_(block, const_cast<T*>(smat_), BaseType::getBuf(), true, 
params); } template <typename T, typename accT> -void PackBMatrix<T, accT>::unpack(T* origin_buf) { +void PackBMatrix<T, accT>::unpack(T* origin_buf, + const BlockingFactors* params) { block_type_t blockB{BaseType::packedRowStart(), BaseType::numPackedRows(), BaseType::packedColStart(), BaseType::numPackedCols()}; - pack_unpack_(blockB, origin_buf, BaseType::getBuf(), false); + pack_unpack_(blockB, origin_buf, BaseType::getBuf(), false, params); } template <typename T, typename accT> @@ -349,7 +352,8 @@ int32_t PackBMatrix<T, accT>::addr(int32_t r, int32_t c) const { } template <typename T, typename accT> -void PackBMatrix<T, accT>::printPackedMatrix(std::string name) { +void PackBMatrix<T, accT>::printPackedMatrix(std::string name, + const BlockingFactors* params) { std::cout << name << ":" << "[" << BaseType::numPackedRows() << ", " << BaseType::numPackedCols() << "]" << std::endl; @@ -361,7 +365,7 @@ void PackBMatrix<T, accT>::printPackedMatrix(std::string name) { T* out = BaseType::getBuf() + g * BaseType::packedBufferSize( - BaseType::numPackedRows(), BaseType::numPackedCols()); + BaseType::numPackedRows(), BaseType::numPackedCols(), params); std::cout << "group: " << g << std::endl; for (auto nr = 0; nr < BaseType::blockRows(); ++nr) { auto rows = (nr == BaseType::blockRows() - 1) ? 
BaseType::lastBrow() diff --git a/test/PackedRequantizeAcc16Test.cc b/test/PackedRequantizeAcc16Test.cc index 40254cb..93e7566 100644 --- a/test/PackedRequantizeAcc16Test.cc +++ b/test/PackedRequantizeAcc16Test.cc @@ -93,6 +93,8 @@ static vector<vector<int>> GetShapes_() { {102, 512, 258}, {1024, 512, 258}, + + {120, 4, 288}, }; return shapes; } @@ -826,54 +828,67 @@ TEST_P(fbgemmPackUnpackAcc16Test, TestPackUnpack) { bool test_ld; tie(btrans, test_ld) = GetParam(); + BlockingFactors params; + params.MCB = 48; + params.NCB = 16; + params.KCB = 256; + params.MR = 1; + params.NR = 16; + params.ROW_INTERLEAVE = 4; + params.NR_MIN = 16; + vector<BlockingFactors*> vec_params_ptr = {&params, nullptr}; + for (auto shape : shapes) { for (int groups : {1, 3, 4}) { - int n = shape[1]; - int k = shape[2]; + for (auto params_ptr : vec_params_ptr) { + int n = shape[1]; + int k = shape[2]; - if (k % groups != 0) { - continue; - } - int k_per_group = k / groups; + if (k % groups != 0) { + continue; + } + int k_per_group = k / groups; - // kxn matrix - aligned_vector<int8_t> Bint8(k * n); - randFill<int8_t>(Bint8, -128, 127); + // kxn matrix + aligned_vector<int8_t> Bint8(k * n); + randFill<int8_t>(Bint8, -128, 127); - // To test lda != k , we just reduce k by half and use the original k - // as lda. - int n_adjusted = n; - if (test_ld) { - if (btrans == matrix_op_t::NoTranspose) { - n_adjusted = std::max(n / 2, 1); + // To test lda != k , we just reduce k by half and use the original k + // as lda. + int n_adjusted = n; + if (test_ld) { + if (btrans == matrix_op_t::NoTranspose) { + n_adjusted = std::max(n / 2, 1); + } } - } - // Note that packing for weight is performed during the constructor - // stage. - PackBMatrix<int8_t, int16_t> packedWeights( - btrans, - k, - n_adjusted, - Bint8.data(), - (btrans == matrix_op_t::Transpose) ? k_per_group : n, - nullptr, - groups); + // Note that packing for weight is performed during the constructor + // stage. 
+ PackBMatrix<int8_t, int16_t> packedWeights( + btrans, + k, + n_adjusted, + Bint8.data(), + (btrans == matrix_op_t::Transpose) ? k_per_group : n, + nullptr, + groups, + params_ptr); - // Setup a buffer to get pack -> unpacked results - aligned_vector<int8_t> unpack_buf(k * n, 0); + // Setup a buffer to get pack -> unpacked results + aligned_vector<int8_t> unpack_buf(k * n, 0); - // Perform unpacking - packedWeights.unpack(unpack_buf.data()); + // Perform unpacking + packedWeights.unpack(unpack_buf.data(), params_ptr); - // Sanity check - for (int i = 0; i < k; i++) { - for (int j = 0; j < n_adjusted; j++) { - EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j]) + // Sanity check + for (int i = 0; i < k; i++) { + for (int j = 0; j < n_adjusted; j++) { + EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j]) << "Pack/Unpack results differ at index (" << i << ", " << j << ", Reference: " << static_cast<int>(Bint8.data()[i * n + j]) << ", Pack-Unpacked: " << static_cast<int>(unpack_buf.data()[i * n + j]); + } } } } diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc index 11ef6ff..5338243 100644 --- a/test/PackedRequantizeTest.cc +++ b/test/PackedRequantizeTest.cc @@ -93,6 +93,8 @@ static vector<vector<int>> GetShapes_() { {102, 512, 258}, {1024, 512, 258}, + + {120, 4, 288}, }; return shapes; } @@ -766,54 +768,67 @@ TEST_P(fbgemmPackUnpackAcc32Test, TestPackUnpack) { bool test_ld; tie(btrans, test_ld) = GetParam(); + BlockingFactors params; + params.MCB = 48; + params.NCB = 16; + params.KCB = 256; + params.MR = 1; + params.NR = 16; + params.ROW_INTERLEAVE = 4; + params.NR_MIN = 16; + vector<BlockingFactors*> vec_params_ptr = {&params, nullptr}; + for (auto shape : shapes) { for (int groups : {1, 3, 4}) { - int n = shape[1]; - int k = shape[2]; + for (auto params_ptr : vec_params_ptr) { + int n = shape[1]; + int k = shape[2]; - if (k % groups != 0) { - continue; - } - int k_per_group = k / groups; + if (k % groups != 0) { + 
continue; + } + int k_per_group = k / groups; - // kxn matrix - aligned_vector<int8_t> Bint8(k * n); - randFill<int8_t>(Bint8, -128, 127); + // kxn matrix + aligned_vector<int8_t> Bint8(k * n); + randFill<int8_t>(Bint8, -128, 127); - // To test lda != k , we just reduce k by half and use the original k - // as lda. - int n_adjusted = n; - if (test_ld) { - if (btrans == matrix_op_t::NoTranspose) { - n_adjusted = std::max(n / 2, 1); + // To test lda != k , we just reduce k by half and use the original k + // as lda. + int n_adjusted = n; + if (test_ld) { + if (btrans == matrix_op_t::NoTranspose) { + n_adjusted = std::max(n / 2, 1); + } } - } - // Note that packing for weight is performed during the constructor - // stage. - PackBMatrix<int8_t> packedWeights( - btrans, - k, - n_adjusted, - Bint8.data(), - (btrans == matrix_op_t::Transpose) ? k_per_group : n, - nullptr, - groups); + // Note that packing for weight is performed during the constructor + // stage. + PackBMatrix<int8_t> packedWeights( + btrans, + k, + n_adjusted, + Bint8.data(), + (btrans == matrix_op_t::Transpose) ? 
k_per_group : n, + nullptr, + groups, + params_ptr); - // Setup a buffer to get pack -> unpacked results - aligned_vector<int8_t> unpack_buf(k * n, 0); + // Setup a buffer to get pack -> unpacked results + aligned_vector<int8_t> unpack_buf(k * n, 0); - // Perform unpacking - packedWeights.unpack(unpack_buf.data()); + // Perform unpacking + packedWeights.unpack(unpack_buf.data(), params_ptr); - // Sanity check - for (int i = 0; i < k; i++) { - for (int j = 0; j < n_adjusted; j++) { - EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j]) + // Sanity check + for (int i = 0; i < k; i++) { + for (int j = 0; j < n_adjusted; j++) { + EXPECT_EQ(Bint8.data()[i * n + j], unpack_buf.data()[i * n + j]) << "Pack/Unpack results differ at index (" << i << ", " << j << ", Reference: " << static_cast<int>(Bint8.data()[i * n + j]) << ", Pack-Unpacked: " << static_cast<int>(unpack_buf.data()[i * n + j]); + } } } } |