diff options
author | Jongsoo Park <jongsoo@fb.com> | 2019-02-20 00:02:50 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2019-02-20 00:05:50 +0300 |
commit | a5f1f5308130f21b376f5368e87eb74b071493fa (patch) | |
tree | 85ca95f688352f16c00aafd522fcb0063cb58293 | |
parent | 610378f2cfd9eba220c423e7ebbe62a1109477ff (diff) |
increase test coverage (#78)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/78
Increase test coverage like transposing A
Reviewed By: protonu
Differential Revision: D14121297
fbshipit-source-id: a6e21442dc47e8cd725b795dbaf8614719f013fb
-rw-r--r-- | src/PackAMatrix.cc | 10 | ||||
-rw-r--r-- | src/PackAWithRowOffset.cc | 23 | ||||
-rw-r--r-- | test/PackedRequantizeTest.cc | 54 |
3 files changed, 49 insertions(+), 38 deletions(-)
diff --git a/src/PackAMatrix.cc b/src/PackAMatrix.cc index 9487974..8469a39 100644 --- a/src/PackAMatrix.cc +++ b/src/PackAMatrix.cc @@ -62,10 +62,12 @@ void PackAMatrix<T, accT>::pack(const block_type_t& block) { bool tr = (trans_ == matrix_op_t::Transpose); T* out = BaseType::getBuf(); if (tr) { + // TODO: should print warning because this path is not optimized yet for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { + int buf_idx = i - block.row_start; for (int j = block.col_start; j < block.col_start + block.col_size; ++j) { T val = smat_[i + j * ld_]; - out[addr(i, j) - addr(block.row_start, block.col_start)] = val; + out[buf_idx * BaseType::blockColSize() + (j - block.col_start)] = val; } // zero fill // Please note that we zero fill, not zero_pt fill, because for @@ -99,10 +101,8 @@ void PackAMatrix<T, accT>::pack(const block_type_t& block) { // // and requantization with numElements(A) = 3 will produce the same // answer (-4.8). - for (int j = block.col_start + block.col_size; - j < block_p.col_start + block_p.col_size; - ++j) { - out[addr(i, j) - addr(block.row_start, block.col_start)] = 0; + for (int j = block.col_size; j < block_p.col_size; ++j) { + out[buf_idx * BaseType::blockColSize() + j] = 0; } } } else { diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc index 4882bb5..cb0dcaa 100644 --- a/src/PackAWithRowOffset.cc +++ b/src/PackAWithRowOffset.cc @@ -91,22 +91,18 @@ void PackAWithRowOffset<T, accT>::pack(const block_type_t& block) { int32_t* row_offset_buf = getRowOffsetBuffer(); if (tr) { for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { - int32_t row_sum = - row_offset_acc ? row_offset_buf[i - block.row_start] : 0; + int buf_idx = i - block.row_start; + int32_t row_sum = row_offset_acc ? 
row_offset_buf[buf_idx] : 0; for (int j = block.col_start; j < block.col_start + block.col_size; ++j) { - T val = smat_[i + ld_ * j]; + T val = smat_[i + j * ld_]; row_sum += val; - out[(i - block.row_start) * BaseType::blockColSize() + - (j - block.col_start)] = val; + out[buf_idx * BaseType::blockColSize() + (j - block.col_start)] = val; } - row_offset_buf[i - block.row_start] = row_sum; + row_offset_buf[buf_idx] = row_sum; // zero fill // Please see the comment in PackAMatrix.cc on zero vs zero_pt fill. - for (int j = block.col_start + block.col_size; - j < block_p.col_start + block_p.col_size; - ++j) { - out[(i - block.row_start) * BaseType::blockColSize() + - (j - block.col_start)] = 0; + for (int j = block.col_size; j < block_p.col_size; ++j) { + out[buf_idx * BaseType::blockColSize() + j] = 0; } } } else { @@ -124,10 +120,9 @@ void PackAWithRowOffset<T, accT>::pack(const block_type_t& block) { for (int j = block.col_size; j < block_p.col_size; ++j) { out[buf_idx * BaseType::blockColSize() + j] = 0; } - int32_t row_sum = - row_offset_acc ? row_offset_buf[i - block.row_start] : 0; + int32_t row_sum = row_offset_acc ? 
row_offset_buf[buf_idx] : 0; row_sum += reduceAvx2(smat_ + i * ld_ + block.col_start, block.col_size); - row_offset_buf[i - block.row_start] = row_sum; + row_offset_buf[buf_idx] = row_sum; } } } diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc index ab3e1a7..fd827b0 100644 --- a/test/PackedRequantizeTest.cc +++ b/test/PackedRequantizeTest.cc @@ -45,7 +45,7 @@ INSTANTIATE_TEST_CASE_P( InstantiationName, fbgemmu8s8acc32WithQuantGranularityTest, ::testing::Combine( - ::testing::Values(matrix_op_t::NoTranspose), + ::testing::ValuesIn(transposeVals), ::testing::ValuesIn(transposeVals), ::testing::Bool(), ::testing::ValuesIn(qGranularityVals))); @@ -54,7 +54,7 @@ INSTANTIATE_TEST_CASE_P( InstantiationName, fbgemmu8s8acc32Test, ::testing::Combine( - ::testing::Values(matrix_op_t::NoTranspose), + ::testing::ValuesIn(transposeVals), ::testing::ValuesIn(transposeVals), ::testing::Bool())); @@ -167,9 +167,6 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, Test) { // as lda. int n_adjusted = n; if (test_ld) { - assert( - atrans == matrix_op_t::NoTranspose && - "This case is not handled yet"); if (btrans == matrix_op_t::NoTranspose) { n_adjusted = std::max(n / 2, 1); } @@ -239,6 +236,12 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, Test) { ncols_per_quant_group); } + if (atrans == matrix_op_t::Transpose) { + aligned_vector<uint8_t> Aint8_temp(Aint8.size()); + transpose_matrix(m, k, Aint8.data(), k, Aint8_temp.data(), m); + Aint8 = Aint8_temp; + } + PackBMatrix<int8_t> packedBN( btrans, k, @@ -256,14 +259,13 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, Test) { PackAWithRowOffset<uint8_t>::rowOffsetBufferSize()); PackAWithRowOffset<uint8_t> packAN( - matrix_op_t::NoTranspose, + atrans, m, k, Aint8.data(), - k, + (atrans == matrix_op_t::Transpose) ? 
m : k, nullptr, - groups, - row_offset_buf.data()); + groups); int num_threads = fbgemm_get_num_threads(); int tid = fbgemm_get_thread_num(); @@ -411,9 +413,6 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, TestFloatInputOutput) { // as lda. int n_adjusted = n; if (test_ld) { - assert( - atrans == matrix_op_t::NoTranspose && - "This case is not handled yet"); if (btrans == matrix_op_t::NoTranspose) { n_adjusted = std::max(n / 2, 1); } @@ -479,6 +478,12 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, TestFloatInputOutput) { Cfp32_ref.data() + g * n_adjusted); } + if (atrans == matrix_op_t::Transpose) { + aligned_vector<float> Afp32_temp(Afp32.size()); + transpose_matrix(m, k, Afp32.data(), k, Afp32_temp.data(), m); + Afp32 = Afp32_temp; + } + PackBMatrix<int8_t> packedBN( btrans, k, @@ -496,16 +501,18 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, TestFloatInputOutput) { PackAWithQuantRowOffset<uint8_t>::rowOffsetBufferSize()); PackAWithQuantRowOffset<uint8_t> packAN( - matrix_op_t::NoTranspose, + atrans, m, k, Afp32.data(), - k, + (atrans == matrix_op_t::Transpose) ? m : k, nullptr, /*buffer for packed matrix*/ Aint8_scale, Aint8_zero_point, groups, - row_offset_buf.data()); + // This is just to test row_offset_buf = nullptr with at least + // one configuration. + groups == 3 ? nullptr : row_offset_buf.data()); int num_threads = fbgemm_get_num_threads(); int tid = fbgemm_get_thread_num(); @@ -650,14 +657,17 @@ TEST_P(fbgemmu8s8acc32Test, TestSymmetricQuantizedInputOutput) { // as lda. 
int n_adjusted = n; if (test_ld) { - assert( - atrans == matrix_op_t::NoTranspose && - "This case is not handled yet"); if (btrans == matrix_op_t::NoTranspose) { n_adjusted = std::max(n / 2, 1); } } + if (atrans == matrix_op_t::Transpose) { + aligned_vector<uint8_t> Aint8_temp(Aint8.size()); + transpose_matrix(m, k, Aint8.data(), k, Aint8_temp.data(), m); + Aint8 = Aint8_temp; + } + if (btrans == matrix_op_t::Transpose) { aligned_vector<int8_t> Bint8_temp(Bint8.size()); for (int g = 0; g < groups; ++g) { @@ -701,7 +711,13 @@ TEST_P(fbgemmu8s8acc32Test, TestSymmetricQuantizedInputOutput) { { // A zero point and row offset not required PackAMatrix<uint8_t> packAN( - matrix_op_t::NoTranspose, m, k, Aint8.data(), k, nullptr, groups); + atrans, + m, + k, + Aint8.data(), + (atrans == matrix_op_t::Transpose) ? m : k, + nullptr, + groups); DoNothing<int32_t, int32_t> doNothingObj{}; memCopy<> outputProcObj(doNothingObj); |