Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authorJongsoo Park <jongsoo@fb.com>2019-02-20 00:02:50 +0300
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>2019-02-20 00:05:50 +0300
commita5f1f5308130f21b376f5368e87eb74b071493fa (patch)
tree85ca95f688352f16c00aafd522fcb0063cb58293
parent610378f2cfd9eba220c423e7ebbe62a1109477ff (diff)
increase test coverage (#78)
Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/78 Increase test coverage like transposing A Reviewed By: protonu Differential Revision: D14121297 fbshipit-source-id: a6e21442dc47e8cd725b795dbaf8614719f013fb
-rw-r--r--  src/PackAMatrix.cc           10
-rw-r--r--  src/PackAWithRowOffset.cc    23
-rw-r--r--  test/PackedRequantizeTest.cc 54
3 files changed, 49 insertions, 38 deletions
diff --git a/src/PackAMatrix.cc b/src/PackAMatrix.cc
index 9487974..8469a39 100644
--- a/src/PackAMatrix.cc
+++ b/src/PackAMatrix.cc
@@ -62,10 +62,12 @@ void PackAMatrix<T, accT>::pack(const block_type_t& block) {
bool tr = (trans_ == matrix_op_t::Transpose);
T* out = BaseType::getBuf();
if (tr) {
+ // TODO: should print warning because this path is not optimized yet
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
+ int buf_idx = i - block.row_start;
for (int j = block.col_start; j < block.col_start + block.col_size; ++j) {
T val = smat_[i + j * ld_];
- out[addr(i, j) - addr(block.row_start, block.col_start)] = val;
+ out[buf_idx * BaseType::blockColSize() + (j - block.col_start)] = val;
}
// zero fill
// Please note that we zero fill, not zero_pt fill, because for
@@ -99,10 +101,8 @@ void PackAMatrix<T, accT>::pack(const block_type_t& block) {
//
// and requantization with numElements(A) = 3 will produce the same
// answer (-4.8).
- for (int j = block.col_start + block.col_size;
- j < block_p.col_start + block_p.col_size;
- ++j) {
- out[addr(i, j) - addr(block.row_start, block.col_start)] = 0;
+ for (int j = block.col_size; j < block_p.col_size; ++j) {
+ out[buf_idx * BaseType::blockColSize() + j] = 0;
}
}
} else {
diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc
index 4882bb5..cb0dcaa 100644
--- a/src/PackAWithRowOffset.cc
+++ b/src/PackAWithRowOffset.cc
@@ -91,22 +91,18 @@ void PackAWithRowOffset<T, accT>::pack(const block_type_t& block) {
int32_t* row_offset_buf = getRowOffsetBuffer();
if (tr) {
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
- int32_t row_sum =
- row_offset_acc ? row_offset_buf[i - block.row_start] : 0;
+ int buf_idx = i - block.row_start;
+ int32_t row_sum = row_offset_acc ? row_offset_buf[buf_idx] : 0;
for (int j = block.col_start; j < block.col_start + block.col_size; ++j) {
- T val = smat_[i + ld_ * j];
+ T val = smat_[i + j * ld_];
row_sum += val;
- out[(i - block.row_start) * BaseType::blockColSize() +
- (j - block.col_start)] = val;
+ out[buf_idx * BaseType::blockColSize() + (j - block.col_start)] = val;
}
- row_offset_buf[i - block.row_start] = row_sum;
+ row_offset_buf[buf_idx] = row_sum;
// zero fill
// Please see the comment in PackAMatrix.cc on zero vs zero_pt fill.
- for (int j = block.col_start + block.col_size;
- j < block_p.col_start + block_p.col_size;
- ++j) {
- out[(i - block.row_start) * BaseType::blockColSize() +
- (j - block.col_start)] = 0;
+ for (int j = block.col_size; j < block_p.col_size; ++j) {
+ out[buf_idx * BaseType::blockColSize() + j] = 0;
}
}
} else {
@@ -124,10 +120,9 @@ void PackAWithRowOffset<T, accT>::pack(const block_type_t& block) {
for (int j = block.col_size; j < block_p.col_size; ++j) {
out[buf_idx * BaseType::blockColSize() + j] = 0;
}
- int32_t row_sum =
- row_offset_acc ? row_offset_buf[i - block.row_start] : 0;
+ int32_t row_sum = row_offset_acc ? row_offset_buf[buf_idx] : 0;
row_sum += reduceAvx2(smat_ + i * ld_ + block.col_start, block.col_size);
- row_offset_buf[i - block.row_start] = row_sum;
+ row_offset_buf[buf_idx] = row_sum;
}
}
}
diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc
index ab3e1a7..fd827b0 100644
--- a/test/PackedRequantizeTest.cc
+++ b/test/PackedRequantizeTest.cc
@@ -45,7 +45,7 @@ INSTANTIATE_TEST_CASE_P(
InstantiationName,
fbgemmu8s8acc32WithQuantGranularityTest,
::testing::Combine(
- ::testing::Values(matrix_op_t::NoTranspose),
+ ::testing::ValuesIn(transposeVals),
::testing::ValuesIn(transposeVals),
::testing::Bool(),
::testing::ValuesIn(qGranularityVals)));
@@ -54,7 +54,7 @@ INSTANTIATE_TEST_CASE_P(
InstantiationName,
fbgemmu8s8acc32Test,
::testing::Combine(
- ::testing::Values(matrix_op_t::NoTranspose),
+ ::testing::ValuesIn(transposeVals),
::testing::ValuesIn(transposeVals),
::testing::Bool()));
@@ -167,9 +167,6 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, Test) {
// as lda.
int n_adjusted = n;
if (test_ld) {
- assert(
- atrans == matrix_op_t::NoTranspose &&
- "This case is not handled yet");
if (btrans == matrix_op_t::NoTranspose) {
n_adjusted = std::max(n / 2, 1);
}
@@ -239,6 +236,12 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, Test) {
ncols_per_quant_group);
}
+ if (atrans == matrix_op_t::Transpose) {
+ aligned_vector<uint8_t> Aint8_temp(Aint8.size());
+ transpose_matrix(m, k, Aint8.data(), k, Aint8_temp.data(), m);
+ Aint8 = Aint8_temp;
+ }
+
PackBMatrix<int8_t> packedBN(
btrans,
k,
@@ -256,14 +259,13 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, Test) {
PackAWithRowOffset<uint8_t>::rowOffsetBufferSize());
PackAWithRowOffset<uint8_t> packAN(
- matrix_op_t::NoTranspose,
+ atrans,
m,
k,
Aint8.data(),
- k,
+ (atrans == matrix_op_t::Transpose) ? m : k,
nullptr,
- groups,
- row_offset_buf.data());
+ groups);
int num_threads = fbgemm_get_num_threads();
int tid = fbgemm_get_thread_num();
@@ -411,9 +413,6 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, TestFloatInputOutput) {
// as lda.
int n_adjusted = n;
if (test_ld) {
- assert(
- atrans == matrix_op_t::NoTranspose &&
- "This case is not handled yet");
if (btrans == matrix_op_t::NoTranspose) {
n_adjusted = std::max(n / 2, 1);
}
@@ -479,6 +478,12 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, TestFloatInputOutput) {
Cfp32_ref.data() + g * n_adjusted);
}
+ if (atrans == matrix_op_t::Transpose) {
+ aligned_vector<float> Afp32_temp(Afp32.size());
+ transpose_matrix(m, k, Afp32.data(), k, Afp32_temp.data(), m);
+ Afp32 = Afp32_temp;
+ }
+
PackBMatrix<int8_t> packedBN(
btrans,
k,
@@ -496,16 +501,18 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, TestFloatInputOutput) {
PackAWithQuantRowOffset<uint8_t>::rowOffsetBufferSize());
PackAWithQuantRowOffset<uint8_t> packAN(
- matrix_op_t::NoTranspose,
+ atrans,
m,
k,
Afp32.data(),
- k,
+ (atrans == matrix_op_t::Transpose) ? m : k,
nullptr, /*buffer for packed matrix*/
Aint8_scale,
Aint8_zero_point,
groups,
- row_offset_buf.data());
+ // This is just to test row_offset_buf = nullptr with at least
+ // one configuration.
+ groups == 3 ? nullptr : row_offset_buf.data());
int num_threads = fbgemm_get_num_threads();
int tid = fbgemm_get_thread_num();
@@ -650,14 +657,17 @@ TEST_P(fbgemmu8s8acc32Test, TestSymmetricQuantizedInputOutput) {
// as lda.
int n_adjusted = n;
if (test_ld) {
- assert(
- atrans == matrix_op_t::NoTranspose &&
- "This case is not handled yet");
if (btrans == matrix_op_t::NoTranspose) {
n_adjusted = std::max(n / 2, 1);
}
}
+ if (atrans == matrix_op_t::Transpose) {
+ aligned_vector<uint8_t> Aint8_temp(Aint8.size());
+ transpose_matrix(m, k, Aint8.data(), k, Aint8_temp.data(), m);
+ Aint8 = Aint8_temp;
+ }
+
if (btrans == matrix_op_t::Transpose) {
aligned_vector<int8_t> Bint8_temp(Bint8.size());
for (int g = 0; g < groups; ++g) {
@@ -701,7 +711,13 @@ TEST_P(fbgemmu8s8acc32Test, TestSymmetricQuantizedInputOutput) {
{
// A zero point and row offset not required
PackAMatrix<uint8_t> packAN(
- matrix_op_t::NoTranspose, m, k, Aint8.data(), k, nullptr, groups);
+ atrans,
+ m,
+ k,
+ Aint8.data(),
+ (atrans == matrix_op_t::Transpose) ? m : k,
+ nullptr,
+ groups);
DoNothing<int32_t, int32_t> doNothingObj{};
memCopy<> outputProcObj(doNothingObj);