Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jongsoo Park <jongsoo@fb.com> 2019-02-20 19:50:37 +0300
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-02-20 19:59:51 +0300
commit 9ebc998c2bf0da81ab7afa1478df395d79429cf8 (patch)
tree c664731e03c05f239f28bc3eb6a8010b65696f93
parent a5f1f5308130f21b376f5368e87eb74b071493fa (diff)
optimize PackAWithIm2Col for symmetric b quant
Summary: Add an additional option, b_symmetric, and skip row offset computation when it is true.
Reviewed By: jianyuh
Differential Revision: D14119128
fbshipit-source-id: fa079347562b7f75727b3a1414e9bdda3f9c65dd
-rw-r--r-- include/fbgemm/Fbgemm.h 12
-rw-r--r-- src/PackAWithIm2Col.cc 80
-rw-r--r-- test/Im2ColFusedRequantizeTest.cc 67
3 files changed, 101 insertions, 58 deletions
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h
index 2ac10f7..4f3c92e 100644
--- a/include/fbgemm/Fbgemm.h
+++ b/include/fbgemm/Fbgemm.h
@@ -524,13 +524,15 @@ class FBGEMM_API PackAWithIm2Col
* buffer and owns it. Otherwise, this class doesn't own
* the buffer. The buffer will be populated when pack
* function is called.
+ * @params b_symmetric if true we skip row offset computation
*/
PackAWithIm2Col(
const conv_param_t<SPATIAL_DIM>& conv_param,
const T* sdata,
inpType* pmat = nullptr,
- std::int32_t zero_pt = 0,
- std::int32_t* row_offset = nullptr);
+ std::int32_t a_zero_pt = 0,
+ std::int32_t* row_offset = nullptr,
+ bool b_symmetric = false);
/**
* Activation matrices are not constant so cannot amortize the cost of
@@ -578,9 +580,9 @@ class FBGEMM_API PackAWithIm2Col
private:
const conv_param_t<SPATIAL_DIM> conv_p_;
const T* sdata_;
- std::int32_t zero_pt_;
- std::int32_t* row_offset_;
- bool rowOffsetAllocatedHere;
+ std::int32_t a_zero_pt_;
+ std::int32_t* row_offset_{nullptr};
+ bool rowOffsetAllocatedHere{false};
std::int32_t row_interleave_B_;
};
diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc
index f096d7a..b9e310f 100644
--- a/src/PackAWithIm2Col.cc
+++ b/src/PackAWithIm2Col.cc
@@ -21,8 +21,9 @@ PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
const conv_param_t<SPATIAL_DIM>& conv_p,
const T* sdata,
inpType* pmat,
- int32_t zero_pt,
- int32_t* row_offset)
+ int32_t a_zero_pt,
+ int32_t* row_offset,
+ bool b_symmetric)
: PackMatrix<PackAWithIm2Col<T, accT, SPATIAL_DIM>, T, accT>(
conv_p.MB *
std::accumulate(
@@ -40,7 +41,7 @@ PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
conv_p.G),
conv_p_(conv_p),
sdata_(sdata),
- zero_pt_(zero_pt) {
+ a_zero_pt_(a_zero_pt) {
static_assert(
SPATIAL_DIM == 2 || SPATIAL_DIM == 3, "unsupported conv dimension ");
if (cpuinfo_has_x86_avx512f()) {
@@ -70,13 +71,15 @@ PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
fbgemmAlignedAlloc(64, BaseType::brow_ * BaseType::bcol_ * sizeof(T)));
// aligned_alloc(64, BaseType::brow_ * BaseType::bcol_ * sizeof(T)));
}
- if (row_offset) {
- rowOffsetAllocatedHere = false;
- row_offset_ = row_offset;
- } else {
- rowOffsetAllocatedHere = true;
- row_offset_ = static_cast<int32_t*>(
- fbgemmAlignedAlloc(64, BaseType::brow_ * sizeof(int32_t)));
+ if (!b_symmetric) {
+ if (row_offset) {
+ rowOffsetAllocatedHere = false;
+ row_offset_ = row_offset;
+ } else {
+ rowOffsetAllocatedHere = true;
+ row_offset_ = static_cast<int32_t*>(
+ fbgemmAlignedAlloc(64, BaseType::brow_ * sizeof(int32_t)));
+ }
}
}
@@ -115,20 +118,35 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
"PackAWithIm2Col<T, accT>::pack only works for T == uint8_t");
if (point_wise) {
int32_t ld = this->numCols();
- for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
- int buf_idx = i - block.row_start;
- memcpy(
- out + buf_idx * BaseType::blockColSize(),
- sdata_ + i * ld + block.col_start,
- block.col_size * sizeof(T));
- // zero fill
- for (int j = block.col_size; j < block_p.col_size; ++j) {
- out[buf_idx * BaseType::blockColSize() + j] = 0;
+ if (row_offset_buf) {
+ for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
+ int buf_idx = i - block.row_start;
+ memcpy(
+ out + buf_idx * BaseType::blockColSize(),
+ sdata_ + i * ld + block.col_start,
+ block.col_size * sizeof(T));
+ // zero fill
+ for (int j = block.col_size; j < block_p.col_size; ++j) {
+ out[buf_idx * BaseType::blockColSize() + j] = 0;
+ }
+ int32_t row_sum =
+ row_offset_acc ? row_offset_buf[i - block.row_start] : 0;
+ row_sum +=
+ reduceAvx2(sdata_ + i * ld + block.col_start, block.col_size);
+ row_offset_buf[i - block.row_start] = row_sum;
+ }
+ } else {
+ for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
+ int buf_idx = i - block.row_start;
+ memcpy(
+ out + buf_idx * BaseType::blockColSize(),
+ sdata_ + i * ld + block.col_start,
+ block.col_size * sizeof(T));
+ // zero fill
+ for (int j = block.col_size; j < block_p.col_size; ++j) {
+ out[buf_idx * BaseType::blockColSize() + j] = 0;
+ }
}
- int32_t row_sum =
- row_offset_acc ? row_offset_buf[i - block.row_start] : 0;
- row_sum += reduceAvx2(sdata_ + i * ld + block.col_start, block.col_size);
- row_offset_buf[i - block.row_start] = row_sum;
}
return;
@@ -168,7 +186,7 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
std::memset(
out + (i - block.row_start) * BaseType::blockColSize() +
(j_blk_start - block.col_start),
- zero_pt_,
+ a_zero_pt_,
sizeof(T) * (j_blk_end - j_blk_start));
} else {
std::memcpy(
@@ -220,7 +238,7 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
&out
[(i - block.row_start) * BaseType::blockColSize() +
(j_blk_start - block.col_start)],
- zero_pt_,
+ a_zero_pt_,
sizeof(T) * (j_blk_end - j_blk_start));
} else {
std::memcpy(
@@ -252,11 +270,13 @@ void PackAWithIm2Col<T, accT, SPATIAL_DIM>::pack(const block_type_t& block) {
(block.col_start + block.col_size)));
}
- // TODO: skip row_offset computation when B_zero_point is 0
- int32_t row_sum = row_offset_acc ? row_offset_buf[i - block.row_start] : 0;
- row_sum += reduceAvx2(
- out + (i - block.row_start) * this->blockColSize(), block.col_size);
- row_offset_buf[i - block.row_start] = row_sum;
+ if (row_offset_buf) {
+ int32_t row_sum =
+ row_offset_acc ? row_offset_buf[i - block.row_start] : 0;
+ row_sum += reduceAvx2(
+ out + (i - block.row_start) * this->blockColSize(), block.col_size);
+ row_offset_buf[i - block.row_start] = row_sum;
+ }
} // for each i
}
diff --git a/test/Im2ColFusedRequantizeTest.cc b/test/Im2ColFusedRequantizeTest.cc
index feae002..1dfc756 100644
--- a/test/Im2ColFusedRequantizeTest.cc
+++ b/test/Im2ColFusedRequantizeTest.cc
@@ -30,13 +30,15 @@ vector<QuantizationGranularity> qGranularityVals{
namespace {
class fbgemmIm2colTest
- : public testing::TestWithParam<QuantizationGranularity> {};
+ : public testing::TestWithParam<tuple<QuantizationGranularity, bool>> {};
}; // namespace
INSTANTIATE_TEST_CASE_P(
InstantiationName,
fbgemmIm2colTest,
- ::testing::ValuesIn(qGranularityVals));
+ ::testing::Combine(
+ ::testing::ValuesIn(qGranularityVals),
+ ::testing::Bool()));
// From Faster-RCNN with ShuffleNet
static vector<conv_param_t<>> shapes = {
@@ -52,7 +54,7 @@ static vector<conv_param_t<>> shapes = {
};
template <typename ACC_T, QuantizationGranularity Q_GRAN>
-static void Im2colTest() {
+static void Im2colTest(bool b_symmetric) {
for (auto conv_p : shapes) {
for (int groups : {1, 4}) {
if (conv_p.IC % groups != 0 || conv_p.OC % groups != 0) {
@@ -89,6 +91,9 @@ static void Im2colTest() {
randFill<int8_t>(Bint8, -4, 4);
randFill(Bint8_zero_point, -3, -1);
}
+ if (b_symmetric) {
+ randFill(Bint8_zero_point, 0, 0);
+ }
aligned_vector<float> C_multiplier(Bint8_zero_point.size());
randFill(C_multiplier, 0.001234f / 2, 0.001234f * 3 / 2);
@@ -169,7 +174,8 @@ static void Im2colTest() {
Aint8.data(),
nullptr,
Aint8_zero_point,
- row_offset_buf.data());
+ row_offset_buf.data(),
+ b_symmetric);
DoNothing<> doNothingObj{};
ReQuantizeOutput<false, Q_GRAN> outputProcObj(
@@ -223,24 +229,28 @@ static void Im2colTest() {
}
TEST_P(fbgemmIm2colTest, Acc32Test) {
- QuantizationGranularity q_granularity = GetParam();
+ QuantizationGranularity q_granularity;
+ bool b_symmetric;
+ tie(q_granularity, b_symmetric) = GetParam();
if (q_granularity == QuantizationGranularity::TENSOR) {
- Im2colTest<int32_t, QuantizationGranularity::TENSOR>();
+ Im2colTest<int32_t, QuantizationGranularity::TENSOR>(b_symmetric);
} else if (q_granularity == QuantizationGranularity::GROUP) {
- Im2colTest<int32_t, QuantizationGranularity::GROUP>();
+ Im2colTest<int32_t, QuantizationGranularity::GROUP>(b_symmetric);
} else {
- Im2colTest<int32_t, QuantizationGranularity::OUT_CHANNEL>();
+ Im2colTest<int32_t, QuantizationGranularity::OUT_CHANNEL>(b_symmetric);
}
}
TEST_P(fbgemmIm2colTest, Acc16Test) {
- QuantizationGranularity q_granularity = GetParam();
+ QuantizationGranularity q_granularity;
+ bool b_symmetric;
+ tie(q_granularity, b_symmetric) = GetParam();
if (q_granularity == QuantizationGranularity::TENSOR) {
- Im2colTest<int16_t, QuantizationGranularity::TENSOR>();
+ Im2colTest<int16_t, QuantizationGranularity::TENSOR>(b_symmetric);
} else if (q_granularity == QuantizationGranularity::GROUP) {
- Im2colTest<int16_t, QuantizationGranularity::GROUP>();
+ Im2colTest<int16_t, QuantizationGranularity::GROUP>(b_symmetric);
} else {
- Im2colTest<int16_t, QuantizationGranularity::OUT_CHANNEL>();
+ Im2colTest<int16_t, QuantizationGranularity::OUT_CHANNEL>(b_symmetric);
}
}
@@ -453,7 +463,10 @@ void SConvTest() {
}
TEST_P(fbgemmIm2colTest, SConvTest) {
- QuantizationGranularity q_granularity = GetParam();
+ QuantizationGranularity q_granularity;
+ bool b_symmetric;
+ tie(q_granularity, b_symmetric) = GetParam();
+ // b_symmetric ignored for now
if (q_granularity == QuantizationGranularity::TENSOR) {
SConvTest<QuantizationGranularity::TENSOR>();
} else if (q_granularity == QuantizationGranularity::GROUP) {
@@ -539,7 +552,7 @@ static vector<conv_param_t<3>> shapes_3d = {
};
template <typename ACC_T, QuantizationGranularity Q_GRAN>
-static void Im2col3DTest() {
+static void Im2col3DTest(bool b_symmetric) {
for (auto conv_p : shapes_3d) {
for (int groups : {1, 4}) {
if (conv_p.IC % groups != 0 || conv_p.OC % groups != 0) {
@@ -578,6 +591,9 @@ static void Im2col3DTest() {
randFill<int8_t>(Bint8, -4, 4);
randFill(Bint8_zero_point, -3, -1);
}
+ if (b_symmetric) {
+ randFill(Bint8_zero_point, 0, 0);
+ }
aligned_vector<float> C_multiplier(Bint8_zero_point.size());
randFill(C_multiplier, 0.001234f / 2, 0.001234f * 3 / 2);
@@ -659,7 +675,8 @@ static void Im2col3DTest() {
Aint8.data(),
nullptr,
Aint8_zero_point,
- row_offset_buf.data());
+ row_offset_buf.data(),
+ b_symmetric);
DoNothing<> doNothingObj{};
ReQuantizeOutput<false, Q_GRAN> outputProcObj(
@@ -719,23 +736,27 @@ static void Im2col3DTest() {
}
TEST_P(fbgemmIm2colTest, 3DAcc32Test) {
- QuantizationGranularity q_granularity = GetParam();
+ QuantizationGranularity q_granularity;
+ bool b_symmetric;
+ tie(q_granularity, b_symmetric) = GetParam();
if (q_granularity == QuantizationGranularity::TENSOR) {
- Im2col3DTest<int32_t, QuantizationGranularity::TENSOR>();
+ Im2col3DTest<int32_t, QuantizationGranularity::TENSOR>(b_symmetric);
} else if (q_granularity == QuantizationGranularity::GROUP) {
- Im2col3DTest<int32_t, QuantizationGranularity::GROUP>();
+ Im2col3DTest<int32_t, QuantizationGranularity::GROUP>(b_symmetric);
} else {
- Im2col3DTest<int32_t, QuantizationGranularity::OUT_CHANNEL>();
+ Im2col3DTest<int32_t, QuantizationGranularity::OUT_CHANNEL>(b_symmetric);
}
}
TEST_P(fbgemmIm2colTest, 3DAcc16Test) {
- QuantizationGranularity q_granularity = GetParam();
+ QuantizationGranularity q_granularity;
+ bool b_symmetric;
+ tie(q_granularity, b_symmetric) = GetParam();
if (q_granularity == QuantizationGranularity::TENSOR) {
- Im2col3DTest<int16_t, QuantizationGranularity::TENSOR>();
+ Im2col3DTest<int16_t, QuantizationGranularity::TENSOR>(b_symmetric);
} else if (q_granularity == QuantizationGranularity::GROUP) {
- Im2col3DTest<int16_t, QuantizationGranularity::GROUP>();
+ Im2col3DTest<int16_t, QuantizationGranularity::GROUP>(b_symmetric);
} else {
- Im2col3DTest<int16_t, QuantizationGranularity::OUT_CHANNEL>();
+ Im2col3DTest<int16_t, QuantizationGranularity::OUT_CHANNEL>(b_symmetric);
}
}