github.com/marian-nmt/FBGEMM.git
author    Daya Khudia <dskhudia@fb.com>  2019-09-11 21:47:58 +0300
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>  2019-09-11 21:52:07 +0300
commit    ea787e8278744ab4c7d6c4ee42a050bb1c76ef88 (patch)
tree      3846cfc169c2333cfa23f519f88565b0e0b40416
parent    637288bff9972c02e72341d6a60fdf9bab1dce7e (diff)
fbgemmPacked and fbgemmConv APIs with float bias + tests

Summary: fbgemmPacked and fbgemmConv API changes to take a float bias.

Reviewed By: jianyuh

Differential Revision: D17244262

fbshipit-source-id: 0531c829190d20e31cb957a3f1861d4a65645cee
-rw-r--r--  src/ExecuteKernelU8S8.cc       47
-rw-r--r--  src/Fbgemm.cc                  58
-rw-r--r--  src/FbgemmConv.cc              22
-rw-r--r--  src/GroupwiseConvAcc32Avx2.cc  13
-rw-r--r--  test/RequantizeOnlyTest.cc    169
-rw-r--r--  test/UniConvTest.cc           266
6 files changed, 453 insertions(+), 122 deletions(-)
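
What the new API surface looks like: ReQuantizeOutput gains a third template parameter BIAS_TYPE, and when it is float the constructor additionally takes the act_scale * weight_scale products so the kernel can rescale the bias. A minimal construction sketch, modeled on the reqObj_f setup in test/RequantizeOnlyTest.cc below (function and buffer names are illustrative, not part of the patch):

#include <cstdint>
#include <vector>
#include "fbgemm/Fbgemm.h"

using namespace fbgemm;

// Sketch: build a float-bias requantization pipeline at OUT_CHANNEL granularity.
void makeFloatBiasRequant(
    int cols,
    const std::vector<float>& C_multiplier,        // act_times_w_scale / out_scale
    const std::vector<std::int32_t>& B_zero_point,
    const std::vector<std::int32_t>& row_offsets,
    const std::vector<std::int32_t>& col_offsets,
    const std::vector<float>& bias_f,              // float bias: the new option
    const std::vector<float>& act_times_w_scale) {
  DoNothing<> doNothingObj{};
  // BIAS_TYPE = float is the new third template argument.
  ReQuantizeOutput<false, QuantizationGranularity::OUT_CHANNEL, float> reqObj(
      doNothingObj,
      C_multiplier.data(),
      /*C_zero_point=*/3,
      /*Aint8_zero_point=*/3,
      B_zero_point.data(),
      row_offsets.data(),
      col_offsets.data(),
      bias_f.data(),
      cols,
      /*groups=*/1,
      act_times_w_scale.data()); // lets the kernel map bias_f back to the int32 domain
  (void)reqObj; // would be passed to fbgemmPacked / fbgemmConv as before
}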
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc
index 0a4ff55..4ae1b50 100644
--- a/src/ExecuteKernelU8S8.cc
+++ b/src/ExecuteKernelU8S8.cc
@@ -315,19 +315,23 @@ void ExecuteKernel<
////////////////////////////////////////////////////////////////////////////////
// ReQuantizeOutput
-#define INSTANTIATE_REQUANT_BASE(PACK_A, ACC_T, RELU, Q_GRAN) \
- template class ExecuteKernel< \
- PACK_A<uint8_t, ACC_T>, \
- PackBMatrix<int8_t, ACC_T>, \
- uint8_t, \
- ReQuantizeOutput<RELU, Q_GRAN>>;
+#define INSTANTIATE_REQUANT_BASE(PACK_A, ACC_T, RELU, Q_GRAN, BIAS_TYPE) \
+ template class ExecuteKernel< \
+ PACK_A<uint8_t, ACC_T>, \
+ PackBMatrix<int8_t, ACC_T>, \
+ uint8_t, \
+ ReQuantizeOutput<RELU, Q_GRAN, BIAS_TYPE>>;
+
+#define INSTANTIATE_REQUANT_BIAS_T(PACK_A, ACC_T, RELU, Q_GRAN) \
+ INSTANTIATE_REQUANT_BASE(PACK_A, ACC_T, RELU, Q_GRAN, float); \
+ INSTANTIATE_REQUANT_BASE(PACK_A, ACC_T, RELU, Q_GRAN, int32_t);
#define INSTANTIATE_REQUANT_Q_GRANS(PACK_A, ACC_T, RELU) \
- INSTANTIATE_REQUANT_BASE( \
+ INSTANTIATE_REQUANT_BIAS_T( \
PACK_A, ACC_T, RELU, QuantizationGranularity::TENSOR); \
- INSTANTIATE_REQUANT_BASE( \
+ INSTANTIATE_REQUANT_BIAS_T( \
PACK_A, ACC_T, RELU, QuantizationGranularity::GROUP); \
- INSTANTIATE_REQUANT_BASE( \
+ INSTANTIATE_REQUANT_BIAS_T( \
PACK_A, ACC_T, RELU, QuantizationGranularity::OUT_CHANNEL);
#define INSTANTIATE_REQUANT_RELU(PACK_A, ACC_T) \
@@ -344,21 +348,27 @@ INSTANTIATE_REQUANT_ACC_T(PackAWithRowOffset);
#undef INSTANTIATE_REQUANT_ACC_T
#undef INSTANTIATE_REQUANT_RELU
#undef INSTANTIATE_REQUANT_Q_GRANS
+#undef INSTANTIATE_REQUANT_BIAS_T
#undef INSTANTIATE_REQUANT_BASE
-#define INSTANTIATE_IM2COL_REQUANT_BASE(ACC_T, RELU, SPATIAL_DIM, Q_GRAN) \
- template class ExecuteKernel< \
- PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>, \
- PackBMatrix<int8_t, ACC_T>, \
- uint8_t, \
- ReQuantizeOutput<RELU, Q_GRAN>>;
+#define INSTANTIATE_IM2COL_REQUANT_BASE( \
+ ACC_T, RELU, SPATIAL_DIM, Q_GRAN, BIAS_TYPE) \
+ template class ExecuteKernel< \
+ PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>, \
+ PackBMatrix<int8_t, ACC_T>, \
+ uint8_t, \
+ ReQuantizeOutput<RELU, Q_GRAN, BIAS_TYPE>>;
+
+#define INSTANTIATE_IM2COL_REQUANT_BIAS_T(ACC_T, RELU, SPATIAL_DIM, Q_GRAN) \
+ INSTANTIATE_IM2COL_REQUANT_BASE(ACC_T, RELU, SPATIAL_DIM, Q_GRAN, float); \
+ INSTANTIATE_IM2COL_REQUANT_BASE(ACC_T, RELU, SPATIAL_DIM, Q_GRAN, int32_t);
#define INSTANTIATE_IM2COL_REQUANT_Q_GRANS(ACC_T, RELU, SPATIAL_DIM) \
- INSTANTIATE_IM2COL_REQUANT_BASE( \
+ INSTANTIATE_IM2COL_REQUANT_BIAS_T( \
ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::TENSOR); \
- INSTANTIATE_IM2COL_REQUANT_BASE( \
+ INSTANTIATE_IM2COL_REQUANT_BIAS_T( \
ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::GROUP); \
- INSTANTIATE_IM2COL_REQUANT_BASE( \
+ INSTANTIATE_IM2COL_REQUANT_BIAS_T( \
ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::OUT_CHANNEL);
#define INSTANTIATE_IM2COL_REQUANT_SPATIAL_DIM(ACC_T, RELU) \
@@ -375,6 +385,7 @@ INSTANTIATE_IM2COL_REQUANT_RELU(int16_t);
#undef INSTANTIATE_IM2COL_REQUANT_RELU
#undef INSTANTIATE_IM2COL_REQUANT_SPATIAL_DIM
#undef INSTANTIATE_IM2COL_REQUANT_Q_GRANS
+#undef INSTANTIATE_IM2COL_REQUANT_BIAS_T
#undef INSTANTIATE_IM2COL_REQUANT_BASE
////////////////////////////////////////////////////////////////////////////////
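
The instantiation plumbing here (and repeated in Fbgemm.cc, FbgemmConv.cc, and GroupwiseConvAcc32Avx2.cc below) is mechanical: each *_BASE macro gains a BIAS_TYPE parameter, and a new *_BIAS_T layer fans every existing combination out over float and int32_t. For illustration, one INSTANTIATE_REQUANT_BIAS_T(PackAWithRowOffset, int32_t, false, QuantizationGranularity::TENSOR) invocation now expands to two explicit instantiations where there used to be one (expansion sketch, assuming int32_t is among the instantiated ACC_T types):

template class ExecuteKernel<
    PackAWithRowOffset<uint8_t, int32_t>,
    PackBMatrix<int8_t, int32_t>,
    uint8_t,
    ReQuantizeOutput<false, QuantizationGranularity::TENSOR, float>>;
template class ExecuteKernel<
    PackAWithRowOffset<uint8_t, int32_t>,
    PackBMatrix<int8_t, int32_t>,
    uint8_t,
    ReQuantizeOutput<false, QuantizationGranularity::TENSOR, int32_t>>;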
diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc
index 4f7026f..ade851f 100644
--- a/src/Fbgemm.cc
+++ b/src/Fbgemm.cc
@@ -237,22 +237,26 @@ bool fbgemmSupportedCPU() {
////////////////////////////////////////////////////////////////////////////////
// ReQuantizeOutput
-#define INSTANTIATE_BASE(PACK_A, ACC_T, RELU, Q_GRAN) \
+#define INSTANTIATE_BASE(PACK_A, ACC_T, RELU, Q_GRAN, BIAS_TYPE) \
template void fbgemmPacked( \
PackMatrix<PACK_A<uint8_t, ACC_T>, uint8_t, ACC_T>& packA, \
PackMatrix<PackBMatrix<int8_t, ACC_T>, int8_t, ACC_T>& packB, \
uint8_t* C, \
int32_t* C_buffer, \
uint32_t ldc, \
- const ReQuantizeOutput<RELU, Q_GRAN>& outProcess, \
+ const ReQuantizeOutput<RELU, Q_GRAN, BIAS_TYPE>& outProcess, \
int thread_id, \
int num_threads, \
const BlockingFactors* blocking_params);
-#define INSTANTIATE_Q_GRANS(PACK_A, ACC_T, RELU) \
- INSTANTIATE_BASE(PACK_A, ACC_T, RELU, QuantizationGranularity::TENSOR); \
- INSTANTIATE_BASE(PACK_A, ACC_T, RELU, QuantizationGranularity::GROUP); \
- INSTANTIATE_BASE(PACK_A, ACC_T, RELU, QuantizationGranularity::OUT_CHANNEL);
+#define INSTANTIATE_BIAS_T(PACK_A, ACC_T, RELU, Q_GRAN) \
+ INSTANTIATE_BASE(PACK_A, ACC_T, RELU, Q_GRAN, float); \
+ INSTANTIATE_BASE(PACK_A, ACC_T, RELU, Q_GRAN, int32_t);
+
+#define INSTANTIATE_Q_GRANS(PACK_A, ACC_T, RELU) \
+ INSTANTIATE_BIAS_T(PACK_A, ACC_T, RELU, QuantizationGranularity::TENSOR); \
+ INSTANTIATE_BIAS_T(PACK_A, ACC_T, RELU, QuantizationGranularity::GROUP); \
+ INSTANTIATE_BIAS_T(PACK_A, ACC_T, RELU, QuantizationGranularity::OUT_CHANNEL);
#define INSTANTIATE_RELU(PACK_A, ACC_T) \
INSTANTIATE_Q_GRANS(PACK_A, ACC_T, false); \
@@ -268,27 +272,34 @@ INSTANTIATE_ACC_T(PackAWithRowOffset);
#undef INSTANTIATE_ACC_T
#undef INSTANTIATE_RELU
#undef INSTANTIATE_Q_GRANS
+#undef INSTANTIATE_BIAS_T
#undef INSTANTIATE_BASE
-#define INSTANTIATE_BASE(ACC_T, RELU, SPATIAL_DIM, Q_GRAN) \
- template void fbgemmPacked( \
- PackMatrix< \
- PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>, \
- uint8_t, \
- ACC_T>& packA, \
- PackMatrix<PackBMatrix<int8_t, ACC_T>, int8_t, ACC_T>& packB, \
- uint8_t* C, \
- int32_t* C_buffer, \
- uint32_t ldc, \
- const ReQuantizeOutput<RELU, Q_GRAN>& outProcess, \
- int thread_id, \
- int num_threads, \
+#define INSTANTIATE_BASE(ACC_T, RELU, SPATIAL_DIM, Q_GRAN, BIAS_TYPE) \
+ template void fbgemmPacked( \
+ PackMatrix< \
+ PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>, \
+ uint8_t, \
+ ACC_T>& packA, \
+ PackMatrix<PackBMatrix<int8_t, ACC_T>, int8_t, ACC_T>& packB, \
+ uint8_t* C, \
+ int32_t* C_buffer, \
+ uint32_t ldc, \
+ const ReQuantizeOutput<RELU, Q_GRAN, BIAS_TYPE>& outProcess, \
+ int thread_id, \
+ int num_threads, \
const BlockingFactors* blocking_params);
-#define INSTANTIATE_Q_GRANS(ACC_T, RELU, SPATIAL_DIM) \
- INSTANTIATE_BASE(ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::TENSOR); \
- INSTANTIATE_BASE(ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::GROUP); \
- INSTANTIATE_BASE( \
+#define INSTANTIATE_BIAS_T(ACC_T, RELU, SPATIAL_DIM, Q_GRAN) \
+ INSTANTIATE_BASE(ACC_T, RELU, SPATIAL_DIM, Q_GRAN, float); \
+ INSTANTIATE_BASE(ACC_T, RELU, SPATIAL_DIM, Q_GRAN, int32_t);
+
+#define INSTANTIATE_Q_GRANS(ACC_T, RELU, SPATIAL_DIM) \
+ INSTANTIATE_BIAS_T( \
+ ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::TENSOR); \
+ INSTANTIATE_BIAS_T( \
+ ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::GROUP); \
+ INSTANTIATE_BIAS_T( \
ACC_T, RELU, SPATIAL_DIM, QuantizationGranularity::OUT_CHANNEL);
#define INSTANTIATE_SPATIAL_DIM(ACC_T, RELU) \
@@ -305,6 +316,7 @@ INSTANTIATE_RELU(int16_t);
#undef INSTANTIATE_RELU
#undef INSTANTIATE_SPATIAL_DIM
#undef INSTANTIATE_Q_GRANS
+#undef INSTANTIATE_BIAS_T
#undef INSTANTIATE_BASE
////////////////////////////////////////////////////////////////////////////////
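
Call-site impact is minimal: the new tests construct ReQuantizeOutput with both two and three template arguments, which implies BIAS_TYPE defaults to int32_t, so existing fbgemmPacked callers compile unchanged. A compile-time sketch of the caller-visible difference (alias names are hypothetical):

#include <type_traits>
#include "fbgemm/Fbgemm.h"

// The whole opt-in is one extra template argument.
using RequantInt32Bias =
    fbgemm::ReQuantizeOutput<false, fbgemm::QuantizationGranularity::TENSOR>;
using RequantFloatBias =
    fbgemm::ReQuantizeOutput<false, fbgemm::QuantizationGranularity::TENSOR, float>;
static_assert(
    !std::is_same<RequantInt32Bias, RequantFloatBias>::value,
    "float bias selects a distinct instantiation");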
diff --git a/src/FbgemmConv.cc b/src/FbgemmConv.cc
index 6a1e55b..5a486c0 100644
--- a/src/FbgemmConv.cc
+++ b/src/FbgemmConv.cc
@@ -89,6 +89,7 @@ int fbgemmConv(
// std::cout << "Depthwise fast path" << std::endl;
const std::int32_t* B_zero_point = outProcess.getBZeroPoint();
const float* C_multiplier = outProcess.getCMultiplier();
+ const float* act_times_w_scale = outProcess.getActWScale();
if (SPATIAL_DIM == 3) {
static_assert(
std::is_same<typename processOutputType::outType, std::uint8_t>::
@@ -115,7 +116,7 @@ int fbgemmConv(
outProcess.getColOffsets(),
outProcess.getBias(),
outProcess.RELU_FUSED, // fuse_relu
- 1.0f, // act_scale * weight_scale
+ act_times_w_scale ? act_times_w_scale[0] : 1.0f,
thread_id,
num_threads);
} else if (
@@ -141,7 +142,7 @@ int fbgemmConv(
outProcess.getColOffsets(),
outProcess.getBias(),
outProcess.RELU_FUSED, // fuse_relu
- nullptr, // act_scale * weight_scale
+ outProcess.getActWScale(), // act_scale * weight_scale
thread_id,
num_threads);
} else {
@@ -169,7 +170,7 @@ int fbgemmConv(
outProcess.getColOffsets(),
outProcess.getBias(),
outProcess.RELU_FUSED, // fuse_relu
- 1.0f, // act_scale * weight_scale
+ act_times_w_scale ? act_times_w_scale[0] : 1.0f,
thread_id,
num_threads);
} else if (
@@ -194,7 +195,7 @@ int fbgemmConv(
outProcess.getColOffsets(),
outProcess.getBias(),
outProcess.RELU_FUSED, // fuse_relu
- nullptr, // act_scale * weight_scale
+ outProcess.getActWScale(), // act_scale * weight_scale
thread_id,
num_threads);
} else {
@@ -316,21 +317,25 @@ int fbgemmConv(
return 0;
}
-#define INSTANTIATE_BASE(ACC_T, Q_GRAN, RELU, SPATIAL_DIM) \
+#define INSTANTIATE_BASE(ACC_T, Q_GRAN, RELU, SPATIAL_DIM, BIAS_TYPE) \
template int fbgemmConv( \
const conv_param_t<SPATIAL_DIM>& conv_p, \
const std::uint8_t* activations, \
PackWeightsForConv<SPATIAL_DIM, std::int8_t, ACC_T>& packed_weights, \
std::uint8_t* out, \
std::int32_t* outBuffer, \
- ReQuantizeOutput<RELU, Q_GRAN>& outProcess, \
+ ReQuantizeOutput<RELU, Q_GRAN, BIAS_TYPE>& outProcess, \
int thread_id, \
int num_threads, \
const BlockingFactors* blocking_params);
+#define INSTANTIATE_BIAS_T(ACC_T, Q_GRAN, RELU, SPATIAL_DIM) \
+ INSTANTIATE_BASE(ACC_T, Q_GRAN, RELU, SPATIAL_DIM, float); \
+ INSTANTIATE_BASE(ACC_T, Q_GRAN, RELU, SPATIAL_DIM, int32_t);
+
#define INSTANTIATE_SPATIAL_DIM(ACC_T, Q_GRAN, RELU) \
- INSTANTIATE_BASE(ACC_T, Q_GRAN, RELU, 2); \
- INSTANTIATE_BASE(ACC_T, Q_GRAN, RELU, 3);
+ INSTANTIATE_BIAS_T(ACC_T, Q_GRAN, RELU, 2); \
+ INSTANTIATE_BIAS_T(ACC_T, Q_GRAN, RELU, 3);
#define INSTANTIATE_RELU(ACC_T, Q_GRAN) \
INSTANTIATE_SPATIAL_DIM(ACC_T, Q_GRAN, true); \
@@ -346,6 +351,7 @@ INSTANTIATE_Q_GRANS(std::int32_t);
#undef INSTANTIATE_Q_GRANS
#undef INSTANTIATE_RELU
#undef INSTANTIATE_SPATIAL_DIM
+#undef INSTANTIATE_BIAS_T
#undef INSTANTIATE_BASE
template bool takeDepthWiseFastPath<2, std::int32_t>(
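
On the conv path the float bias must be consistent with the act_times_w_scale array that now flows through outProcess.getActWScale(): the new tests derive it from an int32 bias by rescaling per quantization group. A hypothetical standalone helper (not part of the patch) showing that derivation at OUT_CHANNEL granularity, where there is one scale per output channel:

#include <cstddef>
#include <cstdint>
#include <vector>

// bias_f[c] = (act_scale * weight_scale)[c] * bias_q[c], per output channel.
std::vector<float> toFloatBias(
    const std::vector<std::int32_t>& bias_q,
    const std::vector<float>& act_times_w_scale) {
  std::vector<float> bias_f(bias_q.size());
  for (std::size_t c = 0; c < bias_q.size(); ++c) {
    bias_f[c] = act_times_w_scale[c] * static_cast<float>(bias_q[c]);
  }
  return bias_f;
}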
diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc
index 40f3fba..4ba3549 100644
--- a/src/GroupwiseConvAcc32Avx2.cc
+++ b/src/GroupwiseConvAcc32Avx2.cc
@@ -2204,7 +2204,7 @@ int rowOffsetBufferSizeGConv(const conv_param_t<SPATIAL_DIM>& conv_param) {
template int rowOffsetBufferSizeGConv<2>(const conv_param_t<2>& conv_param);
template int rowOffsetBufferSizeGConv<3>(const conv_param_t<3>& conv_param);
-#define INSTANTIATE_BASE(RELU, Q_GRAN, SPATIAL_DIM) \
+#define INSTANTIATE_BASE(RELU, Q_GRAN, SPATIAL_DIM, BIAS_TYPE) \
template void fbgemmGroupwiseConv( \
const conv_param_t<SPATIAL_DIM>& conv_param, \
const uint8_t* activations, \
@@ -2213,13 +2213,17 @@ template int rowOffsetBufferSizeGConv<3>(const conv_param_t<3>& conv_param);
PackWeightMatrixForGConv<int8_t, int32_t, SPATIAL_DIM>& packed_weights, \
uint8_t* out, \
int32_t* outBuffer, \
- const ReQuantizeOutput<RELU, Q_GRAN>& outProcess, \
+ const ReQuantizeOutput<RELU, Q_GRAN, BIAS_TYPE>& outProcess, \
int thread_id, \
int num_threads);
+#define INSTANTIATE_BIAS_T(RELU, Q_GRAN, SPATIAL_DIM) \
+ INSTANTIATE_BASE(RELU, Q_GRAN, SPATIAL_DIM, float); \
+ INSTANTIATE_BASE(RELU, Q_GRAN, SPATIAL_DIM, int32_t);
+
#define INSTANTIATE_SPATIAL_DIM(RELU, Q_GRAN) \
- INSTANTIATE_BASE(RELU, Q_GRAN, 2); \
- INSTANTIATE_BASE(RELU, Q_GRAN, 3);
+ INSTANTIATE_BIAS_T(RELU, Q_GRAN, 2); \
+ INSTANTIATE_BIAS_T(RELU, Q_GRAN, 3);
#define INSTANTIATE_Q_GRANS(RELU) \
INSTANTIATE_SPATIAL_DIM(RELU, QuantizationGranularity::TENSOR); \
@@ -2231,6 +2235,7 @@ INSTANTIATE_Q_GRANS(true);
#undef INSTANTIATE_Q_GRANS
#undef INSTANTIATE_SPATIAL_DIM
+#undef INSTANTIATE_BIAS_T
#undef INSTANTIATE_BASE
template void fbgemmGroupwiseConv(
diff --git a/test/RequantizeOnlyTest.cc b/test/RequantizeOnlyTest.cc
new file mode 100644
index 0000000..2f73d49
--- /dev/null
+++ b/test/RequantizeOnlyTest.cc
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * All rights reserved.
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <algorithm>
+#include <functional>
+#include <iostream>
+#include <random>
+#include <stdexcept>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "TestUtils.h"
+#include "bench/BenchUtils.h"
+#include "fbgemm/Fbgemm.h"
+
+using namespace std;
+using namespace fbgemm;
+
+vector<QuantizationGranularity> qGranularityVals{
+ QuantizationGranularity::TENSOR,
+ QuantizationGranularity::OUT_CHANNEL};
+
+namespace {
+
+// tuple represents #rows, #cols, fuse_relu, quantization_granularity
+class FloatRequantizeTest
+ : public testing::TestWithParam<
+ tuple<int, int, bool, QuantizationGranularity>> {};
+
+}; // namespace
+
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ FloatRequantizeTest,
+ ::testing::Combine(
+ ::testing::ValuesIn({1, 2, 3, 4}), // number of rows
+ ::testing::ValuesIn(
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20, 32}), // number of
+ // cols
+ ::testing::Bool(), // fuse relu
+ ::testing::ValuesIn(qGranularityVals))); // requantization granularity
+
+/**
+ * Test for float bias
+ */
+TEST_P(FloatRequantizeTest, floatBiasTest) {
+ int rows, cols;
+ bool fuse_relu;
+ QuantizationGranularity q_gran;
+ tie(rows, cols, fuse_relu, q_gran) = GetParam();
+
+ int numElements = rows * cols;
+
+ aligned_vector<float> act_times_w_scale(cols);
+ randFill<float>(act_times_w_scale, -8, 8);
+
+ float out_scale = 2.0f;
+
+ aligned_vector<float> C_multiplier(cols);
+ transform(
+ act_times_w_scale.begin(),
+ act_times_w_scale.end(),
+ C_multiplier.begin(),
+ [&out_scale](float i) { return i / out_scale; });
+
+ aligned_vector<int32_t> Bint8_zero_point(cols);
+ randFill<int32_t>(Bint8_zero_point, -8, 8);
+
+ aligned_vector<int32_t> row_offset_buf(rows);
+ randFill<int32_t>(row_offset_buf, -8, 8);
+
+ aligned_vector<int32_t> col_offsets(cols);
+ randFill<int32_t>(col_offsets, -8, 8);
+
+ // quantized bias
+ aligned_vector<int32_t> bias_q(cols);
+ randFill<int32_t>(bias_q, -8, 8);
+
+ // floating point bias
+ aligned_vector<float> bias_f(cols);
+ if (q_gran == QuantizationGranularity::TENSOR) {
+ transform(
+ bias_q.begin(),
+ bias_q.end(),
+ bias_f.begin(),
+ [&act_times_w_scale](float i) { return i * act_times_w_scale[0]; });
+ } else if (q_gran == QuantizationGranularity::OUT_CHANNEL) {
+ transform(
+ act_times_w_scale.begin(),
+ act_times_w_scale.end(),
+ bias_q.begin(),
+ bias_f.begin(),
+ multiplies<float>());
+
+ } else {
+ FAIL();
+ }
+
+ aligned_vector<int32_t> input(numElements);
+ randFill<int32_t>(input, -8, 8);
+
+ aligned_vector<uint8_t> output_q_bias(numElements);
+ aligned_vector<uint8_t> output_f_bias(numElements);
+
+ int32_t C_zero_point = 3;
+ int32_t Aint8_zero_point = 3;
+
+ block_type_t block{0, rows, 0, cols};
+
+ DoNothing<> doNothingObj{};
+
+#define TESTCODE(FUSE_RELU, Q_GRAN) \
+ ReQuantizeOutput<FUSE_RELU, Q_GRAN> reqObj_q( \
+ doNothingObj, \
+ C_multiplier.data(), \
+ C_zero_point, \
+ Aint8_zero_point, \
+ Bint8_zero_point.data(), \
+ row_offset_buf.data(), \
+ col_offsets.data(), \
+ bias_q.data(), \
+ cols); \
+ ReQuantizeOutput<FUSE_RELU, Q_GRAN, float> reqObj_f( \
+ doNothingObj, \
+ C_multiplier.data(), \
+ C_zero_point, \
+ Aint8_zero_point, \
+ Bint8_zero_point.data(), \
+ row_offset_buf.data(), \
+ col_offsets.data(), \
+ bias_f.data(), \
+ cols, \
+ 1, \
+ act_times_w_scale.data()); \
+ reqObj_q.f<inst_set_t::avx2>( \
+ output_q_bias.data(), input.data(), block, cols, cols); \
+ reqObj_f.f<inst_set_t::avx2>( \
+ output_f_bias.data(), input.data(), block, cols, cols);
+
+ if (fuse_relu) {
+ if (q_gran == QuantizationGranularity::TENSOR) {
+ TESTCODE(true, QuantizationGranularity::TENSOR)
+
+ } else if (q_gran == QuantizationGranularity::OUT_CHANNEL) {
+ TESTCODE(true, QuantizationGranularity::OUT_CHANNEL)
+
+ } else {
+ FAIL();
+ }
+
+ } else {
+ if (q_gran == QuantizationGranularity::TENSOR) {
+ TESTCODE(false, QuantizationGranularity::TENSOR)
+
+ } else if (q_gran == QuantizationGranularity::OUT_CHANNEL) {
+ TESTCODE(false, QuantizationGranularity::OUT_CHANNEL)
+
+ } else {
+ FAIL();
+ }
+ }
+#undef TESTCODE
+ ASSERT_EQ(output_q_bias, output_f_bias)
+ << "Requantization with quantized bias and float bias differs";
+}
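
Why exact equality is a reasonable assertion here: bias_f is constructed as bias_q * act_times_w_scale, so a float-bias kernel that rescales by 1 / act_times_w_scale (the behavior this test pins down) recovers the original integer-valued bias (the small integer test values survive the float round trip) and then performs the same (acc + bias) * C_multiplier requantization as the int32 path, making the uint8 outputs bit-identical.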
diff --git a/test/UniConvTest.cc b/test/UniConvTest.cc
index 21bdcb5..83674ce 100644
--- a/test/UniConvTest.cc
+++ b/test/UniConvTest.cc
@@ -88,8 +88,11 @@ class uniConvTest
: public testing::TestWithParam<
tuple<int, int, int, int, int, int, int, int, int, int>> {};
-class UniConvQGranTest : public testing::TestWithParam<
- tuple<QuantizationGranularity, bool, bool>> {};
+// tuple represents QuantizationGranularity, A symmetric, B symmetric,
+// test_bias, test_float_bias
+class UniConvQGranTest
+ : public testing::TestWithParam<
+ tuple<QuantizationGranularity, bool, bool, bool, bool>> {};
}; // namespace
@@ -115,7 +118,9 @@ INSTANTIATE_TEST_CASE_P(
::testing::Combine(
::testing::ValuesIn(qGranularityVals),
::testing::Bool(), // A symmetric
- ::testing::Bool())); // B symmetric
+ ::testing::Bool(), // B symmetric
+ ::testing::Bool(), // test_bias
+ ::testing::Bool())); // test_float_bias
/**
* Test for conv packing
*/
@@ -403,7 +408,9 @@ TEST_P(UniConvQGranTest, requantizeTest) {
vector<conv_param_t<>> shapes(GetShapes_());
QuantizationGranularity q_granularity;
bool a_symmetric, b_symmetric;
- tie(q_granularity, a_symmetric, b_symmetric) = GetParam();
+ bool test_bias, test_float_bias;
+ tie(q_granularity, a_symmetric, b_symmetric, test_bias, test_float_bias) =
+ GetParam();
for (auto conv_p : shapes) {
int R = conv_p.K[0];
@@ -463,10 +470,51 @@ TEST_P(UniConvQGranTest, requantizeTest) {
vector<int32_t> row_offsets(MDim);
+ // activation_scale * weight_scale
+ aligned_vector<float> act_times_w_scale(Bint8_zero_point.size());
+ randFill(act_times_w_scale, 0.1234f / 2, 0.1234f * 3 / 2);
+
+ float out_scale = 2.0f;
aligned_vector<float> C_multiplier(Bint8_zero_point.size());
- randFill(C_multiplier, 0.1234f / 2, 0.1234f * 3 / 2);
+ transform(
+ act_times_w_scale.begin(),
+ act_times_w_scale.end(),
+ C_multiplier.begin(),
+ [&out_scale](float i) { return i / out_scale; });
+
int32_t C_zero_pt = 5;
+ // initialize bias
+ aligned_vector<int32_t> bias_int32(OC);
+ aligned_vector<float> bias_fp32(OC);
+ if (test_bias) {
+ randFill(bias_int32, -8, 8);
+ }
+
+ // floating point bias
+ if (test_float_bias) {
+ if (q_granularity == QuantizationGranularity::TENSOR) {
+ transform(
+ bias_int32.begin(),
+ bias_int32.end(),
+ bias_fp32.begin(),
+ [&act_times_w_scale](float i) { return i * act_times_w_scale[0]; });
+ } else if (q_granularity == QuantizationGranularity::GROUP) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < OC_per_G; ++c) {
+ bias_fp32[g * OC_per_G + c] = act_times_w_scale[g] *
+ static_cast<float>(bias_int32[g * OC_per_G + c]);
+ }
+ }
+ } else { // OUT_CHANNEL
+ transform(
+ act_times_w_scale.begin(),
+ act_times_w_scale.end(),
+ bias_int32.begin(),
+ bias_fp32.begin(),
+ multiplies<float>());
+ }
+ }
// reference implementation
// conv_ref expects weights to be in G (R S C/G) K/G
int8_t* rightBData = Bint8.data();
@@ -505,7 +553,7 @@ TEST_P(UniConvQGranTest, requantizeTest) {
Bint8_zero_point.data() + g * NDim / ncols_per_quant_group,
row_offsets.data(),
col_offsets.data() + g * NDim,
- nullptr,
+ test_bias ? bias_int32.data() + g * NDim : nullptr,
ncols_per_quant_group);
}
@@ -524,73 +572,153 @@ TEST_P(UniConvQGranTest, requantizeTest) {
int tid = fbgemm_get_thread_num();
if (q_granularity == QuantizationGranularity::TENSOR) {
- ReQuantizeOutput<false, QuantizationGranularity::TENSOR> reqObj(
- doNothingObj,
- C_multiplier.data(),
- C_zero_pt,
- Aint8_zero_point,
- Bint8_zero_point.data(),
- nullptr, /* row offset buffer */
- col_offsets.data(),
- nullptr,
- G * NDim,
- G);
-
- fbgemmConv(
- conv_p,
- Aint8.data(),
- packedWeights,
- Cint8_fb.data(),
- Cint32_fb.data(),
- reqObj,
- tid,
- num_threads);
+ if (test_float_bias) {
+ ReQuantizeOutput<false, QuantizationGranularity::TENSOR, float>
+ reqObj(
+ doNothingObj,
+ C_multiplier.data(),
+ C_zero_pt,
+ Aint8_zero_point,
+ Bint8_zero_point.data(),
+ nullptr, /* row offset buffer */
+ col_offsets.data(),
+ test_bias ? bias_fp32.data() : nullptr,
+ G * NDim,
+ G,
+ act_times_w_scale.data());
+
+ fbgemmConv(
+ conv_p,
+ Aint8.data(),
+ packedWeights,
+ Cint8_fb.data(),
+ Cint32_fb.data(),
+ reqObj,
+ tid,
+ num_threads);
+
+ } else {
+ ReQuantizeOutput<false, QuantizationGranularity::TENSOR> reqObj(
+ doNothingObj,
+ C_multiplier.data(),
+ C_zero_pt,
+ Aint8_zero_point,
+ Bint8_zero_point.data(),
+ nullptr, /* row offset buffer */
+ col_offsets.data(),
+ test_bias ? bias_int32.data() : nullptr,
+ G * NDim,
+ G);
+
+ fbgemmConv(
+ conv_p,
+ Aint8.data(),
+ packedWeights,
+ Cint8_fb.data(),
+ Cint32_fb.data(),
+ reqObj,
+ tid,
+ num_threads);
+ }
} else if (q_granularity == QuantizationGranularity::GROUP) {
- ReQuantizeOutput<false, QuantizationGranularity::GROUP> reqObj(
- doNothingObj,
- C_multiplier.data(),
- C_zero_pt,
- Aint8_zero_point,
- Bint8_zero_point.data(),
- nullptr, /* row offset buffer */
- col_offsets.data(),
- nullptr,
- G * NDim,
- G);
-
- fbgemmConv(
- conv_p,
- Aint8.data(),
- packedWeights,
- Cint8_fb.data(),
- Cint32_fb.data(),
- reqObj,
- tid,
- num_threads);
+ if (test_float_bias) {
+ ReQuantizeOutput<false, QuantizationGranularity::GROUP, float> reqObj(
+ doNothingObj,
+ C_multiplier.data(),
+ C_zero_pt,
+ Aint8_zero_point,
+ Bint8_zero_point.data(),
+ nullptr, /* row offset buffer */
+ col_offsets.data(),
+ test_bias ? bias_fp32.data() : nullptr,
+ G * NDim,
+ G,
+ act_times_w_scale.data());
+
+ fbgemmConv(
+ conv_p,
+ Aint8.data(),
+ packedWeights,
+ Cint8_fb.data(),
+ Cint32_fb.data(),
+ reqObj,
+ tid,
+ num_threads);
+
+ } else {
+ ReQuantizeOutput<false, QuantizationGranularity::GROUP> reqObj(
+ doNothingObj,
+ C_multiplier.data(),
+ C_zero_pt,
+ Aint8_zero_point,
+ Bint8_zero_point.data(),
+ nullptr, /* row offset buffer */
+ col_offsets.data(),
+ test_bias ? bias_int32.data() : nullptr,
+ G * NDim,
+ G);
+
+ fbgemmConv(
+ conv_p,
+ Aint8.data(),
+ packedWeights,
+ Cint8_fb.data(),
+ Cint32_fb.data(),
+ reqObj,
+ tid,
+ num_threads);
+ }
} else {
- ReQuantizeOutput<false, QuantizationGranularity::OUT_CHANNEL> reqObj(
- doNothingObj,
- C_multiplier.data(),
- C_zero_pt,
- Aint8_zero_point,
- Bint8_zero_point.data(),
- nullptr, /* row offset buffer */
- col_offsets.data(),
- nullptr,
- G * NDim,
- G);
-
- fbgemmConv(
- conv_p,
- Aint8.data(),
- packedWeights,
- Cint8_fb.data(),
- Cint32_fb.data(),
- reqObj,
- tid,
- num_threads);
+ if (test_float_bias) {
+ ReQuantizeOutput<false, QuantizationGranularity::OUT_CHANNEL, float>
+ reqObj(
+ doNothingObj,
+ C_multiplier.data(),
+ C_zero_pt,
+ Aint8_zero_point,
+ Bint8_zero_point.data(),
+ nullptr, /* row offset buffer */
+ col_offsets.data(),
+ test_bias ? bias_fp32.data() : nullptr,
+ G * NDim,
+ G,
+ act_times_w_scale.data());
+
+ fbgemmConv(
+ conv_p,
+ Aint8.data(),
+ packedWeights,
+ Cint8_fb.data(),
+ Cint32_fb.data(),
+ reqObj,
+ tid,
+ num_threads);
+
+ } else {
+ ReQuantizeOutput<false, QuantizationGranularity::OUT_CHANNEL> reqObj(
+ doNothingObj,
+ C_multiplier.data(),
+ C_zero_pt,
+ Aint8_zero_point,
+ Bint8_zero_point.data(),
+ nullptr, /* row offset buffer */
+ col_offsets.data(),
+ test_bias ? bias_int32.data() : nullptr,
+ G * NDim,
+ G);
+
+ fbgemmConv(
+ conv_p,
+ Aint8.data(),
+ packedWeights,
+ Cint8_fb.data(),
+ Cint32_fb.data(),
+ reqObj,
+ tid,
+ num_threads);
+ }
}
} // omp parallel