diff options
author | Daya Khudia <dskhudia@fb.com> | 2019-06-20 22:13:35 +0300
---|---|---
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2019-06-20 22:21:51 +0300
commit | 5b64af1469cf629aa7beb934eb898fd1e0b02719 (patch)
tree | dddef8da6e597f1c118a18cfe5ff421e97df0a88
parent | 604575ff5de717b2ee712190634840981a9c8fba (diff)
Per channel and groupwise quantization (#99)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/99
A function to do per channel and groupwise quantization
Reviewed By: jspark1105
Differential Revision: D15567272
fbshipit-source-id: e2f326ea7c7463b5c47b3f590e003344a9e41960
-rw-r--r-- | include/fbgemm/QuantUtils.h | 35
-rw-r--r-- | include/fbgemm/Utils.h | 7
-rw-r--r-- | src/QuantUtils.cc | 109
-rw-r--r-- | test/QuantUtilsTest.cc | 157
4 files changed, 308 insertions, 0 deletions
diff --git a/include/fbgemm/QuantUtils.h b/include/fbgemm/QuantUtils.h index 43855d8..508ce7d 100644 --- a/include/fbgemm/QuantUtils.h +++ b/include/fbgemm/QuantUtils.h @@ -7,6 +7,7 @@ #include <limits> #include "FbgemmBuild.h" #include "QuantUtilsAvx2.h" +#include "Utils.h" namespace fbgemm { @@ -78,6 +79,40 @@ FBGEMM_API void Quantize( int len, const TensorQuantizationParams& qparams); +/* + * @brief Quantize floating point data in src to type T + * + * @tparam T output quantized data type (int8_t, uint8_t and int32_t are + * supported) + * + * @tparam LAYOUT layout of input tensor in src. (KCX and KXC are supported) + * KCX corresponds to KCRS or KCTRS (for weight tensors with + * time dimension) + * KXC corresponds to KRSC or KTRSC (for weight tensors with + * time dimension) + * + * @params K Output channels for weight tensors + * @params C Number of channels + * @params X R*S or T*R*S + * @params G Groups (if G == C the function performs channelwise quantization; + * if 1 < G < C the function performs groupwise quantization; + * if G == 1 the function performs per tensor quantization;) + * @params scales floating point scales. + * Size should be equal to G + * @params zero_points zero points (should be representable in type T). + * Size should be equal to G + */ +template <typename T, layout_t LAYOUT = layout_t::KCX> +FBGEMM_API void QuantizeGroupwise( + const float* src, + int K, + int C, + int X, + int G, + const float* scales, + const std::int32_t* zero_points, + T* dst); + template <typename T> FBGEMM_API float Dequantize(T src, const TensorQuantizationParams& qparams) { return qparams.scale * (src - qparams.zero_point); diff --git a/include/fbgemm/Utils.h b/include/fbgemm/Utils.h index 1a35aa1..636abc7 100644 --- a/include/fbgemm/Utils.h +++ b/include/fbgemm/Utils.h @@ -44,6 +44,13 @@ enum class optimized_conv_t { depthwise, groupwise, im2col }; enum class impl_type_t { ref, opt }; /** + * @brief Typed enum to specify data layout.
+ * KCX can be KCRS format or KCTRS format (e.g., for 3-D convolutions) + * KXC can be KRSC format or KTRSC format (e.g., for 3-D convolutions) + */ +enum class layout_t { KCX, KXC }; + +/** * @brief A function to compare data in two buffers for closeness/equality. */ template <typename T> diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc index 1ab00d1..5dde90b 100644 --- a/src/QuantUtils.cc +++ b/src/QuantUtils.cc @@ -188,6 +188,115 @@ void Quantize<uint8_t>( } } +#define FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(T) \ + template <> \ + void QuantizeGroupwise<T, layout_t::KCX>( \ + const float* src, \ + int N, \ + int C, \ + int X, \ + int G, \ + const float* scales, \ + const std::int32_t* zero_points, \ + T* dst) { \ + assert(C % G == 0); \ + int C_per_G = C / G; \ + for (int i = 0; i < N; ++i) { \ + for (int g = 0; g < G; ++g) { \ + float scale = scales[g]; \ + int32_t zero_point = zero_points[g]; \ + for (int c = 0; c < C / G; ++c) { \ + for (int x = 0; x < X; ++x) { \ + dst[(i * C + g * C_per_G + c) * X + x] = Quantize<T>( \ + src[(i * C + g * C_per_G + c) * X + x], \ + zero_point, \ + scale, \ + 8 * sizeof(T)); \ + } \ + } \ + } \ + } \ + } +FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(int8_t) +FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(int32_t) +#undef FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX + +template <> +void QuantizeGroupwise<uint8_t, layout_t::KCX>( + const float* src, + int K, + int C, + int X, + int G, + const float* scales, + const std::int32_t* zero_points, + uint8_t* dst) { + assert(C % G == 0); + int C_per_G = C / G; + fbgemm::TensorQuantizationParams qparams; + qparams.precision = 8 * sizeof(uint8_t); + bool takeFastPath = + cpuinfo_initialize() && fbgemmHasAvx2Support() && cpuinfo_has_x86_fma3(); + + for (int i = 0; i < K; ++i) { + for (int g = 0; g < G; ++g) { + qparams.scale = scales[g]; + qparams.zero_point = zero_points[g]; + if (takeFastPath) { + QuantizeAvx2( + src + (i * C + g * C_per_G) * X, + dst + (i * C + g * C_per_G) * X, + C_per_G * X, + 
qparams); + } else { + for (int c = 0; c < C / G; ++c) { + for (int x = 0; x < X; ++x) { + dst[(i * C + g * C_per_G + c) * X + x] = Quantize<uint8_t>( + src[(i * C + g * C_per_G + c) * X + x], + qparams.zero_point, + qparams.scale, + qparams.precision); + } + } + } + } + } +} + +#define FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(T) \ + template <> \ + void QuantizeGroupwise<T, layout_t::KXC>( \ + const float* src, \ + int K, \ + int C, \ + int X, \ + int G, \ + const float* scales, \ + const std::int32_t* zero_points, \ + T* dst) { \ + assert(C % G == 0); \ + int C_per_G = C / G; \ + for (int i = 0; i < K; ++i) { \ + for (int x = 0; x < X; ++x) { \ + for (int g = 0; g < G; ++g) { \ + float scale = scales[g]; \ + int32_t zero_point = zero_points[g]; \ + for (int c = 0; c < C / G; ++c) { \ + dst[(i * X + x) * C + g * C_per_G + c] = Quantize<T>( \ + src[(i * X + x) * C + g * C_per_G + c], \ + zero_point, \ + scale, \ + 8 * sizeof(T)); \ + } \ + } \ + } \ + } \ + } +FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(int8_t) +FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(uint8_t) +FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(int32_t) +#undef FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC + //////////////////////////////////////////////////////////////////////////////// // Requantization (pure fixed-point) diff --git a/test/QuantUtilsTest.cc b/test/QuantUtilsTest.cc new file mode 100644 index 0000000..2bbd05e --- /dev/null +++ b/test/QuantUtilsTest.cc @@ -0,0 +1,157 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include <algorithm> +#include <limits> +#include <random> + +#include <gtest/gtest.h> + +#include "fbgemm/QuantUtils.h" +#include "fbgemm/Utils.h" + +using namespace std; +using namespace fbgemm; + +// tuple represents K, C, X, G, layout_t +// layout_t can be KCX or KXC +class QuantizeGroupwiseTest + : public testing::TestWithParam<tuple<int, int, int, int, layout_t>> {}; + +INSTANTIATE_TEST_CASE_P( + InstantiationName, + QuantizeGroupwiseTest, + ::testing::Combine( + ::testing::ValuesIn({4, 12, 64}), // K + ::testing::ValuesIn({12, 16, 32}), // C + ::testing::ValuesIn({1, 10, 15, 30}), // X + ::testing::ValuesIn({1, 4}), // G + ::testing::ValuesIn({layout_t::KCX, layout_t::KXC}))); + +template <typename T, layout_t LT> +void ref_impl( + const vector<float>& src, + int K, + int C, + int X, + int G, + const vector<float>& scales, + const vector<int>& zero_points, + vector<T>& dst) { + int C_per_G = C / G; + for (int i = 0; i < K; ++i) { + for (int g = 0; g < G; ++g) { + for (int c = 0; c < C / G; ++c) { + for (int x = 0; x < X; ++x) { + float num; + if (LT == layout_t::KCX) { + num = src[(i * C + g * C_per_G + c) * X + x]; + } else { + num = src[(i * X + x) * C + g * C_per_G + c]; + } + int res = nearbyint(zero_points[g] + num / scales[g]); + T final_res = min<T>( + max<T>(res, numeric_limits<T>::min()), numeric_limits<T>::max()); + if (LT == layout_t::KCX) { + dst[(i * C + g * C_per_G + c) * X + x] = final_res; + } else { + dst[(i * X + x) * C + g * C_per_G + c] = final_res; + } + } + } + } + } +} + +template <typename T, layout_t LT> +void runTests( + const vector<float>& src, + int K, + int C, + int X, + int G, + const vector<float>& scales, + const vector<int>& zero_points, + vector<T>& dst, + vector<T>& dst_ref) { + QuantizeGroupwise<T, LT>( + src.data(), K, C, X, G, scales.data(), zero_points.data(), dst.data()); + + ref_impl<T, LT>(src, K, C, X, G, scales, zero_points, dst_ref); +} + +/** + * Test for QuantizeGroupwise + */ 
+TEST_P(QuantizeGroupwiseTest, quantizeTest) { + int K, C, X, G; + layout_t layout; + tie(K, C, X, G, layout) = GetParam(); + + random_device rd; + mt19937 gen(rd()); + + uniform_real_distribution<float> disFP(0.1, 1.1); + + vector<float> inp(K * C * X); + generate(inp.begin(), inp.end(), [&, disFP]() mutable { return disFP(gen); }); + + vector<float> scales(G); + generate(scales.begin(), scales.end(), [&, disFP]() mutable { + return disFP(gen); + }); + + uniform_int_distribution<> disUInt8(0, 8); + vector<int> zero_points_uint8(G); + generate( + zero_points_uint8.begin(), + zero_points_uint8.end(), + [&, disUInt8]() mutable { return disUInt8(gen); }); + + uniform_int_distribution<> disInt8(-64, 63); + vector<int> zero_points_int8(G); + generate( + zero_points_int8.begin(), zero_points_int8.end(), [&, disInt8]() mutable { + return disInt8(gen); + }); + + uniform_int_distribution<> disInt32(-512, 512); + vector<int> zero_points_int32(G); + generate( + zero_points_int32.begin(), + zero_points_int32.end(), + [&, disInt32]() mutable { return disInt32(gen); }); + + vector<uint8_t> dstuint8(K * C * X); + vector<uint8_t> dstuint8_ref(K * C * X); + + vector<int8_t> dstint8(K * C * X); + vector<int8_t> dstint8_ref(K * C * X); + + vector<int32_t> dstint32(K * C * X); + vector<int32_t> dstint32_ref(K * C * X); + + if (layout == layout_t::KCX) { + runTests<uint8_t, layout_t::KCX>( + inp, K, C, X, G, scales, zero_points_uint8, dstuint8, dstuint8_ref); + runTests<int8_t, layout_t::KCX>( + inp, K, C, X, G, scales, zero_points_int8, dstint8, dstint8_ref); + runTests<int32_t, layout_t::KCX>( + inp, K, C, X, G, scales, zero_points_int32, dstint32, dstint32_ref); + } else { + runTests<uint8_t, layout_t::KXC>( + inp, K, C, X, G, scales, zero_points_uint8, dstuint8, dstuint8_ref); + runTests<int8_t, layout_t::KXC>( + inp, K, C, X, G, scales, zero_points_int8, dstint8, dstint8_ref); + runTests<int32_t, layout_t::KXC>( + inp, K, C, X, G, scales, zero_points_int32, dstint32, 
dstint32_ref); + } + + EXPECT_EQ(dstuint8, dstuint8_ref); + EXPECT_EQ(dstint8, dstint8_ref); + EXPECT_EQ(dstint32, dstint32_ref); +} |