Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaya Khudia <dskhudia@fb.com>2019-06-20 22:13:35 +0300
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>2019-06-20 22:21:51 +0300
commit5b64af1469cf629aa7beb934eb898fd1e0b02719 (patch)
treedddef8da6e597f1c118a18cfe5ff421e97df0a88
parent604575ff5de717b2ee712190634840981a9c8fba (diff)
Per channel and groupwise quantization (#99)
Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/99 A function to do per channel and groupwise quantization Reviewed By: jspark1105 Differential Revision: D15567272 fbshipit-source-id: e2f326ea7c7463b5c47b3f590e003344a9e41960
-rw-r--r--include/fbgemm/QuantUtils.h35
-rw-r--r--include/fbgemm/Utils.h7
-rw-r--r--src/QuantUtils.cc109
-rw-r--r--test/QuantUtilsTest.cc157
4 files changed, 308 insertions, 0 deletions
diff --git a/include/fbgemm/QuantUtils.h b/include/fbgemm/QuantUtils.h
index 43855d8..508ce7d 100644
--- a/include/fbgemm/QuantUtils.h
+++ b/include/fbgemm/QuantUtils.h
@@ -7,6 +7,7 @@
#include <limits>
#include "FbgemmBuild.h"
#include "QuantUtilsAvx2.h"
+#include "Utils.h"
namespace fbgemm {
@@ -78,6 +79,40 @@ FBGEMM_API void Quantize(
int len,
const TensorQuantizationParams& qparams);
+/**
+ * @brief Quantize floating point data in src to type T.
+ *
+ * @tparam T output quantized data type (int8_t, uint8_t and int32_t are
+ * supported)
+ *
+ * @tparam LAYOUT layout of input tensor in src (KCX and KXC are supported).
+ * KCX corresponds to KCRS or KCTRS (for weight tensors with
+ * time dimension)
+ * KXC corresponds to KRSC or KTRSC (for weight tensors with
+ * time dimension)
+ *
+ * @param K Output channels for weight tensors
+ * @param C Number of channels
+ * @param X R*S or T*R*S
+ * @param G Groups (if G == C the function performs channelwise quantization;
+ * if 1 < G < C the function performs groupwise quantization;
+ * if G == 1 the function performs per tensor quantization)
+ * @param scales floating point scales.
+ * Size should be equal to G
+ * @param zero_points zero points (should be representable in type T).
+ * Size should be equal to G
+ */
+template <typename T, layout_t LAYOUT = layout_t::KCX>
+FBGEMM_API void QuantizeGroupwise(
+ const float* src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const float* scales,
+ const std::int32_t* zero_points,
+ T* dst);
+
template <typename T>
FBGEMM_API float Dequantize(T src, const TensorQuantizationParams& qparams) {
return qparams.scale * (src - qparams.zero_point);
diff --git a/include/fbgemm/Utils.h b/include/fbgemm/Utils.h
index 1a35aa1..636abc7 100644
--- a/include/fbgemm/Utils.h
+++ b/include/fbgemm/Utils.h
@@ -44,6 +44,13 @@ enum class optimized_conv_t { depthwise, groupwise, im2col };
enum class impl_type_t { ref, opt };
/**
+ * @brief Typed enum to specify data layout.
+ * KCX can be KCRS format or KCTRS format (e.g., for 3-D convolutions)
+ * KXC can be KRSC format or KTRSC format (e.g., for 3-D convolutions)
+ */
+enum class layout_t { KCX, KXC };
+
+/**
* @brief A function to compare data in two buffers for closeness/equality.
*/
template <typename T>
diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc
index 1ab00d1..5dde90b 100644
--- a/src/QuantUtils.cc
+++ b/src/QuantUtils.cc
@@ -188,6 +188,115 @@ void Quantize<uint8_t>(
}
}
+#define FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(T) \
+ template <> \
+ void QuantizeGroupwise<T, layout_t::KCX>( \
+ const float* src, \
+ int N, \
+ int C, \
+ int X, \
+ int G, \
+ const float* scales, \
+ const std::int32_t* zero_points, \
+ T* dst) { \
+ assert(C % G == 0); \
+ int C_per_G = C / G; \
+ for (int i = 0; i < N; ++i) { \
+ for (int g = 0; g < G; ++g) { \
+ float scale = scales[g]; \
+ int32_t zero_point = zero_points[g]; \
+ for (int c = 0; c < C / G; ++c) { \
+ for (int x = 0; x < X; ++x) { \
+ dst[(i * C + g * C_per_G + c) * X + x] = Quantize<T>( \
+ src[(i * C + g * C_per_G + c) * X + x], \
+ zero_point, \
+ scale, \
+ 8 * sizeof(T)); \
+ } \
+ } \
+ } \
+ } \
+ }
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(int8_t)
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(int32_t)
+#undef FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX
+
+template <>
+void QuantizeGroupwise<uint8_t, layout_t::KCX>(
+ const float* src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const float* scales,
+ const std::int32_t* zero_points,
+ uint8_t* dst) {
+ assert(C % G == 0);
+ int C_per_G = C / G;
+ fbgemm::TensorQuantizationParams qparams;
+ qparams.precision = 8 * sizeof(uint8_t);
+ bool takeFastPath =
+ cpuinfo_initialize() && fbgemmHasAvx2Support() && cpuinfo_has_x86_fma3();
+
+ for (int i = 0; i < K; ++i) {
+ for (int g = 0; g < G; ++g) {
+ qparams.scale = scales[g];
+ qparams.zero_point = zero_points[g];
+ if (takeFastPath) {
+ QuantizeAvx2(
+ src + (i * C + g * C_per_G) * X,
+ dst + (i * C + g * C_per_G) * X,
+ C_per_G * X,
+ qparams);
+ } else {
+ for (int c = 0; c < C / G; ++c) {
+ for (int x = 0; x < X; ++x) {
+ dst[(i * C + g * C_per_G + c) * X + x] = Quantize<uint8_t>(
+ src[(i * C + g * C_per_G + c) * X + x],
+ qparams.zero_point,
+ qparams.scale,
+ qparams.precision);
+ }
+ }
+ }
+ }
+ }
+}
+
+#define FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(T) \
+ template <> \
+ void QuantizeGroupwise<T, layout_t::KXC>( \
+ const float* src, \
+ int K, \
+ int C, \
+ int X, \
+ int G, \
+ const float* scales, \
+ const std::int32_t* zero_points, \
+ T* dst) { \
+ assert(C % G == 0); \
+ int C_per_G = C / G; \
+ for (int i = 0; i < K; ++i) { \
+ for (int x = 0; x < X; ++x) { \
+ for (int g = 0; g < G; ++g) { \
+ float scale = scales[g]; \
+ int32_t zero_point = zero_points[g]; \
+ for (int c = 0; c < C / G; ++c) { \
+ dst[(i * X + x) * C + g * C_per_G + c] = Quantize<T>( \
+ src[(i * X + x) * C + g * C_per_G + c], \
+ zero_point, \
+ scale, \
+ 8 * sizeof(T)); \
+ } \
+ } \
+ } \
+ } \
+ }
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(int8_t)
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(uint8_t)
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(int32_t)
+#undef FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC
+
////////////////////////////////////////////////////////////////////////////////
// Requantization (pure fixed-point)
diff --git a/test/QuantUtilsTest.cc b/test/QuantUtilsTest.cc
new file mode 100644
index 0000000..2bbd05e
--- /dev/null
+++ b/test/QuantUtilsTest.cc
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * All rights reserved.
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <algorithm>
+#include <limits>
+#include <random>
+
+#include <gtest/gtest.h>
+
+#include "fbgemm/QuantUtils.h"
+#include "fbgemm/Utils.h"
+
+using namespace std;
+using namespace fbgemm;
+
+// tuple represents K, C, X, G, layout_t
+// layout_t can be KCX or KXC
+class QuantizeGroupwiseTest
+ : public testing::TestWithParam<tuple<int, int, int, int, layout_t>> {};
+
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ QuantizeGroupwiseTest,
+ ::testing::Combine(
+ ::testing::ValuesIn({4, 12, 64}), // K
+ ::testing::ValuesIn({12, 16, 32}), // C
+ ::testing::ValuesIn({1, 10, 15, 30}), // X
+ ::testing::ValuesIn({1, 4}), // G
+ ::testing::ValuesIn({layout_t::KCX, layout_t::KXC})));
+
+template <typename T, layout_t LT>
+void ref_impl(
+ const vector<float>& src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const vector<float>& scales,
+ const vector<int>& zero_points,
+ vector<T>& dst) {
+ int C_per_G = C / G;
+ for (int i = 0; i < K; ++i) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < C / G; ++c) {
+ for (int x = 0; x < X; ++x) {
+ float num;
+ if (LT == layout_t::KCX) {
+ num = src[(i * C + g * C_per_G + c) * X + x];
+ } else {
+ num = src[(i * X + x) * C + g * C_per_G + c];
+ }
+ int res = nearbyint(zero_points[g] + num / scales[g]);
+ T final_res = min<T>(
+ max<T>(res, numeric_limits<T>::min()), numeric_limits<T>::max());
+ if (LT == layout_t::KCX) {
+ dst[(i * C + g * C_per_G + c) * X + x] = final_res;
+ } else {
+ dst[(i * X + x) * C + g * C_per_G + c] = final_res;
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T, layout_t LT>
+void runTests(
+ const vector<float>& src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const vector<float>& scales,
+ const vector<int>& zero_points,
+ vector<T>& dst,
+ vector<T>& dst_ref) {
+ QuantizeGroupwise<T, LT>(
+ src.data(), K, C, X, G, scales.data(), zero_points.data(), dst.data());
+
+ ref_impl<T, LT>(src, K, C, X, G, scales, zero_points, dst_ref);
+}
+
+/**
+ * Test for QuantizeGroupwise
+ */
+TEST_P(QuantizeGroupwiseTest, quantizeTest) {
+ int K, C, X, G;
+ layout_t layout;
+ tie(K, C, X, G, layout) = GetParam();
+
+ random_device rd;
+ mt19937 gen(rd());
+
+ uniform_real_distribution<float> disFP(0.1, 1.1);
+
+ vector<float> inp(K * C * X);
+ generate(inp.begin(), inp.end(), [&, disFP]() mutable { return disFP(gen); });
+
+ vector<float> scales(G);
+ generate(scales.begin(), scales.end(), [&, disFP]() mutable {
+ return disFP(gen);
+ });
+
+ uniform_int_distribution<> disUInt8(0, 8);
+ vector<int> zero_points_uint8(G);
+ generate(
+ zero_points_uint8.begin(),
+ zero_points_uint8.end(),
+ [&, disUInt8]() mutable { return disUInt8(gen); });
+
+ uniform_int_distribution<> disInt8(-64, 63);
+ vector<int> zero_points_int8(G);
+ generate(
+ zero_points_int8.begin(), zero_points_int8.end(), [&, disInt8]() mutable {
+ return disInt8(gen);
+ });
+
+ uniform_int_distribution<> disInt32(-512, 512);
+ vector<int> zero_points_int32(G);
+ generate(
+ zero_points_int32.begin(),
+ zero_points_int32.end(),
+ [&, disInt32]() mutable { return disInt32(gen); });
+
+ vector<uint8_t> dstuint8(K * C * X);
+ vector<uint8_t> dstuint8_ref(K * C * X);
+
+ vector<int8_t> dstint8(K * C * X);
+ vector<int8_t> dstint8_ref(K * C * X);
+
+ vector<int32_t> dstint32(K * C * X);
+ vector<int32_t> dstint32_ref(K * C * X);
+
+ if (layout == layout_t::KCX) {
+ runTests<uint8_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_uint8, dstuint8, dstuint8_ref);
+ runTests<int8_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_int8, dstint8, dstint8_ref);
+ runTests<int32_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_int32, dstint32, dstint32_ref);
+ } else {
+ runTests<uint8_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_uint8, dstuint8, dstuint8_ref);
+ runTests<int8_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_int8, dstint8, dstint8_ref);
+ runTests<int32_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_int32, dstint32, dstint32_ref);
+ }
+
+ EXPECT_EQ(dstuint8, dstuint8_ref);
+ EXPECT_EQ(dstint8, dstint8_ref);
+ EXPECT_EQ(dstint32, dstint32_ref);
+}