Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaya Khudia <dskhudia@fb.com>2019-06-20 22:13:35 +0300
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>2019-06-20 22:21:51 +0300
commit5b64af1469cf629aa7beb934eb898fd1e0b02719 (patch)
treedddef8da6e597f1c118a18cfe5ff421e97df0a88
parent604575ff5de717b2ee712190634840981a9c8fba (diff)
Per channel and groupwise quantization (#99)
Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/99 A function to do per channel and groupwise quantization Reviewed By: jspark1105 Differential Revision: D15567272 fbshipit-source-id: e2f326ea7c7463b5c47b3f590e003344a9e41960
-rw-r--r--include/fbgemm/QuantUtils.h35
-rw-r--r--include/fbgemm/Utils.h7
-rw-r--r--src/QuantUtils.cc109
-rw-r--r--test/QuantUtilsTest.cc157
4 files changed, 308 insertions, 0 deletions
diff --git a/include/fbgemm/QuantUtils.h b/include/fbgemm/QuantUtils.h
index 43855d8..508ce7d 100644
--- a/include/fbgemm/QuantUtils.h
+++ b/include/fbgemm/QuantUtils.h
@@ -7,6 +7,7 @@
#include <limits>
#include "FbgemmBuild.h"
#include "QuantUtilsAvx2.h"
+#include "Utils.h"
namespace fbgemm {
@@ -78,6 +79,40 @@ FBGEMM_API void Quantize(
int len,
const TensorQuantizationParams& qparams);
+/**
+ * @brief Quantize floating point data in src to type T.
+ *
+ * @tparam T output quantized data type (int8_t, uint8_t and int32_t are
+ * supported)
+ *
+ * @tparam LAYOUT layout of input tensor in src (KCX and KXC are supported).
+ * KCX corresponds to KCRS or KCTRS (for weight tensors with
+ * time dimension)
+ * KXC corresponds to KRSC or KTRSC (for weight tensors with
+ * time dimension)
+ *
+ * @param K Output channels for weight tensors
+ * @param C Number of channels
+ * @param X R*S or T*R*S
+ * @param G Groups (if G == C the function performs channelwise quantization;
+ * if 1 < G < C the function performs groupwise quantization;
+ * if G == 1 the function performs per tensor quantization)
+ * @param scales floating point scales.
+ * Size should be equal to G
+ * @param zero_points zero points (should be representable in type T).
+ * Size should be equal to G
+ */
+template <typename T, layout_t LAYOUT = layout_t::KCX>
+FBGEMM_API void QuantizeGroupwise(
+ const float* src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const float* scales,
+ const std::int32_t* zero_points,
+ T* dst);
+
template <typename T>
FBGEMM_API float Dequantize(T src, const TensorQuantizationParams& qparams) {
return qparams.scale * (src - qparams.zero_point);
diff --git a/include/fbgemm/Utils.h b/include/fbgemm/Utils.h
index 1a35aa1..636abc7 100644
--- a/include/fbgemm/Utils.h
+++ b/include/fbgemm/Utils.h
@@ -44,6 +44,13 @@ enum class optimized_conv_t { depthwise, groupwise, im2col };
enum class impl_type_t { ref, opt };
/**
+ * @brief Typed enum to specify data layout.
+ * KCX can be KCRS format or KCTRS format (e.g., for 3-D convolutions)
+ * KXC can be KRSC format or KTRSC format (e.g., for 3-D convolutions)
+ */
+enum class layout_t { KCX, KXC };
+
+/**
* @brief A function to compare data in two buffers for closeness/equality.
*/
template <typename T>
diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc
index 1ab00d1..5dde90b 100644
--- a/src/QuantUtils.cc
+++ b/src/QuantUtils.cc
@@ -188,6 +188,115 @@ void Quantize<uint8_t>(
}
}
+#define FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(T) \
+ template <> \
+ void QuantizeGroupwise<T, layout_t::KCX>( \
+ const float* src, \
+ int N, \
+ int C, \
+ int X, \
+ int G, \
+ const float* scales, \
+ const std::int32_t* zero_points, \
+ T* dst) { \
+ assert(C % G == 0); \
+ int C_per_G = C / G; \
+ for (int i = 0; i < N; ++i) { \
+ for (int g = 0; g < G; ++g) { \
+ float scale = scales[g]; \
+ int32_t zero_point = zero_points[g]; \
+ for (int c = 0; c < C / G; ++c) { \
+ for (int x = 0; x < X; ++x) { \
+ dst[(i * C + g * C_per_G + c) * X + x] = Quantize<T>( \
+ src[(i * C + g * C_per_G + c) * X + x], \
+ zero_point, \
+ scale, \
+ 8 * sizeof(T)); \
+ } \
+ } \
+ } \
+ } \
+ }
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(int8_t)
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX(int32_t)
+#undef FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKCX
+
+template <>
+void QuantizeGroupwise<uint8_t, layout_t::KCX>(
+ const float* src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const float* scales,
+ const std::int32_t* zero_points,
+ uint8_t* dst) {
+ assert(C % G == 0);
+ int C_per_G = C / G;
+ fbgemm::TensorQuantizationParams qparams;
+ qparams.precision = 8 * sizeof(uint8_t);
+ bool takeFastPath =
+ cpuinfo_initialize() && fbgemmHasAvx2Support() && cpuinfo_has_x86_fma3();
+
+ for (int i = 0; i < K; ++i) {
+ for (int g = 0; g < G; ++g) {
+ qparams.scale = scales[g];
+ qparams.zero_point = zero_points[g];
+ if (takeFastPath) {
+ QuantizeAvx2(
+ src + (i * C + g * C_per_G) * X,
+ dst + (i * C + g * C_per_G) * X,
+ C_per_G * X,
+ qparams);
+ } else {
+ for (int c = 0; c < C / G; ++c) {
+ for (int x = 0; x < X; ++x) {
+ dst[(i * C + g * C_per_G + c) * X + x] = Quantize<uint8_t>(
+ src[(i * C + g * C_per_G + c) * X + x],
+ qparams.zero_point,
+ qparams.scale,
+ qparams.precision);
+ }
+ }
+ }
+ }
+ }
+}
+
+#define FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(T) \
+ template <> \
+ void QuantizeGroupwise<T, layout_t::KXC>( \
+ const float* src, \
+ int K, \
+ int C, \
+ int X, \
+ int G, \
+ const float* scales, \
+ const std::int32_t* zero_points, \
+ T* dst) { \
+ assert(C % G == 0); \
+ int C_per_G = C / G; \
+ for (int i = 0; i < K; ++i) { \
+ for (int x = 0; x < X; ++x) { \
+ for (int g = 0; g < G; ++g) { \
+ float scale = scales[g]; \
+ int32_t zero_point = zero_points[g]; \
+ for (int c = 0; c < C / G; ++c) { \
+ dst[(i * X + x) * C + g * C_per_G + c] = Quantize<T>( \
+ src[(i * X + x) * C + g * C_per_G + c], \
+ zero_point, \
+ scale, \
+ 8 * sizeof(T)); \
+ } \
+ } \
+ } \
+ } \
+ }
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(int8_t)
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(uint8_t)
+FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC(int32_t)
+#undef FBGEMM_SPECIALIZED_QUANTIZEGROUPWISEKXC
+
////////////////////////////////////////////////////////////////////////////////
// Requantization (pure fixed-point)
diff --git a/test/QuantUtilsTest.cc b/test/QuantUtilsTest.cc
new file mode 100644
index 0000000..2bbd05e
--- /dev/null
+++ b/test/QuantUtilsTest.cc
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * All rights reserved.
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <algorithm>
+#include <limits>
+#include <random>
+
+#include <gtest/gtest.h>
+
+#include "fbgemm/QuantUtils.h"
+#include "fbgemm/Utils.h"
+
+using namespace std;
+using namespace fbgemm;
+
+// tuple represents K, C, X, G, layout_t
+// layout_t can be KCX or KXC
+class QuantizeGroupwiseTest
+ : public testing::TestWithParam<tuple<int, int, int, int, layout_t>> {};
+
+INSTANTIATE_TEST_CASE_P(
+ InstantiationName,
+ QuantizeGroupwiseTest,
+ ::testing::Combine(
+ ::testing::ValuesIn({4, 12, 64}), // K
+ ::testing::ValuesIn({12, 16, 32}), // C
+ ::testing::ValuesIn({1, 10, 15, 30}), // X
+ ::testing::ValuesIn({1, 4}), // G
+ ::testing::ValuesIn({layout_t::KCX, layout_t::KXC})));
+
+template <typename T, layout_t LT>
+void ref_impl(
+ const vector<float>& src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const vector<float>& scales,
+ const vector<int>& zero_points,
+ vector<T>& dst) {
+ int C_per_G = C / G;
+ for (int i = 0; i < K; ++i) {
+ for (int g = 0; g < G; ++g) {
+ for (int c = 0; c < C / G; ++c) {
+ for (int x = 0; x < X; ++x) {
+ float num;
+ if (LT == layout_t::KCX) {
+ num = src[(i * C + g * C_per_G + c) * X + x];
+ } else {
+ num = src[(i * X + x) * C + g * C_per_G + c];
+ }
+ int res = nearbyint(zero_points[g] + num / scales[g]);
+ T final_res = min<T>(
+ max<T>(res, numeric_limits<T>::min()), numeric_limits<T>::max());
+ if (LT == layout_t::KCX) {
+ dst[(i * C + g * C_per_G + c) * X + x] = final_res;
+ } else {
+ dst[(i * X + x) * C + g * C_per_G + c] = final_res;
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T, layout_t LT>
+void runTests(
+ const vector<float>& src,
+ int K,
+ int C,
+ int X,
+ int G,
+ const vector<float>& scales,
+ const vector<int>& zero_points,
+ vector<T>& dst,
+ vector<T>& dst_ref) {
+ QuantizeGroupwise<T, LT>(
+ src.data(), K, C, X, G, scales.data(), zero_points.data(), dst.data());
+
+ ref_impl<T, LT>(src, K, C, X, G, scales, zero_points, dst_ref);
+}
+
+/**
+ * Test for QuantizeGroupwise
+ */
+TEST_P(QuantizeGroupwiseTest, quantizeTest) {
+ int K, C, X, G;
+ layout_t layout;
+ tie(K, C, X, G, layout) = GetParam();
+
+ random_device rd;
+ mt19937 gen(rd());
+
+ uniform_real_distribution<float> disFP(0.1, 1.1);
+
+ vector<float> inp(K * C * X);
+ generate(inp.begin(), inp.end(), [&, disFP]() mutable { return disFP(gen); });
+
+ vector<float> scales(G);
+ generate(scales.begin(), scales.end(), [&, disFP]() mutable {
+ return disFP(gen);
+ });
+
+ uniform_int_distribution<> disUInt8(0, 8);
+ vector<int> zero_points_uint8(G);
+ generate(
+ zero_points_uint8.begin(),
+ zero_points_uint8.end(),
+ [&, disUInt8]() mutable { return disUInt8(gen); });
+
+ uniform_int_distribution<> disInt8(-64, 63);
+ vector<int> zero_points_int8(G);
+ generate(
+ zero_points_int8.begin(), zero_points_int8.end(), [&, disInt8]() mutable {
+ return disInt8(gen);
+ });
+
+ uniform_int_distribution<> disInt32(-512, 512);
+ vector<int> zero_points_int32(G);
+ generate(
+ zero_points_int32.begin(),
+ zero_points_int32.end(),
+ [&, disInt32]() mutable { return disInt32(gen); });
+
+ vector<uint8_t> dstuint8(K * C * X);
+ vector<uint8_t> dstuint8_ref(K * C * X);
+
+ vector<int8_t> dstint8(K * C * X);
+ vector<int8_t> dstint8_ref(K * C * X);
+
+ vector<int32_t> dstint32(K * C * X);
+ vector<int32_t> dstint32_ref(K * C * X);
+
+ if (layout == layout_t::KCX) {
+ runTests<uint8_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_uint8, dstuint8, dstuint8_ref);
+ runTests<int8_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_int8, dstint8, dstint8_ref);
+ runTests<int32_t, layout_t::KCX>(
+ inp, K, C, X, G, scales, zero_points_int32, dstint32, dstint32_ref);
+ } else {
+ runTests<uint8_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_uint8, dstuint8, dstuint8_ref);
+ runTests<int8_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_int8, dstint8, dstint8_ref);
+ runTests<int32_t, layout_t::KXC>(
+ inp, K, C, X, G, scales, zero_points_int32, dstint32, dstint32_ref);
+ }
+
+ EXPECT_EQ(dstuint8, dstuint8_ref);
+ EXPECT_EQ(dstint8, dstint8_ref);
+ EXPECT_EQ(dstint32, dstint32_ref);
+}