avx2 specific code in a separate file for QuantUtils (#29)

Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/29. avx2 code separation for QuantUtils. Reviewed By: jianyuh. Differential Revision: D13269041. fbshipit-source-id: df798cc0d93e0f2081cb832f4341fb2effa68294
author: Daya S Khudia <dskhudia@fb.com> 2018-12-05 23:13:48 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2018-12-05 23:39:45 +0300
commit: bf398ec35647adcc1627fd2ab5999651862b53a3 (patch)
tree: 06653b9d86bfdb0c9374cc4212a889c7bb6bff80 /include/fbgemm/QuantUtilsAvx2.h
parent: 114a4612f374aaea96558581e24e321b19ca8cef (diff)
1 files changed, 65 insertions, 0 deletions
diff --git a/include/fbgemm/QuantUtilsAvx2.h b/include/fbgemm/QuantUtilsAvx2.h
new file mode 100644
index 0000000..1cbdf8b
--- /dev/null
+++ b/include/fbgemm/QuantUtilsAvx2.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <cstdint>
+#include "FbgemmBuild.h"
+
+namespace fbgemm {
+
+// Copied from gemmlowp
+//
+// A structure to hold the quantization parameters 'scale' and 'zero_point'.
+// These values are the constants in the quantization equation
+//
+//   real_value = scale * (quantized_value - zero_point)
+//
+// In other words, 'zero_point' is the quantized value that corresponds
+// to the real value 0, and 'scale' is the difference of real values
+// corresponding to consecutive quantized values.
+struct FBGEMM_API TensorQuantizationParams {
+  float scale; // real-value step between consecutive quantized values
+  std::int32_t zero_point; // quantized value that corresponds to real 0
+  int precision; // NOTE(review): presumably the quantized bit width - confirm
+  float Min() const; // defined elsewhere; presumably smallest real value - confirm
+  float Max() const; // defined elsewhere; presumably largest real value - confirm
+};
+
+// Parameters used when scaling the int32 intermediate results of a matrix
+// multiplication to 8-bit integers
+struct FBGEMM_API RequantizationParams {
+  // For floating-point requantization
+  float real_multiplier;
+
+  // For fixed-point requantization
+  std::int32_t multiplier;
+  int right_shift;
+
+  TensorQuantizationParams target_qparams; // NOTE(review): presumably the output's quantization params - confirm
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Utility functions
+
+void QuantizeAvx2( // declaration only; the definition is not in this header
+    const float* src, // read-only float input
+    std::uint8_t* dst, // non-const uint8 destination
+    int len, // NOTE(review): presumably the element count of src and dst - confirm
+    const TensorQuantizationParams& qparams); // scale, zero_point and precision bundle
+
+/**
+ * @brief Find the min and max value in a float matrix. NOTE(review): `min` and
+ * `max` look like out-params and `len` like the element count of `m` - confirm. */
+void FBGEMM_API FindMinMax(const float* m, float* min, float* max, int len);
+
+void RequantizeFixedPointAvx2( // declaration only; the definition is not in this header
+    const std::int32_t* src, // read-only int32 input
+    std::uint8_t* dst, // non-const uint8 destination
+    int len, // NOTE(review): presumably the element count of src and dst - confirm
+    const RequantizationParams& params); // NOTE(review): presumably reads multiplier / right_shift - confirm
+
+void RequantizeAvx2( // declaration only; the definition is not in this header
+    const std::int32_t* src, // read-only int32 input
+    std::uint8_t* dst, // non-const uint8 destination
+    int len, // NOTE(review): presumably the element count of src and dst - confirm
+    const RequantizationParams& params); // NOTE(review): presumably reads real_multiplier - confirm
+
+} // namespace fbgemm
author	Daya S Khudia <dskhudia@fb.com>	2018-12-05 23:13:48 +0300
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>	2018-12-05 23:39:45 +0300
commit	bf398ec35647adcc1627fd2ab5999651862b53a3 (patch)
tree	06653b9d86bfdb0c9374cc4212a889c7bb6bff80 /include/fbgemm/QuantUtilsAvx2.h
parent	114a4612f374aaea96558581e24e321b19ca8cef (diff)