Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daya S Khudia <dskhudia@fb.com> 2018-12-05 23:13:48 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2018-12-05 23:39:45 +0300
commit: bf398ec35647adcc1627fd2ab5999651862b53a3 (patch)
tree: 06653b9d86bfdb0c9374cc4212a889c7bb6bff80 /include/fbgemm/QuantUtilsAvx2.h
parent: 114a4612f374aaea96558581e24e321b19ca8cef (diff)
avx2 specific code in a separate file for QuantUtils (#29)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/29

avx2 code separation for QuantUtils

Reviewed By: jianyuh

Differential Revision: D13269041

fbshipit-source-id: df798cc0d93e0f2081cb832f4341fb2effa68294
Diffstat (limited to 'include/fbgemm/QuantUtilsAvx2.h')
-rw-r--r-- include/fbgemm/QuantUtilsAvx2.h 65
1 files changed, 65 insertions, 0 deletions
diff --git a/include/fbgemm/QuantUtilsAvx2.h b/include/fbgemm/QuantUtilsAvx2.h
new file mode 100644
index 0000000..1cbdf8b
--- /dev/null
+++ b/include/fbgemm/QuantUtilsAvx2.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <cstdint>
+#include "FbgemmBuild.h"
+
+namespace fbgemm {
+
+// Copied from gemmlowp
+//
+// A structure to hold quantization parameters 'scale' and 'zero_point'.
+// The meaning of these values is as the constants in the quantization equation
+//
+// real_value = scale * (quantized_value - zero_point)
+//
+// In other words, 'zero_point' is the quantized value that corresponds
+// to the real value 0, and 'scale' is the difference of real values
+// corresponding to consecutive quantized values.
+struct FBGEMM_API TensorQuantizationParams {
+ float scale;
+ std::int32_t zero_point;
+ int precision;
+ float Min() const;
+ float Max() const;
+};
+
+// Parameters when we scale from int32 intermediate matrix multiplication
+// results to 8-bit integers
+struct FBGEMM_API RequantizationParams {
+ // For floating-point requantization
+ float real_multiplier;
+
+ // For fixed-point requantization
+ std::int32_t multiplier;
+ int right_shift;
+
+ TensorQuantizationParams target_qparams;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Utility functions
+
+void QuantizeAvx2(
+ const float* src,
+ std::uint8_t* dst,
+ int len,
+ const TensorQuantizationParams& qparams);
+
+/**
+ * @brief Find the min and max value in a float matrix.
+ */
+void FBGEMM_API FindMinMax(const float* m, float* min, float* max, int len);
+
+void RequantizeFixedPointAvx2(
+ const std::int32_t* src,
+ std::uint8_t* dst,
+ int len,
+ const RequantizationParams& params);
+
+void RequantizeAvx2(
+ const std::int32_t* src,
+ std::uint8_t* dst,
+ int len,
+ const RequantizationParams& params);
+
+} // namespace fbgemm