optimize requantize for float out processing (#85)

Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/85

Optimizing performance of output processing when output is dequantized right away.

Reviewed By: protonu

Differential Revision: D14433141

fbshipit-source-id: f99a8d82000c43e554461acf036462a4e8f7e300
author: Jongsoo Park <jongsoo@fb.com> 2019-03-13 06:14:32 +0300
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-03-13 06:17:49 +0300
commit: 6011ce3b0c1fccee549e85b37e475c7a734ad742 (patch)
tree: 7089177b6c7da36c2582da1cf9b42eca9dfb2ea7 /include/fbgemm/QuantUtilsAvx2.h
parent: 50b43162fd1742122d01f2704945c78f13e0d73e (diff)
1 files changed, 14 insertions, 0 deletions
diff --git a/include/fbgemm/QuantUtilsAvx2.h b/include/fbgemm/QuantUtilsAvx2.h
index 04aeba1..47f33a8 100644
--- a/include/fbgemm/QuantUtilsAvx2.h
+++ b/include/fbgemm/QuantUtilsAvx2.h
@@ -95,4 +95,18 @@ FBGEMM_API void requantizeOutputProcessingGConvAvx2(
     int ld_in,
     const requantizationParams_t& r);
 
+template <
+    bool A_SYMMETRIC,
+    bool B_SYMMETRIC,
+    QuantizationGranularity Q_GRAN,
+    bool HAS_BIAS,
+    bool FUSE_RELU>
+FBGEMM_API void requantizeForFloatAvx2(
+    float* out,
+    const std::int32_t* inp,
+    const block_type_t& block,
+    int ld_out,
+    int ld_in,
+    const requantizationForFloatParams_t& r);
+
 } // namespace fbgemm
author	Jongsoo Park <jongsoo@fb.com>	2019-03-13 06:14:32 +0300
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>	2019-03-13 06:17:49 +0300
commit	6011ce3b0c1fccee549e85b37e475c7a734ad742 (patch)
tree	7089177b6c7da36c2582da1cf9b42eca9dfb2ea7 /include/fbgemm/QuantUtilsAvx2.h
parent	50b43162fd1742122d01f2704945c78f13e0d73e (diff)