github.com/marian-nmt/FBGEMM.git
Diffstat (limited to 'include/fbgemm/FbgemmI8DepthwiseAvx2.h'):
 include/fbgemm/FbgemmI8DepthwiseAvx2.h | 116
 1 file changed, 112 insertions(+), 4 deletions(-)
diff --git a/include/fbgemm/FbgemmI8DepthwiseAvx2.h b/include/fbgemm/FbgemmI8DepthwiseAvx2.h
index 98c4ed7..19946cf 100644
--- a/include/fbgemm/FbgemmI8DepthwiseAvx2.h
+++ b/include/fbgemm/FbgemmI8DepthwiseAvx2.h
@@ -50,12 +50,39 @@ using Packed5ConvMatrix = PackedDepthWiseConvMatrix<5>;
using Packed10ConvMatrix = PackedDepthWiseConvMatrix<10>;
using Packed11ConvMatrix = PackedDepthWiseConvMatrix<11>;
+/** To be removed. Kept temporarily so that Caffe2 (C2) files and fbgemm
+ * files are not changed in a single diff.
+ */
+FBGEMM_API void depthwise_3x3_pad_1(
+ int N,
+ int H,
+ int W,
+ int K,
+ int stride_h,
+ int stride_w,
+ std::int32_t A_zero_point,
+ const std::uint8_t* A,
+ std::int32_t B_zero_point,
+ const Packed3x3ConvMatrix& Bp,
+ float C_multiplier,
+ std::int32_t C_zero_point,
+ std::uint8_t* C,
+ const std::int32_t* col_offsets,
+ const std::int32_t* bias,
+ bool fuse_relu = false,
+ int thread_id = 0,
+ int num_threads = 1);
+
/**
- * Depth-wise 3x3 convolution with pad=1 and K a multiple of 8, fused with
- * requantization.
+ * Depth-wise 3x3 convolution with pad=1, stride=1, and K a multiple of 8.
+ * This version is fused with requantization.
*
* @col_offsets nullptr if col_offsets are folded into bias
+ * @act_times_w_scale Only used if BIAS_TYPE is float, i.e., bias is
+ * unquantized.
*/
+template <typename BIAS_TYPE = std::int32_t>
FBGEMM_API void depthwise_3x3_pad_1(
int N,
int H,
@@ -71,8 +98,9 @@ FBGEMM_API void depthwise_3x3_pad_1(
std::int32_t C_zero_point,
std::uint8_t* C,
const std::int32_t* col_offsets,
- const std::int32_t* bias,
+ const BIAS_TYPE* bias,
bool fuse_relu = false,
+ float act_times_w_scale = 1.0f,
int thread_id = 0,
int num_threads = 1);
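
The new overload above deduces BIAS_TYPE from the bias pointer, so passing a float bias selects the unquantized-bias path and makes act_times_w_scale meaningful. Below is a minimal usage sketch (not part of the patch), assuming the declarations live in the fbgemm namespace as in the rest of the header, that the weights were already packed into a Packed3x3ConvMatrix elsewhere, and that every other name is a caller-supplied placeholder.

// Usage sketch: per-tensor quantized 3x3 depthwise convolution with an
// unquantized float bias; BIAS_TYPE is deduced as float from `bias`.
#include <cstdint>
#include "fbgemm/FbgemmI8DepthwiseAvx2.h"

void run_dw3x3_float_bias(
    int N, int H, int W, int K,
    std::int32_t A_zero_point, const std::uint8_t* A,
    std::int32_t B_zero_point, const fbgemm::Packed3x3ConvMatrix& Bp,
    float C_multiplier, std::int32_t C_zero_point, std::uint8_t* C,
    const std::int32_t* col_offsets,   // or nullptr if folded into bias
    const float* bias,                 // length K, unquantized
    float act_times_w_scale) {
  fbgemm::depthwise_3x3_pad_1(
      N, H, W, K,
      /*stride_h=*/1, /*stride_w=*/1,
      A_zero_point, A,
      B_zero_point, Bp,
      C_multiplier, C_zero_point, C,
      col_offsets, bias,
      /*fuse_relu=*/true,
      act_times_w_scale,
      /*thread_id=*/0, /*num_threads=*/1);
}
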
@@ -82,6 +110,31 @@ FBGEMM_API void depthwise_3x3_pad_1(
*
* @col_offsets nullptr if col_offsets are folded into bias
*/
+template <typename BIAS_TYPE = std::int32_t>
+FBGEMM_API void depthwise_3x3_per_channel_quantization_pad_1(
+ int N,
+ int H,
+ int W,
+ int K,
+ int stride_h,
+ int stride_w,
+ std::int32_t A_zero_point,
+ const std::uint8_t* A,
+ const std::int32_t* B_zero_point,
+ const Packed3x3ConvMatrix& Bp,
+ const float* C_multiplier,
+ std::int32_t C_zero_point,
+ std::uint8_t* C,
+ const std::int32_t* col_offsets,
+ const BIAS_TYPE* bias,
+ bool fuse_relu = false,
+ const float* act_times_w_scale = nullptr,
+ int thread_id = 0,
+ int num_threads = 1);
+
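
In the per-channel variant above, B_zero_point and C_multiplier are arrays with one entry per output channel, and the sketch below assumes act_times_w_scale is likewise of length K. A minimal usage sketch (not part of the patch), with the same namespace assumption as before and all buffer names purely illustrative:

// Usage sketch: per-channel requantization; one zero point, multiplier, and
// act_times_w_scale entry per output channel. BIAS_TYPE deduces to float.
#include <cstdint>
#include <vector>
#include "fbgemm/FbgemmI8DepthwiseAvx2.h"

void run_dw3x3_per_channel(
    int N, int H, int W, int K,
    std::int32_t A_zero_point, const std::uint8_t* A,
    const fbgemm::Packed3x3ConvMatrix& Bp,
    const std::vector<std::int32_t>& B_zero_point,  // size K
    const std::vector<float>& C_multiplier,         // size K
    std::int32_t C_zero_point, std::uint8_t* C,
    const std::int32_t* col_offsets,
    const std::vector<float>& bias,                 // size K, unquantized
    const std::vector<float>& act_times_w_scale) {  // size K
  fbgemm::depthwise_3x3_per_channel_quantization_pad_1(
      N, H, W, K,
      /*stride_h=*/1, /*stride_w=*/1,
      A_zero_point, A,
      B_zero_point.data(), Bp,
      C_multiplier.data(), C_zero_point, C,
      col_offsets, bias.data(),
      /*fuse_relu=*/false,
      act_times_w_scale.data(),
      /*thread_id=*/0, /*num_threads=*/1);
}
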
+/** To be removed. Kept temporarily so that Caffe2 (C2) files and fbgemm
+ * files are not changed in a single diff.
+ */
FBGEMM_API void depthwise_3x3_per_channel_quantization_pad_1(
int N,
int H,
@@ -102,9 +155,35 @@ FBGEMM_API void depthwise_3x3_per_channel_quantization_pad_1(
int thread_id = 0,
int num_threads = 1);
+/** To be removed. Kept temporarily so that Caffe2 (C2) files and fbgemm
+ * files are not changed in a single diff.
+ */
+FBGEMM_API void depthwise_3x3x3_pad_1(
+ int N,
+ int T,
+ int H,
+ int W,
+ int K,
+ int stride_t,
+ int stride_h,
+ int stride_w,
+ std::int32_t A_zero_point,
+ const std::uint8_t* A,
+ std::int32_t B_zero_point,
+ const Packed3x3x3ConvMatrix& Bp,
+ float C_multiplier,
+ std::int32_t C_zero_point,
+ std::uint8_t* C,
+ const std::int32_t* col_offsets,
+ const std::int32_t* bias,
+ bool fuse_relu = false,
+ int thread_id = 0,
+ int num_threads = 1);
/**
* @col_offsets nullptr if col_offsets are folded into bias
*/
+template <typename BIAS_TYPE = std::int32_t>
FBGEMM_API void depthwise_3x3x3_pad_1(
int N,
int T,
@@ -122,6 +201,33 @@ FBGEMM_API void depthwise_3x3x3_pad_1(
std::int32_t C_zero_point,
std::uint8_t* C,
const std::int32_t* col_offsets,
+ const BIAS_TYPE* bias,
+ bool fuse_relu = false,
+ float act_times_w_scale = 1.0f,
+ int thread_id = 0,
+ int num_threads = 1);
+
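
The 3x3x3 overload just above adds a temporal dimension T and a stride_t. Because thread_id and num_threads are explicit parameters, the caller appears expected to invoke the function from its own parallel region so the implementation can split the work; a minimal sketch (not part of the patch) assuming OpenMP, a quantized int32 bias, and placeholder names for everything not declared in this header:

// Usage sketch: 3x3x3 depthwise convolution over N x T x H x W activations,
// called from an OpenMP parallel region so each thread passes its own id.
#include <omp.h>
#include <cstdint>
#include "fbgemm/FbgemmI8DepthwiseAvx2.h"

void run_dw3x3x3(
    int N, int T, int H, int W, int K,
    std::int32_t A_zero_point, const std::uint8_t* A,
    std::int32_t B_zero_point, const fbgemm::Packed3x3x3ConvMatrix& Bp,
    float C_multiplier, std::int32_t C_zero_point, std::uint8_t* C,
    const std::int32_t* col_offsets,
    const std::int32_t* bias) {  // quantized int32 bias, length K
#pragma omp parallel
  {
    // Passing the extra act_times_w_scale argument selects the new templated
    // overload; BIAS_TYPE deduces to std::int32_t, so the scale is unused.
    fbgemm::depthwise_3x3x3_pad_1(
        N, T, H, W, K,
        /*stride_t=*/1, /*stride_h=*/1, /*stride_w=*/1,
        A_zero_point, A,
        B_zero_point, Bp,
        C_multiplier, C_zero_point, C,
        col_offsets, bias,
        /*fuse_relu=*/false,
        /*act_times_w_scale=*/1.0f,
        omp_get_thread_num(), omp_get_num_threads());
  }
}
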
+/** To be removed. Kept temporarily so that Caffe2 (C2) files and fbgemm
+ * files are not changed in a single diff.
+ */
+FBGEMM_API void depthwise_3x3x3_per_channel_quantization_pad_1(
+ int N,
+ int T,
+ int H,
+ int W,
+ int K,
+ int stride_t,
+ int stride_h,
+ int stride_w,
+ std::int32_t A_zero_point,
+ const std::uint8_t* A,
+ const std::int32_t* B_zero_point,
+ const Packed3x3x3ConvMatrix& Bp,
+ const float* C_multiplier,
+ std::int32_t C_zero_point,
+ std::uint8_t* C,
+ const std::int32_t* col_offsets,
const std::int32_t* bias,
bool fuse_relu = false,
int thread_id = 0,
@@ -130,6 +236,7 @@ FBGEMM_API void depthwise_3x3x3_pad_1(
/**
* @col_offsets nullptr if col_offsets are folded into bias
*/
+template <typename BIAS_TYPE = std::int32_t>
FBGEMM_API void depthwise_3x3x3_per_channel_quantization_pad_1(
int N,
int T,
@@ -147,8 +254,9 @@ FBGEMM_API void depthwise_3x3x3_per_channel_quantization_pad_1(
std::int32_t C_zero_point,
std::uint8_t* C,
const std::int32_t* col_offsets,
- const std::int32_t* bias,
+ const BIAS_TYPE* bias,
bool fuse_relu = false,
+ const float* act_times_w_scale = nullptr,
int thread_id = 0,
int num_threads = 1);
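
Finally, the per-channel 3D variant combines both extensions: per-channel B_zero_point, C_multiplier, and act_times_w_scale arrays plus the T dimension and stride_t. A minimal sketch (not part of the patch), with BIAS_TYPE written out explicitly and every non-fbgemm name a placeholder:

// Usage sketch: 3x3x3 depthwise convolution with per-channel requantization
// and an unquantized float bias; the template argument is spelled explicitly.
#include <cstdint>
#include "fbgemm/FbgemmI8DepthwiseAvx2.h"

void run_dw3x3x3_per_channel(
    int N, int T, int H, int W, int K,
    std::int32_t A_zero_point, const std::uint8_t* A,
    const std::int32_t* B_zero_point,  // length K
    const fbgemm::Packed3x3x3ConvMatrix& Bp,
    const float* C_multiplier,         // length K
    std::int32_t C_zero_point, std::uint8_t* C,
    const std::int32_t* col_offsets,   // or nullptr if folded into bias
    const float* bias,                 // length K, unquantized
    const float* act_times_w_scale) {  // length K
  fbgemm::depthwise_3x3x3_per_channel_quantization_pad_1<float>(
      N, T, H, W, K,
      /*stride_t=*/1, /*stride_h=*/1, /*stride_w=*/1,
      A_zero_point, A,
      B_zero_point, Bp,
      C_multiplier, C_zero_point, C,
      col_offsets, bias,
      /*fuse_relu=*/true,
      act_times_w_scale,
      /*thread_id=*/0, /*num_threads=*/1);
}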