diff options
Diffstat (limited to 'include/fbgemm/FbgemmI8DepthwiseAvx2.h')
-rw-r--r-- | include/fbgemm/FbgemmI8DepthwiseAvx2.h | 116 |
1 file changed, 112 insertions(+), 4 deletions(-)
diff --git a/include/fbgemm/FbgemmI8DepthwiseAvx2.h b/include/fbgemm/FbgemmI8DepthwiseAvx2.h index 98c4ed7..19946cf 100644 --- a/include/fbgemm/FbgemmI8DepthwiseAvx2.h +++ b/include/fbgemm/FbgemmI8DepthwiseAvx2.h @@ -50,12 +50,39 @@ using Packed5ConvMatrix = PackedDepthWiseConvMatrix<5>; using Packed10ConvMatrix = PackedDepthWiseConvMatrix<10>; using Packed11ConvMatrix = PackedDepthWiseConvMatrix<11>; +/** To be removed. Keeping it just to make sure we don't change C2 files and + * fbgemm files in a single diff + * + */ +FBGEMM_API void depthwise_3x3_pad_1( + int N, + int H, + int W, + int K, + int stride_h, + int stride_w, + std::int32_t A_zero_point, + const std::uint8_t* A, + std::int32_t B_zero_point, + const Packed3x3ConvMatrix& Bp, + float C_multiplier, + std::int32_t C_zero_point, + std::uint8_t* C, + const std::int32_t* col_offsets, + const std::int32_t* bias, + bool fuse_relu = false, + int thread_id = 0, + int num_threads = 1); + /** - * Depth-wise 3x3 convolution with pad=1 and K a multiple of 8, fused with - * requantization. + * Depth-wise 3x3 convolution with pad=1 and stride=1 and K a multiple of 8 + * This version is fused with requantization. * * @col_offsets nullptr if col_offsets are folded into bias + * @act_times_w_scale Only used if BIAS_TYPE is float, i.e., bias is + * unquantized. 
*/ +template <typename BIAS_TYPE = std::int32_t> FBGEMM_API void depthwise_3x3_pad_1( int N, int H, @@ -71,8 +98,9 @@ FBGEMM_API void depthwise_3x3_pad_1( std::int32_t C_zero_point, std::uint8_t* C, const std::int32_t* col_offsets, - const std::int32_t* bias, + const BIAS_TYPE* bias, bool fuse_relu = false, + float act_times_w_scale = 1.0f, int thread_id = 0, int num_threads = 1); @@ -82,6 +110,31 @@ FBGEMM_API void depthwise_3x3_pad_1( * * @col_offsets nullptr if col_offsets are folded into bias */ +template <typename BIAS_TYPE = std::int32_t> +FBGEMM_API void depthwise_3x3_per_channel_quantization_pad_1( + int N, + int H, + int W, + int K, + int stride_h, + int stride_w, + std::int32_t A_zero_point, + const std::uint8_t* A, + const std::int32_t* B_zero_point, + const Packed3x3ConvMatrix& Bp, + const float* C_multiplier, + std::int32_t C_zero_point, + std::uint8_t* C, + const std::int32_t* col_offsets, + const BIAS_TYPE* bias, + bool fuse_relu = false, + const float* act_times_w_scale = nullptr, + int thread_id = 0, + int num_threads = 1); + +/** To be removed. Keeping it just to make sure we don't change C2 files and + * fbgemm files in a single diff + */ FBGEMM_API void depthwise_3x3_per_channel_quantization_pad_1( int N, int H, @@ -102,9 +155,35 @@ FBGEMM_API void depthwise_3x3_per_channel_quantization_pad_1( int thread_id = 0, int num_threads = 1); +/** To be removed. 
Keeping it just to make sure we don't change C2 files and + * fbgemm files in a single diff + * + */ +FBGEMM_API void depthwise_3x3x3_pad_1( + int N, + int T, + int H, + int W, + int K, + int stride_t, + int stride_h, + int stride_w, + std::int32_t A_zero_point, + const std::uint8_t* A, + std::int32_t B_zero_point, + const Packed3x3x3ConvMatrix& Bp, + float C_multiplier, + std::int32_t C_zero_point, + std::uint8_t* C, + const std::int32_t* col_offsets, + const std::int32_t* bias, + bool fuse_relu = false, + int thread_id = 0, + int num_threads = 1); /** * @col_offsets nullptr if col_offsets are folded into bias */ +template <typename BIAS_TYPE = std::int32_t> FBGEMM_API void depthwise_3x3x3_pad_1( int N, int T, @@ -122,6 +201,33 @@ FBGEMM_API void depthwise_3x3x3_pad_1( std::int32_t C_zero_point, std::uint8_t* C, const std::int32_t* col_offsets, + const BIAS_TYPE* bias, + bool fuse_relu = false, + float act_times_w_scale = 1.0f, + int thread_id = 0, + int num_threads = 1); + +/** To be removed. 
Keeping it just to make sure we don't change C2 files and + * fbgemm files in a single diff + * + */ +FBGEMM_API void depthwise_3x3x3_per_channel_quantization_pad_1( + int N, + int T, + int H, + int W, + int K, + int stride_t, + int stride_h, + int stride_w, + std::int32_t A_zero_point, + const std::uint8_t* A, + const std::int32_t* B_zero_point, + const Packed3x3x3ConvMatrix& Bp, + const float* C_multiplier, + std::int32_t C_zero_point, + std::uint8_t* C, + const std::int32_t* col_offsets, const std::int32_t* bias, bool fuse_relu = false, int thread_id = 0, @@ -130,6 +236,7 @@ FBGEMM_API void depthwise_3x3x3_pad_1( /** * @col_offsets nullptr if col_offsets are folded into bias */ +template <typename BIAS_TYPE = std::int32_t> FBGEMM_API void depthwise_3x3x3_per_channel_quantization_pad_1( int N, int T, @@ -147,8 +254,9 @@ FBGEMM_API void depthwise_3x3x3_per_channel_quantization_pad_1( std::int32_t C_zero_point, std::uint8_t* C, const std::int32_t* col_offsets, - const std::int32_t* bias, + const BIAS_TYPE* bias, bool fuse_relu = false, + const float* act_times_w_scale = nullptr, int thread_id = 0, int num_threads = 1); |