From 844dacc267391cd2a725d81c2495636f0765771b Mon Sep 17 00:00:00 2001 From: Jianyu Huang Date: Thu, 7 Mar 2019 18:02:55 -0800 Subject: Fixes for FBGEMM FP16 performance (#82) Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/82 This is a quick fix for matching FBGEMM FP16 performance with SKINNY GEMM FP16. Basically, this Diff switches the register layout in C accumulation buffer inside micro-kernel from MR * 1 to MR * 2. Check the reasons in T40816746. Reviewed By: zhengwy888 Differential Revision: D14278430 fbshipit-source-id: 961dd681deee69e2b7fec6bcdba7920e0b09134a --- src/FbgemmFP16UKernelsAvx2.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'src/FbgemmFP16UKernelsAvx2.h') diff --git a/src/FbgemmFP16UKernelsAvx2.h b/src/FbgemmFP16UKernelsAvx2.h index 4053332..6e7dfbc 100644 --- a/src/FbgemmFP16UKernelsAvx2.h +++ b/src/FbgemmFP16UKernelsAvx2.h @@ -24,20 +24,12 @@ struct GemmParams { uint64_t b_block_cols; uint64_t b_block_size; }; -void __attribute__((noinline)) gemmkernel_1x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_2x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_3x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_4x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_5x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_6x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_7x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_8x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_9x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_10x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_11x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_12x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_13x1_AVX2_fA0fB0fC0(GemmParams* gp); -void __attribute__((noinline)) gemmkernel_14x1_AVX2_fA0fB0fC0(GemmParams* gp); +void __attribute__((noinline)) gemmkernel_1x2_AVX2_fA0fB0fC0(GemmParams* gp); +void __attribute__((noinline)) gemmkernel_2x2_AVX2_fA0fB0fC0(GemmParams* gp); +void __attribute__((noinline)) gemmkernel_3x2_AVX2_fA0fB0fC0(GemmParams* gp); +void __attribute__((noinline)) gemmkernel_4x2_AVX2_fA0fB0fC0(GemmParams* gp); +void __attribute__((noinline)) gemmkernel_5x2_AVX2_fA0fB0fC0(GemmParams* gp); +void __attribute__((noinline)) gemmkernel_6x2_AVX2_fA0fB0fC0(GemmParams* gp); typedef void (*funcptr_fp16)(GemmParams* gp); ; -- cgit v1.2.3