Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJianyu Huang <jianyuhuang@fb.com>2019-03-08 05:02:55 +0300
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>2019-03-08 05:05:39 +0300
commit844dacc267391cd2a725d81c2495636f0765771b (patch)
treecb20d367ce086dfb4374a7794fa8d889ffead09b /src/FbgemmFP16UKernelsAvx2.h
parent66b41357561f2ff9895d2b4638273f07c49dbe29 (diff)
Fixes for FBGEMM FP16 performance (#82)
Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/82 This is a quick fix for matching FBGEMM FP16 performance with SKINNY GEMM FP16. Basically, this Diff switches the register layout in C accumulation buffer inside micro-kernel from MR * 1 to MR * 2. Check the reasons in T40816746. Reviewed By: zhengwy888 Differential Revision: D14278430 fbshipit-source-id: 961dd681deee69e2b7fec6bcdba7920e0b09134a
Diffstat (limited to 'src/FbgemmFP16UKernelsAvx2.h')
-rw-r--r--src/FbgemmFP16UKernelsAvx2.h20
1 files changed, 6 insertions, 14 deletions
diff --git a/src/FbgemmFP16UKernelsAvx2.h b/src/FbgemmFP16UKernelsAvx2.h
index 4053332..6e7dfbc 100644
--- a/src/FbgemmFP16UKernelsAvx2.h
+++ b/src/FbgemmFP16UKernelsAvx2.h
@@ -24,20 +24,12 @@ struct GemmParams {
uint64_t b_block_cols;
uint64_t b_block_size;
};
-void __attribute__((noinline)) gemmkernel_1x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_2x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_3x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_4x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_5x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_6x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_7x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_8x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_9x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_10x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_11x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_12x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_13x1_AVX2_fA0fB0fC0(GemmParams* gp);
-void __attribute__((noinline)) gemmkernel_14x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_1x2_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_2x2_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_3x2_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_4x2_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_5x2_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_6x2_AVX2_fA0fB0fC0(GemmParams* gp);
typedef void (*funcptr_fp16)(GemmParams* gp);
;