From 3af8fe54149d9451d593c635d616d2c380e21acb Mon Sep 17 00:00:00 2001
From: Daya S Khudia
Date: Wed, 5 Dec 2018 14:13:59 -0800
Subject: Final cleanup for avx2 isolation and consistent file names (#40)

Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/40

File name changes + removal of -mavx2 compiler flag non-avx files

This completes the separation of avx2 code to few files that make minimal use of c++ std lib.

Reviewed By: jianyuh

Differential Revision: D13330577

fbshipit-source-id: b469ebee484168800ce2d12fd2356edecbf0fa4d
---
 src/FbgemmFP16UKernelsAvx2.h | 46 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 src/FbgemmFP16UKernelsAvx2.h

(limited to 'src/FbgemmFP16UKernelsAvx2.h')

diff --git a/src/FbgemmFP16UKernelsAvx2.h b/src/FbgemmFP16UKernelsAvx2.h
new file mode 100644
index 0000000..4053332
--- /dev/null
+++ b/src/FbgemmFP16UKernelsAvx2.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ * All rights reserved.
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#ifndef FBGEMM_UKERNELS
+#define FBGEMM_UKERNELS
+// <cstdint> supplies uint64_t, which GemmParams uses for its integer fields.
+#include <cstdint>
+#include "fbgemm/Types.h"
+
+namespace fbgemm {
+
+// Short element-type aliases used by the declarations in this header.
+using fp16 = float16;
+using fp32 = float;
+
+// Argument block passed by pointer to every gemmkernel_*_AVX2_fA0fB0fC0
+// function declared below.
+//
+// NOTE(review): this header does not define what the fields mean. The
+// per-field notes are assumptions inferred from the names and types only and
+// must be confirmed against the kernel definitions. The kernels may also rely
+// on this exact member order and size -- verify before reordering or resizing
+// any member.
+struct GemmParams {
+  uint64_t k; // presumably the shared (inner) GEMM dimension -- TODO confirm
+  float* A; // presumably the fp32 input matrix -- TODO confirm
+  const fp16* B; // fp16 data; not modifiable through this pointer
+  float* beta; // presumably points to the scale applied to C -- TODO confirm
+  uint64_t accum; // presumably an accumulate-into-C flag -- TODO confirm
+  float* C; // presumably the fp32 output matrix -- TODO confirm
+  uint64_t ldc; // presumably the leading dimension of C -- TODO confirm
+  uint64_t b_block_cols; // presumably the number of column blocks of B -- TODO confirm
+  uint64_t b_block_size; // presumably the size of one block of B -- TODO confirm
+};
+
+// Kernel entry points. Only declarations live here; the definitions are in
+// another translation unit. noinline asks the compiler not to inline calls to
+// them. The <N> in gemmkernel_<N>x1 runs from 1 to 14.
+// NOTE(review): <N> presumably is the number of rows handled per call --
+// confirm against the kernel definitions.
+void __attribute__((noinline)) gemmkernel_1x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_2x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_3x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_4x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_5x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_6x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_7x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_8x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_9x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_10x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_11x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_12x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_13x1_AVX2_fA0fB0fC0(GemmParams* gp);
+void __attribute__((noinline)) gemmkernel_14x1_AVX2_fA0fB0fC0(GemmParams* gp);
+
+// Pointer-to-kernel type matching the signature shared by all kernels above.
+using funcptr_fp16 = void (*)(GemmParams* gp);
+
+} // namespace fbgemm
+
+#endif -- cgit v1.2.3