github.com/marian-nmt/FBGEMM.git
author     Jongsoo Park <jongsoo@fb.com>  2018-11-19 07:17:54 +0300
committer  Facebook Github Bot <facebook-github-bot@users.noreply.github.com>  2018-11-19 07:19:53 +0300
commit     6d00b52ee80787065561ac16942e55042ed6c760 (patch)
tree       95986eb1a6e4a05e43c7b3aa21c31addedf9453b /src/FbgemmFP16.cc
parent     0581331ce7c2b528bc3c726c64d3baf74d12c959 (diff)
clang-format (#11)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/11

clang format of fbgemm

Reviewed By: dskhudia

Differential Revision: D13115202

fbshipit-source-id: 6dab29cb8b5f4fabcc165019663351567a2a2952
Diffstat (limited to 'src/FbgemmFP16.cc')
-rw-r--r--  src/FbgemmFP16.cc  59
1 file changed, 34 insertions(+), 25 deletions(-)
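
The change itself is mechanical: the listed source was run through clang-format. As a rough sketch only (the repository's actual .clang-format options are not shown on this page and may differ), a reformat like the one in this diff could be reproduced with a style file along these lines and an in-place run:

    # Hypothetical .clang-format sketch; option values are guesses consistent
    # with the wrapping style visible in this diff, not FBGEMM's real config.
    BasedOnStyle: Google
    BinPackArguments: false
    BinPackParameters: false

    # Apply in place to the touched file (or to the whole tree via find/xargs).
    clang-format -i src/FbgemmFP16.cc
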
diff --git a/src/FbgemmFP16.cc b/src/FbgemmFP16.cc
index f54feb8..d819cdd 100644
--- a/src/FbgemmFP16.cc
+++ b/src/FbgemmFP16.cc
@@ -27,26 +27,34 @@ inline void PackA(int nrow, int ncol, const float* from, int ldim, float* to) {
// to[r + c * nrow] = from[r * ldim + c];
// }
// }
- transpose_simd( nrow, ncol, from, ldim, to, nrow );
+ transpose_simd(nrow, ncol, from, ldim, to, nrow);
}
struct KernelInfo {
using knl_ptr = funcptr_fp16;
// optimized kernels to cover all cases
- static constexpr array<knl_ptr, 15> kernel = {{
- nullptr, gemmkernel_1x1_AVX2_fA0fB0fC0,
- gemmkernel_2x1_AVX2_fA0fB0fC0, gemmkernel_3x1_AVX2_fA0fB0fC0,
- gemmkernel_4x1_AVX2_fA0fB0fC0, gemmkernel_5x1_AVX2_fA0fB0fC0,
- gemmkernel_6x1_AVX2_fA0fB0fC0, gemmkernel_7x1_AVX2_fA0fB0fC0,
- gemmkernel_8x1_AVX2_fA0fB0fC0, gemmkernel_9x1_AVX2_fA0fB0fC0,
- gemmkernel_10x1_AVX2_fA0fB0fC0, gemmkernel_11x1_AVX2_fA0fB0fC0,
- gemmkernel_12x1_AVX2_fA0fB0fC0, gemmkernel_13x1_AVX2_fA0fB0fC0,
- gemmkernel_14x1_AVX2_fA0fB0fC0
- }};
+ static constexpr array<knl_ptr, 15> kernel = {
+ {nullptr,
+ gemmkernel_1x1_AVX2_fA0fB0fC0,
+ gemmkernel_2x1_AVX2_fA0fB0fC0,
+ gemmkernel_3x1_AVX2_fA0fB0fC0,
+ gemmkernel_4x1_AVX2_fA0fB0fC0,
+ gemmkernel_5x1_AVX2_fA0fB0fC0,
+ gemmkernel_6x1_AVX2_fA0fB0fC0,
+ gemmkernel_7x1_AVX2_fA0fB0fC0,
+ gemmkernel_8x1_AVX2_fA0fB0fC0,
+ gemmkernel_9x1_AVX2_fA0fB0fC0,
+ gemmkernel_10x1_AVX2_fA0fB0fC0,
+ gemmkernel_11x1_AVX2_fA0fB0fC0,
+ gemmkernel_12x1_AVX2_fA0fB0fC0,
+ gemmkernel_13x1_AVX2_fA0fB0fC0,
+ gemmkernel_14x1_AVX2_fA0fB0fC0}};
// autotuned kernel splits for various cases m = 1:mb_max
// may need re-autotuning for new uarch
- static constexpr array<array<array<int, 2>, 2>, 121 > partition = {
+ static constexpr array<array<array<int, 2>, 2>, 121> partition = {
+ // NOTE: clang-format wants to use a different formatting but the current
+ // formatting should be easier to read.
{
{{ { 0, 0 }, { 0, 0 } } },
{{ { 1, 1 }, { 0, 0 } } },
@@ -173,13 +181,16 @@ struct KernelInfo {
};
};
constexpr array<KernelInfo::knl_ptr, 15> KernelInfo::kernel;
-constexpr array<array<array<int, 2>, 2>, 121 > KernelInfo::partition;
+constexpr array<array<array<int, 2>, 2>, 121> KernelInfo::partition;
// autotuned kernel splits for various cases m = 1:mb_max
-void
-cblas_gemm_compute(const matrix_op_t transa, const int m, const float *A,
- const PackedGemmMatrixFP16 &Bp, const float beta,
- float *C) {
+void cblas_gemm_compute(
+ const matrix_op_t transa,
+ const int m,
+ const float* A,
+ const PackedGemmMatrixFP16& Bp,
+ const float beta,
+ float* C) {
// ground truth
assert(cpuinfo_initialize());
assert(cpuinfo_has_x86_fma3());
@@ -194,7 +205,7 @@ cblas_gemm_compute(const matrix_op_t transa, const int m, const float *A,
constexpr int kernel_ncols = kernel_ncol_blocks * simd_width;
// private scratchpad storage
- static thread_local unique_ptr<std::array<float, 256 * 1024> > scratchpad(
+ static thread_local unique_ptr<std::array<float, 256 * 1024>> scratchpad(
new std::array<float, 256 * 1024>());
GemmParams gp;
@@ -202,7 +213,6 @@ cblas_gemm_compute(const matrix_op_t transa, const int m, const float *A,
int mb = std::min(mb_max, m - m0);
assert(mb < KernelInfo::partition.size());
for (auto k_ind = 0; k_ind < k; k_ind += Bp.blockRowSize()) {
-
// set up proper accumulation to avoid "Nan" problem
float beta_;
uint64_t accum;
@@ -221,7 +231,6 @@ cblas_gemm_compute(const matrix_op_t transa, const int m, const float *A,
auto m1 = 0;
for (auto c = 0; c < 2; c++) {
-
auto kernel_nrows = KernelInfo::partition[mb][c][0];
auto nkernel_nrows = KernelInfo::partition[mb][c][1];
@@ -261,7 +270,7 @@ cblas_gemm_compute(const matrix_op_t transa, const int m, const float *A,
KernelInfo::kernel[kernel_nrows](&gp);
} else {
// small temporary buffer
- float c_tmp[16 * 24] = { 0 };
+ float c_tmp[16 * 24] = {0};
assert((16 * 24) > kernel_nrows * kernel_ncols);
gp.B = &(Bp(k_ind, last_blk_col));
@@ -272,13 +281,14 @@ cblas_gemm_compute(const matrix_op_t transa, const int m, const float *A,
for (int i = 0; i < kernel_nrows; i++) {
// Todo: use assembly
for (int j = last_blk_col; j < n; j++) {
- assert(i * 8 + (j - last_blk_col) <
- sizeof(c_tmp) / sizeof(c_tmp[0]));
+ assert(
+ i * 8 + (j - last_blk_col) <
+ sizeof(c_tmp) / sizeof(c_tmp[0]));
if (accum == 0) {
C[(m2 + i) * ldc + j] = c_tmp[i * 8 + (j - last_blk_col)];
} else {
C[(m2 + i) * ldc + j] = beta_ * C[(m2 + i) * ldc + j] +
- c_tmp[i * 8 + (j - last_blk_col)];
+ c_tmp[i * 8 + (j - last_blk_col)];
}
}
}
@@ -291,5 +301,4 @@ cblas_gemm_compute(const matrix_op_t transa, const int m, const float *A,
}
}
-
} // namespace fbgemm
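
For context, cblas_gemm_compute, whose declaration is re-wrapped above, is the FP16 GEMM entry point in namespace fbgemm. The sketch below shows one plausible way to call it: the cblas_gemm_compute signature and the AVX2/FMA3 requirement come straight from the diff, while the header path, the PackedGemmMatrixFP16 constructor arguments, and the matrix_op_t enumerator name are assumptions that may not match this exact revision.

    // Minimal, hedged usage sketch of the routine reformatted above.
    // Assumed: header path, PackedGemmMatrixFP16 ctor, matrix_op_t::NoTranspose.
    #include <vector>
    #include "fbgemm/FbgemmFP16.h"

    int main() {
      const int m = 4, k = 16, n = 8;
      std::vector<float> A(m * k, 1.0f); // row-major activations
      std::vector<float> B(k * n, 0.5f); // row-major fp32 weights
      std::vector<float> C(m * n, 0.0f); // row-major output

      // Pack B into the fp16 layout once; the packed matrix is reused per call.
      fbgemm::PackedGemmMatrixFP16 Bp(
          fbgemm::matrix_op_t::NoTranspose, k, n, /*alpha=*/1.0f, B.data());

      // C = A * B; beta = 0 so the previous contents of C are ignored.
      // Requires an AVX2 + FMA3 capable CPU (see the asserts in the diff).
      fbgemm::cblas_gemm_compute(
          fbgemm::matrix_op_t::NoTranspose, m, A.data(), Bp, /*beta=*/0.0f, C.data());
      return 0;
    }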