diff options
author | Jianyu Huang <jianyuhuang@fb.com> | 2018-12-21 22:18:44 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-12-21 22:21:05 +0300 |
commit | 4691d5bcb0756b69baf4f54e45d42ba75d133464 (patch) | |
tree | 52ae9b39a1d3ea44bc5ac53bb57d3dc74c85bf38 /src | |
parent | d5810be02d4b7b90a5aec746f98841c9e585f6d4 (diff) |
Update with clang format (#51)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/51
Use Clang formatting with "clang-format -i *.cc *.h".
Reviewed By: dskhudia
Differential Revision: D13532121
fbshipit-source-id: 6792d008f3295c128942f4896e8221aebbf2566e
Diffstat (limited to 'src')
-rw-r--r-- | src/ExecuteKernelU8S8.cc | 6 | ||||
-rw-r--r-- | src/Fbgemm.cc | 2 | ||||
-rw-r--r-- | src/FbgemmI8DepthwiseAvx2.cc | 46 | ||||
-rw-r--r-- | src/FbgemmI8Spmdm.cc | 6 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16.cc | 1 | ||||
-rw-r--r-- | src/PackAWithIm2Col.cc | 2 | ||||
-rw-r--r-- | src/PackAWithQuantRowOffset.cc | 2 | ||||
-rw-r--r-- | src/PackAWithRowOffset.cc | 2 | ||||
-rw-r--r-- | src/QuantUtils.cc | 20 | ||||
-rw-r--r-- | src/Utils.cc | 2 | ||||
-rw-r--r-- | src/UtilsAvx2.cc | 3 | ||||
-rw-r--r-- | src/UtilsAvx512.cc | 2 |
12 files changed, 31 insertions, 63 deletions
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc index 3b6e059..e7f7c70 100644 --- a/src/ExecuteKernelU8S8.cc +++ b/src/ExecuteKernelU8S8.cc @@ -288,7 +288,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset); INSTANTIATE_Q_GRANS(ACC_T, RELU, 2); \ INSTANTIATE_Q_GRANS(ACC_T, RELU, 3); -#define INSTANTIATE_RELU(ACC_T) \ +#define INSTANTIATE_RELU(ACC_T) \ INSTANTIATE_SPATIAL_DIM(ACC_T, false); \ INSTANTIATE_SPATIAL_DIM(ACC_T, true); @@ -410,7 +410,7 @@ template class ExecuteKernel< template class ExecuteKernel< \ PACK_A<uint8_t, ACC_T>, \ PackBMatrix<int8_t, ACC_T>, \ - int32_t, \ + int32_t, \ memCopy<>>; #define INSTANTIATE_ACC_T(PACK_A) \ @@ -427,7 +427,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset); template class ExecuteKernel< \ PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>, \ PackBMatrix<int8_t, ACC_T>, \ - int32_t, \ + int32_t, \ memCopy<>>; #define INSTANTIATE_SPATIAL_DIM(ACC_T) \ diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc index b7a99c6..45108d0 100644 --- a/src/Fbgemm.cc +++ b/src/Fbgemm.cc @@ -254,7 +254,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset); INSTANTIATE_Q_GRANS(ACC_T, RELU, 2); \ INSTANTIATE_Q_GRANS(ACC_T, RELU, 3); -#define INSTANTIATE_RELU(ACC_T) \ +#define INSTANTIATE_RELU(ACC_T) \ INSTANTIATE_SPATIAL_DIM(ACC_T, false); \ INSTANTIATE_SPATIAL_DIM(ACC_T, true); diff --git a/src/FbgemmI8DepthwiseAvx2.cc b/src/FbgemmI8DepthwiseAvx2.cc index ace3f7e..9dcd7e1 100644 --- a/src/FbgemmI8DepthwiseAvx2.cc +++ b/src/FbgemmI8DepthwiseAvx2.cc @@ -112,13 +112,12 @@ PackedDepthWiseConvMatrix<KERNEL_PROD>::PackedDepthWiseConvMatrix( reinterpret_cast<const __m256i*>(masks[remainder / 4])); for (int i = 0; i < KERNEL_PROD; ++i) { b_v[i] = _mm256_maskload_epi32( - reinterpret_cast<const int*>(smat_transposed + i * K + k1), - mask_v); + reinterpret_cast<const int*>(smat_transposed + i * K + k1), mask_v); } } else { for (int i = 0; i < KERNEL_PROD; ++i) { - b_v[i] = _mm256_lddqu_si256(reinterpret_cast<const __m256i*>( - smat_transposed + i * K + k1)); + b_v[i] = _mm256_lddqu_si256( + reinterpret_cast<const __m256i*>(smat_transposed + i * K + k1)); } } @@ -457,14 +456,7 @@ static inline __attribute__((always_inline)) void inner_prod_packed_( a_v[k], Bp + k, &c[0], &c[1], &c[2], &c[3], a_sum_temp); } else if (K - k == 2) { madd_epi16x2_packed<SUM_A>( - a_v[k], - a_v[k + 1], - Bp + k, - &c[0], - &c[1], - &c[2], - &c[3], - a_sum_temp); + a_v[k], a_v[k + 1], Bp + k, &c[0], &c[1], &c[2], &c[3], a_sum_temp); } c[0] = _mm256_add_epi32(c[0], c_temp[0]); @@ -860,11 +852,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3_packed_( __m256i a_sum[4]; inner_prod_3x3_packed_<SUM_A, REMAINDER>( - a_v, - reinterpret_cast<const __m256i*>(Bp), - C, - remainder, - a_sum); + a_v, reinterpret_cast<const __m256i*>(Bp), C, remainder, a_sum); if (SUM_A) { __m256i B_zero_point_v; for (int i = 0; i < (REMAINDER ? (remainder / 8) : 4); ++i) { @@ -974,11 +962,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_( __m256i a_sum[4]; inner_prod_packed_<8, SUM_A, REMAINDER>( - a_v, - reinterpret_cast<const __m256i*>(Bp), - C, - remainder, - a_sum); + a_v, reinterpret_cast<const __m256i*>(Bp), C, remainder, a_sum); a_v[0] = A_zero_point_v; a_v[1] = A_zero_point_v; @@ -1031,11 +1015,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_( __m256i a_sum_temp[4]; inner_prod_packed_<8, SUM_A, REMAINDER, true /* acc */>( - a_v, - reinterpret_cast<const __m256i*>(Bp) + 8, - C, - remainder, - a_sum_temp); + a_v, reinterpret_cast<const __m256i*>(Bp) + 8, C, remainder, a_sum_temp); if (SUM_A) { a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]); a_sum[1] = _mm256_add_epi32(a_sum[1], a_sum_temp[1]); @@ -1090,11 +1070,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_( } inner_prod_packed_<8, SUM_A, REMAINDER, true /* acc */>( - a_v, - reinterpret_cast<const __m256i*>(Bp) + 16, - C, - remainder, - a_sum_temp); + a_v, reinterpret_cast<const __m256i*>(Bp) + 16, C, remainder, a_sum_temp); if (SUM_A) { a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]); a_sum[1] = _mm256_add_epi32(a_sum[1], a_sum_temp[1]); @@ -1121,11 +1097,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_( } inner_prod_packed_<3, SUM_A, REMAINDER, true /* acc */>( - a_v, - reinterpret_cast<const __m256i*>(Bp) + 24, - C, - remainder, - a_sum_temp); + a_v, reinterpret_cast<const __m256i*>(Bp) + 24, C, remainder, a_sum_temp); if (SUM_A) { a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]); diff --git a/src/FbgemmI8Spmdm.cc b/src/FbgemmI8Spmdm.cc index 8d69c9f..c249871 100644 --- a/src/FbgemmI8Spmdm.cc +++ b/src/FbgemmI8Spmdm.cc @@ -236,8 +236,7 @@ void CompressedSparseColumn::SparseConv( // TODO: if not hyper sparse, transpose a block of A matrix as in SpMDM. if (!accumulation) { for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { - for (int j = block.col_start; j < block.col_start + block.col_size; - ++j) { + for (int j = block.col_start; j < block.col_start + block.col_size; ++j) { C[(i - block.row_start) * ldc + j - block.col_start] = 0; } } @@ -245,8 +244,7 @@ void CompressedSparseColumn::SparseConv( for (int j = block.col_start; j < block.col_start + block.col_size; ++j) { for (int k = colptr_[j]; k < colptr_[j + 1]; ++k) { int v = values_[k]; - for (int i = block.row_start; i < block.row_start + block.row_size; - ++i) { + for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { int ow = i % conv_p.OUT_DIM[1]; int oh = i / conv_p.OUT_DIM[1] % conv_p.OUT_DIM[0]; int n = i / conv_p.OUT_DIM[1] / conv_p.OUT_DIM[0]; diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc index f845e44..b7bc676 100644 --- a/src/GenerateKernelU8S8S32ACC16.cc +++ b/src/GenerateKernelU8S8S32ACC16.cc @@ -21,7 +21,6 @@ thread_local std::map< typename CodeGenBase<TA, TB, TC, accT>::jit_micro_kernel_fp> CodeGenBase<TA, TB, TC, accT>::codeCache_; - namespace x86 = asmjit::x86; /** diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc index 5856942..f096d7a 100644 --- a/src/PackAWithIm2Col.cc +++ b/src/PackAWithIm2Col.cc @@ -11,8 +11,8 @@ #include <iostream> #include <numeric> -#include "fbgemm/Fbgemm.h" #include "OptimizedKernelsAvx2.h" +#include "fbgemm/Fbgemm.h" namespace fbgemm { diff --git a/src/PackAWithQuantRowOffset.cc b/src/PackAWithQuantRowOffset.cc index 98e862b..02e701e 100644 --- a/src/PackAWithQuantRowOffset.cc +++ b/src/PackAWithQuantRowOffset.cc @@ -11,9 +11,9 @@ #include <iomanip> #include <iostream> #include <stdexcept> +#include "OptimizedKernelsAvx2.h" #include "fbgemm/Fbgemm.h" #include "fbgemm/QuantUtilsAvx2.h" -#include "OptimizedKernelsAvx2.h" namespace fbgemm { diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc index 30da4c0..4882bb5 100644 --- a/src/PackAWithRowOffset.cc +++ b/src/PackAWithRowOffset.cc @@ -10,8 +10,8 @@ #include <iomanip> #include <iostream> #include <stdexcept> -#include "fbgemm/Fbgemm.h" #include "OptimizedKernelsAvx2.h" +#include "fbgemm/Fbgemm.h" namespace fbgemm { diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc index b346952..1d057e2 100644 --- a/src/QuantUtils.cc +++ b/src/QuantUtils.cc @@ -153,16 +153,16 @@ void ChooseRequantizationMultiplier( //////////////////////////////////////////////////////////////////////////////// // Utility functions -#define FBGEMM_SPECIALIZED_QUANTIZE(T) \ - template <> \ - void Quantize<T>( \ - const float* src, \ - T* dst, \ - const int len, \ - const TensorQuantizationParams& qparams) { \ - for (int i = 0; i < len; ++i) { \ - dst[i] = Quantize<T>(src[i], qparams); \ - } \ +#define FBGEMM_SPECIALIZED_QUANTIZE(T) \ + template <> \ + void Quantize<T>( \ + const float* src, \ + T* dst, \ + const int len, \ + const TensorQuantizationParams& qparams) { \ + for (int i = 0; i < len; ++i) { \ + dst[i] = Quantize<T>(src[i], qparams); \ + } \ } FBGEMM_SPECIALIZED_QUANTIZE(int8_t) FBGEMM_SPECIALIZED_QUANTIZE(uint16_t) diff --git a/src/Utils.cc b/src/Utils.cc index 88d029d..fcd8ade 100644 --- a/src/Utils.cc +++ b/src/Utils.cc @@ -5,7 +5,6 @@ * LICENSE file in the root directory of this source tree. */ #include "fbgemm/Utils.h" -#include "TransposeUtils.h" #include <cpuinfo.h> #include <cassert> #include <cinttypes> @@ -14,6 +13,7 @@ #include <iostream> #include <limits> #include <stdexcept> +#include "TransposeUtils.h" namespace fbgemm { diff --git a/src/UtilsAvx2.cc b/src/UtilsAvx2.cc index badf70b..5659200 100644 --- a/src/UtilsAvx2.cc +++ b/src/UtilsAvx2.cc @@ -4,8 +4,8 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ -#include "TransposeUtils.h" #include <immintrin.h> +#include "TransposeUtils.h" namespace fbgemm { @@ -145,7 +145,6 @@ inline void transpose_kernel_8x8_avx2( _mm256_storeu_ps(&dst[7 * ld_dst], h); } - void transpose_8x8( int M, int N, diff --git a/src/UtilsAvx512.cc b/src/UtilsAvx512.cc index f49bb6f..44d4f9a 100644 --- a/src/UtilsAvx512.cc +++ b/src/UtilsAvx512.cc @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "TransposeUtils.h" #include <immintrin.h> +#include "TransposeUtils.h" namespace fbgemm { |