diff options
author | Jianyu Huang <jianyuhuang@fb.com> | 2018-12-21 22:18:44 +0300 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-12-21 22:21:05 +0300 |
commit | 4691d5bcb0756b69baf4f54e45d42ba75d133464 (patch) | |
tree | 52ae9b39a1d3ea44bc5ac53bb57d3dc74c85bf38 | |
parent | d5810be02d4b7b90a5aec746f98841c9e585f6d4 (diff) |
Update with clang format (#51)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/51
Use Clang formatting with "clang-format -i *.cc *.h".
Reviewed By: dskhudia
Differential Revision: D13532121
fbshipit-source-id: 6792d008f3295c128942f4896e8221aebbf2566e
-rw-r--r-- | bench/PackedFloatInOutBenchmark.cc | 8 | ||||
-rw-r--r-- | bench/PackedRequantizeAcc16Benchmark.cc | 8 | ||||
-rw-r--r-- | bench/PackedRequantizeAcc32Benchmark.cc | 8 | ||||
-rw-r--r-- | include/fbgemm/Fbgemm.h | 4 | ||||
-rw-r--r-- | include/fbgemm/FbgemmI8Spmdm.h | 2 | ||||
-rw-r--r-- | include/fbgemm/OutputProcessing-inl.h | 2 | ||||
-rw-r--r-- | include/fbgemm/QuantUtils.h | 3 | ||||
-rw-r--r-- | include/fbgemm/Utils.h | 2 | ||||
-rw-r--r-- | src/ExecuteKernelU8S8.cc | 6 | ||||
-rw-r--r-- | src/Fbgemm.cc | 2 | ||||
-rw-r--r-- | src/FbgemmI8DepthwiseAvx2.cc | 46 | ||||
-rw-r--r-- | src/FbgemmI8Spmdm.cc | 6 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16.cc | 1 | ||||
-rw-r--r-- | src/PackAWithIm2Col.cc | 2 | ||||
-rw-r--r-- | src/PackAWithQuantRowOffset.cc | 2 | ||||
-rw-r--r-- | src/PackAWithRowOffset.cc | 2 | ||||
-rw-r--r-- | src/QuantUtils.cc | 20 | ||||
-rw-r--r-- | src/Utils.cc | 2 | ||||
-rw-r--r-- | src/UtilsAvx2.cc | 3 | ||||
-rw-r--r-- | src/UtilsAvx512.cc | 2 | ||||
-rw-r--r-- | test/Im2ColFusedRequantizeTest.cc | 2 | ||||
-rw-r--r-- | test/PackedRequantizeTest.cc | 3 |
22 files changed, 42 insertions, 94 deletions
diff --git a/bench/PackedFloatInOutBenchmark.cc b/bench/PackedFloatInOutBenchmark.cc index 79a750e..f2589f7 100644 --- a/bench/PackedFloatInOutBenchmark.cc +++ b/bench/PackedFloatInOutBenchmark.cc @@ -187,13 +187,7 @@ void performance_test() { row_offset_buf.data()); PackBMatrix<int8_t> packedBN( - matrix_op_t::NoTranspose, - k, - n, - Bint8.data(), - n, - nullptr, - 1); + matrix_op_t::NoTranspose, k, n, Bint8.data(), n, nullptr, 1); DoNothing<float, float> doNothingObj{}; ReQuantizeForFloat<false> outputProcObj( diff --git a/bench/PackedRequantizeAcc16Benchmark.cc b/bench/PackedRequantizeAcc16Benchmark.cc index f60332f..89cca4f 100644 --- a/bench/PackedRequantizeAcc16Benchmark.cc +++ b/bench/PackedRequantizeAcc16Benchmark.cc @@ -296,13 +296,7 @@ void performance_test() { PackAWithRowOffset<uint8_t, int16_t>::rowOffsetBufferSize()); PackAMatrix<uint8_t, int16_t> packA( - matrix_op_t::NoTranspose, - m, - k, - Aint8.data(), - k, - nullptr, - 1); + matrix_op_t::NoTranspose, m, k, Aint8.data(), k, nullptr, 1); PackAWithRowOffset<uint8_t, int16_t> packAWithRowOffset( matrix_op_t::NoTranspose, m, diff --git a/bench/PackedRequantizeAcc32Benchmark.cc b/bench/PackedRequantizeAcc32Benchmark.cc index b255b8c..5096475 100644 --- a/bench/PackedRequantizeAcc32Benchmark.cc +++ b/bench/PackedRequantizeAcc32Benchmark.cc @@ -209,13 +209,7 @@ void performance_test() { // offsets before"); PackBMatrix<int8_t> packedBN( - matrix_op_t::NoTranspose, - k, - n, - Bint8.data(), - n, - nullptr, - 1); + matrix_op_t::NoTranspose, k, n, Bint8.data(), n, nullptr, 1); ttot = 0.0; runType = "FBGEMM_i8_acc32"; diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h index 3cf832c..b729d56 100644 --- a/include/fbgemm/Fbgemm.h +++ b/include/fbgemm/Fbgemm.h @@ -15,11 +15,11 @@ #include <memory> #include <type_traits> #include "ConvUtils.h" +#include "FbgemmBuild.h" #include "FbgemmI8Spmdm.h" +#include "QuantUtilsAvx2.h" #include "Types.h" #include "Utils.h" -#include "FbgemmBuild.h" -#include "QuantUtilsAvx2.h" // Turning on this option will print out time breakdown of each stage (e.g., // input packing, the main GEMM kernel, each output processing pipeline). diff --git a/include/fbgemm/FbgemmI8Spmdm.h b/include/fbgemm/FbgemmI8Spmdm.h index 01416d5..93af6ea 100644 --- a/include/fbgemm/FbgemmI8Spmdm.h +++ b/include/fbgemm/FbgemmI8Spmdm.h @@ -9,8 +9,8 @@ #include <cstdint> #include <vector> #include "ConvUtils.h" -#include "Utils.h" #include "FbgemmBuild.h" +#include "Utils.h" // #define FBGEMM_MEASURE_TIME_BREAKDOWN diff --git a/include/fbgemm/OutputProcessing-inl.h b/include/fbgemm/OutputProcessing-inl.h index 2ff64f3..c250942 100644 --- a/include/fbgemm/OutputProcessing-inl.h +++ b/include/fbgemm/OutputProcessing-inl.h @@ -111,7 +111,7 @@ inline int ReQuantizeOutput<FUSE_RELU, Q_GRAN, outT, inT, nextOPType>::f( } else if (instSet == inst_set_t::avx2 || instSet == inst_set_t::avx512) { if (std::is_same<outT, uint8_t>::value) { bool b_symmetric = (Q_GRAN == QuantizationGranularity::TENSOR && - Bq_zero_point_[0] == 0) || + Bq_zero_point_[0] == 0) || q_row_offsets_ == nullptr; requantizationParams_t r = {Aq_zero_point_, diff --git a/include/fbgemm/QuantUtils.h b/include/fbgemm/QuantUtils.h index f658f73..43855d8 100644 --- a/include/fbgemm/QuantUtils.h +++ b/include/fbgemm/QuantUtils.h @@ -68,8 +68,7 @@ FBGEMM_API T Quantize( template <typename T> FBGEMM_API T Quantize(float src, const TensorQuantizationParams& qparams) { - return Quantize<T>( - src, qparams.zero_point, qparams.scale, qparams.precision); + return Quantize<T>(src, qparams.zero_point, qparams.scale, qparams.precision); } template <typename T> diff --git a/include/fbgemm/Utils.h b/include/fbgemm/Utils.h index a7e91cd..775e18c 100644 --- a/include/fbgemm/Utils.h +++ b/include/fbgemm/Utils.h @@ -7,8 +7,8 @@ #pragma once #include <string> #include <type_traits> -#include "UtilsAvx2.h" #include "FbgemmBuild.h" +#include "UtilsAvx2.h" namespace fbgemm { diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc index 3b6e059..e7f7c70 100644 --- a/src/ExecuteKernelU8S8.cc +++ b/src/ExecuteKernelU8S8.cc @@ -288,7 +288,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset); INSTANTIATE_Q_GRANS(ACC_T, RELU, 2); \ INSTANTIATE_Q_GRANS(ACC_T, RELU, 3); -#define INSTANTIATE_RELU(ACC_T) \ +#define INSTANTIATE_RELU(ACC_T) \ INSTANTIATE_SPATIAL_DIM(ACC_T, false); \ INSTANTIATE_SPATIAL_DIM(ACC_T, true); @@ -410,7 +410,7 @@ template class ExecuteKernel< template class ExecuteKernel< \ PACK_A<uint8_t, ACC_T>, \ PackBMatrix<int8_t, ACC_T>, \ - int32_t, \ + int32_t, \ memCopy<>>; #define INSTANTIATE_ACC_T(PACK_A) \ @@ -427,7 +427,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset); template class ExecuteKernel< \ PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>, \ PackBMatrix<int8_t, ACC_T>, \ - int32_t, \ + int32_t, \ memCopy<>>; #define INSTANTIATE_SPATIAL_DIM(ACC_T) \ diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc index b7a99c6..45108d0 100644 --- a/src/Fbgemm.cc +++ b/src/Fbgemm.cc @@ -254,7 +254,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset); INSTANTIATE_Q_GRANS(ACC_T, RELU, 2); \ INSTANTIATE_Q_GRANS(ACC_T, RELU, 3); -#define INSTANTIATE_RELU(ACC_T) \ +#define INSTANTIATE_RELU(ACC_T) \ INSTANTIATE_SPATIAL_DIM(ACC_T, false); \ INSTANTIATE_SPATIAL_DIM(ACC_T, true); diff --git a/src/FbgemmI8DepthwiseAvx2.cc b/src/FbgemmI8DepthwiseAvx2.cc index ace3f7e..9dcd7e1 100644 --- a/src/FbgemmI8DepthwiseAvx2.cc +++ b/src/FbgemmI8DepthwiseAvx2.cc @@ -112,13 +112,12 @@ PackedDepthWiseConvMatrix<KERNEL_PROD>::PackedDepthWiseConvMatrix( reinterpret_cast<const __m256i*>(masks[remainder / 4])); for (int i = 0; i < KERNEL_PROD; ++i) { b_v[i] = _mm256_maskload_epi32( - reinterpret_cast<const int*>(smat_transposed + i * K + k1), - mask_v); + reinterpret_cast<const int*>(smat_transposed + i * K + k1), mask_v); } } else { for (int i = 0; i < KERNEL_PROD; ++i) { - b_v[i] = _mm256_lddqu_si256(reinterpret_cast<const __m256i*>( - smat_transposed + i * K + k1)); + b_v[i] = _mm256_lddqu_si256( + reinterpret_cast<const __m256i*>(smat_transposed + i * K + k1)); } } @@ -457,14 +456,7 @@ static inline __attribute__((always_inline)) void inner_prod_packed_( a_v[k], Bp + k, &c[0], &c[1], &c[2], &c[3], a_sum_temp); } else if (K - k == 2) { madd_epi16x2_packed<SUM_A>( - a_v[k], - a_v[k + 1], - Bp + k, - &c[0], - &c[1], - &c[2], - &c[3], - a_sum_temp); + a_v[k], a_v[k + 1], Bp + k, &c[0], &c[1], &c[2], &c[3], a_sum_temp); } c[0] = _mm256_add_epi32(c[0], c_temp[0]); @@ -860,11 +852,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3_packed_( __m256i a_sum[4]; inner_prod_3x3_packed_<SUM_A, REMAINDER>( - a_v, - reinterpret_cast<const __m256i*>(Bp), - C, - remainder, - a_sum); + a_v, reinterpret_cast<const __m256i*>(Bp), C, remainder, a_sum); if (SUM_A) { __m256i B_zero_point_v; for (int i = 0; i < (REMAINDER ? (remainder / 8) : 4); ++i) { @@ -974,11 +962,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_( __m256i a_sum[4]; inner_prod_packed_<8, SUM_A, REMAINDER>( - a_v, - reinterpret_cast<const __m256i*>(Bp), - C, - remainder, - a_sum); + a_v, reinterpret_cast<const __m256i*>(Bp), C, remainder, a_sum); a_v[0] = A_zero_point_v; a_v[1] = A_zero_point_v; @@ -1031,11 +1015,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_( __m256i a_sum_temp[4]; inner_prod_packed_<8, SUM_A, REMAINDER, true /* acc */>( - a_v, - reinterpret_cast<const __m256i*>(Bp) + 8, - C, - remainder, - a_sum_temp); + a_v, reinterpret_cast<const __m256i*>(Bp) + 8, C, remainder, a_sum_temp); if (SUM_A) { a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]); a_sum[1] = _mm256_add_epi32(a_sum[1], a_sum_temp[1]); @@ -1090,11 +1070,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_( } inner_prod_packed_<8, SUM_A, REMAINDER, true /* acc */>( - a_v, - reinterpret_cast<const __m256i*>(Bp) + 16, - C, - remainder, - a_sum_temp); + a_v, reinterpret_cast<const __m256i*>(Bp) + 16, C, remainder, a_sum_temp); if (SUM_A) { a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]); a_sum[1] = _mm256_add_epi32(a_sum[1], a_sum_temp[1]); @@ -1121,11 +1097,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_( } inner_prod_packed_<3, SUM_A, REMAINDER, true /* acc */>( - a_v, - reinterpret_cast<const __m256i*>(Bp) + 24, - C, - remainder, - a_sum_temp); + a_v, reinterpret_cast<const __m256i*>(Bp) + 24, C, remainder, a_sum_temp); if (SUM_A) { a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]); diff --git a/src/FbgemmI8Spmdm.cc b/src/FbgemmI8Spmdm.cc index 8d69c9f..c249871 100644 --- a/src/FbgemmI8Spmdm.cc +++ b/src/FbgemmI8Spmdm.cc @@ -236,8 +236,7 @@ void CompressedSparseColumn::SparseConv( // TODO: if not hyper sparse, transpose a block of A matrix as in SpMDM. if (!accumulation) { for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { - for (int j = block.col_start; j < block.col_start + block.col_size; - ++j) { + for (int j = block.col_start; j < block.col_start + block.col_size; ++j) { C[(i - block.row_start) * ldc + j - block.col_start] = 0; } } @@ -245,8 +244,7 @@ void CompressedSparseColumn::SparseConv( for (int j = block.col_start; j < block.col_start + block.col_size; ++j) { for (int k = colptr_[j]; k < colptr_[j + 1]; ++k) { int v = values_[k]; - for (int i = block.row_start; i < block.row_start + block.row_size; - ++i) { + for (int i = block.row_start; i < block.row_start + block.row_size; ++i) { int ow = i % conv_p.OUT_DIM[1]; int oh = i / conv_p.OUT_DIM[1] % conv_p.OUT_DIM[0]; int n = i / conv_p.OUT_DIM[1] / conv_p.OUT_DIM[0]; diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc index f845e44..b7bc676 100644 --- a/src/GenerateKernelU8S8S32ACC16.cc +++ b/src/GenerateKernelU8S8S32ACC16.cc @@ -21,7 +21,6 @@ thread_local std::map< typename CodeGenBase<TA, TB, TC, accT>::jit_micro_kernel_fp> CodeGenBase<TA, TB, TC, accT>::codeCache_; - namespace x86 = asmjit::x86; /** diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc index 5856942..f096d7a 100644 --- a/src/PackAWithIm2Col.cc +++ b/src/PackAWithIm2Col.cc @@ -11,8 +11,8 @@ #include <iostream> #include <numeric> -#include "fbgemm/Fbgemm.h" #include "OptimizedKernelsAvx2.h" +#include "fbgemm/Fbgemm.h" namespace fbgemm { diff --git a/src/PackAWithQuantRowOffset.cc b/src/PackAWithQuantRowOffset.cc index 98e862b..02e701e 100644 --- a/src/PackAWithQuantRowOffset.cc +++ b/src/PackAWithQuantRowOffset.cc @@ -11,9 +11,9 @@ #include <iomanip> #include <iostream> #include <stdexcept> +#include "OptimizedKernelsAvx2.h" #include "fbgemm/Fbgemm.h" #include "fbgemm/QuantUtilsAvx2.h" -#include "OptimizedKernelsAvx2.h" namespace fbgemm { diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc index 30da4c0..4882bb5 100644 --- a/src/PackAWithRowOffset.cc +++ b/src/PackAWithRowOffset.cc @@ -10,8 +10,8 @@ #include <iomanip> #include <iostream> #include <stdexcept> -#include "fbgemm/Fbgemm.h" #include "OptimizedKernelsAvx2.h" +#include "fbgemm/Fbgemm.h" namespace fbgemm { diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc index b346952..1d057e2 100644 --- a/src/QuantUtils.cc +++ b/src/QuantUtils.cc @@ -153,16 +153,16 @@ void ChooseRequantizationMultiplier( //////////////////////////////////////////////////////////////////////////////// // Utility functions -#define FBGEMM_SPECIALIZED_QUANTIZE(T) \ - template <> \ - void Quantize<T>( \ - const float* src, \ - T* dst, \ - const int len, \ - const TensorQuantizationParams& qparams) { \ - for (int i = 0; i < len; ++i) { \ - dst[i] = Quantize<T>(src[i], qparams); \ - } \ +#define FBGEMM_SPECIALIZED_QUANTIZE(T) \ + template <> \ + void Quantize<T>( \ + const float* src, \ + T* dst, \ + const int len, \ + const TensorQuantizationParams& qparams) { \ + for (int i = 0; i < len; ++i) { \ + dst[i] = Quantize<T>(src[i], qparams); \ + } \ } FBGEMM_SPECIALIZED_QUANTIZE(int8_t) FBGEMM_SPECIALIZED_QUANTIZE(uint16_t) diff --git a/src/Utils.cc b/src/Utils.cc index 88d029d..fcd8ade 100644 --- a/src/Utils.cc +++ b/src/Utils.cc @@ -5,7 +5,6 @@ * LICENSE file in the root directory of this source tree. */ #include "fbgemm/Utils.h" -#include "TransposeUtils.h" #include <cpuinfo.h> #include <cassert> #include <cinttypes> @@ -14,6 +13,7 @@ #include <iostream> #include <limits> #include <stdexcept> +#include "TransposeUtils.h" namespace fbgemm { diff --git a/src/UtilsAvx2.cc b/src/UtilsAvx2.cc index badf70b..5659200 100644 --- a/src/UtilsAvx2.cc +++ b/src/UtilsAvx2.cc @@ -4,8 +4,8 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ -#include "TransposeUtils.h" #include <immintrin.h> +#include "TransposeUtils.h" namespace fbgemm { @@ -145,7 +145,6 @@ inline void transpose_kernel_8x8_avx2( _mm256_storeu_ps(&dst[7 * ld_dst], h); } - void transpose_8x8( int M, int N, diff --git a/src/UtilsAvx512.cc b/src/UtilsAvx512.cc index f49bb6f..44d4f9a 100644 --- a/src/UtilsAvx512.cc +++ b/src/UtilsAvx512.cc @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "TransposeUtils.h" #include <immintrin.h> +#include "TransposeUtils.h" namespace fbgemm { diff --git a/test/Im2ColFusedRequantizeTest.cc b/test/Im2ColFusedRequantizeTest.cc index 73971b9..feae002 100644 --- a/test/Im2ColFusedRequantizeTest.cc +++ b/test/Im2ColFusedRequantizeTest.cc @@ -244,7 +244,7 @@ TEST_P(fbgemmIm2colTest, Acc16Test) { } } -template<QuantizationGranularity Q_GRAN> +template <QuantizationGranularity Q_GRAN> void SConvTest() { for (auto conv_p : shapes) { for (int groups : {1, 4}) { diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc index 9873e3f..ab3e1a7 100644 --- a/test/PackedRequantizeTest.cc +++ b/test/PackedRequantizeTest.cc @@ -38,8 +38,7 @@ class fbgemmu8s8acc32WithQuantGranularityTest : public testing::TestWithParam< tuple<matrix_op_t, matrix_op_t, bool, QuantizationGranularity>> {}; class fbgemmu8s8acc32Test - : public testing::TestWithParam< - tuple<matrix_op_t, matrix_op_t, bool>> {}; + : public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t, bool>> {}; }; // namespace INSTANTIATE_TEST_CASE_P( |