author    Jianyu Huang <jianyuhuang@fb.com>  2018-12-21 22:18:44 +0300
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>  2018-12-21 22:21:05 +0300
commit    4691d5bcb0756b69baf4f54e45d42ba75d133464 (patch)
tree      52ae9b39a1d3ea44bc5ac53bb57d3dc74c85bf38
parent    d5810be02d4b7b90a5aec746f98841c9e585f6d4 (diff)
Update with clang format (#51)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/51

Use Clang formatting with "clang-format -i *.cc *.h".

Reviewed By: dskhudia

Differential Revision: D13532121

fbshipit-source-id: 6792d008f3295c128942f4896e8221aebbf2566e
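The change itself is mechanical: with "clang-format -i *.cc *.h" under the project's column limit (80 columns, judging by the reflowed lines in the diff below), argument lists are packed onto a single line whenever the whole call fits, and otherwise stay at one argument per line. A minimal standalone sketch of that effect, using a hypothetical packMatrix function rather than FBGEMM's real API:

// Illustration only: the call in main() fits within 80 columns, so clang-format
// packs it onto one line; the pre-format version listed one argument per line.
#include <cstdint>

void packMatrix(int op, int k, int n, const std::int8_t* data, int ld,
                const std::int32_t* offsets, int groups) {
  // Body intentionally empty; only the call-site formatting is of interest.
  (void)op; (void)k; (void)n; (void)data; (void)ld; (void)offsets; (void)groups;
}

int main() {
  const std::int8_t b[4] = {1, 2, 3, 4};
  packMatrix(/*op=*/0, /*k=*/2, /*n=*/2, b, /*ld=*/2, nullptr, /*groups=*/1);
  return 0;
}

Running the same clang-format command locally before sending a patch keeps later diffs like the one below purely functional.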
-rw-r--r--  bench/PackedFloatInOutBenchmark.cc      |  8
-rw-r--r--  bench/PackedRequantizeAcc16Benchmark.cc |  8
-rw-r--r--  bench/PackedRequantizeAcc32Benchmark.cc |  8
-rw-r--r--  include/fbgemm/Fbgemm.h                 |  4
-rw-r--r--  include/fbgemm/FbgemmI8Spmdm.h          |  2
-rw-r--r--  include/fbgemm/OutputProcessing-inl.h   |  2
-rw-r--r--  include/fbgemm/QuantUtils.h             |  3
-rw-r--r--  include/fbgemm/Utils.h                  |  2
-rw-r--r--  src/ExecuteKernelU8S8.cc                |  6
-rw-r--r--  src/Fbgemm.cc                           |  2
-rw-r--r--  src/FbgemmI8DepthwiseAvx2.cc            | 46
-rw-r--r--  src/FbgemmI8Spmdm.cc                    |  6
-rw-r--r--  src/GenerateKernelU8S8S32ACC16.cc       |  1
-rw-r--r--  src/PackAWithIm2Col.cc                  |  2
-rw-r--r--  src/PackAWithQuantRowOffset.cc          |  2
-rw-r--r--  src/PackAWithRowOffset.cc               |  2
-rw-r--r--  src/QuantUtils.cc                       | 20
-rw-r--r--  src/Utils.cc                            |  2
-rw-r--r--  src/UtilsAvx2.cc                        |  3
-rw-r--r--  src/UtilsAvx512.cc                      |  2
-rw-r--r--  test/Im2ColFusedRequantizeTest.cc       |  2
-rw-r--r--  test/PackedRequantizeTest.cc            |  3
22 files changed, 42 insertions(+), 94 deletions(-)
diff --git a/bench/PackedFloatInOutBenchmark.cc b/bench/PackedFloatInOutBenchmark.cc
index 79a750e..f2589f7 100644
--- a/bench/PackedFloatInOutBenchmark.cc
+++ b/bench/PackedFloatInOutBenchmark.cc
@@ -187,13 +187,7 @@ void performance_test() {
row_offset_buf.data());
PackBMatrix<int8_t> packedBN(
- matrix_op_t::NoTranspose,
- k,
- n,
- Bint8.data(),
- n,
- nullptr,
- 1);
+ matrix_op_t::NoTranspose, k, n, Bint8.data(), n, nullptr, 1);
DoNothing<float, float> doNothingObj{};
ReQuantizeForFloat<false> outputProcObj(
diff --git a/bench/PackedRequantizeAcc16Benchmark.cc b/bench/PackedRequantizeAcc16Benchmark.cc
index f60332f..89cca4f 100644
--- a/bench/PackedRequantizeAcc16Benchmark.cc
+++ b/bench/PackedRequantizeAcc16Benchmark.cc
@@ -296,13 +296,7 @@ void performance_test() {
PackAWithRowOffset<uint8_t, int16_t>::rowOffsetBufferSize());
PackAMatrix<uint8_t, int16_t> packA(
- matrix_op_t::NoTranspose,
- m,
- k,
- Aint8.data(),
- k,
- nullptr,
- 1);
+ matrix_op_t::NoTranspose, m, k, Aint8.data(), k, nullptr, 1);
PackAWithRowOffset<uint8_t, int16_t> packAWithRowOffset(
matrix_op_t::NoTranspose,
m,
diff --git a/bench/PackedRequantizeAcc32Benchmark.cc b/bench/PackedRequantizeAcc32Benchmark.cc
index b255b8c..5096475 100644
--- a/bench/PackedRequantizeAcc32Benchmark.cc
+++ b/bench/PackedRequantizeAcc32Benchmark.cc
@@ -209,13 +209,7 @@ void performance_test() {
// offsets before");
PackBMatrix<int8_t> packedBN(
- matrix_op_t::NoTranspose,
- k,
- n,
- Bint8.data(),
- n,
- nullptr,
- 1);
+ matrix_op_t::NoTranspose, k, n, Bint8.data(), n, nullptr, 1);
ttot = 0.0;
runType = "FBGEMM_i8_acc32";
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h
index 3cf832c..b729d56 100644
--- a/include/fbgemm/Fbgemm.h
+++ b/include/fbgemm/Fbgemm.h
@@ -15,11 +15,11 @@
#include <memory>
#include <type_traits>
#include "ConvUtils.h"
+#include "FbgemmBuild.h"
#include "FbgemmI8Spmdm.h"
+#include "QuantUtilsAvx2.h"
#include "Types.h"
#include "Utils.h"
-#include "FbgemmBuild.h"
-#include "QuantUtilsAvx2.h"
// Turning on this option will print out time breakdown of each stage (e.g.,
// input packing, the main GEMM kernel, each output processing pipeline).
diff --git a/include/fbgemm/FbgemmI8Spmdm.h b/include/fbgemm/FbgemmI8Spmdm.h
index 01416d5..93af6ea 100644
--- a/include/fbgemm/FbgemmI8Spmdm.h
+++ b/include/fbgemm/FbgemmI8Spmdm.h
@@ -9,8 +9,8 @@
#include <cstdint>
#include <vector>
#include "ConvUtils.h"
-#include "Utils.h"
#include "FbgemmBuild.h"
+#include "Utils.h"
// #define FBGEMM_MEASURE_TIME_BREAKDOWN
diff --git a/include/fbgemm/OutputProcessing-inl.h b/include/fbgemm/OutputProcessing-inl.h
index 2ff64f3..c250942 100644
--- a/include/fbgemm/OutputProcessing-inl.h
+++ b/include/fbgemm/OutputProcessing-inl.h
@@ -111,7 +111,7 @@ inline int ReQuantizeOutput<FUSE_RELU, Q_GRAN, outT, inT, nextOPType>::f(
} else if (instSet == inst_set_t::avx2 || instSet == inst_set_t::avx512) {
if (std::is_same<outT, uint8_t>::value) {
bool b_symmetric = (Q_GRAN == QuantizationGranularity::TENSOR &&
- Bq_zero_point_[0] == 0) ||
+ Bq_zero_point_[0] == 0) ||
q_row_offsets_ == nullptr;
requantizationParams_t r = {Aq_zero_point_,
diff --git a/include/fbgemm/QuantUtils.h b/include/fbgemm/QuantUtils.h
index f658f73..43855d8 100644
--- a/include/fbgemm/QuantUtils.h
+++ b/include/fbgemm/QuantUtils.h
@@ -68,8 +68,7 @@ FBGEMM_API T Quantize(
template <typename T>
FBGEMM_API T Quantize(float src, const TensorQuantizationParams& qparams) {
- return Quantize<T>(
- src, qparams.zero_point, qparams.scale, qparams.precision);
+ return Quantize<T>(src, qparams.zero_point, qparams.scale, qparams.precision);
}
template <typename T>
diff --git a/include/fbgemm/Utils.h b/include/fbgemm/Utils.h
index a7e91cd..775e18c 100644
--- a/include/fbgemm/Utils.h
+++ b/include/fbgemm/Utils.h
@@ -7,8 +7,8 @@
#pragma once
#include <string>
#include <type_traits>
-#include "UtilsAvx2.h"
#include "FbgemmBuild.h"
+#include "UtilsAvx2.h"
namespace fbgemm {
diff --git a/src/ExecuteKernelU8S8.cc b/src/ExecuteKernelU8S8.cc
index 3b6e059..e7f7c70 100644
--- a/src/ExecuteKernelU8S8.cc
+++ b/src/ExecuteKernelU8S8.cc
@@ -288,7 +288,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset);
INSTANTIATE_Q_GRANS(ACC_T, RELU, 2); \
INSTANTIATE_Q_GRANS(ACC_T, RELU, 3);
-#define INSTANTIATE_RELU(ACC_T) \
+#define INSTANTIATE_RELU(ACC_T) \
INSTANTIATE_SPATIAL_DIM(ACC_T, false); \
INSTANTIATE_SPATIAL_DIM(ACC_T, true);
@@ -410,7 +410,7 @@ template class ExecuteKernel<
template class ExecuteKernel< \
PACK_A<uint8_t, ACC_T>, \
PackBMatrix<int8_t, ACC_T>, \
- int32_t, \
+ int32_t, \
memCopy<>>;
#define INSTANTIATE_ACC_T(PACK_A) \
@@ -427,7 +427,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset);
template class ExecuteKernel< \
PackAWithIm2Col<uint8_t, ACC_T, SPATIAL_DIM>, \
PackBMatrix<int8_t, ACC_T>, \
- int32_t, \
+ int32_t, \
memCopy<>>;
#define INSTANTIATE_SPATIAL_DIM(ACC_T) \
diff --git a/src/Fbgemm.cc b/src/Fbgemm.cc
index b7a99c6..45108d0 100644
--- a/src/Fbgemm.cc
+++ b/src/Fbgemm.cc
@@ -254,7 +254,7 @@ INSTANTIATE_ACC_T(PackAWithRowOffset);
INSTANTIATE_Q_GRANS(ACC_T, RELU, 2); \
INSTANTIATE_Q_GRANS(ACC_T, RELU, 3);
-#define INSTANTIATE_RELU(ACC_T) \
+#define INSTANTIATE_RELU(ACC_T) \
INSTANTIATE_SPATIAL_DIM(ACC_T, false); \
INSTANTIATE_SPATIAL_DIM(ACC_T, true);
diff --git a/src/FbgemmI8DepthwiseAvx2.cc b/src/FbgemmI8DepthwiseAvx2.cc
index ace3f7e..9dcd7e1 100644
--- a/src/FbgemmI8DepthwiseAvx2.cc
+++ b/src/FbgemmI8DepthwiseAvx2.cc
@@ -112,13 +112,12 @@ PackedDepthWiseConvMatrix<KERNEL_PROD>::PackedDepthWiseConvMatrix(
reinterpret_cast<const __m256i*>(masks[remainder / 4]));
for (int i = 0; i < KERNEL_PROD; ++i) {
b_v[i] = _mm256_maskload_epi32(
- reinterpret_cast<const int*>(smat_transposed + i * K + k1),
- mask_v);
+ reinterpret_cast<const int*>(smat_transposed + i * K + k1), mask_v);
}
} else {
for (int i = 0; i < KERNEL_PROD; ++i) {
- b_v[i] = _mm256_lddqu_si256(reinterpret_cast<const __m256i*>(
- smat_transposed + i * K + k1));
+ b_v[i] = _mm256_lddqu_si256(
+ reinterpret_cast<const __m256i*>(smat_transposed + i * K + k1));
}
}
@@ -457,14 +456,7 @@ static inline __attribute__((always_inline)) void inner_prod_packed_(
a_v[k], Bp + k, &c[0], &c[1], &c[2], &c[3], a_sum_temp);
} else if (K - k == 2) {
madd_epi16x2_packed<SUM_A>(
- a_v[k],
- a_v[k + 1],
- Bp + k,
- &c[0],
- &c[1],
- &c[2],
- &c[3],
- a_sum_temp);
+ a_v[k], a_v[k + 1], Bp + k, &c[0], &c[1], &c[2], &c[3], a_sum_temp);
}
c[0] = _mm256_add_epi32(c[0], c_temp[0]);
@@ -860,11 +852,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3_packed_(
__m256i a_sum[4];
inner_prod_3x3_packed_<SUM_A, REMAINDER>(
- a_v,
- reinterpret_cast<const __m256i*>(Bp),
- C,
- remainder,
- a_sum);
+ a_v, reinterpret_cast<const __m256i*>(Bp), C, remainder, a_sum);
if (SUM_A) {
__m256i B_zero_point_v;
for (int i = 0; i < (REMAINDER ? (remainder / 8) : 4); ++i) {
@@ -974,11 +962,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_(
__m256i a_sum[4];
inner_prod_packed_<8, SUM_A, REMAINDER>(
- a_v,
- reinterpret_cast<const __m256i*>(Bp),
- C,
- remainder,
- a_sum);
+ a_v, reinterpret_cast<const __m256i*>(Bp), C, remainder, a_sum);
a_v[0] = A_zero_point_v;
a_v[1] = A_zero_point_v;
@@ -1031,11 +1015,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_(
__m256i a_sum_temp[4];
inner_prod_packed_<8, SUM_A, REMAINDER, true /* acc */>(
- a_v,
- reinterpret_cast<const __m256i*>(Bp) + 8,
- C,
- remainder,
- a_sum_temp);
+ a_v, reinterpret_cast<const __m256i*>(Bp) + 8, C, remainder, a_sum_temp);
if (SUM_A) {
a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]);
a_sum[1] = _mm256_add_epi32(a_sum[1], a_sum_temp[1]);
@@ -1090,11 +1070,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_(
}
inner_prod_packed_<8, SUM_A, REMAINDER, true /* acc */>(
- a_v,
- reinterpret_cast<const __m256i*>(Bp) + 16,
- C,
- remainder,
- a_sum_temp);
+ a_v, reinterpret_cast<const __m256i*>(Bp) + 16, C, remainder, a_sum_temp);
if (SUM_A) {
a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]);
a_sum[1] = _mm256_add_epi32(a_sum[1], a_sum_temp[1]);
@@ -1121,11 +1097,7 @@ static inline __attribute__((always_inline)) void inner_prod_3x3x3_packed_(
}
inner_prod_packed_<3, SUM_A, REMAINDER, true /* acc */>(
- a_v,
- reinterpret_cast<const __m256i*>(Bp) + 24,
- C,
- remainder,
- a_sum_temp);
+ a_v, reinterpret_cast<const __m256i*>(Bp) + 24, C, remainder, a_sum_temp);
if (SUM_A) {
a_sum[0] = _mm256_add_epi32(a_sum[0], a_sum_temp[0]);
diff --git a/src/FbgemmI8Spmdm.cc b/src/FbgemmI8Spmdm.cc
index 8d69c9f..c249871 100644
--- a/src/FbgemmI8Spmdm.cc
+++ b/src/FbgemmI8Spmdm.cc
@@ -236,8 +236,7 @@ void CompressedSparseColumn::SparseConv(
// TODO: if not hyper sparse, transpose a block of A matrix as in SpMDM.
if (!accumulation) {
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
- for (int j = block.col_start; j < block.col_start + block.col_size;
- ++j) {
+ for (int j = block.col_start; j < block.col_start + block.col_size; ++j) {
C[(i - block.row_start) * ldc + j - block.col_start] = 0;
}
}
@@ -245,8 +244,7 @@ void CompressedSparseColumn::SparseConv(
for (int j = block.col_start; j < block.col_start + block.col_size; ++j) {
for (int k = colptr_[j]; k < colptr_[j + 1]; ++k) {
int v = values_[k];
- for (int i = block.row_start; i < block.row_start + block.row_size;
- ++i) {
+ for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
int ow = i % conv_p.OUT_DIM[1];
int oh = i / conv_p.OUT_DIM[1] % conv_p.OUT_DIM[0];
int n = i / conv_p.OUT_DIM[1] / conv_p.OUT_DIM[0];
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc
index f845e44..b7bc676 100644
--- a/src/GenerateKernelU8S8S32ACC16.cc
+++ b/src/GenerateKernelU8S8S32ACC16.cc
@@ -21,7 +21,6 @@ thread_local std::map<
typename CodeGenBase<TA, TB, TC, accT>::jit_micro_kernel_fp>
CodeGenBase<TA, TB, TC, accT>::codeCache_;
-
namespace x86 = asmjit::x86;
/**
diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc
index 5856942..f096d7a 100644
--- a/src/PackAWithIm2Col.cc
+++ b/src/PackAWithIm2Col.cc
@@ -11,8 +11,8 @@
#include <iostream>
#include <numeric>
-#include "fbgemm/Fbgemm.h"
#include "OptimizedKernelsAvx2.h"
+#include "fbgemm/Fbgemm.h"
namespace fbgemm {
diff --git a/src/PackAWithQuantRowOffset.cc b/src/PackAWithQuantRowOffset.cc
index 98e862b..02e701e 100644
--- a/src/PackAWithQuantRowOffset.cc
+++ b/src/PackAWithQuantRowOffset.cc
@@ -11,9 +11,9 @@
#include <iomanip>
#include <iostream>
#include <stdexcept>
+#include "OptimizedKernelsAvx2.h"
#include "fbgemm/Fbgemm.h"
#include "fbgemm/QuantUtilsAvx2.h"
-#include "OptimizedKernelsAvx2.h"
namespace fbgemm {
diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc
index 30da4c0..4882bb5 100644
--- a/src/PackAWithRowOffset.cc
+++ b/src/PackAWithRowOffset.cc
@@ -10,8 +10,8 @@
#include <iomanip>
#include <iostream>
#include <stdexcept>
-#include "fbgemm/Fbgemm.h"
#include "OptimizedKernelsAvx2.h"
+#include "fbgemm/Fbgemm.h"
namespace fbgemm {
diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc
index b346952..1d057e2 100644
--- a/src/QuantUtils.cc
+++ b/src/QuantUtils.cc
@@ -153,16 +153,16 @@ void ChooseRequantizationMultiplier(
////////////////////////////////////////////////////////////////////////////////
// Utility functions
-#define FBGEMM_SPECIALIZED_QUANTIZE(T) \
- template <> \
- void Quantize<T>( \
- const float* src, \
- T* dst, \
- const int len, \
- const TensorQuantizationParams& qparams) { \
- for (int i = 0; i < len; ++i) { \
- dst[i] = Quantize<T>(src[i], qparams); \
- } \
+#define FBGEMM_SPECIALIZED_QUANTIZE(T) \
+ template <> \
+ void Quantize<T>( \
+ const float* src, \
+ T* dst, \
+ const int len, \
+ const TensorQuantizationParams& qparams) { \
+ for (int i = 0; i < len; ++i) { \
+ dst[i] = Quantize<T>(src[i], qparams); \
+ } \
}
FBGEMM_SPECIALIZED_QUANTIZE(int8_t)
FBGEMM_SPECIALIZED_QUANTIZE(uint16_t)
diff --git a/src/Utils.cc b/src/Utils.cc
index 88d029d..fcd8ade 100644
--- a/src/Utils.cc
+++ b/src/Utils.cc
@@ -5,7 +5,6 @@
* LICENSE file in the root directory of this source tree.
*/
#include "fbgemm/Utils.h"
-#include "TransposeUtils.h"
#include <cpuinfo.h>
#include <cassert>
#include <cinttypes>
@@ -14,6 +13,7 @@
#include <iostream>
#include <limits>
#include <stdexcept>
+#include "TransposeUtils.h"
namespace fbgemm {
diff --git a/src/UtilsAvx2.cc b/src/UtilsAvx2.cc
index badf70b..5659200 100644
--- a/src/UtilsAvx2.cc
+++ b/src/UtilsAvx2.cc
@@ -4,8 +4,8 @@
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
-#include "TransposeUtils.h"
#include <immintrin.h>
+#include "TransposeUtils.h"
namespace fbgemm {
@@ -145,7 +145,6 @@ inline void transpose_kernel_8x8_avx2(
_mm256_storeu_ps(&dst[7 * ld_dst], h);
}
-
void transpose_8x8(
int M,
int N,
diff --git a/src/UtilsAvx512.cc b/src/UtilsAvx512.cc
index f49bb6f..44d4f9a 100644
--- a/src/UtilsAvx512.cc
+++ b/src/UtilsAvx512.cc
@@ -5,8 +5,8 @@
* LICENSE file in the root directory of this source tree.
*/
-#include "TransposeUtils.h"
#include <immintrin.h>
+#include "TransposeUtils.h"
namespace fbgemm {
diff --git a/test/Im2ColFusedRequantizeTest.cc b/test/Im2ColFusedRequantizeTest.cc
index 73971b9..feae002 100644
--- a/test/Im2ColFusedRequantizeTest.cc
+++ b/test/Im2ColFusedRequantizeTest.cc
@@ -244,7 +244,7 @@ TEST_P(fbgemmIm2colTest, Acc16Test) {
}
}
-template<QuantizationGranularity Q_GRAN>
+template <QuantizationGranularity Q_GRAN>
void SConvTest() {
for (auto conv_p : shapes) {
for (int groups : {1, 4}) {
diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc
index 9873e3f..ab3e1a7 100644
--- a/test/PackedRequantizeTest.cc
+++ b/test/PackedRequantizeTest.cc
@@ -38,8 +38,7 @@ class fbgemmu8s8acc32WithQuantGranularityTest
: public testing::TestWithParam<
tuple<matrix_op_t, matrix_op_t, bool, QuantizationGranularity>> {};
class fbgemmu8s8acc32Test
- : public testing::TestWithParam<
- tuple<matrix_op_t, matrix_op_t, bool>> {};
+ : public testing::TestWithParam<tuple<matrix_op_t, matrix_op_t, bool>> {};
}; // namespace
INSTANTIATE_TEST_CASE_P(