From 6d00b52ee80787065561ac16942e55042ed6c760 Mon Sep 17 00:00:00 2001 From: Jongsoo Park Date: Sun, 18 Nov 2018 20:17:54 -0800 Subject: clang-format (#11) Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/11 clang format of fbgemm Reviewed By: dskhudia Differential Revision: D13115202 fbshipit-source-id: 6dab29cb8b5f4fabcc165019663351567a2a2952 --- bench/AlignedVec.h | 64 ++++++++++++++--------- bench/BenchUtils.cc | 24 ++++----- bench/BenchUtils.h | 2 +- bench/Depthwise3DBenchmark.cc | 45 ++++++++++------ bench/DepthwiseBenchmark.cc | 33 +++++++----- bench/FP16Benchmark.cc | 2 +- bench/I8SpmdmBenchmark.cc | 4 +- bench/Im2ColFusedRequantizeAcc16Benchmark.cc | 8 +-- bench/Im2ColFusedRequantizeAcc32Benchmark.cc | 8 +-- bench/PackedFloatInOutBenchmark.cc | 72 ++++++++++++------------- bench/PackedRequantizeAcc16Benchmark.cc | 2 +- bench/PackedRequantizeAcc32Benchmark.cc | 78 ++++++++++++++-------------- 12 files changed, 182 insertions(+), 160 deletions(-) (limited to 'bench') diff --git a/bench/AlignedVec.h b/bench/AlignedVec.h index 30fd266..fd4b88e 100644 --- a/bench/AlignedVec.h +++ b/bench/AlignedVec.h @@ -12,63 +12,73 @@ * * */ -template class aligned_allocator { -public: +template +class aligned_allocator { + public: // The following will be the same for virtually all allocators. - typedef T *pointer; - typedef const T *const_pointer; - typedef T &reference; - typedef const T &const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; typedef T value_type; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; - T *address(T &r) const { return &r; } + T* address(T& r) const { + return &r; + } - const T *address(const T &s) const { return &s; } + const T* address(const T& s) const { + return &s; + } std::size_t max_size() const { // The following has been carefully written to be independent of // the definition of size_t and to avoid signed/unsigned warnings. return (static_cast(0) - static_cast(1)) / - sizeof(T); + sizeof(T); } // The following must be the same for all allocators. - template struct rebind { + template + struct rebind { typedef aligned_allocator other; }; - bool operator!=(const aligned_allocator &other) const { + bool operator!=(const aligned_allocator& other) const { return !(*this == other); } - void construct(T *const p, const T &t) const { - void *const pv = static_cast(p); + void construct(T* const p, const T& t) const { + void* const pv = static_cast(p); new (pv) T(t); } - void destroy(T *const p) const { p->~T(); } + void destroy(T* const p) const { + p->~T(); + } // Returns true if and only if storage allocated from *this // can be deallocated from other, and vice versa. // Always returns true for stateless allocators. - bool operator==(const aligned_allocator & /*other*/) const { return true; } + bool operator==(const aligned_allocator& /*other*/) const { + return true; + } // Default constructor, copy constructor, rebinding constructor, and // destructor. Empty for stateless allocators. aligned_allocator() {} - aligned_allocator(const aligned_allocator &) {} + aligned_allocator(const aligned_allocator&) {} template - aligned_allocator(const aligned_allocator &) {} + aligned_allocator(const aligned_allocator&) {} ~aligned_allocator() {} // The following will be different for each allocator. - T *allocate(const std::size_t n) const { + T* allocate(const std::size_t n) const { // The return value of allocate(0) is unspecified. // Mallocator returns NULL in order to avoid depending // on malloc(0)'s implementation-defined behavior @@ -88,7 +98,7 @@ public: } // Mallocator wraps malloc(). - void *pv = nullptr; + void* pv = nullptr; posix_memalign(&pv, Alignment, n * sizeof(T)); // pv = aligned_alloc(Alignment, n * sizeof(T)); @@ -98,14 +108,16 @@ public: throw std::bad_alloc(); } - return static_cast(pv); + return static_cast(pv); } - void deallocate(T *const p, const std::size_t /*n*/) const { free(p); } + void deallocate(T* const p, const std::size_t /*n*/) const { + free(p); + } // The following will be the same for all allocators that ignore hints. template - T *allocate(const std::size_t n, const U * /* const hint */) const { + T* allocate(const std::size_t n, const U* /* const hint */) const { return allocate(n); } @@ -116,9 +128,11 @@ public: // "assignment operator could not be generated because a // base class assignment operator is inaccessible" within // the STL headers, but that warning is useless. -private: - aligned_allocator &operator=(const aligned_allocator &) { assert(0); } + private: + aligned_allocator& operator=(const aligned_allocator&) { + assert(0); + } }; template -using aligned_vector = std::vector >; +using aligned_vector = std::vector>; diff --git a/bench/BenchUtils.cc b/bench/BenchUtils.cc index 7b4cde4..db40ee0 100644 --- a/bench/BenchUtils.cc +++ b/bench/BenchUtils.cc @@ -12,27 +12,23 @@ namespace fbgemm { std::default_random_engine eng; template -void randFill(aligned_vector &vec, const int low, const int high) { +void randFill(aligned_vector& vec, const int low, const int high) { std::random_device r; std::uniform_int_distribution dis(low, high); - for (auto &v : vec) { + for (auto& v : vec) { v = static_cast(dis(eng)); } } -template -void randFill(aligned_vector &vec, - const int low, const int high); -template -void randFill(aligned_vector &vec, - const int low, const int high); -template -void randFill(aligned_vector &vec, - const int low, const int high); +template void +randFill(aligned_vector& vec, const int low, const int high); +template void +randFill(aligned_vector& vec, const int low, const int high); +template void +randFill(aligned_vector& vec, const int low, const int high); -template -void randFill(aligned_vector &vec, - const int low, const int high); +template void +randFill(aligned_vector& vec, const int low, const int high); void llc_flush(std::vector& llc) { volatile char* data = llc.data(); diff --git a/bench/BenchUtils.h b/bench/BenchUtils.h index 5c16a06..8ca99df 100644 --- a/bench/BenchUtils.h +++ b/bench/BenchUtils.h @@ -11,7 +11,7 @@ namespace fbgemm { template -void randFill(aligned_vector &vec, const int low, const int high); +void randFill(aligned_vector& vec, const int low, const int high); void llc_flush(std::vector& llc); diff --git a/bench/Depthwise3DBenchmark.cc b/bench/Depthwise3DBenchmark.cc index 417ddd1..f53eeea 100644 --- a/bench/Depthwise3DBenchmark.cc +++ b/bench/Depthwise3DBenchmark.cc @@ -18,10 +18,10 @@ #endif #include "AlignedVec.h" -#include "src/FbgemmI8Depthwise.h" +#include "BenchUtils.h" #include "fbgemm/Utils.h" +#include "src/FbgemmI8Depthwise.h" #include "src/RefImplementations.h" -#include "BenchUtils.h" using namespace std; using namespace fbgemm; @@ -33,9 +33,9 @@ int main() { if (flush) { llc.resize(128 * 1024 * 1024, 1.0); } -#define llc_flush() \ - for (auto i = 0; i < llc.size(); i++) { \ - llc[i]++; \ +#define llc_flush() \ + for (auto i = 0; i < llc.size(); i++) { \ + llc[i]++; \ } constexpr int NWARMUP = 4; @@ -116,10 +116,10 @@ int main() { Packed3x3x3ConvMatrix Bp(K, B.data()); double ttot = 0; - double bytes = - double(NITER) * - (K * (N * (2. * sizeof(int32_t) * T_OUT * H_OUT * W_OUT + T * H * W) + - K_T * K_H * K_W)); + double bytes = double(NITER) * + (K * + (N * (2. * sizeof(int32_t) * T_OUT * H_OUT * W_OUT + T * H * W) + + K_T * K_H * K_W)); double ops = double(NITER) * N * T_OUT * H_OUT * W_OUT * K * K_T * K_H * K_W * 2; chrono::time_point t_begin, t_end; @@ -183,8 +183,14 @@ int main() { } // n // Report performance - printf("N = %d K = %d T = %d H = %d W = %d stride = %d\n", N, K, T, H, W, - stride_h); + printf( + "N = %d K = %d T = %d H = %d W = %d stride = %d\n", + N, + K, + T, + H, + W, + stride_h); printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9); ttot = 0; @@ -236,9 +242,8 @@ int main() { for (int h = 0; h < H_OUT; ++h) { for (int w = 0; w < W_OUT; ++w) { for (int g = 0; g < K; ++g) { - uint8_t expected = - C_uint8_ref[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + - g]; + uint8_t expected = C_uint8_ref + [(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + g]; uint8_t actual = C_uint8[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + g]; if (expected != actual) { @@ -255,9 +260,15 @@ int main() { } // n // Report performance - printf("N = %d K = %d T = %d H = %d W = %d stride = %d with requantization " - "fused\n", - N, K, T, H, W, stride_h); + printf( + "N = %d K = %d T = %d H = %d W = %d stride = %d with requantization " + "fused\n", + N, + K, + T, + H, + W, + stride_h); printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9); } // for each shape diff --git a/bench/DepthwiseBenchmark.cc b/bench/DepthwiseBenchmark.cc index 0bf2d73..8e6d83d 100644 --- a/bench/DepthwiseBenchmark.cc +++ b/bench/DepthwiseBenchmark.cc @@ -16,10 +16,10 @@ #endif #include "AlignedVec.h" -#include "src/FbgemmI8Depthwise.h" +#include "BenchUtils.h" #include "fbgemm/Utils.h" +#include "src/FbgemmI8Depthwise.h" #include "src/RefImplementations.h" -#include "BenchUtils.h" using namespace std; using namespace fbgemm; @@ -27,6 +27,8 @@ using namespace fbgemm; int main() { // From Xray OCR vector> shapes = { + // NOTE: clang-format wants to use a different formatting but the current + // formatting should be easier to read. // N, G, H_in, W_in, stride { 1, 272, 47, 125, 1, }, { 1, 272, 64, 125, 1, }, @@ -135,9 +137,9 @@ int main() { if (flush) { llc.resize(128 * 1024 * 1024, 1.0); } -#define llc_flush() \ - for (auto i = 0; i < llc.size(); i++) { \ - llc[i]++; \ +#define llc_flush() \ + for (auto i = 0; i < llc.size(); i++) { \ + llc[i]++; \ } constexpr int NWARMUP = 4; @@ -209,8 +211,7 @@ int main() { Packed3x3ConvMatrix Bp(G, B.data()); double ttot = 0; - double bytes = - double(NITER) * + double bytes = double(NITER) * (G * (N * (2 * sizeof(int32_t) * H_OUT * W_OUT + H * W) + R * S)); double ops = double(NITER) * N * H_OUT * W_OUT * G * R * S * 2; chrono::time_point t_begin, t_end; @@ -256,9 +257,9 @@ int main() { int32_t expected = C_ref[((n * H_OUT + h) * W_OUT + w) * G + g]; int32_t actual = C[((n * H_OUT + h) * W_OUT + w) * G + g]; if (expected != actual) { - cerr << "Depthwise 3x3 results differ at (" << n << ", " - << h << ", " << w << ", " << g << "). expected " - << expected << " actual " << actual << endl; + cerr << "Depthwise 3x3 results differ at (" << n << ", " << h + << ", " << w << ", " << g << "). expected " << expected + << " actual " << actual << endl; return -1; } assert(expected == actual); @@ -320,9 +321,9 @@ int main() { C_uint8_ref[((n * H_OUT + h) * W_OUT + w) * G + g]; uint8_t actual = C_uint8[((n * H_OUT + h) * W_OUT + w) * G + g]; if (expected != actual) { - cerr << "Depthwise 3x3 results differ at (" << n << ", " - << h << ", " << w << ", " << g << "). expected " - << (int)expected << " actual " << (int)actual << endl; + cerr << "Depthwise 3x3 results differ at (" << n << ", " << h + << ", " << w << ", " << g << "). expected " << (int)expected + << " actual " << (int)actual << endl; return -1; } assert(expected == actual); @@ -334,7 +335,11 @@ int main() { // Report performance printf( "N = %d G = %d H = %d W = %d stride = %d with requantization fused\n", - N, G, H, W, stride_h); + N, + G, + H, + W, + stride_h); printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9); } // for each shape diff --git a/bench/FP16Benchmark.cc b/bench/FP16Benchmark.cc index 8fbe878..c03f18a 100644 --- a/bench/FP16Benchmark.cc +++ b/bench/FP16Benchmark.cc @@ -16,9 +16,9 @@ #include #endif +#include "AlignedVec.h" #include "bench/BenchUtils.h" #include "fbgemm/FbgemmFP16.h" -#include "AlignedVec.h" using namespace std; using namespace fbgemm; diff --git a/bench/I8SpmdmBenchmark.cc b/bench/I8SpmdmBenchmark.cc index d361bb5..07b73dc 100644 --- a/bench/I8SpmdmBenchmark.cc +++ b/bench/I8SpmdmBenchmark.cc @@ -17,9 +17,9 @@ #include #endif +#include "BenchUtils.h" #include "fbgemm/FbgemmI8Spmdm.h" #include "src/RefImplementations.h" -#include "BenchUtils.h" using namespace std; using namespace fbgemm; @@ -156,7 +156,7 @@ int main() { #pragma omp parallel #endif { -#if defined (FBGEMM_MEASURE_TIME_BREAKDOWN) || !defined(_OPENMP) +#if defined(FBGEMM_MEASURE_TIME_BREAKDOWN) || !defined(_OPENMP) int num_threads = 1; int tid = 0; #else diff --git a/bench/Im2ColFusedRequantizeAcc16Benchmark.cc b/bench/Im2ColFusedRequantizeAcc16Benchmark.cc index 8827b4c..e3c9da2 100644 --- a/bench/Im2ColFusedRequantizeAcc16Benchmark.cc +++ b/bench/Im2ColFusedRequantizeAcc16Benchmark.cc @@ -16,9 +16,9 @@ #include #endif +#include "BenchUtils.h" #include "fbgemm/Fbgemm.h" #include "src/RefImplementations.h" -#include "BenchUtils.h" using namespace std; using namespace fbgemm; @@ -189,11 +189,7 @@ void performance_test() { PackAWithIm2Col::rowOffsetBufferSize()); PackAWithIm2Col packA( - conv_p, - Aint8.data(), - nullptr, - Aint8_zero_point, - row_offset_buf.data()); + conv_p, Aint8.data(), nullptr, Aint8_zero_point, row_offset_buf.data()); PackBMatrix packedB( matrix_op_t::NoTranspose, KDim, NDim, Bint8.data(), NDim); diff --git a/bench/Im2ColFusedRequantizeAcc32Benchmark.cc b/bench/Im2ColFusedRequantizeAcc32Benchmark.cc index b87f7d7..153dc3b 100644 --- a/bench/Im2ColFusedRequantizeAcc32Benchmark.cc +++ b/bench/Im2ColFusedRequantizeAcc32Benchmark.cc @@ -16,9 +16,9 @@ #include #endif +#include "BenchUtils.h" #include "fbgemm/Fbgemm.h" #include "src/RefImplementations.h" -#include "BenchUtils.h" using namespace std; using namespace fbgemm; @@ -191,11 +191,7 @@ void performance_test() { PackAWithIm2Col::rowOffsetBufferSize()); PackAWithIm2Col packA( - conv_p, - Aint8.data(), - nullptr, - Aint8_zero_point, - row_offset_buf.data()); + conv_p, Aint8.data(), nullptr, Aint8_zero_point, row_offset_buf.data()); PackBMatrix packedB( matrix_op_t::NoTranspose, KDim, NDim, Bint8.data(), NDim); diff --git a/bench/PackedFloatInOutBenchmark.cc b/bench/PackedFloatInOutBenchmark.cc index 29a7547..dc9536e 100644 --- a/bench/PackedFloatInOutBenchmark.cc +++ b/bench/PackedFloatInOutBenchmark.cc @@ -19,50 +19,52 @@ #include #endif +#include "BenchUtils.h" #include "fbgemm/Fbgemm.h" #include "src/RefImplementations.h" #include "test/QuantizationHelpers.h" -#include "BenchUtils.h" using namespace std; using namespace fbgemm; void performance_test() { vector> shapes = { - {1, 128, 512}, - {1, 1024, 256}, - {1, 2048, 512}, - {1, 4096, 1024}, - - {6, 256, 1024}, - {6, 256, 2048}, - {6, 512, 512}, - {6, 1024, 256}, - {6, 2048, 256}, - {6, 2048, 512}, - {6, 4096, 256}, - {6, 4096, 1024}, - {6, 4096, 2048}, - - {10, 2048, 256}, - {10, 4096, 1024}, - - {20, 2048, 256}, - {20, 4096, 1024}, - - {102, 1024, 512}, - {102, 2323, 256}, - {102, 512, 256}, - - {1, 800, 3200}, - {1, 800, 8000}, - - {16, 256, 1500}, - {16, 256, 1567}, - {1, 128, 2876}, - {16, 128, 1567}, - {1, 128, 2722}, - {16, 256, 512}, + // NOTE: clang-format wants to use a different formatting but the current + // formatting should be easier to read. + {1, 128, 512}, + {1, 1024, 256}, + {1, 2048, 512}, + {1, 4096, 1024}, + + {6, 256, 1024}, + {6, 256, 2048}, + {6, 512, 512}, + {6, 1024, 256}, + {6, 2048, 256}, + {6, 2048, 512}, + {6, 4096, 256}, + {6, 4096, 1024}, + {6, 4096, 2048}, + + {10, 2048, 256}, + {10, 4096, 1024}, + + {20, 2048, 256}, + {20, 4096, 1024}, + + {102, 1024, 512}, + {102, 2323, 256}, + {102, 512, 256}, + + {1, 800, 3200}, + {1, 800, 8000}, + + {16, 256, 1500}, + {16, 256, 1567}, + {1, 128, 2876}, + {16, 128, 1567}, + {1, 128, 2722}, + {16, 256, 512}, }; bool flush = true; std::vector llc; diff --git a/bench/PackedRequantizeAcc16Benchmark.cc b/bench/PackedRequantizeAcc16Benchmark.cc index 4974ba2..367aa32 100644 --- a/bench/PackedRequantizeAcc16Benchmark.cc +++ b/bench/PackedRequantizeAcc16Benchmark.cc @@ -20,9 +20,9 @@ #include #endif +#include "BenchUtils.h" #include "fbgemm/Fbgemm.h" #include "src/RefImplementations.h" -#include "BenchUtils.h" using namespace std; using namespace fbgemm; diff --git a/bench/PackedRequantizeAcc32Benchmark.cc b/bench/PackedRequantizeAcc32Benchmark.cc index 94bb899..84d7c24 100644 --- a/bench/PackedRequantizeAcc32Benchmark.cc +++ b/bench/PackedRequantizeAcc32Benchmark.cc @@ -19,53 +19,55 @@ #include #endif +#include "BenchUtils.h" #include "fbgemm/Fbgemm.h" #include "src/RefImplementations.h" #include "test/QuantizationHelpers.h" -#include "BenchUtils.h" using namespace std; using namespace fbgemm; void performance_test() { vector> shapes = { - {156800, 4, 36}, - {156800, 8, 36}, - {156800, 16, 36}, - {1, 128, 512}, - {1, 1024, 256}, - {1, 2048, 512}, - {1, 4096, 1024}, - - {6, 256, 1024}, - {6, 256, 2048}, - {6, 512, 512}, - {6, 1024, 256}, - {6, 2048, 256}, - {6, 2048, 512}, - {6, 4096, 256}, - {6, 4096, 1024}, - {6, 4096, 2048}, - - {10, 2048, 256}, - {10, 4096, 1024}, - - {20, 2048, 256}, - {20, 4096, 1024}, - - {102, 1024, 512}, - {102, 2323, 256}, - {102, 512, 256}, - - {1, 800, 3200}, - {1, 800, 8000}, - - {16, 256, 1500}, - {16, 256, 1567}, - {1, 128, 2876}, - {16, 128, 1567}, - {1, 128, 2722}, - {16, 256, 512}, + // NOTE: clang-format wants to use a different formatting but the current + // formatting should be easier to read. + {156800, 4, 36}, + {156800, 8, 36}, + {156800, 16, 36}, + {1, 128, 512}, + {1, 1024, 256}, + {1, 2048, 512}, + {1, 4096, 1024}, + + {6, 256, 1024}, + {6, 256, 2048}, + {6, 512, 512}, + {6, 1024, 256}, + {6, 2048, 256}, + {6, 2048, 512}, + {6, 4096, 256}, + {6, 4096, 1024}, + {6, 4096, 2048}, + + {10, 2048, 256}, + {10, 4096, 1024}, + + {20, 2048, 256}, + {20, 4096, 1024}, + + {102, 1024, 512}, + {102, 2323, 256}, + {102, 512, 256}, + + {1, 800, 3200}, + {1, 800, 8000}, + + {16, 256, 1500}, + {16, 256, 1567}, + {1, 128, 2876}, + {16, 128, 1567}, + {1, 128, 2722}, + {16, 256, 512}, }; bool flush = true; std::vector llc; -- cgit v1.2.3