From 6d00b52ee80787065561ac16942e55042ed6c760 Mon Sep 17 00:00:00 2001
From: Jongsoo Park <jongsoo@fb.com>
Date: Sun, 18 Nov 2018 20:17:54 -0800
Subject: clang-format (#11)

Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/11

Apply clang-format to the fbgemm sources.

Reviewed By: dskhudia

Differential Revision: D13115202

fbshipit-source-id: 6dab29cb8b5f4fabcc165019663351567a2a2952
---
 bench/AlignedVec.h                           | 64 ++++++++++++++---------
 bench/BenchUtils.cc                          | 24 ++++-----
 bench/BenchUtils.h                           |  2 +-
 bench/Depthwise3DBenchmark.cc                | 45 ++++++++++------
 bench/DepthwiseBenchmark.cc                  | 33 +++++++-----
 bench/FP16Benchmark.cc                       |  2 +-
 bench/I8SpmdmBenchmark.cc                    |  4 +-
 bench/Im2ColFusedRequantizeAcc16Benchmark.cc |  8 +--
 bench/Im2ColFusedRequantizeAcc32Benchmark.cc |  8 +--
 bench/PackedFloatInOutBenchmark.cc           | 72 ++++++++++++-------------
 bench/PackedRequantizeAcc16Benchmark.cc      |  2 +-
 bench/PackedRequantizeAcc32Benchmark.cc      | 78 ++++++++++++++--------------
 12 files changed, 182 insertions(+), 160 deletions(-)

(limited to 'bench')
diff --git a/bench/AlignedVec.h b/bench/AlignedVec.h
index 30fd266..fd4b88e 100644
--- a/bench/AlignedVec.h
+++ b/bench/AlignedVec.h
@@ -12,63 +12,73 @@
  * <http://blogs.msdn.com/b/vcblog/archive/2008/08/28/the-mallocator.aspx>
  *
  */
-template <typename T, std::size_t Alignment> class aligned_allocator {
-public:
+template <typename T, std::size_t Alignment>
+class aligned_allocator {
+ public:
   // The following will be the same for virtually all allocators.
-  typedef T *pointer;
-  typedef const T *const_pointer;
-  typedef T &reference;
-  typedef const T &const_reference;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
   typedef T value_type;
   typedef std::size_t size_type;
   typedef std::ptrdiff_t difference_type;
 
-  T *address(T &r) const { return &r; }
+  T* address(T& r) const {
+    return &r;
+  }
 
-  const T *address(const T &s) const { return &s; }
+  const T* address(const T& s) const {
+    return &s;
+  }
 
   std::size_t max_size() const {
     // The following has been carefully written to be independent of
     // the definition of size_t and to avoid signed/unsigned warnings.
     return (static_cast<std::size_t>(0) - static_cast<std::size_t>(1)) /
-           sizeof(T);
+        sizeof(T);
   }
 
   // The following must be the same for all allocators.
-  template <typename U> struct rebind {
+  template <typename U>
+  struct rebind {
     typedef aligned_allocator<U, Alignment> other;
   };
 
-  bool operator!=(const aligned_allocator &other) const {
+  bool operator!=(const aligned_allocator& other) const {
     return !(*this == other);
   }
 
-  void construct(T *const p, const T &t) const {
-    void *const pv = static_cast<void *>(p);
+  void construct(T* const p, const T& t) const {
+    void* const pv = static_cast<void*>(p);
 
     new (pv) T(t);
   }
 
-  void destroy(T *const p) const { p->~T(); }
+  void destroy(T* const p) const {
+    p->~T();
+  }
 
   // Returns true if and only if storage allocated from *this
   // can be deallocated from other, and vice versa.
   // Always returns true for stateless allocators.
-  bool operator==(const aligned_allocator & /*other*/) const { return true; }
+  bool operator==(const aligned_allocator& /*other*/) const {
+    return true;
+  }
 
   // Default constructor, copy constructor, rebinding constructor, and
   // destructor. Empty for stateless allocators.
   aligned_allocator() {}
 
-  aligned_allocator(const aligned_allocator &) {}
+  aligned_allocator(const aligned_allocator&) {}
 
   template <typename U>
-  aligned_allocator(const aligned_allocator<U, Alignment> &) {}
+  aligned_allocator(const aligned_allocator<U, Alignment>&) {}
 
   ~aligned_allocator() {}
 
   // The following will be different for each allocator.
-  T *allocate(const std::size_t n) const {
+  T* allocate(const std::size_t n) const {
     // The return value of allocate(0) is unspecified.
     // Mallocator returns NULL in order to avoid depending
     // on malloc(0)'s implementation-defined behavior
@@ -88,7 +98,7 @@ public:
     }
 
     // Mallocator wraps malloc().
-    void *pv = nullptr;
+    void* pv = nullptr;
     posix_memalign(&pv, Alignment, n * sizeof(T));
     // pv = aligned_alloc(Alignment, n * sizeof(T));
 
@@ -98,14 +108,16 @@ public:
       throw std::bad_alloc();
     }
 
-    return static_cast<T *>(pv);
+    return static_cast<T*>(pv);
   }
 
-  void deallocate(T *const p, const std::size_t /*n*/) const { free(p); }
+  void deallocate(T* const p, const std::size_t /*n*/) const {
+    free(p);
+  }
 
   // The following will be the same for all allocators that ignore hints.
   template <typename U>
-  T *allocate(const std::size_t n, const U * /* const hint */) const {
+  T* allocate(const std::size_t n, const U* /* const hint */) const {
     return allocate(n);
   }
 
@@ -116,9 +128,11 @@ public:
   // "assignment operator could not be generated because a
   // base class assignment operator is inaccessible" within
   // the STL headers, but that warning is useless.
-private:
-  aligned_allocator &operator=(const aligned_allocator &) { assert(0); }
+ private:
+  aligned_allocator& operator=(const aligned_allocator&) {
+    assert(0);
+  }
 };
 
 template <typename T>
-using aligned_vector = std::vector<T, aligned_allocator<T, 64> >;
+using aligned_vector = std::vector<T, aligned_allocator<T, 64>>;
diff --git a/bench/BenchUtils.cc b/bench/BenchUtils.cc
index 7b4cde4..db40ee0 100644
--- a/bench/BenchUtils.cc
+++ b/bench/BenchUtils.cc
@@ -12,27 +12,23 @@ namespace fbgemm {
 std::default_random_engine eng;
 
 template <typename T>
-void randFill(aligned_vector<T> &vec, const int low, const int high) {
+void randFill(aligned_vector<T>& vec, const int low, const int high) {
   std::random_device r;
   std::uniform_int_distribution<int> dis(low, high);
-  for (auto &v : vec) {
+  for (auto& v : vec) {
     v = static_cast<T>(dis(eng));
   }
 }
 
-template
-void randFill<float>(aligned_vector<float> &vec,
-                     const int low, const int high);
-template
-void randFill<uint8_t>(aligned_vector<uint8_t> &vec,
-                       const int low, const int high);
-template
-void randFill<int8_t>(aligned_vector<int8_t> &vec,
-                      const int low, const int high);
+template void
+randFill<float>(aligned_vector<float>& vec, const int low, const int high);
+template void
+randFill<uint8_t>(aligned_vector<uint8_t>& vec, const int low, const int high);
+template void
+randFill<int8_t>(aligned_vector<int8_t>& vec, const int low, const int high);
 
-template
-void randFill<int>(aligned_vector<int> &vec,
-                   const int low, const int high);
+template void
+randFill<int>(aligned_vector<int>& vec, const int low, const int high);
 
 void llc_flush(std::vector<char>& llc) {
   volatile char* data = llc.data();
diff --git a/bench/BenchUtils.h b/bench/BenchUtils.h
index 5c16a06..8ca99df 100644
--- a/bench/BenchUtils.h
+++ b/bench/BenchUtils.h
@@ -11,7 +11,7 @@
 namespace fbgemm {
 
 template <typename T>
-void randFill(aligned_vector<T> &vec, const int low, const int high);
+void randFill(aligned_vector<T>& vec, const int low, const int high);
 
 void llc_flush(std::vector<char>& llc);
 
diff --git a/bench/Depthwise3DBenchmark.cc b/bench/Depthwise3DBenchmark.cc
index 417ddd1..f53eeea 100644
--- a/bench/Depthwise3DBenchmark.cc
+++ b/bench/Depthwise3DBenchmark.cc
@@ -18,10 +18,10 @@
 #endif
 
 #include "AlignedVec.h"
-#include "src/FbgemmI8Depthwise.h"
+#include "BenchUtils.h"
 #include "fbgemm/Utils.h"
+#include "src/FbgemmI8Depthwise.h"
 #include "src/RefImplementations.h"
-#include "BenchUtils.h"
 
 using namespace std;
 using namespace fbgemm;
@@ -33,9 +33,9 @@ int main() {
   if (flush) {
     llc.resize(128 * 1024 * 1024, 1.0);
   }
-#define llc_flush()                                                            \
-  for (auto i = 0; i < llc.size(); i++) {                                      \
-    llc[i]++;                                                                  \
+#define llc_flush()                       \
+  for (auto i = 0; i < llc.size(); i++) { \
+    llc[i]++;                             \
   }
 
   constexpr int NWARMUP = 4;
@@ -116,10 +116,10 @@ int main() {
     Packed3x3x3ConvMatrix Bp(K, B.data());
 
     double ttot = 0;
-    double bytes =
-        double(NITER) *
-        (K * (N * (2. * sizeof(int32_t) * T_OUT * H_OUT * W_OUT + T * H * W) +
-              K_T * K_H * K_W));
+    double bytes = double(NITER) *
+        (K *
+         (N * (2. * sizeof(int32_t) * T_OUT * H_OUT * W_OUT + T * H * W) +
+          K_T * K_H * K_W));
     double ops =
         double(NITER) * N * T_OUT * H_OUT * W_OUT * K * K_T * K_H * K_W * 2;
     chrono::time_point<chrono::system_clock> t_begin, t_end;
@@ -183,8 +183,14 @@ int main() {
     } // n
 
     // Report performance
-    printf("N = %d K = %d T = %d H = %d W = %d stride = %d\n", N, K, T, H, W,
-           stride_h);
+    printf(
+        "N = %d K = %d T = %d H = %d W = %d stride = %d\n",
+        N,
+        K,
+        T,
+        H,
+        W,
+        stride_h);
     printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9);
 
     ttot = 0;
@@ -236,9 +242,8 @@ int main() {
         for (int h = 0; h < H_OUT; ++h) {
           for (int w = 0; w < W_OUT; ++w) {
             for (int g = 0; g < K; ++g) {
-              uint8_t expected =
-                  C_uint8_ref[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K +
-                              g];
+              uint8_t expected = C_uint8_ref
+                  [(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + g];
               uint8_t actual =
                   C_uint8[(((n * T_OUT + t) * H_OUT + h) * W_OUT + w) * K + g];
               if (expected != actual) {
@@ -255,9 +260,15 @@ int main() {
     } // n
 
     // Report performance
-    printf("N = %d K = %d T = %d H = %d W = %d stride = %d with requantization "
-           "fused\n",
-           N, K, T, H, W, stride_h);
+    printf(
+        "N = %d K = %d T = %d H = %d W = %d stride = %d with requantization "
+        "fused\n",
+        N,
+        K,
+        T,
+        H,
+        W,
+        stride_h);
     printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9);
   } // for each shape
 
diff --git a/bench/DepthwiseBenchmark.cc b/bench/DepthwiseBenchmark.cc
index 0bf2d73..8e6d83d 100644
--- a/bench/DepthwiseBenchmark.cc
+++ b/bench/DepthwiseBenchmark.cc
@@ -16,10 +16,10 @@
 #endif
 
 #include "AlignedVec.h"
-#include "src/FbgemmI8Depthwise.h"
+#include "BenchUtils.h"
 #include "fbgemm/Utils.h"
+#include "src/FbgemmI8Depthwise.h"
 #include "src/RefImplementations.h"
-#include "BenchUtils.h"
 
 using namespace std;
 using namespace fbgemm;
@@ -27,6 +27,8 @@ using namespace fbgemm;
 int main() {
   // From Xray OCR
   vector<vector<int>> shapes = {
+    // NOTE: clang-format would reformat this list, but the current layout is
+    // easier to read.
     // N,  G, H_in, W_in, stride
     {   1,  272,  47, 125, 1, },
     {   1,  272,  64, 125, 1, },
@@ -135,9 +137,9 @@ int main() {
   if (flush) {
     llc.resize(128 * 1024 * 1024, 1.0);
   }
-#define llc_flush()                                                            \
-  for (auto i = 0; i < llc.size(); i++) {                                      \
-    llc[i]++;                                                                  \
+#define llc_flush()                       \
+  for (auto i = 0; i < llc.size(); i++) { \
+    llc[i]++;                             \
   }
 
   constexpr int NWARMUP = 4;
@@ -209,8 +211,7 @@ int main() {
     Packed3x3ConvMatrix Bp(G, B.data());
 
     double ttot = 0;
-    double bytes =
-        double(NITER) *
+    double bytes = double(NITER) *
         (G * (N * (2 * sizeof(int32_t) * H_OUT * W_OUT + H * W) + R * S));
     double ops = double(NITER) * N * H_OUT * W_OUT * G * R * S * 2;
     chrono::time_point<chrono::system_clock> t_begin, t_end;
@@ -256,9 +257,9 @@ int main() {
             int32_t expected = C_ref[((n * H_OUT + h) * W_OUT + w) * G + g];
             int32_t actual = C[((n * H_OUT + h) * W_OUT + w) * G + g];
             if (expected != actual) {
-              cerr << "Depthwise 3x3 results differ at (" << n << ", "
-                   << h << ", " << w << ", " << g << "). expected "
-                   << expected << " actual " << actual << endl;
+              cerr << "Depthwise 3x3 results differ at (" << n << ", " << h
+                   << ", " << w << ", " << g << "). expected " << expected
+                   << " actual " << actual << endl;
               return -1;
             }
             assert(expected == actual);
@@ -320,9 +321,9 @@ int main() {
                 C_uint8_ref[((n * H_OUT + h) * W_OUT + w) * G + g];
             uint8_t actual = C_uint8[((n * H_OUT + h) * W_OUT + w) * G + g];
             if (expected != actual) {
-              cerr << "Depthwise 3x3 results differ at (" << n << ", "
-                   << h << ", " << w << ", " << g << "). expected "
-                   << (int)expected << " actual " << (int)actual << endl;
+              cerr << "Depthwise 3x3 results differ at (" << n << ", " << h
+                   << ", " << w << ", " << g << "). expected " << (int)expected
+                   << " actual " << (int)actual << endl;
               return -1;
             }
             assert(expected == actual);
@@ -334,7 +335,11 @@ int main() {
     // Report performance
     printf(
         "N = %d G = %d H = %d W = %d stride = %d with requantization fused\n",
-        N, G, H, W, stride_h);
+        N,
+        G,
+        H,
+        W,
+        stride_h);
     printf("GB/s = %f Gops/s = %f\n", bytes / ttot / 1e9, ops / ttot / 1e9);
   } // for each shape
 
diff --git a/bench/FP16Benchmark.cc b/bench/FP16Benchmark.cc
index 8fbe878..c03f18a 100644
--- a/bench/FP16Benchmark.cc
+++ b/bench/FP16Benchmark.cc
@@ -16,9 +16,9 @@
 #include <omp.h>
 #endif
 
+#include "AlignedVec.h"
 #include "bench/BenchUtils.h"
 #include "fbgemm/FbgemmFP16.h"
-#include "AlignedVec.h"
 
 using namespace std;
 using namespace fbgemm;
diff --git a/bench/I8SpmdmBenchmark.cc b/bench/I8SpmdmBenchmark.cc
index d361bb5..07b73dc 100644
--- a/bench/I8SpmdmBenchmark.cc
+++ b/bench/I8SpmdmBenchmark.cc
@@ -17,9 +17,9 @@
 #include <omp.h>
 #endif
 
+#include "BenchUtils.h"
 #include "fbgemm/FbgemmI8Spmdm.h"
 #include "src/RefImplementations.h"
-#include "BenchUtils.h"
 
 using namespace std;
 using namespace fbgemm;
@@ -156,7 +156,7 @@ int main() {
 #pragma omp parallel
 #endif
           {
-#if defined (FBGEMM_MEASURE_TIME_BREAKDOWN) || !defined(_OPENMP)
+#if defined(FBGEMM_MEASURE_TIME_BREAKDOWN) || !defined(_OPENMP)
             int num_threads = 1;
             int tid = 0;
 #else
diff --git a/bench/Im2ColFusedRequantizeAcc16Benchmark.cc b/bench/Im2ColFusedRequantizeAcc16Benchmark.cc
index 8827b4c..e3c9da2 100644
--- a/bench/Im2ColFusedRequantizeAcc16Benchmark.cc
+++ b/bench/Im2ColFusedRequantizeAcc16Benchmark.cc
@@ -16,9 +16,9 @@
 #include <omp.h>
 #endif
 
+#include "BenchUtils.h"
 #include "fbgemm/Fbgemm.h"
 #include "src/RefImplementations.h"
-#include "BenchUtils.h"
 
 using namespace std;
 using namespace fbgemm;
@@ -189,11 +189,7 @@ void performance_test() {
         PackAWithIm2Col<uint8_t, int16_t>::rowOffsetBufferSize());
 
     PackAWithIm2Col<uint8_t, int16_t> packA(
-        conv_p,
-        Aint8.data(),
-        nullptr,
-        Aint8_zero_point,
-        row_offset_buf.data());
+        conv_p, Aint8.data(), nullptr, Aint8_zero_point, row_offset_buf.data());
 
     PackBMatrix<int8_t, int16_t> packedB(
         matrix_op_t::NoTranspose, KDim, NDim, Bint8.data(), NDim);
diff --git a/bench/Im2ColFusedRequantizeAcc32Benchmark.cc b/bench/Im2ColFusedRequantizeAcc32Benchmark.cc
index b87f7d7..153dc3b 100644
--- a/bench/Im2ColFusedRequantizeAcc32Benchmark.cc
+++ b/bench/Im2ColFusedRequantizeAcc32Benchmark.cc
@@ -16,9 +16,9 @@
 #include <omp.h>
 #endif
 
+#include "BenchUtils.h"
 #include "fbgemm/Fbgemm.h"
 #include "src/RefImplementations.h"
-#include "BenchUtils.h"
 
 using namespace std;
 using namespace fbgemm;
@@ -191,11 +191,7 @@ void performance_test() {
         PackAWithIm2Col<uint8_t, int32_t>::rowOffsetBufferSize());
 
     PackAWithIm2Col<uint8_t, int32_t> packA(
-        conv_p,
-        Aint8.data(),
-        nullptr,
-        Aint8_zero_point,
-        row_offset_buf.data());
+        conv_p, Aint8.data(), nullptr, Aint8_zero_point, row_offset_buf.data());
 
     PackBMatrix<int8_t, int32_t> packedB(
         matrix_op_t::NoTranspose, KDim, NDim, Bint8.data(), NDim);
diff --git a/bench/PackedFloatInOutBenchmark.cc b/bench/PackedFloatInOutBenchmark.cc
index 29a7547..dc9536e 100644
--- a/bench/PackedFloatInOutBenchmark.cc
+++ b/bench/PackedFloatInOutBenchmark.cc
@@ -19,50 +19,52 @@
 #include <mkl.h>
 #endif
 
+#include "BenchUtils.h"
 #include "fbgemm/Fbgemm.h"
 #include "src/RefImplementations.h"
 #include "test/QuantizationHelpers.h"
-#include "BenchUtils.h"
 
 using namespace std;
 using namespace fbgemm;
 
 void performance_test() {
   vector<vector<int>> shapes = {
-      {1, 128, 512},
-      {1, 1024, 256},
-      {1, 2048, 512},
-      {1, 4096, 1024},
-
-      {6, 256, 1024},
-      {6, 256, 2048},
-      {6, 512, 512},
-      {6, 1024, 256},
-      {6, 2048, 256},
-      {6, 2048, 512},
-      {6, 4096, 256},
-      {6, 4096, 1024},
-      {6, 4096, 2048},
-
-      {10, 2048, 256},
-      {10, 4096, 1024},
-
-      {20, 2048, 256},
-      {20, 4096, 1024},
-
-      {102, 1024, 512},
-      {102, 2323, 256},
-      {102, 512, 256},
-
-      {1, 800, 3200},
-      {1, 800, 8000},
-
-      {16, 256, 1500},
-      {16, 256, 1567},
-      {1, 128, 2876},
-      {16, 128, 1567},
-      {1, 128, 2722},
-      {16, 256, 512},
+    // NOTE: clang-format would reformat this list, but the current layout is
+    // easier to read.
+    {1, 128, 512},
+    {1, 1024, 256},
+    {1, 2048, 512},
+    {1, 4096, 1024},
+
+    {6, 256, 1024},
+    {6, 256, 2048},
+    {6, 512, 512},
+    {6, 1024, 256},
+    {6, 2048, 256},
+    {6, 2048, 512},
+    {6, 4096, 256},
+    {6, 4096, 1024},
+    {6, 4096, 2048},
+
+    {10, 2048, 256},
+    {10, 4096, 1024},
+
+    {20, 2048, 256},
+    {20, 4096, 1024},
+
+    {102, 1024, 512},
+    {102, 2323, 256},
+    {102, 512, 256},
+
+    {1, 800, 3200},
+    {1, 800, 8000},
+
+    {16, 256, 1500},
+    {16, 256, 1567},
+    {1, 128, 2876},
+    {16, 128, 1567},
+    {1, 128, 2722},
+    {16, 256, 512},
   };
   bool flush = true;
   std::vector<char> llc;
diff --git a/bench/PackedRequantizeAcc16Benchmark.cc b/bench/PackedRequantizeAcc16Benchmark.cc
index 4974ba2..367aa32 100644
--- a/bench/PackedRequantizeAcc16Benchmark.cc
+++ b/bench/PackedRequantizeAcc16Benchmark.cc
@@ -20,9 +20,9 @@
 #include <mkl.h>
 #endif
 
+#include "BenchUtils.h"
 #include "fbgemm/Fbgemm.h"
 #include "src/RefImplementations.h"
-#include "BenchUtils.h"
 
 using namespace std;
 using namespace fbgemm;
diff --git a/bench/PackedRequantizeAcc32Benchmark.cc b/bench/PackedRequantizeAcc32Benchmark.cc
index 94bb899..84d7c24 100644
--- a/bench/PackedRequantizeAcc32Benchmark.cc
+++ b/bench/PackedRequantizeAcc32Benchmark.cc
@@ -19,53 +19,55 @@
 #include <mkl.h>
 #endif
 
+#include "BenchUtils.h"
 #include "fbgemm/Fbgemm.h"
 #include "src/RefImplementations.h"
 #include "test/QuantizationHelpers.h"
-#include "BenchUtils.h"
 
 using namespace std;
 using namespace fbgemm;
 
 void performance_test() {
   vector<vector<int>> shapes = {
-      {156800, 4, 36},
-      {156800, 8, 36},
-      {156800, 16, 36},
-      {1, 128, 512},
-      {1, 1024, 256},
-      {1, 2048, 512},
-      {1, 4096, 1024},
-
-      {6, 256, 1024},
-      {6, 256, 2048},
-      {6, 512, 512},
-      {6, 1024, 256},
-      {6, 2048, 256},
-      {6, 2048, 512},
-      {6, 4096, 256},
-      {6, 4096, 1024},
-      {6, 4096, 2048},
-
-      {10, 2048, 256},
-      {10, 4096, 1024},
-
-      {20, 2048, 256},
-      {20, 4096, 1024},
-
-      {102, 1024, 512},
-      {102, 2323, 256},
-      {102, 512, 256},
-
-      {1, 800, 3200},
-      {1, 800, 8000},
-
-      {16, 256, 1500},
-      {16, 256, 1567},
-      {1, 128, 2876},
-      {16, 128, 1567},
-      {1, 128, 2722},
-      {16, 256, 512},
+    // NOTE: clang-format would reformat this list, but the current layout is
+    // easier to read.
+    {156800, 4, 36},
+    {156800, 8, 36},
+    {156800, 16, 36},
+    {1, 128, 512},
+    {1, 1024, 256},
+    {1, 2048, 512},
+    {1, 4096, 1024},
+
+    {6, 256, 1024},
+    {6, 256, 2048},
+    {6, 512, 512},
+    {6, 1024, 256},
+    {6, 2048, 256},
+    {6, 2048, 512},
+    {6, 4096, 256},
+    {6, 4096, 1024},
+    {6, 4096, 2048},
+
+    {10, 2048, 256},
+    {10, 4096, 1024},
+
+    {20, 2048, 256},
+    {20, 4096, 1024},
+
+    {102, 1024, 512},
+    {102, 2323, 256},
+    {102, 512, 256},
+
+    {1, 800, 3200},
+    {1, 800, 8000},
+
+    {16, 256, 1500},
+    {16, 256, 1567},
+    {1, 128, 2876},
+    {16, 128, 1567},
+    {1, 128, 2722},
+    {16, 256, 512},
   };
   bool flush = true;
   std::vector<char> llc;
-- 
cgit v1.2.3