Rudimentary tile benchmark. Keep in mind Multiply still needs optimization.

author: Kenneth Heafield <github@kheafield.com> 2020-04-24 02:39:26 +0300
committer: Kenneth Heafield <github@kheafield.com> 2020-04-24 02:39:26 +0300
commit: 6377ee4d9f051d7be0c9c290bb33ab66f27ea900 (patch)
tree: 10add606b83ef2eb32be76e2df0be36af848446c
parent: cceddf2df1a366e14aa49cb3c46c3294c9c57489 (diff)
2 files changed, 46 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2cdd89..02fd705 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,7 +62,7 @@ if(INTGEMM_DONT_BUILD_TESTS)
   return()
 endif()
 
-foreach(exe benchmark biasmultiply benchmark_quantizer)
+foreach(exe benchmark biasmultiply benchmark_quantizer benchmark_tile)
   add_executable(${exe} benchmarks/${exe}.cc)
   target_link_libraries(${exe} intgemm)
 endforeach()
diff --git a/benchmarks/benchmark_tile.cc b/benchmarks/benchmark_tile.cc
new file mode 100644
index 0000000..b7a5981
--- /dev/null
+++ b/benchmarks/benchmark_tile.cc
@@ -0,0 +1,45 @@
+#include "../aligned.h"
+#include "../stop_watch.h"
+#include "../test/test_matrices.h"
+#include "../tile/multiply.h"
+#include "../tile/dot.h"
+
+#include <chrono>
+#include <iomanip>
+#include <random>
+#include <vector>
+
+namespace intgemm {
+namespace {
+
+using Accessor = TestMatrices8::AccessT;  // Accessor type passed to every benchmarked Multiply instantiation.
+
+template <Index A_rows, Index B_cols> static inline void BenchmarkOne(Accessor access, Tile shape) {  // Prints the mean seconds per call of the A_rows x B_cols Multiply kernel.
+  const std::size_t kTries = 4;  // Number of timed calls averaged into the reported figure.
+  // Burn in: one untimed call, made before the clock starts so it cannot inflate the average.
+  AVX512VNNI::Multiply<Accessor, AVX512VNNI::Shifted8, A_rows, B_cols>(access, shape);
+  auto start = std::chrono::steady_clock::now();
+  for (std::size_t t = 0; t < kTries; ++t) {
+    // TODO: try various multipliers, guard against old compilers, etc.
+    AVX512VNNI::Multiply<Accessor, AVX512VNNI::Shifted8, A_rows, B_cols>(access, shape);
+  }
+  auto end = std::chrono::steady_clock::now();
+  double took = std::chrono::duration<double>(end - start).count() / kTries;  // Exactly kTries calls were timed.
+  std::cout << std::setw(8) << std::setprecision(4) << took << ' ' << std::setw(2) << A_rows << 'x' << std::setw(2) << B_cols << std::endl;
+}
+
+template <std::size_t... Iterator> static inline void BenchmarkKernels(Tile shape, index_sequence<Iterator...>) {  // Runs BenchmarkOne once per value in the Iterator pack.
+  constexpr Index ColsMax = 16;  // Divisor that splits each flat Iterator value into a (rows, cols) pair.
+  TestMatrices8 matrices(shape);  // Built once; every BenchmarkOne call below gets an accessor from it.
+  using unfurl = int[];  // Array type used only as a vehicle for expanding the parameter pack.
+  (void)unfurl{0, (  // Leading 0 keeps the array non-empty; the array itself is discarded.
+    BenchmarkOne<(Iterator / ColsMax) + 1, (Iterator % ColsMax) + 1>(matrices.Accessor(), shape)  // rows = Iterator / ColsMax + 1, cols = Iterator % ColsMax + 1.
+  , 0)...};  // The comma operator turns each void call into an int element.
+}
+
+} // namespace
+} // namespace intgemm
+
+int main() {  // Entry point: benchmarks every kernel size on a {1024, 1024, 1024} shape.
+  intgemm::BenchmarkKernels({1024, 1024, 1024}, intgemm::make_index_sequence<16*16>());  // 16*16 sequence values; BenchmarkKernels splits each into (rows, cols) using ColsMax = 16.
+}
author	Kenneth Heafield <github@kheafield.com>	2020-04-24 02:39:26 +0300
committer	Kenneth Heafield <github@kheafield.com>	2020-04-24 02:39:26 +0300
commit	6377ee4d9f051d7be0c9c290bb33ab66f27ea900 (patch)
tree	10add606b83ef2eb32be76e2df0be36af848446c
parent	cceddf2df1a366e14aa49cb3c46c3294c9c57489 (diff)