From 6377ee4d9f051d7be0c9c290bb33ab66f27ea900 Mon Sep 17 00:00:00 2001
From: Kenneth Heafield
Date: Thu, 23 Apr 2020 23:39:26 +0000
Subject: Rudimentary tile benchmark. Keep in mind Multiply still needs optimization.

---
Reviewer notes (this section is ignored when the patch is applied):
- The exported patch had lost its line breaks and most text inside angle
  brackets.  The standard #include list was reconstructed from the names the
  file uses; the fourth include is a guess (<cstddef>, for std::size_t).
  Confirm against the repository.
- The benchmark now starts its clock after the burn-in call; previously the
  warm-up run was timed as well but the total was still divided by kTries.
- The blob id on the benchmark_tile.cc "index" line predates these edits.

 CMakeLists.txt               |  2 +-
 benchmarks/benchmark_tile.cc | 58 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/benchmark_tile.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2cdd89..02fd705 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,7 +62,7 @@ if(INTGEMM_DONT_BUILD_TESTS)
   return()
 endif()
 
-foreach(exe benchmark biasmultiply benchmark_quantizer)
+foreach(exe benchmark biasmultiply benchmark_quantizer benchmark_tile)
   add_executable(${exe} benchmarks/${exe}.cc)
   target_link_libraries(${exe} intgemm)
 endforeach()
diff --git a/benchmarks/benchmark_tile.cc b/benchmarks/benchmark_tile.cc
new file mode 100644
index 0000000..b7a5981
--- /dev/null
+++ b/benchmarks/benchmark_tile.cc
@@ -0,0 +1,58 @@
+#include "../aligned.h"
+#include "../stop_watch.h"
+#include "../test/test_matrices.h"
+#include "../tile/multiply.h"
+#include "../tile/dot.h"
+
+#include <chrono>
+#include <cstddef>
+#include <iomanip>
+#include <iostream>
+
+namespace intgemm {
+namespace {
+
+typedef TestMatrices8::AccessT Accessor;
+
+// Times Multiply for one A_rows x B_cols kernel shape and prints the mean
+// time per call (seconds, via duration<double>) followed by "A_rows x B_cols".
+template <Index A_rows, Index B_cols> static inline void BenchmarkOne(Accessor access, Tile shape) {
+  constexpr std::size_t kTries = 4;
+  // NOTE(review): the kernel's template arguments were lost when this patch
+  // was exported; UnrollKernel<A_rows, 1, B_cols, Shifted8> is a
+  // reconstruction -- confirm against tile/multiply.h and tile/dot.h.
+  typedef AVX512VNNI::UnrollKernel<A_rows, 1, B_cols, AVX512VNNI::Shifted8> Kernel;
+  // Burn in.  Done before the clock starts so that this extra call is not
+  // folded into an average that divides by kTries only.
+  AVX512VNNI::Multiply<Accessor, Kernel>(access, shape);
+  const auto start = std::chrono::steady_clock::now();
+  for (std::size_t t = 0; t < kTries; ++t) {
+    // TODO: try various multipliers, guard against old compilers, etc.
+    AVX512VNNI::Multiply<Accessor, Kernel>(access, shape);
+  }
+  const auto end = std::chrono::steady_clock::now();
+  const double took = std::chrono::duration<double>(end - start).count() / kTries;
+  std::cout << std::setw(8) << std::setprecision(4) << took << ' ' << std::setw(2) << A_rows << 'x' << std::setw(2) << B_cols << std::endl;
+}
+
+// Runs BenchmarkOne once per index in the sequence, mapping index i to the
+// shape (i / ColsMax + 1) x (i % ColsMax + 1).  Every run reads from the same
+// TestMatrices8 instance.  NOTE(review): Index... is reconstructed -- confirm.
+template <Index... Iterator> static inline void BenchmarkKernels(Tile shape, index_sequence<Iterator...>) {
+  constexpr Index ColsMax = 16;
+  TestMatrices8 matrices(shape);
+  // C++11 stand-in for a fold expression: the elements of a braced-init-list
+  // are evaluated left to right, so the benchmarks run in index order.
+  using unfurl = int[];
+  (void)unfurl{0, (
+    BenchmarkOne<(Iterator / ColsMax) + 1, (Iterator % ColsMax) + 1>(matrices.Accessor(), shape)
+  , 0)...};
+}
+
+} // namespace
+} // namespace intgemm
+
+int main() {
+  // 16*16 indices cover every kernel shape from 1x1 through 16x16.
+  intgemm::BenchmarkKernels({1024, 1024, 1024}, intgemm::make_index_sequence<16*16>());
+}
-- 
cgit v1.2.3