diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-04-24 02:39:26 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-04-24 02:39:26 +0300 |
commit | 6377ee4d9f051d7be0c9c290bb33ab66f27ea900 (patch) | |
tree | 10add606b83ef2eb32be76e2df0be36af848446c | |
parent | cceddf2df1a366e14aa49cb3c46c3294c9c57489 (diff) |
Rudimentary tile benchmark. Keep in mind Multiply still needs optimization.
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | benchmarks/benchmark_tile.cc | 45 |
2 files changed, 46 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2cdd89..02fd705 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,7 +62,7 @@ if(INTGEMM_DONT_BUILD_TESTS)
   return()
 endif()
 
-foreach(exe benchmark biasmultiply benchmark_quantizer)
+foreach(exe benchmark biasmultiply benchmark_quantizer benchmark_tile)
   add_executable(${exe} benchmarks/${exe}.cc)
   target_link_libraries(${exe} intgemm)
 endforeach()
diff --git a/benchmarks/benchmark_tile.cc b/benchmarks/benchmark_tile.cc
new file mode 100644
index 0000000..b7a5981
--- /dev/null
+++ b/benchmarks/benchmark_tile.cc
@@ -0,0 +1,45 @@
+#include "../aligned.h"
+#include "../stop_watch.h"
+#include "../test/test_matrices.h"
+#include "../tile/multiply.h"
+#include "../tile/dot.h"
+
+#include <chrono>
+#include <iomanip>
+#include <random>
+#include <vector>
+
+namespace intgemm {
+namespace {
+
+typedef TestMatrices8::AccessT Accessor;
+
+template <Index A_rows, Index B_cols> static inline void BenchmarkOne(Accessor access, Tile shape) {
+  const std::size_t kTries = 4;
+  auto start = std::chrono::steady_clock::now();
+  // Burn in.
+  AVX512VNNI::Multiply<Accessor, AVX512VNNI::Shifted8, A_rows, B_cols>(access, shape);
+  for (std::size_t t = 0; t < kTries; ++t) {
+    // TODO: try various multipliers, guard against old compilers, etc.
+    AVX512VNNI::Multiply<Accessor, AVX512VNNI::Shifted8, A_rows, B_cols>(access, shape);
+  }
+  auto end = std::chrono::steady_clock::now();
+  double took = std::chrono::duration<double>(end - start).count() / kTries;
+  std::cout << std::setw(8) << std::setprecision(4) << took << ' ' << std::setw(2) << A_rows << 'x' << std::setw(2) << B_cols << std::endl;
+}
+
+template <std::size_t... Iterator> static inline void BenchmarkKernels(Tile shape, index_sequence<Iterator...>) {
+  constexpr Index ColsMax = 16;
+  TestMatrices8 matrices(shape);
+  using unfurl = int[];
+  (void)unfurl{0, (
+    BenchmarkOne<(Iterator / ColsMax) + 1, (Iterator % ColsMax) + 1>(matrices.Accessor(), shape)
+  , 0)...};
+}
+
+} // namespace
+} // namespace intgemm
+
+int main() {
+  intgemm::BenchmarkKernels({1024, 1024, 1024}, intgemm::make_index_sequence<16*16>());
+}