Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2020-04-24 02:39:26 +0300
committerKenneth Heafield <github@kheafield.com>2020-04-24 02:39:26 +0300
commit6377ee4d9f051d7be0c9c290bb33ab66f27ea900 (patch)
tree10add606b83ef2eb32be76e2df0be36af848446c
parentcceddf2df1a366e14aa49cb3c46c3294c9c57489 (diff)
Rudimentary tile benchmark. Keep in mind Multiply still needs optimization.
-rw-r--r--CMakeLists.txt2
-rw-r--r--benchmarks/benchmark_tile.cc45
2 files changed, 46 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f2cdd89..02fd705 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,7 +62,7 @@ if(INTGEMM_DONT_BUILD_TESTS)
return()
endif()
-foreach(exe benchmark biasmultiply benchmark_quantizer)
+foreach(exe benchmark biasmultiply benchmark_quantizer benchmark_tile)
add_executable(${exe} benchmarks/${exe}.cc)
target_link_libraries(${exe} intgemm)
endforeach()
diff --git a/benchmarks/benchmark_tile.cc b/benchmarks/benchmark_tile.cc
new file mode 100644
index 0000000..b7a5981
--- /dev/null
+++ b/benchmarks/benchmark_tile.cc
@@ -0,0 +1,45 @@
+#include "../aligned.h"
+#include "../stop_watch.h"
+#include "../test/test_matrices.h"
+#include "../tile/multiply.h"
+#include "../tile/dot.h"
+
+#include <chrono>
+#include <iomanip>
+#include <random>
+#include <vector>
+
+namespace intgemm {
+namespace {
+
+// Accessor type published by TestMatrices8; the benchmark functions below pass it by value.
+using Accessor = TestMatrices8::AccessT;
+
+// Times AVX512VNNI::Multiply (Shifted8 kernel) instantiated with the template
+// parameters A_rows and B_cols, then prints the mean wall-clock seconds per call.
+//
+// access: accessor forwarded unchanged to every Multiply call.
+// shape:  Tile forwarded unchanged to every Multiply call.
+//
+// Writes one line to std::cout: "<seconds> <A_rows>x<B_cols>".
+template <Index A_rows, Index B_cols> static inline void BenchmarkOne(Accessor access, Tile shape) {
+  const std::size_t kTries = 4;
+  // Burn in. This warm-up call stays outside the timed region so that the
+  // average below covers exactly kTries calls.
+  AVX512VNNI::Multiply<Accessor, AVX512VNNI::Shifted8, A_rows, B_cols>(access, shape);
+  const auto start = std::chrono::steady_clock::now();
+  for (std::size_t t = 0; t < kTries; ++t) {
+    // TODO: try various multipliers, guard against old compilers, etc.
+    AVX512VNNI::Multiply<Accessor, AVX512VNNI::Shifted8, A_rows, B_cols>(access, shape);
+  }
+  const auto end = std::chrono::steady_clock::now();
+  // duration<double> counts in seconds; divide by the number of timed calls.
+  const double took = std::chrono::duration<double>(end - start).count() / kTries;
+  std::cout << std::setw(8) << std::setprecision(4) << took << ' ' << std::setw(2) << A_rows << 'x' << std::setw(2) << B_cols << std::endl;
+}
+
+// Calls BenchmarkOne once per index in the Iterator pack, in pack order.
+// Index i is mapped to A_rows = i / 16 + 1 and B_cols = i % 16 + 1.
+// Every call receives an accessor from the same TestMatrices8, built once from shape.
+template <std::size_t... Iterator> static inline void BenchmarkKernels(Tile shape, index_sequence<Iterator...>) {
+  TestMatrices8 matrices(shape);
+  constexpr Index kColsMax = 16;
+  // Pack expansion without fold expressions: each element of the braced
+  // initializer runs one BenchmarkOne call and then yields 0 via the comma
+  // operator. The leading 0 keeps the array non-empty when the pack is empty.
+  const int expand[] = {0, (
+      BenchmarkOne<(Iterator / kColsMax) + 1, (Iterator % kColsMax) + 1>(matrices.Accessor(), shape)
+      , 0)...};
+  (void)expand;  // The array exists only to drive the expansion.
+}
+
+} // namespace
+} // namespace intgemm
+
+// Entry point: benchmarks 16 * 16 = 256 kernel indices on a {1024, 1024, 1024} Tile.
+int main() {
+  // One index per kernel instantiation; BenchmarkKernels decodes each index
+  // into its A_rows / B_cols template arguments.
+  using KernelIds = intgemm::make_index_sequence<16 * 16>;
+  // NOTE(review): the three 1024s presumably are A rows, inner dimension and
+  // B columns - confirm against the Tile definition.
+  intgemm::BenchmarkKernels({1024, 1024, 1024}, KernelIds());
+  return 0;
+}