Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikolay Bogoychev <nheart@gmail.com>2020-03-17 21:11:30 +0300
committerNikolay Bogoychev <nheart@gmail.com>2020-03-17 21:11:30 +0300
commitbfb4d5df5fcfe68836719a38a5200006b8e37317 (patch)
treea69ed680959003205ea4b9bbe3827ee8800db651
parent9c59b066ffe6b6f81b41cefb8743f70e7b95ebee (diff)
-rw-r--r--benchmarks/multiply_tiles_benchmark.cc59
1 files changed, 45 insertions, 14 deletions
diff --git a/benchmarks/multiply_tiles_benchmark.cc b/benchmarks/multiply_tiles_benchmark.cc
index 33b7efe..7ae033b 100644
--- a/benchmarks/multiply_tiles_benchmark.cc
+++ b/benchmarks/multiply_tiles_benchmark.cc
@@ -8,6 +8,17 @@
using namespace intgemm;
+struct matrix_size {
+ const int M;
+ const int K;
+ const int N;
+
+ friend std::ostream& operator<<(std::ostream& os, const matrix_size& m) {
+ os << "Matrix size: M: " << m.M << " K: " << m.K << " N: " << m.N;
+ return os;
+ }
+};
+
static constexpr Index TestCases[][2] = {
{1, 1}, {1, 2}, {1, 3}, {1, 4},
{2, 1}, {2, 2}, {2, 3}, {2, 4},
@@ -33,6 +44,9 @@ struct BenchmarkLoop {
AlignedVector<int32_t> C(A_rows * B_cols);
std::chrono::duration<double> duration = std::chrono::nanoseconds::zero();
+ if (B_cols % TileColumnsMultiplier*8 != 0) {
+ return;
+ }
for (Index i = 0; i < repeats; ++i) {
std::generate(A_prepared.begin(), A_prepared.end(), [&]() {
return dist(gen);
@@ -47,7 +61,7 @@ struct BenchmarkLoop {
duration += std::chrono::system_clock::now() - start;
}
- std::cout << repeats << " iterations of " << Backend::kName << " with tile = " << TileRows << "x" << 8 * TileColumnsMultiplier << " took: " << duration.count() << " seconds." << std::endl;
+ std::cout << repeats << " " << A_rows << "x" << width << "x" << B_cols << " iterations of " << Backend::kName << " with tile = " << TileRows << "x" << 8 * TileColumnsMultiplier << " took: " << duration.count() << " seconds." << std::endl;
}
};
@@ -86,22 +100,39 @@ int main(int argc, char** argv) {
std::cerr << "Warning: Unknown architecture '" << argv[i] << "'!" << std::endl;
}
+std::vector<matrix_size> matrices = {
+ {1024, 1024, 1024},
+ {768, 768, 768},
+ {256, 10368, 256},
+ {256, 5312, 256},
+ {8, 2048, 256},
+ {320, 256, 256},
+ {472, 256, 256},
+ {248, 256, 256},
+ {200, 256, 256},
+ {1, 64, 8}};
+
#define ARCH_BENCHMARK_IMPL(arch, A_rows, width, B_cols) \
if (selected_archs[i].compare(#arch) == 0) { StaticLoop<BenchmarkLoop<arch>, MakeStaticLoopIterator<TestCasesN>>((A_rows), (width), (B_cols), repeats); }
- for (int i = 0; i < selected_archs.size(); ++i) {
- ARCH_BENCHMARK_IMPL(SSSE3_8bit, 768, 768, 768)
- ARCH_BENCHMARK_IMPL(SSE2_16bit, 768, 768, 768)
- ARCH_BENCHMARK_IMPL(AVX2_8bit, 768, 768, 768)
- ARCH_BENCHMARK_IMPL(AVX2_16bit, 768, 768, 768)
- // Tiling not supported yet
- // #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
- // ARCH_BENCHMARK_IMPL(AVX512BW_8bit, 768, 768, 768)
- // ARCH_BENCHMARK_IMPL(AVX512BW_8bit, 768, 768, 768)
- // #endif
- #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
- ARCH_BENCHMARK_IMPL(AVX512VNNI_8bit, 768, 768, 768)
- #endif
+ for (auto&& matrix : matrices) {
+ Index m = matrix.M;
+ Index n = matrix.N;
+ Index k = matrix.K;
+ for (int i = 0; i < selected_archs.size(); ++i) {
+ ARCH_BENCHMARK_IMPL(SSSE3_8bit, m, n, k)
+ ARCH_BENCHMARK_IMPL(SSE2_16bit, m, n, k)
+ ARCH_BENCHMARK_IMPL(AVX2_8bit, m, n, k)
+ ARCH_BENCHMARK_IMPL(AVX2_16bit, m, n, k)
+ // Tiling not supported yet
+ // #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
+ // ARCH_BENCHMARK_IMPL(AVX512BW_8bit, 768, 768, 768)
+ // ARCH_BENCHMARK_IMPL(AVX512BW_8bit, 768, 768, 768)
+ // #endif
+ #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+ ARCH_BENCHMARK_IMPL(AVX512VNNI_8bit, m, n, k)
+ #endif
+ }
}
return 0;