diff options
author | Nikolay Bogoychev <nheart@gmail.com> | 2020-03-17 21:11:30 +0300 |
---|---|---|
committer | Nikolay Bogoychev <nheart@gmail.com> | 2020-03-17 21:11:30 +0300 |
commit | bfb4d5df5fcfe68836719a38a5200006b8e37317 (patch) | |
tree | a69ed680959003205ea4b9bbe3827ee8800db651 | |
parent | 9c59b066ffe6b6f81b41cefb8743f70e7b95ebee (diff) |
More matrix sizesmultiply-tiling-manymatrices
-rw-r--r-- | benchmarks/multiply_tiles_benchmark.cc | 59 |
1 files changed, 45 insertions, 14 deletions
diff --git a/benchmarks/multiply_tiles_benchmark.cc b/benchmarks/multiply_tiles_benchmark.cc index 33b7efe..7ae033b 100644 --- a/benchmarks/multiply_tiles_benchmark.cc +++ b/benchmarks/multiply_tiles_benchmark.cc @@ -8,6 +8,17 @@ using namespace intgemm; +struct matrix_size { + const int M; + const int K; + const int N; + + friend std::ostream& operator<<(std::ostream& os, const matrix_size& m) { + os << "Matrix size: M: " << m.M << " K: " << m.K << " N: " << m.N; + return os; + } +}; + static constexpr Index TestCases[][2] = { {1, 1}, {1, 2}, {1, 3}, {1, 4}, {2, 1}, {2, 2}, {2, 3}, {2, 4}, @@ -33,6 +44,9 @@ struct BenchmarkLoop { AlignedVector<int32_t> C(A_rows * B_cols); std::chrono::duration<double> duration = std::chrono::nanoseconds::zero(); + if (B_cols % TileColumnsMultiplier*8 != 0) { + return; + } for (Index i = 0; i < repeats; ++i) { std::generate(A_prepared.begin(), A_prepared.end(), [&]() { return dist(gen); @@ -47,7 +61,7 @@ struct BenchmarkLoop { duration += std::chrono::system_clock::now() - start; } - std::cout << repeats << " iterations of " << Backend::kName << " with tile = " << TileRows << "x" << 8 * TileColumnsMultiplier << " took: " << duration.count() << " seconds." << std::endl; + std::cout << repeats << " " << A_rows << "x" << width << "x" << B_cols << " iterations of " << Backend::kName << " with tile = " << TileRows << "x" << 8 * TileColumnsMultiplier << " took: " << duration.count() << " seconds." << std::endl; } }; @@ -86,22 +100,39 @@ int main(int argc, char** argv) { std::cerr << "Warning: Unknown architecture '" << argv[i] << "'!" << std::endl; } +std::vector<matrix_size> matrices = { + {1024, 1024, 1024}, + {768, 768, 768}, + {256, 10368, 256}, + {256, 5312, 256}, + {8, 2048, 256}, + {320, 256, 256}, + {472, 256, 256}, + {248, 256, 256}, + {200, 256, 256}, + {1, 64, 8}}; + #define ARCH_BENCHMARK_IMPL(arch, A_rows, width, B_cols) \ if (selected_archs[i].compare(#arch) == 0) { StaticLoop<BenchmarkLoop<arch>, MakeStaticLoopIterator<TestCasesN>>((A_rows), (width), (B_cols), repeats); } - for (int i = 0; i < selected_archs.size(); ++i) { - ARCH_BENCHMARK_IMPL(SSSE3_8bit, 768, 768, 768) - ARCH_BENCHMARK_IMPL(SSE2_16bit, 768, 768, 768) - ARCH_BENCHMARK_IMPL(AVX2_8bit, 768, 768, 768) - ARCH_BENCHMARK_IMPL(AVX2_16bit, 768, 768, 768) - // Tiling not supported yet - // #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW - // ARCH_BENCHMARK_IMPL(AVX512BW_8bit, 768, 768, 768) - // ARCH_BENCHMARK_IMPL(AVX512BW_8bit, 768, 768, 768) - // #endif - #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI - ARCH_BENCHMARK_IMPL(AVX512VNNI_8bit, 768, 768, 768) - #endif + for (auto&& matrix : matrices) { + Index m = matrix.M; + Index n = matrix.N; + Index k = matrix.K; + for (int i = 0; i < selected_archs.size(); ++i) { + ARCH_BENCHMARK_IMPL(SSSE3_8bit, m, n, k) + ARCH_BENCHMARK_IMPL(SSE2_16bit, m, n, k) + ARCH_BENCHMARK_IMPL(AVX2_8bit, m, n, k) + ARCH_BENCHMARK_IMPL(AVX2_16bit, m, n, k) + // Tiling not supported yet + // #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW + // ARCH_BENCHMARK_IMPL(AVX512BW_8bit, 768, 768, 768) + // ARCH_BENCHMARK_IMPL(AVX512BW_8bit, 768, 768, 768) + // #endif + #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI + ARCH_BENCHMARK_IMPL(AVX512VNNI_8bit, m, n, k) + #endif + } } return 0; |