diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-03-19 00:40:29 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-03-19 00:40:29 +0300 |
commit | 4744e37f0623382830cc9afc09590f7c00a309be (patch) | |
tree | 5f81f70774c699fe30f5c26aa8e67cd212df733f | |
parent | 0da7a269c1113bc103a71f284b2275b6d2ea5d23 (diff) |
Oops was all the same implementation
-rw-r--r-- | benchmarks/benchmark_log4.cc | 6 |
1 files changed, 2 insertions, 4 deletions
```diff
diff --git a/benchmarks/benchmark_log4.cc b/benchmarks/benchmark_log4.cc
index cf11a19..aaa8a27 100644
--- a/benchmarks/benchmark_log4.cc
+++ b/benchmarks/benchmark_log4.cc
@@ -57,7 +57,6 @@ struct MAddUBS16 {
 };
 
 struct MAddUBS32 {
-  // Overkill on the target option.
   template <typename Iterator> INTGEMM_AVX512BW __attribute__((always_inline)) static void body(const __m512i *a, const __m512i *b, __m512i *c, __m512i *, uint64_t *) {
     // Into 16
     __m512i added = _mm512_maddubs_epi16(a[Iterator::template I<0>()], b[Iterator::template I<0>()]);
@@ -112,8 +111,7 @@ template <class Backend, Index Unroll> INTGEMM_AVX512VNNI void Try(const __m512i
   for (int s = 0; s < kSamples; ++s) {
     StopWatch w(stats);
     for (const __m512i *a_it = a_begin, *b_it = b_begin; a_it != a_end; a_it += Unroll, b_it += Unroll) {
-      StaticLoop<MAddUBS32, MakeStaticLoopIterator<Unroll> >(a_it, b_it, &accum[0], &subtractreg, &subtract65535);
-
+      StaticLoop<Backend, MakeStaticLoopIterator<Unroll> >(a_it, b_it, &accum[0], &subtractreg, &subtract65535);
     }
   }
 
@@ -123,7 +121,7 @@ template <class Backend, Index Unroll> INTGEMM_AVX512VNNI void Try(const __m512i
   int64_t total = std::accumulate(result, result + sizeof(result) / sizeof(uint64_t), -255 * subtract65535);
   std::memcpy(result, &subtractreg, sizeof(subtractreg));
   total = std::accumulate(result, result + sizeof(subtractreg) / sizeof(uint64_t), total);
-  asm volatile("" : "+r" (total));
+  asm volatile("" : "+r" (total), "+r" (subtract65535));
   std::cout << Summarize(stats, a_end - a_begin, scale) << " " << name << '\n';
 }
 
```