diff options
author | Jongsoo Park <jongsoo@fb.com> | 2020-04-03 20:08:17 +0300 |
---|---|---|
committer | Jongsoo Park <jongsoo@fb.com> | 2020-04-03 22:15:00 +0300 |
commit | 1bda6aec6d3f85071fb77a2786848a5b5ed11ca5 (patch) | |
tree | e7cff01ea9d87850aefb10b6910183f287f2eef3 | |
parent | 1798e5643532e7006250dcbfcfb38627477f3d24 (diff) |
add dummy compute to cache evict func (#337)
Summary:
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/337
https://github.com/pytorch/FBGEMM/issues/333
Reviewed By: jianyuh
Differential Revision: D20799659
fbshipit-source-id: 6b38301b39423806cb4a5087c0925116c336854f
-rw-r--r-- | bench/BenchUtils.h | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/bench/BenchUtils.h b/bench/BenchUtils.h
index 2b63068..64dafd3 100644
--- a/bench/BenchUtils.h
+++ b/bench/BenchUtils.h
@@ -15,6 +15,7 @@
 #include <omp.h>
 #endif
 #include "./AlignedVec.h"
+#include "fbgemm/FbgemmBuild.h"

 namespace fbgemm {

@@ -31,16 +32,27 @@ int fbgemm_get_num_threads();
 int fbgemm_get_thread_num();

 template <typename T>
-void cache_evict(const T& vec) {
+NOINLINE
+float cache_evict(const T& vec) {
   auto const size = vec.size();
   auto const elemSize = sizeof(typename T::value_type);
   auto const dataSize = size * elemSize;
   const char* data = reinterpret_cast<const char*>(vec.data());
   constexpr int CACHE_LINE_SIZE = 64;

+  // Not having this dummy computation significantly slows down the computation
+  // that follows.
+  float dummy = 0.0f;
   for (std::size_t i = 0; i < dataSize; i += CACHE_LINE_SIZE) {
+    dummy += data[i] * 1.0f;
+    _mm_mfence();
+#ifndef _MSC_VER
+    asm volatile("" ::: "memory");
+#endif
     _mm_clflush(&data[i]);
   }
+
+  return dummy;
 }

 /**