diff options
author | Chao Mei <chaomei@google.com> | 2021-04-06 05:07:24 +0300 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2021-04-06 05:07:45 +0300 |
commit | 38a9266b832767a3f535a74a9e0cf39f7892e594 (patch) | |
tree | 1cb63d0a0092cc974d3ff87246037f3e2a45bf57 /ruy/trmul.cc | |
parent | 939449243eb36e5b668cc00a1c936f2b1ad4dc27 (diff) |
Create a utility library to suppress floating-point denormals, and apply it to every task execution of every thread.
PiperOrigin-RevId: 366919663
Diffstat (limited to 'ruy/trmul.cc')
-rw-r--r-- | ruy/trmul.cc | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/ruy/trmul.cc b/ruy/trmul.cc
index 9345f0c..602660b 100644
--- a/ruy/trmul.cc
+++ b/ruy/trmul.cc
@@ -30,6 +30,7 @@ limitations under the License.
 #include "ruy/cpu_cache_params.h"
 #include "ruy/cpuinfo.h"
 #include "ruy/ctx.h"
+#include "ruy/denormal.h"
 #include "ruy/mat.h"
 #include "ruy/matrix.h"
 #include "ruy/mul_params.h"
@@ -307,6 +308,12 @@ void TrMul(Ctx* ctx, TrMulParams* params) {
       GetTentativeThreadCount(ctx, rows, cols, depth);
   const auto& cpu_cache_params = ctx->mutable_cpuinfo()->CacheParams();
 
+  // Suppress denormals to avoid computation inefficiency.
+  // Note this only handles the denormal suppression on the main thread. As for
+  // worker threads, the suppression is handled in each thread's main loop. See
+  // the corresponding code in thread_pool.cc for details.
+  ScopedSuppressDenormals suppress_denormals;
+
   // Case of running this TrMul as a simple loop.
   // This is a good place to start reading this function: all the rest
   // of this function is just an optimized, but functionally equivalent,