diff options
| | | |
|---|---|---|
| author | Young Jin Kim <youki@microsoft.com> | 2020-05-11 23:43:46 +0300 |
| committer | Young Jin Kim <youki@microsoft.com> | 2020-05-11 23:43:46 +0300 |
| commit | 182cbfdd5276af42490ad7d8db93f71ea9086b8a (patch) | |
| tree | bf051178c718d11f78eef114fb839e2e968eeb45 | |
| parent | 84e66a976046180187724aff60a236c5378fde7c (diff) | |
Make jit code generation static thread-local
-rw-r--r-- | src/CodeCache.h | 67 | ||||
-rw-r--r-- | src/GenerateKernel.h | 10 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16.cc | 2 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16Avx512.cc | 2 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32.cc | 2 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32Avx512.cc | 2 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc | 2 |
7 files changed, 64 insertions, 23 deletions
diff --git a/src/CodeCache.h b/src/CodeCache.h index 08e9c9b..f1aa003 100644 --- a/src/CodeCache.h +++ b/src/CodeCache.h @@ -8,7 +8,18 @@ #include <condition_variable> #include <future> #include <map> -#include <mutex> + +//#if __cplusplus >= 201402L && !defined(__APPLE__) +//// For C++14, use shared_timed_mutex. +//// some macOS C++14 compilers don't support shared_timed_mutex. +//#define FBGEMM_USE_SHARED_TIMED_MUTEX +//#endif + +//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX +//#include <shared_mutex> +//#else +//#include <mutex> +//#endif namespace fbgemm { @@ -17,32 +28,59 @@ namespace fbgemm { * @tparam Key Type of unique key (typically a tuple) * @tparam Value Type of the microkernel function (Typically a function pointer) */ -template <typename KEY, typename VALUE> class CodeCache { -private: - std::map<KEY, std::shared_future<VALUE>> values_; - std::mutex mutex_; +template <typename KEY, typename VALUE> +class CodeCache { + private: + static thread_local std::map<KEY, std::shared_future<VALUE>> values_; +//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX +// std::shared_timed_mutex mutex_; +//#else +// std::mutex mutex_; +//#endif -public: - CodeCache(const CodeCache &) = delete; - CodeCache &operator=(const CodeCache &) = delete; + public: + CodeCache(const CodeCache&) = delete; + CodeCache& operator=(const CodeCache&) = delete; CodeCache(){}; - VALUE getOrCreate(const KEY &key, std::function<VALUE()> generatorFunction) { + VALUE getOrCreate(const KEY& key, std::function<VALUE()> generatorFunction) { std::shared_future<VALUE> returnFuture; std::promise<VALUE> returnPromise; bool needsToGenerate = false; // Check for existance of the key { - std::unique_lock<std::mutex> lock(mutex_); +//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX +// //mutex_.lock_shared(); +//#else +// std::unique_lock<std::mutex> lock(mutex_); +//#endif auto it = values_.find(key); if (it != values_.end()) { returnFuture = it->second; +//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX +// //mutex_.unlock_shared(); +//#endif 
} else { - values_[key] = returnFuture = returnPromise.get_future().share(); - needsToGenerate = true; +//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX + //mutex_.unlock_shared(); + + //mutex_.lock(); + // Need to look up again because there could be race condition from + // the time gap between mutex_.unlock_shared() and mutex_.lock() + //it = values_.find(key); + //if (it == values_.end()) { +//#endif + values_[key] = returnFuture = returnPromise.get_future().share(); + needsToGenerate = true; +//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX + //} else { + // returnFuture = it->second; + //} + //mutex_.unlock(); +//#endif } } @@ -56,4 +94,7 @@ public: } }; -} // namespace fbgemm +template <typename KEY, typename VALUE> +thread_local std::map<KEY, std::shared_future<VALUE>> CodeCache<KEY, VALUE>::values_; + +} // namespace fbgemm
\ No newline at end of file diff --git a/src/GenerateKernel.h b/src/GenerateKernel.h index c0fece4..da38337 100644 --- a/src/GenerateKernel.h +++ b/src/GenerateKernel.h @@ -167,19 +167,19 @@ class CodeGenBase { return rt; } - static std::mutex rtMutex_; ///< Controll access to runtime; + //static std::mutex rtMutex_; ///< Controll access to runtime; // The hash depends on accumulate, mc, nc, ncb, kcb, nr, mr, nr_min - static CodeCache<std::tuple<bool, int, int, int, int, int, int, int>, + static thread_local CodeCache<std::tuple<bool, int, int, int, int, int, int, int>, jit_micro_kernel_fp> codeCache_; ///< JIT Code Cache for reuse. }; -template <typename TA, typename TB, typename TC, typename accT> -std::mutex CodeGenBase<TA, TB, TC, accT>::rtMutex_; +//template <typename TA, typename TB, typename TC, typename accT> +//std::mutex CodeGenBase<TA, TB, TC, accT>::rtMutex_; template <typename TA, typename TB, typename TC, typename accT> -CodeCache<std::tuple<bool, int, int, int, int, int, int, int>, +thread_local CodeCache<std::tuple<bool, int, int, int, int, int, int, int>, typename CodeGenBase<TA, TB, TC, accT>::jit_micro_kernel_fp> CodeGenBase<TA, TB, TC, accT>::codeCache_; diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc index 205af14..1dd581f 100644 --- a/src/GenerateKernelU8S8S32ACC16.cc +++ b/src/GenerateKernelU8S8S32ACC16.cc @@ -342,7 +342,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>( jit_micro_kernel_fp fn; asmjit::Error err; { - std::unique_lock<std::mutex> lock(rtMutex_); + //std::unique_lock<std::mutex> lock(rtMutex_); err = runtime().add(&fn, &code); } if (err) { diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc index 819f33b..8442365 100644 --- a/src/GenerateKernelU8S8S32ACC16Avx512.cc +++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc @@ -410,7 +410,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>( 
jit_micro_kernel_fp fn; asmjit::Error err; { - std::unique_lock<std::mutex> lock(rtMutex_); + //std::unique_lock<std::mutex> lock(rtMutex_); err = runtime().add(&fn, &code); } if (err) { diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc index dc9c534..b052968 100644 --- a/src/GenerateKernelU8S8S32ACC32.cc +++ b/src/GenerateKernelU8S8S32ACC32.cc @@ -330,7 +330,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>( jit_micro_kernel_fp fn; asmjit::Error err; { - std::unique_lock<std::mutex> lock(rtMutex_); + //std::unique_lock<std::mutex> lock(rtMutex_); err = runtime().add(&fn, &code); } if (err) { diff --git a/src/GenerateKernelU8S8S32ACC32Avx512.cc b/src/GenerateKernelU8S8S32ACC32Avx512.cc index 5037292..bf82bb0 100644 --- a/src/GenerateKernelU8S8S32ACC32Avx512.cc +++ b/src/GenerateKernelU8S8S32ACC32Avx512.cc @@ -422,7 +422,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>( jit_micro_kernel_fp fn; asmjit::Error err; { - std::unique_lock<std::mutex> lock(rtMutex_); + //std::unique_lock<std::mutex> lock(rtMutex_); err = runtime().add(&fn, &code); } if (err) { diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc index bd8be1f..816d35b 100644 --- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc +++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc @@ -415,7 +415,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate< jit_micro_kernel_fp fn; asmjit::Error err; { - std::unique_lock<std::mutex> lock(rtMutex_); + //std::unique_lock<std::mutex> lock(rtMutex_); err = runtime().add(&fn, &code); } if (err) { |