Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Young Jin Kim <youki@microsoft.com> 2020-05-11 23:43:46 +0300
committer: Young Jin Kim <youki@microsoft.com> 2020-05-11 23:43:46 +0300
commit: 182cbfdd5276af42490ad7d8db93f71ea9086b8a (patch)
tree: bf051178c718d11f78eef114fb839e2e968eeb45
parent: 84e66a976046180187724aff60a236c5378fde7c (diff)
Make jit code generation static thread-local
-rw-r--r-- src/CodeCache.h 67
-rw-r--r-- src/GenerateKernel.h 10
-rw-r--r-- src/GenerateKernelU8S8S32ACC16.cc 2
-rw-r--r-- src/GenerateKernelU8S8S32ACC16Avx512.cc 2
-rw-r--r-- src/GenerateKernelU8S8S32ACC32.cc 2
-rw-r--r-- src/GenerateKernelU8S8S32ACC32Avx512.cc 2
-rw-r--r-- src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc 2
7 files changed, 64 insertions, 23 deletions
diff --git a/src/CodeCache.h b/src/CodeCache.h
index 08e9c9b..f1aa003 100644
--- a/src/CodeCache.h
+++ b/src/CodeCache.h
@@ -8,7 +8,18 @@
#include <condition_variable>
#include <future>
#include <map>
-#include <mutex>
+
+//#if __cplusplus >= 201402L && !defined(__APPLE__)
+//// For C++14, use shared_timed_mutex.
+//// some macOS C++14 compilers don't support shared_timed_mutex.
+//#define FBGEMM_USE_SHARED_TIMED_MUTEX
+//#endif
+
+//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX
+//#include <shared_mutex>
+//#else
+//#include <mutex>
+//#endif
namespace fbgemm {
@@ -17,32 +28,59 @@ namespace fbgemm {
* @tparam Key Type of unique key (typically a tuple)
* @tparam Value Type of the microkernel function (Typically a function pointer)
*/
-template <typename KEY, typename VALUE> class CodeCache {
-private:
- std::map<KEY, std::shared_future<VALUE>> values_;
- std::mutex mutex_;
+template <typename KEY, typename VALUE>
+class CodeCache {
+ private:
+ static thread_local std::map<KEY, std::shared_future<VALUE>> values_;
+//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX
+// std::shared_timed_mutex mutex_;
+//#else
+// std::mutex mutex_;
+//#endif
-public:
- CodeCache(const CodeCache &) = delete;
- CodeCache &operator=(const CodeCache &) = delete;
+ public:
+ CodeCache(const CodeCache&) = delete;
+ CodeCache& operator=(const CodeCache&) = delete;
CodeCache(){};
- VALUE getOrCreate(const KEY &key, std::function<VALUE()> generatorFunction) {
+ VALUE getOrCreate(const KEY& key, std::function<VALUE()> generatorFunction) {
std::shared_future<VALUE> returnFuture;
std::promise<VALUE> returnPromise;
bool needsToGenerate = false;
// Check for existance of the key
{
- std::unique_lock<std::mutex> lock(mutex_);
+//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX
+// //mutex_.lock_shared();
+//#else
+// std::unique_lock<std::mutex> lock(mutex_);
+//#endif
auto it = values_.find(key);
if (it != values_.end()) {
returnFuture = it->second;
+//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX
+// //mutex_.unlock_shared();
+//#endif
} else {
- values_[key] = returnFuture = returnPromise.get_future().share();
- needsToGenerate = true;
+//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX
+ //mutex_.unlock_shared();
+
+ //mutex_.lock();
+ // Need to look up again because there could be race condition from
+ // the time gap between mutex_.unlock_shared() and mutex_.lock()
+ //it = values_.find(key);
+ //if (it == values_.end()) {
+//#endif
+ values_[key] = returnFuture = returnPromise.get_future().share();
+ needsToGenerate = true;
+//#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX
+ //} else {
+ // returnFuture = it->second;
+ //}
+ //mutex_.unlock();
+//#endif
}
}
@@ -56,4 +94,7 @@ public:
}
};
-} // namespace fbgemm
+template <typename KEY, typename VALUE>
+thread_local std::map<KEY, std::shared_future<VALUE>> CodeCache<KEY, VALUE>::values_;
+
+} // namespace fbgemm
\ No newline at end of file
diff --git a/src/GenerateKernel.h b/src/GenerateKernel.h
index c0fece4..da38337 100644
--- a/src/GenerateKernel.h
+++ b/src/GenerateKernel.h
@@ -167,19 +167,19 @@ class CodeGenBase {
return rt;
}
- static std::mutex rtMutex_; ///< Controll access to runtime;
+ //static std::mutex rtMutex_; ///< Controll access to runtime;
// The hash depends on accumulate, mc, nc, ncb, kcb, nr, mr, nr_min
- static CodeCache<std::tuple<bool, int, int, int, int, int, int, int>,
+ static thread_local CodeCache<std::tuple<bool, int, int, int, int, int, int, int>,
jit_micro_kernel_fp>
codeCache_; ///< JIT Code Cache for reuse.
};
-template <typename TA, typename TB, typename TC, typename accT>
-std::mutex CodeGenBase<TA, TB, TC, accT>::rtMutex_;
+//template <typename TA, typename TB, typename TC, typename accT>
+//std::mutex CodeGenBase<TA, TB, TC, accT>::rtMutex_;
template <typename TA, typename TB, typename TC, typename accT>
-CodeCache<std::tuple<bool, int, int, int, int, int, int, int>,
+thread_local CodeCache<std::tuple<bool, int, int, int, int, int, int, int>,
typename CodeGenBase<TA, TB, TC, accT>::jit_micro_kernel_fp>
CodeGenBase<TA, TB, TC, accT>::codeCache_;
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc
index 205af14..1dd581f 100644
--- a/src/GenerateKernelU8S8S32ACC16.cc
+++ b/src/GenerateKernelU8S8S32ACC16.cc
@@ -342,7 +342,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>(
jit_micro_kernel_fp fn;
asmjit::Error err;
{
- std::unique_lock<std::mutex> lock(rtMutex_);
+ //std::unique_lock<std::mutex> lock(rtMutex_);
err = runtime().add(&fn, &code);
}
if (err) {
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc
index 819f33b..8442365 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc
@@ -410,7 +410,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>(
jit_micro_kernel_fp fn;
asmjit::Error err;
{
- std::unique_lock<std::mutex> lock(rtMutex_);
+ //std::unique_lock<std::mutex> lock(rtMutex_);
err = runtime().add(&fn, &code);
}
if (err) {
diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc
index dc9c534..b052968 100644
--- a/src/GenerateKernelU8S8S32ACC32.cc
+++ b/src/GenerateKernelU8S8S32ACC32.cc
@@ -330,7 +330,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>(
jit_micro_kernel_fp fn;
asmjit::Error err;
{
- std::unique_lock<std::mutex> lock(rtMutex_);
+ //std::unique_lock<std::mutex> lock(rtMutex_);
err = runtime().add(&fn, &code);
}
if (err) {
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512.cc b/src/GenerateKernelU8S8S32ACC32Avx512.cc
index 5037292..bf82bb0 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512.cc
@@ -422,7 +422,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
jit_micro_kernel_fp fn;
asmjit::Error err;
{
- std::unique_lock<std::mutex> lock(rtMutex_);
+ //std::unique_lock<std::mutex> lock(rtMutex_);
err = runtime().add(&fn, &code);
}
if (err) {
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
index bd8be1f..816d35b 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
@@ -415,7 +415,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<
jit_micro_kernel_fp fn;
asmjit::Error err;
{
- std::unique_lock<std::mutex> lock(rtMutex_);
+ //std::unique_lock<std::mutex> lock(rtMutex_);
err = runtime().add(&fn, &code);
}
if (err) {