Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/FBGEMM.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Young Jin Kim <youki@microsoft.com> 2020-05-14 10:41:36 +0300
committer Young Jin Kim <youki@microsoft.com> 2020-05-14 10:41:36 +0300
commit f8274286ea12f71c455b75efe301df2f2086790d (patch)
tree 8f8d19dd8b19ea9aaa829a7ebbe1d72a8ecfd4dc
parent eb2317d3f32b617031681be55b43883c23611a35 (diff)
Change code cache to the unordered_map
-rw-r--r-- src/CodeCache.h 16
-rw-r--r-- src/GenerateKernel.h 4
-rw-r--r-- src/GenerateKernelU8S8S32ACC16.cc 32
-rw-r--r-- src/GenerateKernelU8S8S32ACC16Avx512.cc 32
-rw-r--r-- src/GenerateKernelU8S8S32ACC32.cc 32
-rw-r--r-- src/GenerateKernelU8S8S32ACC32Avx512.cc 32
-rw-r--r-- src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc 32
-rw-r--r-- src/GroupwiseConv.h 8
-rw-r--r-- src/GroupwiseConvAcc32Avx2.cc 14
9 files changed, 130 insertions, 72 deletions
diff --git a/src/CodeCache.h b/src/CodeCache.h
index 8eb28d8..023ad4b 100644
--- a/src/CodeCache.h
+++ b/src/CodeCache.h
@@ -7,7 +7,7 @@
#pragma once
#include <condition_variable>
#include <future>
-#include <map>
+#include <unordered_map>
#if __cplusplus >= 201402L && !defined(__APPLE__)
// For C++14, use shared_timed_mutex.
@@ -23,6 +23,18 @@
namespace fbgemm {
+template <class T> using hash = std::hash<T>;
+
+// This combinator is based on boost::hash_combine, but uses
+// std::hash as the hash implementation. Used as a drop-in
+// replacement for boost::hash_combine.
+
+template <class T>
+inline void hash_combine(std::size_t& seed, T const& v) { // folds the hash of v into seed, updating seed in place
+ hash<T> hasher; // fbgemm::hash<T> is an alias for std::hash<T>
+ seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); // the previous seed feeds the new one, so the order of calls matters
+} // NOTE(review): callers in this patch use the combined value directly as the CodeCache key; two parameter sets that hash equal would share one cached kernel - confirm this is acceptable
+
/**
* @brief Thread safe cache for microkernels, ensures single creation per key.
* @tparam Key Type of unique key (typically a tuple)
@@ -31,7 +43,7 @@ namespace fbgemm {
template <typename KEY, typename VALUE>
class CodeCache {
private:
- std::map<KEY, std::shared_future<VALUE>> values_;
+ std::unordered_map<KEY, std::shared_future<VALUE>> values_;
#ifdef FBGEMM_USE_SHARED_TIMED_MUTEX
std::shared_timed_mutex mutex_;
#else
diff --git a/src/GenerateKernel.h b/src/GenerateKernel.h
index c0fece4..b5a4d4f 100644
--- a/src/GenerateKernel.h
+++ b/src/GenerateKernel.h
@@ -170,7 +170,7 @@ class CodeGenBase {
static std::mutex rtMutex_; ///< Controll access to runtime;
// The hash depends on accumulate, mc, nc, ncb, kcb, nr, mr, nr_min
- static CodeCache<std::tuple<bool, int, int, int, int, int, int, int>,
+ static CodeCache<size_t, //std::tuple<bool, int, int, int, int, int, int, int>,
jit_micro_kernel_fp>
codeCache_; ///< JIT Code Cache for reuse.
};
@@ -179,7 +179,7 @@ template <typename TA, typename TB, typename TC, typename accT>
std::mutex CodeGenBase<TA, TB, TC, accT>::rtMutex_;
template <typename TA, typename TB, typename TC, typename accT>
-CodeCache<std::tuple<bool, int, int, int, int, int, int, int>,
+CodeCache<size_t, //std::tuple<bool, int, int, int, int, int, int, int>,
typename CodeGenBase<TA, TB, TC, accT>::jit_micro_kernel_fp>
CodeGenBase<TA, TB, TC, accT>::codeCache_;
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc
index 205af14..ff23828 100644
--- a/src/GenerateKernelU8S8S32ACC16.cc
+++ b/src/GenerateKernelU8S8S32ACC16.cc
@@ -132,7 +132,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>(
int32_t nc,
int32_t kc,
int32_t /* unused */) {
- std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+ //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
int kBlock;
int nBlock;
int mRegBlockSize;
@@ -158,17 +158,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>(
PackingTraits<uint8_t, int16_t, inst_set_t::avx2>::ROW_INTERLEAVE;
}
- kernelSig = std::make_tuple(
- accum,
- mc,
- nc,
- nBlock,
- kBlock,
- mRegBlockSize,
- nRegBlockSize,
- nRegBlockSizeMin);
-
- return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+ //kernelSig = std::make_tuple(
+ // accum,
+ // mc,
+ // nc,
+ // nBlock,
+ // kBlock,
+ // mRegBlockSize,
+ // nRegBlockSize,
+ // nRegBlockSizeMin);
+ size_t hashVal = hash<bool>()(accum);
+ hash_combine(hashVal, mc);
+ hash_combine(hashVal, nc);
+ hash_combine(hashVal, nBlock);
+ hash_combine(hashVal, kBlock);
+ hash_combine(hashVal, mRegBlockSize);
+ hash_combine(hashVal, nRegBlockSize);
+ hash_combine(hashVal, nRegBlockSizeMin);
+
+ return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
asmjit::CodeHolder code;
code.init(runtime().codeInfo());
x86::Assembler assembler(&code);
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc
index 819f33b..3cac600 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc
@@ -139,7 +139,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>(
int32_t nc,
int32_t kc,
int32_t /* unused */) {
- std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+ //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
int kBlock;
int nBlock;
int mRegBlockSize;
@@ -165,17 +165,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>(
PackingTraits<uint8_t, int16_t, inst_set_t::avx512>::ROW_INTERLEAVE;
}
- kernelSig = std::make_tuple(
- accum,
- mc,
- nc,
- nBlock,
- kBlock,
- mRegBlockSize,
- nRegBlockSize,
- nRegBlockSizeMin);
-
- return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+ //kernelSig = std::make_tuple(
+ // accum,
+ // mc,
+ // nc,
+ // nBlock,
+ // kBlock,
+ // mRegBlockSize,
+ // nRegBlockSize,
+ // nRegBlockSizeMin);
+ size_t hashVal = hash<bool>()(accum);
+ hash_combine(hashVal, mc);
+ hash_combine(hashVal, nc);
+ hash_combine(hashVal, nBlock);
+ hash_combine(hashVal, kBlock);
+ hash_combine(hashVal, mRegBlockSize);
+ hash_combine(hashVal, nRegBlockSize);
+ hash_combine(hashVal, nRegBlockSizeMin);
+
+ return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
asmjit::CodeHolder code;
code.init(runtime().codeInfo());
x86::Assembler assembler(&code);
diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc
index dc9c534..5fca144 100644
--- a/src/GenerateKernelU8S8S32ACC32.cc
+++ b/src/GenerateKernelU8S8S32ACC32.cc
@@ -137,7 +137,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>(
int32_t nc,
int32_t kc,
int32_t /* unused */) {
- std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+ //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
int kBlock;
int nBlock;
int mRegBlockSize;
@@ -163,17 +163,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>(
PackingTraits<uint8_t, int32_t, inst_set_t::avx2>::ROW_INTERLEAVE;
}
- kernelSig = std::make_tuple(
- accum,
- mc,
- nc,
- nBlock,
- kBlock,
- mRegBlockSize,
- nRegBlockSize,
- nRegBlockSizeMin);
-
- return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+ //kernelSig = std::make_tuple(
+ // accum,
+ // mc,
+ // nc,
+ // nBlock,
+ // kBlock,
+ // mRegBlockSize,
+ // nRegBlockSize,
+ // nRegBlockSizeMin);
+ size_t hashVal = hash<bool>()(accum);
+ hash_combine(hashVal, mc);
+ hash_combine(hashVal, nc);
+ hash_combine(hashVal, nBlock);
+ hash_combine(hashVal, kBlock);
+ hash_combine(hashVal, mRegBlockSize);
+ hash_combine(hashVal, nRegBlockSize);
+ hash_combine(hashVal, nRegBlockSizeMin);
+
+ return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
asmjit::CodeHolder code;
code.init(runtime().codeInfo());
x86::Assembler assembler(&code);
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512.cc b/src/GenerateKernelU8S8S32ACC32Avx512.cc
index 5037292..fc83858 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512.cc
@@ -138,7 +138,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
int32_t nc,
int32_t kc,
int32_t /* unused */) {
- std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+ //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
int kBlock;
int nBlock;
int mRegBlockSize;
@@ -164,17 +164,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
PackingTraits<uint8_t, int32_t, inst_set_t::avx512>::ROW_INTERLEAVE;
}
- kernelSig = std::make_tuple(
- accum,
- mc,
- nc,
- nBlock,
- kBlock,
- mRegBlockSize,
- nRegBlockSize,
- nRegBlockSizeMin);
-
- return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+ //kernelSig = std::make_tuple(
+ // accum,
+ // mc,
+ // nc,
+ // nBlock,
+ // kBlock,
+ // mRegBlockSize,
+ // nRegBlockSize,
+ // nRegBlockSizeMin);
+ size_t hashVal = hash<bool>()(accum);
+ hash_combine(hashVal, mc);
+ hash_combine(hashVal, nc);
+ hash_combine(hashVal, nBlock);
+ hash_combine(hashVal, kBlock);
+ hash_combine(hashVal, mRegBlockSize);
+ hash_combine(hashVal, nRegBlockSize);
+ hash_combine(hashVal, nRegBlockSizeMin);
+
+ return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
asmjit::CodeHolder code;
code.init(runtime().codeInfo());
x86::Assembler assembler(&code);
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
index bd8be1f..5dc0d36 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
@@ -129,7 +129,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<
int32_t nc,
int32_t kc,
int32_t /* unused */) {
- std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+ //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
int kBlock;
int nBlock;
int mRegBlockSize;
@@ -157,17 +157,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<
ROW_INTERLEAVE;
}
- kernelSig = std::make_tuple(
- accum,
- mc,
- nc,
- nBlock,
- kBlock,
- mRegBlockSize,
- nRegBlockSize,
- nRegBlockSizeMin);
-
- return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+ //kernelSig = std::make_tuple(
+ // accum,
+ // mc,
+ // nc,
+ // nBlock,
+ // kBlock,
+ // mRegBlockSize,
+ // nRegBlockSize,
+ // nRegBlockSizeMin);
+ size_t hashVal = hash<bool>()(accum);
+ hash_combine(hashVal, mc);
+ hash_combine(hashVal, nc);
+ hash_combine(hashVal, nBlock);
+ hash_combine(hashVal, kBlock);
+ hash_combine(hashVal, mRegBlockSize);
+ hash_combine(hashVal, nRegBlockSize);
+ hash_combine(hashVal, nRegBlockSizeMin);
+
+ return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
asmjit::CodeHolder code;
code.init(runtime().codeInfo());
x86::Assembler assembler(&code);
diff --git a/src/GroupwiseConv.h b/src/GroupwiseConv.h
index 58ee24d..8bb9beb 100644
--- a/src/GroupwiseConv.h
+++ b/src/GroupwiseConv.h
@@ -230,9 +230,9 @@ class GenConvKernel {
static std::mutex rtMutex_; ///< Controll access to runtime;
- static CodeCache<std::tuple<bool, int, int, int>, jit_conv_kernel_fp>
+ static CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_conv_kernel_fp>
codeCache_; ///< JIT Code Cache for reuse.
- static CodeCache<std::tuple<bool, int, int, int>, jit_rowoffset_kernel_fp>
+ static CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_rowoffset_kernel_fp>
codeCacheRowOffset_; ///< JIT Code Cache for row offset kernel.
private:
@@ -285,11 +285,11 @@ template <int SPATIAL_DIM, typename accT>
std::mutex GenConvKernel<SPATIAL_DIM, accT>::rtMutex_;
template <int SPATIAL_DIM, typename accT>
-CodeCache<std::tuple<bool, int, int, int>, jit_conv_kernel_fp>
+CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_conv_kernel_fp>
GenConvKernel<SPATIAL_DIM, accT>::codeCache_;
template <int SPATIAL_DIM, typename accT>
-CodeCache<std::tuple<bool, int, int, int>, jit_rowoffset_kernel_fp>
+CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_rowoffset_kernel_fp>
GenConvKernel<SPATIAL_DIM, accT>::codeCacheRowOffset_;
} // namespace fbgemm
diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc
index 396e792..7ff29ea 100644
--- a/src/GroupwiseConvAcc32Avx2.cc
+++ b/src/GroupwiseConvAcc32Avx2.cc
@@ -60,14 +60,20 @@ void calculateRowOffsets(
}
template <int SPATIAL_DIM = 2>
-tuple<bool, int, int, int> getKernelSig(
+size_t/*tuple<bool, int, int, int>*/ getKernelSig(
const conv_param_t<SPATIAL_DIM>& conv_param,
bool isAZeroPointZero) {
int C_per_G = conv_param.IC / conv_param.G;
int K_per_G = conv_param.OC / conv_param.G;
- auto kernelSig =
- std::make_tuple(isAZeroPointZero, conv_param.G, C_per_G, K_per_G);
- return kernelSig;
+ //auto kernelSig =
+ // std::make_tuple(isAZeroPointZero, conv_param.G, C_per_G, K_per_G);
+
+ size_t hashVal = hash<bool>()(isAZeroPointZero);
+ hash_combine(hashVal, conv_param.G);
+ hash_combine(hashVal, C_per_G);
+ hash_combine(hashVal, K_per_G);
+
+ return hashVal;
}
template <int SPATIAL_DIM = 2, typename accT = int32_t>