diff options
author | Young Jin Kim <youki@microsoft.com> | 2020-05-14 10:41:36 +0300 |
---|---|---|
committer | Young Jin Kim <youki@microsoft.com> | 2020-05-14 10:41:36 +0300 |
commit | f8274286ea12f71c455b75efe301df2f2086790d (patch) | |
tree | 8f8d19dd8b19ea9aaa829a7ebbe1d72a8ecfd4dc | |
parent | eb2317d3f32b617031681be55b43883c23611a35 (diff) |
Change code cache to the unordered_map
-rw-r--r-- | src/CodeCache.h | 16 | ||||
-rw-r--r-- | src/GenerateKernel.h | 4 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16.cc | 32 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC16Avx512.cc | 32 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32.cc | 32 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32Avx512.cc | 32 | ||||
-rw-r--r-- | src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc | 32 | ||||
-rw-r--r-- | src/GroupwiseConv.h | 8 | ||||
-rw-r--r-- | src/GroupwiseConvAcc32Avx2.cc | 14 |
9 files changed, 130 insertions, 72 deletions
diff --git a/src/CodeCache.h b/src/CodeCache.h index 8eb28d8..023ad4b 100644 --- a/src/CodeCache.h +++ b/src/CodeCache.h @@ -7,7 +7,7 @@ #pragma once #include <condition_variable> #include <future> -#include <map> +#include <unordered_map> #if __cplusplus >= 201402L && !defined(__APPLE__) // For C++14, use shared_timed_mutex. @@ -23,6 +23,18 @@ namespace fbgemm { +template <class T> using hash = std::hash<T>; + +// This combinator is based on boost::hash_combine, but uses +// std::hash as the hash implementation. Used as a drop-in +// replacement for boost::hash_combine. + +template <class T> +inline void hash_combine(std::size_t& seed, T const& v) { + hash<T> hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + /** * @brief Thread safe cache for microkernels, ensures single creation per key. * @tparam Key Type of unique key (typically a tuple) @@ -31,7 +43,7 @@ namespace fbgemm { template <typename KEY, typename VALUE> class CodeCache { private: - std::map<KEY, std::shared_future<VALUE>> values_; + std::unordered_map<KEY, std::shared_future<VALUE>> values_; #ifdef FBGEMM_USE_SHARED_TIMED_MUTEX std::shared_timed_mutex mutex_; #else diff --git a/src/GenerateKernel.h b/src/GenerateKernel.h index c0fece4..b5a4d4f 100644 --- a/src/GenerateKernel.h +++ b/src/GenerateKernel.h @@ -170,7 +170,7 @@ class CodeGenBase { static std::mutex rtMutex_; ///< Controll access to runtime; // The hash depends on accumulate, mc, nc, ncb, kcb, nr, mr, nr_min - static CodeCache<std::tuple<bool, int, int, int, int, int, int, int>, + static CodeCache<size_t, //std::tuple<bool, int, int, int, int, int, int, int>, jit_micro_kernel_fp> codeCache_; ///< JIT Code Cache for reuse. }; @@ -179,7 +179,7 @@ template <typename TA, typename TB, typename TC, typename accT> std::mutex CodeGenBase<TA, TB, TC, accT>::rtMutex_; template <typename TA, typename TB, typename TC, typename accT> -CodeCache<std::tuple<bool, int, int, int, int, int, int, int>, +CodeCache<size_t, //std::tuple<bool, int, int, int, int, int, int, int>, typename CodeGenBase<TA, TB, TC, accT>::jit_micro_kernel_fp> CodeGenBase<TA, TB, TC, accT>::codeCache_; diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc index 205af14..ff23828 100644 --- a/src/GenerateKernelU8S8S32ACC16.cc +++ b/src/GenerateKernelU8S8S32ACC16.cc @@ -132,7 +132,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>( int32_t nc, int32_t kc, int32_t /* unused */) { - std::tuple<bool, int, int, int, int, int, int, int> kernelSig; + //std::tuple<bool, int, int, int, int, int, int, int> kernelSig; int kBlock; int nBlock; int mRegBlockSize; @@ -158,17 +158,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>( PackingTraits<uint8_t, int16_t, inst_set_t::avx2>::ROW_INTERLEAVE; } - kernelSig = std::make_tuple( - accum, - mc, - nc, - nBlock, - kBlock, - mRegBlockSize, - nRegBlockSize, - nRegBlockSizeMin); - - return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp { + //kernelSig = std::make_tuple( + // accum, + // mc, + // nc, + // nBlock, + // kBlock, + // mRegBlockSize, + // nRegBlockSize, + // nRegBlockSizeMin); + size_t hashVal = hash<bool>()(accum); + hash_combine(hashVal, mc); + hash_combine(hashVal, nc); + hash_combine(hashVal, nBlock); + hash_combine(hashVal, kBlock); + hash_combine(hashVal, mRegBlockSize); + hash_combine(hashVal, nRegBlockSize); + hash_combine(hashVal, nRegBlockSizeMin); + + return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp { asmjit::CodeHolder code; code.init(runtime().codeInfo()); x86::Assembler assembler(&code); diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc index 819f33b..3cac600 100644 --- a/src/GenerateKernelU8S8S32ACC16Avx512.cc +++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc @@ -139,7 +139,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>( int32_t nc, int32_t kc, int32_t /* unused */) { - std::tuple<bool, int, int, int, int, int, int, int> kernelSig; + //std::tuple<bool, int, int, int, int, int, int, int> kernelSig; int kBlock; int nBlock; int mRegBlockSize; @@ -165,17 +165,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>( PackingTraits<uint8_t, int16_t, inst_set_t::avx512>::ROW_INTERLEAVE; } - kernelSig = std::make_tuple( - accum, - mc, - nc, - nBlock, - kBlock, - mRegBlockSize, - nRegBlockSize, - nRegBlockSizeMin); - - return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp { + //kernelSig = std::make_tuple( + // accum, + // mc, + // nc, + // nBlock, + // kBlock, + // mRegBlockSize, + // nRegBlockSize, + // nRegBlockSizeMin); + size_t hashVal = hash<bool>()(accum); + hash_combine(hashVal, mc); + hash_combine(hashVal, nc); + hash_combine(hashVal, nBlock); + hash_combine(hashVal, kBlock); + hash_combine(hashVal, mRegBlockSize); + hash_combine(hashVal, nRegBlockSize); + hash_combine(hashVal, nRegBlockSizeMin); + + return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp { asmjit::CodeHolder code; code.init(runtime().codeInfo()); x86::Assembler assembler(&code); diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc index dc9c534..5fca144 100644 --- a/src/GenerateKernelU8S8S32ACC32.cc +++ b/src/GenerateKernelU8S8S32ACC32.cc @@ -137,7 +137,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>( int32_t nc, int32_t kc, int32_t /* unused */) { - std::tuple<bool, int, int, int, int, int, int, int> kernelSig; + //std::tuple<bool, int, int, int, int, int, int, int> kernelSig; int kBlock; int nBlock; int mRegBlockSize; @@ -163,17 +163,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>( PackingTraits<uint8_t, int32_t, inst_set_t::avx2>::ROW_INTERLEAVE; } - kernelSig = std::make_tuple( - accum, - mc, - nc, - nBlock, - kBlock, - mRegBlockSize, - nRegBlockSize, - nRegBlockSizeMin); - - return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp { + //kernelSig = std::make_tuple( + // accum, + // mc, + // nc, + // nBlock, + // kBlock, + // mRegBlockSize, + // nRegBlockSize, + // nRegBlockSizeMin); + size_t hashVal = hash<bool>()(accum); + hash_combine(hashVal, mc); + hash_combine(hashVal, nc); + hash_combine(hashVal, nBlock); + hash_combine(hashVal, kBlock); + hash_combine(hashVal, mRegBlockSize); + hash_combine(hashVal, nRegBlockSize); + hash_combine(hashVal, nRegBlockSizeMin); + + return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp { asmjit::CodeHolder code; code.init(runtime().codeInfo()); x86::Assembler assembler(&code); diff --git a/src/GenerateKernelU8S8S32ACC32Avx512.cc b/src/GenerateKernelU8S8S32ACC32Avx512.cc index 5037292..fc83858 100644 --- a/src/GenerateKernelU8S8S32ACC32Avx512.cc +++ b/src/GenerateKernelU8S8S32ACC32Avx512.cc @@ -138,7 +138,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>( int32_t nc, int32_t kc, int32_t /* unused */) { - std::tuple<bool, int, int, int, int, int, int, int> kernelSig; + //std::tuple<bool, int, int, int, int, int, int, int> kernelSig; int kBlock; int nBlock; int mRegBlockSize; @@ -164,17 +164,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>( PackingTraits<uint8_t, int32_t, inst_set_t::avx512>::ROW_INTERLEAVE; } - kernelSig = std::make_tuple( - accum, - mc, - nc, - nBlock, - kBlock, - mRegBlockSize, - nRegBlockSize, - nRegBlockSizeMin); - - return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp { + //kernelSig = std::make_tuple( + // accum, + // mc, + // nc, + // nBlock, + // kBlock, + // mRegBlockSize, + // nRegBlockSize, + // nRegBlockSizeMin); + size_t hashVal = hash<bool>()(accum); + hash_combine(hashVal, mc); + hash_combine(hashVal, nc); + hash_combine(hashVal, nBlock); + hash_combine(hashVal, kBlock); + hash_combine(hashVal, mRegBlockSize); + hash_combine(hashVal, nRegBlockSize); + hash_combine(hashVal, nRegBlockSizeMin); + + return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp { asmjit::CodeHolder code; code.init(runtime().codeInfo()); x86::Assembler assembler(&code); diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc index bd8be1f..5dc0d36 100644 --- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc +++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc @@ -129,7 +129,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate< int32_t nc, int32_t kc, int32_t /* unused */) { - std::tuple<bool, int, int, int, int, int, int, int> kernelSig; + //std::tuple<bool, int, int, int, int, int, int, int> kernelSig; int kBlock; int nBlock; int mRegBlockSize; @@ -157,17 +157,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate< ROW_INTERLEAVE; } - kernelSig = std::make_tuple( - accum, - mc, - nc, - nBlock, - kBlock, - mRegBlockSize, - nRegBlockSize, - nRegBlockSizeMin); - - return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp { + //kernelSig = std::make_tuple( + // accum, + // mc, + // nc, + // nBlock, + // kBlock, + // mRegBlockSize, + // nRegBlockSize, + // nRegBlockSizeMin); + size_t hashVal = hash<bool>()(accum); + hash_combine(hashVal, mc); + hash_combine(hashVal, nc); + hash_combine(hashVal, nBlock); + hash_combine(hashVal, kBlock); + hash_combine(hashVal, mRegBlockSize); + hash_combine(hashVal, nRegBlockSize); + hash_combine(hashVal, nRegBlockSizeMin); + + return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp { asmjit::CodeHolder code; code.init(runtime().codeInfo()); x86::Assembler assembler(&code); diff --git a/src/GroupwiseConv.h b/src/GroupwiseConv.h index 58ee24d..8bb9beb 100644 --- a/src/GroupwiseConv.h +++ b/src/GroupwiseConv.h @@ -230,9 +230,9 @@ class GenConvKernel { static std::mutex rtMutex_; ///< Controll access to runtime; - static CodeCache<std::tuple<bool, int, int, int>, jit_conv_kernel_fp> + static CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_conv_kernel_fp> codeCache_; ///< JIT Code Cache for reuse. - static CodeCache<std::tuple<bool, int, int, int>, jit_rowoffset_kernel_fp> + static CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_rowoffset_kernel_fp> codeCacheRowOffset_; ///< JIT Code Cache for row offset kernel. private: @@ -285,11 +285,11 @@ template <int SPATIAL_DIM, typename accT> std::mutex GenConvKernel<SPATIAL_DIM, accT>::rtMutex_; template <int SPATIAL_DIM, typename accT> -CodeCache<std::tuple<bool, int, int, int>, jit_conv_kernel_fp> +CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_conv_kernel_fp> GenConvKernel<SPATIAL_DIM, accT>::codeCache_; template <int SPATIAL_DIM, typename accT> -CodeCache<std::tuple<bool, int, int, int>, jit_rowoffset_kernel_fp> +CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_rowoffset_kernel_fp> GenConvKernel<SPATIAL_DIM, accT>::codeCacheRowOffset_; } // namespace fbgemm diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc index 396e792..7ff29ea 100644 --- a/src/GroupwiseConvAcc32Avx2.cc +++ b/src/GroupwiseConvAcc32Avx2.cc @@ -60,14 +60,20 @@ void calculateRowOffsets( } template <int SPATIAL_DIM = 2> -tuple<bool, int, int, int> getKernelSig( +size_t/*tuple<bool, int, int, int>*/ getKernelSig( const conv_param_t<SPATIAL_DIM>& conv_param, bool isAZeroPointZero) { int C_per_G = conv_param.IC / conv_param.G; int K_per_G = conv_param.OC / conv_param.G; - auto kernelSig = - std::make_tuple(isAZeroPointZero, conv_param.G, C_per_G, K_per_G); - return kernelSig; + //auto kernelSig = + // std::make_tuple(isAZeroPointZero, conv_param.G, C_per_G, K_per_G); + + size_t hashVal = hash<bool>()(isAZeroPointZero); + hash_combine(hashVal, conv_param.G); + hash_combine(hashVal, C_per_G); + hash_combine(hashVal, K_per_G); + + return hashVal; } template <int SPATIAL_DIM = 2, typename accT = int32_t> |