Change code cache to use std::unordered_map

author: Young Jin Kim <youki@microsoft.com> 2020-05-14 10:41:36 +0300
committer: Young Jin Kim <youki@microsoft.com> 2020-05-14 10:41:36 +0300
commit: f8274286ea12f71c455b75efe301df2f2086790d (patch)
tree: 8f8d19dd8b19ea9aaa829a7ebbe1d72a8ecfd4dc
parent: eb2317d3f32b617031681be55b43883c23611a35 (diff)
9 files changed, 130 insertions, 72 deletions
diff --git a/src/CodeCache.h b/src/CodeCache.h
index 8eb28d8..023ad4b 100644
--- a/src/CodeCache.h
+++ b/src/CodeCache.h
@@ -7,7 +7,7 @@
 #pragma once
 #include <condition_variable>
 #include <future>
-#include <map>
+#include <unordered_map>
 
 #if __cplusplus >= 201402L && !defined(__APPLE__)
 // For C++14, use shared_timed_mutex.
@@ -23,6 +23,18 @@
 
 namespace fbgemm {
 
+template <class T> using hash = std::hash<T>;
+
+// Order-sensitive hash combiner modelled on boost::hash_combine, with
+// std::hash as the per-value hash. Intended as a drop-in replacement for
+// boost::hash_combine; results can collide and are not unique identifiers.
+
+template <class T>
+inline void hash_combine(std::size_t& seed, T const& v) {
+  hash<T> hasher;
+  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+}
+
 /**
  * @brief Thread safe cache for microkernels, ensures single creation per key.
  * @tparam Key Type of unique key (typically a tuple)
@@ -31,7 +43,7 @@ namespace fbgemm {
 template <typename KEY, typename VALUE>
 class CodeCache {
  private:
-  std::map<KEY, std::shared_future<VALUE>> values_;
+  std::unordered_map<KEY, std::shared_future<VALUE>> values_;
 #ifdef FBGEMM_USE_SHARED_TIMED_MUTEX
   std::shared_timed_mutex mutex_;
 #else
diff --git a/src/GenerateKernel.h b/src/GenerateKernel.h
index c0fece4..b5a4d4f 100644
--- a/src/GenerateKernel.h
+++ b/src/GenerateKernel.h
@@ -170,7 +170,7 @@ class CodeGenBase {
   static std::mutex rtMutex_;    ///< Controll access to runtime;
 
   // The hash depends on accumulate, mc, nc, ncb, kcb, nr, mr, nr_min
-  static CodeCache<std::tuple<bool, int, int, int, int, int, int, int>,
+  static CodeCache<size_t, //std::tuple<bool, int, int, int, int, int, int, int>,
                    jit_micro_kernel_fp>
       codeCache_; ///< JIT Code Cache for reuse.
 };
@@ -179,7 +179,7 @@ template <typename TA, typename TB, typename TC, typename accT>
 std::mutex CodeGenBase<TA, TB, TC, accT>::rtMutex_;
 
 template <typename TA, typename TB, typename TC, typename accT>
-CodeCache<std::tuple<bool, int, int, int, int, int, int, int>,
+CodeCache<size_t, //std::tuple<bool, int, int, int, int, int, int, int>,
           typename CodeGenBase<TA, TB, TC, accT>::jit_micro_kernel_fp>
     CodeGenBase<TA, TB, TC, accT>::codeCache_;
 
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc
index 205af14..ff23828 100644
--- a/src/GenerateKernelU8S8S32ACC16.cc
+++ b/src/GenerateKernelU8S8S32ACC16.cc
@@ -132,7 +132,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>(
     int32_t nc,
     int32_t kc,
     int32_t /* unused */) {
-  std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+  //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
   int kBlock;
   int nBlock;
   int mRegBlockSize;
@@ -158,17 +158,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx2>(
         PackingTraits<uint8_t, int16_t, inst_set_t::avx2>::ROW_INTERLEAVE;
   }
 
-  kernelSig = std::make_tuple(
-      accum,
-      mc,
-      nc,
-      nBlock,
-      kBlock,
-      mRegBlockSize,
-      nRegBlockSize,
-      nRegBlockSizeMin);
-
-  return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+  //kernelSig = std::make_tuple(
+  //    accum,
+  //    mc,
+  //    nc,
+  //    nBlock,
+  //    kBlock,
+  //    mRegBlockSize,
+  //    nRegBlockSize,
+  //    nRegBlockSizeMin);
+  size_t hashVal = hash<bool>()(accum);
+  hash_combine(hashVal, mc);
+  hash_combine(hashVal, nc);
+  hash_combine(hashVal, nBlock);
+  hash_combine(hashVal, kBlock);
+  hash_combine(hashVal, mRegBlockSize);
+  hash_combine(hashVal, nRegBlockSize);
+  hash_combine(hashVal, nRegBlockSizeMin);
+
+  return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
     asmjit::CodeHolder code;
     code.init(runtime().codeInfo());
     x86::Assembler assembler(&code);
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc
index 819f33b..3cac600 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc
@@ -139,7 +139,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>(
     int32_t nc,
     int32_t kc,
     int32_t /* unused */) {
-  std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+  //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
   int kBlock;
   int nBlock;
   int mRegBlockSize;
@@ -165,17 +165,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<inst_set_t::avx512>(
         PackingTraits<uint8_t, int16_t, inst_set_t::avx512>::ROW_INTERLEAVE;
   }
 
-  kernelSig = std::make_tuple(
-      accum,
-      mc,
-      nc,
-      nBlock,
-      kBlock,
-      mRegBlockSize,
-      nRegBlockSize,
-      nRegBlockSizeMin);
-
-  return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+  //kernelSig = std::make_tuple(
+  //    accum,
+  //    mc,
+  //    nc,
+  //    nBlock,
+  //    kBlock,
+  //    mRegBlockSize,
+  //    nRegBlockSize,
+  //    nRegBlockSizeMin);
+  size_t hashVal = hash<bool>()(accum);
+  hash_combine(hashVal, mc);
+  hash_combine(hashVal, nc);
+  hash_combine(hashVal, nBlock);
+  hash_combine(hashVal, kBlock);
+  hash_combine(hashVal, mRegBlockSize);
+  hash_combine(hashVal, nRegBlockSize);
+  hash_combine(hashVal, nRegBlockSizeMin);
+
+  return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
     asmjit::CodeHolder code;
     code.init(runtime().codeInfo());
     x86::Assembler assembler(&code);
diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc
index dc9c534..5fca144 100644
--- a/src/GenerateKernelU8S8S32ACC32.cc
+++ b/src/GenerateKernelU8S8S32ACC32.cc
@@ -137,7 +137,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>(
     int32_t nc,
     int32_t kc,
     int32_t /* unused */) {
-  std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+  //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
   int kBlock;
   int nBlock;
   int mRegBlockSize;
@@ -163,17 +163,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx2>(
         PackingTraits<uint8_t, int32_t, inst_set_t::avx2>::ROW_INTERLEAVE;
   }
 
-  kernelSig = std::make_tuple(
-      accum,
-      mc,
-      nc,
-      nBlock,
-      kBlock,
-      mRegBlockSize,
-      nRegBlockSize,
-      nRegBlockSizeMin);
-
-  return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+  //kernelSig = std::make_tuple(
+  //    accum,
+  //    mc,
+  //    nc,
+  //    nBlock,
+  //    kBlock,
+  //    mRegBlockSize,
+  //    nRegBlockSize,
+  //    nRegBlockSizeMin);
+  size_t hashVal = hash<bool>()(accum);
+  hash_combine(hashVal, mc);
+  hash_combine(hashVal, nc);
+  hash_combine(hashVal, nBlock);
+  hash_combine(hashVal, kBlock);
+  hash_combine(hashVal, mRegBlockSize);
+  hash_combine(hashVal, nRegBlockSize);
+  hash_combine(hashVal, nRegBlockSizeMin);
+
+  return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
     asmjit::CodeHolder code;
     code.init(runtime().codeInfo());
     x86::Assembler assembler(&code);
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512.cc b/src/GenerateKernelU8S8S32ACC32Avx512.cc
index 5037292..fc83858 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512.cc
@@ -138,7 +138,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
     int32_t nc,
     int32_t kc,
     int32_t /* unused */) {
-  std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+  //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
   int kBlock;
   int nBlock;
   int mRegBlockSize;
@@ -164,17 +164,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<inst_set_t::avx512>(
         PackingTraits<uint8_t, int32_t, inst_set_t::avx512>::ROW_INTERLEAVE;
   }
 
-  kernelSig = std::make_tuple(
-      accum,
-      mc,
-      nc,
-      nBlock,
-      kBlock,
-      mRegBlockSize,
-      nRegBlockSize,
-      nRegBlockSizeMin);
-
-  return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+  //kernelSig = std::make_tuple(
+  //    accum,
+  //    mc,
+  //    nc,
+  //    nBlock,
+  //    kBlock,
+  //    mRegBlockSize,
+  //    nRegBlockSize,
+  //    nRegBlockSizeMin);
+  size_t hashVal = hash<bool>()(accum);
+  hash_combine(hashVal, mc);
+  hash_combine(hashVal, nc);
+  hash_combine(hashVal, nBlock);
+  hash_combine(hashVal, kBlock);
+  hash_combine(hashVal, mRegBlockSize);
+  hash_combine(hashVal, nRegBlockSize);
+  hash_combine(hashVal, nRegBlockSizeMin);
+
+  return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
     asmjit::CodeHolder code;
     code.init(runtime().codeInfo());
     x86::Assembler assembler(&code);
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
index bd8be1f..5dc0d36 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
@@ -129,7 +129,7 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<
     int32_t nc,
     int32_t kc,
     int32_t /* unused */) {
-  std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
+  //std::tuple<bool, int, int, int, int, int, int, int> kernelSig;
   int kBlock;
   int nBlock;
   int mRegBlockSize;
@@ -157,17 +157,25 @@ CodeGenBase<uint8_t, int8_t, int32_t, int32_t>::getOrCreate<
         ROW_INTERLEAVE;
   }
 
-  kernelSig = std::make_tuple(
-      accum,
-      mc,
-      nc,
-      nBlock,
-      kBlock,
-      mRegBlockSize,
-      nRegBlockSize,
-      nRegBlockSizeMin);
-
-  return codeCache_.getOrCreate(kernelSig, [&]() -> jit_micro_kernel_fp {
+  //kernelSig = std::make_tuple(
+  //    accum,
+  //    mc,
+  //    nc,
+  //    nBlock,
+  //    kBlock,
+  //    mRegBlockSize,
+  //    nRegBlockSize,
+  //    nRegBlockSizeMin);
+  size_t hashVal = hash<bool>()(accum);
+  hash_combine(hashVal, mc);
+  hash_combine(hashVal, nc);
+  hash_combine(hashVal, nBlock);
+  hash_combine(hashVal, kBlock);
+  hash_combine(hashVal, mRegBlockSize);
+  hash_combine(hashVal, nRegBlockSize);
+  hash_combine(hashVal, nRegBlockSizeMin);
+
+  return codeCache_.getOrCreate(hashVal, [&]() -> jit_micro_kernel_fp {
     asmjit::CodeHolder code;
     code.init(runtime().codeInfo());
     x86::Assembler assembler(&code);
diff --git a/src/GroupwiseConv.h b/src/GroupwiseConv.h
index 58ee24d..8bb9beb 100644
--- a/src/GroupwiseConv.h
+++ b/src/GroupwiseConv.h
@@ -230,9 +230,9 @@ class GenConvKernel {
 
   static std::mutex rtMutex_; ///< Controll access to runtime;
 
-  static CodeCache<std::tuple<bool, int, int, int>, jit_conv_kernel_fp>
+  static CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_conv_kernel_fp>
       codeCache_; ///< JIT Code Cache for reuse.
-  static CodeCache<std::tuple<bool, int, int, int>, jit_rowoffset_kernel_fp>
+  static CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_rowoffset_kernel_fp>
       codeCacheRowOffset_; ///< JIT Code Cache for row offset kernel.
 
 private:
@@ -285,11 +285,11 @@ template <int SPATIAL_DIM, typename accT>
 std::mutex GenConvKernel<SPATIAL_DIM, accT>::rtMutex_;
 
 template <int SPATIAL_DIM, typename accT>
-CodeCache<std::tuple<bool, int, int, int>, jit_conv_kernel_fp>
+CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_conv_kernel_fp>
     GenConvKernel<SPATIAL_DIM, accT>::codeCache_;
 
 template <int SPATIAL_DIM, typename accT>
-CodeCache<std::tuple<bool, int, int, int>, jit_rowoffset_kernel_fp>
+CodeCache<size_t /*std::tuple<bool, int, int, int>*/, jit_rowoffset_kernel_fp>
     GenConvKernel<SPATIAL_DIM, accT>::codeCacheRowOffset_;
 
 } // namespace fbgemm
diff --git a/src/GroupwiseConvAcc32Avx2.cc b/src/GroupwiseConvAcc32Avx2.cc
index 396e792..7ff29ea 100644
--- a/src/GroupwiseConvAcc32Avx2.cc
+++ b/src/GroupwiseConvAcc32Avx2.cc
@@ -60,14 +60,20 @@ void calculateRowOffsets(
 }
 
 template <int SPATIAL_DIM = 2>
-tuple<bool, int, int, int> getKernelSig(
+size_t/*tuple<bool, int, int, int>*/ getKernelSig(
     const conv_param_t<SPATIAL_DIM>& conv_param,
     bool isAZeroPointZero) {
   int C_per_G = conv_param.IC / conv_param.G;
   int K_per_G = conv_param.OC / conv_param.G;
-  auto kernelSig =
-      std::make_tuple(isAZeroPointZero, conv_param.G, C_per_G, K_per_G);
-  return kernelSig;
+  //auto kernelSig =
+  //    std::make_tuple(isAZeroPointZero, conv_param.G, C_per_G, K_per_G);
+
+  size_t hashVal = hash<bool>()(isAZeroPointZero);
+  hash_combine(hashVal, conv_param.G);
+  hash_combine(hashVal, C_per_G);
+  hash_combine(hashVal, K_per_G);
+
+  return hashVal;
 }
 
 template <int SPATIAL_DIM = 2, typename accT = int32_t>
author	Young Jin Kim <youki@microsoft.com>	2020-05-14 10:41:36 +0300
committer	Young Jin Kim <youki@microsoft.com>	2020-05-14 10:41:36 +0300
commit	f8274286ea12f71c455b75efe301df2f2086790d (patch)
tree	8f8d19dd8b19ea9aaa829a7ebbe1d72a8ecfd4dc
parent	eb2317d3f32b617031681be55b43883c23611a35 (diff)